Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
AsmMontRedcAVX2.py
Go to the documentation of this file.
1
#!/usr/bin/env sage -python
2
3
# rdi, rsi, rdx
4
# rax
5
6
# how to call
7
# sage -python AsmMontRedcAVX2.py <prime> <mode> > <output>.s
8
9
# The star import stays first so the explicit standard-library imports below
# are the bindings that end up in the module namespace.
from sage.all import *
import math
import sys
11
12
# Module-level set; nothing in the visible part of this file reads or updates it.
sefOfLimbs = set()
13
# Execution-mode flag; main() overwrites it with int(argv[1]).
# No other code visible in this file reads it.
printOut = 0
14
15
def rotate(l, x):
    """Return a copy of ``l`` with its last ``x`` items moved to the front.

    Works on any sequence that supports slicing and ``+`` (lists, strings,
    tuples).  With ``x == 0`` the result equals ``l``.
    """
    moved_to_front = l[-x:]
    remaining = l[:-x]
    return moved_to_front + remaining
17
18
def push():
    """Return the assembly text that saves registers at function entry.

    The text starts with a ``# push`` comment line, contains one ``push``
    instruction per saved register, and ends with a blank line.
    """
    # Order matters: pop() emits the matching restores in reverse order.
    saved_registers = ("rbx", "rbp", "rsi", "r12", "r13", "r14", "r15")
    instructions = "".join(" push " + reg + "\n" for reg in saved_registers)
    return "# push\n" + instructions + "\n"
25
26
def pop():
    """Return the assembly text that restores registers before ``ret``.

    The text starts with a ``# pop`` comment line, contains one ``pop``
    instruction per register, and ends with a blank line.
    """
    # Reverse of the order used by push().
    restored_registers = ("r15", "r14", "r13", "r12", "rsi", "rbp", "rbx")
    instructions = "".join(" pop " + reg + "\n" for reg in restored_registers)
    return "# pop\n" + instructions + "\n"
33
34
def OneTimeCarry(plimbs):
    """Generate the assembly for ``secsidh_internal_2047k221_OneTimeCarry``.

    For every ``j`` in ``range(plimbs)`` the emitted code:
      * loads the 32-byte vector at ``[rsi + j*32]``,
      * broadcasts the 64-bit constant at ``.base`` (defined outside this
        function) into ``ymm0`` and uses it as a mask,
      * keeps the masked bits, shifts the remaining bits right by 26 in
        each 64-bit lane, and adds the two parts,
      * stores the sum at ``[rdi + j*32]``.

    Returns the complete routine (label, prologue, body, epilogue, ``ret``)
    as a single string.
    """
    pieces = [
        ".global secsidh_internal_2047k221_OneTimeCarry\nsecsidh_internal_2047k221_OneTimeCarry:\n",
        push(),
    ]

    for limb in range(plimbs):
        idx = str(limb)
        pieces.extend((
            " vmovdqa ymm15, YMMWORD PTR [rsi + " + idx + "*32]\n",
            " vpbroadcastq ymm0, qword ptr [rip + .base]\n",
            # ymm1 = bits selected by the mask
            " vpand ymm1, ymm15, ymm0\n",
            # ymm15 = bits outside the mask, moved down by 26
            " vpandn ymm15, ymm0, ymm15\n",
            " vpsrlq ymm15, ymm15, 26\n",
            " vpaddq ymm15, ymm15, ymm1\n",
            " vmovdqa YMMWORD PTR [rdi + " + idx + "*32], ymm15\n",
        ))

    pieces.append(pop())
    pieces.append(" ret\n")
    return "".join(pieces)
70
71
72
def MontRedcAdd(plimbs):
    """Generate the assembly for ``secsidh_internal_2047k221_a_plus_u_i``.

    For every ``j`` in ``range(plimbs)`` the emitted code loads the
    (unaligned) 32-byte vector at ``[rdi + rdx*4 + j*32]``, adds the packed
    32-bit values at ``[rsi + j*32]`` to it, and stores the sum back to the
    same ``rdi``-based location.

    Returns the complete routine (label, prologue, body, epilogue, ``ret``)
    as a single string.
    """
    routine = "secsidh_internal_2047k221_a_plus_u_i"

    # The directive and the label must be on separate lines.  The previous
    # literal used "\s" instead of "\n", which emitted both on one line
    # with a stray backslash.
    pieces = [".global " + routine + "\n" + routine + ":\n", push()]

    for j in range(plimbs):
        idx = str(j)
        pieces.append(" vmovdqu ymm15, YMMWORD PTR [rdi + rdx*4 + " + idx + "*32]\n")
        pieces.append(" vpaddd ymm15 , ymm15, ymmword ptr [rsi + " + idx + "*32]\n")
        pieces.append(" vmovdqu YMMWORD PTR [rdi + rdx*4 + " + idx + "*32], ymm15\n")

    pieces.append(pop())
    pieces.append(" ret\n")
    return "".join(pieces)
104
105
# def fp_copy(plimbs):
106
# # registers reserved edi, esi, rdx
107
# # eax, rbx = ecx, r8d
108
# registers = ["r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"]
109
# registers64 = ["r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"]
110
111
# # if(plimbs > len(registers)):
112
# # print("ERROR: Index out range")
113
# # exit()
114
# state = registers[:plimbs]
115
# state64 = registers64[:plimbs]
116
117
# S = ""
118
# # S = ".intel_syntax noprefix\n\n"
119
# # S = S + ".section .rodata\n\n"
120
# # S = S + ".section .text\n\n"
121
122
# # S = S + ".macro p_times_w\n"
123
# # S = S + "mult_"+ str(plimbs) + "x" + str(plimbs) + ":\n"
124
125
# S = S + ".global secsidh_internal_2047k221_a_plus_u_i\secsidh_internal_2047k221_a_plus_u_i:\n"
126
127
# S = S + push()
128
129
# S = S + " xor r8, r8\n"
130
# S = S + " xor r9, r9\n"
131
# for j in range(0,plimbs):
132
# S = S + " vmovdqa ymm15, YMMWORD PTR [rdi + " + str(j) + "*32]\n"
133
# S = S + " vptest r8 , ymm15, ymm15\n"
134
# S = S + " r9 , ymm2, ymm1\n"
135
# S = S + " vmovdqa YMMWORD PTR [rdi + " + str(j) + "*32], ymm15\n"
136
# S = S + pop()
137
138
# S = S + " ret\n"
139
# return S
140
141
142
def fp_copy(plimbs):
    """Generate the assembly for ``secsidh_internal_2047k221_fp_copy``.

    For every ``j`` in ``range(plimbs)`` the emitted code copies the
    32-byte vector at ``[rsi + j*32]`` to ``[rdi + j*32]`` using aligned
    moves (``vmovdqa``).

    Returns the complete routine (label, prologue, body, epilogue, ``ret``)
    as a single string.
    """
    routine = "secsidh_internal_2047k221_fp_copy"

    # The directive and the label must be on separate lines.  The previous
    # literal used "\s" instead of "\n", which emitted both on one line
    # with a stray backslash.
    pieces = [".global " + routine + "\n" + routine + ":\n", push()]

    for j in range(plimbs):
        idx = str(j)
        pieces.append(" vmovdqa ymm15, YMMWORD PTR [rsi + " + idx + "*32]\n")
        pieces.append(" vmovdqa YMMWORD PTR [rdi + " + idx + "*32], ymm15\n")

    pieces.append(pop())
    pieces.append(" ret\n")
    return "".join(pieces)
173
174
175
def PrintMult(plimbs):
    """Generate the assembly for ``secsidh_internal_2047k221_p_times_w``.

    For every ``j`` in ``range(plimbs)`` the emitted code:
      * loads 32 bytes from ``[rdx + j*4]`` and broadcasts the lowest
        32-bit element to all lanes of ``ymm0``,
      * loads the permutation stored at ``.SHUFFLE_MUL`` (defined outside
        this function) and applies it to the vector at ``[rsi + j*32]``,
      * multiplies with ``vpmuldq`` and stores the 64-bit products at
        ``[rdi + (2*j)*32]``,
      * swaps the two 32-bit halves of every 64-bit lane
        (``vpshufd ..., 177``), multiplies again, and stores the products
        at ``[rdi + (2*j+1)*32]``.

    Returns the complete routine (label, prologue, body, epilogue, ``ret``)
    as a single string.
    """
    separator = "#############################\n"
    pieces = [
        ".global secsidh_internal_2047k221_p_times_w\nsecsidh_internal_2047k221_p_times_w:\n",
        push(),
    ]

    for j in range(plimbs):
        pieces.append(" vmovdqu ymm2, ymmword ptr [rdx + " + str(j) + "*4]\n")
        pieces.append(" vpbroadcastd ymm0, xmm2\n")
        pieces.append(" vmovdqa ymm1, ymmword ptr [rip + .SHUFFLE_MUL]\n")
        pieces.append(separator)
        # Address through the full 64-bit rsi.  The previous code used esi,
        # which truncates the pointer to 32 bits in 64-bit code.
        pieces.append(" vmovdqa ymm15, YMMWORD PTR [rsi + " + str(j) + "*32]\n")
        pieces.append(" vpermd ymm15, ymm1, ymm15\n")
        pieces.append(" vpmuldq ymm11 , ymm15, ymm0\n")
        pieces.append(" vmovdqa ymmword ptr [rdi + " + str(2 * j) + "*32] , ymm11\n")
        pieces.append(" vpshufd ymm15, ymm15, 177\n")
        pieces.append(" vpmuldq ymm12 , ymm15, ymm0\n")
        pieces.append(" vmovdqa ymmword ptr [rdi + " + str(2 * j + 1) + "*32] , ymm12\n")
        pieces.append(separator)

    pieces.append(pop())
    pieces.append(" ret\n")
    return "".join(pieces)
222
223
224
def fp_cmov(plimbs):
    """Generate the assembly for the ``fp_cmov`` routine.

    The emitted code zero-extends ``edx`` into ``rax``, negates it (so 0
    stays 0 and 1 becomes all ones) and broadcasts the result to every
    64-bit lane of ``ymm0`` as a mask.  For every ``j`` in
    ``range(plimbs)`` it then writes
    ``(mask & [rsi + j*32]) | (~mask & [rdi + j*32])`` back to
    ``[rdi + j*32]``.

    Returns the complete routine (label, body, ``vzeroupper``, ``ret``) as
    a single string.
    """
    prologue = (
        ".global fp_cmov\nfp_cmov:\n"
        " mov eax, edx\n"
        " neg rax\n"
        " vmovq xmm0, rax\n"
        " vpbroadcastq ymm0, xmm0\n"
        "#############################\n"
    )
    # One masked select per 32-byte vector; {0} is the vector index.
    select_template = (
        " vpandn ymm1, ymm0, ymmword ptr [rdi + {0}*32]\n"
        " vpand ymm2, ymm0, ymmword ptr [rsi + {0}*32]\n"
        " vpor ymm1, ymm2, ymm1\n"
        " vmovdqa ymmword ptr [rdi + {0}*32], ymm1\n"
    )
    selects = "".join(select_template.format(limb) for limb in range(plimbs))
    return prologue + selects + " vzeroupper\n ret\n"
264
265
266
#//+++++++++++++++ Main ++++++++++++++++++//
267
268
def main(argv):
    """Print the generated assembly to standard output.

    Parameters:
        argv: command-line arguments without the program name.
            ``argv[0]`` is the prime identifier (for example
            ``"2047k221"``) and ``argv[1]`` is the execution mode, parsed
            with ``int`` and stored in the module-level ``printOut``.

    Exits with status 1, after printing a message, when fewer than two
    arguments are given or the prime identifier is not supported.  These
    paths previously exited with status 0, which hid the failure from
    calling scripts.

    Raises:
        ValueError: if ``argv[1]`` is not an integer literal.
    """
    global printOut

    if len(argv) < 2:
        print(
            "\nplease specify the prime and type of execution [1 = only printing functions out, 0 = print full assembly]\n"
        )
        sys.exit(1)

    printOut = int(argv[1])
    prime = argv[0]

    # Supported prime identifiers and their bit lengths.  The bit length is
    # not used by the generation below; the table only validates the input.
    bit_lengths = {
        "2047k221": 2048,
        "4095k256": 4096,
        "5119k234": 5120,
        "6143k256": 6144,
        "8191k332": 8192,
        "9215k384": 9216,
    }
    if prime not in bit_lengths:
        print("\nError : no prime available for this input\n")
        sys.exit(1)

    # Assembler preamble.
    print(".intel_syntax noprefix\n\n" + ".section .rodata\n\n" + ".section .text\n\n")

    # Number of 32-byte vectors processed by the generated routine.
    plimbs = 10

    # Only MontRedcAdd is emitted; the other generators in this file
    # (PrintMult, fp_copy, OneTimeCarry, fp_cmov) are not called here.
    print(MontRedcAdd(plimbs))

    print("\n")
339
340
if __name__ == "__main__":
    # Hand main() the command-line arguments without the program name.
    cli_args = sys.argv[1:]
    main(cli_args)
fp_cmov
#define fp_cmov
Definition
fp-gmp.h:323
fp_copy
#define fp_copy
Definition
fp-gmp.h:79
AsmMontRedcAVX2.OneTimeCarry
OneTimeCarry(plimbs)
Definition
AsmMontRedcAVX2.py:34
AsmMontRedcAVX2.PrintMult
PrintMult(plimbs)
Definition
AsmMontRedcAVX2.py:175
AsmMontRedcAVX2.push
push()
Definition
AsmMontRedcAVX2.py:18
AsmMontRedcAVX2.MontRedcAdd
MontRedcAdd(plimbs)
Definition
AsmMontRedcAVX2.py:72
AsmMontRedcAVX2.rotate
rotate(l, x)
Definition
AsmMontRedcAVX2.py:15
AsmMontRedcAVX2.pop
pop()
Definition
AsmMontRedcAVX2.py:26
all
if
end if
Definition
prime_search.m:17
dCTIDH
src
common
fp
AsmMontRedcAVX2.py
Generated by
1.9.8