Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
AsmKaratsubaRedc Namespace Reference

Functions

 Print_mult_dyn (plimbs)
 rotate (l, x)
 push ()
 pop ()
 PrintMontRedc (plimbs)
 PrintMult (plimbs)
 PrintAdd (plimbs)
 PrintAddOdd (plimbs)
 PrintAddAtEnd (plimbs)
 PrintAddOddAtEnd (plimbs)
 PrintSubOdd (plimbs)
 PrintSub (plimbs)
 Print_Assembly (plimbs)
 main (argv)

Variables

 sefOfLimbs = set()
int printOut = 0
int prime = 0

Function Documentation

◆ main()

main ( argv)

Definition at line 1165 of file AsmKaratsubaRedc.py.

def main(argv):
    """Generate the multiplication/reduction assembly for one prime.

    ``argv[0]`` is the prime name (one of the keys of ``bit_lengths`` below)
    and ``argv[1]`` the execution mode: a non-zero integer only lists the
    macro names, ``0`` prints the full assembly.  The values are stored in the
    module globals ``prime`` and ``printOut`` that the other generators read.

    The process exits (via ``sys.exit()``) after printing a message when fewer
    than two arguments are given or the prime name is unknown.
    """
    global printOut
    global prime

    # The original started with a debugging short-circuit
    # (``Print_Assembly(59)`` followed by ``exit(0)``) that made everything
    # below unreachable; it has been removed so the arguments are honoured.
    if len(argv) < 2:
        print(
            "\nplease specify the prime and type of execution [1 = only printing functions out, 0 = print full assembly]\n")
        sys.exit()

    # Operand size in bits for every supported prime.
    bit_lengths = {
        "2047k221": 2048,
        "4095k256": 4096,
        "5119k234": 5120,
        "6143k256": 6144,
        "8191k332": 8192,
        "9215k384": 9216,
    }

    printOut = int(argv[1])
    prime = argv[0]

    length = bit_lengths.get(prime)
    if length is None:
        print("\nError : no prime available for this input\n")
        sys.exit()

    header = ".intel_syntax noprefix\n\n"
    header += ".section .rodata\n\n"
    header += ".section .text\n\n"
    print(header)

    plimbs = length // 64  # number of 64-bit words per operand
    size = str(plimbs) + "x" + str(plimbs)

    Print_Assembly(plimbs)

    # Callable entry point that simply expands the top-level macro.
    entry = ".global fp_mult_" + size + "\n"
    entry += "fp_mult_" + size + ":\n"
    entry += " mult_" + size + "\n"
    entry += " ret\n"
    print(entry)

    redc = "\n.global fp_mont_redc_a\n"
    redc += "fp_mont_redc_a:\n"
    print(redc)
    print(PrintMontRedc(plimbs))

    print("\n")
int main(void)
Definition checkct.c:52
end if

References main(), Print_Assembly(), and PrintMontRedc().

Here is the call graph for this function:

◆ pop()

pop ( )

Definition at line 394 of file AsmKaratsubaRedc.py.

def pop():
    """Return the assembly text that pops the saved registers.

    The text starts with a ``# pop`` marker, pops r15, r14, r13, r12, rsi,
    rbp and rbx in that order (one instruction per line) and ends with an
    empty line.  ``rdi`` is not popped.
    """
    restored = ("r15", "r14", "r13", "r12", "rsi", "rbp", "rbx")
    return "# pop\n" + "".join(" pop " + reg + "\n" for reg in restored) + "\n"
401
402

Referenced by PrintMult().

Here is the caller graph for this function:

◆ Print_Assembly()

Print_Assembly ( plimbs)

Definition at line 1067 of file AsmKaratsubaRedc.py.

1067def Print_Assembly(plimbs):
# Emit every macro needed for a plimbs x plimbs multiplication, recursing on
# the half-size operands; when the global printOut is non-zero only the macro
# names are printed instead of the macro bodies.
# Sizes already handled are remembered in the global set sefOfLimbs.
# NOTE(review): this listing has lost its indentation, so the nesting
# described in the comments below is inferred from statement order - confirm
# against the original file before relying on it.
1068 global sefOfLimbs
1069 global printOut
1070 # global functions
# Sizes up to breakout are generated directly with PrintMult (last branch);
# larger sizes are split into half-size multiplications.
1071 breakout = 9
# printOut != 0: list the macro names only.
1072 if(printOut):
1073 while 1 :
1074 if plimbs > breakout :
1075 print("++++++++++++++++++++++++++")
1076 if(plimbs not in sefOfLimbs):
1077 sefOfLimbs.add(plimbs)
1078
# Even size: helper macros plus sub-multiplications of plimbs//2 and plimbs//2+1 words.
1079 if(plimbs%2!=1):
1080 print("add_" + str(plimbs//2) + "x" + str(plimbs//2))
1081 print("sub_d_" + str(plimbs + 2) + "x" + str(plimbs) + "_woc")
1082 print("add_" + str(plimbs + 2) + "x" + str(plimbs + 1))
1083 if(plimbs > 9):
1084 # print("mult_"+ str(plimbs//2) + "x" + str(plimbs//2))
1085 # print("mult_"+ str(plimbs//2+1) + "x" + str(plimbs//2+1))
1086 # else:
1087 Print_Assembly(plimbs//2)
1088 Print_Assembly(plimbs//2+1)
# Odd size: additionally needs a sub-multiplication of plimbs//2+2 words.
1089 else:
1090 print("add_"+ str(plimbs//2 + 1) + "x" + str(plimbs//2))
1091 print("sub_d_"+ str(plimbs + 3) + "x" + str(plimbs + 1) + "_woc")
1092 print("add_"+ str(plimbs + 2) + "x" + str(plimbs + 1))
1093 if(plimbs > 9):
1094 # print("mult_"+ str(plimbs//2) + "x" + str(plimbs//2))
1095 # print("mult_"+ str(plimbs//2+1) + "x" + str(plimbs//2+1))
1096 # else:
1097 Print_Assembly(plimbs//2)
1098 Print_Assembly(plimbs//2+2)
1099 Print_Assembly(plimbs//2+1)
1100 # plimbs = plimbs // 2
1101
1102 print(">mult_"+ str(plimbs) + "x" + str(plimbs))
1103
# Presumably reached when plimbs is already in sefOfLimbs: leave the loop.
1104 else:
1105 plimbs = 0
1106 break
1107 else:
1108 if plimbs <= breakout:
1109 if(plimbs not in sefOfLimbs):
1110 sefOfLimbs.add(plimbs)
1111 print("mult_"+ str(plimbs) + "x" + str(plimbs))
1112 if(plimbs+1 not in sefOfLimbs):
1113 if(plimbs+1 <= 9):
1114 sefOfLimbs.add(plimbs+1)
1115 print("mult_"+ str(plimbs+1) + "x" + str(plimbs+1))
1116 # PrintMult(plimbs+1)
1117 break
# printOut == 0: same traversal, but the macro bodies are generated.
1118 else:
1119 while 1 :
1120 if plimbs > breakout:
1121 if(plimbs not in sefOfLimbs):
1122 sefOfLimbs.add(plimbs)
1123
# Even size: add_<h>x<h>, the sub_d macro from PrintSubOdd and the final
# add_<plimbs+2>x<plimbs+1> (PrintAddAtEnd), then both sub-multiplications.
1124 if(plimbs%2!=1):
1125 PrintAdd(plimbs//2)
1126 PrintSubOdd(plimbs)
1127 PrintAddAtEnd(plimbs+1)
1128 # PrintAddOddAtEnd(plimbs+1)
1129 # if(plimbs > breakout):
1130 Print_Assembly(plimbs//2)
1131 Print_Assembly(plimbs//2+1)
# Odd size: the "odd" helper variants and three sub-multiplication sizes.
1132 else:
1133 PrintAddOdd(plimbs//2)
1134 PrintSub(plimbs)
1135 # PrintAddOdd(plimbs+1)
1136 PrintAddOddAtEnd(plimbs+1)
1137
1138 # if(plimbs > breakout):
1139 Print_Assembly(plimbs//2)
1140 Print_Assembly(plimbs//2+2)
1141 Print_Assembly(plimbs//2+1)
1142
1143 # functions["mult_" + str(plimbs) + "x" + str(plimbs)]()
# The mult_<plimbs>x<plimbs> macro itself, printed after its helper macros.
1144 Print_mult_dyn(plimbs)
1145 # print("# ------------------" + str(plimbs) + "x" + str(plimbs))
1146
1147 else:
1148 plimbs = 0
1149 break
1150 else:
# Base case: sizes up to breakout are emitted directly by PrintMult; plimbs+1
# is emitted as well when it is still within the breakout limit.
1151 if plimbs <= breakout:
1152 if(plimbs not in sefOfLimbs):
1153 sefOfLimbs.add(plimbs)
1154 PrintMult(plimbs)
1155 if(plimbs+1 not in sefOfLimbs):
1156 if(plimbs+1 <= breakout):
1157 sefOfLimbs.add(plimbs+1)
1158 PrintMult(plimbs+1)
1159 break
1160
1161
1162
1163#//+++++++++++++++ Main ++++++++++++++++++//
1164

References Print_Assembly(), Print_mult_dyn(), PrintAdd(), PrintAddAtEnd(), PrintAddOdd(), PrintAddOddAtEnd(), PrintMult(), PrintSub(), and PrintSubOdd().

Referenced by main(), and Print_Assembly().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Print_mult_dyn()

Print_mult_dyn ( plimbs)

Definition at line 14 of file AsmKaratsubaRedc.py.

def Print_mult_dyn(plimbs):
    """Print the assembler macro ``mult_<plimbs>x<plimbs>`` and return 0.

    The macro body only moves pointers between registers and invokes other
    macros (``add_*``, ``mult_*`` of about half the size, ``sub_d_*_woc`` or
    ``_sub_d_*_woc``, and a final ``add_*``); those macros are expected to be
    defined elsewhere in the generated file.

    For even ``plimbs`` the operands are split at byte offset ``4*plimbs`` and
    ``((plimbs//2)+1)*32`` bytes of stack are reserved; for odd ``plimbs`` the
    split is at ``4*(plimbs-1)`` and ``((plimbs//2)+2)*32`` bytes are reserved.
    Scratch areas are addressed at ``rsp``, ``rsp + frame//4`` and
    ``rsp + frame//2``.
    """
    half = plimbs // 2
    small = str(half) + "x" + str(half)
    final_add = "add_" + str(plimbs + 2) + "x" + str(plimbs + 1)
    epilogue = ["pop rbx", "pop rbp", "pop r12", "pop r13", "pop r14", ".endm"]

    if plimbs % 2 == 0:
        frame = (half + 1) * 32
        split = str(4 * plimbs)
        quarter = "[rsp+" + str(frame // 4) + "]"
        middle = "[rsp+" + str(frame // 2) + "]"
        big = str(half + 1) + "x" + str(half + 1)
        lines = [
            ".macro mult_" + str(plimbs) + "x" + str(plimbs),
            "push r14",
            "push r13",
            "lea r13, [rsi+" + split + "]",
            "push r12",
            "mov r12, rdx",
            "mov rdx, r13",
            "push rbp",
            "lea r14, [r12+" + split + "]",
            "mov rbp, rsi",
            "push rbx",
            "mov rbx, rdi",
            "sub rsp, " + str(frame),
            "mov rdi, rsp",
            "add_" + small,
            "mov rdx, r14",
            "mov rsi, r12",
            "lea rdi, " + quarter,
            "add_" + small,
            "mov rdx, r12",
            "mov rsi, rbp",
            "mov rdi, rbx",
            "mult_" + small,
            "lea r12, [rbx+" + str(plimbs * 8) + "]",
            "mov rdx, rsp",
            "lea rsi, " + quarter,
            "lea rdi, " + middle,
            "mult_" + big,
            "mov rdx, r14",
            "mov rsi, r13",
            "mov rdi, r12",
            "mult_" + small,
            "mov rdx, r12",
            "mov rsi, rbx",
            "lea rdi, " + middle,
            "sub_d_" + str(plimbs + 2) + "x" + str(plimbs) + "_woc",
            "lea rdi, [rbx+" + split + "]",
            "lea rdx, " + middle,
            "mov rsi, rdi",
            final_add,
            "add rsp, " + str(frame),
        ]
    else:
        frame = (half + 2) * 32
        split = str(4 * (plimbs - 1))
        quarter = "[rsp+" + str(frame // 4) + "]"
        middle = "[rsp+" + str(frame // 2) + "]"
        uneven_add = "add_" + str(half + 1) + "x" + str(half)
        lines = [
            ".macro mult_" + str(plimbs) + "x" + str(plimbs),
            "push r14",
            "push r13",
            "lea r13, [rsi+" + split + "]",
            "push r12",
            "mov r12, rdx",
            "mov rdx, rsi",
            "push rbp",
            "lea r14, [r12+" + split + "]",
            "mov rbp, rsi",
            "mov rsi, r13",
            "push rbx",
            "mov rbx, rdi",
            "sub rsp, " + str(frame),
            "mov rdi, rsp",
            uneven_add,
            "mov rdx, r12",
            "mov rsi, r14",
            "lea rdi, " + quarter,
            uneven_add,
            "mov rdx, r12",
            "mov rsi, rbp",
            "mov rdi, rbx",
            "lea rbp, [rbx+" + str((plimbs - 1) * 8) + "]",
            "mult_" + small,
            "mov rdx, rsp",
            "lea rsi, " + quarter,
            "lea rdi, " + middle,
            "mult_" + str(half + 2) + "x" + str(half + 2),
            "mov rdx, r14",
            "mov rsi, r13",
            "mov rdi, rbp",
            "mult_" + str(half + 1) + "x" + str(half + 1),
            "mov rsi, rbx",
            "mov rdx, rbp",
            "lea rdi, " + middle,
            # The odd variant uses the underscore-prefixed subtraction macro.
            "_sub_d_" + str(plimbs + 3) + "x" + str(plimbs + 1) + "_woc",
            "lea rdi, [rbx+" + split + "]",
            "lea rdx, " + middle,
            "mov rsi, rdi",
            final_add,
            "add rsp, " + str(frame),
        ]

    print("\n".join(lines + epilogue) + "\n")
    return 0
186
187# def Print_mult_32x32():
188# S = """.macro mult_32x32
189
190# push r14
191# xor eax, eax
192# mov ecx, 34
193# push r13
194# lea r13, [rsi+128]
195# push r12
196# mov r12, rdx
197# mov rdx, r13
198# push rbp
199# lea r14, [r12+128]
200# mov rbp, rsi
201# push rbx
202# mov rbx, rdi
203# sub rsp, 544
204# mov rdi, rsp
205# rep stosq
206# lea rdi, [rsp+272]
207# mov ecx, 34
208# rep stosq
209# mov rdi, rsp
210# add_16x16
211# mov rdx, r14
212# mov rsi, r12
213# xor eax, eax
214# lea rdi, [rsp+136]
215# add_16x16
216# mov rdx, r12
217# mov rsi, rbp
218# mov rdi, rbx
219# mult_16x16
220# lea r12, [rbx+256]
221# mov rdx, rsp
222# lea rsi, [rsp+136]
223# lea rdi, [rsp+272]
224# mult_17x17
225# mov rdx, r14
226# mov rsi, r13
227# mov rdi, r12
228# mult_16x16
229# mov rdx, r12
230# mov rsi, rbx
231# xor eax, eax
232# lea rdi, [rsp+272]
233# sub_d_34x32_woc
234# lea rdi, [rbx+128]
235# lea rdx, [rsp+272]
236# xor eax, eax
237# mov rsi, rdi
238# add_34x33
239# add rsp, 544
240# pop rbx
241# pop rbp
242# pop r12
243# pop r13
244# pop r14
245# ret
246# """
247# print(S)
248# return 0
249
250# def Print_mult_17x17():
251# S = """.macro mult_17x17
252# push r14
253# xor eax, eax
254# mov ecx, 18
255# push r13
256# lea r13, [rsi+64]
257# push r12
258# mov r12, rdx
259# mov rdx, rsi
260# push rbp
261# lea r14, [r12+64]
262# mov rbp, rsi
263# mov rsi, r13
264# push rbx
265# mov rbx, rdi
266# sub rsp, 288
267# mov rdi, rsp
268# rep stosq
269# lea rdi, [rsp+144]
270# mov ecx, 18
271# rep stosq
272# mov rdi, rsp
273# add_9x8
274# mov rdx, r12
275# mov rsi, r14
276# lea rdi, [rsp+72]
277# xor eax, eax
278# add_9x8
279# mov rdx, r12
280# mov rsi, rbp
281# mov rdi, rbx
282# xor eax, eax
283# lea rbp, [rbx+128]
284# mult_8x8
285# mov rdx, rsp
286# lea rsi, [rsp+72]
287# xor eax, eax
288# lea rdi, [rsp+144]
289# mult_9x9
290# mov rdx, r14
291# mov rsi, r13
292# mov rdi, rbp
293# xor eax, eax
294# mult_9x9
295# mov rdx, rbx
296# mov rsi, rbp
297# xor eax, eax
298# lea rdi, [rsp+144]
299# sub_d_18x18_woc
300# lea rdi, [rbx+64]
301# lea rdx, [rsp+144]
302# xor eax, eax
303# mov rsi, rdi
304# add_19x18
305# add rsp, 288
306# pop rbx
307# pop rbp
308# pop r12
309# pop r13
310# pop r14
311# .endm
312# """
313# print(S)
314# return 0
315
316# def Print_mult_16x16():
317# S = """.macro mult_16x16
318# push r14
319# xor eax, eax
320# mov ecx, 18
321# push r13
322# lea r13, [rsi+64]
323# push r12
324# mov r12, rdx
325# mov rdx, r13
326# push rbp
327# lea r14, [r12+64]
328# mov rbp, rsi
329# push rbx
330# mov rbx, rdi
331# sub rsp, 288
332# mov rdi, rsp
333# rep stosq
334# lea rdi, [rsp+144]
335# mov ecx, 18
336# rep stosq
337# mov rdi, rsp
338# add_8x8
339# mov rdx, r14
340# mov rsi, r12
341# lea rdi, [rsp+72]
342# xor eax, eax
343# add_8x8
344# mov rdx, r12
345# mov rsi, rbp
346# mov rdi, rbx
347# xor eax, eax
348# lea r12, [rbx+128]
349# mult_8x8
350# mov rdx, rsp
351# lea rsi, [rsp+72]
352# xor eax, eax
353# lea rdi, [rsp+144]
354# mult_9x9
355# mov rdx, r14
356# mov rsi, r13
357# mov rdi, r12
358# xor eax, eax
359# mult_8x8
360# mov rdx, r12
361# mov rsi, rbx
362# xor eax, eax
363# lea rdi, [rsp+144]
364# sub_d_18x16_woc
365# lea rdi, [rbx+64]
366# lea rdx, [rsp+144]
367# xor eax, eax
368# mov rsi, rdi
369# add_18x17
370# add rsp, 288
371# pop rbx
372# pop rbp
373# pop r12
374# pop r13
375# pop r14
376# .endm
377# """
378# print(S)
379# return 0
380
381# functions = {"mult_17x17":Print_mult_17x17,"mult_16x16":Print_mult_16x16,"mult_32x32":Print_mult_32x32}
382

References if.

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintAdd()

PrintAdd ( plimbs)

Definition at line 674 of file AsmKaratsubaRedc.py.

def PrintAdd(plimbs):
    """Print the assembler macro ``add_<plimbs>x<plimbs>`` and return 0.

    The macro adds the ``plimbs`` 64-bit words at ``[rsi]`` and ``[rdx]``
    word by word (``add`` for word 0, ``adc`` inside a ``.rept`` block for the
    remaining ``plimbs - 1`` words), stores the sums at ``[rdi]`` and writes
    the final carry, collected in ``r8``, to word ``plimbs`` of ``[rdi]``.
    """
    size = str(plimbs)
    text = "".join([
        ".macro add_" + size + "x" + size + "\n",
        "# intro\n",
        " xor r8, r8\n",
        " mov rax, [rsi + 0]\n add rax, [rdx + 0]\n",
        " mov [rdi + 0], rax\n",
        "# loop\n",
        " .set k, 1\n",
        " .rept " + str(plimbs - 1) + "\n",
        " mov rax, [rsi + 8*k]\n adc rax, [rdx + 8*k]\n",
        " mov [rdi + 8*k], rax\n .set k, k+1\n .endr\n",
        "# outro\n",
        " adc r8, 0\n",
        " mov [rdi + " + size + "*8], r8\n",
        ".endm\n",
    ])
    print(text)
    return 0
703
704#
705# add function for "oddly" combinations like ADD_17x16, ADD_9x8
706#

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintAddAtEnd()

PrintAddAtEnd ( plimbs)

Definition at line 741 of file AsmKaratsubaRedc.py.

def PrintAddAtEnd(plimbs):
    """Print the assembler macro ``add_<plimbs+1>x<plimbs>`` and return 0.

    The macro adds words ``0 .. plimbs`` of ``[rsi]`` and ``[rdx]`` into
    ``[rdi]`` (``add`` for word 0, ``adc`` in a ``.rept plimbs`` block) and
    then carries into the next three words, copying words
    ``plimbs+1 .. plimbs+3`` from ``[rsi]`` to ``[rdi]`` with ``adc rax, 0``.
    """
    parts = [
        ".macro add_" + str(plimbs + 1) + "x" + str(plimbs) + "\n",
        "# intro\n",
        " mov rax, [rsi + 0]\n add rax, [rdx + 0]\n",
        " mov [rdi + 0], rax\n",
        "# loop\n",
        " .set k, 1\n",
        " .rept " + str(plimbs) + "\n",
        " mov rax, [rsi + 8*k]\n adc rax, [rdx + 8*k]\n",
        " mov [rdi + 8*k], rax\n .set k, k+1\n .endr\n",
        "# outro\n",
    ]
    # Ripple the carry through the three words above the summed range.
    for word in range(plimbs + 1, plimbs + 4):
        idx = str(word)
        parts.append(" mov rax, [rsi + 8*" + idx + "]\n")
        parts.append(" adc rax, 0\n")
        parts.append(" mov [rdi + 8*" + idx + "], rax\n")
    parts.append(".endm\n")

    print("".join(parts))
    return 0
773
774

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintAddOdd()

PrintAddOdd ( plimbs)

Definition at line 707 of file AsmKaratsubaRedc.py.

def PrintAddOdd(plimbs):
    """Print the assembler macro ``add_<plimbs+1>x<plimbs>`` and return 0.

    The macro adds the first ``plimbs`` words of ``[rsi]`` and ``[rdx]`` into
    ``[rdi]``, then adds only the carry to word ``plimbs`` of ``[rsi]`` (the
    ``[rdx]`` operand contributes no word there) and finally stores the
    remaining carry, collected in ``r8``, in word ``plimbs + 1`` of ``[rdi]``.
    """
    top = str(plimbs)
    text = "".join([
        ".macro add_" + str(plimbs + 1) + "x" + top + "\n",
        "# intro\n",
        " xor r8, r8\n",
        " mov rax, [rsi + 0]\n add rax, [rdx + 0]\n",
        " mov [rdi + 0], rax\n",
        "# loop\n",
        " .set k, 1\n",
        " .rept " + str(plimbs - 1) + "\n",
        " mov rax, [rsi + 8*k]\n adc rax, [rdx + 8*k]\n",
        " mov [rdi + 8*k], rax\n .set k, k+1\n .endr\n",
        "# outro\n",
        " mov rax, [rsi + 8*" + top + "]\n",
        " adc rax, 0\n",
        " mov [rdi + 8*" + top + "], rax\n",
        " adc r8, 0\n",
        " mov [rdi + 8*" + str(plimbs + 1) + "], r8\n",
        ".endm\n",
    ])
    print(text)
    return 0
740

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintAddOddAtEnd()

PrintAddOddAtEnd ( plimbs)

Definition at line 775 of file AsmKaratsubaRedc.py.

def PrintAddOddAtEnd(plimbs):
    """Print the assembler macro ``add_<plimbs+1>x<plimbs>`` and return 0.

    The macro adds words ``0 .. plimbs+1`` of ``[rsi]`` and ``[rdx]`` into
    ``[rdi]`` (``add`` for word 0, ``adc`` in a ``.rept plimbs+1`` block) and
    then carries into the next four words, copying words
    ``plimbs+2 .. plimbs+5`` from ``[rsi]`` to ``[rdi]`` with ``adc rax, 0``.
    """
    parts = [
        ".macro add_" + str(plimbs + 1) + "x" + str(plimbs) + "\n",
        "# intro\n",
        " mov rax, [rsi + 0]\n add rax, [rdx + 0]\n",
        " mov [rdi + 0], rax\n",
        "# loop\n",
        " .set k, 1\n",
        " .rept " + str(plimbs + 1) + "\n",
        " mov rax, [rsi + 8*k]\n adc rax, [rdx + 8*k]\n",
        " mov [rdi + 8*k], rax\n .set k, k+1\n .endr\n",
        "# outro\n",
    ]
    # Ripple the carry through the four words above the summed range.
    for word in range(plimbs + 2, plimbs + 6):
        idx = str(word)
        parts.append(" mov rax, [rsi + 8*" + idx + "]\n")
        parts.append(" adc rax, 0\n")
        parts.append(" mov [rdi + 8*" + idx + "], rax\n")
    parts.append(".endm\n")

    print("".join(parts))
    return 0
814

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintMontRedc()

PrintMontRedc ( plimbs)

Definition at line 403 of file AsmKaratsubaRedc.py.

def PrintMontRedc(plimbs):
    """Return the unrolled assembly body of the reduction routine.

    The text is built for ``plimbs`` 64-bit words and reads the module global
    ``prime`` to name the modulus symbol
    (``secsidh_internal_<prime>_p@GOTPCREL[rip]``, loaded into ``r14``).
    It consists of ``plimbs`` unrolled rows; each row multiplies one word by
    the modulus with ``mulx`` and accumulates with ``adcx``/``adox``:

    * row 0 reads its input from ``[rsi]`` and writes to the stack buffer,
    * rows ``1 .. plimbs-2`` work in place on the stack buffer,
    * the last row writes its ``plimbs`` result words to ``[rdi]``.

    The text ends by releasing the stack buffer and jumping to
    ``.reduce_once`` (a label that is not emitted here).

    NOTE(review): the carry step after each row always uses ``rbx``.  That is
    the register receiving the high half of the last ``mulx`` only when
    ``plimbs`` is even - confirm odd sizes are never requested.
    """
    out = []
    emit = out.append

    def begin_row(j):
        # First partial product of row j (j >= 1), operating on the stack buffer.
        emit("########################## mul\n")
        emit("xor rax, rax\n")
        emit("mov rdx, [rsp + 8*" + str(j) + "]\n")
        emit("mulx rcx, rax, [r14]\n")
        emit("adcx rax, rdx\n")
        emit("mov [rsp + 8*" + str(j) + "], rax\n\n")

    def multiply_row(src, src_shift, dst, dst_shift):
        # Remaining partial products; rcx and rbx alternate between holding
        # the running word and receiving the next high half.
        for k in range(1, plimbs):
            if k & 1:
                high, acc = "rbx", "rcx"
            else:
                high, acc = "rcx", "rbx"
            emit("mulx " + high + ", rax, [r14 + 8*" + str(k) + "]\n")
            emit("adcx " + acc + ", [" + src + " + 8*" + str(k + src_shift) + "]\n")
            emit("adox " + acc + ", rax\n")
            emit("mov [" + dst + " + 8*" + str(k + dst_shift) + "], " + acc + "\n\n")

    def fold_high_word(src, src_idx, dst, dst_idx):
        # Add the last high half (rbx) and both carry chains into one word.
        emit("mov r11, 0\n")
        emit("adox rbx, [" + src + " + 8*" + str(src_idx) + "]\n")
        emit("adcx rbx, r11\n")
        emit("mov [" + dst + " + 8*" + str(dst_idx) + "], rbx\n\n")

    def ripple(src, first):
        # Propagate the carries through words first .. 2*plimbs into the stack buffer.
        emit("mov rax, 0\n")
        for k in range(first, 2 * plimbs + 1):
            emit("mov r11, 0\n")
            emit("adox r11, [" + src + " + 8*" + str(k) + "]\n")
            emit("adcx r11, rax\n")
            emit("mov [rsp + 8*" + str(k) + "], r11\n\n")

    emit("push r14\n")
    emit("sub rsp, 2*pbytes+8\n")
    emit("mov r14, secsidh_internal_" + str(prime) + "_p@GOTPCREL[rip]\n")
    emit("xor rax, rax\n")
    emit("mov [rsp + 8*" + str(2 * plimbs) + "], rax\n")

    # Row 0: input comes straight from [rsi], output goes to the stack buffer.
    emit("########################## mul\n")
    emit("mov rdx, [rsi]\n")
    emit("mulx rcx, rax, [r14]\n")
    emit("adcx rax, rdx\n")
    emit("mov [rsp], rax\n\n")
    multiply_row("rsi", 0, "rsp", 0)
    emit("######### carry\n")
    fold_high_word("rsi", plimbs, "rsp", plimbs)
    ripple("rsi", plimbs + 1)

    # Rows 1 .. plimbs-2: in place on the stack buffer.
    for j in range(1, plimbs - 1):
        begin_row(j)
        multiply_row("rsp", j, "rsp", j)
        emit("######### carry\n")
        fold_high_word("rsp", plimbs + j, "rsp", plimbs + j)
        ripple("rsp", plimbs + 1 + j)

    # Last row: result words are written directly to [rdi].
    j = plimbs - 1
    begin_row(j)
    multiply_row("rsp", j, "rdi", -1)
    fold_high_word("rsp", plimbs + j, "rdi", plimbs - 1)

    emit("########################## done\n")
    emit("add rsp, 2*pbytes+8\n")
    emit("pop r14\n")
    emit("jmp .reduce_once\n")

    return "".join(out)
593
594
595

References if.

Referenced by main().

Here is the caller graph for this function:

◆ PrintMult()

PrintMult ( plimbs)

Definition at line 596 of file AsmKaratsubaRedc.py.

def PrintMult(plimbs):
    """Print the assembler macro ``mult_<plimbs>x<plimbs>`` and return 0.

    The macro multiplies the ``plimbs`` words at ``[rsi]`` by the ``plimbs``
    words at ``[rdx]`` (copied to ``rbp``) row by row with ``mulx`` and the
    ``adcx``/``adox`` carry chains, and stores ``2*plimbs`` result words at
    ``[rdi]``.  The running partial sum is kept in up to nine registers
    (``rcx``, ``r8`` .. ``r15``); larger sizes print an error and exit.
    The prologue and epilogue come from ``push()`` and ``pop()``.

    Fix over the original: the first accumulation after the initial ``mulx``
    is always emitted as ``add``.  Previously, for ``plimbs == 2`` it was
    emitted as ``adc``, which would consume a carry flag that nothing in the
    macro has set.
    """
    registers = ["rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"]
    if plimbs > len(registers):
        print("ERROR: Index out range")
        exit()
    state = registers[:plimbs]
    top = len(state) - 1  # index of the register holding the lowest live word

    out = [".macro mult_" + str(plimbs) + "x" + str(plimbs) + "\n"]
    emit = out.append
    emit(push())

    emit("# intro \n")
    emit(" mov rbp, rdx\n")
    emit(" mov rdx, [rbp]\n")
    emit(" mulx " + state[top] + ", rcx, [rsi + 0*8]\n")
    emit(" mov [rdi + 0*8], rcx\n")
    for j in range(1, plimbs):
        emit(" mulx " + state[top - j] + ", rax, [rsi + " + str(j) + "*8]\n")
        # The carry chain starts here, so the first addition must not read CF.
        opcode = "add" if j == 1 else "adc"
        emit(" " + opcode + " " + state[top - j + 1] + ", rax\n")
        if j == plimbs - 1:
            emit(" adc " + state[top - j] + ", 0\n")

    for i in range(1, plimbs):
        emit("# loop i = " + str(i) + "\n")
        emit(" mov rdx, [rbp + " + str(i) + "*8]\n")

        for j in range(plimbs):
            emit(" mulx rbx, rax, [rsi + " + str(j) + "*8]\n")
            if j == 0:
                # The lowest word is final after this addition: store it and
                # reuse its register for the new top word of this row.
                emit(" adcx " + state[top] + ", rax\n")
                emit(" adox " + state[top - 1] + ", rbx\n")
                emit(" mov [rdi + " + str(i) + "*8], " + state[top] + "\n")
                emit(" mov " + state[top] + ", 0\n")
            elif j == plimbs - 1:
                emit(" adcx " + state[top - j] + ", rax\n")
                emit(" adox " + state[top] + ", rbx\n")
                emit(" adc " + state[top] + ", 0\n")
            else:
                emit(" adcx " + state[top - j] + ", rax\n")
                emit(" adox " + state[top - 1 - j] + ", rbx\n")
        # Re-map the register roles once per row so the same index
        # expressions address the shifted partial sum.
        state = rotate(state, 1)

    emit("# outro\n")
    for j in range(plimbs):
        emit(" mov [rdi + " + str(plimbs + j) + "*8], " + state[top - j] + "\n")

    emit(pop())
    emit(".endm\n")

    print("".join(out))
    return 0
673

References pop(), push(), and rotate().

Referenced by Print_Assembly().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ PrintSub()

PrintSub ( plimbs)

Definition at line 941 of file AsmKaratsubaRedc.py.

941def PrintSub(plimbs):
942
943 registers = ["rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"]
944
945 S = ""
946 # S = ".intel_syntax noprefix\n\n"
947 # S = S + ".section .rodata\n\n"
948 # S = S + ".section .text\n\n"
949 # S = S + ".macro sub_d_"+ str(plimbs + 2) + "x" + str(plimbs) + "_woc\n"
950 # # S = S + "sub_d_"+ str(plimbs + 2) + "x" + str(plimbs) + "_woc:\n"
951
952 # S = S + push()
953
954 plimbs += 1
955 limbs_counter = plimbs
956
957 S = ".macro _sub_d_"+ str(plimbs + 2) + "x" + str(plimbs) + "_woc\n"
958
959 S = S + "# intro\n"
960 S = S + " mov rax, [rdi]\n"
961 S = S + " sub rax, [rsi]\n"
962 S = S + " mov [rdi], rax\n"
963
964
965 S = S + "# loop\n"
966 for i in range(1, plimbs-2):
967 S = S + " mov rax, [rdi + " + str(i) + "*8]\n"
968 S = S + " sbb rax, [rsi + " + str(i) + "*8]\n"
969 S = S + " mov [rdi + " + str(i) + "*8], rax\n"
970
971 S = S + "# outro\n"
972 S = S + " mov rax, [rdi + " + str(plimbs-2) + "*8]\n"
973 S = S + " sbb rax, 0\n"
974 S = S + " mov [rdi + " + str(plimbs-2) + "*8], rax\n"
975 S = S + " mov rax, [rdi + " + str(plimbs-1) + "*8]\n"
976 S = S + " sbb rax, 0\n"
977 S = S + " mov [rdi + " + str(plimbs-1) + "*8], rax\n"
978 S = S + " mov rax, [rdi + " + str(plimbs) + "*8]\n"
979 S = S + " sbb rax, 0\n"
980 S = S + " mov [rdi + " + str(plimbs) + "*8], rax\n"
981 S = S + " mov rax, [rdi + " + str(plimbs+1) + "*8]\n"
982 S = S + " sbb rax, 0\n"
983 S = S + " mov [rdi + " + str(plimbs+1) + "*8], rax\n"
984
985 S = S + "# 2nd subtraction ---------\n"
986
987
988 S = S + "# intro\n"
989 S = S + " mov rax, [rdi]\n"
990 S = S + " sub rax, [rdx]\n"
991 S = S + " mov [rdi], rax\n"
992
993
994 S = S + "# loop\n"
995 for i in range(1, plimbs):
996 S = S + " mov rax, [rdi + " + str(i) + "*8]\n"
997 S = S + " sbb rax, [rdx + " + str(i) + "*8]\n"
998 S = S + " mov [rdi + " + str(i) + "*8], rax\n"
999
1000 S = S + "# outro\n"
1001 S = S + " mov rax, [rdi + " + str(plimbs) + "*8]\n"
1002 S = S + " sbb rax, 0\n"
1003 S = S + " mov [rdi + " + str(plimbs) + "*8], rax\n"
1004
1005 S = S + " mov rax, [rdi + " + str(plimbs + 1) + "*8]\n"
1006 S = S + " sbb rax, 0\n"
1007 S = S + " mov [rdi + " + str(plimbs + 1) + "*8], rax\n"
1008
1009
1010 # reg_counter = 0
1011 # S = S + "# intro\n"
1012 # while(limbs_counter > 0) and reg_counter < len(registers):
1013 # S = S + " mov " + registers[reg_counter] + ", [rdi + " + str(plimbs - limbs_counter) + "*8]\n"
1014 # limbs_counter -= 1
1015 # reg_counter += 1
1016
1017 # S = S + "# loop\n"
1018 # for i in range(0,math.ceil(plimbs/(len(registers)-1))):
1019 # if i > 0:
1020 # reg_counter = 0
1021 # # S = S + "# ------------------\n"
1022 # while(limbs_counter > 0) and reg_counter < len(registers)-1:
1023 # S = S + " mov " + registers[reg_counter] + ", [rdi + " + str(plimbs - limbs_counter) + "*8]\n"
1024 # limbs_counter -= 1
1025 # reg_counter += 1
1026 # registers = rotate(registers, 1)
1027 # else:
1028 # reg_counter -= 1
1029 # # S = S + "# ------------------\n"
1030 # if i == math.ceil(plimbs/(len(registers)-1)) - 1:
1031 # for j in range(i, reg_counter):
1032 # if(j == 0) and (i == 0):
1033 # S = S + " sub " + registers[j] + ", [rsi + " + str(j + (i*len(registers))) + "*8]\n"
1034 # else:
1035 # S = S + " sbb " + registers[j] + ", [rsi + " + str(j + (i*(len(registers)-1))) + "*8]\n"
1036 # S = S + "# ------------------\n"
1037 # for k in range(0, reg_counter):
1038 # if(k < (plimbs - limbs_counter)):
1039 # S = S + " mov [rdi + " + str(k + (i*(len(registers)-1))) + "*8], " + registers[k] + "\n"
1040 # else:
1041 # for j in range(i, reg_counter + 1):
1042 # if(j == 0) and (i == 0):
1043 # S = S + " sub " + registers[j] + ", [rsi + " + str(j + (i*len(registers))) + "*8]\n"
1044 # else:
1045 # S = S + " sbb " + registers[j] + ", [rsi + " + str(j + (i*(len(registers)-1))) + "*8]\n"
1046 # S = S + "# ------------------\n"
1047 # for k in range(0, reg_counter + 1):
1048 # if(k < (plimbs - limbs_counter)):
1049 # S = S + " mov [rdi + " + str(k + (i*(len(registers)-1))) + "*8], " + registers[k] + "\n"
1050
1051 # # outro
1052 # S = S + "# outro\n"
1053 # S = S + " sbb " + registers[reg_counter] + ", [rsi + " + str(reg_counter + (i*(len(registers)-1))) + "*8]\n"
1054 # S = S + " mov [rdi + " + str(reg_counter + (i*(len(registers)-1))) + "*8], " + registers[reg_counter] + "\n"
1055
1056 # generate the 2nd subtraction by replacing rsi with rdx
1057 # S2 = S.replace("rsi", "rdx" )
1058
1059
1060
1061 S = S + ".endm\n"
1062
1063 print(S)
1064 return 0
1065
1066#////////////////////////////////////////

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintSubOdd()

PrintSubOdd ( plimbs)

Definition at line 815 of file AsmKaratsubaRedc.py.

def PrintSubOdd(plimbs):
    """Print the ``sub_d_<plimbs+2>x<plimbs>_woc`` assembler macro.

    The generated macro body consists of two consecutive subtraction
    passes that both update the limbs addressed by ``rdi`` in place:
    the first pass subtracts the limbs at ``[rsi]``, the second the limbs
    at ``[rdx]``.  Each pass

    * subtracts limb 0 with ``sub``,
    * subtracts limbs ``1 .. plimbs-1`` with ``sbb``,
    * then applies ``sbb rax, 0`` to the two following limbs of ``[rdi]``
      (indices ``plimbs`` and ``plimbs + 1``) so the borrow is carried on.

    Every limb is moved through ``rax``; no other register appears in the
    emitted code.

    Args:
        plimbs: Number of 64-bit limbs read from each of the ``rsi`` /
            ``rdx`` operands.

    Returns:
        Always 0.  The macro text is written to standard output, followed
        by one empty line.
    """
    # Index of the highest [rdi] limb touched by a pass.
    top = plimbs + 1

    lines = [".macro sub_d_{}x{}_woc".format(plimbs + 2, plimbs)]

    # Both passes are identical except for the register holding the
    # address of the value being subtracted.
    for pass_index, src in enumerate(("rsi", "rdx")):
        if pass_index:
            lines.append("# 2nd subtraction ---------")

        # Limb 0 starts the borrow chain, hence `sub` instead of `sbb`.
        lines.append("# odd intro")
        lines.append(" mov rax, [rdi]")
        lines.append(" sub rax, [{}]".format(src))
        lines.append(" mov [rdi], rax")

        lines.append("# loop")
        for i in range(1, top - 1):
            lines.append(" mov rax, [rdi + {}*8]".format(i))
            lines.append(" sbb rax, [{} + {}*8]".format(src, i))
            lines.append(" mov [rdi + {}*8], rax".format(i))

        # The two uppermost limbs have no counterpart in the source
        # operand; only the pending borrow is subtracted from them.
        lines.append("# outro")
        for i in (top - 1, top):
            lines.append(" mov rax, [rdi + {}*8]".format(i))
            lines.append(" sbb rax, 0")
            lines.append(" mov [rdi + {}*8], rax".format(i))

    lines.append(".endm")

    # The trailing "\n" plus print()'s own newline leave one empty line
    # after `.endm`.
    print("\n".join(lines) + "\n")
    return 0
940

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ push()

push ( )

Definition at line 386 of file AsmKaratsubaRedc.py.

def push():
    """Return the assembly snippet that pushes registers onto the stack.

    The snippet starts with a ``# push`` comment line, continues with one
    ``push`` instruction each for rbx, rbp, rsi, r12, r13, r14 and r15
    (in that order) and ends with an empty line.

    Returns:
        The snippet as a single string.
    """
    saved_registers = ("rbx", "rbp", "rsi", "r12", "r13", "r14", "r15")
    instructions = "".join(" push " + reg + "\n" for reg in saved_registers)
    return "# push\n" + instructions + "\n"
392 return S
393

Referenced by PrintMult().

Here is the caller graph for this function:

◆ rotate()

rotate ( l,
x )

Definition at line 383 of file AsmKaratsubaRedc.py.

def rotate(l, x):
    """Return a copy of sequence *l* rotated to the right by *x* positions.

    The last ``x`` elements are moved to the front.  A negative *x*
    rotates to the left.  *x* is reduced modulo ``len(l)``, so rotation
    counts larger than the sequence length wrap around instead of
    returning the sequence unrotated.

    Args:
        l: Any sliceable sequence supporting ``+`` (list, tuple, str).
        x: Number of positions to rotate by.

    Returns:
        A new sequence of the same type as *l*; *l* itself is not modified.
    """
    if not l:
        # Nothing to rotate; also avoids a modulo by zero below.
        return l[:]
    x %= len(l)
    # For x == 0 this is l[0:] + l[:0], i.e. an unrotated copy.
    return l[-x:] + l[:-x]
385

Referenced by PrintMult().

Here is the caller graph for this function:

Variable Documentation

◆ prime

int prime = 0

Definition at line 12 of file AsmKaratsubaRedc.py.

◆ printOut

int printOut = 0

Definition at line 11 of file AsmKaratsubaRedc.py.

◆ sefOfLimbs

sefOfLimbs = set()

Definition at line 10 of file AsmKaratsubaRedc.py.