# Builds, in S, the text of an assembler macro named mult_<plimbs>x<plimbs>.
# Each statement appends one emitted line (Intel operand order).
# The emitted sequence has the shape of a one-level Karatsuba multiplication:
# two half-size products plus one product of half-sums, then a subtract and an add.
# The operand conventions of the add_/mult_/sub_d_ helper macros are not defined
# in this part of the file; the comments below assume rdi = destination and
# rsi, rdx = sources - TODO confirm against the helper generators.
# Limbs are taken to be 8 bytes (see the plimbs*8 offset further down), so
# 4*plimbs bytes is plimbs/2 limbs when plimbs is even - presumably the case here.
# NOTE(review): no ".endm" line is visible in this section; verify it is emitted elsewhere.
18 S =
".macro mult_" + str(plimbs) +
"x" + str(plimbs) +
"\n"
# r13 = incoming rsi + 4*plimbs bytes (upper half of the first operand).
27 S = S +
"lea r13, [rsi+" + str(4*plimbs) +
"]\n"
# Keep the incoming rdx (second operand pointer) in r12, then point rdx at r13
# so the first add_ below sees rsi = low half and rdx = upper half of operand one.
29 S = S +
"mov r12, rdx\n"
30 S = S +
"mov rdx, r13\n"
# r14 = r12 + 4*plimbs bytes (upper half of the second operand).
33 S = S +
"lea r14, [r12+" + str(4*plimbs) +
"]\n"
# Save the incoming rsi and rdi in rbp and rbx; both are read again later.
34 S = S +
"mov rbp, rsi\n"
36 S = S +
"mov rbx, rdi\n"
# Reserve ((plimbs//2)+1)*32 bytes of stack scratch. The offsets used below
# (total//4 and total//2) split it into slots of (plimbs//2 + 1) 8-byte limbs.
38 S = S +
"sub rsp, " + str(((plimbs//2)+1)*32) +
"\n"
# First half-sum, destination = scratch at rsp.
46 S = S +
"mov rdi, rsp\n"
48 S = S +
"add_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# Second half-sum: sources r12 (low half) and r14 (upper half) of operand two,
# destination = scratch at rsp + total//4.
49 S = S +
"mov rdx, r14\n"
50 S = S +
"mov rsi, r12\n"
53 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+1)*32)//4) +
"]\n"
55 S = S +
"add_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# Low product: restore the original pointers (rdx = r12, rsi = rbp, rdi = rbx)
# and run the half-size multiply.
56 S = S +
"mov rdx, r12\n"
57 S = S +
"mov rsi, rbp\n"
58 S = S +
"mov rdi, rbx\n"
60 S = S +
"mult_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# r12 = rbx + plimbs*8 bytes, i.e. plimbs limbs past the saved rdi.
# NOTE(review): rbx, rbp, r12-r14 are read after nested mult_ macros; this relies
# on those macros (or lines not visible here) preserving them - verify.
62 S = S +
"lea r12, [rbx+" + str(plimbs*8) +
"]\n"
# Middle product: multiply the two half-sums (scratch at rsp and rsp + total//4)
# into scratch at rsp + total//2, using the (plimbs//2 + 1)-limb multiply.
63 S = S +
"mov rdx, rsp\n"
65 S = S +
"lea rsi, [rsp+" + str((((plimbs//2)+1)*32)//4) +
"]\n"
67 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+1)*32)//2) +
"]\n"
69 S = S +
"mult_" + str(plimbs//2 + 1) +
"x" + str(plimbs//2 + 1) +
"\n"
# High product: upper halves (r13, r14) multiplied into r12.
70 S = S +
"mov rdx, r14\n"
71 S = S +
"mov rsi, r13\n"
72 S = S +
"mov rdi, r12\n"
74 S = S +
"mult_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# Subtract step on the middle product (rdi = rsp + total//2) with rsi = rbx and
# rdx = r12, the two locations written by the low and high products above.
# Presumably "middle -= low + high"; the macro's exact contract is not shown here.
75 S = S +
"mov rdx, r12\n"
76 S = S +
"mov rsi, rbx\n"
79 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+1)*32)//2) +
"]\n"
81 S = S +
"sub_d_" + str(plimbs + 2) +
"x" + str(plimbs) +
"_woc\n"
# Final add: rdi = rsi = rbx + 4*plimbs bytes, rdx = middle scratch.
# Presumably adds the middle term into the result in place - TODO confirm.
83 S = S +
"lea rdi, [rbx+" + str(4*plimbs) +
"]\n"
85 S = S +
"lea rdx, [rsp+" + str((((plimbs//2)+1)*32)//2) +
"]\n"
87 S = S +
"mov rsi, rdi\n"
89 S = S +
"add_" + str(plimbs + 2) +
"x" + str(plimbs + 1) +
"\n"
# Release the stack scratch; the amount matches the "sub rsp" emitted above.
91 S = S +
"add rsp, " + str(((plimbs//2)+1)*32) +
"\n"
# Builds, in S, the text of an assembler macro named mult_<plimbs>x<plimbs>,
# starting S afresh. Same overall shape as a one-level Karatsuba multiplication,
# but with an uneven split: the nested multiplies are plimbs//2, plimbs//2 + 1 and
# plimbs//2 + 2 limbs wide, and the split offset is 4*(plimbs-1) bytes
# (= plimbs//2 8-byte limbs when plimbs is odd - presumably the case here).
# The operand conventions of the add_/mult_/_sub_d_ helper macros are not defined
# in this part of the file; the comments below assume rdi = destination and
# rsi, rdx = sources - TODO confirm.
# NOTE(review): no ".endm" line is visible in this section; verify it is emitted elsewhere.
100 S =
".macro mult_" + str(plimbs) +
"x" + str(plimbs) +
"\n"
# r13 = incoming rsi + 4*(plimbs-1) bytes (upper part of the first operand).
109 S = S +
"lea r13, [rsi+" + str(4*(plimbs-1)) +
"]\n"
# Keep the incoming rdx (second operand pointer) in r12; rdx now takes the
# low part of operand one (incoming rsi).
111 S = S +
"mov r12, rdx\n"
112 S = S +
"mov rdx, rsi\n"
# r14 = r12 + 4*(plimbs-1) bytes (upper part of the second operand).
115 S = S +
"lea r14, [r12+" + str(4*(plimbs-1)) +
"]\n"
# Save the incoming rsi in rbp, then make the upper part (r13) the first source,
# matching the add_<plimbs//2 + 1>x<plimbs//2> size order used below.
116 S = S +
"mov rbp, rsi\n"
117 S = S +
"mov rsi, r13\n"
# Save the incoming rdi (result pointer) in rbx.
119 S = S +
"mov rbx, rdi\n"
# Reserve ((plimbs//2)+2)*32 bytes of stack scratch. The offsets used below
# (total//4 and total//2) split it into slots of (plimbs//2 + 2) 8-byte limbs.
121 S = S +
"sub rsp, " + str(((plimbs//2)+2)*32) +
"\n"
# First part-sum, destination = scratch at rsp.
129 S = S +
"mov rdi, rsp\n"
131 S = S +
"add_" + str(plimbs//2 + 1) +
"x" + str(plimbs//2) +
"\n"
# Second part-sum: sources r14 (upper part) and r12 (low part) of operand two,
# destination = scratch at rsp + total//4.
132 S = S +
"mov rdx, r12\n"
133 S = S +
"mov rsi, r14\n"
135 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+2)*32)//4) +
"]\n"
138 S = S +
"add_" + str(plimbs//2 + 1) +
"x" + str(plimbs//2) +
"\n"
# Low product: restore the original pointers (rdx = r12, rsi = rbp, rdi = rbx).
139 S = S +
"mov rdx, r12\n"
140 S = S +
"mov rsi, rbp\n"
141 S = S +
"mov rdi, rbx\n"
# rbp is free once rsi has been reloaded from it; reuse it as
# rbx + (plimbs-1)*8 bytes, the destination of the high product below.
# NOTE(review): rbx, rbp, r13, r14 are read after nested mult_ macros; this relies
# on those macros (or lines not visible here) preserving them - verify.
144 S = S +
"lea rbp, [rbx+" + str((plimbs -1) *8) +
"]\n"
146 S = S +
"mult_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# Middle product: multiply the two part-sums (scratch at rsp and rsp + total//4)
# into scratch at rsp + total//2, using the (plimbs//2 + 2)-limb multiply.
147 S = S +
"mov rdx, rsp\n"
149 S = S +
"lea rsi, [rsp+" + str((((plimbs//2)+2)*32)//4) +
"]\n"
152 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+2)*32)//2) +
"]\n"
154 S = S +
"mult_" + str(plimbs//2 + 2) +
"x" + str(plimbs//2 + 2) +
"\n"
# High product: upper parts (r13, r14) multiplied into rbp.
155 S = S +
"mov rdx, r14\n"
156 S = S +
"mov rsi, r13\n"
157 S = S +
"mov rdi, rbp\n"
160 S = S +
"mult_" + str(plimbs//2 + 1) +
"x" + str(plimbs//2 + 1) +
"\n"
# Subtract step on the middle product (rdi = rsp + total//2) with rsi = rbx and
# rdx = rbp, the two locations written by the low and high products above.
# NOTE(review): this macro name is emitted with a leading underscore ("_sub_d_"),
# whereas the other mult_ generator earlier in this file emits "sub_d_" without
# one - confirm that a macro with this exact name is defined.
161 S = S +
"mov rsi, rbx\n"
162 S = S +
"mov rdx, rbp\n"
165 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+2)*32)//2) +
"]\n"
167 S = S +
"_sub_d_" + str(plimbs + 3) +
"x" + str(plimbs + 1) +
"_woc\n"
# Final add: rdi = rsi = rbx + 4*(plimbs-1) bytes, rdx = middle scratch.
# Presumably adds the middle term into the result in place - TODO confirm.
169 S = S +
"lea rdi, [rbx+" + str(4*(plimbs-1)) +
"]\n"
171 S = S +
"lea rdx, [rsp+" + str((((plimbs//2)+2)*32)//2) +
"]\n"
173 S = S +
"mov rsi, rdi\n"
175 S = S +
"add_" + str(plimbs + 2) +
"x" + str(plimbs + 1) +
"\n"
# Release the stack scratch; the amount matches the "sub rsp" emitted above.
178 S = S +
"add rsp, " + str(((plimbs//2)+2)*32) +
"\n"
# Appends to S the text of an assembler macro mult_<plimbs>x<plimbs> that keeps a
# window of plimbs partial-product limbs in registers and multiplies with
# mulx / adc / adcx / adox (a row-by-row, schoolbook-style product).
# Pool of accumulator registers; at most len(registers) == 9 limbs are supported.
373 registers = [
"rcx",
"r8",
"r9",
"r10",
"r11",
"r12",
"r13",
"r14",
"r15"]
# NOTE(review): in the visible lines this check only prints a message; whether
# execution stops afterwards is not visible here. The slice below never raises,
# so an oversized plimbs would yield a 9-entry state - verify the intended handling.
374 if(plimbs > len(registers)):
375 print(
"ERROR: Index out range")
# state = the first plimbs register names; state[0] is always "rcx".
377 state = registers[:plimbs]
385 S = S +
".macro mult_"+ str(plimbs) +
"x" + str(plimbs) +
"\n"
# mulx takes one factor implicitly from rdx, so the incoming rdx is moved to rbp
# and rdx is then loaded with successive 8-byte limbs from [rbp + i*8].
# NOTE(review): the emitted code overwrites rbp here and rbx in the later rows.
393 S = S +
" mov rbp, rdx\n"
# Row 0: first limb of the rbp operand times every limb at [rsi + j*8].
395 S = S +
" mov rdx, [rbp]\n"
# High half goes to the last state register, low half to rcx, which is stored
# immediately as limb 0 at [rdi].
396 S = S +
" mulx " + state[len(state)-1] +
", rcx, [rsi + 0*8]\n"
397 S = S +
" mov [rdi + 0*8], rcx\n"
# Remaining columns of row 0. Three alternative bodies follow; the conditions
# selecting between them sit on original lines that are not visible in this
# view (399/403/406) - presumably they distinguish first / middle / last column.
398 for j
in range(1,plimbs):
# Variant: add the low half with carry-in, then fold the final carry into the
# new high half.
400 S = S +
" mulx " + state[len(state)-1-(1*j)] +
", rax, [rsi + " + str(j) +
"*8]\n"
401 S = S +
" adc " + state[len(state)-(1*j)] +
", rax\n"
402 S = S +
" adc " + state[len(state)-1-(1*j)] +
", 0\n"
# Variant: plain add (no carry-in), which starts a carry chain.
404 S = S +
" mulx " + state[len(state)-1-(1*j)] +
", rax, [rsi + " + str(j) +
"*8]\n"
405 S = S +
" add " + state[len(state)-(1*j)] +
", rax\n"
# Variant: add with carry-in, which continues the chain.
407 S = S +
" mulx " + state[len(state)-1-(1*j)] +
", rax, [rsi + " + str(j) +
"*8]\n"
408 S = S +
" adc " + state[len(state)-(1*j)] +
", rax\n"
# Rows 1 .. plimbs-1: each row loads limb i of the rbp operand into rdx and
# accumulates rdx * [rsi + j*8] into the register window.
412 for i
in range(1,plimbs):
# The "# loop i = ..." text is emitted into the generated assembly as a comment.
413 S = S +
"# loop i = " + str(i) +
"\n"
414 S = S +
" mov rdx, [rbp + " + str(i) +
"*8]\n"
# For every column, mulx puts the high half in rbx and the low half in rax;
# low halves are added with adcx and high halves with adox (two separate carry
# flags). Three alternative bodies follow; their selecting conditions are on
# original lines not visible here (418/424/428). Any re-ordering of `state`
# between rows is likewise not visible in this view.
416 for j
in range(0,plimbs):
417 S = S +
" mulx rbx, rax, [rsi + " + str(j) +
"*8]\n"
# Variant: the lowest window register is complete after this add; store it as
# result limb i and clear it for reuse.
419 S = S +
" adcx " + state[len(state)-1] +
", rax\n"
420 S = S +
" adox " + state[len(state)-2] +
", rbx\n"
421 S = S +
" mov [rdi + " + str(i) +
"*8], " + state[len(state)-1] +
"\n"
# mov (rather than xor) is emitted here: mov leaves CF/OF untouched, and both
# flag chains are still in use by the adcx/adox that follow.
423 S = S +
" mov " + state[len(state)-1] +
", 0\n"
# Variant: high half goes into the just-cleared register, then the pending
# carry is folded into it with adc.
425 S = S +
" adcx " + state[len(state)-2-(1*(j-1))] +
", rax\n"
426 S = S +
" adox " + state[len(state)-1] +
", rbx\n"
427 S = S +
" adc " + state[len(state)-1] +
", 0\n"
# Variant: low half into window slot for column j, high half into the next slot.
429 S = S +
" adcx " + state[len(state)-2-(1*(j-1))] +
", rax\n"
430 S = S +
" adox " + state[len(state)-2-(1*j)] +
", rbx\n"
# Write the register window out as result limbs plimbs .. 2*plimbs-1,
# walking state from its last entry to its first.
434 for j
in range(0,plimbs):
435 S = S +
" mov [rdi + " + str(plimbs + j) +
"*8], " + state[len(state)-1-(1*j)] +
"\n"