# Generator fragment: builds, in the string S, the text of the assembler macro
# `mult_<plimbs>x<plimbs>`. Each statement appends one emitted instruction or one
# invocation of a smaller helper macro (add_*, mult_*, sub_d_*_woc).
#
# The emitted sequence has the shape of one Karatsuba-style step: two half-width
# additions, three smaller multiplications, one combined subtraction and one final
# addition into the middle of the output.
# NOTE(review): the helper macros are not defined in this view; what they compute,
# and that they take their operands implicitly in rdi/rsi/rdx, is inferred from how
# the registers are loaded before each invocation -- confirm against their definitions.
# NOTE(review): splitting at byte offset 4*plimbs presumably assumes 8-byte limbs
# and an even plimbs -- confirm against the caller.
# NOTE(review): no `.endm` is emitted in the visible lines; presumably it is appended
# on a line not shown here.
#
# Register roles in the emitted code (read off the mov/lea lines below):
#   rbx = incoming rdi, rbp = incoming rsi, r12 = incoming rdx
#   r13 = rsi + 4*plimbs, r14 = r12 + 4*plimbs (second halves of the two inputs)
#   r12 is later overwritten with rbx + plimbs*8
17 S =
".macro mult_" + str(plimbs) +
"x" + str(plimbs) +
"\n"
# r13 -> second half of the operand addressed by rsi.
26 S = S +
"lea r13, [rsi+" + str(4*plimbs) +
"]\n"
# Keep the incoming rdx in r12, because rdx is reused as an argument register next.
28 S = S +
"mov r12, rdx\n"
29 S = S +
"mov rdx, r13\n"
# r14 -> second half of the operand addressed by the saved rdx.
32 S = S +
"lea r14, [r12+" + str(4*plimbs) +
"]\n"
# Preserve the incoming rsi and rdi across the helper macro invocations.
33 S = S +
"mov rbp, rsi\n"
35 S = S +
"mov rbx, rdi\n"
# Reserve ((plimbs//2)+1)*32 bytes of stack scratch; the matching `add rsp` is the
# last statement of this fragment. The scratch is addressed at offsets 0, size/4 and size/2.
37 S = S +
"sub rsp, " + str(((plimbs//2)+1)*32) +
"\n"
# First addition: rdi = scratch+0, rsi = first operand, rdx = its second half (r13).
45 S = S +
"mov rdi, rsp\n"
47 S = S +
"add_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# Second addition: rdi = scratch+size/4, rsi = second operand (r12), rdx = its second half (r14).
48 S = S +
"mov rdx, r14\n"
49 S = S +
"mov rsi, r12\n"
52 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+1)*32)//4) +
"]\n"
54 S = S +
"add_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# First multiplication: original operands (rbp, r12) with the original destination (rbx).
55 S = S +
"mov rdx, r12\n"
56 S = S +
"mov rsi, rbp\n"
57 S = S +
"mov rdi, rbx\n"
59 S = S +
"mult_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# r12 is repurposed: it now points plimbs*8 bytes into the destination.
61 S = S +
"lea r12, [rbx+" + str(plimbs*8) +
"]\n"
# Second multiplication: the two scratch sums (scratch+0 and scratch+size/4),
# one limb wider than the halves, with the result placed at scratch+size/2.
62 S = S +
"mov rdx, rsp\n"
64 S = S +
"lea rsi, [rsp+" + str((((plimbs//2)+1)*32)//4) +
"]\n"
66 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+1)*32)//2) +
"]\n"
68 S = S +
"mult_" + str(plimbs//2 + 1) +
"x" + str(plimbs//2 + 1) +
"\n"
# Third multiplication: the two second halves (r13, r14) into destination + plimbs*8 (r12).
69 S = S +
"mov rdx, r14\n"
70 S = S +
"mov rsi, r13\n"
71 S = S +
"mov rdi, r12\n"
73 S = S +
"mult_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# Combined subtraction: rdi = scratch+size/2, rsi = destination start (rbx),
# rdx = destination + plimbs*8 (r12).
# NOTE(review): presumably subtracts both partial products from the scratch product
# ("_woc" probably means "without carry") -- confirm against the macro definition.
74 S = S +
"mov rdx, r12\n"
75 S = S +
"mov rsi, rbx\n"
78 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+1)*32)//2) +
"]\n"
80 S = S +
"sub_d_" + str(plimbs + 2) +
"x" + str(plimbs) +
"_woc\n"
# Final addition: rdi = rsi = destination + 4*plimbs, rdx = scratch+size/2,
# so the same destination address is used as both source and target.
82 S = S +
"lea rdi, [rbx+" + str(4*plimbs) +
"]\n"
84 S = S +
"lea rdx, [rsp+" + str((((plimbs//2)+1)*32)//2) +
"]\n"
86 S = S +
"mov rsi, rdi\n"
88 S = S +
"add_" + str(plimbs + 2) +
"x" + str(plimbs + 1) +
"\n"
# Release the stack scratch reserved above (same byte count as the `sub rsp`).
90 S = S +
"add rsp, " + str(((plimbs//2)+1)*32) +
"\n"
# Generator fragment: a second way of building the macro `mult_<plimbs>x<plimbs>`
# in S. It follows the same overall sequence as the variant that splits at 4*plimbs
# (two additions, three multiplications, a combined subtraction, a final addition),
# but splits the operands at byte offset 4*(plimbs-1) and uses unequal part widths
# (plimbs//2 and plimbs//2 + 1).
# NOTE(review): presumably this is the variant for odd plimbs with 8-byte limbs --
# the selecting condition is not visible here; confirm against the caller.
# NOTE(review): the helper macros (add_*, mult_*, _sub_d_*_woc) are not defined in
# this view; their semantics and implicit rdi/rsi/rdx operands are inferred from
# the register setup before each invocation.
# NOTE(review): no `.endm` is emitted in the visible lines.
#
# Register roles in the emitted code (read off the mov/lea lines below):
#   rbx = incoming rdi, r12 = incoming rdx
#   r13 = rsi + 4*(plimbs-1), r14 = r12 + 4*(plimbs-1)
#   rbp = incoming rsi at first, later overwritten with rbx + (plimbs-1)*8
99 S =
".macro mult_" + str(plimbs) +
"x" + str(plimbs) +
"\n"
# r13 -> upper part of the operand addressed by rsi.
108 S = S +
"lea r13, [rsi+" + str(4*(plimbs-1)) +
"]\n"
# Keep the incoming rdx in r12; rdx then receives the incoming rsi (the lower part).
110 S = S +
"mov r12, rdx\n"
111 S = S +
"mov rdx, rsi\n"
# r14 -> upper part of the operand addressed by the saved rdx.
114 S = S +
"lea r14, [r12+" + str(4*(plimbs-1)) +
"]\n"
# Save the incoming rsi in rbp, then pass the upper part (r13) as rsi to the addition.
115 S = S +
"mov rbp, rsi\n"
116 S = S +
"mov rsi, r13\n"
118 S = S +
"mov rbx, rdi\n"
# Reserve ((plimbs//2)+2)*32 bytes of stack scratch; released by the final `add rsp`.
# The scratch is addressed at offsets 0, size/4 and size/2.
120 S = S +
"sub rsp, " + str(((plimbs//2)+2)*32) +
"\n"
# First addition: rdi = scratch+0, rsi = upper part (r13), rdx = lower part.
128 S = S +
"mov rdi, rsp\n"
130 S = S +
"add_" + str(plimbs//2 + 1) +
"x" + str(plimbs//2) +
"\n"
# Second addition: rdi = scratch+size/4, rsi = upper part (r14), rdx = lower part (r12).
131 S = S +
"mov rdx, r12\n"
132 S = S +
"mov rsi, r14\n"
134 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+2)*32)//4) +
"]\n"
137 S = S +
"add_" + str(plimbs//2 + 1) +
"x" + str(plimbs//2) +
"\n"
# First multiplication: original operands (rbp, r12) with the original destination (rbx).
138 S = S +
"mov rdx, r12\n"
139 S = S +
"mov rsi, rbp\n"
140 S = S +
"mov rdi, rbx\n"
# rbp is repurposed before the multiplication is emitted: it now points
# (plimbs-1)*8 bytes into the destination. rsi already holds the old rbp value.
143 S = S +
"lea rbp, [rbx+" + str((plimbs -1) *8) +
"]\n"
145 S = S +
"mult_" + str(plimbs//2) +
"x" + str(plimbs//2) +
"\n"
# Second multiplication: the two scratch sums (scratch+0 and scratch+size/4),
# at width plimbs//2 + 2, with the result placed at scratch+size/2.
146 S = S +
"mov rdx, rsp\n"
148 S = S +
"lea rsi, [rsp+" + str((((plimbs//2)+2)*32)//4) +
"]\n"
151 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+2)*32)//2) +
"]\n"
153 S = S +
"mult_" + str(plimbs//2 + 2) +
"x" + str(plimbs//2 + 2) +
"\n"
# Third multiplication: the two upper parts (r13, r14) into destination + (plimbs-1)*8 (rbp).
154 S = S +
"mov rdx, r14\n"
155 S = S +
"mov rsi, r13\n"
156 S = S +
"mov rdi, rbp\n"
159 S = S +
"mult_" + str(plimbs//2 + 1) +
"x" + str(plimbs//2 + 1) +
"\n"
# Combined subtraction: rdi = scratch+size/2, rsi = destination start (rbx),
# rdx = destination + (plimbs-1)*8 (rbp).
160 S = S +
"mov rsi, rbx\n"
161 S = S +
"mov rdx, rbp\n"
164 S = S +
"lea rdi, [rsp+" + str((((plimbs//2)+2)*32)//2) +
"]\n"
# NOTE(review): this macro name starts with an underscore ("_sub_d_..."), whereas the
# variant that splits at 4*plimbs emits "sub_d_..." without one -- confirm that a macro
# with this exact name exists; otherwise this may be a typo.
166 S = S +
"_sub_d_" + str(plimbs + 3) +
"x" + str(plimbs + 1) +
"_woc\n"
# Final addition: rdi = rsi = destination + 4*(plimbs-1), rdx = scratch+size/2.
168 S = S +
"lea rdi, [rbx+" + str(4*(plimbs-1)) +
"]\n"
170 S = S +
"lea rdx, [rsp+" + str((((plimbs//2)+2)*32)//2) +
"]\n"
172 S = S +
"mov rsi, rdi\n"
# NOTE(review): the widths here (plimbs + 2, plimbs + 1) are the same as in the
# variant that splits at 4*plimbs, although the preceding subtraction uses
# plimbs + 3 -- confirm this is intended.
174 S = S +
"add_" + str(plimbs + 2) +
"x" + str(plimbs + 1) +
"\n"
# Release the stack scratch reserved above (same byte count as the `sub rsp`).
177 S = S +
"add rsp, " + str(((plimbs//2)+2)*32) +
"\n"
# Generator fragment: appends to S the macro `mult_<plimbs>x<plimbs>` written
# directly with mulx/adc/adcx/adox, keeping the running limbs in registers instead
# of invoking smaller macros. Unlike the other generators for this macro name in this
# file, it appends to an existing S (`S = S + ...`) rather than starting a fresh string.
#
# Emitted-code conventions visible below: rsi addresses one operand, the incoming
# rdx (copied to rbp) addresses the other, and results are stored through rdi,
# all in 8-byte steps. rax and rbx receive the mulx results in the later rows.
# NOTE(review): no `.endm` is emitted in the visible lines.
#
# Pool of registers available for the running limbs; `state` takes the first plimbs of them.
601 registers = [
"rcx",
"r8",
"r9",
"r10",
"r11",
"r12",
"r13",
"r14",
"r15"]
# Guard: only len(registers) == 9 limbs can be kept in registers.
# NOTE(review): only a message is printed here; whether generation is aborted
# afterwards is not visible (original line 604 is missing from this view) -- confirm.
602 if(plimbs > len(registers)):
603 print(
"ERROR: Index out range")
# Slicing never raises, so for plimbs > 9 `state` silently has only 9 entries.
605 state = registers[:plimbs]
613 S = S +
".macro mult_"+ str(plimbs) +
"x" + str(plimbs) +
"\n"
# mulx takes one multiplicand implicitly from rdx, so the pointer that arrived
# in rdx is moved to rbp and rdx is then loaded with the operand's first limb.
621 S = S +
" mov rbp, rdx\n"
623 S = S +
" mov rdx, [rbp]\n"
# First product of row 0: high half -> last state register, low half -> rcx,
# which is stored immediately as output limb 0.
624 S = S +
" mulx " + state[len(state)-1] +
", rcx, [rsi + 0*8]\n"
625 S = S +
" mov [rdi + 0*8], rcx\n"
# Rest of row 0: for each j, multiply by limb j of the rsi operand and add the
# low half (rax) into the register that received the previous high half.
# NOTE(review): three alternative emissions appear below (adc + adc 0, add, adc).
# Presumably each sits in its own branch of a conditional on j that is not visible
# in this view (original lines 627, 631 and 634 are missing) -- confirm.
626 for j
in range(1,plimbs):
628 S = S +
" mulx " + state[len(state)-1-(1*j)] +
", rax, [rsi + " + str(j) +
"*8]\n"
629 S = S +
" adc " + state[len(state)-(1*j)] +
", rax\n"
630 S = S +
" adc " + state[len(state)-1-(1*j)] +
", 0\n"
632 S = S +
" mulx " + state[len(state)-1-(1*j)] +
", rax, [rsi + " + str(j) +
"*8]\n"
633 S = S +
" add " + state[len(state)-(1*j)] +
", rax\n"
635 S = S +
" mulx " + state[len(state)-1-(1*j)] +
", rax, [rsi + " + str(j) +
"*8]\n"
636 S = S +
" adc " + state[len(state)-(1*j)] +
", rax\n"
# Rows 1 .. plimbs-1: one pass per remaining limb of the rbp operand.
640 for i
in range(1,plimbs):
# Emits an assembler comment line marking the row, then loads limb i into rdx.
641 S = S +
"# loop i = " + str(i) +
"\n"
642 S = S +
" mov rdx, [rbp + " + str(i) +
"*8]\n"
# For each limb j of the rsi operand: mulx puts the high half in rbx and the low
# half in rax; the low half is accumulated with adcx and the high half with adox.
# NOTE(review): as in row 0, three alternative accumulate sequences follow.
# Presumably they belong to separate branches on j that are not visible here
# (original lines 646, 652 and 656 are missing) -- confirm.
644 for j
in range(0,plimbs):
645 S = S +
" mulx rbx, rax, [rsi + " + str(j) +
"*8]\n"
# This sequence accumulates into the last two state registers, stores the last
# state register as output limb i, then resets that register to 0.
647 S = S +
" adcx " + state[len(state)-1] +
", rax\n"
648 S = S +
" adox " + state[len(state)-2] +
", rbx\n"
649 S = S +
" mov [rdi + " + str(i) +
"*8], " + state[len(state)-1] +
"\n"
# NOTE(review): `mov reg, 0` leaves the flags untouched, whereas `xor reg, reg`
# would clear CF/OF; presumably the carries of the adcx/adox pair above must
# survive this instruction -- do not "optimise" it to xor without confirming.
651 S = S +
" mov " + state[len(state)-1] +
", 0\n"
# This sequence accumulates the high half into the reset last state register and
# then adds the remaining carry flag into it.
653 S = S +
" adcx " + state[len(state)-2-(1*(j-1))] +
", rax\n"
654 S = S +
" adox " + state[len(state)-1] +
", rbx\n"
655 S = S +
" adc " + state[len(state)-1] +
", 0\n"
# This sequence accumulates the low half into one state register and the high half
# into the register one index lower.
657 S = S +
" adcx " + state[len(state)-2-(1*(j-1))] +
", rax\n"
658 S = S +
" adox " + state[len(state)-2-(1*j)] +
", rbx\n"
# Epilogue: store the plimbs state registers as output limbs plimbs .. 2*plimbs-1,
# walking `state` from its last entry down to its first.
662 for j
in range(0,plimbs):
663 S = S +
" mov [rdi + " + str(plimbs + j) +
"*8], " + state[len(state)-1-(1*j)] +
"\n"