Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
AsmKaratsubaCodegenerator Namespace Reference

Functions

 Print_mult_dyn (plimbs)
 rotate (l, x)
 push ()
 pop ()
 PrintMontRedc (plimbs)
 PrintMult (plimbs)
 PrintAdd (plimbs)
 PrintAddOdd (plimbs)
 PrintAddAtEnd (plimbs)
 PrintAddOddAtEnd (plimbs)
 PrintSubOdd (plimbs)
 PrintSub (plimbs)
 Print_Assembly (plimbs)
 main (argv)

Variables

 sefOfLimbs = set()
int printOut = 0
int prime = 0
int fullLength = 0

Function Documentation

◆ main()

main ( argv)

Definition at line 801 of file AsmKaratsubaCodegenerator.py.

def main(argv):
    """Print the multiplication assembly for the prime named in ``argv``.

    ``argv[0]`` is the prime identifier and ``argv[1]`` the execution type
    (1 = only print the function names, 0 = print the full assembly). Both
    are stored in the module-level ``prime`` and ``printOut`` variables,
    which the generator functions read.

    The process exits through ``sys.exit()`` when fewer than two arguments
    are given or when the prime is not one of the supported identifiers.

    A hard-coded ``Print_Assembly(85)`` / ``exit(0)`` pair used to run before
    the argument check and made everything below it unreachable; it has been
    removed so that the command-line arguments are honoured.
    """
    global printOut
    global prime

    # Local import: the file's top-level import block is not visible here.
    import sys

    if len(argv) < 2:
        print(
            "\nplease specify the prime and type of execution [1 = only printing functions out, 0 = print full assembly\n")
        sys.exit()

    printOut = int(argv[1])
    prime = argv[0]

    # Bit length of the field for every supported prime identifier.
    bit_lengths = {
        "2047k221": 2048,
        "4095k256": 4096,
        "5119k234": 5120,
        "6143k256": 6144,
        "8191k332": 8192,
        "9215k384": 9216,
    }
    length = bit_lengths.get(prime)
    if length is None:
        print("\nError : no prime available for this input\n")
        sys.exit()

    S = ".intel_syntax noprefix\n\n"
    S = S + ".section .rodata\n\n"
    S = S + ".section .text\n\n"
    print(S)

    # One limb is a 64-bit word.
    plimbs = length // 64

    Print_Assembly(plimbs)

    # Exported wrapper that expands the top-level multiplication macro.
    size = str(plimbs) + "x" + str(plimbs)
    S = ".global fp_mult_" + size + "\n"
    S = S + "fp_mult_" + size + ":\n"
    S = S + " mult_" + size + "\n"
    S = S + " ret\n"
    print(S)

    print("\n")

References Print_Assembly().

Here is the call graph for this function:

◆ pop()

pop ( )

Definition at line 199 of file AsmKaratsubaCodegenerator.py.

def pop():
    """Return the assembly text that pops the registers saved by push().

    The registers are popped in the reverse of the order in which push()
    pushes them.
    """
    S = "# pop\n"
    S = S + " pop r15\n pop r14\n pop r13\n pop r12\n pop rsi\n pop rbp\n pop rbx\n\n"
    return S
206
207

Referenced by PrintMult().

Here is the caller graph for this function:

◆ Print_Assembly()

Print_Assembly ( plimbs)

Definition at line 703 of file AsmKaratsubaCodegenerator.py.

# Print_Assembly(plimbs): emit everything needed for a plimbs x plimbs multiplication.
#
# The module-level set sefOfLimbs records the sizes that were already handled;
# a size is only added (and, apparently, only processed) when it is not yet in
# the set.
# When printOut is truthy only macro names are printed; otherwise the Print*
# generator functions are called to print the macro bodies.
# Sizes above `breakout` (9) recurse on plimbs//2 and plimbs//2+1 for even
# plimbs, and on plimbs//2, plimbs//2+2 and plimbs//2+1 for odd plimbs; sizes
# up to `breakout` are handed to PrintMult (or just named, in printOut mode).
# NOTE(review): this listing has lost its indentation, so the exact nesting of
# the while/if/else branches below cannot be confirmed from here.
703def Print_Assembly(plimbs):
704 global sefOfLimbs
705 global printOut
706 # global functions
707 breakout = 9
# --- name-only mode: print the macro names instead of their bodies ---
708 if(printOut):
709 while 1 :
710 if plimbs > breakout :
711 print("++++++++++++++++++++++++++")
712 if(plimbs not in sefOfLimbs):
713 sefOfLimbs.add(plimbs)
714
715 if(plimbs%2!=1):
716 print("add_" + str(plimbs//2) + "x" + str(plimbs//2))
717 print("sub_d_" + str(plimbs + 2) + "x" + str(plimbs) + "_woc")
718 print("add_" + str(plimbs + 2) + "x" + str(plimbs + 1))
719 if(plimbs > 9):
720 # print("mult_"+ str(plimbs//2) + "x" + str(plimbs//2))
721 # print("mult_"+ str(plimbs//2+1) + "x" + str(plimbs//2+1))
722 # else:
723 Print_Assembly(plimbs//2)
724 Print_Assembly(plimbs//2+1)
725 else:
726 print("add_"+ str(plimbs//2 + 1) + "x" + str(plimbs//2))
727 print("sub_d_"+ str(plimbs + 3) + "x" + str(plimbs + 1) + "_woc")
728 print("add_"+ str(plimbs + 2) + "x" + str(plimbs + 1))
729 if(plimbs > 9):
730 # print("mult_"+ str(plimbs//2) + "x" + str(plimbs//2))
731 # print("mult_"+ str(plimbs//2+1) + "x" + str(plimbs//2+1))
732 # else:
733 Print_Assembly(plimbs//2)
734 Print_Assembly(plimbs//2+2)
735 Print_Assembly(plimbs//2+1)
736 # plimbs = plimbs // 2
737
738 print(">mult_"+ str(plimbs) + "x" + str(plimbs))
739
740 else:
741 plimbs = 0
742 break
743 else:
744 if plimbs <= breakout:
745 if(plimbs not in sefOfLimbs):
746 sefOfLimbs.add(plimbs)
747 print("mult_"+ str(plimbs) + "x" + str(plimbs))
748 if(plimbs+1 not in sefOfLimbs):
749 if(plimbs+1 <= 9):
750 sefOfLimbs.add(plimbs+1)
751 print("mult_"+ str(plimbs+1) + "x" + str(plimbs+1))
752 # PrintMult(plimbs+1)
753 break
# --- full mode: call the generators so the macro bodies are printed ---
754 else:
755 while 1 :
756 if plimbs > breakout:
757 if(plimbs not in sefOfLimbs):
758 sefOfLimbs.add(plimbs)
759
760 if(plimbs%2!=1):
761 PrintAdd(plimbs//2)
762 PrintSubOdd(plimbs)
763 PrintAddAtEnd(plimbs+1)
764 # PrintAddOddAtEnd(plimbs+1)
765 # if(plimbs > breakout):
766 Print_Assembly(plimbs//2)
767 Print_Assembly(plimbs//2+1)
768 else:
769 PrintAddOdd(plimbs//2)
770 PrintSub(plimbs)
771 # PrintAddOdd(plimbs+1)
772 PrintAddOddAtEnd(plimbs+1)
773
774 # if(plimbs > breakout):
775 Print_Assembly(plimbs//2)
776 Print_Assembly(plimbs//2+2)
777 Print_Assembly(plimbs//2+1)
778
779 # functions["mult_" + str(plimbs) + "x" + str(plimbs)]()
780 Print_mult_dyn(plimbs)
781 # print("# ------------------" + str(plimbs) + "x" + str(plimbs))
782
783 else:
784 plimbs = 0
785 break
786 else:
787 if plimbs <= breakout:
788 if(plimbs not in sefOfLimbs):
789 sefOfLimbs.add(plimbs)
790 PrintMult(plimbs)
791 if(plimbs+1 not in sefOfLimbs):
792 if(plimbs+1 <= breakout):
793 sefOfLimbs.add(plimbs+1)
794 PrintMult(plimbs+1)
795 break
796
797
798
799#//+++++++++++++++ Main ++++++++++++++++++//
800

References Print_Assembly(), Print_mult_dyn(), PrintAdd(), PrintAddAtEnd(), PrintAddOdd(), PrintAddOddAtEnd(), PrintMult(), PrintSub(), and PrintSubOdd().

Referenced by main(), and Print_Assembly().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Print_mult_dyn()

Print_mult_dyn ( plimbs)

Definition at line 15 of file AsmKaratsubaCodegenerator.py.

def Print_mult_dyn(plimbs):
    """Print the ``mult_<plimbs>x<plimbs>`` macro built from smaller macros.

    The emitted macro expands two ``add_*`` macros, three smaller ``mult_*``
    macros, one ``sub_d_*_woc`` macro and a final ``add_*`` macro, using a
    scratch area reserved on the stack. Even and odd ``plimbs`` use different
    operand splits and therefore different sub-macro names and offsets.

    Args:
        plimbs: number of 64-bit limbs per operand.

    Returns:
        Always 0; the macro text is written to stdout.
    """
    half = plimbs // 2
    macro = "mult_" + str(plimbs) + "x" + str(plimbs)
    mult_low = "mult_" + str(half) + "x" + str(half)
    final_add = "add_" + str(plimbs + 2) + "x" + str(plimbs + 1)
    restore = ["pop rbx", "pop rbp", "pop r12", "pop r13", "pop r14", ".endm"]

    if plimbs % 2 == 0:
        frame = (half + 1) * 32           # bytes of stack scratch space
        second_sum = str(frame // 4)      # offset of the second operand sum
        product = str(frame // 2)         # offset of the middle product
        split = str(4 * plimbs)           # byte offset of the upper operand half
        add_halves = "add_" + str(half) + "x" + str(half)
        lines = [
            ".macro " + macro,
            "push r14",
            "push r13",
            "lea r13, [rsi+" + split + "]",
            "push r12",
            "mov r12, rdx",
            "mov rdx, r13",
            "push rbp",
            "lea r14, [r12+" + split + "]",
            "mov rbp, rsi",
            "push rbx",
            "mov rbx, rdi",
            "sub rsp, " + str(frame),
            "mov rdi, rsp",
            add_halves,
            "mov rdx, r14",
            "mov rsi, r12",
            "lea rdi, [rsp+" + second_sum + "]",
            add_halves,
            "mov rdx, r12",
            "mov rsi, rbp",
            "mov rdi, rbx",
            mult_low,
            "lea r12, [rbx+" + str(plimbs * 8) + "]",
            "mov rdx, rsp",
            "lea rsi, [rsp+" + second_sum + "]",
            "lea rdi, [rsp+" + product + "]",
            "mult_" + str(half + 1) + "x" + str(half + 1),
            "mov rdx, r14",
            "mov rsi, r13",
            "mov rdi, r12",
            mult_low,
            "mov rdx, r12",
            "mov rsi, rbx",
            "lea rdi, [rsp+" + product + "]",
            "sub_d_" + str(plimbs + 2) + "x" + str(plimbs) + "_woc",
            "lea rdi, [rbx+" + split + "]",
            "lea rdx, [rsp+" + product + "]",
            "mov rsi, rdi",
            final_add,
            "add rsp, " + str(frame),
        ]
    else:
        frame = (half + 2) * 32
        second_sum = str(frame // 4)
        product = str(frame // 2)
        split = str(4 * (plimbs - 1))
        add_halves = "add_" + str(half + 1) + "x" + str(half)
        lines = [
            ".macro " + macro,
            "push r14",
            "push r13",
            "lea r13, [rsi+" + split + "]",
            "push r12",
            "mov r12, rdx",
            "mov rdx, rsi",
            "push rbp",
            "lea r14, [r12+" + split + "]",
            "mov rbp, rsi",
            "mov rsi, r13",
            "push rbx",
            "mov rbx, rdi",
            "sub rsp, " + str(frame),
            "mov rdi, rsp",
            add_halves,
            "mov rdx, r12",
            "mov rsi, r14",
            "lea rdi, [rsp+" + second_sum + "]",
            add_halves,
            "mov rdx, r12",
            "mov rsi, rbp",
            "mov rdi, rbx",
            "lea rbp, [rbx+" + str((plimbs - 1) * 8) + "]",
            mult_low,
            "mov rdx, rsp",
            "lea rsi, [rsp+" + second_sum + "]",
            "lea rdi, [rsp+" + product + "]",
            "mult_" + str(half + 2) + "x" + str(half + 2),
            "mov rdx, r14",
            "mov rsi, r13",
            "mov rdi, rbp",
            "mult_" + str(half + 1) + "x" + str(half + 1),
            "mov rsi, rbx",
            "mov rdx, rbp",
            "lea rdi, [rsp+" + product + "]",
            # The odd variant uses the underscore-prefixed macro name printed by PrintSub.
            "_sub_d_" + str(plimbs + 3) + "x" + str(plimbs + 1) + "_woc",
            "lea rdi, [rbx+" + split + "]",
            "lea rdx, [rsp+" + product + "]",
            "mov rsi, rdi",
            final_add,
            "add rsp, " + str(frame),
        ]

    S = "".join(line + "\n" for line in lines + restore)
    print(S)
    return 0
187

References if.

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintAdd()

PrintAdd ( plimbs)

Definition at line 446 of file AsmKaratsubaCodegenerator.py.

def PrintAdd(plimbs):
    """Print the ``add_<plimbs>x<plimbs>`` macro.

    The macro adds the limbs at [rsi] and [rdx] into [rdi] with an
    ``add``/``adc`` chain and stores the final carry (collected in r8) as
    limb number ``plimbs`` of the result.

    Args:
        plimbs: number of 64-bit limbs in each operand.

    Returns:
        Always 0; the macro text is written to stdout.
    """
    S = "".join([
        ".macro add_" + str(plimbs) + "x" + str(plimbs) + "\n",
        "# intro\n",
        " xor r8, r8\n",
        " mov rax, [rsi + 0]\n add rax, [rdx + 0]\n",
        " mov [rdi + 0], rax\n",
        "# loop\n",
        " .set k, 1\n",
        # The remaining limbs are produced by an assembler-level repeat block.
        " .rept " + str(plimbs - 1) + "\n",
        " mov rax, [rsi + 8*k]\n adc rax, [rdx + 8*k]\n",
        " mov [rdi + 8*k], rax\n .set k, k+1\n .endr\n",
        "# outro\n",
        " adc r8, 0\n",
        " mov [rdi + " + str(plimbs) + "*8], r8\n",
        ".endm\n",
    ])

    print(S)
    return 0
475
476#
477# add function for "oddly" combinations like ADD_17x16, ADD_9x8
478#

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintAddAtEnd()

PrintAddAtEnd ( plimbs)

Definition at line 513 of file AsmKaratsubaCodegenerator.py.

def PrintAddAtEnd(plimbs):
    """Print the ``add_<plimbs+1>x<plimbs>`` macro used for the final addition.

    The macro adds limbs 0..plimbs of [rsi] and [rdx] into [rdi], then
    propagates the carry through limbs ``plimbs+1`` up to ``2*(plimbs-1) - 1``
    of [rsi] with ``adc rax, 0``.

    Args:
        plimbs: size parameter; it determines the macro name and loop bounds.

    Returns:
        Always 0; the macro text is written to stdout.
    """
    S = "".join([
        ".macro add_" + str(plimbs + 1) + "x" + str(plimbs) + "\n",
        "# intro\n",
        " mov rax, [rsi + 0]\n add rax, [rdx + 0]\n",
        " mov [rdi + 0], rax\n",
        "# loop\n",
        " .set k, 1\n",
        " .rept " + str(plimbs) + "\n",
        " mov rax, [rsi + 8*k]\n adc rax, [rdx + 8*k]\n",
        " mov [rdi + 8*k], rax\n .set k, k+1\n .endr\n",
        "# outro\n",
    ])

    # Carry propagation through the upper limbs, one explicit triple per limb.
    for i in range(plimbs + 1, 2 * (plimbs - 1)):
        S = S + " mov rax, [rsi + 8*" + str(i) + "]\n"
        S = S + " adc rax, 0\n"
        S = S + " mov [rdi + 8*" + str(i) + "], rax\n"

    S = S + ".endm\n"

    print(S)
    return 0
541
542

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintAddOdd()

PrintAddOdd ( plimbs)

Definition at line 479 of file AsmKaratsubaCodegenerator.py.

def PrintAddOdd(plimbs):
    """Print the ``add_<plimbs+1>x<plimbs>`` macro for unequal operand lengths.

    Used for combinations such as ADD_17x16 or ADD_9x8. The macro adds limbs
    0..plimbs-1 of [rsi] and [rdx] into [rdi], adds the carry to limb
    ``plimbs`` of [rsi], and stores the final carry (collected in r8) as limb
    ``plimbs + 1`` of the result.

    Args:
        plimbs: number of limbs in the shorter operand.

    Returns:
        Always 0; the macro text is written to stdout.
    """
    top = str(plimbs)
    S = "".join([
        ".macro add_" + str(plimbs + 1) + "x" + top + "\n",
        "# intro\n",
        " xor r8, r8\n",
        " mov rax, [rsi + 0]\n add rax, [rdx + 0]\n",
        " mov [rdi + 0], rax\n",
        "# loop\n",
        " .set k, 1\n",
        " .rept " + str(plimbs - 1) + "\n",
        " mov rax, [rsi + 8*k]\n adc rax, [rdx + 8*k]\n",
        " mov [rdi + 8*k], rax\n .set k, k+1\n .endr\n",
        "# outro\n",
        # Extra limb of the longer operand: only the carry is added to it.
        " mov rax, [rsi + 8*" + top + "]\n",
        " adc rax, 0\n",
        " mov [rdi + 8*" + top + "], rax\n",
        " adc r8, 0\n",
        " mov [rdi + 8*" + str(plimbs + 1) + "], r8\n",
        ".endm\n",
    ])

    print(S)
    return 0
512

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintAddOddAtEnd()

PrintAddOddAtEnd ( plimbs)

Definition at line 543 of file AsmKaratsubaCodegenerator.py.

def PrintAddOddAtEnd(plimbs):
    """Print the ``add_<plimbs+1>x<plimbs>`` macro used for the final odd addition.

    The macro adds limbs 0..plimbs+1 of [rsi] and [rdx] into [rdi], then
    propagates the carry through limbs ``plimbs+2`` up to
    ``2*(plimbs-1) - 1`` of [rsi] with ``adc rax, 0``.

    Args:
        plimbs: size parameter; it determines the macro name and loop bounds.

    Returns:
        Always 0; the macro text is written to stdout.
    """
    S = "".join([
        ".macro add_" + str(plimbs + 1) + "x" + str(plimbs) + "\n",
        "# intro\n",
        " mov rax, [rsi + 0]\n add rax, [rdx + 0]\n",
        " mov [rdi + 0], rax\n",
        "# loop\n",
        " .set k, 1\n",
        " .rept " + str(plimbs + 1) + "\n",
        " mov rax, [rsi + 8*k]\n adc rax, [rdx + 8*k]\n",
        " mov [rdi + 8*k], rax\n .set k, k+1\n .endr\n",
        "# outro\n",
    ])

    # Carry propagation through the upper limbs, one explicit triple per limb.
    for i in range(plimbs + 2, 2 * (plimbs - 1)):
        S = S + " mov rax, [rsi + 8*" + str(i) + "]\n"
        S = S + " adc rax, 0\n"
        S = S + " mov [rdi + 8*" + str(i) + "], rax\n"

    S = S + ".endm\n"

    print(S)
    return 0
571

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintMontRedc()

PrintMontRedc ( plimbs)

Definition at line 208 of file AsmKaratsubaCodegenerator.py.

def PrintMontRedc(plimbs):
    """Return the assembly text of the reduction routine for ``plimbs`` limbs.

    Going by the function name this is a Montgomery reduction. The text
    consists of ``plimbs`` multiply-accumulate passes over the modulus (whose
    address is loaded into r14 from the ``secsidh_internal_<prime>_p``
    symbol): a first pass reading the input at [rsi], ``plimbs - 2`` middle
    passes working in the stack buffer at [rsp], and a last pass that writes
    the result to [rdi]. Unlike the Print* functions the text is returned,
    not printed.

    Args:
        plimbs: number of 64-bit limbs of the modulus.

    Returns:
        The generated assembly as a single string.

    NOTE(review): the multiplication rows alternate the high half between rbx
    (odd k) and rcx (even k), but every carry step reads rbx. For odd
    ``plimbs`` the last row leaves the high half in rcx, so the carry step
    looks wrong there. The output is kept exactly as before; confirm before
    relying on odd sizes.
    """
    global prime

    def limb(base, index):
        # Memory operand for limb `index` of the array at register `base`.
        return "[" + base + " + 8*" + str(index) + "]"

    def rows(src_base, src_shift, dst_base, dst_shift):
        # One mulx/adcx/adox/mov group per modulus limb 1..plimbs-1.
        parts = []
        for k in range(1, plimbs):
            # Odd k: high half to rbx, accumulate in rcx; even k: the reverse.
            high, acc = ("rbx", "rcx") if k & 1 else ("rcx", "rbx")
            parts.append("mulx " + high + ", rax, " + limb("r14", k) + "\n")
            parts.append("adcx " + acc + ", " + limb(src_base, src_shift + k) + "\n")
            parts.append("adox " + acc + ", rax\n")
            parts.append("mov " + limb(dst_base, dst_shift + k) + ", " + acc + "\n\n")
        return "".join(parts)

    def carry(src, dst):
        # Fold both carry chains into the limb that follows the rows.
        return ("mov r11, 0\n"
                + "adox rbx, " + src + "\n"
                + "adcx rbx, r11\n"
                + "mov " + dst + ", rbx\n\n")

    def ripple(src_base, start):
        # Propagate the remaining carries up to limb 2*plimbs of the buffer.
        parts = ["mov rax, 0\n"]
        for k in range(start, 2 * plimbs + 1):
            parts.append("mov r11, 0\n")
            parts.append("adox r11, " + limb(src_base, k) + "\n")
            parts.append("adcx r11, rax\n")
            parts.append("mov " + limb("rsp", k) + ", r11\n\n")
        return "".join(parts)

    def later_head(j):
        # Opening of every pass after the first one.
        return ("########################## mul\n"
                + "xor rax, rax\n"
                + "mov rdx, " + limb("rsp", j) + "\n"
                + "mulx rcx, rax, [r14]\n"
                + "adcx rax, rdx\n"
                + "mov " + limb("rsp", j) + ", rax\n\n")

    S = "push r14\n"
    S = S + "sub rsp, 2*pbytes+8\n"
    S = S + "mov r14, secsidh_internal_" + str(prime) + "_p@GOTPCREL[rip]\n"
    S = S + "xor rax, rax\n"
    S = S + "mov " + limb("rsp", 2 * plimbs) + ", rax\n"

    # First pass: reads the input from [rsi], writes the stack buffer.
    S = S + "########################## mul\n"
    S = S + "mov rdx, [rsi]\n"
    S = S + "mulx rcx, rax, [r14]\n"
    S = S + "adcx rax, rdx\n"
    S = S + "mov [rsp], rax\n\n"
    S = S + rows("rsi", 0, "rsp", 0)
    S = S + "######### carry\n"
    S = S + carry(limb("rsi", plimbs), limb("rsp", plimbs))
    S = S + ripple("rsi", plimbs + 1)

    # Middle passes: read and write the stack buffer, shifted by j limbs.
    for j in range(1, plimbs - 1):
        S = S + later_head(j)
        S = S + rows("rsp", j, "rsp", j)
        S = S + "######### carry\n"
        S = S + carry(limb("rsp", plimbs + j), limb("rsp", plimbs + j))
        S = S + ripple("rsp", plimbs + 1 + j)

    # Last pass: results go to [rdi], one limb lower than the row index.
    j = plimbs - 1
    S = S + later_head(j)
    S = S + rows("rsp", j, "rdi", -1)
    S = S + carry(limb("rsp", plimbs + j), limb("rdi", plimbs - 1))

    S = S + "########################## done\n"
    S = S + "add rsp, 2*pbytes+8\n"
    S = S + "pop r14\n"
    S = S + "jmp .reduce_once\n"

    return S
365
366
367

References if.

◆ PrintMult()

PrintMult ( plimbs)

Definition at line 368 of file AsmKaratsubaCodegenerator.py.

def PrintMult(plimbs):
    """Print the register-based ``mult_<plimbs>x<plimbs>`` macro.

    The macro multiplies the limbs at [rsi] by the limbs at [rdx] (copied to
    rbp) and stores the 2*plimbs-limb product at [rdi]. One accumulator
    register per limb is taken from a fixed pool of nine, so at most nine
    limbs are supported; a larger request prints an error and exits.

    Args:
        plimbs: number of 64-bit limbs per operand.

    Returns:
        Always 0; the macro text is written to stdout.
    """
    # rdi, rsi and rdx are reserved for the arguments; rax and rbx are scratch.
    registers = ["rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"]
    if plimbs > len(registers):
        print("ERROR: Index out range")
        exit()
    state = registers[:plimbs]
    last = len(state) - 1

    S = ".macro mult_" + str(plimbs) + "x" + str(plimbs) + "\n"
    S = S + push()

    S = S + "# intro \n"
    S = S + " mov rbp, rdx\n"
    S = S + " mov rdx, [rbp]\n"
    S = S + " mulx " + state[last] + ", rcx, [rsi + 0*8]\n"
    S = S + " mov [rdi + 0*8], rcx\n"
    for j in range(1, plimbs):
        high = state[last - j]
        S = S + " mulx " + high + ", rax, [rsi + " + str(j) + "*8]\n"
        # The first accumulation opens the carry chain and must be a plain
        # `add`; this used to be `adc` when plimbs == 2, which consumed
        # whatever carry flag the preceding code had left behind.
        opcode = " add " if j == 1 else " adc "
        S = S + opcode + state[last - j + 1] + ", rax\n"
        if j == plimbs - 1:
            # Close the chain by folding the final carry into the top limb.
            S = S + " adc " + high + ", 0\n"

    for i in range(1, plimbs):
        S = S + "# loop i = " + str(i) + "\n"
        S = S + " mov rdx, [rbp + " + str(i) + "*8]\n"

        top = state[last]
        for j in range(0, plimbs):
            S = S + " mulx rbx, rax, [rsi + " + str(j) + "*8]\n"
            if j == 0:
                S = S + " adcx " + top + ", rax\n"
                S = S + " adox " + state[last - 1] + ", rbx\n"
                # The lowest accumulator is finished: store it and recycle the
                # register as the new top limb of this row.
                S = S + " mov [rdi + " + str(i) + "*8], " + top + "\n"
                S = S + " mov " + top + ", 0\n"
            elif j == plimbs - 1:
                S = S + " adcx " + state[last - j] + ", rax\n"
                S = S + " adox " + top + ", rbx\n"
                S = S + " adc " + top + ", 0\n"
            else:
                S = S + " adcx " + state[last - j] + ", rax\n"
                S = S + " adox " + state[last - 1 - j] + ", rbx\n"
        # Shift the register window by one limb for the next row.
        state = rotate(state, 1)

    S = S + "# outro\n"
    for j in range(0, plimbs):
        S = S + " mov [rdi + " + str(plimbs + j) + "*8], " + state[last - j] + "\n"

    S = S + pop()
    S = S + ".endm\n"

    print(S)
    return 0
445

References pop(), push(), and rotate().

Referenced by Print_Assembly().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ PrintSub()

PrintSub ( plimbs)

Definition at line 636 of file AsmKaratsubaCodegenerator.py.

def PrintSub(plimbs):
    """Print the ``_sub_d_<plimbs+3>x<plimbs+1>_woc`` double-subtraction macro.

    The macro subtracts the limbs at [rsi] and then the limbs at [rdx] from
    the value at [rdi] in place, each time propagating the borrow into the
    higher limbs with ``sbb rax, 0``. This is the variant that Print_mult_dyn
    references for odd sizes.

    Args:
        plimbs: size parameter; all limb counts are derived from plimbs + 1.

    Returns:
        Always 0; the macro text is written to stdout.
    """
    plimbs += 1

    def subtract(source, stop):
        # Subtract limbs 0..stop-1 of [source] from [rdi].
        text = "# intro\n"
        text += " mov rax, [rdi]\n"
        text += " sub rax, [" + source + "]\n"
        text += " mov [rdi], rax\n"
        text += "# loop\n"
        for i in range(1, stop):
            text += " mov rax, [rdi + " + str(i) + "*8]\n"
            text += " sbb rax, [" + source + " + " + str(i) + "*8]\n"
            text += " mov [rdi + " + str(i) + "*8], rax\n"
        return text

    def borrow(first, last):
        # Propagate the borrow through limbs first..last of [rdi].
        text = "# outro\n"
        for i in range(first, last + 1):
            text += " mov rax, [rdi + " + str(i) + "*8]\n"
            text += " sbb rax, 0\n"
            text += " mov [rdi + " + str(i) + "*8], rax\n"
        return text

    S = ".macro _sub_d_" + str(plimbs + 2) + "x" + str(plimbs) + "_woc\n"
    S = S + subtract("rsi", plimbs - 2)
    S = S + borrow(plimbs - 2, plimbs + 1)
    S = S + "# 2nd subtraction ---------\n"
    S = S + subtract("rdx", plimbs)
    S = S + borrow(plimbs, plimbs + 1)
    S = S + ".endm\n"

    print(S)
    return 0
701
702#////////////////////////////////////////

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ PrintSubOdd()

PrintSubOdd ( plimbs)

Definition at line 572 of file AsmKaratsubaCodegenerator.py.

def PrintSubOdd(plimbs):
    """Print the ``sub_d_<plimbs+2>x<plimbs>_woc`` double-subtraction macro.

    The macro subtracts the first ``plimbs`` limbs at [rsi] and then the
    first ``plimbs`` limbs at [rdx] from the value at [rdi] in place; after
    each subtraction the borrow is propagated into the next two limbs with
    ``sbb rax, 0``. This is the variant that Print_mult_dyn references for
    even sizes.

    Args:
        plimbs: number of limbs subtracted from each source operand.

    Returns:
        Always 0; the macro text is written to stdout.
    """
    header = ".macro sub_d_" + str(plimbs + 2) + "x" + str(plimbs) + "_woc\n"
    top = plimbs + 1  # index of the highest limb touched by the borrow

    def subtract(source):
        # One subtraction of [source] from [rdi] plus the borrow propagation.
        text = "# odd intro\n"
        text += " mov rax, [rdi]\n"
        text += " sub rax, [" + source + "]\n"
        text += " mov [rdi], rax\n"
        text += "# loop\n"
        for i in range(1, top - 1):
            text += " mov rax, [rdi + " + str(i) + "*8]\n"
            text += " sbb rax, [" + source + " + " + str(i) + "*8]\n"
            text += " mov [rdi + " + str(i) + "*8], rax\n"
        text += "# outro\n"
        for i in (top - 1, top):
            text += " mov rax, [rdi + " + str(i) + "*8]\n"
            text += " sbb rax, 0\n"
            text += " mov [rdi + " + str(i) + "*8], rax\n"
        return text

    S = header
    S = S + subtract("rsi")
    S = S + "# 2nd subtraction ---------\n"
    S = S + subtract("rdx")
    S = S + ".endm\n"

    print(S)
    return 0
635

Referenced by Print_Assembly().

Here is the caller graph for this function:

◆ push()

push ( )

Definition at line 191 of file AsmKaratsubaCodegenerator.py.

def push():
    """Return the assembly text that saves the registers used by the macros.

    The matching pop() restores the same registers in reverse order.
    """
    S = "# push\n"
    S = S + " push rbx\n push rbp\n push rsi\n push r12\n push r13\n push r14\n push r15\n\n"
    return S
198

Referenced by PrintMult().

Here is the caller graph for this function:

◆ rotate()

rotate ( l,
x )

Definition at line 188 of file AsmKaratsubaCodegenerator.py.

def rotate(l, x):
    """Return a copy of list ``l`` with its last ``x`` items moved to the front."""
    return l[-x:] + l[:-x]
190

Referenced by PrintMult().

Here is the caller graph for this function:

Variable Documentation

◆ fullLength

int fullLength = 0

Definition at line 13 of file AsmKaratsubaCodegenerator.py.

◆ prime

int prime = 0

Definition at line 12 of file AsmKaratsubaCodegenerator.py.

◆ printOut

int printOut = 0

Definition at line 11 of file AsmKaratsubaCodegenerator.py.

◆ sefOfLimbs

sefOfLimbs = set()

Definition at line 10 of file AsmKaratsubaCodegenerator.py.