8#define UNCONST(type, var) (*(type*)&(var))
18 for (
long long i = 0;
i <
alen;++
i) {
19 fp_mul3(&c[
i],&
a[
i],&b[0]);
27 fp_mul3(&c[0],&
a[0],&b[0]);
28 fp_mul3(&c[2],&
a[1],&b[1]);
32 fp_mul3(&c[1],(
const fp*) &
a01,(
const fp*) &
b01);
33 fp_sub2(&c[1],(
const fp*) &c[0]);
34 fp_sub2(&c[1],(
const fp*) &c[2]);
47 fp_mul3(&c[0],&
a[0],&b[0]);
48 fp_mul3(&c[2],&
a[1],&b[1]);
51 fp_mul3(&c[1],(
const fp*) &
a01,(
const fp*) &
b01);
52 fp_sub2(&c[1],(
const fp*) &c[0]);
53 fp_sub2(&c[1],(
const fp*) &c[2]);
54 fp_mul3(&c[3],&
a[2],&b[1]);
56 fp_add2(&c[2],(
const fp*) &
a2b0);
60 fp_mul3(&c[0],&
a[0],&b[0]);
61 fp_mul3(&c[2],&
a[1],&b[1]);
64 fp_mul3(&c[1],(
const fp*) &
a01,(
const fp*) &
b01);
65 fp_sub2(&c[1],(
const fp*) &c[0]);
66 fp_sub2(&c[1],(
const fp*) &c[2]);
69 fp_mul3(&
mid,&
a[2],&b[0]);
70 fp_mul3(&c[4],&
a[3],&b[1]);
72 fp_mul3(&c[3],(
const fp*) &
a23,(
const fp*) &
b01);
73 fp_sub2(&c[3],(
const fp*) &
mid);
74 fp_sub2(&c[3],(
const fp*) &c[4]);
75 fp_add2(&c[2],(
const fp*) &
mid);
95 fp_mul3(&c[1],(
const fp*) &
a10,(
const fp*) &
b01);
98 fp_mul3(&c[2],(
const fp*) &
a20,(
const fp*) &
b02);
101 fp_mul3(&c[3],(
const fp*) &
a21,(
const fp*) &
b12);
102 fp_mul3(&c[0],&
a[0],&b[0]);
103 fp_mul3(&c[4],&
a[2],&b[2]);
105 fp t; fp_add3(&t,(
const fp*) &
a1b1,(
const fp*) &c[0]);
106 fp_add2(&c[1],(
const fp*) &t);
107 fp_add3(&t,(
const fp*) &
a1b1,(
const fp*) &c[4]);
108 fp_add2(&c[3],(
const fp*) &t);
109 fp_add2(&t,(
const fp*) &c[0]);
110 fp_add2(&c[2],(
const fp*) &t);
113 fp_mul3(&t,&
a[3],&b[0]); fp_add2(&c[3],(
const fp*) &t);
114 fp_mul3(&t,&
a[3],&b[1]); fp_add2(&c[4],(
const fp*) &t);
115 fp_mul3(&c[5],&
a[3],&b[2]);
126 fp_mul3(&t,&
a[4],&b[0]); fp_add2(&c[4],(
const fp*) &t);
127 fp_mul3(&t,&
a[4],&b[1]); fp_add2(&c[5],(
const fp*) &t);
128 fp_mul3(&c[6],&
a[4],&b[2]);
141 for (
long long i = 0;
i <
blen-1;++
i)
177 for (
long long i = 0;
i <
c1len;++
i) {
185 fp_sub2(&c[
i+
kara],(
const fp*) &c[
i]);
226 for (
long long i = 0;
i <
clen;++
i)
240 for (
long long i = 0;
i <
clen;++
i)
241 fp_mul3(&c[
i],&
a[
i],&b[0]);
251 fp_mul3(&c[0],&
a[0],&b[0]);
252 fp_mul3(&c[1],&
a[0],&b[1]);
253 fp t; fp_mul3(&t,&
a[1],&b[0]);
254 fp_add2(&c[1],(
const fp*) &t);
260 fp_mul3(&c[0],&
a[0],&b[0]);
261 fp_mul3(&c[2],&
a[1],&b[1]);
264 fp_mul3(&c[1],(
const fp*) &
a01,(
const fp*) &
b01);
265 fp_sub2(&c[1],(
const fp*) &c[0]);
266 fp_sub2(&c[1],(
const fp*) &c[2]);
268 fp_add2(&c[2],(
const fp*) &
a2b0);
335 for (
long long i = 0;
i <
c1len-1;++
i) fp_add2(&c[2*
i+2],(
const fp*) &
c1[
i]);
344 long long split = (
alen+1)/2;
346 if (split+split <
clen) {
349 for (
long long i = 0;
i <
clen;++
i)
362 for (
long long i = 0;
i <
blen-1;++
i)
363 fp_add2(&c[
i+split],(
const fp*) &
c1[
i]);
372 if (
clen < split+split)
386 for (
long long i = 0;
i <
clen-split;++
i) {
387 fp_add2(&c[
i+split],(
const fp*) &
c01[
i]);
388 fp_add2(&c[
i+split],(
const fp*) &
c10[
i]);
400 fp_mul3(&c[0],&
a[0],&b[0]);
405 fp_mul3(&c[0],&
a[0],&b[0]);
406 fp_add3(&c[1],(
const fp*) &c[0],(
const fp*) &c[0]);
412 fp_mul3(&c[0],&
a[0],&b[0]);
413 fp_mul3(&c[2],&
a[1],&b[1]);
416 fp_mul3(&c[1],(
const fp*) &
a01,(
const fp*) &
b01);
417 fp_add2(&c[2],(
const fp*) &c[0]);
418 fp_sub2(&c[1],(
const fp*) &c[2]);
419 fp_add2(&c[2],(
const fp*) &c[0]);
426 fp_mul3(&c[0],&
a[0],&b[0]);
427 fp_mul3(&c[3],&
a[1],&b[1]);
430 fp_mul3(&c[2],(
const fp*) &
a01,(
const fp*) &
b01);
431 fp_sub2(&c[2],(
const fp*) &c[0]);
432 fp_sub3(&c[1],(
const fp*) &c[2],(
const fp*) &c[3]);
433 fp_add2(&c[3],(
const fp*) &c[0]);
434 fp_add2(&c[3],(
const fp*) &c[3]);
445 fp_mul3(&c[1],(
const fp*) &
a10,(
const fp*) &
b01);
448 fp_mul3(&c[2],(
const fp*) &
a20,(
const fp*) &
b02);
450 fp b12; fp_sub3(&
b12,(
const fp*) &b[1],(
const fp*) &b[2]);
451 fp_mul3(&c[3],(
const fp*) &
a21,(
const fp*) &
b12);
452 fp_mul3(&c[0],&
a[0],&b[0]);
456 fp t; fp_add3(&t,(
const fp*) &
a1b1,(
const fp*) &c[0]);
457 fp_add2(&c[1],(
const fp*) &t);
458 fp_add2(&c[3],(
const fp*) &c[1]);
459 fp_add3(&c[4],(
const fp*) &t,(
const fp*) &
a2b2);
460 fp_add2(&c[4],(
const fp*) &t);
461 fp_add2(&c[2],(
const fp*) &t);
462 fp_add2(&c[2],(
const fp*) &
a2b2);
463 fp_add2(&c[3],(
const fp*) &
a1b1);
464 fp_add2(&c[3],(
const fp*) &
a2b2);
497 for (
long long i = 0;
i <
len1;++
i) fp_add2(&
a0[
i+1],(
const fp*) &
a1[
i]);
498 for (
long long i = 0;
i <
len1;++
i) fp_add2(&
b0[
i+1],(
const fp*) &
b1[
i]);
535 for (
long long i = 0;
i <
alen-1;++
i)
536 fp_add2(&c[
i],(
const fp*) &
c0[
i]);
537 for (
long long i = 0;
i <
alen-1;++
i)
539 for (
long long i = 0;
i <
alen-1;++
i)
541 for (
long long i = 0;
i <
alen-1;++
i)
558 if (
blen <= 0)
return;
582 fp_add2(&c[0],(
const fp*) &t);
589 for (
long long i = 0;
i <
alen;++
i)
591 for (
long long i = 0;
i <
blen;++
i)
625 fp_add2(&c[0],(
const fp*) &t);
631 for (
long long i = 0;
i <
clen;++
i)
639 for (
long long i = 0;
i <
clen;++
i) {
641 fp_add2(&c[
i],(
const fp*) &t);
657 fp_sub3(&delta,&b[0],&b[1]);
658 fp_mul2(&delta,&
a[1]);
659 fp_add3(&c[0],&
a[0],&
a[1]);
660 fp_mul2(&c[0],&b[1]);
661 fp_add2(&c[0],(
const fp*) &delta);
662 fp_add3(&c[1],&
a[1],&
a[2]);
663 fp_mul2(&c[1],&b[0]);
664 fp_sub2(&c[1],(
const fp*) &delta);
684 fp_add3(&c[0],(
const fp*) &
a01,(
const fp*) &
a23);
685 fp_add3(&c[1],(
const fp*) &
a12,(
const fp*) &
a34);
686 fp_add2(&c[0],(
const fp*) &delta);
687 fp_sub2(&c[1],(
const fp*) &delta);
697 fp_sub3(&
b01,&b[0],&b[1]);
702 fp_add3(&c[0],&
a[0],&
a[1]);
703 fp_add3(&c[1],&
a[1],&
a[2]);
704 fp_add3(&c[2],&
a[2],&
a[3]);
705 fp_mul2(&c[0],&b[1]);
706 fp_mul2(&c[1],&b[1]);
707 fp_mul2(&c[2],(
const fp*) &b[0]);
721 for (
long long i = 0;
i < 6;++
i) fp_add3(&
a01[
i],&
a[
i],&
a[
i+3]);
724 for (
long long i = 0;
i < 3;++
i) fp_sub3(&
b01[
i],&b[
i],&b[
i+3]);
732 fp_add2(&c[0],(
const fp*) &delta[0]);
733 fp_add2(&c[1],(
const fp*) &delta[1]);
734 fp_add2(&c[2],(
const fp*) &delta[2]);
735 fp_sub2(&c[3],(
const fp*) &delta[0]);
747 fp_add3(&
a01[0],&
a[0],&
a[2]);
748 fp_add3(&
a01[1],&
a[1],&
a[3]);
749 fp_add3(&
a01[2],&
a[2],&
a[4]);
750 fp_add3(&
a01[3],&
a[3],&
a[5]);
751 fp_add3(&
a01[4],&
a[4],&
a[6]);
752 fp_add3(&
a01[5],&
a[5],&
a[7]);
755 fp_sub3(&
b01[0],&b[0],&b[2]);
756 fp_sub3(&
b01[1],&b[1],&b[3]);
764 fp_add2(&c[0],(
const fp*) &delta[0]);
765 fp_add2(&c[1],(
const fp*) &delta[1]);
766 fp_add2(&c[2],(
const fp*) &delta[2]);
767 fp_sub2(&c[3],(
const fp*) &delta[1]);
768 fp_sub2(&c[4],(
const fp*) &delta[2]);
773 long long split = (
clen+1)/2;
778 for (
long long i = 0;
i < 3*split-2;++
i) fp_add3(&
a01[
i],&
a[
i],&
a[
i+split]);
782 for (
long long i = 0;
i < split;++
i) fp_sub3(&
b01[
i],&b[
i],&b[
i+split]);
793 for (
long long i = 0;
i < split;++
i) fp_add2(&c[
i],(
const fp*) &delta[
i]);
794 for (
long long i = 0;
i < split-1;++
i) fp_sub2(&c[
i+split],(
const fp*) &delta[
i]);
800 long long split =
clen/2;
803 for (
long long i = 0;
i < 3*split-1;++
i) fp_add3(&
a01[
i],&
a[
i],&
a[
i+split]);
806 for (
long long i = 0;
i < split;++
i) fp_sub3(&
b01[
i],&b[
i],&b[
i+split]);
814 for (
long long i = 0;
i < split;++
i) {
815 fp_add2(&c[
i],(
const fp*) &delta[
i]);
816 fp_sub2(&c[split+
i],(
const fp*) &delta[
i]);
824 for (
long long i = 0;
i <
clen;++
i)
831 for (
long long i = 0;
i <
clen;++
i)
838 if (n <= 1)
return 0;
839 if (n == 2)
return 3;
840 if (n == 3)
return 7;
845 return left+right+n+1;
854 if (n <= 1)
return 0;
870 poly_mul(T+left+right,(
const fp*) T+left-(
m+1),
m+1,(
const fp*) T+left+right-(n-
m+1),n-
m+1);
871 return left+right+n+1;
874static long long poly_eval_precomputesize(
long long flen)
876 if (
flen <= 2)
return 0;
882 if (
flen <= 2)
return;
889 for (
long long i = 2;
i <
flen;++
i) {
896 for (
long long i = 1;
i <
flen-1;++
i)
916 fp_mul3(
v,&f[0],&
P->z);
917 fp_mul3(&
tmp,&f[1],&
P->x);
918 fp_add2(
v,(
const fp*) &
tmp);
924 for (
long long i = 1;
i <
flen;++
i) {
927 fp_add2(
v,(
const fp*) &
tmp);
945static void poly_pseudoreciprocal(
fp *d,
fp *
r,
long long rlen,
const fp *
m,
long long mdeg)
961 fp_neg2(&
r[0],&
m[
mdeg-1]);
977 long long top = (
rlen+1)/2;
980 poly_pseudoreciprocal(d,
s,top,
m,
mdeg);
1001 for (
long long i = 0;
i <
bot;++
i)
1003 for (
long long i = 0;
i < top;++
i)
1004 fp_mul3(&
r[
i+
bot],(
const fp*) &
s[
i],(
const fp*) d);
1009static long long poly_pseudoremainder_precomputesize(
long long glen,
long long flen)
1017static void poly_pseudoremainder_precompute(
fp *
precomp,
long long glen,
long long flen,
const fp *
m)
1027static void poly_pseudoremainder_postcompute(
fp *
g,
long long glen,
const fp *f,
long long flen,
const fp *
m,
const fp *
precomp)
1032 for (
long long i = 0;
i <
glen;++
i)
1061 for (
long long i = 0;
i <
glen;++
i) {
1062 fp_mul3(&
g[
i],&f[
i],d);
1063 fp_sub2(&
g[
i], (
const fp*) &
qm[
i]);
1084static long long poly_multieval_unscaled_precomputesize(
long long n,
long long flen)
1086 if (n <= 0)
return 0;
1088 return poly_eval_precomputesize(
flen);
1092 poly_multieval_unscaled_precomputesize(
m,
flen)
1093 + poly_multieval_unscaled_precomputesize(n-
m,
flen);
1096 poly_pseudoremainder_precomputesize(n,
flen)
1097 + poly_multieval_unscaled_precomputesize(1,n)
1098 + poly_multieval_unscaled_precomputesize(1,n);
1101 poly_pseudoremainder_precomputesize(n,
flen)
1102 + poly_multieval_unscaled_precomputesize(2,n)
1103 + poly_multieval_unscaled_precomputesize(1,n);
1105 poly_pseudoremainder_precomputesize(n,
flen)
1106 + poly_multieval_unscaled_precomputesize(
m,n)
1107 + poly_multieval_unscaled_precomputesize(n-
m,n);
1110static void poly_multieval_unscaled_precompute(
fp *
precomp,
long long n,
long long flen,
const fp *
P,
const fp *T)
1116 fp_neg2(&
Pp.x,&
P[0]);
1126 precomp += poly_multieval_unscaled_precomputesize(
m,
flen);
1127 poly_multieval_unscaled_precompute(
precomp,n-
m,
flen,
P+2*
m,T+left);
1132 poly_pseudoremainder_precompute(
precomp,n,
flen,T);
1133 precomp += poly_pseudoremainder_precomputesize(n,
flen);
1134 poly_multieval_unscaled_precompute(
precomp,1,n,
P,0);
1135 precomp += poly_multieval_unscaled_precomputesize(1,n);
1136 poly_multieval_unscaled_precompute(
precomp,1,n,
P+2,0);
1141 poly_pseudoremainder_precompute(
precomp,n,
flen,T+3);
1142 precomp += poly_pseudoremainder_precomputesize(n,
flen);
1143 poly_multieval_unscaled_precompute(
precomp,2,n,
P,T);
1144 precomp += poly_multieval_unscaled_precomputesize(2,n);
1145 poly_multieval_unscaled_precompute(
precomp,1,n,
P+4,0);
1150 poly_pseudoremainder_precompute(
precomp,n,
flen,T+left+right);
1151 precomp += poly_pseudoremainder_precomputesize(n,
flen);
1152 poly_multieval_unscaled_precompute(
precomp,
m,n,
P,T);
1153 precomp += poly_multieval_unscaled_precomputesize(
m,n);
1154 poly_multieval_unscaled_precompute(
precomp,n-
m,n,
P+2*
m,T+left);
1157static void poly_multieval_unscaled_postcompute(
fp *
v,
long long n,
const fp *f,
long long flen,
const fp *
P,
const fp *T,
const fp *
precomp)
1163 fp_neg2(&
Pp.x,&
P[0]);
1175 precomp += poly_multieval_unscaled_precomputesize(
m,
flen);
1183 poly_pseudoremainder_postcompute(
g,n,f,
flen,T,
precomp);
1184 precomp += poly_pseudoremainder_precomputesize(n,
flen);
1185 poly_multieval_unscaled_postcompute(
v,1,(
const fp*)
g,n,
P,0,
precomp);
1186 precomp += poly_multieval_unscaled_precomputesize(1,n);
1187 poly_multieval_unscaled_postcompute(
v+1,1,(
const fp*)
g,n,
P+2,0,
precomp);
1192 poly_pseudoremainder_postcompute(
g,n,f,
flen,T+3,
precomp);
1193 precomp += poly_pseudoremainder_precomputesize(n,
flen);
1194 poly_multieval_unscaled_postcompute(
v,2,(
const fp*)
g,n,
P,T,
precomp);
1195 precomp += poly_multieval_unscaled_precomputesize(2,n);
1196 poly_multieval_unscaled_postcompute(
v+2,1,(
const fp*)
g,n,
P+4,0,
precomp);
1201 poly_pseudoremainder_postcompute(
g,n,f,
flen,T+left+right,
precomp);
1202 precomp += poly_pseudoremainder_precomputesize(n,
flen);
1203 poly_multieval_unscaled_postcompute(
v,
m,(
const fp*)
g,n,
P,T,
precomp);
1204 precomp += poly_multieval_unscaled_precomputesize(
m,n);
1205 poly_multieval_unscaled_postcompute(
v+
m,n-
m,(
const fp*)
g,n,
P+2*
m,T+left,
precomp);
1221static void poly_multieval_scaled(
fp *
v,
long long n,
const fp *
r,
const fp *
P,
const fp *T)
1231 poly_multieval_scaled(
v,1,(
const fp*)
g,
P,0);
1233 poly_multieval_scaled(
v+1,1,(
const fp*)
g,
P+2,0);
1239 poly_multieval_scaled(
v,2,(
const fp*)
g,
P,T);
1241 poly_multieval_scaled(
v+2,1,(
const fp*)
g,
P+4,0);
1251 poly_multieval_scaled(
v,
m,(
const fp*)
g,
P,T);
1253 poly_multieval_scaled(
v+
m,n-
m,(
const fp*)
g,
P+2*
m,T+left);
1256static long long poly_multieval_chooseunscaled(
long long n,
long long flen)
1259 if (n <= 1)
return 1;
1260 if (
flen <= 1)
return 1;
1266 if (poly_multieval_chooseunscaled(n,
flen))
1267 return poly_multieval_unscaled_precomputesize(n,
flen);
1274 if (poly_multieval_chooseunscaled(n,
flen)) {
1275 poly_multieval_unscaled_precompute(
precomp,n,
flen,
P,T);
1288 if (poly_multieval_chooseunscaled(n,
flen)) {
1289 poly_multieval_unscaled_postcompute(
v,n,f,
flen,
P,T,
precomp);
1301 for(
long long i = 0;
i < n;++
i)
1313 poly_multieval_scaled(
v,n,(
const fp*)
frootinv,(
const fp*)
P,(
const fp*) T);
1339 for (
long long i = 0;
i <= 2*n;++
i)
fp_copy(T[
i], X[
i]);
1356 for (
long long i = 0;
i <= 2*n;++
i)
fp_copy(T[
i], X[
i]);
uint64_t fp[NUMBER_OF_WORDS]
#define UNCONST(type, var)
#define poly_multieval_postcompute
#define poly_mul_selfreciprocal
#define poly_multiprod2_selfreciprocal
#define poly_multieval_precomputesize
#define poly_multieval_precompute