8#define UNCONST(type, var) (*(type*)&(var))
18 for (
long long i = 0;
i < alen;++
i) {
19 fp_mul3(&c[
i],&
a[
i],&b[0]);
27 fp_mul3(&c[0],&
a[0],&b[0]);
28 fp_mul3(&c[2],&
a[1],&b[1]);
30 fp a01; fp_add3(&a01,&
a[0],&
a[1]);
31 fp b01; fp_add3(&b01,&b[0],&b[1]);
32 fp_mul3(&c[1],(
const fp*) &a01,(
const fp*) &b01);
33 fp_sub2(&c[1],(
const fp*) &c[0]);
34 fp_sub2(&c[1],(
const fp*) &c[2]);
47 fp_mul3(&c[0],&
a[0],&b[0]);
48 fp_mul3(&c[2],&
a[1],&b[1]);
49 fp b01; fp_add3(&b01,&b[0],&b[1]);
50 fp a01; fp_add3(&a01,&
a[0],&
a[1]);
51 fp_mul3(&c[1],(
const fp*) &a01,(
const fp*) &b01);
52 fp_sub2(&c[1],(
const fp*) &c[0]);
53 fp_sub2(&c[1],(
const fp*) &c[2]);
54 fp_mul3(&c[3],&
a[2],&b[1]);
55 fp a2b0; fp_mul3(&a2b0,&
a[2],&b[0]);
56 fp_add2(&c[2],(
const fp*) &a2b0);
60 fp_mul3(&c[0],&
a[0],&b[0]);
61 fp_mul3(&c[2],&
a[1],&b[1]);
62 fp b01; fp_add3(&b01,&b[0],&b[1]);
63 fp a01; fp_add3(&a01,&
a[0],&
a[1]);
64 fp_mul3(&c[1],(
const fp*) &a01,(
const fp*) &b01);
65 fp_sub2(&c[1],(
const fp*) &c[0]);
66 fp_sub2(&c[1],(
const fp*) &c[2]);
69 fp_mul3(&mid,&
a[2],&b[0]);
70 fp_mul3(&c[4],&
a[3],&b[1]);
71 fp a23; fp_add3(&a23,&
a[2],&
a[3]);
72 fp_mul3(&c[3],(
const fp*) &a23,(
const fp*) &b01);
73 fp_sub2(&c[3],(
const fp*) &mid);
74 fp_sub2(&c[3],(
const fp*) &c[4]);
75 fp_add2(&c[2],(
const fp*) &mid);
93 fp a10; fp_sub3(&a10,&
a[1],&
a[0]);
94 fp b01; fp_sub3(&b01,&b[0],&b[1]);
95 fp_mul3(&c[1],(
const fp*) &a10,(
const fp*) &b01);
96 fp a20; fp_sub3(&a20,&
a[2],&
a[0]);
97 fp b02; fp_sub3(&b02,&b[0],&b[2]);
98 fp_mul3(&c[2],(
const fp*) &a20,(
const fp*) &b02);
99 fp a21; fp_sub3(&a21,&
a[2],&
a[1]);
100 fp b12; fp_sub3(&b12,&b[1],&b[2]);
101 fp_mul3(&c[3],(
const fp*) &a21,(
const fp*) &b12);
102 fp_mul3(&c[0],&
a[0],&b[0]);
103 fp_mul3(&c[4],&
a[2],&b[2]);
104 fp a1b1; fp_mul3(&a1b1,&
a[1],&b[1]);
105 fp t; fp_add3(&t,(
const fp*) &a1b1,(
const fp*) &c[0]);
106 fp_add2(&c[1],(
const fp*) &t);
107 fp_add3(&t,(
const fp*) &a1b1,(
const fp*) &c[4]);
108 fp_add2(&c[3],(
const fp*) &t);
109 fp_add2(&t,(
const fp*) &c[0]);
110 fp_add2(&c[2],(
const fp*) &t);
113 fp_mul3(&t,&
a[3],&b[0]); fp_add2(&c[3],(
const fp*) &t);
114 fp_mul3(&t,&
a[3],&b[1]); fp_add2(&c[4],(
const fp*) &t);
115 fp_mul3(&c[5],&
a[3],&b[2]);
126 fp_mul3(&t,&
a[4],&b[0]); fp_add2(&c[4],(
const fp*) &t);
127 fp_mul3(&t,&
a[4],&b[1]); fp_add2(&c[5],(
const fp*) &t);
128 fp_mul3(&c[6],&
a[4],&b[2]);
134 long long kara = (alen+1)/2;
135 long long a1len = alen-kara;
141 for (
long long i = 0;
i < blen-1;++
i)
142 fp_add2(&c[
i+kara],(
const fp*) &c1[
i]);
143 for (
long long i = blen-1;
i < a1len+blen-1;++
i)
149 long long b1len = blen-kara;
154 for (
long long i = 0;
i < a1len;++
i)
155 fp_add3(&a01[
i],&
a[
i],&
a[
i+kara]);
156 for (
long long i = a1len;
i < kara;++
i)
160 for (
long long i = 0;
i < b1len;++
i)
161 fp_add3(&b01[
i],&b[
i],&b[
i+kara]);
162 for (
long long i = b1len;
i < kara;++
i)
166 long long c1len = a1len+b1len-1;
170 poly_mul(c01,(
const fp*) a01,kara,(
const fp*) b01,kara);
176 fp_sub3(&c[kara+kara-1],(
const fp*) &c01[kara-1],(
const fp*) &c[kara-1]);
177 for (
long long i = 0;
i < c1len;++
i) {
178 fp_sub3(&mix,(
const fp*) &c[kara+
i],(
const fp*) &c1[
i]);
179 fp_sub3(&c[
i+2*kara],(
const fp*) &c01[
i+kara],(
const fp*) &mix);
180 fp_sub3(&c[
i+kara],(
const fp*) &mix,(
const fp*) &c[
i]);
181 fp_add2(&c[
i+kara],(
const fp*) &c01[
i]);
183 for (
long long i = c1len;
i < kara-1;++
i) {
184 fp_sub3(&c[
i+2*kara],(
const fp*) &c01[
i+kara],(
const fp*) &c[
i+kara]);
185 fp_sub2(&c[
i+kara],(
const fp*) &c[
i]);
186 fp_add2(&c[
i+kara],(
const fp*) &c01[
i]);
191 for (
long long i = 0;
i < c1len-kara;++
i) {
192 fp_sub3(&mix,(
const fp*) &c[kara+
i],(
const fp*) &c1[
i]);
193 fp_sub3(&c[
i+kara],(
const fp*) &mix,(
const fp*) &c[
i]);
194 fp_add2(&c[
i+kara],(
const fp*) &c01[
i]);
195 fp_sub3(&c[
i+2*kara],(
const fp*) &c01[
i+kara],(
const fp*) &mix);
196 fp_sub2(&c[
i+2*kara],(
const fp*) &c1[
i+kara]);
198 for (
long long i = c1len-kara;
i < kara-1;++
i) {
199 fp_sub3(&mix,(
const fp*) &c[kara+
i],(
const fp*) &c1[
i]);
200 fp_sub3(&c[
i+kara],(
const fp*) &mix,(
const fp*) &c[
i]);
201 fp_add2(&c[
i+kara],(
const fp*) &c01[
i]);
202 fp_sub3(&c[
i+2*kara],(
const fp*) &c01[
i+kara],(
const fp*) &mix);
204 fp_sub3(&c[kara+kara-1],(
const fp*) &c01[kara-1],(
const fp*) &c[kara-1]);
205 fp_sub2(&c[kara+kara-1],(
const fp*) &c1[kara-1]);
206 for (
long long i = kara-1;
i < c1len;++
i)
219 if (clen == alen+blen-1) {
223 if (clen*4 >= 3*(alen+blen-1)) {
226 for (
long long i = 0;
i < clen;++
i)
232 const fp *t =
a;
long long tlen = alen;
236 if (alen > clen) alen = clen;
237 if (blen > clen) blen = clen;
240 for (
long long i = 0;
i < clen;++
i)
241 fp_mul3(&c[
i],&
a[
i],&b[0]);
248 assert(clen <= alen+blen-2);
251 fp_mul3(&c[0],&
a[0],&b[0]);
252 fp_mul3(&c[1],&
a[0],&b[1]);
253 fp t; fp_mul3(&t,&
a[1],&b[0]);
254 fp_add2(&c[1],(
const fp*) &t);
260 fp_mul3(&c[0],&
a[0],&b[0]);
261 fp_mul3(&c[2],&
a[1],&b[1]);
262 fp b01; fp_add3(&b01,&b[0],&b[1]);
263 fp a01; fp_add3(&a01,&
a[0],&
a[1]);
264 fp_mul3(&c[1],(
const fp*) &a01,(
const fp*) &b01);
265 fp_sub2(&c[1],(
const fp*) &c[0]);
266 fp_sub2(&c[1],(
const fp*) &c[2]);
267 fp a2b0; fp_mul3(&a2b0,&
a[2],&b[0]);
268 fp_add2(&c[2],(
const fp*) &a2b0);
274 long long a1len = alen/2;
275 long long a0len = alen-a1len;
282 long long b1len = blen/2;
283 long long b0len = blen-b1len;
286 for (
long long i = 0;
i < b0len;++
i)
fp_copy(b0[
i], b[2*
i]);
287 for (
long long i = 0;
i < b1len;++
i)
fp_copy(b1[
i], b[2*
i+1]);
291 for (
long long i = 0;
i < a1len;++
i) fp_add3(&a01[
i],(
const fp*) &a0[
i],(
const fp*) &
a1[
i]);
292 if (a1len < a0len)
fp_copy(a01[a1len], a0[a1len]);
295 for (
long long i = 0;
i < b1len;++
i) fp_add3(&b01[
i],(
const fp*) &b0[
i],(
const fp*) &b1[
i]);
296 if (b1len < b0len)
fp_copy(b01[b1len], b0[b1len]);
298 long long c0len = a0len+b0len-1;
299 if (c0len > (clen+1)/2) c0len = (clen+1)/2;
304 long long c01len = a0len+b0len-1;
305 if (c01len > clen/2) c01len = clen/2;
310 long long c1len = a1len+b1len-1;
311 if (c1len > clen/2) c1len = clen/2;
319 for (
long long i = 0;
i < c01len;++
i)
320 fp_sub2(&c01[
i],(
const fp*) &c0[
i]);
323 for (
long long i = 0;
i < c1len;++
i)
324 fp_sub2(&c01[
i],(
const fp*) &c1[
i]);
328 assert(2*(c0len-1) < clen);
330 assert(2*(c01len-1)+1 < clen);
331 assert(2*c01len+1 >= clen);
333 for (
long long i = 0;
i < c0len;++
i)
fp_copy(c[2*
i], c0[
i]);
334 for (
long long i = 0;
i < c01len;++
i)
fp_copy(c[2*
i+1], c01[
i]);
335 for (
long long i = 0;
i < c1len-1;++
i) fp_add2(&c[2*
i+2],(
const fp*) &c1[
i]);
336 if (2*c1len < clen) fp_add2(&c[2*c1len],(
const fp*) &c1[c1len-1]);
344 long long split = (alen+1)/2;
346 if (split+split < clen) {
349 for (
long long i = 0;
i < clen;++
i)
354 long long a1len = alen-split;
357 assert(split+blen-1 <= clen);
358 assert(a1len+blen-1 >= clen-split);
362 for (
long long i = 0;
i < blen-1;++
i)
363 fp_add2(&c[
i+split],(
const fp*) &c1[
i]);
364 for (
long long i = blen-1;
i+split < clen;++
i)
369 assert(split+split >= clen);
372 if (clen < split+split)
375 assert(clen == split+split);
386 for (
long long i = 0;
i < clen-split;++
i) {
387 fp_add2(&c[
i+split],(
const fp*) &c01[
i]);
388 fp_add2(&c[
i+split],(
const fp*) &c10[
i]);
399 if (alen == 1 && blen == 1) {
400 fp_mul3(&c[0],&
a[0],&b[0]);
404 if (alen == 2 && blen == 2) {
405 fp_mul3(&c[0],&
a[0],&b[0]);
406 fp_add3(&c[1],(
const fp*) &c[0],(
const fp*) &c[0]);
411 if (alen == 3 && blen == 3) {
412 fp_mul3(&c[0],&
a[0],&b[0]);
413 fp_mul3(&c[2],&
a[1],&b[1]);
414 fp a01; fp_add3(&a01,&
a[0],&
a[1]);
415 fp b01; fp_add3(&b01,&b[0],&b[1]);
416 fp_mul3(&c[1],(
const fp*) &a01,(
const fp*) &b01);
417 fp_add2(&c[2],(
const fp*) &c[0]);
418 fp_sub2(&c[1],(
const fp*) &c[2]);
419 fp_add2(&c[2],(
const fp*) &c[0]);
425 if (alen == 4 && blen == 4) {
426 fp_mul3(&c[0],&
a[0],&b[0]);
427 fp_mul3(&c[3],&
a[1],&b[1]);
428 fp a01; fp_add3(&a01,&
a[0],&
a[1]);
429 fp b01; fp_add3(&b01,&b[0],&b[1]);
430 fp_mul3(&c[2],(
const fp*) &a01,(
const fp*) &b01);
431 fp_sub2(&c[2],(
const fp*) &c[0]);
432 fp_sub3(&c[1],(
const fp*) &c[2],(
const fp*) &c[3]);
433 fp_add2(&c[3],(
const fp*) &c[0]);
434 fp_add2(&c[3],(
const fp*) &c[3]);
441 if (alen == 5 && blen == 5) {
443 fp a10; fp_sub3(&a10,&
a[1],&
a[0]);
444 fp b01; fp_sub3(&b01,&b[0],&b[1]);
445 fp_mul3(&c[1],(
const fp*) &a10,(
const fp*) &b01);
446 fp a20; fp_sub3(&a20,&
a[2],&
a[0]);
447 fp b02; fp_sub3(&b02,&b[0],&b[2]);
448 fp_mul3(&c[2],(
const fp*) &a20,(
const fp*) &b02);
449 fp a21; fp_sub3(&a21,(
const fp*) &
a[2],(
const fp*) &
a[1]);
450 fp b12; fp_sub3(&b12,(
const fp*) &b[1],(
const fp*) &b[2]);
451 fp_mul3(&c[3],(
const fp*) &a21,(
const fp*) &b12);
452 fp_mul3(&c[0],&
a[0],&b[0]);
453 fp a1b1; fp_mul3(&a1b1,(
const fp*) &
a[1],(
const fp*) &b[1]);
454 fp a2b2; fp_mul3(&a2b2,(
const fp*) &
a[2],(
const fp*) &b[2]);
456 fp t; fp_add3(&t,(
const fp*) &a1b1,(
const fp*) &c[0]);
457 fp_add2(&c[1],(
const fp*) &t);
458 fp_add2(&c[3],(
const fp*) &c[1]);
459 fp_add3(&c[4],(
const fp*) &t,(
const fp*) &a2b2);
460 fp_add2(&c[4],(
const fp*) &t);
461 fp_add2(&c[2],(
const fp*) &t);
462 fp_add2(&c[2],(
const fp*) &a2b2);
463 fp_add2(&c[3],(
const fp*) &a1b1);
464 fp_add2(&c[3],(
const fp*) &a2b2);
472 if (alen == blen && (alen&1)) {
473 long long len0 = (alen+1)/2;
474 long long len1 = alen/2;
483 assert(2*(len0-1) < alen);
484 assert(2*(len1-1)+1 < alen);
488 for (
long long i = 0;
i < len0;++
i)
fp_copy(b0[
i], b[2*
i]);
492 for (
long long i = 0;
i < len1;++
i)
fp_copy(b1[
i], b[2*
i+1]);
495 for (
long long i = 0;
i < len1;++
i) fp_add2(&a0[
i],(
const fp*) &
a1[
i]);
496 for (
long long i = 0;
i < len1;++
i) fp_add2(&b0[
i],(
const fp*) &b1[
i]);
497 for (
long long i = 0;
i < len1;++
i) fp_add2(&a0[
i+1],(
const fp*) &
a1[
i]);
498 for (
long long i = 0;
i < len1;++
i) fp_add2(&b0[
i+1],(
const fp*) &b1[
i]);
501 for (
long long i = 0;
i < len0+len0-1;++
i)
502 fp_sub2(&c01[
i],(
const fp*) &c0[
i]);
503 for (
long long i = 0;
i < len1+len1-1;++
i)
504 fp_sub2(&c01[
i],(
const fp*) &c1[
i]);
505 for (
long long i = 0;
i < len1+len1-1;++
i)
506 fp_sub2(&c01[
i+1],(
const fp*) &c1[
i]);
507 for (
long long i = 0;
i < len1+len1-1;++
i)
508 fp_sub2(&c01[
i+1],(
const fp*) &c1[
i]);
509 for (
long long i = 0;
i < len1+len1-1;++
i)
510 fp_sub2(&c01[
i+2],(
const fp*) &c1[
i]);
512 for (
long long i = 1;
i < len0+len0-1;++
i)
513 fp_sub2(&c01[
i],(
const fp*) &c01[
i-1]);
515 long long clen = alen+blen-1;
516 assert(2*(len0+len0-2) < clen);
517 assert(2*(len0+len0-3)+1 < clen);
518 assert(2*(len1+len1-2)+2 < clen);
520 for (
long long i = 0;
i < len0+len0-1;++
i)
fp_copy(c[2*
i], c0[
i]);
521 for (
long long i = 0;
i < len0+len0-2;++
i)
fp_copy(c[2*
i+1], c01[
i]);
522 for (
long long i = 0;
i < len1+len1-1;++
i) fp_add2(&c[2*
i+2],(
const fp*) &c1[
i]);
526 if (alen == blen && !(alen&1)) {
527 long long half = alen/2;
535 for (
long long i = 0;
i < alen-1;++
i)
536 fp_add2(&c[
i],(
const fp*) &c0[
i]);
537 for (
long long i = 0;
i < alen-1;++
i)
538 fp_add2(&c[alen+alen-2-
i],(
const fp*) &c0[
i]);
539 for (
long long i = 0;
i < alen-1;++
i)
540 fp_add2(&c[half+
i],(
const fp*) &c1[
i]);
541 for (
long long i = 0;
i < alen-1;++
i)
542 fp_add2(&c[alen+half-2-
i],(
const fp*) &c1[
i]);
546 long long clen = alen+blen-1;
548 for (
long long i = (clen+1)/2;
i < clen;++
i)
558 if (blen <= 0)
return;
561 assert(cstart <= alen+blen-1);
563 if (cstart == alen+blen-1)
return;
569 if (cstart == alen+blen-2) {
570 fp_mul3(&c[0],&
a[alen-1],&b[blen-1]);
574 for (
long long i = cstart;
i < alen+blen-1;++
i)
575 fp_mul3(&c[
i-cstart],&
a[
i],&b[0]);
578 if (cstart == alen+blen-3) {
579 fp_mul3(&c[0],&
a[alen-2],&b[blen-1]);
581 fp_mul3(&t,&
a[alen-1],&b[blen-2]);
582 fp_add2(&c[0],(
const fp*) &t);
583 fp_mul3(&c[1],&
a[alen-1],&b[blen-1]);
589 for (
long long i = 0;
i < alen;++
i)
591 for (
long long i = 0;
i < blen;++
i)
594 fp crev[alen+blen-1-cstart];
595 poly_mul_low(crev,alen+blen-1-cstart,(
const fp*) arev,alen,(
const fp*) brev,blen);
596 for (
long long i = cstart;
i < alen+blen-1;++
i)
597 fp_copy(c[
i-cstart], crev[alen+blen-2-
i]);
608void poly_mul_mid(
fp *c,
long long cstart,
long long clen,
const fp *
a,
long long alen,
const fp *b,
long long blen)
614 assert(cstart+clen <= alen+blen-1);
619 if (alen > cstart) alen = cstart+1;
621 if (blen <= cstart)
i = cstart-blen+1;
622 for (;
i < alen;++
i) {
624 fp_mul3(&t,&
a[
i],&b[cstart-
i]);
625 fp_add2(&c[0],(
const fp*) &t);
631 for (
long long i = 0;
i < clen;++
i)
632 fp_mul3(&c[
i],&
a[cstart+
i],&b[0]);
636 if (cstart > 0 && cstart+clen <= alen && (blen & 1)) {
639 for (
long long i = 0;
i < clen;++
i) {
640 fp_mul3(&t,&
a[cstart+
i],&b[0]);
641 fp_add2(&c[
i],(
const fp*) &t);
651 if (cstart == 1 && alen >= 3 && blen == 2) {
657 fp_sub3(&delta,&b[0],&b[1]);
658 fp_mul2(&delta,&
a[1]);
659 fp_add3(&c[0],&
a[0],&
a[1]);
660 fp_mul2(&c[0],&b[1]);
661 fp_add2(&c[0],(
const fp*) &delta);
662 fp_add3(&c[1],&
a[1],&
a[2]);
663 fp_mul2(&c[1],&b[0]);
664 fp_sub2(&c[1],(
const fp*) &delta);
667 if (cstart == 3 && alen >= 5 && blen == 4) {
671 fp b01; fp_sub3(&b01,&b[0],&b[1]);
672 fp b23; fp_sub3(&b23,&b[2],&b[3]);
673 fp a1b23; fp_mul3(&a1b23,&
a[1],(
const fp*) &b23);
674 fp a3b01; fp_mul3(&a3b01,&
a[3],(
const fp*) &b01);
675 fp delta; fp_add3(&delta,(
const fp*) &a1b23,(
const fp*) &a3b01);
676 fp a01; fp_add3(&a01,&
a[0],&
a[1]);
677 fp a12; fp_add3(&a12,&
a[1],&
a[2]);
678 fp a23; fp_add3(&a23,&
a[2],&
a[3]);
679 fp a34; fp_add3(&a34,&
a[3],&
a[4]);
684 fp_add3(&c[0],(
const fp*) &a01,(
const fp*) &a23);
685 fp_add3(&c[1],(
const fp*) &a12,(
const fp*) &a34);
686 fp_add2(&c[0],(
const fp*) &delta);
687 fp_sub2(&c[1],(
const fp*) &delta);
692 if (clen == 3 && cstart == 1 && blen == 2 && alen >= 4) {
697 fp_sub3(&b01,&b[0],&b[1]);
699 fp_mul3(&delta0,&
a[1],(
const fp*) &b01);
701 fp_mul3(&delta1,&
a[2],(
const fp*) &b01);
702 fp_add3(&c[0],&
a[0],&
a[1]);
703 fp_add3(&c[1],&
a[1],&
a[2]);
704 fp_add3(&c[2],&
a[2],&
a[3]);
705 fp_mul2(&c[0],&b[1]);
706 fp_mul2(&c[1],&b[1]);
707 fp_mul2(&c[2],(
const fp*) &b[0]);
708 fp_add2(&c[0],(
const fp*) &delta0);
709 fp_add2(&c[1],(
const fp*) &delta1);
710 fp_sub2(&c[2],(
const fp*) &delta1);
714 if (clen == 4 && cstart == 5 && blen == 6 && alen >= 9) {
721 for (
long long i = 0;
i < 6;++
i) fp_add3(&a01[
i],&
a[
i],&
a[
i+3]);
724 for (
long long i = 0;
i < 3;++
i) fp_sub3(&b01[
i],&b[
i],&b[
i+3]);
732 fp_add2(&c[0],(
const fp*) &delta[0]);
733 fp_add2(&c[1],(
const fp*) &delta[1]);
734 fp_add2(&c[2],(
const fp*) &delta[2]);
735 fp_sub2(&c[3],(
const fp*) &delta[0]);
739 if (clen == 5 && cstart == 3 && blen == 4 && alen >= 8) {
747 fp_add3(&a01[0],&
a[0],&
a[2]);
748 fp_add3(&a01[1],&
a[1],&
a[3]);
749 fp_add3(&a01[2],&
a[2],&
a[4]);
750 fp_add3(&a01[3],&
a[3],&
a[5]);
751 fp_add3(&a01[4],&
a[4],&
a[6]);
752 fp_add3(&a01[5],&
a[5],&
a[7]);
755 fp_sub3(&b01[0],&b[0],&b[2]);
756 fp_sub3(&b01[1],&b[1],&b[3]);
764 fp_add2(&c[0],(
const fp*) &delta[0]);
765 fp_add2(&c[1],(
const fp*) &delta[1]);
766 fp_add2(&c[2],(
const fp*) &delta[2]);
767 fp_sub2(&c[3],(
const fp*) &delta[1]);
768 fp_sub2(&c[4],(
const fp*) &delta[2]);
772 if ((clen&1) && cstart == clen && blen == clen+1 && alen >= cstart+clen) {
773 long long split = (clen+1)/2;
774 assert(2*split == clen+1);
777 assert(3*split-2+split <= alen);
778 for (
long long i = 0;
i < 3*split-2;++
i) fp_add3(&a01[
i],&
a[
i],&
a[
i+split]);
782 for (
long long i = 0;
i < split;++
i) fp_sub3(&b01[
i],&b[
i],&b[
i+split]);
784 assert(clen <= 3*split-2);
787 assert(clen-1+split <= 3*split-2);
788 poly_mul_mid(c+split,split-1,split-1,(
const fp*) a01+split,clen-1,b,split);
793 for (
long long i = 0;
i < split;++
i) fp_add2(&c[
i],(
const fp*) &delta[
i]);
794 for (
long long i = 0;
i < split-1;++
i) fp_sub2(&c[
i+split],(
const fp*) &delta[
i]);
798 if (!(clen&1) && cstart == clen-1 && blen == clen && alen >= cstart+clen) {
800 long long split = clen/2;
803 for (
long long i = 0;
i < 3*split-1;++
i) fp_add3(&a01[
i],&
a[
i],&
a[
i+split]);
806 for (
long long i = 0;
i < split;++
i) fp_sub3(&b01[
i],&b[
i],&b[
i+split]);
808 poly_mul_mid(c,split-1,split,(
const fp*) a01,2*split-1,b+split,split);
809 poly_mul_mid(c+split,split-1,split,(
const fp*) a01+split,2*split-1,b,split);
812 poly_mul_mid(delta,split-1,split,
a+split,2*split-1,(
const fp*) b01,split);
814 for (
long long i = 0;
i < split;++
i) {
815 fp_add2(&c[
i],(
const fp*) &delta[
i]);
816 fp_sub2(&c[split+
i],(
const fp*) &delta[
i]);
821 if (cstart+cstart+clen < alen+blen) {
824 for (
long long i = 0;
i < clen;++
i)
829 fp ab[alen+blen-1-cstart];
831 for (
long long i = 0;
i < clen;++
i)
838 if (n <= 1)
return 0;
839 if (n == 2)
return 3;
840 if (n == 3)
return 7;
845 return left+right+n+1;
854 if (n <= 1)
return 0;
869 long long right =
poly_tree1(T+left,P+2*m,n-m);
870 poly_mul(T+left+right,(
const fp*) T+left-(m+1),m+1,(
const fp*) T+left+right-(n-m+1),n-m+1);
871 return left+right+n+1;
874static long long poly_eval_precomputesize(
long long flen)
876 if (flen <= 2)
return 0;
880static void poly_eval_precompute(
fp *precomp,
long long flen,
const proj *P)
882 if (flen <= 2)
return;
889 for (
long long i = 2;
i < flen;++
i) {
890 fp_mul3(&pxpow[
i],(
const fp*) &pxpow[
i-1],&P->
x);
891 fp_mul3(&pzpow[
i],(
const fp*) &pzpow[
i-1],&P->
z);
894 fp_copy(precomp[0], pzpow[flen-1]);
895 fp_copy(precomp[flen-1], pxpow[flen-1]);
896 for (
long long i = 1;
i < flen-1;++
i)
897 fp_mul3(&precomp[
i],(
const fp*) &pxpow[
i],(
const fp*) &pzpow[flen-1-
i]);
907static void poly_eval_postcompute(
fp *v,
const fp *f,
long long flen,
const proj *P,
const fp *precomp)
916 fp_mul3(v,&f[0],&P->
z);
917 fp_mul3(&tmp,&f[1],&P->
x);
918 fp_add2(v,(
const fp*) &tmp);
922 fp_mul3(v,&f[0],&precomp[0]);
924 for (
long long i = 1;
i < flen;++
i) {
926 fp_mul3(&tmp,&f[
i],&precomp[
i]);
927 fp_add2(v,(
const fp*) &tmp);
945static void poly_pseudoreciprocal(
fp *d,
fp *r,
long long rlen,
const fp *m,
long long mdeg)
961 fp_neg2(&r[0],&m[mdeg-1]);
977 long long top = (rlen+1)/2;
978 long long bot = rlen-top;
980 poly_pseudoreciprocal(d,s,top,m,mdeg);
1001 for (
long long i = 0;
i < bot;++
i)
1002 fp_neg2(&r[
i],(
const fp*) &epss[
i]);
1003 for (
long long i = 0;
i < top;++
i)
1004 fp_mul3(&r[
i+bot],(
const fp*) &s[
i],(
const fp*) d);
1009static long long poly_pseudoremainder_precomputesize(
long long glen,
long long flen)
1012 if (flen == glen)
return 0;
1013 long long vlen = flen-glen;
1017static void poly_pseudoremainder_precompute(
fp *precomp,
long long glen,
long long flen,
const fp *m)
1020 if (flen == glen)
return;
1021 long long vlen = flen-glen;
1024 poly_pseudoreciprocal(d,v,vlen,m,glen);
1027static void poly_pseudoremainder_postcompute(
fp *g,
long long glen,
const fp *f,
long long flen,
const fp *m,
const fp *precomp)
1032 for (
long long i = 0;
i < glen;++
i)
1045 long long vlen = flen-glen;
1046 const fp *d = precomp;
1047 const fp *v = precomp+1;
1061 for (
long long i = 0;
i < glen;++
i) {
1062 fp_mul3(&g[
i],&f[
i],d);
1063 fp_sub2(&g[
i], (
const fp*) &qm[
i]);
1084static long long poly_multieval_unscaled_precomputesize(
long long n,
long long flen)
1086 if (n <= 0)
return 0;
1088 return poly_eval_precomputesize(flen);
1092 poly_multieval_unscaled_precomputesize(m,flen)
1093 + poly_multieval_unscaled_precomputesize(n-m,flen);
1096 poly_pseudoremainder_precomputesize(n,flen)
1097 + poly_multieval_unscaled_precomputesize(1,n)
1098 + poly_multieval_unscaled_precomputesize(1,n);
1101 poly_pseudoremainder_precomputesize(n,flen)
1102 + poly_multieval_unscaled_precomputesize(2,n)
1103 + poly_multieval_unscaled_precomputesize(1,n);
1105 poly_pseudoremainder_precomputesize(n,flen)
1106 + poly_multieval_unscaled_precomputesize(m,n)
1107 + poly_multieval_unscaled_precomputesize(n-m,n);
1110static void poly_multieval_unscaled_precompute(
fp *precomp,
long long n,
long long flen,
const fp *P,
const fp *T)
1116 fp_neg2(&Pp.
x,&P[0]);
1118 poly_eval_precompute(precomp,flen,&Pp);
1125 poly_multieval_unscaled_precompute(precomp,m,flen,P,T);
1126 precomp += poly_multieval_unscaled_precomputesize(m,flen);
1127 poly_multieval_unscaled_precompute(precomp,n-m,flen,P+2*m,T+left);
1132 poly_pseudoremainder_precompute(precomp,n,flen,T);
1133 precomp += poly_pseudoremainder_precomputesize(n,flen);
1134 poly_multieval_unscaled_precompute(precomp,1,n,P,0);
1135 precomp += poly_multieval_unscaled_precomputesize(1,n);
1136 poly_multieval_unscaled_precompute(precomp,1,n,P+2,0);
1141 poly_pseudoremainder_precompute(precomp,n,flen,T+3);
1142 precomp += poly_pseudoremainder_precomputesize(n,flen);
1143 poly_multieval_unscaled_precompute(precomp,2,n,P,T);
1144 precomp += poly_multieval_unscaled_precomputesize(2,n);
1145 poly_multieval_unscaled_precompute(precomp,1,n,P+4,0);
1150 poly_pseudoremainder_precompute(precomp,n,flen,T+left+right);
1151 precomp += poly_pseudoremainder_precomputesize(n,flen);
1152 poly_multieval_unscaled_precompute(precomp,m,n,P,T);
1153 precomp += poly_multieval_unscaled_precomputesize(m,n);
1154 poly_multieval_unscaled_precompute(precomp,n-m,n,P+2*m,T+left);
1157static void poly_multieval_unscaled_postcompute(
fp *v,
long long n,
const fp *f,
long long flen,
const fp *P,
const fp *T,
const fp *precomp)
1163 fp_neg2(&Pp.
x,&P[0]);
1165 poly_eval_postcompute(v,f,flen,&Pp,precomp);
1174 poly_multieval_unscaled_postcompute(v,m,f,flen,P,T,precomp);
1175 precomp += poly_multieval_unscaled_precomputesize(m,flen);
1176 poly_multieval_unscaled_postcompute(v+m,n-m,f,flen,P+2*m,T+left,precomp);
1183 poly_pseudoremainder_postcompute(g,n,f,flen,T,precomp);
1184 precomp += poly_pseudoremainder_precomputesize(n,flen);
1185 poly_multieval_unscaled_postcompute(v,1,(
const fp*) g,n,P,0,precomp);
1186 precomp += poly_multieval_unscaled_precomputesize(1,n);
1187 poly_multieval_unscaled_postcompute(v+1,1,(
const fp*) g,n,P+2,0,precomp);
1192 poly_pseudoremainder_postcompute(g,n,f,flen,T+3,precomp);
1193 precomp += poly_pseudoremainder_precomputesize(n,flen);
1194 poly_multieval_unscaled_postcompute(v,2,(
const fp*) g,n,P,T,precomp);
1195 precomp += poly_multieval_unscaled_precomputesize(2,n);
1196 poly_multieval_unscaled_postcompute(v+2,1,(
const fp*) g,n,P+4,0,precomp);
1201 poly_pseudoremainder_postcompute(g,n,f,flen,T+left+right,precomp);
1202 precomp += poly_pseudoremainder_precomputesize(n,flen);
1203 poly_multieval_unscaled_postcompute(v,m,(
const fp*) g,n,P,T,precomp);
1204 precomp += poly_multieval_unscaled_precomputesize(m,n);
1205 poly_multieval_unscaled_postcompute(v+m,n-m,(
const fp*) g,n,P+2*m,T+left,precomp);
1221static void poly_multieval_scaled(
fp *v,
long long n,
const fp *r,
const fp *P,
const fp *T)
1231 poly_multieval_scaled(v,1,(
const fp*) g,P,0);
1233 poly_multieval_scaled(v+1,1,(
const fp*) g,P+2,0);
1239 poly_multieval_scaled(v,2,(
const fp*) g,P,T);
1241 poly_multieval_scaled(v+2,1,(
const fp*) g,P+4,0);
1251 poly_multieval_scaled(v,m,(
const fp*) g,P,T);
1253 poly_multieval_scaled(v+m,n-m,(
const fp*) g,P+2*m,T+left);
1256static long long poly_multieval_chooseunscaled(
long long n,
long long flen)
1259 if (n <= 1)
return 1;
1260 if (flen <= 1)
return 1;
1266 if (poly_multieval_chooseunscaled(n,flen))
1267 return poly_multieval_unscaled_precomputesize(n,flen);
1268 if (flen < n) flen = n;
1274 if (poly_multieval_chooseunscaled(n,flen)) {
1275 poly_multieval_unscaled_precompute(precomp,n,flen,P,T);
1278 if (flen < n) flen = n;
1283 poly_pseudoreciprocal(&denom,precomp,flen,T+left+right,n);
1288 if (poly_multieval_chooseunscaled(n,flen)) {
1289 poly_multieval_unscaled_postcompute(v,n,f,flen,P,T,precomp);
1298 for (
long long i = 0;
i < flen;++
i)
fp_copy(fcopy[
i], f[
i]);
1301 for(
long long i = 0;
i < n;++
i)
1306 const fp *rootinv = precomp;
1313 poly_multieval_scaled(v,n,(
const fp*) frootinv,(
const fp*) P,(
const fp*) T);
1320 fp precomp[precompsize];
1337 poly_mul(X,(
const fp*) T,2*m+1,(
const fp*) T+3*m,2*(n-m)+1);
1339 for (
long long i = 0;
i <= 2*n;++
i)
fp_copy(T[
i], X[
i]);
1356 for (
long long i = 0;
i <= 2*n;++
i)
fp_copy(T[
i], X[
i]);
uint64_t fp[NUMBER_OF_WORDS]
#define UNCONST(type, var)
#define poly_multieval_postcompute
#define poly_mul_selfreciprocal
#define poly_multiprod2_selfreciprocal
#define poly_multieval_precomputesize
#define poly_multieval_precompute