Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
poly.c
Go to the documentation of this file.
1#include <stdio.h>
2#include <string.h>
3#include <assert.h>
4
5#include "mont.h"
6#include "poly.h"
7
8#define UNCONST(type, var) (*(type*)&(var))
9
10void poly_mul(fp *c,const fp *a,long long alen,const fp *b,long long blen)
11{
12 if (alen < blen) {
13 poly_mul(c,b,blen,a,alen);
14 return;
15 }
16 if (!blen) return;
17 if (blen == 1) {
18 for (long long i = 0;i < alen;++i) {
19 fp_mul3(&c[i],&a[i],&b[0]);
20 }
21 return;
22 }
23
24 /* now alen >= blen >= 2 */
25
26 if (alen == 2) {
27 fp_mul3(&c[0],&a[0],&b[0]);
28 fp_mul3(&c[2],&a[1],&b[1]);
29
30 fp a01; fp_add3(&a01,&a[0],&a[1]);
31 fp b01; fp_add3(&b01,&b[0],&b[1]);
32 fp_mul3(&c[1],(const fp*) &a01,(const fp*) &b01);
33 fp_sub2(&c[1],(const fp*) &c[0]);
34 fp_sub2(&c[1],(const fp*) &c[2]);
35
36 /*
37 fp_mul3(&c[1],&a[0],&b[1]);
38 fp t;
39 fp_mul3(&t,&a[1],&b[0]);
40 fp_add2(&c[1],&t);
41 */
42 return;
43 }
44
45 if (blen == 2) {
46 if (alen == 3) {
47 fp_mul3(&c[0],&a[0],&b[0]);
48 fp_mul3(&c[2],&a[1],&b[1]);
49 fp b01; fp_add3(&b01,&b[0],&b[1]);
50 fp a01; fp_add3(&a01,&a[0],&a[1]);
51 fp_mul3(&c[1],(const fp*) &a01,(const fp*) &b01);
52 fp_sub2(&c[1],(const fp*) &c[0]);
53 fp_sub2(&c[1],(const fp*) &c[2]);
54 fp_mul3(&c[3],&a[2],&b[1]);
55 fp a2b0; fp_mul3(&a2b0,&a[2],&b[0]);
56 fp_add2(&c[2],(const fp*) &a2b0);
57 return;
58 }
59 if (alen == 4) {
60 fp_mul3(&c[0],&a[0],&b[0]);
61 fp_mul3(&c[2],&a[1],&b[1]);
62 fp b01; fp_add3(&b01,&b[0],&b[1]);
63 fp a01; fp_add3(&a01,&a[0],&a[1]);
64 fp_mul3(&c[1],(const fp*) &a01,(const fp*) &b01);
65 fp_sub2(&c[1],(const fp*) &c[0]);
66 fp_sub2(&c[1],(const fp*) &c[2]);
67
68 fp mid;
69 fp_mul3(&mid,&a[2],&b[0]);
70 fp_mul3(&c[4],&a[3],&b[1]);
71 fp a23; fp_add3(&a23,&a[2],&a[3]);
72 fp_mul3(&c[3],(const fp*) &a23,(const fp*) &b01);
73 fp_sub2(&c[3],(const fp*) &mid);
74 fp_sub2(&c[3],(const fp*) &c[4]);
75 fp_add2(&c[2],(const fp*) &mid);
76
77/*
78 fp_mul3(&c[3],&a[2],&b[1]);
79 fp a2b0; fp_mul3(&a2b0,&a[2],&b[0]);
80 fp_add2(&c[2],&a2b0);
81 fp_mul3(&c[4],&a[3],&b[1]);
82 fp a3b0; fp_mul3(&a3b0,&a[3],&b[0]);
83 fp_add2(&c[3],&a3b0);
84*/
85 return;
86 }
87 }
88
89 if (blen == 3) {
90 if (alen <= 3) {
91 /* see eprint 2015/1247 */
92 /* XXX: toom instead? */
93 fp a10; fp_sub3(&a10,&a[1],&a[0]);
94 fp b01; fp_sub3(&b01,&b[0],&b[1]);
95 fp_mul3(&c[1],(const fp*) &a10,(const fp*) &b01);
96 fp a20; fp_sub3(&a20,&a[2],&a[0]);
97 fp b02; fp_sub3(&b02,&b[0],&b[2]);
98 fp_mul3(&c[2],(const fp*) &a20,(const fp*) &b02);
99 fp a21; fp_sub3(&a21,&a[2],&a[1]);
100 fp b12; fp_sub3(&b12,&b[1],&b[2]);
101 fp_mul3(&c[3],(const fp*) &a21,(const fp*) &b12);
102 fp_mul3(&c[0],&a[0],&b[0]);
103 fp_mul3(&c[4],&a[2],&b[2]);
104 fp a1b1; fp_mul3(&a1b1,&a[1],&b[1]);
105 fp t; fp_add3(&t,(const fp*) &a1b1,(const fp*) &c[0]);
106 fp_add2(&c[1],(const fp*) &t);
107 fp_add3(&t,(const fp*) &a1b1,(const fp*) &c[4]);
108 fp_add2(&c[3],(const fp*) &t);
109 fp_add2(&t,(const fp*) &c[0]);
110 fp_add2(&c[2],(const fp*) &t);
111
112 if (alen >= 4) {
113 fp_mul3(&t,&a[3],&b[0]); fp_add2(&c[3],(const fp*) &t);
114 fp_mul3(&t,&a[3],&b[1]); fp_add2(&c[4],(const fp*) &t);
115 fp_mul3(&c[5],&a[3],&b[2]);
116 }
117 if (alen >= 5) {
118 // this would be same mults, more adds:
119 // a0b0
120 // (a1-a0)(b0-b1) + a0b0 + a1b1
121 // (a2-a0)(b0-b2) + a0b0 + a1b1 + a2b2
122 // (a2-a1)(b1-b2) + a1b1 + a2b2 + a3b0
123 // (a4-a2)(b0-b2) + a2b0 + a3b1 + a4b2
124 // (a4-a3)(b1-b2) + a3b1 + a4b2
125 // a4b2
126 fp_mul3(&t,&a[4],&b[0]); fp_add2(&c[4],(const fp*) &t);
127 fp_mul3(&t,&a[4],&b[1]); fp_add2(&c[5],(const fp*) &t);
128 fp_mul3(&c[6],&a[4],&b[2]);
129 }
130 return;
131 }
132 }
133
134 long long kara = (alen+1)/2;
135 long long a1len = alen-kara;
136
137 if (blen <= kara) { /* XXX: figure out best cutoff */
138 fp c1[a1len+blen-1];
139 poly_mul(c,a,kara,b,blen);
140 poly_mul(c1,a+kara,a1len,b,blen);
141 for (long long i = 0;i < blen-1;++i)
142 fp_add2(&c[i+kara],(const fp*) &c1[i]);
143 for (long long i = blen-1;i < a1len+blen-1;++i)
144 // c[i+kara] = c1[i];
145 fp_copy(c[i+kara], c1[i]);
146 return;
147 }
148
149 long long b1len = blen-kara;
150
151 fp a01[kara];
152 fp b01[kara];
153
154 for (long long i = 0;i < a1len;++i)
155 fp_add3(&a01[i],&a[i],&a[i+kara]);
156 for (long long i = a1len;i < kara;++i)
157 // a01[i] = a[i];
158 fp_copy(a01[i], a[i]);
159
160 for (long long i = 0;i < b1len;++i)
161 fp_add3(&b01[i],&b[i],&b[i+kara]);
162 for (long long i = b1len;i < kara;++i)
163 fp_copy(b01[i], b[i]);
164
165 fp c01[kara+kara-1];
166 long long c1len = a1len+b1len-1;
167 fp c1[c1len];
168
169 poly_mul(c,a,kara,b,kara);
170 poly_mul(c01,(const fp*) a01,kara,(const fp*) b01,kara);
171 poly_mul(c1,a+kara,a1len,b+kara,b1len);
172
173 fp mix;
174
175 if (c1len < kara) {
176 fp_sub3(&c[kara+kara-1],(const fp*) &c01[kara-1],(const fp*) &c[kara-1]);
177 for (long long i = 0;i < c1len;++i) {
178 fp_sub3(&mix,(const fp*) &c[kara+i],(const fp*) &c1[i]);
179 fp_sub3(&c[i+2*kara],(const fp*) &c01[i+kara],(const fp*) &mix);
180 fp_sub3(&c[i+kara],(const fp*) &mix,(const fp*) &c[i]);
181 fp_add2(&c[i+kara],(const fp*) &c01[i]);
182 }
183 for (long long i = c1len;i < kara-1;++i) {
184 fp_sub3(&c[i+2*kara],(const fp*) &c01[i+kara],(const fp*) &c[i+kara]);
185 fp_sub2(&c[i+kara],(const fp*) &c[i]);
186 fp_add2(&c[i+kara],(const fp*) &c01[i]);
187 }
188 return;
189 }
190
191 for (long long i = 0;i < c1len-kara;++i) {
192 fp_sub3(&mix,(const fp*) &c[kara+i],(const fp*) &c1[i]);
193 fp_sub3(&c[i+kara],(const fp*) &mix,(const fp*) &c[i]);
194 fp_add2(&c[i+kara],(const fp*) &c01[i]);
195 fp_sub3(&c[i+2*kara],(const fp*) &c01[i+kara],(const fp*) &mix);
196 fp_sub2(&c[i+2*kara],(const fp*) &c1[i+kara]);
197 }
198 for (long long i = c1len-kara;i < kara-1;++i) {
199 fp_sub3(&mix,(const fp*) &c[kara+i],(const fp*) &c1[i]);
200 fp_sub3(&c[i+kara],(const fp*) &mix,(const fp*) &c[i]);
201 fp_add2(&c[i+kara],(const fp*) &c01[i]);
202 fp_sub3(&c[i+2*kara],(const fp*) &c01[i+kara],(const fp*) &mix);
203 }
204 fp_sub3(&c[kara+kara-1],(const fp*) &c01[kara-1],(const fp*) &c[kara-1]);
205 fp_sub2(&c[kara+kara-1],(const fp*) &c1[kara-1]);
206 for (long long i = kara-1;i < c1len;++i)
207 // c[i+2*kara] = c1[i];
208 fp_copy(c[i+2*kara], c1[i]);
209
210 return;
211}
212
213void poly_mul_low(fp *c,long long clen,const fp *a,long long alen,const fp *b,long long blen)
214{
215 if (!alen) return;
216 if (!blen) return;
217 if (!clen) return;
218
219 if (clen == alen+blen-1) {
220 poly_mul(c,a,alen,b,blen);
221 return;
222 }
223 if (clen*4 >= 3*(alen+blen-1)) { /* XXX: tune cutoff */
224 fp ab[alen+blen-1];
225 poly_mul(ab,a,alen,b,blen);
226 for (long long i = 0;i < clen;++i)
227 fp_copy(c[i], ab[i]);
228 return;
229 }
230
231 if (alen < blen) {
232 const fp *t = a; long long tlen = alen;
233 a = b; alen = blen;
234 b = t; blen = tlen;
235 }
236 if (alen > clen) alen = clen;
237 if (blen > clen) blen = clen;
238
239 if (blen == 1) {
240 for (long long i = 0;i < clen;++i)
241 fp_mul3(&c[i],&a[i],&b[0]);
242 return;
243 }
244
245 assert(2 <= blen);
246 assert(blen <= alen);
247 assert(alen <= clen);
248 assert(clen <= alen+blen-2);
249
250 if (clen == 2) {
251 fp_mul3(&c[0],&a[0],&b[0]);
252 fp_mul3(&c[1],&a[0],&b[1]);
253 fp t; fp_mul3(&t,&a[1],&b[0]);
254 fp_add2(&c[1],(const fp*) &t);
255 return;
256 }
257
258 if (blen == 2) {
259 if (clen == 3) {
260 fp_mul3(&c[0],&a[0],&b[0]);
261 fp_mul3(&c[2],&a[1],&b[1]);
262 fp b01; fp_add3(&b01,&b[0],&b[1]);
263 fp a01; fp_add3(&a01,&a[0],&a[1]);
264 fp_mul3(&c[1],(const fp*) &a01,(const fp*) &b01);
265 fp_sub2(&c[1],(const fp*) &c[0]);
266 fp_sub2(&c[1],(const fp*) &c[2]);
267 fp a2b0; fp_mul3(&a2b0,&a[2],&b[0]);
268 fp_add2(&c[2],(const fp*) &a2b0);
269 return;
270 }
271 }
272
273 if(1) { /* XXX: tune this */
274 long long a1len = alen/2;
275 long long a0len = alen-a1len;
276 fp a0[a0len];
277 fp a1[a1len];
278 for (long long i = 0;i < a0len;++i) fp_copy(a0[i], a[2*i]);
279 for (long long i = 0;i < a1len;++i) fp_copy(a1[i], a[2*i+1]);
280 /* a = a0(x^2) + x a1(x^2) */
281
282 long long b1len = blen/2;
283 long long b0len = blen-b1len;
284 fp b0[b0len];
285 fp b1[b1len];
286 for (long long i = 0;i < b0len;++i) fp_copy(b0[i], b[2*i]);
287 for (long long i = 0;i < b1len;++i) fp_copy(b1[i], b[2*i+1]);
288 /* b = b0(x^2) + x b1(x^2) */
289
290 fp a01[a0len];
291 for (long long i = 0;i < a1len;++i) fp_add3(&a01[i],(const fp*) &a0[i],(const fp*) &a1[i]);
292 if (a1len < a0len) fp_copy(a01[a1len], a0[a1len]);
293
294 fp b01[b0len];
295 for (long long i = 0;i < b1len;++i) fp_add3(&b01[i],(const fp*) &b0[i],(const fp*) &b1[i]);
296 if (b1len < b0len) fp_copy(b01[b1len], b0[b1len]);
297
298 long long c0len = a0len+b0len-1;
299 if (c0len > (clen+1)/2) c0len = (clen+1)/2;
300
301 fp c0[c0len];
302 poly_mul_low(c0,c0len,(const fp*) a0,a0len,(const fp*) b0,b0len);
303
304 long long c01len = a0len+b0len-1;
305 if (c01len > clen/2) c01len = clen/2;
306
307 fp c01[c01len];
308 poly_mul_low(c01,c01len,(const fp*) a01,a0len,(const fp*) b01,b0len);
309
310 long long c1len = a1len+b1len-1;
311 if (c1len > clen/2) c1len = clen/2;
312
313 fp c1[c1len];
314 poly_mul_low(c1,c1len,(const fp*) a1,a1len,(const fp*) b1,b1len);
315
316 /* XXX: use refined karatsuba */
317
318 assert(c0len >= c01len);
319 for (long long i = 0;i < c01len;++i)
320 fp_sub2(&c01[i],(const fp*) &c0[i]);
321
322 assert(c1len <= c01len);
323 for (long long i = 0;i < c1len;++i)
324 fp_sub2(&c01[i],(const fp*) &c1[i]);
325
326 /* ab = c0(x^2) + x c01(x^2) + x^2 c1(x^2) */
327
328 assert(2*(c0len-1) < clen);
329 assert(2*c0len >= clen);
330 assert(2*(c01len-1)+1 < clen);
331 assert(2*c01len+1 >= clen);
332
333 for (long long i = 0;i < c0len;++i) fp_copy(c[2*i], c0[i]);
334 for (long long i = 0;i < c01len;++i) fp_copy(c[2*i+1], c01[i]);
335 for (long long i = 0;i < c1len-1;++i) fp_add2(&c[2*i+2],(const fp*) &c1[i]);
336 if (2*c1len < clen) fp_add2(&c[2*c1len],(const fp*) &c1[c1len-1]);
337
338 return;
339 }
340
341
342 /* XXX: try mulders split */
343
344 long long split = (alen+1)/2;
345
346 if (split+split < clen) {
347 fp ab[alen+blen-1];
348 poly_mul(ab,a,alen,b,blen);
349 for (long long i = 0;i < clen;++i)
350 fp_copy(c[i], ab[i]);
351 return;
352 }
353
354 long long a1len = alen-split;
355
356 if (blen <= split) { /* XXX: figure out best cutoff */
357 assert(split+blen-1 <= clen);
358 assert(a1len+blen-1 >= clen-split);
359 fp c1[clen-split];
360 poly_mul(c,a,split,b,blen);
361 poly_mul_low(c1,clen-split,a+split,a1len,b,blen);
362 for (long long i = 0;i < blen-1;++i)
363 fp_add2(&c[i+split],(const fp*) &c1[i]);
364 for (long long i = blen-1;i+split < clen;++i)
365 fp_copy(c[i+split], c1[i]);
366 return;
367 }
368
369 assert(split+split >= clen);
370 assert(split < clen);
371
372 if (clen < split+split)
373 poly_mul_low(c,clen,a,split,b,split);
374 else {
375 assert(clen == split+split);
376 poly_mul(c,a,split,b,split);
377 fp_copy(c[clen-1], fp_0);
378 }
379
380 fp c01[clen-split];
381 poly_mul_low(c01,clen-split,a,split,b+split,blen-split);
382
383 fp c10[clen-split];
384 poly_mul_low(c10,clen-split,a+split,alen-split,b,split);
385
386 for (long long i = 0;i < clen-split;++i) {
387 fp_add2(&c[i+split],(const fp*) &c01[i]);
388 fp_add2(&c[i+split],(const fp*) &c10[i]);
389 }
390
391 return;
392}
393
394void poly_mul_selfreciprocal(fp *c,const fp *a,long long alen,const fp *b,long long blen)
395{
396 if (!alen) return;
397 if (!blen) return;
398
399 if (alen == 1 && blen == 1) {
400 fp_mul3(&c[0],&a[0],&b[0]);
401 return;
402 }
403
404 if (alen == 2 && blen == 2) {
405 fp_mul3(&c[0],&a[0],&b[0]);
406 fp_add3(&c[1],(const fp*) &c[0],(const fp*) &c[0]);
407 fp_copy(c[2], c[0]);
408 return;
409 }
410
411 if (alen == 3 && blen == 3) {
412 fp_mul3(&c[0],&a[0],&b[0]);
413 fp_mul3(&c[2],&a[1],&b[1]);
414 fp a01; fp_add3(&a01,&a[0],&a[1]);
415 fp b01; fp_add3(&b01,&b[0],&b[1]);
416 fp_mul3(&c[1],(const fp*) &a01,(const fp*) &b01);
417 fp_add2(&c[2],(const fp*) &c[0]);
418 fp_sub2(&c[1],(const fp*) &c[2]);
419 fp_add2(&c[2],(const fp*) &c[0]);
420 fp_copy(c[3], c[1]);
421 fp_copy(c[4], c[0]);
422 return;
423 }
424
425 if (alen == 4 && blen == 4) {
426 fp_mul3(&c[0],&a[0],&b[0]);
427 fp_mul3(&c[3],&a[1],&b[1]);
428 fp a01; fp_add3(&a01,&a[0],&a[1]);
429 fp b01; fp_add3(&b01,&b[0],&b[1]);
430 fp_mul3(&c[2],(const fp*) &a01,(const fp*) &b01);
431 fp_sub2(&c[2],(const fp*) &c[0]);
432 fp_sub3(&c[1],(const fp*) &c[2],(const fp*) &c[3]);
433 fp_add2(&c[3],(const fp*) &c[0]);
434 fp_add2(&c[3],(const fp*) &c[3]);
435 fp_copy(c[4], c[2]);
436 fp_copy(c[5], c[1]);
437 fp_copy(c[6], c[0]);
438 return;
439 }
440
441 if (alen == 5 && blen == 5) {
442 /* XXX: toom instead? */
443 fp a10; fp_sub3(&a10,&a[1],&a[0]);
444 fp b01; fp_sub3(&b01,&b[0],&b[1]);
445 fp_mul3(&c[1],(const fp*) &a10,(const fp*) &b01);
446 fp a20; fp_sub3(&a20,&a[2],&a[0]);
447 fp b02; fp_sub3(&b02,&b[0],&b[2]);
448 fp_mul3(&c[2],(const fp*) &a20,(const fp*) &b02);
449 fp a21; fp_sub3(&a21,(const fp*) &a[2],(const fp*) &a[1]);
450 fp b12; fp_sub3(&b12,(const fp*) &b[1],(const fp*) &b[2]);
451 fp_mul3(&c[3],(const fp*) &a21,(const fp*) &b12);
452 fp_mul3(&c[0],&a[0],&b[0]);
453 fp a1b1; fp_mul3(&a1b1,(const fp*) &a[1],(const fp*) &b[1]);
454 fp a2b2; fp_mul3(&a2b2,(const fp*) &a[2],(const fp*) &b[2]);
455
456 fp t; fp_add3(&t,(const fp*) &a1b1,(const fp*) &c[0]);
457 fp_add2(&c[1],(const fp*) &t);
458 fp_add2(&c[3],(const fp*) &c[1]);
459 fp_add3(&c[4],(const fp*) &t,(const fp*) &a2b2);
460 fp_add2(&c[4],(const fp*) &t);
461 fp_add2(&c[2],(const fp*) &t);
462 fp_add2(&c[2],(const fp*) &a2b2);
463 fp_add2(&c[3],(const fp*) &a1b1);
464 fp_add2(&c[3],(const fp*) &a2b2);
465 fp_copy(c[5], c[3]);
466 fp_copy(c[6], c[2]);
467 fp_copy(c[7], c[1]);
468 fp_copy(c[8], c[0]);
469 return;
470 }
471
472 if (alen == blen && (alen&1)) {
473 long long len0 = (alen+1)/2;
474 long long len1 = alen/2;
475 fp a0[len0];
476 fp b0[len0];
477 fp a1[len1];
478 fp b1[len1];
479 fp c0[len0+len0-1];
480 fp c01[len0+len0-1];
481 fp c1[len1+len1-1];
482
483 assert(2*(len0-1) < alen);
484 assert(2*(len1-1)+1 < alen);
485 assert(len1 < len0);
486
487 for (long long i = 0;i < len0;++i) fp_copy(a0[i], a[2*i]);
488 for (long long i = 0;i < len0;++i) fp_copy(b0[i], b[2*i]);
489 poly_mul_selfreciprocal(c0,(const fp*) a0,len0,(const fp*) b0,len0);
490
491 for (long long i = 0;i < len1;++i) fp_copy(a1[i], a[2*i+1]);
492 for (long long i = 0;i < len1;++i) fp_copy(b1[i], b[2*i+1]);
493 poly_mul_selfreciprocal(c1,(const fp*) a1,len1,(const fp*) b1,len1);
494
495 for (long long i = 0;i < len1;++i) fp_add2(&a0[i],(const fp*) &a1[i]);
496 for (long long i = 0;i < len1;++i) fp_add2(&b0[i],(const fp*) &b1[i]);
497 for (long long i = 0;i < len1;++i) fp_add2(&a0[i+1],(const fp*) &a1[i]);
498 for (long long i = 0;i < len1;++i) fp_add2(&b0[i+1],(const fp*) &b1[i]);
499 poly_mul_selfreciprocal(c01,(const fp*) a0,len0,(const fp*) b0,len0);
500
501 for (long long i = 0;i < len0+len0-1;++i)
502 fp_sub2(&c01[i],(const fp*) &c0[i]);
503 for (long long i = 0;i < len1+len1-1;++i)
504 fp_sub2(&c01[i],(const fp*) &c1[i]);
505 for (long long i = 0;i < len1+len1-1;++i)
506 fp_sub2(&c01[i+1],(const fp*) &c1[i]);
507 for (long long i = 0;i < len1+len1-1;++i)
508 fp_sub2(&c01[i+1],(const fp*) &c1[i]);
509 for (long long i = 0;i < len1+len1-1;++i)
510 fp_sub2(&c01[i+2],(const fp*) &c1[i]);
511
512 for (long long i = 1;i < len0+len0-1;++i)
513 fp_sub2(&c01[i],(const fp*) &c01[i-1]);
514
515 long long clen = alen+blen-1;
516 assert(2*(len0+len0-2) < clen);
517 assert(2*(len0+len0-3)+1 < clen);
518 assert(2*(len1+len1-2)+2 < clen);
519 (void)clen; // Suppress unused varible warning
520 for (long long i = 0;i < len0+len0-1;++i) fp_copy(c[2*i], c0[i]);
521 for (long long i = 0;i < len0+len0-2;++i) fp_copy(c[2*i+1], c01[i]);
522 for (long long i = 0;i < len1+len1-1;++i) fp_add2(&c[2*i+2],(const fp*) &c1[i]);
523 return;
524 }
525
526 if (alen == blen && !(alen&1)) {
527 long long half = alen/2;
528 fp c0[alen-1];
529 fp c1[alen-1];
530
531 poly_mul(c0,a,half,b,half);
532 poly_mul(c1,a,half,b+half,half);
533
534 for (long long i = 0;i < alen+alen-1;++i) fp_copy(c[i], fp_0);
535 for (long long i = 0;i < alen-1;++i)
536 fp_add2(&c[i],(const fp*) &c0[i]);
537 for (long long i = 0;i < alen-1;++i)
538 fp_add2(&c[alen+alen-2-i],(const fp*) &c0[i]);
539 for (long long i = 0;i < alen-1;++i)
540 fp_add2(&c[half+i],(const fp*) &c1[i]);
541 for (long long i = 0;i < alen-1;++i)
542 fp_add2(&c[alen+half-2-i],(const fp*) &c1[i]);
543 return;
544 }
545
546 long long clen = alen+blen-1;
547 poly_mul_low(c,(clen+1)/2,a,alen,b,blen);
548 for (long long i = (clen+1)/2;i < clen;++i)
549 fp_copy(c[i], c[clen-1-i]);
550}
551
552void poly_mul_high(fp *c,long long cstart,const fp *a,long long alen,const fp *b,long long blen)
553{
554 if (alen < blen) {
555 poly_mul_high(c,cstart,b,blen,a,alen);
556 return;
557 }
558 if (blen <= 0) return;
559
560 assert(cstart >= 0);
561 assert(cstart <= alen+blen-1);
562
563 if (cstart == alen+blen-1) return;
564
565 if (cstart == 0) {
566 poly_mul(c,a,alen,b,blen);
567 return;
568 }
569 if (cstart == alen+blen-2) {
570 fp_mul3(&c[0],&a[alen-1],&b[blen-1]);
571 return;
572 }
573 if (blen == 1) {
574 for (long long i = cstart;i < alen+blen-1;++i)
575 fp_mul3(&c[i-cstart],&a[i],&b[0]);
576 return;
577 }
578 if (cstart == alen+blen-3) {
579 fp_mul3(&c[0],&a[alen-2],&b[blen-1]);
580 fp t;
581 fp_mul3(&t,&a[alen-1],&b[blen-2]);
582 fp_add2(&c[0],(const fp*) &t);
583 fp_mul3(&c[1],&a[alen-1],&b[blen-1]);
584 return;
585 }
586
587 fp arev[alen];
588 fp brev[blen];
589 for (long long i = 0;i < alen;++i)
590 fp_copy(arev[alen-1-i], a[i]);
591 for (long long i = 0;i < blen;++i)
592 fp_copy(brev[blen-1-i], b[i]);
593
594 fp crev[alen+blen-1-cstart];
595 poly_mul_low(crev,alen+blen-1-cstart,(const fp*) arev,alen,(const fp*) brev,blen);
596 for (long long i = cstart;i < alen+blen-1;++i)
597 fp_copy(c[i-cstart], crev[alen+blen-2-i]);
598
599/*
600 fp ab[alen+blen-1];
601 poly_mul(ab,a,alen,b,blen);
602 for (long long i = cstart;i < alen+blen-1;++i)
603 c[i-cstart] = ab[i];
604 return;
605*/
606}
607
608void poly_mul_mid(fp *c,long long cstart,long long clen,const fp *a,long long alen,const fp *b,long long blen)
609{
610 if (!alen) return;
611 if (!blen) return;
612 assert(0 <= cstart);
613 assert(0 <= clen);
614 assert(cstart+clen <= alen+blen-1);
615 if (!clen) return;
616
617 if (clen == 1) {
618 fp_copy(c[0], fp_0);
619 if (alen > cstart) alen = cstart+1;
620 long long i = 0;
621 if (blen <= cstart) i = cstart-blen+1;
622 for (;i < alen;++i) {
623 fp t;
624 fp_mul3(&t,&a[i],&b[cstart-i]);
625 fp_add2(&c[0],(const fp*) &t);
626 }
627 return;
628 }
629
630 if (blen == 1) {
631 for (long long i = 0;i < clen;++i)
632 fp_mul3(&c[i],&a[cstart+i],&b[0]);
633 return;
634 }
635
636 if (cstart > 0 && cstart+clen <= alen && (blen & 1)) {
637 poly_mul_mid(c,cstart-1,clen,a,alen-1,b+1,blen-1);
638 fp t;
639 for (long long i = 0;i < clen;++i) {
640 fp_mul3(&t,&a[cstart+i],&b[0]);
641 fp_add2(&c[i],(const fp*) &t);
642 }
643 return;
644 }
645
646 if (clen == 2) {
647 // basic plan:
648 // c[0] = a[0]*b[cstart]+a[1]*b[cstart-1]+a[2]*b[cstart-2]+...
649 // c[1] = a[0]*b[cstart+1]+a[1]*b[cstart]+a[2]*b[cstart-1]+...
650
651 if (cstart == 1 && alen >= 3 && blen == 2) {
652 // transposed karatsuba from hanrot--quercia--zimmermann
653 // delta = a[1]*(b[0]-b[1])
654 // c[0] = (a[0]+a[1])*b[1]+delta
655 // c[1] = (a[1]+a[2])*b[0]-delta
656 fp delta;
657 fp_sub3(&delta,&b[0],&b[1]);
658 fp_mul2(&delta,&a[1]);
659 fp_add3(&c[0],&a[0],&a[1]);
660 fp_mul2(&c[0],&b[1]);
661 fp_add2(&c[0],(const fp*) &delta);
662 fp_add3(&c[1],&a[1],&a[2]);
663 fp_mul2(&c[1],&b[0]);
664 fp_sub2(&c[1],(const fp*) &delta);
665 return;
666 }
667 if (cstart == 3 && alen >= 5 && blen == 4) {
668 // delta = a[1]*(b[2]-b[3])+a[3]*(b[0]-b[1])
669 // c[0] = (a[0]+a[1])*b[3]+(a[2]+a[3])*b[1]+delta
670 // c[1] = (a[1]+a[2])*b[2]+(a[3]+a[4])*b[0]-delta
671 fp b01; fp_sub3(&b01,&b[0],&b[1]);
672 fp b23; fp_sub3(&b23,&b[2],&b[3]);
673 fp a1b23; fp_mul3(&a1b23,&a[1],(const fp*) &b23);
674 fp a3b01; fp_mul3(&a3b01,&a[3],(const fp*) &b01);
675 fp delta; fp_add3(&delta,(const fp*) &a1b23,(const fp*) &a3b01);
676 fp a01; fp_add3(&a01,&a[0],&a[1]);
677 fp a12; fp_add3(&a12,&a[1],&a[2]);
678 fp a23; fp_add3(&a23,&a[2],&a[3]);
679 fp a34; fp_add3(&a34,&a[3],&a[4]);
680 fp_mul2(&a01,&b[3]);
681 fp_mul2(&a12,&b[2]);
682 fp_mul2(&a23,&b[1]);
683 fp_mul2(&a34,&b[0]);
684 fp_add3(&c[0],(const fp*) &a01,(const fp*) &a23);
685 fp_add3(&c[1],(const fp*) &a12,(const fp*) &a34);
686 fp_add2(&c[0],(const fp*) &delta);
687 fp_sub2(&c[1],(const fp*) &delta);
688 return;
689 }
690 }
691
692 if (clen == 3 && cstart == 1 && blen == 2 && alen >= 4) {
693 // c[0] = a[0]*b[1]+a[1]*b[0]
694 // c[1] = a[1]*b[1]+a[2]*b[0]
695 // c[2] = a[2]*b[1]+a[3]*b[0]
696 fp b01;
697 fp_sub3(&b01,&b[0],&b[1]);
698 fp delta0;
699 fp_mul3(&delta0,&a[1],(const fp*) &b01);
700 fp delta1;
701 fp_mul3(&delta1,&a[2],(const fp*) &b01);
702 fp_add3(&c[0],&a[0],&a[1]);
703 fp_add3(&c[1],&a[1],&a[2]);
704 fp_add3(&c[2],&a[2],&a[3]);
705 fp_mul2(&c[0],&b[1]);
706 fp_mul2(&c[1],&b[1]);
707 fp_mul2(&c[2],(const fp*) &b[0]);
708 fp_add2(&c[0],(const fp*) &delta0);
709 fp_add2(&c[1],(const fp*) &delta1);
710 fp_sub2(&c[2],(const fp*) &delta1);
711 return;
712 }
713
714 if (clen == 4 && cstart == 5 && blen == 6 && alen >= 9) {
715 // c[0] = a[0]*b[5]+a[1]*b[4]+a[2]*b[3]+a[3]*b[2]+a[4]*b[1]+a[5]*b[0]
716 // c[1] = a[1]*b[5]+a[2]*b[4]+a[3]*b[3]+a[4]*b[2]+a[5]*b[1]+a[6]*b[0]
717 // c[2] = a[2]*b[5]+a[3]*b[4]+a[4]*b[3]+a[5]*b[2]+a[6]*b[1]+a[7]*b[0]
718 // c[3] = a[3]*b[5]+a[4]*b[4]+a[5]*b[3]+a[6]*b[2]+a[7]*b[1]+a[8]*b[0]
719
720 fp a01[6];
721 for (long long i = 0;i < 6;++i) fp_add3(&a01[i],&a[i],&a[i+3]);
722
723 fp b01[3];
724 for (long long i = 0;i < 3;++i) fp_sub3(&b01[i],&b[i],&b[i+3]);
725
726 poly_mul_mid(c,2,3,(const fp*) a01,5,b+3,3);
727 poly_mul_mid(c+3,2,1,(const fp*) a01+3,3,b,3);
728
729 fp delta[3];
730 poly_mul_mid(delta,2,3,a+3,5,(const fp*) b01,3);
731
732 fp_add2(&c[0],(const fp*) &delta[0]);
733 fp_add2(&c[1],(const fp*) &delta[1]);
734 fp_add2(&c[2],(const fp*) &delta[2]);
735 fp_sub2(&c[3],(const fp*) &delta[0]);
736 return;
737 }
738
739 if (clen == 5 && cstart == 3 && blen == 4 && alen >= 8) {
740 // c[0] = a[0]*b[3]+a[1]*b[2]+a[2]*b[1]+a[3]*b[0]
741 // c[1] = a[1]*b[3]+a[2]*b[2]+a[3]*b[1]+a[4]*b[0]
742 // c[2] = a[2]*b[3]+a[3]*b[2]+a[4]*b[1]+a[5]*b[0]
743 // c[3] = a[3]*b[3]+a[4]*b[2]+a[5]*b[1]+a[6]*b[0]
744 // c[4] = a[4]*b[3]+a[5]*b[2]+a[6]*b[1]+a[7]*b[0]
745
746 fp a01[6];
747 fp_add3(&a01[0],&a[0],&a[2]);
748 fp_add3(&a01[1],&a[1],&a[3]);
749 fp_add3(&a01[2],&a[2],&a[4]);
750 fp_add3(&a01[3],&a[3],&a[5]);
751 fp_add3(&a01[4],&a[4],&a[6]);
752 fp_add3(&a01[5],&a[5],&a[7]);
753
754 fp b01[2];
755 fp_sub3(&b01[0],&b[0],&b[2]);
756 fp_sub3(&b01[1],&b[1],&b[3]);
757
758 poly_mul_mid(c,1,3,(const fp*) a01,4,b+2,2);
759 poly_mul_mid(c+3,1,2,(const fp*) a01+3,3,b,2);
760
761 fp delta[3];
762 poly_mul_mid(delta,1,3,a+2,4,(const fp*) b01,2);
763
764 fp_add2(&c[0],(const fp*) &delta[0]);
765 fp_add2(&c[1],(const fp*) &delta[1]);
766 fp_add2(&c[2],(const fp*) &delta[2]);
767 fp_sub2(&c[3],(const fp*) &delta[1]);
768 fp_sub2(&c[4],(const fp*) &delta[2]);
769 return;
770 }
771
772 if ((clen&1) && cstart == clen && blen == clen+1 && alen >= cstart+clen) {
773 long long split = (clen+1)/2;
774 assert(2*split == clen+1);
775
776 fp a01[3*split-2];
777 assert(3*split-2+split <= alen);
778 for (long long i = 0;i < 3*split-2;++i) fp_add3(&a01[i],&a[i],&a[i+split]);
779
780 fp b01[split];
781 assert(2*split == blen);
782 for (long long i = 0;i < split;++i) fp_sub3(&b01[i],&b[i],&b[i+split]);
783
784 assert(clen <= 3*split-2);
785 poly_mul_mid(c,split-1,split,(const fp*) a01,clen,b+split,split);
786
787 assert(clen-1+split <= 3*split-2);
788 poly_mul_mid(c+split,split-1,split-1,(const fp*) a01+split,clen-1,b,split);
789
790 fp delta[split];
791 poly_mul_mid(delta,split-1,split,a+split,clen,(const fp*) b01,split);
792
793 for (long long i = 0;i < split;++i) fp_add2(&c[i],(const fp*) &delta[i]);
794 for (long long i = 0;i < split-1;++i) fp_sub2(&c[i+split],(const fp*) &delta[i]);
795 return;
796 }
797
798 if (!(clen&1) && cstart == clen-1 && blen == clen && alen >= cstart+clen) {
799 // again transposed karatsuba from hanrot--quercia--zimmermann
800 long long split = clen/2;
801
802 fp a01[3*split-1];
803 for (long long i = 0;i < 3*split-1;++i) fp_add3(&a01[i],&a[i],&a[i+split]);
804
805 fp b01[split];
806 for (long long i = 0;i < split;++i) fp_sub3(&b01[i],&b[i],&b[i+split]);
807
808 poly_mul_mid(c,split-1,split,(const fp*) a01,2*split-1,b+split,split);
809 poly_mul_mid(c+split,split-1,split,(const fp*) a01+split,2*split-1,b,split);
810
811 fp delta[split];
812 poly_mul_mid(delta,split-1,split,a+split,2*split-1,(const fp*) b01,split);
813
814 for (long long i = 0;i < split;++i) {
815 fp_add2(&c[i],(const fp*) &delta[i]);
816 fp_sub2(&c[split+i],(const fp*) &delta[i]);
817 }
818 return;
819 }
820
821 if (cstart+cstart+clen < alen+blen) {
822 fp ab[cstart+clen];
823 poly_mul_low(ab,cstart+clen,a,alen,b,blen);
824 for (long long i = 0;i < clen;++i)
825 fp_copy(c[i], ab[cstart+i]);
826 return;
827 }
828
829 fp ab[alen+blen-1-cstart];
830 poly_mul_high(ab,cstart,a,alen,b,blen);
831 for (long long i = 0;i < clen;++i)
832 fp_copy(c[i], ab[i]);
833 return;
834}
835
836long long poly_tree1size(long long n)
837{
838 if (n <= 1) return 0;
839 if (n == 2) return 3;
840 if (n == 3) return 7;
841
842 long long m = n/2;
843 long long left = poly_tree1size(m);
844 long long right = poly_tree1size(n-m);
845 return left+right+n+1;
846}
847
848/* input: P[0...2n-1] has n 2-coeff polys */
849/* output: number of coeffs in product tree (minus n) */
850/* tree itself (without P) is stored in T */
851/* for n>=2, product is stored in final n+1 coeffs of T */
852long long poly_tree1(fp *T,const fp *P,long long n)
853{
854 if (n <= 1) return 0;
855
856 if (n == 2) {
857 poly_mul(T,P,2,P+2,2);
858 return 3;
859 }
860
861 if (n == 3) {
862 poly_mul(T,P,2,P+2,2);
863 poly_mul(T+3,(const fp*) T,3,P+4,2);
864 return 7;
865 }
866
867 long long m = n/2;
868 long long left = poly_tree1(T,P,m);
869 long long right = poly_tree1(T+left,P+2*m,n-m);
870 poly_mul(T+left+right,(const fp*) T+left-(m+1),m+1,(const fp*) T+left+right-(n-m+1),n-m+1);
871 return left+right+n+1;
872}
873
874static long long poly_eval_precomputesize(long long flen)
875{
876 if (flen <= 2) return 0;
877 return flen;
878}
879
880static void poly_eval_precompute(fp *precomp,long long flen,const proj *P)
881{
882 if (flen <= 2) return;
883
884 fp pxpow[flen];
885 fp pzpow[flen];
886
887 fp_copy(pxpow[1], P->x);
888 fp_copy(pzpow[1], P->z);
889 for (long long i = 2;i < flen;++i) {
890 fp_mul3(&pxpow[i],(const fp*) &pxpow[i-1],&P->x);
891 fp_mul3(&pzpow[i],(const fp*) &pzpow[i-1],&P->z);
892 }
893
894 fp_copy(precomp[0], pzpow[flen-1]);
895 fp_copy(precomp[flen-1], pxpow[flen-1]);
896 for (long long i = 1;i < flen-1;++i)
897 fp_mul3(&precomp[i],(const fp*) &pxpow[i],(const fp*) &pzpow[flen-1-i]);
898}
899
900/* assumes flen > 0 */
901/* output: v = f[p] = f[0]+f[1]p+...+f[flen-1]p^(flen-1) */
902/* i.e.: */
903/* v = f[0]pz^(flen-1)+f[1]px pz^(flen-2)+...+f[flen-1]px^(flen-1) */
904/* implicitly divided by pz^(flen-1) */
905/* denominators are eliminated */
906/* since application always computes ratios of two values at some points */
907static void poly_eval_postcompute(fp *v,const fp *f,long long flen,const proj *P,const fp *precomp)
908{
909 assert(flen > 0);
910 if (flen == 1) {
911 fp_copy(*v, f[0]);
912 return;
913 }
914 if (flen == 2) {
915 fp tmp;
916 fp_mul3(v,&f[0],&P->z);
917 fp_mul3(&tmp,&f[1],&P->x);
918 fp_add2(v,(const fp*) &tmp);
919 return;
920 }
921
922 fp_mul3(v,&f[0],&precomp[0]);
923
924 for (long long i = 1;i < flen;++i) {
925 fp tmp;
926 fp_mul3(&tmp,&f[i],&precomp[i]);
927 fp_add2(v,(const fp*) &tmp);
928 }
929}
930
931// static void poly_eval(fp *v,const fp *f,long long flen,const proj *P)
932// {
933// long long precompsize = poly_eval_precomputesize(flen);
934// fp precomp[precompsize];
935// poly_eval_precompute(precomp,flen,P);
936// poly_eval_postcompute(v,f,flen,P,precomp);
937// }
938
939/* assuming rlen >= 1, mdeg >= 0: */
940/* input: m[0]+m[1]x+...+m[mdeg]x^mdeg */
941/* output: pseudo-reciprocal r[0]+r[1]x+...+r[rlen-1]x^(rlen-1) */
942/* and d = positive power of m[mdeg] */
943/* conventional reciprocal floor(x^(mdeg+rlen-1)/m) */
944/* is pseudo-reciprocal divided by d (if d is nonzero) */
945static void poly_pseudoreciprocal(fp *d,fp *r,long long rlen,const fp *m,long long mdeg)
946{
947 if (mdeg == 0) {
948 fp_copy(r[rlen-1], fp_1);
949 for (long long i = 0;i < rlen-1;++i) fp_copy(r[i], fp_0);
950 fp_copy(*d, m[mdeg]);
951 return;
952 }
953 if (rlen == 1) {
954 fp_copy(r[0], fp_1);
955 fp_copy(*d, m[mdeg]);
956 return;
957 }
958 if (rlen == 2) {
959 /* can absorb into general case below */
960 fp_copy(r[1], m[mdeg]);
961 fp_neg2(&r[0],&m[mdeg-1]);
962 fp_sq2(d,&m[mdeg]);
963 return;
964 }
965
966 /* apply simpson recursive method for division */
967 /* (often miscredited to newton) */
968 /* XXX: remove redundancies as per harvey et al. */
969
970 if (mdeg >= rlen) {
971 /* divide m by x^(mdeg-(rlen-1)) */
972 /* and truncate, which cannot affect result */
973 m += mdeg-(rlen-1);
974 mdeg = rlen-1;
975 }
976
977 long long top = (rlen+1)/2;
978 long long bot = rlen-top;
979 fp s[top];
980 poly_pseudoreciprocal(d,s,top,m,mdeg);
981 /* s/d is floor(x^(mdeg+top-1)/m) */
982 /* i.e. s/d is x^(mdeg+top-1)/m+O(1/x) */
983 /* i.e. ms is dx^(mdeg+top-1)+eps with eps in O(x^(mdeg-1)) */
984
985 fp eps[mdeg];
986 poly_mul_low(eps,mdeg,m,mdeg+1,(const fp*) s,top);
987
988 /* ms(dx^(mdeg+top-1)-eps) */
989 /* = d^2x^(2*mdeg+2*top-2)-eps^2 */
990 /* with eps^2 in O(x^(2*mdeg-2)) */
991
992 /* sdx^bot - s eps/x^(mdeg+2*top-1-rlen) */
993 /* = s(dx^(mdeg+top-1)-eps)/x^(mdeg+2*top-1-rlen) */
994 /* = desired d^2 x^(mdeg+rlen-1)/m */
995 /* - undesired eps^2/mx^(mdeg+2*top-1-rlen) */
996 /* with the undesired part in O(1/x) */
997
998 fp epss[bot];
999 poly_mul_high(epss,mdeg+top-bot-1,(const fp*) eps,mdeg,(const fp*) s,top);
1000
1001 for (long long i = 0;i < bot;++i)
1002 fp_neg2(&r[i],(const fp*) &epss[i]);
1003 for (long long i = 0;i < top;++i)
1004 fp_mul3(&r[i+bot],(const fp*) &s[i],(const fp*) d);
1005
1006 fp_sq1(d);
1007}
1008
1009static long long poly_pseudoremainder_precomputesize(long long glen,long long flen)
1010{
1011 assert(flen >= glen);
1012 if (flen == glen) return 0;
1013 long long vlen = flen-glen;
1014 return vlen+1;
1015}
1016
1017static void poly_pseudoremainder_precompute(fp *precomp,long long glen,long long flen,const fp *m)
1018{
1019 assert(flen >= glen);
1020 if (flen == glen) return;
1021 long long vlen = flen-glen;
1022 fp *d = precomp; /* length 1 */
1023 fp *v = precomp+1; /* length vlen */
1024 poly_pseudoreciprocal(d,v,vlen,m,glen);
1025}
1026
1027static void poly_pseudoremainder_postcompute(fp *g,long long glen,const fp *f,long long flen,const fp *m,const fp *precomp)
1028{
1029 assert(flen >= glen);
1030
1031 if (flen == glen) {
1032 for (long long i = 0;i < glen;++i)
1033 fp_copy(g[i], f[i]);
1034 return;
1035 }
1036
1037 /* simplified version when m[glen] = 1: */
1038 /* v = floor(x^(flen-1)/m) is within O(1/x) of x^(flen-1)/m */
1039 /* v*f is within O(x^(flen-2)) of x^(flen-1)f/m */
1040 /* v*f/x^(flen-1) is within O(1/x) of f/m */
1041 /* q = floor(v*f/x^(flen-1)) is within O(1/x) of f/m */
1042 /* q*m is within O(x^(glen-1)) of f */
1043 /* finally, define g = f-q*m */
1044
1045 long long vlen = flen-glen;
1046 const fp *d = precomp; /* length 1 */
1047 const fp *v = precomp+1; /* length vlen */
1048 /* floor(x^(flen-1)/m) = v/d */
1049 /* i.e., v is within O(1/x) of dx^(flen-1)/m */
1050
1051 /* vf is within O(x^(flen-2)) of dx^(flen-1)f/m */
1052 /* vf/x^(flen-1) is within O(1/x) of df/m */
1053 /* mvf/x^(flen-1) is within O(x^(glen-1)) of df */
1054
1055 fp vf[vlen];
1056 poly_mul_high(vf,flen-1,v,vlen,f,flen);
1057
1058 fp qm[glen];
1059 poly_mul_low(qm,glen,(const fp*) vf,vlen,m,glen+1);
1060
1061 for (long long i = 0;i < glen;++i) {
1062 fp_mul3(&g[i],&f[i],d);
1063 fp_sub2(&g[i], (const fp*) &qm[i]);
1064 }
1065}
1066
1067/* assuming flen >= glen >= 1: */
1068/* reduce f[0]+f[1]x+f[2]x^2+...+f[flen-1]x^(flen-1) */
1069/* modulo m[0]+m[1]x+...+m[glen]x^glen */
1070/* to obtain pseudo-remainder g[0]+g[1]x+...+g[glen-1]x^(glen-1) */
1071/* and denominator d = m[glen]^(some nonnegative exponent) */
1072/* conventional remainder is pseudo-remainder divided by d (if d is nonzero) */
1073/* d is not returned since application eliminates denominators */
1074// static void poly_pseudoremainder(fp *g,long long glen,const fp *f,long long flen,const fp *m)
1075// {
1076// long long precompsize = poly_pseudoremainder_precomputesize(glen,flen);
1077// fp precomp[precompsize];
1078// poly_pseudoremainder_precompute(precomp,glen,flen,m);
1079// poly_pseudoremainder_postcompute(g,glen,f,flen,m,precomp);
1080
1081// /* XXX: try non-precomputation version too */
1082// }
1083
1084static long long poly_multieval_unscaled_precomputesize(long long n,long long flen)
1085{
1086 if (n <= 0) return 0;
1087 if (n == 1)
1088 return poly_eval_precomputesize(flen);
1089 long long m = n/2;
1090 if (flen <= n)
1091 return
1092 poly_multieval_unscaled_precomputesize(m,flen)
1093 + poly_multieval_unscaled_precomputesize(n-m,flen);
1094 if (n == 2)
1095 return
1096 poly_pseudoremainder_precomputesize(n,flen)
1097 + poly_multieval_unscaled_precomputesize(1,n)
1098 + poly_multieval_unscaled_precomputesize(1,n);
1099 if (n == 3)
1100 return
1101 poly_pseudoremainder_precomputesize(n,flen)
1102 + poly_multieval_unscaled_precomputesize(2,n)
1103 + poly_multieval_unscaled_precomputesize(1,n);
1104 return
1105 poly_pseudoremainder_precomputesize(n,flen)
1106 + poly_multieval_unscaled_precomputesize(m,n)
1107 + poly_multieval_unscaled_precomputesize(n-m,n);
1108}
1109
1110static void poly_multieval_unscaled_precompute(fp *precomp,long long n,long long flen,const fp *P,const fp *T)
1111{
1112 if (n <= 0) return;
1113
1114 if (n == 1) {
1115 proj Pp;
1116 fp_neg2(&Pp.x,&P[0]);
1117 fp_copy(Pp.z, P[1]);
1118 poly_eval_precompute(precomp,flen,&Pp);
1119 return;
1120 }
1121
1122 long long m = n/2;
1123 long long left = poly_tree1size(m);
1124 if (flen <= n) {
1125 poly_multieval_unscaled_precompute(precomp,m,flen,P,T);
1126 precomp += poly_multieval_unscaled_precomputesize(m,flen);
1127 poly_multieval_unscaled_precompute(precomp,n-m,flen,P+2*m,T+left);
1128 return;
1129 }
1130
1131 if (n == 2) {
1132 poly_pseudoremainder_precompute(precomp,n,flen,T);
1133 precomp += poly_pseudoremainder_precomputesize(n,flen);
1134 poly_multieval_unscaled_precompute(precomp,1,n,P,0);
1135 precomp += poly_multieval_unscaled_precomputesize(1,n);
1136 poly_multieval_unscaled_precompute(precomp,1,n,P+2,0);
1137 return;
1138 }
1139
1140 if (n == 3) {
1141 poly_pseudoremainder_precompute(precomp,n,flen,T+3);
1142 precomp += poly_pseudoremainder_precomputesize(n,flen);
1143 poly_multieval_unscaled_precompute(precomp,2,n,P,T);
1144 precomp += poly_multieval_unscaled_precomputesize(2,n);
1145 poly_multieval_unscaled_precompute(precomp,1,n,P+4,0);
1146 return;
1147 }
1148
1149 long long right = poly_tree1size(n-m);
1150 poly_pseudoremainder_precompute(precomp,n,flen,T+left+right);
1151 precomp += poly_pseudoremainder_precomputesize(n,flen);
1152 poly_multieval_unscaled_precompute(precomp,m,n,P,T);
1153 precomp += poly_multieval_unscaled_precomputesize(m,n);
1154 poly_multieval_unscaled_precompute(precomp,n-m,n,P+2*m,T+left);
1155}
1156
1157static void poly_multieval_unscaled_postcompute(fp *v,long long n,const fp *f,long long flen,const fp *P,const fp *T,const fp *precomp)
1158{
1159 if (n <= 0) return;
1160
1161 if (n == 1) {
1162 proj Pp;
1163 fp_neg2(&Pp.x,&P[0]);
1164 fp_copy(Pp.z, P[1]);
1165 poly_eval_postcompute(v,f,flen,&Pp,precomp);
1166 return;
1167 }
1168
1169 long long m = n/2;
1170 long long left = poly_tree1size(m);
1171 if (flen <= n) {
1172 /* must do this if flen <= n */
1173 /* can do this even for larger flen */
1174 poly_multieval_unscaled_postcompute(v,m,f,flen,P,T,precomp);
1175 precomp += poly_multieval_unscaled_precomputesize(m,flen);
1176 poly_multieval_unscaled_postcompute(v+m,n-m,f,flen,P+2*m,T+left,precomp);
1177 return;
1178 }
1179
1180 fp g[n];
1181
1182 if (n == 2) {
1183 poly_pseudoremainder_postcompute(g,n,f,flen,T,precomp);
1184 precomp += poly_pseudoremainder_precomputesize(n,flen);
1185 poly_multieval_unscaled_postcompute(v,1,(const fp*) g,n,P,0,precomp);
1186 precomp += poly_multieval_unscaled_precomputesize(1,n);
1187 poly_multieval_unscaled_postcompute(v+1,1,(const fp*) g,n,P+2,0,precomp);
1188 return;
1189 }
1190
1191 if (n == 3) {
1192 poly_pseudoremainder_postcompute(g,n,f,flen,T+3,precomp);
1193 precomp += poly_pseudoremainder_precomputesize(n,flen);
1194 poly_multieval_unscaled_postcompute(v,2,(const fp*) g,n,P,T,precomp);
1195 precomp += poly_multieval_unscaled_precomputesize(2,n);
1196 poly_multieval_unscaled_postcompute(v+2,1,(const fp*) g,n,P+4,0,precomp);
1197 return;
1198 }
1199
1200 long long right = poly_tree1size(n-m);
1201 poly_pseudoremainder_postcompute(g,n,f,flen,T+left+right,precomp);
1202 precomp += poly_pseudoremainder_precomputesize(n,flen);
1203 poly_multieval_unscaled_postcompute(v,m,(const fp*) g,n,P,T,precomp);
1204 precomp += poly_multieval_unscaled_precomputesize(m,n);
1205 poly_multieval_unscaled_postcompute(v+m,n-m,(const fp*) g,n,P+2*m,T+left,precomp);
1206}
1207
1208// static void poly_multieval_unscaled(fp *v,long long n,const fp *f,long long flen,const fp *P,const fp *T)
1209// {
1210// long long precompsize = poly_multieval_unscaled_precomputesize(n,flen);
1211// fp precomp[precompsize];
1212// poly_multieval_unscaled_precompute(precomp,n,flen,P,T);
1213// poly_multieval_unscaled_postcompute(v,n,f,flen,P,T,precomp);
1214// }
1215
1216/* same API as poly_multieval_unscaled */
1217/* except for a different input representation: */
1218/* r[0]/x^n+r[1]/x^(n-1)+...+r[n-1]/x */
1219/* is the scaled representation of f mod root */
1220/* where root is the product of the polys in P */
1221static void poly_multieval_scaled(fp *v,long long n,const fp *r,const fp *P,const fp *T)
1222{
1223 if (n <= 0) return;
1224 if (n == 1) {
1225 fp_copy(v[0], r[0]);
1226 return;
1227 }
1228 if (n == 2) {
1229 fp g[1];
1230 poly_mul_mid(g,1,1,r,2,P+2,2);
1231 poly_multieval_scaled(v,1,(const fp*) g,P,0);
1232 poly_mul_mid(g,1,1,r,2,P,2);
1233 poly_multieval_scaled(v+1,1,(const fp*) g,P+2,0);
1234 return;
1235 }
1236 if (n == 3) {
1237 fp g[2];
1238 poly_mul_mid(g,1,2,r,3,P+4,2);
1239 poly_multieval_scaled(v,2,(const fp*) g,P,T);
1240 poly_mul_mid(g,2,1,r,3,T,3);
1241 poly_multieval_scaled(v+2,1,(const fp*) g,P+4,0);
1242 return;
1243 }
1244
1245 long long m = n/2;
1246 long long left = poly_tree1size(m);
1247 long long right = poly_tree1size(n-m);
1248
1249 fp g[n-m];
1250 poly_mul_mid(g,n-m,m,r,n,T+left+right-(n-m+1),n-m+1);
1251 poly_multieval_scaled(v,m,(const fp*) g,P,T);
1252 poly_mul_mid(g,m,n-m,r,n,T+left-(m+1),m+1);
1253 poly_multieval_scaled(v+m,n-m,(const fp*) g,P+2*m,T+left);
1254}
1255
1256static long long poly_multieval_chooseunscaled(long long n,long long flen)
1257{
1258 /* XXX: tune this */
1259 if (n <= 1) return 1;
1260 if (flen <= 1) return 1;
1261 return 0;
1262}
1263
1264long long poly_multieval_precomputesize(long long n,long long flen)
1265{
1266 if (poly_multieval_chooseunscaled(n,flen))
1267 return poly_multieval_unscaled_precomputesize(n,flen);
1268 if (flen < n) flen = n;
1269 return flen;
1270}
1271
1272void poly_multieval_precompute(fp *precomp,long long n,long long flen,const fp *P,const fp *T)
1273{
1274 if (poly_multieval_chooseunscaled(n,flen)) {
1275 poly_multieval_unscaled_precompute(precomp,n,flen,P,T);
1276 return;
1277 }
1278 if (flen < n) flen = n;
1279 long long m = n/2;
1280 long long left = poly_tree1size(m);
1281 long long right = poly_tree1size(n-m);
1282 fp denom;
1283 poly_pseudoreciprocal(&denom,precomp,flen,T+left+right,n);
1284}
1285
1286void poly_multieval_postcompute(fp *v,long long n,const fp *f,long long flen,const fp *P,const fp *T,const fp *precomp)
1287{
1288 if (poly_multieval_chooseunscaled(n,flen)) {
1289 poly_multieval_unscaled_postcompute(v,n,f,flen,P,T,precomp);
1290 return;
1291 }
1292
1293 /* now use scaled remainder tree */
1294
1295 fp fcopy[n];
1296 if (flen < n) {
1297 /* XXX: or split n into smaller trees? */
1298 for (long long i = 0;i < flen;++i) fp_copy(fcopy[i], f[i]);
1299 for (long long i = flen;i < n;++i) fp_copy(fcopy[i], fp_0);
1300 // f = fcopy;
1301 for(long long i = 0;i < n;++i)
1302 fp_copy(UNCONST(fp, f[i]), fcopy[i]);
1303 flen = n;
1304 }
1305
1306 const fp *rootinv = precomp; /* first flen entries are polynomial */
1307
1308 /* rootinv/denom is within O(1/x) of x^(n+flen-1)/root */
1309 /* frootinv/(denom*x^(flen-1)) is within O(1/x) of f x^n/root */
1310
1311 fp frootinv[n];
1312 poly_mul_mid(frootinv,flen-1,n,f,flen,rootinv,flen);
1313 poly_multieval_scaled(v,n,(const fp*) frootinv,(const fp*) P,(const fp*) T);
1314}
1315
1316/* same API as poly_multieval_unscaled */
1317void poly_multieval(fp *v,long long n,const fp *f,long long flen,const fp *P,const fp *T)
1318{
1319 long long precompsize = poly_multieval_precomputesize(n,flen);
1320 fp precomp[precompsize];
1321 poly_multieval_precompute(precomp,n,flen,P,T);
1322 poly_multieval_postcompute(v,n,f,flen,P,T,(const fp*) precomp);
1323}
1324
1325void poly_multiprod2(fp *T,long long n)
1326{
1327 if (n <= 1) return;
1328
1329 long long m = n/2;
1330 poly_multiprod2(T,m);
1331 poly_multiprod2(T+3*m,n-m);
1332
1333 fp X[2*n+1];
1334
1335 /* T[0..2m] is left product */
1336 /* T[3m...2n+m] is right product */
1337 poly_mul(X,(const fp*) T,2*m+1,(const fp*) T+3*m,2*(n-m)+1);
1338
1339 for (long long i = 0;i <= 2*n;++i) fp_copy(T[i], X[i]);
1340}
1341
1343{
1344 if (n <= 1) return;
1345
1346 long long m = n/2;
1349
1350 fp X[2*n+1];
1351
1352 /* T[0..2m] is left product */
1353 /* T[3m...2n+m] is right product */
1354 poly_mul_selfreciprocal(X,(const fp*) T,2*m+1,(const fp*) T+3*m,2*(n-m)+1);
1355
1356 for (long long i = 0;i <= 2*n;++i) fp_copy(T[i], X[i]);
1357}
uint64_t fp[NUMBER_OF_WORDS]
Definition fp-gmp.h:22
#define fp_1
Definition fp-gmp.h:48
#define fp_0
Definition fp-gmp.h:50
#define fp_copy
Definition fp-gmp.h:79
assert(var1 eq var2)
#define UNCONST(type, var)
Definition poly.c:8
#define poly_mul_high
Definition poly.h:22
#define poly_multieval
Definition poly.h:61
#define poly_multieval_postcompute
Definition poly.h:70
#define poly_tree1
Definition poly.h:49
#define poly_mul
Definition poly.h:15
#define poly_mul_selfreciprocal
Definition poly.h:30
#define poly_multiprod2_selfreciprocal
Definition poly.h:40
#define poly_tree1size
Definition poly.h:52
#define poly_multieval_precomputesize
Definition poly.h:67
#define poly_mul_mid
Definition poly.h:26
#define poly_multiprod2
Definition poly.h:37
#define poly_mul_low
Definition poly.h:18
#define poly_multieval_precompute
Definition poly.h:64
for i
Definition proj.h:18
fp z
Definition proj.h:20
fp x
Definition proj.h:19
g a1
Definition to_model.m:15
f a
Definition to_model.m:12