Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
mont.c
Go to the documentation of this file.
#include <stdio.h>
#include <string.h>
#include <assert.h>

#include "steps.h"
#include "mont.h"
#if defined AVX2
#include "../common/fp/avx2/fp-avx2.h"
#elif defined GMP
#include "../common/fp/gmp/fp-gmp.h"
#else
#include "../common/fp/mulx/fp.h"
#endif
#include "poly.h"
#include "int64mask.h"
16
17// #define fp_cswap fp_cswap1
18
19// precompute A24.x = A.x+2*A.z, A24.z = 4*A.z
// precompute A24.x = A.x+2*A.z, A24.z = 4*A.z
// (projective curve constant used by xDBL/xDBLADD; order of the three
// operations matters because A24->x is used as a scratch value)
void xA24(proj *A24, const proj *A)
{
    fp_double2(&A24->x, &A->z);               // A24.x = 2*A.z (temporary)
    fp_double2(&A24->z, (const fp *)&A24->x); // A24.z = 4*A.z
    fp_add2(&A24->x, &A->x);                  // A24.x = A.x + 2*A.z
}
26
// first half of x-only doubling, shared with x2DBL:
// P2.x = (P.x+P.z)^2, P2.z = (P.x-P.z)^2
static void x2(proj *P2, proj const *P)
{
    fp_add3(&P2->x, &P->x, &P->z);
    fp_sq1(&P2->x);
    fp_sub3(&P2->z, &P->x, &P->z);
    fp_sq1(&P2->z);
}
34
// second half of x-only doubling: Q = [2]P given
// P2 = ((P.x+P.z)^2, (P.x-P.z)^2) from x2().
// If Aaffine is nonzero, A24.z is implicitly 4 (see xA24 with A.z = 1),
// so the multiplication by A24.z is replaced by a cheaper quadruple.
static void x2DBL(proj *Q, proj const *P2, const proj *A24, int Aaffine)
{
    fp a, b, c;
    fp_sub3(&c, &P2->x, &P2->z);   // c = 4*P.x*P.z
    if (Aaffine)
        fp_quadruple2(&b, &P2->z); // b = 4*P2.z (A24.z == 4 case)
    else
        fp_mul3(&b, &A24->z, &P2->z);
    fp_mul3(&Q->x, &P2->x, (const fp *)&b);
    fp_mul3(&a, (const fp *)&c, &A24->x);
    fp_add2(&a, (const fp *)&b);
    fp_mul3(&Q->z, (const fp *)&a, (const fp *)&c);
}
48
/* Combined doubling and differential addition (one Montgomery-ladder step):
 * R = [2]P and S = P+Q, where PQ holds x(P-Q).
 * If Aaffine is nonzero, A24.z is implicitly 4, so the multiplication by
 * A24.z is replaced by fp_quadruple1 (matches the convention in x2DBL).
 * The statement order below deliberately reuses R/S/tmp* as scratch;
 * do not reorder without re-checking the data dependencies. */
void xDBLADD(proj *R, proj *S, proj const *P, proj const *Q, proj const *PQ, proj const *A24, int Aaffine)
{
    fp tmp0, tmp1, tmp2;

    fp_add3(&tmp0, &P->x, &P->z);                              // P.x+P.z
    fp_sub3(&tmp1, &P->x, &P->z);                              // P.x-P.z
    fp_sq2(&R->x, (const fp *)&tmp0);                          // (P.x+P.z)^2
    fp_sub3(&tmp2, &Q->x, &Q->z);
    fp_add3(&S->x, &Q->x, &Q->z);
    fp_mul2(&tmp0, (const fp *)&tmp2);                         // (P.x+P.z)(Q.x-Q.z)
    fp_sq2(&R->z, (const fp *)&tmp1);                          // (P.x-P.z)^2
    fp_mul2(&tmp1, (const fp *)&S->x);                         // (P.x-P.z)(Q.x+Q.z)
    fp_sub3(&tmp2, (const fp *)&R->x, (const fp *)&R->z);      // 4*P.x*P.z
    if (Aaffine)
        fp_quadruple1(&R->z);                                  // A24.z == 4 case
    else
        fp_mul2(&R->z, &A24->z);
    fp_mul2(&R->x, (const fp *)&R->z);
    fp_mul3(&S->x, &A24->x, (const fp *)&tmp2);
    fp_sub3(&S->z, (const fp *)&tmp0, (const fp *)&tmp1);
    fp_add2(&R->z, (const fp *)&S->x);
    fp_add3(&S->x, (const fp *)&tmp0, (const fp *)&tmp1);
    fp_mul2(&R->z, (const fp *)&tmp2);
    fp_sq1(&S->z);
    fp_sq1(&S->x);
    fp_mul2(&S->z, &PQ->x);                                    // scale by x(P-Q)
    fp_mul2(&S->x, &PQ->z);
}
77
78void xDBL(proj *Q, proj const *P, const proj *A24, int Aaffine)
79{
80 proj P2;
81 x2(&P2, P);
82 x2DBL(Q, &P2, A24, Aaffine);
83}
84
/* x-only differential addition: S = P+Q, where PQ holds x(P-Q).
 * Standard Montgomery ladder add; 4M + 2S + 6a. */
void xADD(proj *S, proj const *P, proj const *Q, proj const *PQ)
{
    fp a, b, c, d;
    fp_add3(&a, &P->x, &P->z);
    fp_sub3(&b, &P->x, &P->z);
    fp_add3(&c, &Q->x, &Q->z);
    fp_sub3(&d, &Q->x, &Q->z);
    fp_mul2(&a, (const fp *)&d);                       // (P.x+P.z)(Q.x-Q.z)
    fp_mul2(&b, (const fp *)&c);                       // (P.x-P.z)(Q.x+Q.z)
    fp_add3(&c, (const fp *)&a, (const fp *)&b);
    fp_sub3(&d, (const fp *)&a, (const fp *)&b);
    fp_sq1(&c);
    fp_sq1(&d);
    fp_mul3(&S->x, &PQ->z, (const fp *)&c);            // scale by z(P-Q)
    fp_mul3(&S->z, &PQ->x, (const fp *)&d);            // scale by x(P-Q)
}
101
// goal: Q = any point having same order as lP
// where prime l has chain dac,daclen with 0 <= daclen <= maxdaclen
// time depends on maxdaclen, but not dac or daclen
// this implementation does not necessarily return Q=lP
// can simply return P if it notices that P has order <l
// note that Q buffer can overlap P buffer
// XXX: skip copies in cases where dac and daclen are specified to be public
// XXX: skip copies by unrolling
void xMUL_dac(proj *Q, proj const *A24, int Aaffine, proj const *P, int64_t dac, int64_t daclen, int64_t maxdaclen)
{
    proj Pinput = *P; // keeping copy since Q can overlap P
    proj P1 = Pinput;
    proj P2;
    xDBL(&P2, &P1, A24, Aaffine);
    proj P3;
    xADD(&P3, &P2, &P1, &P1);
    // int64_t collision = fp_iszero(&Pinput.z);
    int64_t collision = fp_iszero(Pinput.z);

    for (;;)
    {
        // want == 1 while daclen >= 0, 0 once it has gone negative
        // (int64mask_negative returns -1 for negative input, else 0),
        // so Q latches the value of P3 reached after exactly daclen steps
        // while the loop always runs maxdaclen iterations.
        int64_t want = 1 + int64mask_negative(daclen);
        proj_cmov(Q, &P3, want);
        if (maxdaclen <= 0)
            break;

        // invariant: P1+P2 = P3
        // odd dac: want to replace P1,P2,P3 with P1,P3,P1+P3
        // even dac: want to replace P1,P2,P3 with P2,P3,P2+P3

        proj_cswap(&P1, &P2, 1 - (dac & 1));
        // invariant: P1+P2 = P3
        // all cases: want to replace P1,P2,P3 with P1,P3,P1+P3

        // collision |= want&fp_iszero(&P2.z);
        collision |= want & fp_iszero(P2.z);

        proj next;
        xADD(&next, &P3, &P1, &P2);
        P2 = P3;
        P3 = next;

        maxdaclen -= 1;
        daclen -= 1;
        dac >>= 1;
    }

    proj_cmov(Q, &Pinput, collision);
    // in case of collision, input has the right order
}
152
/* Montgomery ladder. */
/* P must not be the unique point of order 2. */
/* k must have _at most_ kbits. */
/* Constant time for fixed kbits: one cswap + one xDBLADD per bit.
 * The swap condition is bit^prevbit so each ladder step needs only a
 * single conditional swap instead of swap-before and swap-after. */
void xMUL(proj *Q, proj const *A, int Aaffine, proj const *P, uintbig const *k, int64_t kbits)
{
    proj R = *P;
    proj A24;
    const proj Pcopy = *P; /* in case Q = P */

    // Q starts at the ladder identity (1 : 0)
    fp_copy(Q->x, fp_1);
    fp_copy(Q->z, fp_0);

    xA24(&A24, A);

    int64_t prevbit = 0;
    while (kbits > 0)
    {
        --kbits;

        int64_t bit = uintbig_bit(k, kbits);
        int64_t bitxor = bit ^ prevbit;

        proj_cswap(Q, &R, bitxor);

        // invariant: R - Q = Pcopy (difference point for the ladder)
        xDBLADD(Q, &R, Q, &R, &Pcopy, &A24, Aaffine);

        prevbit = bit;
    }
    // undo the last conditional swap
    proj_cswap(Q, &R, prevbit);
}
183
184void xMUL_vartime(proj *Q, proj const *A, int Aaffine, proj const *P, uintbig const *k)
185{
186 int64_t kbits = UBITS;
187 while (kbits && !uintbig_bit(k, kbits - 1))
188 --kbits;
189 xMUL(Q, A, Aaffine, P, k, kbits);
190}
191
/* replace x with x^k y^8 */
/* constant-time 2-bit window algorithm copied from qisog */
/* Timing depends on kupper (an upper bound on k), not on k itself:
 * the loop always runs over the bit length of kupper, and window values
 * are selected with constant-time cmovs from the table {1, x, x^2, x^3}.
 * The single multiplication by y at window position i == 2 is followed by
 * exactly three squarings overall, contributing the y^8 factor. */
static void powpow8mod(fp *x, const fp *y, uint64_t k, uint64_t kupper)
{
    int64_t bits = 5; /* XXX: also optimize smaller k */
    kupper >>= 5;
    while (kupper)
    {
        ++bits;
        kupper >>= 1;
    }
    // bits = max(5, bitlength of original kupper)

    fp x1;
    fp_copy(x1, *x);
    fp x2;
    fp_sq2(&x2, (const fp *)x);
    fp x3;
    fp_mul3(&x3, (const fp *)&x2, (const fp *)x);

    int xwritten = 0; /* otherwise *x is implicitly 1 */

    // process two bits of k per iteration, most significant first
    for (int64_t i = (bits - 1) & ~1; i >= 0; i -= 2)
    {
        if (xwritten)
        {
            fp_sq1(x);
            if (i == 2)
                fp_mul2(x, y); // y gains 3 more squarings => y^8
            fp_sq1(x);
        }
        // constant-time selection of x^(2-bit window) from {1, x1, x2, x3}
        fp ram0;
        fp_copy(ram0, fp_1);
        fp ram1;
        fp_copy(ram1, x1);
        int64_t control1 = 1 & (k >> (i + 1));
        fp_cmov_ctidh(&ram0, (const fp *)&x2, control1);
        fp_cmov_ctidh(&ram1, (const fp *)&x3, control1);
        int64_t control0 = 1 & (k >> i);
        fp_cmov_ctidh(&ram0, (const fp *)&ram1, control0);

        if (xwritten)
            fp_mul2(x, (const fp *)&ram0);
        else
            fp_copy(*x, ram0); // first window: skip the multiply by 1

        xwritten = 1;
    }
    assert(xwritten);
}
241
/* Precompute point-dependent values shared by the biquadratic evaluations:
 * precomp = { Qx^2, Qz^2, Qx*Qz, 2*Qx*Qz, Qx^2 - Qz^2, (Qx - Qz)^2 }. */
static void biquad_precompute_point(fp *precomp, const proj *Q)
{
    /* XXX: can do S-M tradeoff */

    fp_sq2(&precomp[0], &Q->x);
    fp_sq2(&precomp[1], &Q->z);
    fp_mul3(&precomp[2], &Q->x, &Q->z);
    fp_add3(&precomp[3], (const fp *)&precomp[2], (const fp *)&precomp[2]);
    fp_sub3(&precomp[4], (const fp *)&precomp[0], (const fp *)&precomp[1]);
    fp_sub3(&precomp[5], &Q->x, &Q->z);
    fp_sq1(&precomp[5]);
}
254
/* Precompute curve-and-kernel-point-dependent coefficients for the
 * biquadratic forms used in sqrt-Velu giant steps.
 * PxPz2 = 2*Px*Pz is derived as (Px+Pz)^2 - Px^2 - Pz^2 (S-M tradeoff).
 * Aprecomp[0..3] are A.z*Px^2, A.z*Pz^2, A.z*2PxPz, A.x*2PxPz;
 * Aprecomp[4..7] are the sums/differences consumed by
 * biquad_postcompute_curve and biquad_postcompute_point. */
static void biquad_precompute_curve(fp *Aprecomp, const proj *P, const proj *A)
{
    fp Pplus;
    fp_add3(&Pplus, &P->x, &P->z);
    fp_sq1(&Pplus);
    fp PxPx;
    fp_sq2(&PxPx, &P->x);
    fp PzPz;
    fp_sq2(&PzPz, &P->z);
    fp PxPz2;
    fp_sub3(&PxPz2, (const fp *)&Pplus, (const fp *)&PxPx);
    fp_sub2(&PxPz2, (const fp *)&PzPz);

    fp_mul3(&Aprecomp[0], (const fp *)&A->z, (const fp *)&PxPx);
    fp_mul3(&Aprecomp[1], (const fp *)&A->z, (const fp *)&PzPz);
    fp_mul3(&Aprecomp[2], (const fp *)&A->z, (const fp *)&PxPz2);
    fp_mul3(&Aprecomp[3], (const fp *)&A->x, (const fp *)&PxPz2);

    fp t;
    fp_add3(&t, (const fp *)&Aprecomp[0], (const fp *)&Aprecomp[1]);
    fp_sub3(&Aprecomp[5], (const fp *)&t, (const fp *)&Aprecomp[2]);  // A.z*(Px-Pz)^2
    fp_add3(&Aprecomp[6], (const fp *)&t, (const fp *)&Aprecomp[2]);  // A.z*(Px+Pz)^2

    fp_add3(&Aprecomp[7], (const fp *)&Aprecomp[3], (const fp *)&Aprecomp[6]);
    fp_neg1(&Aprecomp[7]);

    fp_sub3(&Aprecomp[4], (const fp *)&Aprecomp[0], (const fp *)&Aprecomp[1]); // A.z*(Px^2-Pz^2)
}
283
284// /* biquad and biquad_inv */
285// static void biquad_both(fp *out,fp *outinv,const proj *P,const proj *Q,const proj *A)
286// {
287// fp PxQx; fp_mul3(&PxQx,&P->x,&Q->x);
288// fp PxQz; fp_mul3(&PxQz,&P->x,&Q->z);
289// fp PzQx; fp_mul3(&PzQx,&P->z,&Q->x);
290// fp PzQz; fp_mul3(&PzQz,&P->z,&Q->z);
291// fp PPQQ; fp_mul3(&PPQQ,&PxQx,&PzQz);
292// fp_add2(&PPQQ,&PPQQ);
293// fp_mul2(&PPQQ,&A->x);
294
295// fp s,t;
296
297// fp_add3(&s,&PxQx,&PzQz);
298// fp_add3(&t,&PzQx,&PxQz);
299// fp_mul3(&out[1],&s,&t);
300// fp_mul2(&out[1],&A->z);
301// fp_add2(&out[1],&PPQQ);
302// fp_add2(&out[1],&out[1]);
303// fp_neg1(&out[1]); /* XXX: push through other computations? */
304
305// fp_sub3(&out[2],&PxQz,&PzQx);
306// fp_sq1(&out[2]);
307// fp_mul2(&out[2],&A->z);
308
309// fp_sub3(&out[0],&PxQx,&PzQz);
310// fp_sq1(&out[0]);
311// fp_mul2(&out[0],&A->z);
312
313// /* ----- */
314
315// fp_add3(&s,&PxQz,&PzQx);
316// fp_add3(&t,&PzQz,&PxQx);
317// fp_mul3(&outinv[1],&s,&t);
318// fp_mul2(&outinv[1],&A->z);
319// fp_add2(&outinv[1],&PPQQ);
320// fp_add2(&outinv[1],&outinv[1]);
321// fp_neg1(&outinv[1]); /* XXX: push through other computations? */
322
323// fp_copy(outinv[2], out[0]);
324// fp_copy(outinv[0], out[2]);
325// }
326
327// /* biquad_1 and biquad_minus1 */
328// static void biquad_pm1(fp *outplus,fp *outminus,const proj *P,const proj *A)
329// {
330// fp Pplus; fp_add3(&Pplus,&P->x,&P->z);
331// fp Pminus; fp_sub3(&Pminus,&P->x,&P->z);
332// fp_sq1(&Pplus);
333// fp_sq1(&Pminus);
334
335// fp_mul3(&outplus[0],&Pminus,&A->z);
336// fp_copy(outplus[2], outplus[0]);
337// fp_mul3(&outminus[0],&Pplus,&A->z);
338// fp_copy(outminus[2], outminus[0]);
339
340// fp u;
341// fp_sub3(&u,&Pminus,&Pplus);
342// fp_mul2(&u,&A->x);
343
344// fp t;
345// fp_add3(&t,&outminus[0],&outminus[0]);
346// fp_sub3(&outplus[1],&u,&t);
347
348// fp_add3(&t,&outplus[0],&outplus[0]);
349// fp_sub3(&outminus[1],&t,&u);
350// }
351
/* Produce the quadratics for the curve evaluated at x = 1 (outplus) and
 * x = -1 (outminus) from the precomputed Aprecomp coefficients; compare
 * the commented-out reference implementation biquad_pm1 above. */
static void biquad_postcompute_curve(fp *outplus, fp *outminus, const fp *Aprecomp)
{
    fp_copy(outplus[0], Aprecomp[5]);
    fp_copy(outplus[2], Aprecomp[5]);
    fp_copy(outminus[0], Aprecomp[6]);
    fp_copy(outminus[2], Aprecomp[6]);

    fp_add3(&outplus[1], &Aprecomp[7], &Aprecomp[7]);
    fp_add3(&outminus[1], &Aprecomp[5], &Aprecomp[3]);
    fp_double1(&outminus[1]);
}
363
/* Combine point precomputation (biquad_precompute_point) with curve
 * precomputation (biquad_precompute_curve) into the three coefficients
 * out[0..2] of the biquadratic form for this (point, kernel-point) pair;
 * compare the commented-out reference implementation biquad_both above. */
static void biquad_postcompute_point(fp *out, const fp *precomp, const fp *Aprecomp)
{
    fp v;

    fp_mul3(&out[1], (const fp *)&Aprecomp[7], (const fp *)&precomp[3]);
    fp_mul3(&v, (const fp *)&Aprecomp[2], (const fp *)&precomp[5]);
    fp_sub2(&out[1], (const fp *)&v);

    fp_mul3(&out[2], (const fp *)&Aprecomp[0], (const fp *)&precomp[1]);
    fp_mul3(&v, (const fp *)&Aprecomp[2], (const fp *)&precomp[2]);
    fp_sub2(&out[2], (const fp *)&v);
    fp_mul3(&v, (const fp *)&Aprecomp[1], (const fp *)&precomp[0]);
    fp_add2(&out[2], (const fp *)&v);

    fp_mul3(&out[0], (const fp *)&Aprecomp[4], (const fp *)&precomp[4]);
    fp_add2(&out[0], (const fp *)&out[2]);
}
381
382// // replace x with x^k
383// static void exp_by_squaring_(fp* x, uint64_t k)
384// {
385// fp result1;
386// fp_copy(result1, fp_1);
387
388// while (k)
389// {
390// if (k & 1){
391// fp_mul2(&result1, x);
392// }
393// fp_sq1(x);
394// k >>= 1;
395// }
396
397// fp_cswap_ctidh(&result1, x, 1);
398
399// }
400
//simultaneous square-and-multiply, computes x^exp and y^exp
// NOTE(review): the branch on (exp & 1) makes this variable-time in exp;
// the only caller (xISOG_old) passes the public isogeny degree k, so this
// looks intentional — confirm exp is never secret before reusing elsewhere.
static void exp_by_squaring_(fp *x, fp *y, uint64_t exp)
{
    fp result1, result2;
    fp_copy(result1, fp_1);
    fp_copy(result2, fp_1);

    while (exp)
    {
        if (exp & 1)
        {
            fp_mul2(&result1, (const fp *)x);
            fp_mul2(&result2, (const fp *)y);
        }

        fp_sq1(x);
        fp_sq1(y);
        exp >>= 1;
    }

    // swap-with-constant-1 used as a move: write results back into x and y
    fp_cswap(result1, *x, 1);
    fp_cswap(result2, *y, 1);
}
424
425/* computes the isogeny with kernel point K of order k */
426/* returns the new curve coefficient A and the images of P[0],P[1],...,P[Plen-1] */
427/* k is odd; klower <= k <= kupper */
428/* goal here is for timing to not leak k */
429void xISOG_matryoshka(proj *A, proj *P, int64_t Plen, proj const *K, int64_t k, int64_t klower, int64_t kupper)
430{
431 assert(3 <= klower);
432 assert(klower & 1);
433 assert(kupper & 1);
434
435 int64_t sqrtvelu = 0;
436 int64_t bs = 0;
437 int64_t gs = 0;
438
439 steps(&bs, &gs, klower);
440 if (bs)
441 {
442 sqrtvelu = 1;
443 assert(bs > 0);
444 assert(gs > 0);
445 assert(!(bs & 1));
446 }
447
448 fp tmp0, tmp1, tmp2, tmp3, tmp4;
449
450 proj A24;
451 proj Aed; // twisted Edwards curve coefficients
452
453 // A -> (A : C)
454 // Aed.z = 2C
455 fp_double2(&Aed.z, (const fp *)&A->z);
456
457 // Aed.x = A + 2C
458 fp_add3(&Aed.x, (const fp *)&A->x, (const fp *)&Aed.z);
459
460 // A24.x = A + 2C
461 fp_copy(A24.x, Aed.x);
462
463 // A24.z = 4C
464 fp_double2(&A24.z, (const fp *)&Aed.z);
465
466 // Aed.z = A - 2C
467 fp_sub3(&Aed.z, (const fp *)&A->x, (const fp *)&Aed.z);
468
469 proj M[kupper]; /* XXX: use less space */
470#ifndef NDEBUG
471 int Minit[kupper];
472
473 for (int64_t s = 0; s < kupper; ++s)
474 Minit[s] = 0;
475
476 Minit[1] = 1;
477#endif
478 M[1] = *K;
479 xDBL(&M[2], K, &A24, 0);
480#ifndef NDEBUG
481 Minit[2] = 1;
482#endif
483
484 if (sqrtvelu)
485 {
486 for (int64_t s = 3; s < kupper; ++s)
487 {
488 if (s & 1)
489 {
490 int64_t i = s / 2;
491 assert(s == 2 * i + 1);
492 if (i < bs)
493 {
494 if (s == 3)
495 {
496 assert(Minit[1]);
497 assert(Minit[2]);
498 xADD(&M[s], &M[2], &M[1], &M[1]);
499#ifndef NDEBUG
500 Minit[s] = 1;
501#endif
502 continue;
503 }
504 assert(Minit[s - 2]);
505 assert(Minit[s - 4]);
506 assert(Minit[2]);
507 xADD(&M[s], &M[s - 2], &M[2], &M[s - 4]);
508#ifndef NDEBUG
509 Minit[s] = 1;
510#endif
511 continue;
512 }
513 }
514 else
515 {
516 int64_t i = s / 2 - 1;
517 assert(s == 2 * i + 2);
518 if (i < (kupper - 1) / 2 - 2 * bs * gs)
519 {
520 if (s == 4)
521 {
522 assert(Minit[2]);
523 xDBL(&M[s], &M[2], &A24, 0);
524#ifndef NDEBUG
525 Minit[s] = 1;
526#endif
527 continue;
528 }
529 assert(Minit[s - 2]);
530 assert(Minit[s - 4]);
531 assert(Minit[2]);
532 xADD(&M[s], &M[s - 2], &M[2], &M[s - 4]);
533#ifndef NDEBUG
534 Minit[s] = 1;
535#endif
536 continue;
537 }
538 }
539
540 if (bs > 0)
541 {
542 if (s == 2 * bs)
543 {
544 assert(Minit[bs - 1]);
545 assert(Minit[bs + 1]);
546 assert(Minit[2]);
547 xADD(&M[s], &M[bs + 1], &M[bs - 1], &M[2]);
548#ifndef NDEBUG
549 Minit[s] = 1;
550#endif
551 continue;
552 }
553 else if (s == 4 * bs)
554 {
555 assert(Minit[2 * bs]);
556 xDBL(&M[s], &M[2 * bs], &A24, 0);
557#ifndef NDEBUG
558 Minit[s] = 1;
559#endif
560 continue;
561 }
562 else if (s == 6 * bs)
563 {
564 assert(Minit[2 * bs]);
565 assert(Minit[4 * bs]);
566 xADD(&M[s], &M[4 * bs], &M[2 * bs], &M[2 * bs]);
567#ifndef NDEBUG
568 Minit[s] = 1;
569#endif
570 continue;
571 }
572 else if (s % (4 * bs) == 2 * bs)
573 {
574 int64_t j = s / (4 * bs);
575 assert(s == 2 * bs * (2 * j + 1));
576 if (j < gs)
577 {
578 assert(Minit[s - 4 * bs]);
579 assert(Minit[s - 8 * bs]);
580 assert(Minit[4 * bs]);
581 xADD(&M[s], &M[s - 4 * bs], &M[4 * bs], &M[s - 8 * bs]);
582#ifndef NDEBUG
583 Minit[s] = 1;
584#endif
585 continue;
586 }
587 }
588 }
589 }
590 }
591 else
592 {
593 for (int64_t i = 3; i <= (kupper - 1) / 2; ++i)
594 {
595#ifndef NDEBUG
596 Minit[i] = 1;
597#endif
598 xADD(&M[i], &M[i - 1], K, &M[i - 2]);
599 }
600 }
601
602 proj Abatch;
603 fp_copy(Abatch.x, fp_1);
604 fp_copy(Abatch.z, fp_1);
605 int64_t fixPlen = 0;
606 if(Plen>0) {
607 fixPlen = Plen;
608 } else {
609 fixPlen = 1;
610 }
611 proj Qbatch[fixPlen];
612 for (int64_t h = 0; h < Plen; ++h)
613 {
614 fp_copy(Qbatch[h].x, fp_1);
615 fp_copy(Qbatch[h].z, fp_1);
616 }
617 fp Psum[fixPlen], Pdif[fixPlen];
618 for (int64_t h = 0; h < Plen; ++h)
619 {
620 //precomputations
621 // ( X + Z )
622 fp_add3(&Psum[h], (const fp *)&P[h].x, (const fp *)&P[h].z);
623 // ( X - Z )
624 fp_sub3(&Pdif[h], (const fp *)&P[h].x, (const fp *)&P[h].z);
625 }
626
627 if (sqrtvelu)
628 {
629 int64_t TIlen = 2 * bs + poly_tree1size(bs);
630 fp TI[TIlen];
631
632 for (int64_t i = 0; i < bs; ++i)
633 {
634 assert(Minit[2 * i + 1]);
635 fp_neg2(&TI[2 * i], (const fp *)&M[2 * i + 1].x);
636 fp_copy(TI[2 * i + 1], M[2 * i + 1].z);
637 }
638
639 poly_tree1(TI + 2 * bs, (const fp *)TI, bs);
640
641 fp Aprecomp[gs][8];
642 fp T1[3 * gs];
643 fp Tminus1[3 * gs];
644
645 for (int64_t j = 0; j < gs; ++j)
646 {
647 assert(Minit[2 * bs * (2 * j + 1)]);
648 biquad_precompute_curve(Aprecomp[j], &M[2 * bs * (2 * j + 1)], A);
649 biquad_postcompute_curve(T1 + 3 * j, Tminus1 + 3 * j, (const fp *)Aprecomp[j]);
650 }
653
654 int64_t precompsize = poly_multieval_precomputesize(bs, 2 * gs + 1);
655 fp precomp[precompsize];
656 poly_multieval_precompute(precomp, bs, 2 * gs + 1, (const fp *)TI, (const fp *)TI + 2 * bs);
657
658 fp v[bs];
659
660 poly_multieval_postcompute(v, bs, (const fp *)T1, 2 * gs + 1, (const fp *)TI, (const fp *)TI + 2 * bs, (const fp *)precomp);
661 fp_copy(Abatch.x, v[0]);
662 for (int64_t i = 1; i < bs; ++i)
663 fp_mul2(&Abatch.x, (const fp *)&v[i]);
664
665 poly_multieval_postcompute(v, bs, (const fp *)Tminus1, 2 * gs + 1, (const fp *)TI, (const fp *)TI + 2 * bs, (const fp *)precomp);
666 fp_copy(Abatch.z, v[0]);
667 for (int64_t i = 1; i < bs; ++i)
668 fp_mul2(&Abatch.z, (const fp *)&v[i]);
669
670 for (int64_t h = 0; h < Plen; ++h)
671 {
672 fp Pprecomp[6];
673 biquad_precompute_point(Pprecomp, &P[h]);
674
675 fp TP[3 * gs];
676 for (int64_t j = 0; j < gs; ++j)
677 biquad_postcompute_point(TP + 3 * j, (const fp *)Pprecomp, (const fp *)Aprecomp[j]);
678 poly_multiprod2(TP, gs);
679
680 fp TPinv[2 * gs + 1];
681 for (int64_t j = 0; j < 2 * gs + 1; ++j)
682 fp_copy(TPinv[j], TP[2 * gs - j]);
683
684 poly_multieval_postcompute(v, bs, (const fp *)TP, 2 * gs + 1, (const fp *)TI, (const fp *)TI + 2 * bs, (const fp *)precomp);
685 fp_copy(Qbatch[h].z, v[0]);
686 for (int64_t i = 1; i < bs; ++i)
687 fp_mul2(&Qbatch[h].z, (const fp *)&v[i]);
688
689 poly_multieval_postcompute(v, bs, (const fp *)TPinv, 2 * gs + 1, (const fp *)TI, (const fp *)TI + 2 * bs, (const fp *)precomp);
690 fp_copy(Qbatch[h].x, v[0]);
691 for (int64_t i = 1; i < bs; ++i)
692 fp_mul2(&Qbatch[h].x, (const fp *)&v[i]);
693 }
694
695 int64_t ignore = (k - 1) / 2 - 2 * bs * gs; /* skip i >= ignore */
696 for (int64_t i = 0; i < (kupper - 1) / 2 - 2 * bs * gs; ++i)
697 {
698 int64_t want = -((i - ignore) >> 61); /* 61 to reduce risk of compiler doing something silly */
699 assert(Minit[2 * i + 2]);
700 fp_sub3(&tmp4, (const fp *)&M[2 * i + 2].x, (const fp *)&M[2 * i + 2].z);
701 fp_add3(&tmp3, (const fp *)&M[2 * i + 2].x, (const fp *)&M[2 * i + 2].z);
702 fp_mul3(&tmp2, (const fp *)&Abatch.x, (const fp *)&tmp4);
703 fp_cmov_ctidh(&Abatch.x, (const fp *)&tmp2, want);
704 fp_mul3(&tmp2, (const fp *)&Abatch.z, (const fp *)&tmp3);
705 fp_cmov_ctidh(&Abatch.z, (const fp *)&tmp2, want);
706 for (int64_t h = 0; h < Plen; ++h)
707 {
708 fp_mul3(&tmp1, (const fp *)&tmp4, (const fp *)&Psum[h]);
709 fp_mul3(&tmp0, (const fp *)&tmp3, (const fp *)&Pdif[h]);
710 fp_add3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
711 fp_mul2(&tmp2, (const fp *)&Qbatch[h].x);
712 fp_cmov_ctidh(&Qbatch[h].x, (const fp *)&tmp2, want);
713 fp_sub3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
714 fp_mul2(&tmp2, (const fp *)&Qbatch[h].z);
715 fp_cmov_ctidh(&Qbatch[h].z, (const fp *)&tmp2, want);
716 }
717 }
718 }
719 else
720 {
721 // no sqrtvelu
722 int64_t ignore = (k + 1) / 2; /* skip i >= ignore */
723
724 assert(Minit[1]);
725 fp_sub3(&tmp4, (const fp *)&M[1].x, (const fp *)&M[1].z);
726 fp_add3(&tmp3, (const fp *)&M[1].x, (const fp *)&M[1].z);
727 fp_copy(Abatch.x, tmp4);
728 fp_copy(Abatch.z, tmp3);
729
730 for (int64_t h = 0; h < Plen; ++h)
731 {
732 fp_mul3(&tmp1, (const fp *)&tmp4, (const fp *)&Psum[h]);
733 fp_mul3(&tmp0, (const fp *)&tmp3, (const fp *)&Pdif[h]);
734 fp_add3(&Qbatch[h].x, (const fp *)&tmp0, (const fp *)&tmp1);
735 fp_sub3(&Qbatch[h].z, (const fp *)&tmp0, (const fp *)&tmp1);
736 }
737
738 for (int64_t i = 2; i <= (kupper - 1) / 2; ++i)
739 {
740 int64_t want = -((i - ignore) >> 61);
741 // 2 < i < (k-1)/2
742 assert(Minit[i]);
743 fp_sub3(&tmp4, (const fp *)&M[i].x, (const fp *)&M[i].z);
744 fp_add3(&tmp3, (const fp *)&M[i].x, (const fp *)&M[i].z);
745 fp_mul3(&tmp2, (const fp *)&Abatch.x, (const fp *)&tmp4);
746 fp_cmov_ctidh(&Abatch.x, (const fp *)&tmp2, want);
747 fp_mul3(&tmp2, (const fp *)&Abatch.z, (const fp *)&tmp3);
748 fp_cmov_ctidh(&Abatch.z, (const fp *)&tmp2, want);
749 for (int64_t h = 0; h < Plen; ++h)
750 {
751 // point evaluation
752 fp_mul3(&tmp1, (const fp *)&tmp4, (const fp *)&Psum[h]);
753 fp_mul3(&tmp0, (const fp *)&tmp3, (const fp *)&Pdif[h]);
754 fp_add3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
755 fp_mul2(&tmp2, (const fp *)&Qbatch[h].x);
756 fp_cmov_ctidh(&Qbatch[h].x, (const fp *)&tmp2, want);
757 fp_sub3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
758 fp_mul2(&tmp2, (const fp *)&Qbatch[h].z);
759 fp_cmov_ctidh(&Qbatch[h].z, (const fp *)&tmp2, want);
760 }
761 }
762 }
763
764 for (int64_t h = 0; h < Plen; ++h)
765 {
766 // point evaluation
767 // [∏( X − Z )( X i + Z i ) + ( X + Z )( X i − Z i )]^2
768 fp_sq1(&Qbatch[h].x);
769 fp_sq1(&Qbatch[h].z);
770
771 // X' = X * Qbatch[h].x
772 // X' = X * [∏( X − Z )( X i + Z i ) + ( X + Z )( X i − Z i )]^2
773 fp_mul2(&P[h].x, (const fp *)&Qbatch[h].x);
774 fp_mul2(&P[h].z, (const fp *)&Qbatch[h].z);
775 }
776
777 // Aed.x = Aed.x^k * Abatch.z^8
778 powpow8mod(&Aed.x, (const fp *)&Abatch.z, k, kupper);
779 // Aed.z = Aed.z^k * Abatch.x^8
780 powpow8mod(&Aed.z, (const fp *)&Abatch.x, k, kupper);
781
782 //compute Montgomery params
783 // ( A' : C' ) = ( 2 ( a'_E + d'_E ) : a'_E − d'_E )
784 fp_add3(&A->x, (const fp *)&Aed.x, (const fp *)&Aed.z);
785 fp_sub3(&A->z, (const fp *)&Aed.x, (const fp *)&Aed.z);
786 fp_double1(&A->x);
787}
788
/* Convenience wrapper: isogeny of exactly known degree k
 * (matryoshka bounds collapsed to klower = kupper = k). */
void xISOG(proj *A, proj *P, int64_t Plen, proj const *K, int64_t k)
{
    xISOG_matryoshka(A, P, Plen, K, k, k, k);
}
793
/* Older single-point Velu isogeny: degree-k isogeny with kernel K,
 * updating the curve coefficient A and pushing the single point P through.
 * Unlike xISOG_matryoshka this handles exactly one point, uses 0-based
 * indexing for the kernel multiples (M[i] = [i+1]K), and makes no
 * attempt to hide k. */
void xISOG_old(proj *A, proj *P, proj const *K, int64_t k)
{
    assert(k >= 3);
    assert(k % 2 == 1);

    fp tmp0, tmp1, tmp2, Psum, Pdif;
    proj Q, Aed, prod;

    fp_add3(&Aed.z, (const fp *)&A->z, (const fp *)&A->z); //compute twisted Edwards curve coefficients
    fp_add3(&Aed.x, (const fp *)&A->x, (const fp *)&Aed.z); // a_E = A + 2C
    fp_sub3(&Aed.z, (const fp *)&A->x, (const fp *)&Aed.z); // d_E = A - 2C

    fp_add3(&Psum, (const fp *)&P->x, (const fp *)&P->z); //precomputations
    fp_sub3(&Pdif, (const fp *)&P->x, (const fp *)&P->z);

    // initialize running products with the i = 0 factor (K itself)
    fp_sub3(&prod.x, &K->x, &K->z);
    fp_add3(&prod.z, &K->x, &K->z);

    fp_mul3(&tmp1, (const fp *)&prod.x, (const fp *)&Psum);
    fp_mul3(&tmp0, (const fp *)&prod.z, (const fp *)&Pdif);
    fp_add3(&Q.x, (const fp *)&tmp0, (const fp *)&tmp1);
    fp_sub3(&Q.z, (const fp *)&tmp0, (const fp *)&tmp1);

    // M[i] = [i+1]K (note the 0-based indexing, unlike xISOG_matryoshka)
    proj M[k];
    proj A24;
    xA24(&A24, A);

    M[0] = *K;
    xDBL(&M[1], K, &A24, 0);
    for (int64_t i = 2; i < k / 2; ++i)
    {
        xADD(&M[i], &M[(i - 1)], K, &M[(i - 2)]);
    }

    // accumulate the remaining factors i = 1 .. k/2 - 1
    for (int64_t i = 1; i < k / 2; ++i)
    {
        fp_sub3(&tmp1, (const fp *)&M[i].x, (const fp *)&M[i].z);
        fp_add3(&tmp0, (const fp *)&M[i].x, (const fp *)&M[i].z);
        fp_mul2(&prod.x, (const fp *)&tmp1);
        fp_mul2(&prod.z, (const fp *)&tmp0);
        fp_mul2(&tmp1, (const fp *)&Psum);
        fp_mul2(&tmp0, (const fp *)&Pdif);
        fp_add3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
        fp_mul2(&Q.x, (const fp *)&tmp2);
        fp_sub3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
        fp_mul2(&Q.z, (const fp *)&tmp2);
    }

    // point evaluation
    fp_sq1(&Q.x);
    fp_sq1(&Q.z);
    fp_mul2(&P->x, (const fp *)&Q.x);
    fp_mul2(&P->z, (const fp *)&Q.z);

    //compute Aed.x^k, Aed.z^k
    exp_by_squaring_(&Aed.x, &Aed.z, k);

    //compute prod.x^8, prod.z^8 (three squarings each)
    fp_sq1(&prod.x);
    fp_sq1(&prod.x);
    fp_sq1(&prod.x);
    fp_sq1(&prod.z);
    fp_sq1(&prod.z);
    fp_sq1(&prod.z);

    //compute image curve parameters
    fp_mul2(&Aed.z, (const fp *)&prod.x); // d'_E = d_E^k * prod.x^8
    fp_mul2(&Aed.x, (const fp *)&prod.z); // a'_E = a_E^k * prod.z^8

    //compute Montgomery params
    fp_add3(&A->x, (const fp *)&Aed.x, (const fp *)&Aed.z);
    fp_sub3(&A->z, (const fp *)&Aed.x, (const fp *)&Aed.z);
    fp_add2(&A->x, (const fp *)&A->x); // A' = 2*(a'_E + d'_E)
}
uint64_t fp[NUMBER_OF_WORDS]
Definition fp-gmp.h:22
#define uintbig_bit
Definition fp-gmp.h:36
#define fp_1
Definition fp-gmp.h:48
#define UBITS
Definition fp-gmp.h:247
#define fp_0
Definition fp-gmp.h:50
#define fp_copy
Definition fp-gmp.h:79
#define fp_cswap
Definition fp-gmp.h:82
assert(var1 eq var2)
#define xMUL
Definition mont.h:22
#define xA24
Definition mont.h:17
#define xMUL_dac
Definition mont.h:21
#define xISOG_old
Definition mont.h:26
#define xDBLADD
Definition mont.h:20
#define xISOG
Definition mont.h:25
#define xMUL_vartime
Definition mont.h:23
#define xADD
Definition mont.h:19
#define xISOG_matryoshka
Definition mont.h:24
#define xDBL
Definition mont.h:18
A
Definition tests.py:29
#define poly_multieval_postcompute
Definition poly.h:70
#define poly_tree1
Definition poly.h:49
#define poly_multiprod2_selfreciprocal
Definition poly.h:40
#define poly_tree1size
Definition poly.h:52
#define poly_multieval_precomputesize
Definition poly.h:67
#define poly_multiprod2
Definition poly.h:37
#define poly_multieval_precompute
Definition poly.h:64
for i
for j
#define steps
Definition steps.h:7
Definition proj.h:18
fp z
Definition proj.h:20
fp x
Definition proj.h:19
f a
Definition to_model.m:12