{
    int64_t sqrtvelu = 0;
    int64_t bs = 0;
    int64_t gs = 0;

    /* Baby-step/giant-step sizes for the sqrt-Velu evaluation;
       bs == 0 selects the plain evaluation branch below. */
    steps(&bs, &gs, klower);
    if (bs)
    {
        sqrtvelu = 1;
    }

    fp tmp0, tmp1, tmp2, tmp3, tmp4;

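    /* Curve constants: Aed.x = A->x + 2*A->z and Aed.z = A->x - 2*A->z
       (Edwards form of the Montgomery coefficient); A24.z = 4*A->z is part of
       the doubling constant passed to xDBL below. */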
    fp_double2(&Aed.z, (const fp *)&A->z);

    fp_add3(&Aed.x, (const fp *)&A->x, (const fp *)&Aed.z);

    fp_double2(&A24.z, (const fp *)&Aed.z);

    fp_sub3(&Aed.z, (const fp *)&A->x, (const fp *)&Aed.z);

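    /* M[s] holds the multiple [s]K of the kernel generator K; in debug builds
       Minit[] tracks which entries have been computed so the addition chains
       below can assert that their inputs are ready. */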
#ifndef NDEBUG
    int Minit[kupper];

    for (int64_t s = 0; s < kupper; ++s)
        Minit[s] = 0;

    Minit[1] = 1;
#endif
    M[1] = *K;
    xDBL(&M[2], K, &A24, 0);
#ifndef NDEBUG
    Minit[2] = 1;
#endif
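
    /* sqrt-Velu only needs selected multiples of K: the odd multiples used as
       baby steps, the even multiples in the leftover range, and the multiples
       of 2*bs used as giant steps. */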
    if (sqrtvelu)
    {
        for (int64_t s = 3; s < kupper; ++s)
        {
            if (s & 1)
            {
                {
                    if (s == 3)
                    {
                        xADD(&M[s], &M[2], &M[1], &M[1]);
#ifndef NDEBUG
                        Minit[s] = 1;
#endif
                        continue;
                    }
                    xADD(&M[s], &M[s - 2], &M[2], &M[s - 4]);
#ifndef NDEBUG
                    Minit[s] = 1;
#endif
                    continue;
                }
            }
            else
            {
                int64_t i = s / 2 - 1;
                if (i < (kupper - 1) / 2 - 2 * bs * gs)
                {
                    if (s == 4)
                    {
                        xDBL(&M[s], &M[2], &A24, 0);
#ifndef NDEBUG
                        Minit[s] = 1;
#endif
                        continue;
                    }
                    xADD(&M[s], &M[s - 2], &M[2], &M[s - 4]);
#ifndef NDEBUG
                    Minit[s] = 1;
#endif
                    continue;
                }
            }

            if (bs > 0)
            {
                if (s == 2 * bs)
                {
                    xADD(&M[s], &M[bs + 1], &M[bs - 1], &M[2]);
#ifndef NDEBUG
                    Minit[s] = 1;
#endif
                    continue;
                }
                else if (s == 4 * bs)
                {
                    xDBL(&M[s], &M[2 * bs], &A24, 0);
#ifndef NDEBUG
                    Minit[s] = 1;
#endif
                    continue;
                }
                else if (s == 6 * bs)
                {
                    xADD(&M[s], &M[4 * bs], &M[2 * bs], &M[2 * bs]);
#ifndef NDEBUG
                    Minit[s] = 1;
#endif
                    continue;
                }
                else if (s % (4 * bs) == 2 * bs)
                {
                    int64_t j = s / (4 * bs);
                    assert(s == 2 * bs * (2 * j + 1));
                    {
                        assert(Minit[s - 4 * bs]);
                        assert(Minit[s - 8 * bs]);
                        xADD(&M[s], &M[s - 4 * bs], &M[4 * bs], &M[s - 8 * bs]);
#ifndef NDEBUG
                        Minit[s] = 1;
#endif
                        continue;
                    }
                }
            }
        }
    }
    else
    {
        for (int64_t i = 3; i <= (kupper - 1) / 2; ++i)
        {
#ifndef NDEBUG
#endif
            xADD(&M[i], &M[i - 1], K, &M[i - 2]);
        }
    }
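
    /* Per-point working storage: Qbatch[h] accumulates the evaluation at P[h],
       and Psum[h] = P[h].x + P[h].z, Pdif[h] = P[h].x - P[h].z are cached for
       reuse. fixPlen = max(Plen, 1) keeps the variable-length arrays from
       having length zero. */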
    int64_t fixPlen = 0;
    if (Plen > 0) {
        fixPlen = Plen;
    } else {
        fixPlen = 1;
    }
    proj Qbatch[fixPlen];
    for (int64_t h = 0; h < Plen; ++h)
    {
    }
    fp Psum[fixPlen], Pdif[fixPlen];
    for (int64_t h = 0; h < Plen; ++h)
    {
        fp_add3(&Psum[h], (const fp *)&P[h].x, (const fp *)&P[h].z);
        fp_sub3(&Pdif[h], (const fp *)&P[h].x, (const fp *)&P[h].z);
    }
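
    /* Evaluation phase: accumulate, over the kernel points, the products that
       determine the codomain coefficient (Abatch) and the image of each P[h]
       (Qbatch[h]). The sqrt-Velu branch uses the baby-step roots (TI), one
       biquadratic per giant step, and an explicit loop over the leftover
       indices; the else branch below simply walks all multiples of K. */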
    if (sqrtvelu)
    {

        for (int64_t i = 0; i < bs; ++i)
        {
            fp_neg2(&TI[2 * i], (const fp *)&M[2 * i + 1].x);
        }


        for (int64_t j = 0; j < gs; ++j)
        {
            assert(Minit[2 * bs * (2 * j + 1)]);
            biquad_precompute_curve(Aprecomp[j], &M[2 * bs * (2 * j + 1)], A);
            biquad_postcompute_curve(T1 + 3 * j, Tminus1 + 3 * j, (const fp *)Aprecomp[j]);
        }

        fp precomp[precompsize];


        for (int64_t i = 1; i < bs; ++i)
            fp_mul2(&Abatch.x, (const fp *)&v[i]);

        for (int64_t i = 1; i < bs; ++i)
            fp_mul2(&Abatch.z, (const fp *)&v[i]);

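        /* Per input point: specialize the giant-step biquadratics at P[h]
           (Pprecomp, TP, TPinv) and fold the per-index values v[1..bs-1]
           into Qbatch[h].x and Qbatch[h].z. */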
        for (int64_t h = 0; h < Plen; ++h)
        {
            biquad_precompute_point(Pprecomp, &P[h]);

            for (int64_t j = 0; j < gs; ++j)
                biquad_postcompute_point(TP + 3 * j, (const fp *)Pprecomp, (const fp *)Aprecomp[j]);

            fp TPinv[2 * gs + 1];
            for (int64_t j = 0; j < 2 * gs + 1; ++j)

            for (int64_t i = 1; i < bs; ++i)
                fp_mul2(&Qbatch[h].z, (const fp *)&v[i]);

            for (int64_t i = 1; i < bs; ++i)
                fp_mul2(&Qbatch[h].x, (const fp *)&v[i]);
        }
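
        /* Leftover multiples handled by neither baby nor giant steps.
           The loop bound depends only on the public kupper; 'want' is 1 for
           the first (k - 1)/2 - 2*bs*gs iterations and 0 afterwards
           (e.g. ignore = 3 gives want = 1 for i = 0, 1, 2 and 0 from i = 3 on),
           so fp_cmov_ctidh leaves the accumulators unchanged in the dummy
           iterations and the work done is independent of the secret k. */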
        int64_t ignore = (k - 1) / 2 - 2 * bs * gs;
        for (int64_t i = 0; i < (kupper - 1) / 2 - 2 * bs * gs; ++i)
        {
            int64_t want = -((i - ignore) >> 61);
            fp_sub3(&tmp4, (const fp *)&M[2 * i + 2].x, (const fp *)&M[2 * i + 2].z);
            fp_add3(&tmp3, (const fp *)&M[2 * i + 2].x, (const fp *)&M[2 * i + 2].z);
            fp_mul3(&tmp2, (const fp *)&Abatch.x, (const fp *)&tmp4);
            fp_cmov_ctidh(&Abatch.x, (const fp *)&tmp2, want);
            fp_mul3(&tmp2, (const fp *)&Abatch.z, (const fp *)&tmp3);
            fp_cmov_ctidh(&Abatch.z, (const fp *)&tmp2, want);
            for (int64_t h = 0; h < Plen; ++h)
            {
                fp_mul3(&tmp1, (const fp *)&tmp4, (const fp *)&Psum[h]);
                fp_mul3(&tmp0, (const fp *)&tmp3, (const fp *)&Pdif[h]);
                fp_add3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
                fp_mul2(&tmp2, (const fp *)&Qbatch[h].x);
                fp_cmov_ctidh(&Qbatch[h].x, (const fp *)&tmp2, want);
                fp_sub3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
                fp_mul2(&tmp2, (const fp *)&Qbatch[h].z);
                fp_cmov_ctidh(&Qbatch[h].z, (const fp *)&tmp2, want);
            }
        }
    }
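    /* Plain evaluation: walk every multiple M[i] = [i]K up to the public
       bound, folding the cross products of (x - z, x + z) into Abatch and
       Qbatch[h]; iterations with i > (k - 1)/2 are dummies, masked out by
       fp_cmov_ctidh. */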
    else
    {
        int64_t ignore = (k + 1) / 2;

        fp_sub3(&tmp4, (const fp *)&M[1].x, (const fp *)&M[1].z);
        fp_add3(&tmp3, (const fp *)&M[1].x, (const fp *)&M[1].z);

        for (int64_t h = 0; h < Plen; ++h)
        {
            fp_mul3(&tmp1, (const fp *)&tmp4, (const fp *)&Psum[h]);
            fp_mul3(&tmp0, (const fp *)&tmp3, (const fp *)&Pdif[h]);
            fp_add3(&Qbatch[h].x, (const fp *)&tmp0, (const fp *)&tmp1);
            fp_sub3(&Qbatch[h].z, (const fp *)&tmp0, (const fp *)&tmp1);
        }

        for (int64_t i = 2; i <= (kupper - 1) / 2; ++i)
        {
            int64_t want = -((i - ignore) >> 61);

            fp_sub3(&tmp4, (const fp *)&M[i].x, (const fp *)&M[i].z);
            fp_add3(&tmp3, (const fp *)&M[i].x, (const fp *)&M[i].z);
            fp_mul3(&tmp2, (const fp *)&Abatch.x, (const fp *)&tmp4);
            fp_cmov_ctidh(&Abatch.x, (const fp *)&tmp2, want);
            fp_mul3(&tmp2, (const fp *)&Abatch.z, (const fp *)&tmp3);
            fp_cmov_ctidh(&Abatch.z, (const fp *)&tmp2, want);
            for (int64_t h = 0; h < Plen; ++h)
            {
                fp_mul3(&tmp1, (const fp *)&tmp4, (const fp *)&Psum[h]);
                fp_mul3(&tmp0, (const fp *)&tmp3, (const fp *)&Pdif[h]);
                fp_add3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
                fp_mul2(&tmp2, (const fp *)&Qbatch[h].x);
                fp_cmov_ctidh(&Qbatch[h].x, (const fp *)&tmp2, want);
                fp_sub3(&tmp2, (const fp *)&tmp0, (const fp *)&tmp1);
                fp_mul2(&tmp2, (const fp *)&Qbatch[h].z);
                fp_cmov_ctidh(&Qbatch[h].z, (const fp *)&tmp2, want);
            }
        }
    }
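
    /* Push each point through the isogeny: the image of P[h] is
       (P[h].x * Qbatch[h].x^2 : P[h].z * Qbatch[h].z^2). */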
    for (int64_t h = 0; h < Plen; ++h)
    {
        fp_sq1(&Qbatch[h].x);
        fp_sq1(&Qbatch[h].z);

        fp_mul2(&P[h].x, (const fp *)&Qbatch[h].x);
        fp_mul2(&P[h].z, (const fp *)&Qbatch[h].z);
    }
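
    /* Update the curve: combine the Edwards constants with the accumulated
       Abatch products via powpow8mod (which takes both the secret k and the
       public bound kupper), then convert back to the Montgomery coefficient
       stored in A. */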
    powpow8mod(&Aed.x, (const fp *)&Abatch.z, k, kupper);

    powpow8mod(&Aed.z, (const fp *)&Abatch.x, k, kupper);

    fp_add3(&A->x, (const fp *)&Aed.x, (const fp *)&Aed.z);
    fp_sub3(&A->z, (const fp *)&Aed.x, (const fp *)&Aed.z);
}