Skip to content

Commit b31855d

Browse files
committed
Perform the scaling together with the initial normalization, as the final normalization is no longer needed.
Signed-off-by: Markku-Juhani O. Saarinen <[email protected]>
1 parent e15004e commit b31855d

File tree

1 file changed

+3
-20
lines changed

1 file changed

+3
-20
lines changed

mlkem/src/native/riscv64/src/rv64v_poly.c

Lines changed: 3 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -389,9 +389,9 @@ static vint16m2_t mlk_rv64v_intt2(vint16m2_t vp, vint16m1_t cz)
389389
t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
390390
t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
391391

392-
/* move to positive range [0, q-1] for the reverse transform */
393-
t0 = fq_mulq_vx(t0, MLK_RVV_MONT_R1, vl);
394-
t1 = fq_mulq_vx(t1, MLK_RVV_MONT_R1, vl);
392+
/* pre-scale and move to positive range [0, q-1] for inverse transform */
393+
t0 = fq_mulq_vx(t0, MLK_RVV_MONT_NR, vl);
394+
t1 = fq_mulq_vx(t1, MLK_RVV_MONT_NR, vl);
395395

396396
c0 = __riscv_vrgather_vv_i16m1(cz, cs2, vl);
397397
MLK_RVV_BFLY_RV(t0, t1, vt, c0, vl);
@@ -512,23 +512,6 @@ void mlk_rv64v_poly_invntt_tomont(int16_t *r)
512512
MLK_RVV_BFLY_RX(v6, ve, vt, izeta[0x01], vl);
513513
MLK_RVV_BFLY_RX(v7, vf, vt, izeta[0x01], vl);
514514

515-
v0 = fq_mulq_vx(v0, MLK_RVV_MONT_NR, vl);
516-
v1 = fq_mulq_vx(v1, MLK_RVV_MONT_NR, vl);
517-
v2 = fq_mulq_vx(v2, MLK_RVV_MONT_NR, vl);
518-
v3 = fq_mulq_vx(v3, MLK_RVV_MONT_NR, vl);
519-
v4 = fq_mulq_vx(v4, MLK_RVV_MONT_NR, vl);
520-
v5 = fq_mulq_vx(v5, MLK_RVV_MONT_NR, vl);
521-
v6 = fq_mulq_vx(v6, MLK_RVV_MONT_NR, vl);
522-
v7 = fq_mulq_vx(v7, MLK_RVV_MONT_NR, vl);
523-
v8 = fq_mulq_vx(v8, MLK_RVV_MONT_NR, vl);
524-
v9 = fq_mulq_vx(v9, MLK_RVV_MONT_NR, vl);
525-
va = fq_mulq_vx(va, MLK_RVV_MONT_NR, vl);
526-
vb = fq_mulq_vx(vb, MLK_RVV_MONT_NR, vl);
527-
vc = fq_mulq_vx(vc, MLK_RVV_MONT_NR, vl);
528-
vd = fq_mulq_vx(vd, MLK_RVV_MONT_NR, vl);
529-
ve = fq_mulq_vx(ve, MLK_RVV_MONT_NR, vl);
530-
vf = fq_mulq_vx(vf, MLK_RVV_MONT_NR, vl);
531-
532515
__riscv_vse16_v_i16m1(&r[0x00], v0, vl);
533516
__riscv_vse16_v_i16m1(&r[0x10], v1, vl);
534517
__riscv_vse16_v_i16m1(&r[0x20], v2, vl);

0 commit comments

Comments
 (0)