11package org .bouncycastle .pqc .crypto .hqc ;
22
3+ import org .bouncycastle .util .Arrays ;
4+
35class GF2PolynomialCalculator
46{
57 private final int VEC_N_SIZE_64 ;
@@ -13,218 +15,122 @@ class GF2PolynomialCalculator
1315 RED_MASK = red_mask ;
1416 }
1517
16- protected void multLongs (long [] res , long [] a , long [] b )
18+ public void vectMul (long [] o , long [] a1 , long [] a2 )
1719 {
18- long [] stack = new long [VEC_N_SIZE_64 << 3 ];
19- long [] o_karat = new long [(VEC_N_SIZE_64 << 1 ) + 1 ];
20-
21- karatsuba (o_karat , 0 , a , 0 , b , 0 , VEC_N_SIZE_64 , stack , 0 );
22- reduce (res , o_karat );
20+ long [] unreduced = new long [VEC_N_SIZE_64 << 1 ];
21+ long [] tmpBuffer = new long [VEC_N_SIZE_64 << 4 ];
22+ karatsuba (unreduced , 0 , a1 , 0 , a2 , 0 , VEC_N_SIZE_64 , tmpBuffer , 0 );
23+ reduce (o , unreduced );
2324 }
2425
25-
26- private void base_mul (long [] c , int cOffset , long a , long b )
26+ /**
27+ * Performs schoolbook multiplication over GF(2).
28+ *
29+ * <p>This method computes {@code r = a * b}, where {@code a} and {@code b} are
30+ * polynomials over GF(2), each represented as {@code n} 64-bit words. The result
31+ * is stored in {@code r} as {@code 2 * n} 64-bit words.</p>
32+ */
33+ private void schoolbookMul (long [] r , int rOff , long [] a , int aOff , long [] b , int bOff , int n )
2734 {
28- long h = 0 ;
29- long l = 0 ;
30- long g ;
31- long [] u = new long [16 ];
32- long [] mask_tab = new long [4 ];
33-
34- // Step 1
35- u [0 ] = 0 ;
36- u [1 ] = b & ((1L << (64 - 4 )) - 1L );
37- u [2 ] = u [1 ] << 1 ;
38- u [3 ] = u [2 ] ^ u [1 ];
39- u [4 ] = u [2 ] << 1 ;
40- u [5 ] = u [4 ] ^ u [1 ];
41- u [6 ] = u [3 ] << 1 ;
42- u [7 ] = u [6 ] ^ u [1 ];
43- u [8 ] = u [4 ] << 1 ;
44- u [9 ] = u [8 ] ^ u [1 ];
45- u [10 ] = u [5 ] << 1 ;
46- u [11 ] = u [10 ] ^ u [1 ];
47- u [12 ] = u [6 ] << 1 ;
48- u [13 ] = u [12 ] ^ u [1 ];
49- u [14 ] = u [7 ] << 1 ;
50- u [15 ] = u [14 ] ^ u [1 ];
51-
52- g =0 ;
53- long tmp1 = a & 15 ;
35+ Arrays .fill (r , rOff , rOff + (n << 1 ), 0L );
5436
55- for (int i = 0 ; i < 16 ; i ++)
37+ for (int i = 0 ; i < n ; i ++, rOff ++)
5638 {
57- long tmp2 = tmp1 - i ;
58- g ^= (u [i ] & -(1 - ((tmp2 | -tmp2 ) >>> 63 )));
59- }
60- l = g ;
61- h = 0 ;
62-
63- // Step 2
64- for (byte i = 4 ; i < 64 ; i += 4 )
65- {
66- g = 0 ;
67- long temp1 = (a >> i ) & 15 ;
68- for (int j = 0 ; j < 16 ; ++j )
39+ long ai = a [i + aOff ];
40+ for (int bit = 0 ; bit < 64 ; bit ++)
6941 {
70- long tmp2 = temp1 - j ;
71- g ^= (u [j ] & -(1 - ((tmp2 | -tmp2 ) >>> 63 )));
42+ long mask = -((ai >> bit ) & 1L );
43+ if (bit == 0 )
44+ {
45+ for (int j = 0 , rOff1 = rOff , bOff1 = bOff ; j < n ; j ++, rOff1 ++, bOff1 ++)
46+ {
47+ r [rOff1 ] ^= b [bOff1 ] & mask ;
48+ }
49+ }
50+ else
51+ {
52+ int inv = 64 - bit ;
53+ for (int j = 0 , rOff1 = rOff , bOff1 = bOff ; j < n ; j ++, bOff1 ++)
54+ {
55+ r [rOff1 ++] ^= (b [bOff1 ] << bit ) & mask ;
56+ r [rOff1 ] ^= (b [bOff1 ] >>> inv ) & mask ;
57+ }
58+ }
7259 }
73-
74- l ^= g << i ;
75- h ^= g >>> (64 - i );
76- }
77-
78- // Step 3
79- mask_tab [0 ] = - ((b >> 60 ) & 1 );
80- mask_tab [1 ] = - ((b >> 61 ) & 1 );
81- mask_tab [2 ] = - ((b >> 62 ) & 1 );
82- mask_tab [3 ] = - ((b >> 63 ) & 1 );
83-
84- l ^= ((a << 60 ) & mask_tab [0 ]);
85- h ^= ((a >>> 4 ) & mask_tab [0 ]);
86-
87- l ^= ((a << 61 ) & mask_tab [1 ]);
88- h ^= ((a >>> 3 ) & mask_tab [1 ]);
89-
90- l ^= ((a << 62 ) & mask_tab [2 ]);
91- h ^= ((a >>> 2 ) & mask_tab [2 ]);
92-
93- l ^= ((a << 63 ) & mask_tab [3 ]);
94- h ^= ((a >>> 1 ) & mask_tab [3 ]);
95-
96- c [0 + cOffset ] = l ;
97- c [1 + cOffset ] = h ;
98- }
99-
100-
101-
102-
103- private void karatsuba_add1 (long [] alh , int alhOffset ,
104- long [] blh , int blhOffset ,
105- long [] a , int aOffset ,
106- long [] b , int bOffset ,
107- int size_l , int size_h )
108- {
109- for (int i = 0 ; i < size_h ; i ++)
110- {
111- alh [i + alhOffset ] = a [i + aOffset ] ^ a [i + size_l + aOffset ];
112- blh [i + blhOffset ] = b [i + bOffset ] ^ b [i + size_l + bOffset ];
113- }
114-
115- if (size_h < size_l )
116- {
117- alh [size_h + alhOffset ] = a [size_h + aOffset ];
118- blh [size_h + blhOffset ] = b [size_h + bOffset ];
11960 }
12061 }
12162
122-
123-
124- private void karatsuba_add2 (long [] o , int oOffset ,
125- long [] tmp1 , int tmp1Offset ,
126- long [] tmp2 , int tmp2Offset ,
127- int size_l , int size_h )
128- {
129- for (int i = 0 ; i < (2 * size_l ) ; i ++)
130- {
131- tmp1 [i + tmp1Offset ] = tmp1 [i + tmp1Offset ] ^ o [i + oOffset ];
132- }
133-
134- for (int i = 0 ; i < ( 2 * size_h ); i ++)
135- {
136- tmp1 [i + tmp1Offset ] = tmp1 [i + tmp1Offset ] ^ tmp2 [i + tmp2Offset ];
137- }
138-
139- for (int i = 0 ; i < (2 * size_l ); i ++)
140- {
141- o [i + size_l + oOffset ] = o [i + size_l + oOffset ] ^ tmp1 [i + tmp1Offset ];
142- }
143- }
144-
145-
146-
14763 /**
148- * Karatsuba multiplication of a and b, Implementation inspired from the NTL library.
64+ * Performs Karatsuba multiplication over GF(2) using a caller-supplied temporary buffer.
65+ *
66+ * <p>If {@code n <= 16}, this method falls back to
67+ * {@link #schoolbookMul(long[], int, long[], int, long[], int, int)}.
68+ * Otherwise, the operands are split in half and the algorithm is applied recursively.</p>
14969 *
150- * \param[out] o Polynomial
151- * \param[in] a Polynomial
152- * \param[in] b Polynomial
153- * \param[in] size Length of polynomial
154- * \param[in] stack Length of polynomial
15570 */
156- private void karatsuba (long [] o , int oOffset , long [] a , int aOffset , long [] b , int bOffset , int size , long [] stack , int stackOffset )
71+ private void karatsuba (long [] r , int rOffset , long [] a , int aOffset ,
72+ long [] b , int bOffset , int n , long [] tmpBuffer , int tmpOffset )
15773 {
158- int size_l , size_h ;
159- int ahOffset , bhOffset ;
160-
161- if (size == 1 )
74+ if (n <= 16 )
16275 {
163- base_mul ( o , oOffset , a [ 0 + aOffset ] , b [ 0 + bOffset ] );
76+ schoolbookMul ( r , rOffset , a , aOffset , b , bOffset , n );
16477 return ;
16578 }
16679
167- size_h = size / 2 ;
168- size_l = (size + 1 ) / 2 ;
169-
170- // alh = stack
171- int alhOffset = stackOffset ;
172- // blh = stack with size_l offset
173- int blhOffset = alhOffset + size_l ;
174- // tmp1 = stack with size_l * 2 offset;
175- int tmp1Offset = blhOffset + size_l ;
176- // tmp2 = o with size_l * 2 offset;
177- int tmp2Offset = oOffset + size_l *2 ;
178-
179- stackOffset += 4 * size_l ;
80+ int m = n >> 1 ;
81+ int n1 = n - m ;
82+ int nx2 = n << 1 ;
83+ int mx2 = m << 1 ;
84+ int n1x2 = n1 << 1 ;
18085
181- ahOffset = aOffset + size_l ;
182- bhOffset = bOffset + size_l ;
86+ int z2Offset = tmpOffset + nx2 ;
87+ int zMidOffset = z2Offset + nx2 ;
88+ int taOffset = zMidOffset + nx2 ;
89+ int tbOffset = taOffset + n ;
90+ int childBufferOffset = tmpOffset + (n << 3 );
18391
184- karatsuba (o , oOffset , a , aOffset , b , bOffset , size_l , stack , stackOffset );
92+ karatsuba (tmpBuffer , tmpOffset , a , aOffset , b , bOffset , m , tmpBuffer , childBufferOffset );
93+ karatsuba (tmpBuffer , z2Offset , a , aOffset + m , b , bOffset + m , n1 , tmpBuffer , childBufferOffset );
18594
186- karatsuba (o , tmp2Offset , a , ahOffset , b , bhOffset , size_h , stack , stackOffset );
95+ for (int i = 0 ; i < n1 ; i ++)
96+ {
97+ long loa = (i < m ) ? a [aOffset + i ] : 0 ;
98+ long lob = (i < m ) ? b [bOffset + i ] : 0 ;
99+ tmpBuffer [taOffset + i ] = loa ^ a [aOffset + m + i ];
100+ tmpBuffer [tbOffset + i ] = lob ^ b [bOffset + m + i ];
101+ }
187102
188- karatsuba_add1 ( stack , alhOffset , stack , blhOffset , a , aOffset , b , bOffset , size_l , size_h );
103+ karatsuba ( tmpBuffer , zMidOffset , tmpBuffer , taOffset , tmpBuffer , tbOffset , n1 , tmpBuffer , childBufferOffset );
189104
190- karatsuba (stack , tmp1Offset , stack , alhOffset , stack , blhOffset , size_l , stack , stackOffset );
105+ System .arraycopy (tmpBuffer , tmpOffset , r , rOffset , mx2 );
106+ System .arraycopy (tmpBuffer , z2Offset , r , rOffset + mx2 , n1x2 );
191107
192- karatsuba_add2 (o , oOffset , stack , tmp1Offset , o , tmp2Offset , size_l , size_h );
108+ for (int i = 0 ; i < 2 * n1 ; i ++)
109+ {
110+ long z0i = (i < mx2 ) ? tmpBuffer [tmpOffset + i ] : 0 ;
111+ long z2i = (i < n1x2 ) ? tmpBuffer [z2Offset + i ] : 0 ;
112+ r [rOffset + m + i ] ^= tmpBuffer [zMidOffset + i ] ^ z0i ^ z2i ;
113+ }
193114 }
194115
195-
196-
197116 /**
198- * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
117+ * Reduces a polynomial modulo {@code X^n - 1}.
199118 *
200- * This function computes the modular reduction of the polynomial a(x)
119+ * <p>This computes {@code o(x) = a(x) mod (X^n - 1)}, where
120+ * {@code a(x)} may have degree up to {@code 2n - 2}. The result
121+ * is a polynomial of degree less than {@code n}, represented as
122+ * {@code n} 64-bit words.</p>
201123 *
202- * @param[in] a Pointer to the polynomial a(x)
203- * @param[out] o Pointer to the result
124+ * @param o the result buffer of length {@code n} words,
125+ * where the reduced polynomial is stored
126+ * @param a the input polynomial to be reduced
204127 */
205128 private void reduce (long [] o , long [] a )
206129 {
207- int i ;
208- long r ;
209- long carry ;
210-
211- for (i = 0 ; i < VEC_N_SIZE_64 ; i ++)
130+ for (int i = 0 ; i < VEC_N_SIZE_64 ; i ++)
212131 {
213- r = a [i + VEC_N_SIZE_64 - 1 ] >>> (PARAM_N & 0x3F );
214- carry = (long ) (a [i + VEC_N_SIZE_64 ] << (64 - (PARAM_N & 0x3FL )));
215- o [i ] = a [i ] ^ r ^ carry ;
132+ o [i ] = a [i ] ^ (a [i + VEC_N_SIZE_64 - 1 ] >>> (PARAM_N & 0x3F )) ^ ((a [i + VEC_N_SIZE_64 ] << (64 - (PARAM_N & 0x3FL ))));
216133 }
217134 o [VEC_N_SIZE_64 - 1 ] &= RED_MASK ;
218135 }
219-
220-
221-
222- static void addLongs (long [] res , long [] a , long [] b )
223- {
224- for (int i = 0 ; i < a .length ; i ++)
225- {
226- res [i ] = a [i ] ^ b [i ];
227- }
228- }
229-
230136}
0 commit comments