11package org .bouncycastle .pqc .crypto .mayo ;
22
3- public class GF16Utils
3+ class GF16Utils
44{
55 static final long NIBBLE_MASK_MSB = 0x7777777777777777L ;
66 static final long MASK_MSB = 0x8888888888888888L ;
@@ -20,7 +20,7 @@ public class GF16Utils
2020 * @param acc the accumulator long array; the target vector starts at index accOffset
2121 * @param accOffset the starting index in 'acc'
2222 */
23- public static void mVecMulAdd (int mVecLimbs , long [] in , int inOffset , int b , long [] acc , int accOffset )
23+ static void mVecMulAdd (int mVecLimbs , long [] in , int inOffset , int b , long [] acc , int accOffset )
2424 {
2525 long a , r64 , a_msb , a_msb3 ;
2626 long b32 = b & 0x00000000FFFFFFFFL ;
@@ -67,18 +67,17 @@ public static void mVecMulAdd(int mVecLimbs, long[] in, int inOffset, int b, lon
6767 * @param acc the accumulator (as a flat long[] array) with dimensions (bsMatRows x matCols);
6868 * each “entry” is an m‐vector (length mVecLimbs).
6969 * @param bsMatRows number of rows in the bsMat (the “triangular” matrix’s row count).
70- * @param bsMatCols number of columns in bsMat.
7170 * @param matCols number of columns in the matrix “mat.”
7271 */
73- public static void mulAddMUpperTriangularMatXMat (int mVecLimbs , long [] bsMat , byte [] mat , long [] acc , int accOff ,
74- int bsMatRows , int bsMatCols , int matCols )
72+ static void mulAddMUpperTriangularMatXMat (int mVecLimbs , long [] bsMat , byte [] mat , long [] acc , int accOff ,
73+ int bsMatRows , int matCols )
7574 {
7675 int bsMatEntriesUsed = 0 ;
7776 int matColsmVecLimbs = matCols * mVecLimbs ;
7877 for (int r = 0 , rmatCols = 0 , rmatColsmVecLimbs = 0 ; r < bsMatRows ; r ++, rmatCols += matCols , rmatColsmVecLimbs += matColsmVecLimbs )
7978 {
8079 // For each row r, the inner loop covers the upper-triangular columns c = r .. bsMatRows-1.
81- for (int c = r , cmatCols = rmatCols ; c < bsMatCols ; c ++, cmatCols += matCols )
80+ for (int c = r , cmatCols = rmatCols ; c < bsMatRows ; c ++, cmatCols += matCols )
8281 {
8382 for (int k = 0 , kmVecLimbs = 0 ; k < matCols ; k ++, kmVecLimbs += mVecLimbs )
8483 {
@@ -103,8 +102,8 @@ public static void mulAddMUpperTriangularMatXMat(int mVecLimbs, long[] bsMat, by
103102 * @param matCols number of columns in “mat.”
104103 * @param bsMatCols number of columns in the bsMat matrix.
105104 */
106- public static void mulAddMatTransXMMat (int mVecLimbs , byte [] mat , long [] bsMat , int bsMatOff , long [] acc ,
107- int matRows , int matCols , int bsMatCols )
105+ static void mulAddMatTransXMMat (int mVecLimbs , byte [] mat , long [] bsMat , int bsMatOff , long [] acc ,
106+ int matRows , int matCols , int bsMatCols )
108107 {
109108 // Loop over each column r of mat (which becomes row of mat^T)
110109 for (int r = 0 ; r < matCols ; r ++)
@@ -141,8 +140,8 @@ public static void mulAddMatTransXMMat(int mVecLimbs, byte[] mat, long[] bsMat,
141140 * @param matCols the number of columns in the matrix
142141 * @param bsMatCols the number of columns in the bit‐sliced matrix (per block)
143142 */
144- public static void mulAddMatXMMat (int mVecLimbs , byte [] mat , long [] bsMat , long [] acc ,
145- int matRows , int matCols , int bsMatCols )
143+ static void mulAddMatXMMat (int mVecLimbs , byte [] mat , long [] bsMat , long [] acc ,
144+ int matRows , int matCols , int bsMatCols )
146145 {
147146 for (int r = 0 ; r < matRows ; r ++)
148147 {
@@ -163,8 +162,8 @@ public static void mulAddMatXMMat(int mVecLimbs, byte[] mat, long[] bsMat, long[
163162 }
164163 }
165164
166- public static void mulAddMatXMMat (int mVecLimbs , byte [] mat , long [] bsMat , int bsMatOff , long [] acc ,
167- int matRows , int matCols , int bsMatCols )
165+ static void mulAddMatXMMat (int mVecLimbs , byte [] mat , long [] bsMat , int bsMatOff , long [] acc ,
166+ int matRows , int matCols , int bsMatCols )
168167 {
169168 for (int r = 0 ; r < matRows ; r ++)
170169 {
@@ -204,8 +203,8 @@ public static void mulAddMatXMMat(int mVecLimbs, byte[] mat, long[] bsMat, int b
204203 * @param bsMatCols the number of columns in the bit‑sliced matrix.
205204 * @param matRows the number of rows in the matrix.
206205 */
207- public static void mulAddMUpperTriangularMatXMatTrans (int mVecLimbs , long [] bsMat , byte [] mat , long [] acc ,
208- int bsMatRows , int bsMatCols , int matRows )
206+ static void mulAddMUpperTriangularMatXMatTrans (int mVecLimbs , long [] bsMat , byte [] mat , long [] acc ,
207+ int bsMatRows , int bsMatCols , int matRows )
209208 {
210209 int bsMatEntriesUsed = 0 ;
211210 for (int r = 0 ; r < bsMatRows ; r ++)
@@ -236,23 +235,28 @@ public static void mulAddMUpperTriangularMatXMatTrans(int mVecLimbs, long[] bsMa
236235 * @param b an element in GF(16) (only the lower 4 bits are used)
237236 * @return the product a * b in GF(16)
238237 */
239- public static int mulF (int a , int b )
238+ static int mulF (int a , int b )
240239 {
241- // In C there is a conditional XOR with unsigned_char_blocker to work around
242- // compiler-specific behavior. In Java we can omit it (or define it as needed).
243- // a ^= unsignedCharBlocker; // Omitted in Java
244-
245- // Perform carryless multiplication:
246- // Multiply b by each bit of a and XOR the results.
247- int p = ((a & 1 ) * b ) ^ ((a & 2 ) * b ) ^ ((a & 4 ) * b ) ^ ((a & 8 ) * b );
248-
240+ // Carryless multiply, branch-free: -(bit i of a) is an all-ones or all-zero mask that selects (b << i); the selected terms are XORed.
241+ int p = (-(a & 1 ) & b ) ^ (-((a >> 1 ) & 1 ) & (b << 1 )) ^ (-((a >> 2 ) & 1 ) & (b << 2 )) ^ (-((a >> 3 ) & 1 ) & (b << 3 ));
249242 // Reduce modulo f(X) = x^4 + x + 1: bits 4-7 of p are folded back using x^4 = x + 1 (topP >> 4 adds the "1" term, topP >> 3 the "x" term).
250- // Extract the upper nibble (bits 4 to 7).
251243 int topP = p & 0xF0 ;
252- // The reduction: XOR p with (topP shifted right by 4 and by 3) and mask to 4 bits.
253244 return (p ^ (topP >> 4 ) ^ (topP >> 3 )) & 0x0F ;
254245 }
255246
247+ /**
248+ * Computes the multiplicative inverse of a GF(16) element as a^14, using repeated squaring with mulF; an input of 0 yields 0.
249+ */
250+ static byte inverseF (int a )
251+ {
252+ // Every nonzero element of GF(16) satisfies a^15 = 1, so a^-1 = a^14 = a^8 * a^6.
253+ int a2 = mulF (a , a );
254+ int a4 = mulF (a2 , a2 );
255+ int a8 = mulF (a4 , a4 );
256+ int a6 = mulF (a2 , a4 ); // a^2 * a^4
257+ return (byte ) mulF (a8 , a6 ); // a^8 * a^6 = a^14
258+ }
259+
256260 /**
257261 * Performs a GF(16) carryless multiplication of a nibble (lower 4 bits of a)
258262 * with a 64-bit word b, then reduces modulo the polynomial x⁴ + x + 1 on each byte.
@@ -261,64 +265,29 @@ public static int mulF(int a, int b)
261265 * @param b a 64-bit word holding 8 GF(16) elements, one per byte in the low nibble (the result is masked to the low nibble of each byte)
262266 * @return the reduced 64-bit word after multiplication
263267 */
264- public static long mulFx8 (byte a , long b )
268+ static long mulFx8 (byte a , long b )
265269 {
266270 // Convert 'a' to an unsigned int so that bit operations work as expected.
267271 int aa = a & 0xFF ;
268272 // Carryless multiplication over the lower 4 bits of 'aa': for bit i, -(bit) is an int mask (0 or -1)
269273 // that is widened to an all-zero or all-ones long when ANDed with (b << i); the selected terms are XORed.
270- long p = ((aa & 1 ) * b ) ^ (( aa & 2 ) * b ) ^ (( aa & 4 ) * b ) ^ (( aa & 8 ) * b );
274+ long p = (- (aa & 1 ) & b ) ^ (-(( aa >> 1 ) & 1 ) & ( b << 1 )) ^ (-(( aa >> 2 ) & 1 ) & ( b << 2 )) ^ (-(( aa >> 3 ) & 1 ) & ( b << 3 ) );
271275
272276 // Reduction mod (x^4 + x + 1), all bytes in parallel: the high nibble of each byte is folded into its low nibble, and the final mask keeps only the low nibbles.
273277 long topP = p & 0xf0f0f0f0f0f0f0f0L ;
274278 return (p ^ (topP >> 4 ) ^ (topP >> 3 )) & 0x0f0f0f0f0f0f0f0fL ;
275279 }
276280
277- public static void matMul (byte [] a , byte [] b , byte [] c , int colrowAB , int rowA , int colB )
278- {
279- int cIndex = 0 ;
280- for (int i = 0 ; i < rowA ; i ++)
281- {
282- int aRowStart = i * colrowAB ;
283- for (int j = 0 ; j < colB ; j ++)
284- {
285- c [cIndex ++] = lincomb (a , aRowStart , b , j , colrowAB , colB );
286- }
287- }
288- }
289-
290- public static void matMul (byte [] a , int aOff , byte [] b , int bOff , byte [] c , int cOff ,
291- int colrowAB , int rowA , int colB )
292- {
293- for (int i = 0 , aRowStart = 0 ; i < rowA ; i ++, aRowStart += colrowAB )
294- {
295- for (int j = 0 ; j < colB ; j ++)
296- {
297- c [cOff ++] = lincomb (a , aOff + aRowStart , b , bOff + j , colrowAB , colB );
298- }
299- }
300- }
301-
302- private static byte lincomb (byte [] a , int aStart , byte [] b , int bStart ,
303- int colrowAB , int colB )
304- {
305- byte result = 0 ;
306- for (int k = 0 ; k < colrowAB ; k ++)
307- {
308- result ^= mulF (a [aStart + k ], b [bStart + k * colB ]);
309- }
310- return result ;
311- }
312-
313- public static void matAdd (byte [] a , int aOff , byte [] b , int bOff , byte [] c , int cOff , int m , int n )
// matMul (new form): for each of the rowA rows of 'a' (row length colrowAB), stores in c[i] the XOR over k of mulF(a[i * colrowAB + k], b[bOff + k]); output starts at c[0].
281+ static void matMul (byte [] a , byte [] b , int bOff , byte [] c , int colrowAB , int rowA )
314282 {
315- for (int i = 0 , in = 0 ; i < m ; i ++, in += n )
283+ for (int i = 0 , aRowStart = 0 , cOff = 0 ; i < rowA ; i ++, aRowStart += colrowAB )
316284 {
317- for (int j = 0 ; j < n ; j ++)
285+ byte result = 0 ;
286+ for (int k = 0 ; k < colrowAB ; k ++)
318287 {
319- int idx = in + j ;
320- c [idx + cOff ] = (byte )(a [idx + aOff ] ^ b [idx + bOff ]);
288+ result ^= mulF (a [aRowStart + k ], b [bOff + k ]);
321289 }
290+ c [cOff ++] = result ;
322291 }
323292 }
324293}
0 commit comments