22
33public class GF16Utils
44{
5+ static final long NIBBLE_MASK_MSB = 0x7777777777777777L ;
6+ static final long MASK_MSB = 0x8888888888888888L ;
7+ static final long MASK_LSB = 0x1111111111111111L ;
8+ static final long NIBBLE_MASK_LSB = ~MASK_LSB ;
59
610 /**
711 * Multiplies each limb of a GF(16) vector (subarray of 'in') by the GF(16) element 'b'
@@ -18,8 +22,7 @@ public class GF16Utils
1822 */
1923 public static void mVecMulAdd (int mVecLimbs , long [] in , int inOffset , int b , long [] acc , int accOffset )
2024 {
21- long maskMsb = 0x8888888888888888L ;
22- long a , r64 , a_msb ;
25+ long a , r64 , a_msb , a_msb3 ;
2326 long b32 = b & 0x00000000FFFFFFFFL ;
2427 long b32and1 = b32 & 1 ;
2528 long b32_1_1 = ((b32 >>> 1 ) & 1 );
@@ -29,23 +32,26 @@ public static void mVecMulAdd(int mVecLimbs, long[] in, int inOffset, int b, lon
2932 {
3033 // In the original code there is a conditional XOR with unsigned_char_blocker;
3134 // here we simply use b directly.
32- a = in [inOffset + i ];
33- r64 = a * b32and1 ;
35+ a = in [inOffset ++ ];
36+ r64 = a & - b32and1 ;
3437
35- a_msb = a & maskMsb ;
36- a ^= a_msb ;
37- a = (a << 1 ) ^ ((a_msb >>> 3 ) * 3 );
38- r64 ^= a * b32_1_1 ;
38+ a_msb = a & MASK_MSB ;
39+ a &= NIBBLE_MASK_MSB ;
40+ a_msb3 = a_msb >>> 3 ;
41+ a = (a << 1 ) ^ (a_msb3 + (a_msb3 << 1 ));
42+ r64 ^= a & -b32_1_1 ;
3943
40- a_msb = a & maskMsb ;
41- a ^= a_msb ;
42- a = (a << 1 ) ^ ((a_msb >>> 3 ) * 3 );
43- r64 ^= a * b32_2_1 ;
44+ a_msb = a & MASK_MSB ;
45+ a &= NIBBLE_MASK_MSB ;
46+ a_msb3 = a_msb >>> 3 ;
47+ a = (a << 1 ) ^ (a_msb3 + (a_msb3 << 1 ));
48+ r64 ^= a & -b32_2_1 ;
4449
45- a_msb = a & maskMsb ;
46- a ^= a_msb ;
47- a = (a << 1 ) ^ ((a_msb >>> 3 ) * 3 );
48- acc [accOffset + i ] ^= r64 ^ (a * b32_3_1 );
50+ a_msb = a & MASK_MSB ;
51+ a &= NIBBLE_MASK_MSB ;
52+ a_msb3 = a_msb >>> 3 ;
53+ a = (a << 1 ) ^ (a_msb3 + (a_msb3 << 1 ));
54+ acc [accOffset ++] ^= r64 ^ (a & -b32_3_1 );
4955 }
5056 }
5157
@@ -190,24 +196,22 @@ public static void mulAddMatXMMat(int mVecLimbs, byte[] mat, long[] bsMat, int b
190196 * by the scalar (from {@code mat}) and adds the result to the corresponding vector in {@code acc}.
191197 * </p>
192198 *
193- * @param mVecLimbs the number of limbs (elements) in each vector.
194- * @param bsMat the bit‑sliced matrix stored as a long array.
195- * @param mat the matrix stored as a byte array.
196- * @param acc the accumulator array where the results are added.
197- * @param bsMatRows the number of rows in the bit‑sliced matrix.
198- * @param bsMatCols the number of columns in the bit‑sliced matrix.
199- * @param matRows the number of rows in the matrix.
200- * @param triangular if non‑zero, indicates that the matrix is upper triangular (i.e. the loop for {@code c}
201- * starts at {@code triangular * r}).
199+ * @param mVecLimbs the number of limbs (elements) in each vector.
200+ * @param bsMat the bit‑sliced matrix stored as a long array.
201+ * @param mat the matrix stored as a byte array.
202+ * @param acc the accumulator array where the results are added.
203+ * @param bsMatRows the number of rows in the bit‑sliced matrix.
204+ * @param bsMatCols the number of columns in the bit‑sliced matrix.
205+ * @param matRows the number of rows in the matrix.
202206 */
203207 public static void mulAddMUpperTriangularMatXMatTrans (int mVecLimbs , long [] bsMat , byte [] mat , long [] acc ,
204- int bsMatRows , int bsMatCols , int matRows , int triangular )
208+ int bsMatRows , int bsMatCols , int matRows )
205209 {
206210 int bsMatEntriesUsed = 0 ;
207211 for (int r = 0 ; r < bsMatRows ; r ++)
208212 {
209213 // Upper triangular: only entries with c >= r are visited, so c starts at r.
210- for (int c = triangular * r ; c < bsMatCols ; c ++)
214+ for (int c = r ; c < bsMatCols ; c ++)
211215 {
212216 for (int k = 0 ; k < matRows ; k ++)
213217 {
@@ -270,8 +274,7 @@ public static long mulFx8(byte a, long b)
270274 return (p ^ (topP >> 4 ) ^ (topP >> 3 )) & 0x0f0f0f0f0f0f0f0fL ;
271275 }
272276
273- public static void matMul (byte [] a , byte [] b , byte [] c ,
274- int colrowAB , int rowA , int colB )
277+ public static void matMul (byte [] a , byte [] b , byte [] c , int colrowAB , int rowA , int colB )
275278 {
276279 int cIndex = 0 ;
277280 for (int i = 0 ; i < rowA ; i ++)
@@ -287,12 +290,11 @@ public static void matMul(byte[] a, byte[] b, byte[] c,
287290 public static void matMul (byte [] a , int aOff , byte [] b , int bOff , byte [] c , int cOff ,
288291 int colrowAB , int rowA , int colB )
289292 {
290- int cIndex = 0 ;
291293 for (int i = 0 , aRowStart = 0 ; i < rowA ; i ++, aRowStart += colrowAB )
292294 {
293295 for (int j = 0 ; j < colB ; j ++)
294296 {
295- c [cOff + cIndex ++] = lincomb (a , aOff + aRowStart , b , bOff + j , colrowAB , colB );
297+ c [cOff ++] = lincomb (a , aOff + aRowStart , b , bOff + j , colrowAB , colB );
296298 }
297299 }
298300 }
0 commit comments