bcgit
diff --git a/core/src/main/java/org/bouncycastle/pqc/crypto/mayo/GF16Utils.java b/core/src/main/java/org/bouncycastle/pqc/crypto/mayo/GF16Utils.java
Lines changed: 18 additions & 77 deletions
diff --git a/core/src/main/java/org/bouncycastle/pqc/crypto/mayo/MayoKeyPairGenerator.java b/core/src/main/java/org/bouncycastle/pqc/crypto/mayo/MayoKeyPairGenerator.java
Lines changed: 47 additions & 100 deletions
@@ -16,26 +16,25 @@ public class GF16Utils
     public static long gf16vMulU64(long a, int b)
     {
         long maskMsb = 0x8888888888888888L;
-        long a64 = a;
         // In the original code there is a conditional XOR with unsigned_char_blocker;
         // here we simply use b directly.
         long b32 = b & 0x00000000FFFFFFFFL;
-        long r64 = a64 * (b32 & 1);
+        long r64 = a * (b32 & 1);
 
-        long a_msb = a64 & maskMsb;
-        a64 ^= a_msb;
-        a64 = (a64 << 1) ^ ((a_msb >>> 3) * 3);
-        r64 ^= a64 * ((b32 >> 1) & 1);
+        long a_msb = a & maskMsb;
+        a ^= a_msb;
+        a = (a << 1) ^ ((a_msb >>> 3) * 3);
+        r64 ^= a * ((b32 >> 1) & 1);
 
-        a_msb = a64 & maskMsb;
-        a64 ^= a_msb;
-        a64 = (a64 << 1) ^ ((a_msb >>> 3) * 3);
-        r64 ^= a64 * ((b32 >>> 2) & 1);
+        a_msb = a & maskMsb;
+        a ^= a_msb;
+        a = (a << 1) ^ ((a_msb >>> 3) * 3);
+        r64 ^= a * ((b32 >>> 2) & 1);
 
-        a_msb = a64 & maskMsb;
-        a64 ^= a_msb;
-        a64 = (a64 << 1) ^ ((a_msb >>> 3) * 3);
-        r64 ^= a64 * ((b32 >> 3) & 1);
+        a_msb = a & maskMsb;
+        a ^= a_msb;
+        a = (a << 1) ^ ((a_msb >>> 3) * 3);
+        r64 ^= a * ((b32 >> 3) & 1);
 
         return r64;
     }
@@ -61,18 +60,6 @@ public static void mVecMulAdd(int mVecLimbs, long[] in, int inOffset, int a, lon
         }
     }
 
-    /**
-     * Convenience overload of mVecMulAdd that assumes zero offsets.
-     *
-     * @param mVecLimbs the number of limbs
-     * @param in        the input vector
-     * @param a         the GF(16) element to multiply by
-     * @param acc       the accumulator vector
-     */
-    public static void mVecMulAdd(int mVecLimbs, long[] in, int a, long[] acc)
-    {
-        mVecMulAdd(mVecLimbs, in, 0, a, acc, 0);
-    }
 
     /**
      * Performs the multiplication and accumulation of a block of an upper‐triangular matrix
@@ -156,33 +143,18 @@ public static void mulAddMatTransXMMat(int mVecLimbs, byte[] mat, long[] bsMat,
         {
             for (int c = 0; c < matRows; c++)
             {
+                byte matVal = mat[c * matCols + r];
                 for (int k = 0; k < bsMatCols; k++)
                 {
-                    // For bsMat: the m-vector at index (c * bsMatCols + k)
                     int bsMatOffset = (c * bsMatCols + k) * mVecLimbs;
-                    // For mat: element at row c, column r.
-                    int a = mat[c * matCols + r] & 0xFF;
                     // For acc: add into the m-vector at index (r * bsMatCols + k)
                     int accOffset = (r * bsMatCols + k) * mVecLimbs;
-                    mVecMulAdd(mVecLimbs, bsMat, bsMatOffset, a, acc, accOffset);
+                    mVecMulAdd(mVecLimbs, bsMat, bsMatOffset, matVal, acc, accOffset);
                 }
             }
         }
     }
 
-
-    /**
-     * Adds (bitwise XOR) mVecLimbs elements from the source array (starting at srcOffset)
-     * into the destination array (starting at destOffset).
-     */
-    public static void mVecAdd(int mVecLimbs, long[] src, int srcOffset, long[] dest, int destOffset)
-    {
-        for (int i = 0; i < mVecLimbs; i++)
-        {
-            dest[destOffset + i] ^= src[srcOffset + i];
-        }
-    }
-
     /**
      * Multiplies a matrix (given as a byte array) with a bit‐sliced matrix (given as a long array)
      * and accumulates the result into the acc array.
@@ -288,30 +260,6 @@ public static void mulAddMUpperTriangularMatXMatTrans(int mVecLimbs, long[] bsMa
         }
     }
 
-    /**
-     * Multiplies a vector (from bsMat) by an unsigned scalar (from mat) and adds the result
-     * to the corresponding vector in acc.
-     *
-     * <p>
-     * This method corresponds to the C function <code>m_vec_mul_add</code>.
-     * It processes {@code mVecLimbs} elements starting from the given offsets in the source and accumulator arrays.
-     * </p>
-     *
-     * @param mVecLimbs   the number of limbs (elements) in the vector
-     * @param bsMat       the source array (bit-sliced matrix) of long values
-     * @param bsMatOffset the starting index in bsMat for the vector
-     * @param scalar      the scalar value (from mat), as a byte
-     * @param acc         the accumulator array where the result is added
-     * @param accOffset   the starting index in the accumulator array for the current vector
-     */
-    public static void mVecMulAdd(int mVecLimbs, long[] bsMat, int bsMatOffset, byte scalar, long[] acc, int accOffset)
-    {
-        for (int i = 0; i < mVecLimbs; i++)
-        {
-            acc[accOffset + i] ^= gf16vMulU64(bsMat[bsMatOffset + i], scalar);
-        }
-    }
-
     /**
      * GF(16) multiplication mod x^4 + x + 1.
      * <p>
@@ -339,8 +287,7 @@ public static int mulF(int a, int b)
         // Extract the upper nibble (bits 4 to 7).
         int topP = p & 0xF0;
         // The reduction: XOR p with (topP shifted right by 4 and by 3) and mask to 4 bits.
-        int out = (p ^ (topP >> 4) ^ (topP >> 3)) & 0x0F;
-        return out;
+        return (p ^ (topP >> 4) ^ (topP >> 3)) & 0x0F;
     }
 
     /**
@@ -364,8 +311,7 @@ public static long mulFx8(byte a, long b)
 
         // Reduction mod (x^4 + x + 1): process each byte in parallel.
         long topP = p & 0xf0f0f0f0f0f0f0f0L;
-        long out = (p ^ (topP >> 4) ^ (topP >> 3)) & 0x0f0f0f0f0f0f0f0fL;
-        return out;
+        return (p ^ (topP >> 4) ^ (topP >> 3)) & 0x0f0f0f0f0f0f0f0fL;
     }
 
     public static void matMul(byte[] a, byte[] b, byte[] c,
@@ -420,9 +366,6 @@ public static void matAdd(byte[] a, int aOff, byte[] b, int bOff, byte[] c, int
         }
     }
 
-    // Define the blocker constant as needed (set to 0 if not used).
-    private static final byte UNSIGNED_CHAR_BLOCKER = 0;
-
     /**
      * Returns 0x00 if a equals b, otherwise returns 0xFF.
      * This operation is performed in constant time.
@@ -442,9 +385,7 @@ public static byte ctCompare8(byte a, byte b)
         // If diff is 0, then -diff is 0, and shifting yields 0.
         // If diff is nonzero, -diff is negative, so the arithmetic shift yields -1 (0xFFFFFFFF),
         // which when cast to a byte becomes 0xFF.
-        int result = negDiff >> 31;
-        // XOR with UNSIGNED_CHAR_BLOCKER (assumed 0 here) and cast to byte.
-        return (byte)(result ^ UNSIGNED_CHAR_BLOCKER);
+        return (byte) (negDiff >> 31);
     }
 
     public static void efUnpackMVector(int legs, long[] packedRow, int packedRowOff, byte[] out)
 
@@ -5,173 +5,120 @@
 import org.bouncycastle.crypto.AsymmetricCipherKeyPair;
 import org.bouncycastle.crypto.AsymmetricCipherKeyPairGenerator;
 import org.bouncycastle.crypto.KeyGenerationParameters;
-import org.bouncycastle.util.Pack;
+import org.bouncycastle.util.Arrays;
+import org.bouncycastle.util.Longs;
 
 public class MayoKeyPairGenerator
     implements AsymmetricCipherKeyPairGenerator
 {
     private MayoParameters p;
     private SecureRandom random;
 
-
     public void init(KeyGenerationParameters param)
     {
         this.p = ((MayoKeyGenerationParameters)param).getParameters();
         this.random = param.getRandom();
     }
 
-
     @Override
     public AsymmetricCipherKeyPair generateKeyPair()
     {
-        int ret = MayoEngine.MAYO_OK;
+        // Retrieve parameters from p.
+        int mVecLimbs = p.getMVecLimbs();
+        int m = p.getM();
+        int v = p.getV();
+        int o = p.getO();
+        int oBytes = p.getOBytes();
+        int p1Limbs = p.getP1Limbs();
+        int p3Limbs = p.getP3Limbs();
+        int pkSeedBytes = p.getPkSeedBytes();
+        int skSeedBytes = p.getSkSeedBytes();
+
         byte[] cpk = new byte[p.getCpkBytes()];
         // seed_sk points to csk.
         byte[] seed_sk = new byte[p.getCskBytes()];
 
         // Allocate S = new byte[PK_SEED_BYTES_MAX + O_BYTES_MAX]
-        byte[] S = new byte[p.getPkSeedBytes() + p.getOBytes()];
+        byte[] seed_pk = new byte[pkSeedBytes + oBytes];
 
         // Allocate P as a long array of size (P1_LIMBS_MAX + P2_LIMBS_MAX)
-        long[] P = new long[p.getP1Limbs() + p.getP2Limbs()];
+        long[] P = new long[p1Limbs + p.getP2Limbs()];
 
         // Allocate P3 as a long array of size (O_MAX * O_MAX * M_VEC_LIMBS_MAX), zero-initialized.
-        long[] P3 = new long[p.getO() * p.getO() * p.getMVecLimbs()];
-
-        // seed_pk will be a reference into S.
-        byte[] seed_pk;
+        long[] P3 = new long[o * o * mVecLimbs];
 
         // Allocate O as a byte array of size (V_MAX * O_MAX).
         // Here we assume V_MAX is given by p.getV() (or replace with a constant if needed).
-        byte[] O = new byte[p.getV() * p.getO()];
-
-        // Retrieve parameters from p.
-        int m_vec_limbs = p.getMVecLimbs();
-        int param_m = p.getM();
-        int param_v = p.getV();
-        int param_o = p.getO();
-        int param_O_bytes = p.getOBytes();
-        int param_P1_limbs = p.getP1Limbs();
-        int param_P3_limbs = p.getP3Limbs();
-        int param_pk_seed_bytes = p.getPkSeedBytes();
-        int param_sk_seed_bytes = p.getSkSeedBytes();
-
-        // In the C code, P1 is P and P2 is P offset by param_P1_limbs.
-        // In Java, we will have functions (like expandP1P2) work on the full array P.
+        byte[] O = new byte[v * o];
 
         // Generate secret key seed (seed_sk) using a secure random generator.
         random.nextBytes(seed_sk);
 
         // S ← shake256(seed_sk, pk_seed_bytes + O_bytes)
-        Utils.shake256(S, param_pk_seed_bytes + param_O_bytes, seed_sk, param_sk_seed_bytes);
-
-        // seed_pk is the beginning of S.
-        seed_pk = S;
+        Utils.shake256(seed_pk, pkSeedBytes + oBytes, seed_sk, skSeedBytes);
 
         // o ← Decode_o(S[ param_pk_seed_bytes : param_pk_seed_bytes + O_bytes ])
         // Decode nibbles from S starting at offset param_pk_seed_bytes into O,
         // with expected output length = param_v * param_o.
-        Utils.decode(S, param_pk_seed_bytes, O, param_v * param_o);
+        Utils.decode(seed_pk, pkSeedBytes, O, v * o);
 
         // Expand P1 and P2 into the array P using seed_pk.
         MayoEngine.expandP1P2(p, P, seed_pk);
 
         // For compute_P3, we need to separate P1 and P2.
         // Here, we treat P1 as the first param_P1_limbs elements of P,
         // and P2 as the remaining elements.
-        long[] P1 = P;
-        long[] P2 = new long[P.length - param_P1_limbs];
-        System.arraycopy(P, param_P1_limbs, P2, 0, P2.length);
-
-        // Compute P3, which (in the process) modifies P2.
-        computeP3(p, P1, P2, O, P3);
-
-        // Store seed_pk into the public key cpk.
-        System.arraycopy(seed_pk, 0, cpk, 0, param_pk_seed_bytes);
-
-        // Allocate an array for the "upper" part of P3.
-        long[] P3_upper = new long[p.getP3Limbs()];
-
-        // Compute Upper(P3) and store the result in P3_upper.
-        mUpper(p, P3, P3_upper, param_o);
-
-        // Pack the m-vectors in P3_upper into cpk (after the seed_pk).
-        // The number of m-vectors to pack is (param_P3_limbs / m_vec_limbs),
-        // and param_m is used as the m value.
-        Utils.packMVecs(P3_upper, cpk, param_pk_seed_bytes, param_P3_limbs / m_vec_limbs, param_m);
-        // Securely clear sensitive data.
-//        secureClear(O);
-//        secureClear(P2);
-//        secureClear(P3);
-
-        return new AsymmetricCipherKeyPair(new MayoPublicKeyParameter(p, cpk), new MayoPrivateKeyParameter(p, seed_sk));
-    }
-
-    /**
-     * Computes P3 from P1, P2, and O.
-     * <p>
-     * In C, compute_P3 does:
-     * 1. Compute P1*O + P2, storing result in P2.
-     * 2. Compute P3 = O^T * (P1*O + P2).
-     *
-     * @param p  the parameter object.
-     * @param P1 the P1 matrix as a long[] array.
-     * @param P2 the P2 matrix as a long[] array; on output, P1*O is added to it.
-     * @param O  the O matrix as a byte[] array.
-     * @param P3 the output matrix (as a long[] array) which will receive O^T*(P1*O + P2).
-     */
-    public static void computeP3(MayoParameters p, long[] P1, long[] P2, byte[] O, long[] P3)
-    {
-        int mVecLimbs = p.getMVecLimbs();
-        int paramV = p.getV();
-        int paramO = p.getO();
+        long[] P2 = new long[P.length - p1Limbs];
+        System.arraycopy(P, p1Limbs, P2, 0, P2.length);
 
         // Compute P1 * O + P2 and store the result in P2.
-        GF16Utils.P1TimesO(p, P1, O, P2);
+        GF16Utils.P1TimesO(p, P, O, P2);
 
         // Compute P3 = O^T * (P1*O + P2).
         // Here, treat P2 as the bsMat for the multiplication.
         // Dimensions: mat = O (size: paramV x paramO), bsMat = P2 (size: paramV x paramO),
         // and acc (P3) will have dimensions: (paramO x paramO), each entry being an m-vector.
-        GF16Utils.mulAddMatTransXMMat(mVecLimbs, O, P2, P3, paramV, paramO, paramO);
-    }
+        GF16Utils.mulAddMatTransXMMat(mVecLimbs, O, P2, P3, v, o, o);
 
-    /**
-     * Reproduces the behavior of the C function m_upper.
-     * <p>
-     * For each pair (r, c) with 0 <= r <= c < size, it copies the m-vector at
-     * position (r, c) from 'in' to the next position in 'out' and, if r != c,
-     * it adds (XORs) the m-vector at position (c, r) into that same output vector.
-     *
-     * @param p    the parameter object (used to get mVecLimbs)
-     * @param in   the input long array (each vector is mVecLimbs in length)
-     * @param out  the output long array (must be large enough to store all output vectors)
-     * @param size the size parameter defining the matrix dimensions.
-     */
-    public static void mUpper(MayoParameters p, long[] in, long[] out, int size)
-    {
-        int mVecLimbs = p.getMVecLimbs();
+        // Store seed_pk into the public key cpk.
+        System.arraycopy(seed_pk, 0, cpk, 0, pkSeedBytes);
+
+        // Allocate an array for the "upper" part of P3.
+        long[] P3_upper = new long[p3Limbs];
+
+        // Compute Upper(P3) and store the result in P3_upper.
         int mVecsStored = 0;
-        for (int r = 0; r < size; r++)
+        for (int r = 0; r < o; r++)
         {
-            for (int c = r; c < size; c++)
+            for (int c = r; c < o; c++)
             {
                 // Compute the starting index for the (r, c) vector in the input array.
-                int srcOffset = mVecLimbs * (r * size + c);
+                int srcOffset = mVecLimbs * (r * o + c);
                 // Compute the output offset for the current stored vector.
                 int destOffset = mVecLimbs * mVecsStored;
 
                 // Copy the vector at (r, c) into the output.
-                System.arraycopy(in, srcOffset, out, destOffset, mVecLimbs);
+                System.arraycopy(P3, srcOffset, P3_upper, destOffset, mVecLimbs);
 
                 // If off-diagonal, add (XOR) the vector at (c, r) into the same output vector.
                 if (r != c)
                 {
-                    int srcOffset2 = mVecLimbs * (c * size + r);
-                    GF16Utils.mVecAdd(mVecLimbs, in, srcOffset2, out, destOffset);
+                    int srcOffset2 = mVecLimbs * (c * o + r);
+                    Longs.xorTo(mVecLimbs, P3, srcOffset2, P3_upper, destOffset);
                 }
                 mVecsStored++;
             }
         }
+
+        // Pack the m-vectors in P3_upper into cpk (after the seed_pk).
+        // The number of m-vectors to pack is (p3Limbs / mVecLimbs),
+        // and m (from p.getM()) is passed as the m value.
+        Utils.packMVecs(P3_upper, cpk, pkSeedBytes, p3Limbs / mVecLimbs, m);
+        // Securely clear sensitive data.
+        Arrays.clear(O);
+        Arrays.clear(P2);
+        Arrays.clear(P3);
+
+        return new AsymmetricCipherKeyPair(new MayoPublicKeyParameter(p, cpk), new MayoPrivateKeyParameter(p, seed_sk));
     }
 }