bcgit
diff --git a/core/src/main/java/org/bouncycastle/pqc/crypto/mayo/GF16Utils.java b/core/src/main/java/org/bouncycastle/pqc/crypto/mayo/GF16Utils.java
Lines changed: 185 additions & 0 deletions
diff --git a/core/src/main/java/org/bouncycastle/pqc/crypto/mayo/MayoEngine.java b/core/src/main/java/org/bouncycastle/pqc/crypto/mayo/MayoEngine.java
Lines changed: 190 additions & 0 deletions
@@ -0,0 +1,185 @@
+package org.bouncycastle.pqc.crypto.mayo;
+
+public class GF16Utils
+{
+    /** Selects the top bit of each of the sixteen 4-bit lanes packed into a 64-bit limb. */
+    private static final long LANE_TOP_BITS = 0x8888888888888888L;
+
+    /**
+     * Multiplies every 4-bit lane of a limb by x, lane by lane.
+     * <p>
+     * The top bit of each lane is removed before the shift so it cannot spill into the
+     * neighbouring lane, and is folded back into the two low bits of its own lane
+     * (reduction by x^4 + x + 1).
+     *
+     * @param limb sixteen packed 4-bit lanes
+     * @return the limb with every lane multiplied by x
+     */
+    private static long timesX(long limb)
+    {
+        long overflow = limb & LANE_TOP_BITS;
+        long carry = overflow >>> 3;
+        // carry sits on bit 0 of a lane and (carry << 1) on bit 1, so XOR-ing both in equals carry * 3.
+        return ((limb ^ overflow) << 1) ^ carry ^ (carry << 1);
+    }
+
+    /**
+     * Multiplies each 4-bit lane of a 64-bit limb by one GF(16) scalar.
+     * Per the original comment this mirrors gf16v_mul_u64 from the C code.
+     * <p>
+     * The scalar is consumed bit by bit: for bit i the limb multiplied by x^i is either
+     * XORed into the result or not. The selection uses an all-ones/all-zero mask rather
+     * than a branch.
+     *
+     * @param a a 64-bit limb holding sixteen GF(16) elements
+     * @param b the scalar; only bits 0..3 are consulted, higher bits are ignored
+     * @return the lane-wise product as a 64-bit limb
+     */
+    public static long gf16vMulU64(long a, int b)
+    {
+        long shifted = a;
+        long product = shifted & -(long)(b & 1);
+        for (int bit = 1; bit < 4; bit++)
+        {
+            shifted = timesX(shifted);
+            product ^= shifted & -(long)((b >>> bit) & 1);
+        }
+        return product;
+    }
+
+    /**
+     * XORs {@code a} times an m-vector into an accumulator m-vector.
+     * <p>
+     * Limb {@code in[inOffset + i]} is multiplied by {@code a} with
+     * {@link #gf16vMulU64(long, int)} and XORed into {@code acc[accOffset + i]}
+     * for i = 0 .. mVecLimbs - 1.
+     *
+     * @param mVecLimbs number of limbs to process
+     * @param in        array holding the source vector
+     * @param inOffset  index of the first source limb in {@code in}
+     * @param a         the GF(16) scalar (see {@link #gf16vMulU64(long, int)})
+     * @param acc       array holding the accumulator vector
+     * @param accOffset index of the first accumulator limb in {@code acc}
+     */
+    public static void mVecMulAdd(int mVecLimbs, long[] in, int inOffset, int a, long[] acc, int accOffset)
+    {
+        int src = inOffset;
+        int dst = accOffset;
+        for (int remaining = mVecLimbs; remaining > 0; remaining--)
+        {
+            acc[dst++] ^= gf16vMulU64(in[src++], a);
+        }
+    }
+
+    /**
+     * Same as {@link #mVecMulAdd(int, long[], int, int, long[], int)} with both offsets zero.
+     *
+     * @param mVecLimbs number of limbs to process
+     * @param in        the source vector
+     * @param a         the GF(16) scalar
+     * @param acc       the accumulator vector
+     */
+    public static void mVecMulAdd(int mVecLimbs, long[] in, int a, long[] acc)
+    {
+        mVecMulAdd(mVecLimbs, in, 0, a, acc, 0);
+    }
+
+    /**
+     * Accumulates the product of a (possibly upper-triangular) matrix of m-vectors with a
+     * matrix of GF(16) scalars.
+     * <p>
+     * The entries of {@code bsMat} are consumed strictly in storage order: one m-vector per
+     * visited (row, column) pair. For every visited pair (r, c) and every k, the current
+     * entry times {@code mat[c * matCols + k]} is XORed into the m-vector of {@code acc}
+     * at position (r, k).
+     *
+     * @param mVecLimbs  number of limbs per m-vector
+     * @param bsMat      flat array of m-vectors, {@code mVecLimbs} limbs per entry
+     * @param mat        scalar matrix, row-major, {@code matCols} entries per row
+     * @param acc        flat accumulator of m-vectors, row-major with {@code matCols} entries per row
+     * @param bsMatRows  number of rows walked in {@code bsMat}
+     * @param bsMatCols  number of columns of {@code bsMat}
+     * @param matCols    number of columns of {@code mat}
+     * @param triangular row r starts at column {@code triangular * r}: 1 walks only the
+     *                   upper triangle, 0 walks every column of every row
+     */
+    public static void mulAddMUpperTriangularMatXMat(int mVecLimbs, long[] bsMat, byte[] mat, long[] acc,
+                                                     int bsMatRows, int bsMatCols, int matCols, int triangular)
+    {
+        // Limb offset of the bsMat entry currently being consumed.
+        int entryOffset = 0;
+        for (int row = 0; row < bsMatRows; row++)
+        {
+            int accRowBase = row * matCols;
+            for (int col = triangular * row; col < bsMatCols; col++)
+            {
+                int matRowBase = col * matCols;
+                for (int k = 0; k < matCols; k++)
+                {
+                    int scalar = mat[matRowBase + k] & 0xFF;
+                    mVecMulAdd(mVecLimbs, bsMat, entryOffset, scalar, acc, (accRowBase + k) * mVecLimbs);
+                }
+                entryOffset += mVecLimbs;
+            }
+        }
+    }
+
+    /**
+     * Accumulates P1 * O into {@code acc}.
+     * <p>
+     * Delegates to {@link #mulAddMUpperTriangularMatXMat} with a v x v upper-triangular
+     * walk over {@code P1} and {@code o} columns for {@code O}, where v, o and the limb
+     * count are read from {@code p}. The original comment gives the C counterpart as
+     * mul_add_m_upper_triangular_mat_x_mat(PARAM_m_vec_limbs(p), P1, O, acc, PARAM_v(p), PARAM_v(p), PARAM_o(p), 1).
+     *
+     * @param p   the parameter object supplying mVecLimbs, v and o
+     * @param P1  the P1 matrix as a flat array of m-vectors
+     * @param O   the O matrix as a flat array of GF(16) scalars
+     * @param acc the accumulator the product is XORed into
+     */
+    public static void P1TimesO(MayoParameters p, long[] P1, byte[] O, long[] acc)
+    {
+        int limbs = p.getMVecLimbs();
+        int v = p.getV();
+        mulAddMUpperTriangularMatXMat(limbs, P1, O, acc, v, v, p.getO(), 1);
+    }
+
+    /**
+     * Accumulates mat^T times a matrix of m-vectors.
+     * <p>
+     * For every r, c and k, the m-vector of {@code bsMat} at position (c, k) times
+     * {@code mat[c * matCols + r]} is XORed into the m-vector of {@code acc} at position (r, k).
+     *
+     * @param mVecLimbs number of limbs per m-vector
+     * @param mat       scalar matrix, row-major, dimensions (matRows x matCols)
+     * @param bsMat     flat array of m-vectors, row-major, dimensions (matRows x bsMatCols)
+     * @param acc       flat accumulator of m-vectors, row-major, dimensions (matCols x bsMatCols)
+     * @param matRows   number of rows of {@code mat}
+     * @param matCols   number of columns of {@code mat}
+     * @param bsMatCols number of columns of {@code bsMat}
+     */
+    public static void mulAddMatTransXMMat(int mVecLimbs, byte[] mat, long[] bsMat, long[] acc,
+                                           int matRows, int matCols, int bsMatCols)
+    {
+        // outRow indexes a column of mat, i.e. a row of mat^T and of acc.
+        for (int outRow = 0; outRow < matCols; outRow++)
+        {
+            int accRowBase = outRow * bsMatCols;
+            for (int inRow = 0; inRow < matRows; inRow++)
+            {
+                int bsRowBase = inRow * bsMatCols;
+                int matIndex = inRow * matCols + outRow;
+                for (int k = 0; k < bsMatCols; k++)
+                {
+                    int scalar = mat[matIndex] & 0xFF;
+                    mVecMulAdd(mVecLimbs, bsMat, (bsRowBase + k) * mVecLimbs, scalar,
+                        acc, (accRowBase + k) * mVecLimbs);
+                }
+            }
+        }
+    }
+
+    /**
+     * XORs one m-vector into another.
+     *
+     * @param mVecLimbs  number of limbs to process
+     * @param src        array holding the source vector
+     * @param srcOffset  index of the first source limb
+     * @param dest       array holding the destination vector, updated in place
+     * @param destOffset index of the first destination limb
+     */
+    public static void mVecAdd(int mVecLimbs, long[] src, int srcOffset, long[] dest, int destOffset)
+    {
+        int from = srcOffset;
+        int to = destOffset;
+        for (int remaining = mVecLimbs; remaining > 0; remaining--)
+        {
+            dest[to++] ^= src[from++];
+        }
+    }
+
+}
+
@@ -0,0 +1,190 @@
+package org.bouncycastle.pqc.crypto.mayo;
+
+import org.bouncycastle.crypto.BlockCipher;
+import org.bouncycastle.crypto.engines.AESEngine;
+import org.bouncycastle.crypto.modes.CTRModeCipher;
+import org.bouncycastle.crypto.modes.SICBlockCipher;
+import org.bouncycastle.crypto.params.KeyParameter;
+import org.bouncycastle.crypto.params.ParametersWithIV;
+import org.bouncycastle.util.Arrays;
+import org.bouncycastle.util.Pack;
+
+public class MayoEngine
+{
+    public static final int MAYO_OK = 0;
+    // NOTE(review): the original marks the two sizes below "Adjust as needed"; within this
+    // file they are referenced only by the disabled mayoExpandSk draft.
+    public static final int PK_SEED_BYTES_MAX = 16;
+    public static final int O_BYTES_MAX = 312;
+
+    /**
+     * Expands P1 and P2 from the public seed.
+     * <p>
+     * Generates {@code p.getP1Bytes() + p.getP2Bytes()} keystream bytes with
+     * {@link #AES_128_CTR}, keyed by the first {@code p.getPkSeedBytes()} bytes of
+     * {@code seed_pk}, and passes them to {@code Utils.unpackMVecs} to fill {@code P}.
+     *
+     * @param p       Mayo parameters
+     * @param P       receives the unpacked limbs; must be sized by the caller
+     *                (presumably at least P1Limbs + P2Limbs entries - confirm against unpackMVecs)
+     * @param seed_pk the seed used as the cipher key
+     * @return the number of keystream bytes generated, i.e. P1_bytes + P2_bytes
+     */
+    public static int expandP1P2(MayoParameters p, long[] P, byte[] seed_pk)
+    {
+        final int totalBytes = p.getP1Bytes() + p.getP2Bytes();
+        final byte[] stream = new byte[totalBytes];
+        AES_128_CTR(stream, totalBytes, seed_pk, p.getPkSeedBytes());
+
+        // One m-vector occupies mVecLimbs limbs, so this is the number of vectors to unpack.
+        final int vectorCount = (p.getP1Limbs() + p.getP2Limbs()) / p.getMVecLimbs();
+        Utils.unpackMVecs(stream, P, vectorCount, p.getM());
+
+        return totalBytes;
+    }
+
+    /**
+     * Writes AES-CTR keystream into {@code output}.
+     * <p>
+     * The cipher is AES in SIC (CTR) mode with an all-zero 16-byte IV. The keystream is
+     * obtained by processing zero blocks; a final short block is truncated to fit.
+     *
+     * @param output        buffer that receives the keystream, starting at index 0
+     * @param outputByteLen number of keystream bytes to write
+     * @param key           key material
+     * @param keyLen        length of the key handed to the cipher; the key is
+     *                      {@code Arrays.copyOf(key, keyLen)} (16 for AES-128)
+     * @return {@code outputByteLen}
+     */
+    public static int AES_128_CTR(byte[] output, int outputByteLen, byte[] key, int keyLen)
+    {
+        BlockCipher aes = AESEngine.newInstance();
+        CTRModeCipher ctr = SICBlockCipher.newInstance(aes);
+        // A freshly allocated array is all zeros, which is the IV wanted here.
+        ctr.init(true, new ParametersWithIV(new KeyParameter(Arrays.copyOf(key, keyLen)), new byte[16]));
+
+        final int blockLen = ctr.getBlockSize();
+        final byte[] zeros = new byte[blockLen];
+        final byte[] keystream = new byte[blockLen];
+
+        int written = 0;
+        while (written < outputByteLen)
+        {
+            // Processing an all-zero block leaves the raw keystream block in 'keystream'.
+            ctr.processBlock(zeros, 0, keystream, 0);
+            int chunk = Math.min(blockLen, outputByteLen - written);
+            System.arraycopy(keystream, 0, output, written, chunk);
+            written += chunk;
+        }
+        return outputByteLen;
+    }
+
+    /*
+     * Disabled draft of the secret-key expansion, kept commented out for reference.
+     *
+     * Expands the secret key.
+     *
+     * p   the MayoParameters instance.
+     * csk the input secret key seed (byte array).
+     * sk  the Sk object that holds the expanded secret key components.
+     * returns MAYO_OK on success.
+     */
+//    public static int mayoExpandSk(MayoParameters p, byte[] csk, MayoPrivateKeyParameter sk)
+//    {
+//        int ret = MAYO_OK;
+//        int totalS = PK_SEED_BYTES_MAX + O_BYTES_MAX;
+//        byte[] S = new byte[totalS];
+//
+//        // sk.p is the long[] array, sk.O is the byte[] array.
+//
+//        long[] P = new long[p.getPkSeedBytes() >> 3];
+//        Pack.littleEndianToLong(sk.getP(), 0, P);
+//        byte[] O = sk.getO();
+//
+//        int param_o = p.getO();
+//        int param_v = p.getV();
+//        int param_O_bytes = p.getOBytes();
+//        int param_pk_seed_bytes = p.getPkSeedBytes();
+//        int param_sk_seed_bytes = p.getSkSeedBytes();
+//
+//        // In C, seed_sk = csk and seed_pk = S (the beginning of S)
+//        byte[] seed_sk = csk;
+//        byte[] seed_pk = S;  // first param_pk_seed_bytes of S
+//
+//        // Generate S = seed_pk || (additional bytes), using SHAKE256.
+//        // Output length is param_pk_seed_bytes + param_O_bytes.
+//        Utils.shake256(S, param_pk_seed_bytes + param_O_bytes, seed_sk, param_sk_seed_bytes);
+//
+//        // Decode the portion of S after the first param_pk_seed_bytes into O.
+//        // (In C, this is: decode(S + param_pk_seed_bytes, O, param_v * param_o))
+//        Utils.decode(S, param_pk_seed_bytes, O, param_v * param_o);
+//
+//        // Expand P1 and P2 into the long array P using seed_pk.
+//        MayoEngine.expandP1P2(p, P, seed_pk);
+//
+//        // Let P2 start at offset = PARAM_P1_limbs(p)
+//        int p1Limbs = p.getP1Limbs();
+//        int offsetP2 = p1Limbs;
+//
+//        // Compute L_i = (P1 + P1^t)*O + P2.
+//        // Here, we assume that P1P1tTimesO writes into the portion of P starting at offsetP2.
+//        P1P1tTimesO(p, P, O, P, offsetP2);
+//
+//        // Securely clear sensitive temporary data.
+//        java.util.Arrays.fill(S, (byte)0);
+//        return ret;
+//    }
+
+    /**
+     * Accumulates (P1 + P1^t) * O into {@code acc}, starting at {@code accOffset}.
+     * <p>
+     * {@code P1} is read as the upper triangle of a v x v matrix of m-vectors, stored row
+     * by row. Each off-diagonal entry (r, c) contributes twice: scaled by
+     * {@code O[c * o + k]} into accumulator row r and scaled by {@code O[r * o + k]} into
+     * accumulator row c. Diagonal entries are stepped over without being used; for c == r
+     * both contributions would hit the same destination and cancel under XOR.
+     *
+     * @param p         the MayoParameters supplying o, v and the limb count
+     * @param P1        the P1 entries as a flat array of m-vectors
+     * @param O         the O matrix, row-major with o entries per row, one GF(16) element per byte
+     * @param acc       the accumulator array the result is XORed into
+     * @param accOffset index in {@code acc} of the first accumulator limb
+     */
+    public static void P1P1tTimesO(MayoParameters p, long[] P1, byte[] O, long[] acc, int accOffset)
+    {
+        final int o = p.getO();
+        final int v = p.getV();
+        final int limbs = p.getMVecLimbs();
+
+        // Limb offset of the P1 entry currently being consumed.
+        int entryOffset = 0;
+        for (int r = 0; r < v; r++)
+        {
+            // Step over the diagonal entry (r, r).
+            entryOffset += limbs;
+            for (int c = r + 1; c < v; c++)
+            {
+                for (int k = 0; k < o; k++)
+                {
+                    // Contribution of entry (r, c) to accumulator row r ...
+                    GF16Utils.mVecMulAdd(limbs, P1, entryOffset,
+                        O[c * o + k] & 0xFF, acc, accOffset + (r * o + k) * limbs);
+                    // ... and of its transpose counterpart (c, r) to accumulator row c.
+                    GF16Utils.mVecMulAdd(limbs, P1, entryOffset,
+                        O[r * o + k] & 0xFF, acc, accOffset + (c * o + k) * limbs);
+                }
+                entryOffset += limbs;
+            }
+        }
+    }
+}