diff --git a/packages/common/src/eips.ts b/packages/common/src/eips.ts index 58bd81db913..d5968d2459e 100644 --- a/packages/common/src/eips.ts +++ b/packages/common/src/eips.ts @@ -343,6 +343,14 @@ export const eipsDict: EIPsDict = { 6800: { minimumHardfork: Hardfork.London, }, + /** + * Description : EVM Modular Arithmetic Extensions + * URL : https://eips.ethereum.org/EIPS/eip-6690 + * Status : Draft + */ + 6690: { + minimumHardfork: Hardfork.Prague, + }, /** * Description : Execution layer triggerable withdrawals (experimental) * URL : https://github.com/ethereum/EIPs/blob/3b5fcad6b35782f8aaeba7d4ac26004e8fbd720f/EIPS/eip-7002.md diff --git a/packages/evm/src/evm.ts b/packages/evm/src/evm.ts index 5e157b96171..883191589b3 100644 --- a/packages/evm/src/evm.ts +++ b/packages/evm/src/evm.ts @@ -251,7 +251,7 @@ export class EVM implements EVMInterface { const supportedEIPs = [ 663, 1153, 1559, 2537, 2565, 2718, 2929, 2930, 2935, 3198, 3529, 3540, 3541, 3607, 3651, 3670, 3855, 3860, 4200, 4399, 4750, 4788, 4844, 4895, 5133, 5450, 5656, 6110, 6206, 6780, 6800, - 7002, 7069, 7251, 7480, 7516, 7620, 7685, 7691, 7692, 7698, 7702, 7709, + 6690, 7002, 7069, 7251, 7480, 7516, 7620, 7685, 7691, 7692, 7698, 7702, 7709, ] for (const eip of this.common.eips()) { diff --git a/packages/evm/src/evmmax/addMod.ts b/packages/evm/src/evmmax/addMod.ts new file mode 100644 index 00000000000..5199a9c9cae --- /dev/null +++ b/packages/evm/src/evmmax/addMod.ts @@ -0,0 +1,1564 @@ +import { MASK_64, add64, sub64 } from './index.js' + +function addMod64(z: bigint[], x: bigint[], y: bigint[], modulus: bigint[]): void { + const MASK_64 = (1n << 64n) - 1n + + const x0 = x[0] & MASK_64 + const y0 = y[0] & MASK_64 + const m0 = modulus[0] & MASK_64 + + const fullSum = x0 + y0 + const sumLow64 = fullSum & MASK_64 + const carry = fullSum >> 64n + + let diff = sumLow64 - m0 + let borrow = 0n + if (diff < 0n) { + diff &= MASK_64 + borrow = 1n + } + + if (carry === 0n && borrow !== 0n) { + z[0] = sumLow64 + } else { + z[0] = diff + } + z[0] &= MASK_64 +} + +function addMod128(z: bigint[], x: bigint[], y: bigint[], mod: bigint[]): void { + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + + { + const [sumLow, carryOut] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carryOut + } + { + const [sumLow, carryOut] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carryOut + } + + let c1 = 0n + let out0 = 0n + let out1 = 0n + + { + const [diffLow, borrowOut] = sub64(tmp0, mod[0], c1) + out0 = diffLow + c1 = borrowOut + } + { + const [diffLow, borrowOut] = sub64(tmp1, mod[1], c1) + out1 = diffLow + c1 = borrowOut + } + + if (c === 0n && c1 !== 0n) { + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + } else { + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + } +} + +function addMod192( + z: bigint[], // [3] => final output + x: bigint[], // [3] => first addend + y: bigint[], // [3] => second addend + mod: bigint[], // [3] => the modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // Step 1: Add x + y (3 limbs), capturing carry in `c` + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + + // lower limb + { + const [sumLow, carryOut] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carryOut + } + // middle limb + { + const [sumLow, carryOut] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carryOut + } + // high limb + { + const [sumLow, carryOut] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carryOut + } + + // Step 2: Subtract mod from tmp (3 limbs), capturing borrow in `c1` + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + + { + 
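+    // Assumed helper semantics (mirroring Go's math/bits, as the comments elsewhere suggest):
+    // add64(a, b, carryIn) returns [sum & MASK_64, carryOut] and
+    // sub64(a, b, borrowIn) returns [diff & MASK_64, borrowOut].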
const [diff, borrow] = sub64(tmp0, mod[0], c1) + out0 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp1, mod[1], c1) + out1 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp2, mod[2], c1) + out2 = diff + c1 = borrow + } + + // Step 3: If no carry from addition but we got a borrow from subtract => revert to tmp + // Else keep out. Then mask the result to 64 bits per limb. + if (c === 0n && c1 !== 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + } else { + // keep sub result + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + } +} // Adjust if needed + +function addMod256( + z: bigint[], // [4] => final result + x: bigint[], // [4] => first addend + y: bigint[], // [4] => second addend + mod: bigint[], // [4] => modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // Step 1: Add x + y across 4 limbs, capturing carry in `c`. + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + + { + const [sumLow, carry] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[3], y[3], c) + tmp3 = sumLow + c = carry + } + + // Step 2: Subtract the 4-limb `mod` from `tmp` (tmp0..3), capturing borrow in c1. + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + + { + const [diff, borrow] = sub64(tmp0, mod[0], c1) + out0 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp1, mod[1], c1) + out1 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp2, mod[2], c1) + out2 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp3, mod[3], c1) + out3 = diff + c1 = borrow + } + + // Step 3: If addition carry=0 but subtract borrow=1 => revert to raw sum; else keep out. + // Then mask each limb to 64 bits. + if (c === 0n && c1 !== 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + } else { + // keep sub + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + } +} // Adjust if needed + +function addMod320( + z: bigint[], // [5] => final result (320-bit) + x: bigint[], // [5] => first addend + y: bigint[], // [5] => second addend + mod: bigint[], // [5] => modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // Step 1: Add x + y across 5 limbs, capturing carry in `c`. + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + + { + const [sumLow, carry] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[3], y[3], c) + tmp3 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[4], y[4], c) + tmp4 = sumLow + c = carry + } + + // Step 2: Subtract the 5-limb `mod` from tmp (tmp0..4), capturing borrow in c1. 
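+  // The unrolled limbs below are equivalent to a simple loop, e.g.
+  //   for (let i = 0; i < 5; i++) { ;[out[i], c1] = sub64(tmp[i], mod[i], c1) }
+  // unrolling presumably just trades code size for speed by avoiding per-iteration overhead.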
+ let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + + { + const [diff, borrow] = sub64(tmp0, mod[0], c1) + out0 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp1, mod[1], c1) + out1 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp2, mod[2], c1) + out2 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp3, mod[3], c1) + out3 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp4, mod[4], c1) + out4 = diff + c1 = borrow + } + + // Step 3: If addition carry=0 but subtract borrow=1 => revert to raw sum; else keep out. + if (c === 0n && c1 !== 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + } else { + // keep sub + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + } +} // Adjust import as needed + +/** + * addMod384: + * z = (x + y) mod modulus + * + * Each of z, x, y, modulus is an array of length 6, representing 384 bits + * in 64-bit limbs: x[0..5], y[0..5], etc. + * We do a 6-limb addition, then a 6-limb subtract of 'modulus', + * and decide which result to keep. + */ +function addMod384( + z: bigint[], // [6], final 384-bit result + x: bigint[], // [6], input x + y: bigint[], // [6], input y + mod: bigint[], // [6], the modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // Step 1: Add x + y across 6 limbs, capturing carry in `c`. + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + + { + const [sumLow, carry] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[3], y[3], c) + tmp3 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[4], y[4], c) + tmp4 = sumLow + c = carry + } + { + const [sumLow, carry] = add64(x[5], y[5], c) + tmp5 = sumLow + c = carry + } + + // Step 2: Subtract the 6-limb `mod` from tmp (tmp0..5), capturing borrow in c1. + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + + { + const [diff, borrow] = sub64(tmp0, mod[0], c1) + out0 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp1, mod[1], c1) + out1 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp2, mod[2], c1) + out2 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp3, mod[3], c1) + out3 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp4, mod[4], c1) + out4 = diff + c1 = borrow + } + { + const [diff, borrow] = sub64(tmp5, mod[5], c1) + out5 = diff + c1 = borrow + } + + // Step 3: If addition carry = 0 but subtract borrow = 1 => revert to raw sum + // else keep out. Then mask each limb to 64 bits. + if (c === 0n && c1 !== 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + } else { + // keep sub + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + } +} // Adjust import path as needed + +/** + * addMod448: + * z = (x + y) mod mod + * + * Both x, y, mod, z are each arrays of length 7 (7 limbs * 64 bits = 448 bits). 
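+ * Limb 0 holds the least significant 64 bits of each value (little-endian limb order).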
+ * We: + * 1) Add the 7 limbs of x and y (unrolled), + * 2) Subtract the 7 limbs of mod (unrolled), + * 3) If add-carry==0 && subtract-borrow==1 => revert to raw sum, else keep sub. + */ +function addMod448( + z: bigint[], // [7], final 448-bit output + x: bigint[], // [7], first addend + y: bigint[], // [7], second addend + mod: bigint[], // [7], the modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // Step 1: Add x + y across 7 limbs, capturing carry in `c`. + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + + // Add limb 0 + { + const [sumLow, carryOut] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carryOut + } + // Add limb 1 + { + const [sumLow, carryOut] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carryOut + } + // Add limb 2 + { + const [sumLow, carryOut] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carryOut + } + // Add limb 3 + { + const [sumLow, carryOut] = add64(x[3], y[3], c) + tmp3 = sumLow + c = carryOut + } + // Add limb 4 + { + const [sumLow, carryOut] = add64(x[4], y[4], c) + tmp4 = sumLow + c = carryOut + } + // Add limb 5 + { + const [sumLow, carryOut] = add64(x[5], y[5], c) + tmp5 = sumLow + c = carryOut + } + // Add limb 6 + { + const [sumLow, carryOut] = add64(x[6], y[6], c) + tmp6 = sumLow + c = carryOut + } + + // Step 2: Subtract mod from tmp (7 limbs), capturing borrow in c1. + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + + { + const [diffLow, borrowOut] = sub64(tmp0, mod[0], c1) + out0 = diffLow + c1 = borrowOut + } + { + const [diffLow, borrowOut] = sub64(tmp1, mod[1], c1) + out1 = diffLow + c1 = borrowOut + } + { + const [diffLow, borrowOut] = sub64(tmp2, mod[2], c1) + out2 = diffLow + c1 = borrowOut + } + { + const [diffLow, borrowOut] = sub64(tmp3, mod[3], c1) + out3 = diffLow + c1 = borrowOut + } + { + const [diffLow, borrowOut] = sub64(tmp4, mod[4], c1) + out4 = diffLow + c1 = borrowOut + } + { + const [diffLow, borrowOut] = sub64(tmp5, mod[5], c1) + out5 = diffLow + c1 = borrowOut + } + { + const [diffLow, borrowOut] = sub64(tmp6, mod[6], c1) + out6 = diffLow + c1 = borrowOut + } + + // Step 3: If c == 0 (no add overflow) and c1 != 0 (subtract borrowed), + // revert to raw sum tmp. Otherwise, keep out. Then mask each limb to 64 bits. + if (c === 0n && c1 !== 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + } else { + // keep sub result + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + } +} // Adjust if needed + +/** + * addMod512: + * z = (x + y) mod mod + * + * x, y, mod, z each are arrays of length 8 (8 limbs * 64 bits = 512 bits). + * This function unrolls the addition and subtraction steps similarly to + * addMod64, addMod128, etc. 
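+ * Both inputs are assumed to already be reduced (x, y < mod), so a single conditional
+ * subtraction is enough to bring the sum back into range.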
+ */ +function addMod512( + z: bigint[], // [8], final 512-bit result + x: bigint[], // [8], first addend + y: bigint[], // [8], second addend + mod: bigint[], // [8], the modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // ------------------------------- + // Step 1: Add x + y across 8 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(x[3], y[3], c) + tmp3 = sumLow + c = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(x[4], y[4], c) + tmp4 = sumLow + c = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(x[5], y[5], c) + tmp5 = sumLow + c = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(x[6], y[6], c) + tmp6 = sumLow + c = carry + } + // Limb 7 + { + const [sumLow, carry] = add64(x[7], y[7], c) + tmp7 = sumLow + c = carry + } + + // ------------------------------- + // Step 2: Subtract mod from tmp, capturing borrow in c1 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(tmp0, mod[0], c1) + out0 = diff + c1 = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(tmp1, mod[1], c1) + out1 = diff + c1 = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(tmp2, mod[2], c1) + out2 = diff + c1 = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(tmp3, mod[3], c1) + out3 = diff + c1 = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(tmp4, mod[4], c1) + out4 = diff + c1 = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(tmp5, mod[5], c1) + out5 = diff + c1 = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(tmp6, mod[6], c1) + out6 = diff + c1 = borrow + } + // Limb 7 + { + const [diff, borrow] = sub64(tmp7, mod[7], c1) + out7 = diff + c1 = borrow + } + + // ------------------------------- + // Step 3: if c=0 (no add overflow) && c1!=0 (subtract borrowed), + // revert to tmp. Otherwise keep out. Mask each limb to 64 bits. + if (c === 0n && c1 !== 0n) { + // revert to raw sum + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + } else { + // keep sub + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + } +} // Adjust import path as needed + +/** + * addMod576: + * z = (x + y) mod mod + * + * Each of x, y, mod, and z has 9 limbs of 64 bits => [9] arrays (576 bits). + * This unrolled version adds x and y across 9 limbs, then subtracts mod, + * deciding which result to keep. 
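+ * (FieldContext selects this function as addModPreset[limbCount - 1], i.e. index 8.)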
+ */ +function addMod576( + z: bigint[], // [9], final 576-bit result + x: bigint[], // [9], first addend + y: bigint[], // [9], second addend + mod: bigint[], // [9], the 576-bit modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // ------------------------------- + // Step 1: Add x + y across 9 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + let tmp8 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(x[3], y[3], c) + tmp3 = sumLow + c = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(x[4], y[4], c) + tmp4 = sumLow + c = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(x[5], y[5], c) + tmp5 = sumLow + c = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(x[6], y[6], c) + tmp6 = sumLow + c = carry + } + // Limb 7 + { + const [sumLow, carry] = add64(x[7], y[7], c) + tmp7 = sumLow + c = carry + } + // Limb 8 + { + const [sumLow, carry] = add64(x[8], y[8], c) + tmp8 = sumLow + c = carry + } + + // ------------------------------- + // Step 2: Subtract mod from tmp, capturing borrow in c1 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + let out8 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(tmp0, mod[0], c1) + out0 = diff + c1 = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(tmp1, mod[1], c1) + out1 = diff + c1 = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(tmp2, mod[2], c1) + out2 = diff + c1 = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(tmp3, mod[3], c1) + out3 = diff + c1 = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(tmp4, mod[4], c1) + out4 = diff + c1 = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(tmp5, mod[5], c1) + out5 = diff + c1 = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(tmp6, mod[6], c1) + out6 = diff + c1 = borrow + } + // Limb 7 + { + const [diff, borrow] = sub64(tmp7, mod[7], c1) + out7 = diff + c1 = borrow + } + // Limb 8 + { + const [diff, borrow] = sub64(tmp8, mod[8], c1) + out8 = diff + c1 = borrow + } + + // ------------------------------- + // Step 3: If c == 0 (no add overflow) and c1 != 0 (sub borrowed), + // revert to raw sum, else keep sub. + // Then mask each limb to 64 bits. + if (c === 0n && c1 !== 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + z[8] = tmp8 & MASK_64 + } else { + // keep sub + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + z[8] = out8 & MASK_64 + } +} // Adjust if needed + +/** + * addMod640: + * z = (x + y) mod mod + * + * x, y, mod, z are each arrays of length 10 (10 limbs * 64 bits = 640 bits). + * We unroll the addition and subtraction steps as in other addModXXX functions. 
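+ * Masking each limb with MASK_64 keeps every stored limb within 64 bits, since JavaScript
+ * bigints have no fixed word size.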
+ */ +function addMod640( + z: bigint[], // [10], final 640-bit result + x: bigint[], // [10], first addend + y: bigint[], // [10], second addend + mod: bigint[], // [10], the modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // ------------------------------- + // Step 1: Add x + y across 10 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + let tmp8 = 0n + let tmp9 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(x[3], y[3], c) + tmp3 = sumLow + c = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(x[4], y[4], c) + tmp4 = sumLow + c = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(x[5], y[5], c) + tmp5 = sumLow + c = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(x[6], y[6], c) + tmp6 = sumLow + c = carry + } + // Limb 7 + { + const [sumLow, carry] = add64(x[7], y[7], c) + tmp7 = sumLow + c = carry + } + // Limb 8 + { + const [sumLow, carry] = add64(x[8], y[8], c) + tmp8 = sumLow + c = carry + } + // Limb 9 + { + const [sumLow, carry] = add64(x[9], y[9], c) + tmp9 = sumLow + c = carry + } + + // ------------------------------- + // Step 2: Subtract mod from tmp, capturing borrow in c1 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + let out8 = 0n + let out9 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(tmp0, mod[0], c1) + out0 = diff + c1 = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(tmp1, mod[1], c1) + out1 = diff + c1 = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(tmp2, mod[2], c1) + out2 = diff + c1 = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(tmp3, mod[3], c1) + out3 = diff + c1 = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(tmp4, mod[4], c1) + out4 = diff + c1 = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(tmp5, mod[5], c1) + out5 = diff + c1 = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(tmp6, mod[6], c1) + out6 = diff + c1 = borrow + } + // Limb 7 + { + const [diff, borrow] = sub64(tmp7, mod[7], c1) + out7 = diff + c1 = borrow + } + // Limb 8 + { + const [diff, borrow] = sub64(tmp8, mod[8], c1) + out8 = diff + c1 = borrow + } + // Limb 9 + { + const [diff, borrow] = sub64(tmp9, mod[9], c1) + out9 = diff + c1 = borrow + } + + // ------------------------------- + // Step 3: If c=0 (no add overflow) && c1!=0 (borrow), + // revert to raw sum; else keep sub. Then mask each limb. + if (c === 0n && c1 !== 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + z[8] = tmp8 & MASK_64 + z[9] = tmp9 & MASK_64 + } else { + // keep sub + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + z[8] = out8 & MASK_64 + z[9] = out9 & MASK_64 + } +} // Adjust if needed + +/** + * addMod704: + * z = (x + y) mod mod + * + * x, y, mod, z each are arrays of length 11 (11 limbs * 64 bits = 704 bits). 
+ * We unroll the addition and subtraction steps similarly to addMod64, etc. + */ +function addMod704( + z: bigint[], // [11], final 704-bit result + x: bigint[], // [11], first addend + y: bigint[], // [11], second addend + mod: bigint[], // [11], the 704-bit modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // ------------------------------- + // Step 1: Add x + y across 11 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + let tmp8 = 0n + let tmp9 = 0n + let tmp10 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(x[3], y[3], c) + tmp3 = sumLow + c = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(x[4], y[4], c) + tmp4 = sumLow + c = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(x[5], y[5], c) + tmp5 = sumLow + c = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(x[6], y[6], c) + tmp6 = sumLow + c = carry + } + // Limb 7 + { + const [sumLow, carry] = add64(x[7], y[7], c) + tmp7 = sumLow + c = carry + } + // Limb 8 + { + const [sumLow, carry] = add64(x[8], y[8], c) + tmp8 = sumLow + c = carry + } + // Limb 9 + { + const [sumLow, carry] = add64(x[9], y[9], c) + tmp9 = sumLow + c = carry + } + // Limb 10 + { + const [sumLow, carry] = add64(x[10], y[10], c) + tmp10 = sumLow + c = carry + } + + // ------------------------------- + // Step 2: Subtract mod from tmp, capturing borrow in c1 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + let out8 = 0n + let out9 = 0n + let out10 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(tmp0, mod[0], c1) + out0 = diff + c1 = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(tmp1, mod[1], c1) + out1 = diff + c1 = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(tmp2, mod[2], c1) + out2 = diff + c1 = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(tmp3, mod[3], c1) + out3 = diff + c1 = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(tmp4, mod[4], c1) + out4 = diff + c1 = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(tmp5, mod[5], c1) + out5 = diff + c1 = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(tmp6, mod[6], c1) + out6 = diff + c1 = borrow + } + // Limb 7 + { + const [diff, borrow] = sub64(tmp7, mod[7], c1) + out7 = diff + c1 = borrow + } + // Limb 8 + { + const [diff, borrow] = sub64(tmp8, mod[8], c1) + out8 = diff + c1 = borrow + } + // Limb 9 + { + const [diff, borrow] = sub64(tmp9, mod[9], c1) + out9 = diff + c1 = borrow + } + // Limb 10 + { + const [diff, borrow] = sub64(tmp10, mod[10], c1) + out10 = diff + c1 = borrow + } + + // ------------------------------- + // Step 3: If c == 0 (no add overflow) && c1 != 0 (borrow), + // revert to raw sum, else keep sub. Then mask each limb. 
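+  // Single-limb intuition with mod = 7: x = 5, y = 6 gives tmp = 11 (c = 0) and
+  // 11 - 7 = 4 with no borrow (c1 = 0), so the reduced value 4 is kept; x = 1, y = 2
+  // gives tmp = 3 (c = 0) and 3 - 7 borrows (c1 = 1), so the raw sum 3 is kept.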
+ if (c === 0n && c1 !== 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + z[8] = tmp8 & MASK_64 + z[9] = tmp9 & MASK_64 + z[10] = tmp10 & MASK_64 + } else { + // keep sub + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + z[8] = out8 & MASK_64 + z[9] = out9 & MASK_64 + z[10] = out10 & MASK_64 + } +} // Adjust if needed + +/** + * addMod768: + * z = (x + y) mod mod + * + * Each of x, y, mod, z is 12-limb (768-bit): + * x[0..11], y[0..11], mod[0..11], z[0..11]. + * We unroll the addition and subtraction steps, capturing carry and borrow. + */ +function addMod768( + z: bigint[], // [12], final 768-bit result + x: bigint[], // [12], first addend + y: bigint[], // [12], second addend + mod: bigint[], // [12], the modulus +): void { + const MASK_64 = (1n << 64n) - 1n + + // ------------------------------- + // Step 1: Add x + y across 12 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + let tmp8 = 0n + let tmp9 = 0n + let tmp10 = 0n + let tmp11 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(x[0], y[0], c) + tmp0 = sumLow + c = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(x[1], y[1], c) + tmp1 = sumLow + c = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(x[2], y[2], c) + tmp2 = sumLow + c = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(x[3], y[3], c) + tmp3 = sumLow + c = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(x[4], y[4], c) + tmp4 = sumLow + c = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(x[5], y[5], c) + tmp5 = sumLow + c = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(x[6], y[6], c) + tmp6 = sumLow + c = carry + } + // Limb 7 + { + const [sumLow, carry] = add64(x[7], y[7], c) + tmp7 = sumLow + c = carry + } + // Limb 8 + { + const [sumLow, carry] = add64(x[8], y[8], c) + tmp8 = sumLow + c = carry + } + // Limb 9 + { + const [sumLow, carry] = add64(x[9], y[9], c) + tmp9 = sumLow + c = carry + } + // Limb 10 + { + const [sumLow, carry] = add64(x[10], y[10], c) + tmp10 = sumLow + c = carry + } + // Limb 11 + { + const [sumLow, carry] = add64(x[11], y[11], c) + tmp11 = sumLow + c = carry + } + + // ------------------------------- + // Step 2: Subtract mod from tmp, capturing borrow in c1 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + let out8 = 0n + let out9 = 0n + let out10 = 0n + let out11 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(tmp0, mod[0], c1) + out0 = diff + c1 = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(tmp1, mod[1], c1) + out1 = diff + c1 = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(tmp2, mod[2], c1) + out2 = diff + c1 = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(tmp3, mod[3], c1) + out3 = diff + c1 = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(tmp4, mod[4], c1) + out4 = diff + c1 = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(tmp5, mod[5], c1) + out5 = diff + c1 = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(tmp6, mod[6], c1) + out6 = diff + c1 = borrow + } + // Limb 7 + { + const [diff, borrow] = 
sub64(tmp7, mod[7], c1) + out7 = diff + c1 = borrow + } + // Limb 8 + { + const [diff, borrow] = sub64(tmp8, mod[8], c1) + out8 = diff + c1 = borrow + } + // Limb 9 + { + const [diff, borrow] = sub64(tmp9, mod[9], c1) + out9 = diff + c1 = borrow + } + // Limb 10 + { + const [diff, borrow] = sub64(tmp10, mod[10], c1) + out10 = diff + c1 = borrow + } + // Limb 11 + { + const [diff, borrow] = sub64(tmp11, mod[11], c1) + out11 = diff + c1 = borrow + } + + // ------------------------------- + // Step 3: If c==0 (no add overflow) && c1!=0 (sub borrowed), + // revert to raw sum, else keep sub. Then mask each limb. + if (c === 0n && c1 !== 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + z[8] = tmp8 & MASK_64 + z[9] = tmp9 & MASK_64 + z[10] = tmp10 & MASK_64 + z[11] = tmp11 & MASK_64 + } else { + // keep sub + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + z[8] = out8 & MASK_64 + z[9] = out9 & MASK_64 + z[10] = out10 & MASK_64 + z[11] = out11 & MASK_64 + } +} + +export const addModPreset: Function[] = [ + addMod64, + addMod128, + addMod192, + addMod256, + addMod320, + addMod384, + addMod448, + addMod512, + addMod576, + addMod640, + addMod704, + addMod768, +] diff --git a/packages/evm/src/evmmax/binaryArith.ts b/packages/evm/src/evmmax/binaryArith.ts new file mode 100644 index 00000000000..83ae83ed3eb --- /dev/null +++ b/packages/evm/src/evmmax/binaryArith.ts @@ -0,0 +1,36 @@ +import { bigIntToBEBytes, limbsToInt, placeBEBytesInOutput } from './index.js' + +// binary arithmetic preset functions + +export function mulModBinary(z: bigint[], x: bigint[], y: bigint[], modulus: bigint[]) { + const X = limbsToInt(x) + const Y = limbsToInt(y) + const M = limbsToInt(modulus) + + const result = (X * Y) % M + const resultBytes = bigIntToBEBytes(result) + placeBEBytesInOutput(z, resultBytes) +} + +export function addModBinary(z: bigint[], x: bigint[], y: bigint[], modulus: bigint[]) { + const X = limbsToInt(x) + const Y = limbsToInt(y) + const M = limbsToInt(modulus) + + const result = (X + Y) % M + const resultBytes = bigIntToBEBytes(result) + placeBEBytesInOutput(z, resultBytes) +} + +export function subModBinary(z: bigint[], x: bigint[], y: bigint[], modulus: bigint[]) { + const X = limbsToInt(x) + const Y = limbsToInt(y) + const M = limbsToInt(modulus) + + let result = (X - Y) % M + if (result < 0n) { + result += M + } + const resultBytes = bigIntToBEBytes(result) + placeBEBytesInOutput(z, resultBytes) +} diff --git a/packages/evm/src/evmmax/constants.ts b/packages/evm/src/evmmax/constants.ts new file mode 100644 index 00000000000..54988d790ba --- /dev/null +++ b/packages/evm/src/evmmax/constants.ts @@ -0,0 +1,7 @@ +export const SETMODX_BASE_COST = 1 +export const STOREX_BASE_COST = 1 +export const LOADX_BASE_COST = 1 +export const MAX_ALLOC_SIZE = 96 * 256 +export const SETMODX_ODD_MODULUS_COST = [23, 26, 29, 32, 36, 39, 42, 45, 48, 51, 54, 58] +export const MULMODX_COST = [1, 1, 1, 2, 2, 3, 4, 5, 7, 8, 10, 12] +export const ADD_OR_SUB_COST = [1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2] diff --git a/packages/evm/src/evmmax/fieldAllocations.ts b/packages/evm/src/evmmax/fieldAllocations.ts new file mode 100644 index 00000000000..6ee0f11bfc5 --- /dev/null +++ b/packages/evm/src/evmmax/fieldAllocations.ts 
@@ -0,0 +1,60 @@ +import { FieldContext } from './index.js' + +export class FieldAllocs { + private alloced: Map + private active: FieldContext | null + private allocedSize: bigint + + constructor() { + this.alloced = new Map() + this.active = null + this.allocedSize = BigInt(0) + } + + /** + * AllocAndSetActive takes an id (number between 0 and 255 inclusive), + * a big-endian modulus, and the number of field elements to allocate. + * Each field element occupies memory equivalent to the size of the modulus + * padded to the nearest multiple of 8 bytes. + */ + async allocAndSetActive(id: number, modulus: Uint8Array, allocCount: bigint): Promise { + if (id < 0 || id > 255) { + throw new Error('id must be between 0 and 255 inclusive') + } + + const fieldContext = new FieldContext(modulus, allocCount) + this.alloced.set(id, fieldContext) + this.active = fieldContext + this.allocedSize += BigInt(fieldContext.getAllocatedSize()) + } + + /** + * AllocSize returns the amount of EVMMAX-allocated memory (in bytes) + * in the current EVM call context. + */ + allocSize(): bigint { + return this.allocedSize + } + + /** + * SetActive sets a modulus as active in the current EVM call context. + * The modulus associated with id is assumed to have already been instantiated + * by a previous call to AllocAndSetActive. + */ + setActive(id: number): void { + const fieldContext = this.alloced.get(id) + if (!fieldContext) { + throw new Error(`FieldContext with id ${id} not found`) + } + this.active = fieldContext + } + + getActive(): FieldContext { + if (this.active == null) throw new Error('No active mod set') + return this.active + } + + getAlloced(): Map { + return this.alloced + } +} diff --git a/packages/evm/src/evmmax/fieldContext.ts b/packages/evm/src/evmmax/fieldContext.ts new file mode 100644 index 00000000000..6790adf1c93 --- /dev/null +++ b/packages/evm/src/evmmax/fieldContext.ts @@ -0,0 +1,317 @@ +import { BIGINT_8, bigIntToBytes, bytesToBigInt, concatBytes } from '@ethereumjs/util' + +import { + addModBinary, + addModPreset, + bytesToLimbs, + lt, + mulModBinary, + mulModPreset, + negModInverse, + putUint64BE, + subModBinary, + subModPreset, +} from './index.js' + +const MAX_MODULUS_SIZE = 96 // 768 bit max modulus width +const ZERO_BYTE = Uint8Array.from([0]) + +function isModulusBinary(modulus: bigint): boolean { + return modulus > 0n && (modulus & (modulus - 1n)) === 0n +} + +export class FieldContext { + public modulus: bigint[] + public r2: bigint[] | undefined + public modInvVal: bigint | undefined + + public useMontgomeryRepr: boolean + public isModulusBinary: boolean + + public scratchSpace: bigint[] + public addSubCost: bigint | undefined + public mulCost: bigint | undefined + + public addMod: Function + public subMod: Function + public mulMod: Function + + public one: bigint[] | undefined + public modulusInt: bigint + public elemSize: bigint + public scratchSpaceElemCount: bigint + public outputWriteBuf: bigint[] | undefined + + constructor(modBytes: Uint8Array, scratchSize: bigint) { + if (modBytes.length > MAX_MODULUS_SIZE) { + throw new Error('modulus cannot be greater than 768 bits') + } + if (modBytes.length === 0) { + throw new Error('modulus must be non-empty') + } + if (modBytes.subarray(0, 2) === ZERO_BYTE) { + throw new Error('most significant byte of modulus must not be zero') + } + if (scratchSize === 0n) { + throw new Error('scratch space must have non-zero size') + } + if (scratchSize > 256n) { + throw new Error('scratch space can allocate a maximum of 256 field elements') + 
} + + const mod = bytesToBigInt(modBytes) + const paddedSize = BigInt(Math.ceil(modBytes.length / 8) * 8) // Compute paddedSize as the next multiple of 8 bytes + + if (isModulusBinary(mod)) { + this.modulus = bytesToLimbs(modBytes) + this.mulMod = mulModBinary + this.addMod = addModBinary + this.subMod = subModBinary + this.scratchSpace = new Array(Number((paddedSize / BIGINT_8) * scratchSize)).fill(0n) + this.outputWriteBuf = new Array(this.scratchSpace.length).fill(0n) + this.scratchSpaceElemCount = BigInt(scratchSize) + this.modulusInt = mod + this.elemSize = paddedSize / 8n + this.useMontgomeryRepr = false + this.isModulusBinary = true + + return + } + + if (modBytes.at(-1)! % 2 === 0) { + throw new Error('modulus cannot be even') + } + + const negModInv = negModInverse(mod) + const paddedSizeBig = BigInt(paddedSize) + const shiftAmount = paddedSizeBig * 16n + const r2 = (1n << shiftAmount) % mod + + let r2Bytes = bigIntToBytes(r2) + if (modBytes.length < paddedSize) { + modBytes = concatBytes(new Uint8Array(Number(paddedSize) - modBytes.length), modBytes) + } + if (r2Bytes.length < paddedSize) { + r2Bytes = concatBytes(new Uint8Array(Number(paddedSize) - r2Bytes.length), r2Bytes) + } + + const one = new Array(Number(paddedSize / BIGINT_8)).fill(0n) + one[0] = 1n + this.modulus = bytesToLimbs(modBytes) + this.modInvVal = negModInv + this.r2 = bytesToLimbs(r2Bytes) + this.mulMod = mulModPreset[Number(paddedSize / 8n - 1n)] + this.addMod = addModPreset[Number(paddedSize / 8n - 1n)] + this.subMod = subModPreset[Number(paddedSize / 8n - 1n)] + this.scratchSpace = new Array(Number((paddedSize / BIGINT_8) * scratchSize)).fill(0n) + this.outputWriteBuf = new Array(this.scratchSpace.length).fill(0n) // TODO just globally define outputwritebuf like golang implementation + this.scratchSpaceElemCount = BigInt(scratchSize) + this.one = one + this.modulusInt = mod + this.elemSize = paddedSize / 8n + this.useMontgomeryRepr = true + this.isModulusBinary = false + } + + getAllocatedSize(): number { + return this.scratchSpace.length * 8 + } + + getElemSize(): number { + return Number(this.elemSize) + } + + getNumElems(): bigint { + return this.scratchSpaceElemCount + } + + store(dst: number, count: number, from: Uint8Array) { + const elemSize = this.modulus.length + + for (let i = 0; i < count; i++) { + const srcIdx = i * elemSize * 8 + const dstIdx = dst * elemSize + i * elemSize + const srcBytes = from.slice(srcIdx, srcIdx + elemSize * 8) + const val = bytesToLimbs(srcBytes) + if (!lt(val, this.modulus)) throw new Error(`value being stored must be less than modulus`) + if (this.useMontgomeryRepr) { + const tmp = this.scratchSpace.slice(dstIdx + elemSize) + this.mulMod(tmp, val, this.r2, this.modulus, this.modInvVal) + for (let i = 0; i < elemSize; i++) { + this.scratchSpace[dstIdx + i] = tmp[i] + } + } else { + for (let i = 0; i < elemSize; i++) { + this.scratchSpace[dstIdx + i] = val[i] + } + } + } + } + + /** + * Load 'count' field elements from this.scratchSpace (starting at index 'from') + * into the provided 'dst' Uint8Array. 
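+   * When useMontgomeryRepr is set, each element is converted back to canonical form by
+   * Montgomery-multiplying it with `one` (a*R * 1 * R^-1 = a mod m); limbs are then
+   * written out in big-endian byte order via putUint64BE.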
+ */ + public load(dst: Uint8Array, from: number, count: number): void { + const elemSize = this.modulus.length + let dstIdx = 0 + + for (let srcIdx = from; srcIdx < from + count; srcIdx++) { + // temp array to hold limbs + const res = new Array(elemSize) + + if (this.useMontgomeryRepr) { + this.mulMod( + res, + this.scratchSpace.slice(srcIdx * elemSize, (srcIdx + 1) * elemSize), + this.one, + this.modulus, + this.modInvVal, + ) + } else { + // Directly copy from scratchSpace + const slice = this.scratchSpace.slice(srcIdx * elemSize, (srcIdx + 1) * elemSize) + for (let i = 0; i < elemSize; i++) { + res[i] = slice[i] + } + } + + // Write res[] into 'dst' + for (let i = 0; i < elemSize; i++) { + const limb = res[elemSize - 1 - i] + putUint64BE(dst, dstIdx + i * 8, limb) + } + dstIdx += elemSize * 8 + } + } + + /** + * MulMod computes 'count' modular multiplications, pairwise multiplying values + * from offsets [x, x+xStride, x+xStride*2, ..., x+xStride*(count - 1)] + * and [y, y+yStride, y+yStride*2, ..., y+yStride*(count - 1)] + * placing the result in [out, out+outStride, out+outStride*2, ..., out+outStride*(count - 1)]. + */ + public mulM( + outIndex: number, + outStride: number, + x: number, + xStride: number, + y: number, + yStride: number, + count: number, + ): void { + const elemSize = this.modulus.length + + // perform the multiplications, writing into outputWriteBuf + for (let i = 0; i < count; i++) { + const xSrc = (x + i * xStride) * elemSize + const ySrc = (y + i * yStride) * elemSize + const dst = (outIndex + i * outStride) * elemSize + + const xSlice = this.scratchSpace.slice(xSrc, xSrc + elemSize) + const ySlice = this.scratchSpace.slice(ySrc, ySrc + elemSize) + + const outSlice = this.outputWriteBuf!.slice(dst, dst + elemSize) + + this.mulMod(outSlice, xSlice, ySlice, this.modulus, this.modInvVal) + + for (let j = 0; j < elemSize; j++) { + this.outputWriteBuf![dst + j] = outSlice[j] + } + } + + // copy the result from outputWriteBuf into scratchSpace + for (let i = 0; i < count; i++) { + const offset = (outIndex + i * outStride) * elemSize + for (let j = 0; j < elemSize; j++) { + this.scratchSpace[offset + j] = this.outputWriteBuf![offset + j] + } + } + } + + /** + * SubMod computes 'count' modular subtractions, pairwise subtracting values + * at offsets [x, x+xStride, ..., x+xStride*(count - 1)] and + * [y, y+yStride, ..., y+yStride*(count - 1)] + * placing the result in [out, out+outStride, ...]. 
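+   * Results are staged in outputWriteBuf and only copied back into scratchSpace after
+   * every subtraction has run, so overlapping input and output ranges still read the
+   * original operand values.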
+ */ + public subM( + outIndex: number, + outStride: number, + x: number, + xStride: number, + y: number, + yStride: number, + count: number, + ): void { + const elemSize = this.modulus.length + + // perform the subtractions into outputWriteBuf + for (let i = 0; i < count; i++) { + const xSrc = (x + i * xStride) * elemSize + const ySrc = (y + i * yStride) * elemSize + const dst = (outIndex + i * outStride) * elemSize + + const xSlice = this.scratchSpace.slice(xSrc, xSrc + elemSize) + const ySlice = this.scratchSpace.slice(ySrc, ySrc + elemSize) + const outSlice = this.outputWriteBuf!.slice(dst, dst + elemSize) + + this.subMod(outSlice, xSlice, ySlice, this.modulus) + + for (let j = 0; j < elemSize; j++) { + this.outputWriteBuf![dst + j] = outSlice[j] + } + } + + // copy from outputWriteBuf into scratchSpace + for (let i = 0; i < count; i++) { + const offset = (outIndex + i * outStride) * elemSize + for (let j = 0; j < elemSize; j++) { + this.scratchSpace[offset + j] = this.outputWriteBuf![offset + j] + } + } + } + + /** + * AddMod computes 'count' modular additions, pairwise adding values + * at offsets [x, x+xStride, ..., x+xStride*(count - 1)] and + * [y, y+yStride, ..., y+yStride*(count - 1)] + * placing the result in [out, out+outStride, ...]. + */ + public addM( + outIndex: number, + outStride: number, + x: number, + xStride: number, + y: number, + yStride: number, + count: number, + ): void { + const elemSize = Number(this.elemSize) + + // perform the additions, writing to outputWriteBuf + for (let i = 0; i < count; i++) { + const xSrc = (x + i * xStride) * elemSize + const ySrc = (y + i * yStride) * elemSize + const dst = (outIndex + i * outStride) * elemSize + + const xSlice = this.scratchSpace.slice(xSrc, xSrc + elemSize) + const ySlice = this.scratchSpace.slice(ySrc, ySrc + elemSize) + const outSlice = this.outputWriteBuf!.slice(dst, dst + elemSize) + + this.addMod(outSlice, xSlice, ySlice, this.modulus) + + for (let j = 0; j < elemSize; j++) { + this.outputWriteBuf![dst + j] = outSlice[j] + } + } + + // copy from outputWriteBuf into scratchSpace + for (let i = 0; i < count; i++) { + const offset = (outIndex + i * outStride) * elemSize + for (let j = 0; j < elemSize; j++) { + this.scratchSpace[offset + j] = this.outputWriteBuf![offset + j] + } + } + } +} diff --git a/packages/evm/src/evmmax/index.ts b/packages/evm/src/evmmax/index.ts new file mode 100644 index 00000000000..09b37d81773 --- /dev/null +++ b/packages/evm/src/evmmax/index.ts @@ -0,0 +1,8 @@ +export * from './addMod.js' +export * from './binaryArith.js' +export * from './fieldAllocations.js' +export * from './fieldContext.js' +export * from './montMulMod.js' +export * from './subMulMod.js' +export * from './util.js' +export * from './constants.js' diff --git a/packages/evm/src/evmmax/montMulMod.ts b/packages/evm/src/evmmax/montMulMod.ts new file mode 100644 index 00000000000..e120a5cf45b --- /dev/null +++ b/packages/evm/src/evmmax/montMulMod.ts @@ -0,0 +1,3029 @@ +import { MASK_64, add64, madd0, madd1, madd2, mul64, sub64 } from './index.js' + +function montMulMod64( + z: bigint[], + x: bigint[], + y: bigint[], + modulus: bigint[], + modInv: bigint, +): void { + const x0 = x[0] & MASK_64 + const y0 = y[0] & MASK_64 + const m0 = modulus[0] & MASK_64 + const inv = modInv & MASK_64 + + let t0 = 0n + let t1 = 0n + + let D = 0n + let C = 0n + let m = 0n + + let res = 0n + + { + const [carryMul, lowMul] = mul64(x0, y0) + C = carryMul + t0 = lowMul + } + + { + const [sumLow, carryOut] = add64(t1, C, 0n) + t1 = sumLow + D = 
carryOut + } + + m = (t0 * inv) & MASK_64 + { + const C = madd0(m, m0, t0) + { + const [sumLow, carryOut] = add64(t1, C, 0n) + t0 = sumLow + var newC = carryOut + } + + { + const [sumLow2, carryOut2] = add64(0n, D, newC) + t1 = sumLow2 + } + } + + { + const [diff, borrow] = sub64(t0, m0, 0n) + res = diff + D = borrow + } + + let src: bigint + if (D !== 0n && t1 === 0n) { + src = t0 + } else { + src = res + } + + z[0] = src & MASK_64 +} + +function montMulMod128( + out: bigint[], + x: bigint[], + y: bigint[], + mod: bigint[], + modInv: bigint, +): void { + const t = [0n, 0n, 0n] + let D = 0n + let m = 0n + let C = 0n + + const res = [0n, 0n] + + { + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + + const [temp2, tempD] = add64(t[2], C, 0n) + t[2] = temp2 + D = tempD + } + + m = (t[0] * modInv) & MASK_64 + + { + C = madd0(m, mod[0], t[0]) + + const [c2, t0_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t0_ + + const [t1_, c3] = add64(t[2], C, 0n) + t[1] = t1_ + C = c3 + + const [t2_, _dummy] = add64(0n, D, C) + t[2] = t2_ + } + } + + { + { + const [c4, t0_] = madd1(x[1], y[0], t[0]) + C = c4 + t[0] = t0_ + + const [c5, t1_] = madd2(x[1], y[1], t[1], C) + C = c5 + t[1] = t1_ + + const [t2_, d_] = add64(t[2], C, 0n) + t[2] = t2_ + D = d_ + } + + m = (t[0] * modInv) & MASK_64 + + { + C = madd0(m, mod[0], t[0]) + + const [c6, t0_] = madd2(m, mod[1], t[1], C) + C = c6 + t[0] = t0_ + + const [t1_, c7] = add64(t[2], C, 0n) + t[1] = t1_ + C = c7 + + const [t2_, _dummy2] = add64(0n, D, C) + t[2] = t2_ + } + } + + { + const [r0, d0] = sub64(t[0], mod[0], 0n) + res[0] = r0 + let D_ = d0 + + const [r1, d1] = sub64(t[1], mod[1], D_) + res[1] = r1 + D_ = d1 + + if (D_ !== 0n && t[2] === 0n) { + out[0] = t[0] & MASK_64 + out[1] = t[1] & MASK_64 + } else { + out[0] = res[0] & MASK_64 + out[1] = res[1] & MASK_64 + } + } +} + +function montMulMod192( + out: bigint[], // [3], final result + x: bigint[], // [3], input x + y: bigint[], // [3], input y + mod: bigint[], // [3], the modulus + modInv: bigint, // single-limb "magic factor" +): void { + // We'll keep partial results in t[0..3]. + // t has 4 limbs to handle overflow from 3-limb multiplication. + const t = [0n, 0n, 0n, 0n] + let D = 0n + let m = 0n + let C = 0n + const res = [0n, 0n, 0n] + + // -------------------------------------------- + // First outer block (j = 0) + { + // 1) t <- x[0] * y[..] 
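+    // Overall shape (essentially CIOS Montgomery multiplication): for each limb x[j],
+    // accumulate x[j] * y into t, pick m = t[0] * modInv (mod 2^64) so that t + m*mod
+    // has a zero low limb, then add m*mod and drop that limb, shifting t down by one.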
+ { + // C, t[0] = bits.Mul64(x[0], y[0]) + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + + // C, t[1] = madd1(x[0], y[1], C) + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + + // C, t[2] = madd1(x[0], y[2], C) + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + { + const [t2_, d_] = add64(t[2], lo2, 0n) + t[2] = t2_ + D = d_ + } + + // t[3], D = bits.Add64(t[3], C, 0) + { + const [t3_, d2] = add64(t[3], C, 0n) + t[3] = t3_ + D += d2 + } + } + + // 2) m = t[0]*modInv mod 2^64 + m = (t[0] * modInv) & MASK_64 + + console.log('dbg101') + console.log(`t ${t}`) + console.log(`C ${C}`) + console.log(`D ${D}`) + console.log(`m ${m}`) + + // 3) reduce 1 limb at a time + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + // C, t[0] = madd2(m, mod[1], t[1], C) + { + const [c2, t0_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t0_ + } + + // C, t[1] = madd2(m, mod[2], t[2], C) + { + const [c3, t1_] = madd2(m, mod[2], t[2], C) + C = c3 + t[1] = t1_ + } + + // t[2], C = bits.Add64(t[3], C, 0) + { + const [t2_, c4] = add64(t[3], C, 0n) + t[2] = t2_ + C = c4 + } + // t[3], _ = bits.Add64(0, D, C) + { + const [t3_, d_] = add64(0n, D, C) + t[3] = t3_ + } + } + } + + console.log('dbg102') + console.log(`t ${t}`) + console.log(`C ${C}`) + console.log(`D ${D}`) + console.log(`m ${m}`) + + // -------------------------------------------- + // Next outer blocks for j=1..2 + for (let j = 1; j < 3; j++) { + // 1) partial multiply x[j] * y => accumulate in t + { + // C, t[0] = madd1(x[j], y[0], t[0]) + const [c4, t0_] = madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + + // C, t[1] = madd2(x[j], y[1], t[1], C) + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = t1_ + + // C, t[2] = madd2(x[j], y[2], t[2], C) + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + + // t[3], D = bits.Add64(t[3], C, 0) + { + const [t3_, d_] = add64(t[3], C, 0n) + t[3] = t3_ + D = d_ + } + } + + // 2) m = t[0]*modInv (mod 2^64) + m = (t[0] * modInv) & MASK_64 + + // 3) reduce one limb at a time + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + // C, t[0] = madd2(m, mod[1], t[1], C) + { + const [c7, t0_] = madd2(m, mod[1], t[1], C) + C = c7 + t[0] = t0_ + } + + // C, t[1] = madd2(m, mod[2], t[2], C) + { + const [c7, t1_] = madd2(m, mod[2], t[2], C) + C = c7 + t[1] = t1_ + } + + // t[2], C = bits.Add64(t[2], C, 0) + { + const [t2_, c9] = add64(t[3], C, 0n) + t[2] = t2_ + C = c9 + } + + // t[3], _ = bits.Add64(0, D, C) + { + const [t3_, d_] = add64(0n, D, C) + t[3] = t3_ + } + } + } + + console.log('dbg103') + console.log(`t ${t}`) + console.log(`C ${C}`) + console.log(`D ${D}`) + console.log(`m ${m}`) + + // -------------------------------------------- + // Final subtract => res = t[0..2] - mod[0..2]. 
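+  // (Assuming the inputs are reduced, t is below 2 * mod at this point, so one conditional
+  // subtraction is enough.)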
+ // If that borrow != 0 and t[3] == 0 => revert to t, else keep res + { + let d_ = 0n + { + const [r0, b0] = sub64(t[0], mod[0], 0n) + res[0] = r0 + d_ = b0 + } + { + const [r1, b1] = sub64(t[1], mod[1], d_) + res[1] = r1 + d_ = b1 + } + { + const [r2, b2] = sub64(t[2], mod[2], d_) + res[2] = r2 + d_ = b2 + } + + if (d_ !== 0n && t[3] === 0n) { + out[0] = t[0] & MASK_64 + out[1] = t[1] & MASK_64 + out[2] = t[2] & MASK_64 + } else { + out[0] = res[0] & MASK_64 + out[1] = res[1] & MASK_64 + out[2] = res[2] & MASK_64 + } + } + + console.log('dbg104') + console.log(`out ${out}`) +} + +function montMulMod256( + out: bigint[], // [4] => final 256-bit result + x: bigint[], // [4] => input x + y: bigint[], // [4] => input y + mod: bigint[], // [4] => the modulus + modInv: bigint, // single-limb "magic factor" => -mod^-1 mod 2^64 +): void { + // t has 5 limbs => partial sums from 4-limb multiply can overflow into a 5th limb. + const t = [0n, 0n, 0n, 0n, 0n] + let D = 0n + let m = 0n + let C = 0n + // store the final 4-limb subtract result in res + const res = [0n, 0n, 0n, 0n] + + // ------------------------------- + // 1) First outer loop (unrolled) => j=0 + { + // partial multiply x[0]*y[0..3], store in t + + // C, t[0] = bits.Mul64(x[0], y[0]) + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + } + // C, t[1] = madd1(x[0], y[1], C) + { + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + } + // C, t[2] = madd1(x[0], y[2], C) + { + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + t[2] = lo2 + } + // C, t[3] = madd1(x[0], y[3], C) + { + const [hi3, lo3] = madd1(x[0], y[3], C) + C = hi3 + t[3] = lo3 + } + + // t[4], D = bits.Add64(t[4], C, 0) + { + const [t4_, d_] = add64(t[4], C, 0n) + t[4] = t4_ + D = d_ + } + + // m = t[0]*modInv mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // reduce => 1 limb at a time + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + // C, t[0] = madd2(m, mod[1], t[1], C) + { + const [c2, t0_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t0_ + } + + // C, t[1] = madd2(m, mod[2], t[2], C) + { + const [c3, t2_] = madd2(m, mod[2], t[2], C) + C = c3 + t[1] = t2_ + } + + // C, t[2] = madd2(m, mod[3], t[3], C) + { + const [c4, t3_] = madd2(m, mod[3], t[3], C) + C = c4 + t[2] = t3_ + } + + // t[3], C = bits.Add64(t[4], C, 0) + { + const [t4_, c5] = add64(t[4], C, 0n) + t[3] = t4_ + C = c5 + } + + // t[4], _ = bits.Add64(0, D, C) + { + const [t4_, carryOut] = add64(0n, D, C) + t[4] = t4_ + // carryOut is not stored => single-limb leftover + } + } + } + + // ------------------------------- + // 2) For j=1..3 + for (let j = 1; j < 4; j++) { + // partial multiply x[j] * y[0..3], plus t + + // C, t[0] = madd1(x[j], y[0], t[0]) + { + const [c4, t0_] = madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + } + // C, t[1] = madd2(x[j], y[1], t[1], C) + { + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = t1_ + } + // C, t[2] = madd2(x[j], y[2], t[2], C) + { + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + } + // C, t[3] = madd2(x[j], y[3], t[3], C) + { + const [c7, t3_] = madd2(x[j], y[3], t[3], C) + C = c7 + t[3] = t3_ + } + + // t[4], D = bits.Add64(t[4], C, 0) + { + const [t4_, d_] = add64(t[4], C, 0n) + t[4] = t4_ + D = d_ + } + + // m = t[0]*modInv mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // reduce => 1 limb at a time + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + // C, t[0] = madd2(m, mod[1], t[1], C) + { + const [c8, t0_] = madd2(m, mod[1], t[1], C) + C = c8 + t[0] = t0_ + } + + 
// C, t[1] = madd2(m, mod[2], t[2], C) + { + const [c9, t2_] = madd2(m, mod[2], t[2], C) + C = c9 + t[1] = t2_ + } + + // C, t[2] = madd2(m, mod[3], t[3], C) + { + const [c10, t3_] = madd2(m, mod[3], t[3], C) + C = c10 + t[2] = t3_ + } + + // t[3], C = bits.Add64(t[4], C, 0) + { + const [t4_, c11] = add64(t[4], C, 0n) + t[3] = t4_ + C = c11 + } + + // t[4], _ = bits.Add64(0, D, C) + { + const [t4_, carryOut] = add64(0n, D, C) + t[4] = t4_ + // carryOut not stored + } + } + } + + // ------------------------------- + // Final subtract => res = t[0..3] - mod[0..3]. + // If borrow != 0 && t[4] == 0 => revert to t, else keep res + { + let d_ = 0n + + { + const [r0, b0] = sub64(t[0], mod[0], 0n) + res[0] = r0 + d_ = b0 + } + { + const [r1, b1] = sub64(t[1], mod[1], d_) + res[1] = r1 + d_ = b1 + } + { + const [r2, b2] = sub64(t[2], mod[2], d_) + res[2] = r2 + d_ = b2 + } + { + const [r3, b3] = sub64(t[3], mod[3], d_) + res[3] = r3 + d_ = b3 + } + + if (d_ !== 0n && t[4] === 0n) { + out[0] = t[0] & MASK_64 + out[1] = t[1] & MASK_64 + out[2] = t[2] & MASK_64 + out[3] = t[3] & MASK_64 + } else { + out[0] = res[0] & MASK_64 + out[1] = res[1] & MASK_64 + out[2] = res[2] & MASK_64 + out[3] = res[3] & MASK_64 + } + } +} + +function montMulMod320( + out: bigint[], // [5], final 320-bit result + x: bigint[], // [5], input x + y: bigint[], // [5], input y + mod: bigint[], // [5], the modulus + modInv: bigint, // single-limb "magic factor" => -mod^-1 mod 2^64 +): void { + // t => partial sums, 6 limbs to handle overflow + const t = [0n, 0n, 0n, 0n, 0n, 0n] + let D = 0n + let C = 0n + let m = 0n + + // final subtract result + const res = [0n, 0n, 0n, 0n, 0n] + + //------------------------------- + // 1) "First outer loop" => j=0 + { + // multiply x[0] * y[0..4], accumulate in t + + // C, t[0] = bits.Mul64(x[0], y[0]) + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + } + // C, t[1] = madd1(x[0], y[1], C) + { + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + } + // C, t[2] = madd1(x[0], y[2], C) + { + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + t[2] = lo2 + } + // C, t[3] = madd1(x[0], y[3], C) + { + const [hi3, lo3] = madd1(x[0], y[3], C) + C = hi3 + t[3] = lo3 + } + // C, t[4] = madd1(x[0], y[4], C) + { + const [hi4, lo4] = madd1(x[0], y[4], C) + C = hi4 + t[4] = lo4 + } + + // t[5], D = bits.Add64(t[5], C, 0) + { + const [t5_, d2] = add64(t[5], C, 0n) + D = d2 + t[5] = t5_ + } + + // m = t[0] * modInv mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // reduce => 1 limb at a time + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + // next: c, t[0] = madd2(m, mod[1], t[1], C) + { + const [c2, t1_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t1_ + } + { + const [c3, t2_] = madd2(m, mod[2], t[2], C) + C = c3 + t[1] = t2_ + } + { + const [c4, t3_] = madd2(m, mod[3], t[3], C) + C = c4 + t[2] = t3_ + } + { + const [c5, t4_] = madd2(m, mod[4], t[4], C) + C = c5 + t[3] = t4_ + } + + // t[4], C = bits.Add64(t[5], C, 0) + { + const [t5_, c6] = add64(t[5], C, 0n) + t[4] = t5_ + C = c6 + } + // t[5], _ = bits.Add64(0, D, C) + { + const [t5_, leftover] = add64(0n, D, C) + t[5] = t5_ + // leftover is single-limb carry, not stored + } + } + } + + //------------------------------- + // 2) For j=1..4 + for (let j = 1; j < 5; j++) { + // partial multiply x[j] * y[0..4], incorporate in t + + // C, t[0] = madd1(x[j], y[0], t[0]) + { + const [c4, t0_] = madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + } + { + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = 
t1_ + } + { + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + } + { + const [c7, t3_] = madd2(x[j], y[3], t[3], C) + C = c7 + t[3] = t3_ + } + { + const [c8, t4_] = madd2(x[j], y[4], t[4], C) + C = c8 + t[4] = t4_ + } + + // t[5], D = bits.Add64(t[5], C, 0) + { + const [t5_, d_] = add64(t[5], C, 0n) + t[5] = t5_ + D = d_ + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // reduce => 1 limb at a time + { + C = madd0(m, mod[0], t[0]) + { + const [c9, t1_] = madd2(m, mod[1], t[1], C) + C = c9 + t[0] = t1_ + } + { + const [c10, t2_] = madd2(m, mod[2], t[2], C) + C = c10 + t[1] = t2_ + } + { + const [c11, t3_] = madd2(m, mod[3], t[3], C) + C = c11 + t[2] = t3_ + } + { + const [c12, t4_] = madd2(m, mod[4], t[4], C) + C = c12 + t[3] = t4_ + } + + { + const [t5_, c13] = add64(t[5], C, 0n) + t[4] = t5_ + C = c13 + } + { + const [t5_, leftover] = add64(0n, D, C) + t[5] = t5_ + // leftover not stored + } + } + } + + //------------------------------- + // Final subtract => res = t[0..4] - mod[0..4] + // if borrow != 0 && t[5]==0 => revert, else keep + { + let d_ = 0n + + { + const [r0, b0] = sub64(t[0], mod[0], 0n) + res[0] = r0 + d_ = b0 + } + { + const [r1, b1] = sub64(t[1], mod[1], d_) + res[1] = r1 + d_ = b1 + } + { + const [r2, b2] = sub64(t[2], mod[2], d_) + res[2] = r2 + d_ = b2 + } + { + const [r3, b3] = sub64(t[3], mod[3], d_) + res[3] = r3 + d_ = b3 + } + { + const [r4, b4] = sub64(t[4], mod[4], d_) + res[4] = r4 + d_ = b4 + } + + if (d_ !== 0n && t[5] === 0n) { + // revert => t + out[0] = t[0] & MASK_64 + out[1] = t[1] & MASK_64 + out[2] = t[2] & MASK_64 + out[3] = t[3] & MASK_64 + out[4] = t[4] & MASK_64 + } else { + out[0] = res[0] & MASK_64 + out[1] = res[1] & MASK_64 + out[2] = res[2] & MASK_64 + out[3] = res[3] & MASK_64 + out[4] = res[4] & MASK_64 + } + } +} + +function montMulMod384( + out: bigint[], // [6], final 384-bit result + x: bigint[], // [6], input x + y: bigint[], // [6], input y + mod: bigint[], // [6], the modulus + modInv: bigint, // single-limb “magic factor” => -mod^-1 mod 2^64 +): void { + // t => partial sums, 7 limbs to handle overflow from 6-limb multiplication + const t = [0n, 0n, 0n, 0n, 0n, 0n, 0n] + let D = 0n + let m = 0n + let C = 0n + + // final subtract result + const res = [0n, 0n, 0n, 0n, 0n, 0n] + + // ------------------------------- + // 1) “first outer loop”, for j=0 + { + // Multiply x[0] * y[0..5], accumulate into t + + // step-by-step: + // C, t[0] = bits.Mul64(x[0], y[0]) + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + } + // C, t[1] = madd1(x[0], y[1], C) + { + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + } + // C, t[2] = madd1(x[0], y[2], C) + { + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + t[2] = lo2 + } + // C, t[3] = madd1(x[0], y[3], C) + { + const [hi3, lo3] = madd1(x[0], y[3], C) + C = hi3 + t[3] = lo3 + } + // C, t[4] = madd1(x[0], y[4], C) + { + const [hi4, lo4] = madd1(x[0], y[4], C) + C = hi4 + t[4] = lo4 + } + // C, t[5] = madd1(x[0], y[5], C) + { + const [hi5, lo5] = madd1(x[0], y[5], C) + C = hi5 + t[5] = lo5 + } + + // t[6], D = bits.Add64(t[6], C, 0) + { + const [t6_, d2] = add64(t[6], C, 0n) + t[6] = t6_ + D = d2 + } + + // m = (t[0] * modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // partial reduce => “1 limb at a time” + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + // C, t[0] = madd2(m, mod[1], t[1], C) + { + const [c2, t1_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t1_ + } + { + const [c3, t2_] = madd2(m, 
mod[2], t[2], C) + C = c3 + t[1] = t2_ + } + { + const [c4, t3_] = madd2(m, mod[3], t[3], C) + C = c4 + t[2] = t3_ + } + { + const [c5, t4_] = madd2(m, mod[4], t[4], C) + C = c5 + t[3] = t4_ + } + { + const [c6, t5_] = madd2(m, mod[5], t[5], C) + C = c6 + t[4] = t5_ + } + + // t[5], C = bits.Add64(t[6], C, 0) + { + const [t6_, c7] = add64(t[6], C, 0n) + t[5] = t6_ + C = c7 + } + + // t[6], _ = bits.Add64(0, D, C) + { + const [t6_, leftover] = add64(0n, D, C) + t[6] = t6_ + // leftover ignored for single-limb + } + } + } + + // ------------------------------- + // 2) for j=1..5 + for (let j = 1; j < 6; j++) { + // multiply x[j]*y[0..5], incorporate into t + + // C, t[0] = madd1(x[j], y[0], t[0]) + { + const [c4, t0_] = madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + } + { + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = t1_ + } + { + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + } + { + const [c7, t3_] = madd2(x[j], y[3], t[3], C) + C = c7 + t[3] = t3_ + } + { + const [c8, t4_] = madd2(x[j], y[4], t[4], C) + C = c8 + t[4] = t4_ + } + { + const [c9, t5_] = madd2(x[j], y[5], t[5], C) + C = c9 + t[5] = t5_ + } + + // t[6], D = bits.Add64(t[6], C, 0) + { + const [t6_, d_] = add64(t[6], C, 0n) + t[6] = t6_ + D = d_ + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // reduce => 1 limb at a time + { + C = madd0(m, mod[0], t[0]) + { + const [c10, t1_] = madd2(m, mod[1], t[1], C) + C = c10 + t[0] = t1_ + } + { + const [c11, t2_] = madd2(m, mod[2], t[2], C) + C = c11 + t[1] = t2_ + } + { + const [c12, t3_] = madd2(m, mod[3], t[3], C) + C = c12 + t[2] = t3_ + } + { + const [c13, t4_] = madd2(m, mod[4], t[4], C) + C = c13 + t[3] = t4_ + } + { + const [c14, t5_] = madd2(m, mod[5], t[5], C) + C = c14 + t[4] = t5_ + } + + { + const [t6_, c15] = add64(t[6], C, 0n) + t[5] = t6_ + C = c15 + } + + { + const [t6_, leftover2] = add64(0n, D, C) + t[6] = t6_ + // leftover2 not stored + } + } + } + + // ------------------------------- + // Final subtract => res[i] = t[i] - mod[i], for i=0..5 + // if borrow != 0 && t[6]==0 => revert t, else keep res + { + let d_ = 0n + { + const [r0, b0] = sub64(t[0], mod[0], 0n) + res[0] = r0 + d_ = b0 + } + { + const [r1, b1] = sub64(t[1], mod[1], d_) + res[1] = r1 + d_ = b1 + } + { + const [r2, b2] = sub64(t[2], mod[2], d_) + res[2] = r2 + d_ = b2 + } + { + const [r3, b3] = sub64(t[3], mod[3], d_) + res[3] = r3 + d_ = b3 + } + { + const [r4, b4] = sub64(t[4], mod[4], d_) + res[4] = r4 + d_ = b4 + } + { + const [r5, b5] = sub64(t[5], mod[5], d_) + res[5] = r5 + d_ = b5 + } + + if (d_ !== 0n && t[6] === 0n) { + // revert => keep t[0..5] + out[0] = t[0] & MASK_64 + out[1] = t[1] & MASK_64 + out[2] = t[2] & MASK_64 + out[3] = t[3] & MASK_64 + out[4] = t[4] & MASK_64 + out[5] = t[5] & MASK_64 + } else { + out[0] = res[0] & MASK_64 + out[1] = res[1] & MASK_64 + out[2] = res[2] & MASK_64 + out[3] = res[3] & MASK_64 + out[4] = res[4] & MASK_64 + out[5] = res[5] & MASK_64 + } + } +} + +function montMulMod448( + out: bigint[], // [7] => final 448-bit result + x: bigint[], // [7] => input x + y: bigint[], // [7] => input y + mod: bigint[], // [7] => the modulus + modInv: bigint, // single-limb “magic factor” => -mod^-1 mod 2^64 +): void { + // t => partial sums, 8 limbs for overflow from 7-limb multiplication + const t = [0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n] + let D = 0n + let C = 0n + let m = 0n + + // final subtract array + const res = [0n, 0n, 0n, 0n, 0n, 0n, 0n] + + // --------------------------------- + // 1) First outer block 
=> j=0 + { + // Multiply x[0] * y[0..6], accumulate in t + + // C, t[0] = bits.Mul64(x[0], y[0]) + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + } + + // Repeatedly call madd1(x[0], y[i], C) for i=1..6 + { + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + } + { + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + t[2] = lo2 + } + { + const [hi3, lo3] = madd1(x[0], y[3], C) + C = hi3 + t[3] = lo3 + } + { + const [hi4, lo4] = madd1(x[0], y[4], C) + C = hi4 + t[4] = lo4 + } + { + const [hi5, lo5] = madd1(x[0], y[5], C) + C = hi5 + t[5] = lo5 + } + { + const [hi6, lo6] = madd1(x[0], y[6], C) + C = hi6 + t[6] = lo6 + } + + // t[7], D = bits.Add64(t[7], C, 0) + { + const [t7_, d2] = add64(t[7], C, 0n) + t[7] = t7_ + D = d2 + } + + // m = (t[0] * modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // reduce => one limb at a time + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + { + // C, t[0] = madd2(m, mod[1], t[1], C) + const [c2, t1_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t1_ + } + { + const [c3, t2_] = madd2(m, mod[2], t[2], C) + C = c3 + t[1] = t2_ + } + { + const [c4, t3_] = madd2(m, mod[3], t[3], C) + C = c4 + t[2] = t3_ + } + { + const [c5, t4_] = madd2(m, mod[4], t[4], C) + C = c5 + t[3] = t4_ + } + { + const [c6, t5_] = madd2(m, mod[5], t[5], C) + C = c6 + t[4] = t5_ + } + { + const [c7, t6_] = madd2(m, mod[6], t[6], C) + C = c7 + t[5] = t6_ + } + + // t[6], C = bits.Add64(t[7], C, 0) + { + const [t7_, c8] = add64(t[7], C, 0n) + t[6] = t7_ + C = c8 + } + // t[7], _ = bits.Add64(0, D, C) + { + const [t7_, leftover] = add64(0n, D, C) + t[7] = t7_ + // leftover ignored + } + } + } + + // --------------------------------- + // 2) for j=1..6 + for (let j = 1; j < 7; j++) { + // multiply x[j] * y[0..6], accumulate in t + + // C, t[0] = madd1(x[j], y[0], t[0]) + { + const [c4, t0_] = madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + } + { + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = t1_ + } + { + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + } + { + const [c7, t3_] = madd2(x[j], y[3], t[3], C) + C = c7 + t[3] = t3_ + } + { + const [c8, t4_] = madd2(x[j], y[4], t[4], C) + C = c8 + t[4] = t4_ + } + { + const [c9, t5_] = madd2(x[j], y[5], t[5], C) + C = c9 + t[5] = t5_ + } + { + const [c10, t6_] = madd2(x[j], y[6], t[6], C) + C = c10 + t[6] = t6_ + } + + // t[7], D = bits.Add64(t[7], C, 0) + { + const [t7_, d_] = add64(t[7], C, 0n) + t[7] = t7_ + D = d_ + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // reduce => one limb at a time + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + { + const [c11, t1_] = madd2(m, mod[1], t[1], C) + C = c11 + t[0] = t1_ + } + { + const [c12, t2_] = madd2(m, mod[2], t[2], C) + C = c12 + t[1] = t2_ + } + { + const [c13, t3_] = madd2(m, mod[3], t[3], C) + C = c13 + t[2] = t3_ + } + { + const [c14, t4_] = madd2(m, mod[4], t[4], C) + C = c14 + t[3] = t4_ + } + { + const [c15, t5_] = madd2(m, mod[5], t[5], C) + C = c15 + t[4] = t5_ + } + { + const [c16, t6_] = madd2(m, mod[6], t[6], C) + C = c16 + t[5] = t6_ + } + + { + const [t7_, c17] = add64(t[7], C, 0n) + t[6] = t7_ + C = c17 + } + + { + const [t7_, leftover2] = add64(0n, D, C) + t[7] = t7_ + // leftover2 not stored + } + } + } + + // --------------------------------- + // Final subtract => res[i] = t[i] - mod[i], for i=0..6 + // if borrow != 0 && t[7] == 0 => revert => keep t + { + let d_ = 0n + // subtract limbs + { + const [r0, b0] = sub64(t[0], mod[0], 0n) + 
res[0] = r0 + d_ = b0 + } + { + const [r1, b1] = sub64(t[1], mod[1], d_) + res[1] = r1 + d_ = b1 + } + { + const [r2, b2] = sub64(t[2], mod[2], d_) + res[2] = r2 + d_ = b2 + } + { + const [r3, b3] = sub64(t[3], mod[3], d_) + res[3] = r3 + d_ = b3 + } + { + const [r4, b4] = sub64(t[4], mod[4], d_) + res[4] = r4 + d_ = b4 + } + { + const [r5, b5] = sub64(t[5], mod[5], d_) + res[5] = r5 + d_ = b5 + } + { + const [r6, b6] = sub64(t[6], mod[6], d_) + res[6] = r6 + d_ = b6 + } + + // if d_ != 0 && t[7] == 0 => revert to t, else keep res + if (d_ !== 0n && t[7] === 0n) { + out[0] = t[0] & MASK_64 + out[1] = t[1] & MASK_64 + out[2] = t[2] & MASK_64 + out[3] = t[3] & MASK_64 + out[4] = t[4] & MASK_64 + out[5] = t[5] & MASK_64 + out[6] = t[6] & MASK_64 + } else { + out[0] = res[0] & MASK_64 + out[1] = res[1] & MASK_64 + out[2] = res[2] & MASK_64 + out[3] = res[3] & MASK_64 + out[4] = res[4] & MASK_64 + out[5] = res[5] & MASK_64 + out[6] = res[6] & MASK_64 + } + } +} + +function montMulMod512( + out: bigint[], // [8], final 512-bit result + x: bigint[], // [8], input x + y: bigint[], // [8], input y + mod: bigint[], // [8], the modulus + modInv: bigint, // single-limb “magic factor” => -mod^-1 mod 2^64 +): void { + // t => partial sums, 9 limbs for overflow from 8-limb multiplication + const t = [0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n] + let D = 0n + let C = 0n + let m = 0n + + // final subtract buffer + const res = [0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n] + + //------------------------------- + // 1) “first outer loop” => j=0 + { + // multiply x[0] * y[0..7], accumulate in t + + // Step by step: + // C, t[0] = bits.Mul64(x[0], y[0]) + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + } + // C, t[1] = madd1(x[0], y[1], C) + { + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + } + // Repeat for y[2]..y[7] + { + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + t[2] = lo2 + } + { + const [hi3, lo3] = madd1(x[0], y[3], C) + C = hi3 + t[3] = lo3 + } + { + const [hi4, lo4] = madd1(x[0], y[4], C) + C = hi4 + t[4] = lo4 + } + { + const [hi5, lo5] = madd1(x[0], y[5], C) + C = hi5 + t[5] = lo5 + } + { + const [hi6, lo6] = madd1(x[0], y[6], C) + C = hi6 + t[6] = lo6 + } + { + const [hi7, lo7] = madd1(x[0], y[7], C) + C = hi7 + t[7] = lo7 + } + + // t[8], D = bits.Add64(t[8], C, 0) + { + const [t8_, d2] = add64(t[8], C, 0n) + t[8] = t8_ + D = d2 + } + + // m = (t[0] * modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // reduce => one limb at a time + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + { + const [c2, t1_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t1_ + } + { + const [c3, t2_] = madd2(m, mod[2], t[2], C) + C = c3 + t[1] = t2_ + } + { + const [c4, t3_] = madd2(m, mod[3], t[3], C) + C = c4 + t[2] = t3_ + } + { + const [c5, t4_] = madd2(m, mod[4], t[4], C) + C = c5 + t[3] = t4_ + } + { + const [c6, t5_] = madd2(m, mod[5], t[5], C) + C = c6 + t[4] = t5_ + } + { + const [c7, t6_] = madd2(m, mod[6], t[6], C) + C = c7 + t[5] = t6_ + } + { + const [c8, t7_] = madd2(m, mod[7], t[7], C) + C = c8 + t[6] = t7_ + } + + // t[7], C = bits.Add64(t[8], C, 0) + { + const [t8_, c9] = add64(t[8], C, 0n) + t[7] = t8_ + C = c9 + } + // t[8], _ = bits.Add64(0, D, C) + { + const [t8_, leftover] = add64(0n, D, C) + t[8] = t8_ + // leftover not stored for single-limb + } + } + } + + // ------------------------------- + // 2) For j=1..7 + for (let j = 1; j < 8; j++) { + // multiply x[j]*y[0..7], incorporate into t + + // C, t[0] = madd1(x[j], y[0], t[0]) + { + const [c4, t0_] = 
madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + } + { + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = t1_ + } + { + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + } + { + const [c7, t3_] = madd2(x[j], y[3], t[3], C) + C = c7 + t[3] = t3_ + } + { + const [c8, t4_] = madd2(x[j], y[4], t[4], C) + C = c8 + t[4] = t4_ + } + { + const [c9, t5_] = madd2(x[j], y[5], t[5], C) + C = c9 + t[5] = t5_ + } + { + const [c10, t6_] = madd2(x[j], y[6], t[6], C) + C = c10 + t[6] = t6_ + } + { + const [c11, t7_] = madd2(x[j], y[7], t[7], C) + C = c11 + t[7] = t7_ + } + + { + const [t8_, d_] = add64(t[8], C, 0n) + t[8] = t8_ + D = d_ + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // partial reduce => 1 limb at a time + { + C = madd0(m, mod[0], t[0]) + + { + const [c12, t1_] = madd2(m, mod[1], t[1], C) + C = c12 + t[0] = t1_ + } + { + const [c13, t2_] = madd2(m, mod[2], t[2], C) + C = c13 + t[1] = t2_ + } + { + const [c14, t3_] = madd2(m, mod[3], t[3], C) + C = c14 + t[2] = t3_ + } + { + const [c15, t4_] = madd2(m, mod[4], t[4], C) + C = c15 + t[3] = t4_ + } + { + const [c16, t5_] = madd2(m, mod[5], t[5], C) + C = c16 + t[4] = t5_ + } + { + const [c17, t6_] = madd2(m, mod[6], t[6], C) + C = c17 + t[5] = t6_ + } + { + const [c18, t7_] = madd2(m, mod[7], t[7], C) + C = c18 + t[6] = t7_ + } + + { + const [t8_, c19] = add64(t[8], C, 0n) + t[7] = t8_ + C = c19 + } + + { + const [t8_, leftover2] = add64(0n, D, C) + t[8] = t8_ + // leftover2 not stored + } + } + } + + // ------------------------------- + // Final subtract => res = t[0..7] - mod[0..7] + // if borrow != 0 && t[8] == 0 => revert, else keep + { + let d_ = 0n + { + const [r0, b0] = sub64(t[0], mod[0], 0n) + res[0] = r0 + d_ = b0 + } + { + const [r1, b1] = sub64(t[1], mod[1], d_) + res[1] = r1 + d_ = b1 + } + { + const [r2, b2] = sub64(t[2], mod[2], d_) + res[2] = r2 + d_ = b2 + } + { + const [r3, b3] = sub64(t[3], mod[3], d_) + res[3] = r3 + d_ = b3 + } + { + const [r4, b4] = sub64(t[4], mod[4], d_) + res[4] = r4 + d_ = b4 + } + { + const [r5, b5] = sub64(t[5], mod[5], d_) + res[5] = r5 + d_ = b5 + } + { + const [r6, b6] = sub64(t[6], mod[6], d_) + res[6] = r6 + d_ = b6 + } + { + const [r7, b7] = sub64(t[7], mod[7], d_) + res[7] = r7 + d_ = b7 + } + + if (d_ !== 0n && t[8] === 0n) { + // revert => keep t + out[0] = t[0] & MASK_64 + out[1] = t[1] & MASK_64 + out[2] = t[2] & MASK_64 + out[3] = t[3] & MASK_64 + out[4] = t[4] & MASK_64 + out[5] = t[5] & MASK_64 + out[6] = t[6] & MASK_64 + out[7] = t[7] & MASK_64 + } else { + out[0] = res[0] & MASK_64 + out[1] = res[1] & MASK_64 + out[2] = res[2] & MASK_64 + out[3] = res[3] & MASK_64 + out[4] = res[4] & MASK_64 + out[5] = res[5] & MASK_64 + out[6] = res[6] & MASK_64 + out[7] = res[7] & MASK_64 + } + } +} + +function montMulMod576( + out: bigint[], // [9], final 576-bit result + x: bigint[], // [9], input x + y: bigint[], // [9], input y + mod: bigint[], // [9], modulus + modInv: bigint, // single-limb “magic factor” => -mod^-1 mod 2^64 +): void { + // t => partial sums, 10 limbs for overflow from 9-limb multiplication + const t = [0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n] + let D = 0n + let C = 0n + let m = 0n + + // store the final subtract results + const res = [0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n] + + // ------------------------------- + // 1) First outer loop => j=0 + { + // multiply x[0] * y[0..8], accumulate in t + + // C, t[0] = bits.Mul64(x[0], y[0]) + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + } + // C, t[1] = 
madd1(x[0], y[1], C) + { + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + } + { + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + t[2] = lo2 + } + { + const [hi3, lo3] = madd1(x[0], y[3], C) + C = hi3 + t[3] = lo3 + } + { + const [hi4, lo4] = madd1(x[0], y[4], C) + C = hi4 + t[4] = lo4 + } + { + const [hi5, lo5] = madd1(x[0], y[5], C) + C = hi5 + t[5] = lo5 + } + { + const [hi6, lo6] = madd1(x[0], y[6], C) + C = hi6 + t[6] = lo6 + } + { + const [hi7, lo7] = madd1(x[0], y[7], C) + C = hi7 + t[7] = lo7 + } + { + const [hi8, lo8] = madd1(x[0], y[8], C) + C = hi8 + t[8] = lo8 + } + + // t[9], D = bits.Add64(t[9], C, 0) + { + const [t9_, d2] = add64(t[9], C, 0n) + t[9] = t9_ + D = d2 + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // partial reduce => 1 limb + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + { + const [c2, t1_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t1_ + } + { + const [c3, t2_] = madd2(m, mod[2], t[2], C) + C = c3 + t[1] = t2_ + } + { + const [c4, t3_] = madd2(m, mod[3], t[3], C) + C = c4 + t[2] = t3_ + } + { + const [c5, t4_] = madd2(m, mod[4], t[4], C) + C = c5 + t[3] = t4_ + } + { + const [c6, t5_] = madd2(m, mod[5], t[5], C) + C = c6 + t[4] = t5_ + } + { + const [c7, t6_] = madd2(m, mod[6], t[6], C) + C = c7 + t[5] = t6_ + } + { + const [c8, t7_] = madd2(m, mod[7], t[7], C) + C = c8 + t[6] = t7_ + } + { + const [c9, t8_] = madd2(m, mod[8], t[8], C) + C = c9 + t[7] = t8_ + } + + // t[8], C = bits.Add64(t[9], C, 0) + { + const [t9_, c10] = add64(t[9], C, 0n) + t[8] = t9_ + C = c10 + } + // t[9], _ = bits.Add64(0, D, C) + { + const [t9_, leftover] = add64(0n, D, C) + t[9] = t9_ + // leftover ignored + } + } + } + + // ------------------------------- + // 2) for j=1..8 + for (let j = 1; j < 9; j++) { + // multiply x[j]*y[0..8], accumulate in t + + // C, t[0] = madd1(x[j], y[0], t[0]) + { + const [c4, t0_] = madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + } + { + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = t1_ + } + { + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + } + { + const [c7, t3_] = madd2(x[j], y[3], t[3], C) + C = c7 + t[3] = t3_ + } + { + const [c8, t4_] = madd2(x[j], y[4], t[4], C) + C = c8 + t[4] = t4_ + } + { + const [c9, t5_] = madd2(x[j], y[5], t[5], C) + C = c9 + t[5] = t5_ + } + { + const [c10, t6_] = madd2(x[j], y[6], t[6], C) + C = c10 + t[6] = t6_ + } + { + const [c11, t7_] = madd2(x[j], y[7], t[7], C) + C = c11 + t[7] = t7_ + } + { + const [c12, t8_] = madd2(x[j], y[8], t[8], C) + C = c12 + t[8] = t8_ + } + + { + const [t9_, d_] = add64(t[9], C, 0n) + t[9] = t9_ + D = d_ + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // reduce => 1 limb + { + C = madd0(m, mod[0], t[0]) + + { + const [c13, t1_] = madd2(m, mod[1], t[1], C) + C = c13 + t[0] = t1_ + } + { + const [c14, t2_] = madd2(m, mod[2], t[2], C) + C = c14 + t[1] = t2_ + } + { + const [c15, t3_] = madd2(m, mod[3], t[3], C) + C = c15 + t[2] = t3_ + } + { + const [c16, t4_] = madd2(m, mod[4], t[4], C) + C = c16 + t[3] = t4_ + } + { + const [c17, t5_] = madd2(m, mod[5], t[5], C) + C = c17 + t[4] = t5_ + } + { + const [c18, t6_] = madd2(m, mod[6], t[6], C) + C = c18 + t[5] = t6_ + } + { + const [c19, t7_] = madd2(m, mod[7], t[7], C) + C = c19 + t[6] = t7_ + } + { + const [c20, t8_] = madd2(m, mod[8], t[8], C) + C = c20 + t[7] = t8_ + } + + { + const [t9_, c21] = add64(t[9], C, 0n) + t[8] = t9_ + C = c21 + } + + { + const [t9_, leftover2] = add64(0n, D, C) + t[9] = t9_ + 
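+        // leftover2 is provably zero here: D and C are both single-bit carries out of
+        // 64-bit adds at this point, so 0 + D + C can never overflow the limb.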
// leftover2 not stored + } + } + } + + // ------------------------------- + // Final subtract => res = t[0..8] - mod[0..8] + // if borrow != 0 && t[9] == 0 => revert => keep t + { + let d_ = 0n + { + const [r0, b0] = sub64(t[0], mod[0], 0n) + res[0] = r0 + d_ = b0 + } + { + const [r1, b1] = sub64(t[1], mod[1], d_) + res[1] = r1 + d_ = b1 + } + { + const [r2, b2] = sub64(t[2], mod[2], d_) + res[2] = r2 + d_ = b2 + } + { + const [r3, b3] = sub64(t[3], mod[3], d_) + res[3] = r3 + d_ = b3 + } + { + const [r4, b4] = sub64(t[4], mod[4], d_) + res[4] = r4 + d_ = b4 + } + { + const [r5, b5] = sub64(t[5], mod[5], d_) + res[5] = r5 + d_ = b5 + } + { + const [r6, b6] = sub64(t[6], mod[6], d_) + res[6] = r6 + d_ = b6 + } + { + const [r7, b7] = sub64(t[7], mod[7], d_) + res[7] = r7 + d_ = b7 + } + { + const [r8, b8] = sub64(t[8], mod[8], d_) + res[8] = r8 + d_ = b8 + } + + if (d_ !== 0n && t[9] === 0n) { + // revert => keep t + for (let i = 0; i < 9; i++) { + out[i] = t[i] & MASK_64 + } + } else { + for (let i = 0; i < 9; i++) { + out[i] = res[i] & MASK_64 + } + } + } +} + +function montMulMod640( + out: bigint[], // [10], final 640-bit result + x: bigint[], // [10], input x + y: bigint[], // [10], input y + mod: bigint[], // [10], the modulus + modInv: bigint, // single-limb “magic factor” => -mod^-1 mod 2^64 +): void { + // Temporary accumulation array t, length 11 for overflow + const t = [0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n] + let D = 0n + let C = 0n + let m = 0n + + // We'll store the final subtract result in `res`. + const res = new Array(10).fill(0n) + + // 1) "First outer loop" => j=0 + { + // multiply x[0]*y[0..9], accumulate in t + + // C, t[0] = bits.Mul64(x[0], y[0]) + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + } + // Then do madd1 for y[1..9] + { + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + } + { + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + t[2] = lo2 + } + { + const [hi3, lo3] = madd1(x[0], y[3], C) + C = hi3 + t[3] = lo3 + } + { + const [hi4, lo4] = madd1(x[0], y[4], C) + C = hi4 + t[4] = lo4 + } + { + const [hi5, lo5] = madd1(x[0], y[5], C) + C = hi5 + t[5] = lo5 + } + { + const [hi6, lo6] = madd1(x[0], y[6], C) + C = hi6 + t[6] = lo6 + } + { + const [hi7, lo7] = madd1(x[0], y[7], C) + C = hi7 + t[7] = lo7 + } + { + const [hi8, lo8] = madd1(x[0], y[8], C) + C = hi8 + t[8] = lo8 + } + { + const [hi9, lo9] = madd1(x[0], y[9], C) + C = hi9 + t[9] = lo9 + } + + // t[10], D = bits.Add64(t[10], C, 0) + { + const [t10_, d2] = add64(t[10], C, 0n) + t[10] = t10_ + D = d2 + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // partial reduce => "one limb at a time" + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + { + const [c2, t1_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t1_ + } + { + const [c3, t2_] = madd2(m, mod[2], t[2], C) + C = c3 + t[1] = t2_ + } + { + const [c4, t3_] = madd2(m, mod[3], t[3], C) + C = c4 + t[2] = t3_ + } + { + const [c5, t4_] = madd2(m, mod[4], t[4], C) + C = c5 + t[3] = t4_ + } + { + const [c6, t5_] = madd2(m, mod[5], t[5], C) + C = c6 + t[4] = t5_ + } + { + const [c7, t6_] = madd2(m, mod[6], t[6], C) + C = c7 + t[5] = t6_ + } + { + const [c8, t7_] = madd2(m, mod[7], t[7], C) + C = c8 + t[6] = t7_ + } + { + const [c9, t8_] = madd2(m, mod[8], t[8], C) + C = c9 + t[7] = t8_ + } + { + const [c10, t9_] = madd2(m, mod[9], t[9], C) + C = c10 + t[8] = t9_ + } + + // t[9], C = bits.Add64(t[10], C, 0) + { + const [t10_, c11] = add64(t[10], C, 0n) + t[9] = t10_ + C = c11 + 
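+        // t[9] now holds the former top word of the accumulator plus the reduction
+        // carry; the multiply carry saved in D is folded into t[10] in the next step.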
} + // t[10], _ = bits.Add64(0, D, C) + { + const [t10_, leftover] = add64(0n, D, C) + t[10] = t10_ + // leftover not stored + } + } + } + + // ------------------------------- + // 2) for j=1..9 + for (let j = 1; j < 10; j++) { + // multiply x[j]*y[0..9], accumulate in t + + // C, t[0] = madd1(x[j], y[0], t[0]) + { + const [c4, t0_] = madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + } + { + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = t1_ + } + { + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + } + { + const [c7, t3_] = madd2(x[j], y[3], t[3], C) + C = c7 + t[3] = t3_ + } + { + const [c8, t4_] = madd2(x[j], y[4], t[4], C) + C = c8 + t[4] = t4_ + } + { + const [c9, t5_] = madd2(x[j], y[5], t[5], C) + C = c9 + t[5] = t5_ + } + { + const [c10, t6_] = madd2(x[j], y[6], t[6], C) + C = c10 + t[6] = t6_ + } + { + const [c11, t7_] = madd2(x[j], y[7], t[7], C) + C = c11 + t[7] = t7_ + } + { + const [c12, t8_] = madd2(x[j], y[8], t[8], C) + C = c12 + t[8] = t8_ + } + { + const [c13, t9_] = madd2(x[j], y[9], t[9], C) + C = c13 + t[9] = t9_ + } + + { + const [t10_, d_] = add64(t[10], C, 0n) + t[10] = t10_ + D = d_ + } + + // m = (t[0] * modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // partial reduce => 1 limb at a time + { + C = madd0(m, mod[0], t[0]) + + { + const [c14, t1_] = madd2(m, mod[1], t[1], C) + C = c14 + t[0] = t1_ + } + { + const [c15, t2_] = madd2(m, mod[2], t[2], C) + C = c15 + t[1] = t2_ + } + { + const [c16, t3_] = madd2(m, mod[3], t[3], C) + C = c16 + t[2] = t3_ + } + { + const [c17, t4_] = madd2(m, mod[4], t[4], C) + C = c17 + t[3] = t4_ + } + { + const [c18, t5_] = madd2(m, mod[5], t[5], C) + C = c18 + t[4] = t5_ + } + { + const [c19, t6_] = madd2(m, mod[6], t[6], C) + C = c19 + t[5] = t6_ + } + { + const [c20, t7_] = madd2(m, mod[7], t[7], C) + C = c20 + t[6] = t7_ + } + { + const [c21, t8_] = madd2(m, mod[8], t[8], C) + C = c21 + t[7] = t8_ + } + { + const [c22, t9_] = madd2(m, mod[9], t[9], C) + C = c22 + t[8] = t9_ + } + + { + const [t10_, c23] = add64(t[10], C, 0n) + t[9] = t10_ + C = c23 + } + + { + const [t10_, leftover2] = add64(0n, D, C) + t[10] = t10_ + // leftover2 not stored + } + } + } + + // ------------------------------- + // Final subtract => res[i] = t[i] - mod[i], for i=0..9 + // if borrow != 0 && t[10] == 0 => revert => keep t + { + let d_ = 0n + { + const [r0, b0] = sub64(t[0], mod[0], 0n) + res[0] = r0 + d_ = b0 + } + { + const [r1, b1] = sub64(t[1], mod[1], d_) + res[1] = r1 + d_ = b1 + } + { + const [r2, b2] = sub64(t[2], mod[2], d_) + res[2] = r2 + d_ = b2 + } + { + const [r3, b3] = sub64(t[3], mod[3], d_) + res[3] = r3 + d_ = b3 + } + { + const [r4, b4] = sub64(t[4], mod[4], d_) + res[4] = r4 + d_ = b4 + } + { + const [r5, b5] = sub64(t[5], mod[5], d_) + res[5] = r5 + d_ = b5 + } + { + const [r6, b6] = sub64(t[6], mod[6], d_) + res[6] = r6 + d_ = b6 + } + { + const [r7, b7] = sub64(t[7], mod[7], d_) + res[7] = r7 + d_ = b7 + } + { + const [r8, b8] = sub64(t[8], mod[8], d_) + res[8] = r8 + d_ = b8 + } + { + const [r9, b9] = sub64(t[9], mod[9], d_) + res[9] = r9 + d_ = b9 + } + + // If we borrowed but t[10] is 0 => revert => keep t + if (d_ !== 0n && t[10] === 0n) { + for (let i = 0; i < 10; i++) { + out[i] = t[i] & MASK_64 + } + } else { + for (let i = 0; i < 10; i++) { + out[i] = res[i] & MASK_64 + } + } + } +} + +function montMulMod704( + out: bigint[], // [11], final 704-bit result + x: bigint[], // [11], input x + y: bigint[], // [11], input y + mod: bigint[], // [11], modulus + modInv: bigint, // single-limb 
“magic factor” => -mod^-1 mod 2^64 +): void { + // t => partial sums, 12 limbs for overflow from 11-limb multiplication + const t = new Array(12).fill(0n) + let D = 0n + let C = 0n + let m = 0n + + // store final subtract in res + const res = new Array(11).fill(0n) + + // ------------------------------- + // 1) "First outer loop" => j=0 + { + // multiply x[0] * y[0..10], accumulate in t + + // C, t[0] = bits.Mul64(x[0], y[0]) + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + } + // then do madd1 for y[1..10] + { + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + } + { + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + t[2] = lo2 + } + { + const [hi3, lo3] = madd1(x[0], y[3], C) + C = hi3 + t[3] = lo3 + } + { + const [hi4, lo4] = madd1(x[0], y[4], C) + C = hi4 + t[4] = lo4 + } + { + const [hi5, lo5] = madd1(x[0], y[5], C) + C = hi5 + t[5] = lo5 + } + { + const [hi6, lo6] = madd1(x[0], y[6], C) + C = hi6 + t[6] = lo6 + } + { + const [hi7, lo7] = madd1(x[0], y[7], C) + C = hi7 + t[7] = lo7 + } + { + const [hi8, lo8] = madd1(x[0], y[8], C) + C = hi8 + t[8] = lo8 + } + { + const [hi9, lo9] = madd1(x[0], y[9], C) + C = hi9 + t[9] = lo9 + } + { + const [hi10, lo10] = madd1(x[0], y[10], C) + C = hi10 + t[10] = lo10 + } + + // t[11], D = bits.Add64(t[11], C, 0) + { + const [t11_, d2] = add64(t[11], C, 0n) + t[11] = t11_ + D = d2 + } + + // m = (t[0] * modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // partial reduce => "one limb at a time" + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + { + const [c2, t1_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t1_ + } + { + const [c3, t2_] = madd2(m, mod[2], t[2], C) + C = c3 + t[1] = t2_ + } + { + const [c4, t3_] = madd2(m, mod[3], t[3], C) + C = c4 + t[2] = t3_ + } + { + const [c5, t4_] = madd2(m, mod[4], t[4], C) + C = c5 + t[3] = t4_ + } + { + const [c6, t5_] = madd2(m, mod[5], t[5], C) + C = c6 + t[4] = t5_ + } + { + const [c7, t6_] = madd2(m, mod[6], t[6], C) + C = c7 + t[5] = t6_ + } + { + const [c8, t7_] = madd2(m, mod[7], t[7], C) + C = c8 + t[6] = t7_ + } + { + const [c9, t8_] = madd2(m, mod[8], t[8], C) + C = c9 + t[7] = t8_ + } + { + const [c10, t9_] = madd2(m, mod[9], t[9], C) + C = c10 + t[8] = t9_ + } + { + const [c11, t10_] = madd2(m, mod[10], t[10], C) + C = c11 + t[9] = t10_ + } + + { + const [t11_, c12] = add64(t[11], C, 0n) + t[10] = t11_ + C = c12 + } + { + const [t11_, leftover] = add64(0n, D, C) + t[11] = t11_ + // leftover not used + } + } + } + + // ------------------------------- + // 2) For j=1..10 + for (let j = 1; j < 11; j++) { + // multiply x[j]*y[0..10], accumulate in t + + // C, t[0] = madd1(x[j], y[0], t[0]) + { + const [c4, t0_] = madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + } + { + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = t1_ + } + { + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + } + { + const [c7, t3_] = madd2(x[j], y[3], t[3], C) + C = c7 + t[3] = t3_ + } + { + const [c8, t4_] = madd2(x[j], y[4], t[4], C) + C = c8 + t[4] = t4_ + } + { + const [c9, t5_] = madd2(x[j], y[5], t[5], C) + C = c9 + t[5] = t5_ + } + { + const [c10, t6_] = madd2(x[j], y[6], t[6], C) + C = c10 + t[6] = t6_ + } + { + const [c11, t7_] = madd2(x[j], y[7], t[7], C) + C = c11 + t[7] = t7_ + } + { + const [c12, t8_] = madd2(x[j], y[8], t[8], C) + C = c12 + t[8] = t8_ + } + { + const [c13, t9_] = madd2(x[j], y[9], t[9], C) + C = c13 + t[9] = t9_ + } + { + const [c14, t10_] = madd2(x[j], y[10], t[10], C) + C = c14 + t[10] = t10_ + } + + { + 
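+      // Capture the last multiply carry: t[11] absorbs C, and D records the overflow
+      // bit so it can be re-added after the reduction shifts everything down a limb.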
const [t11_, d_] = add64(t[11], C, 0n) + t[11] = t11_ + D = d_ + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // partial reduce => 1 limb at a time + { + C = madd0(m, mod[0], t[0]) + + { + const [c15, t1_] = madd2(m, mod[1], t[1], C) + C = c15 + t[0] = t1_ + } + { + const [c16, t2_] = madd2(m, mod[2], t[2], C) + C = c16 + t[1] = t2_ + } + { + const [c17, t3_] = madd2(m, mod[3], t[3], C) + C = c17 + t[2] = t3_ + } + { + const [c18, t4_] = madd2(m, mod[4], t[4], C) + C = c18 + t[3] = t4_ + } + { + const [c19, t5_] = madd2(m, mod[5], t[5], C) + C = c19 + t[4] = t5_ + } + { + const [c20, t6_] = madd2(m, mod[6], t[6], C) + C = c20 + t[5] = t6_ + } + { + const [c21, t7_] = madd2(m, mod[7], t[7], C) + C = c21 + t[6] = t7_ + } + { + const [c22, t8_] = madd2(m, mod[8], t[8], C) + C = c22 + t[7] = t8_ + } + { + const [c23, t9_] = madd2(m, mod[9], t[9], C) + C = c23 + t[8] = t9_ + } + { + const [c24, t10_] = madd2(m, mod[10], t[10], C) + C = c24 + t[9] = t10_ + } + + { + const [t11_, c25] = add64(t[11], C, 0n) + t[10] = t11_ + C = c25 + } + + { + const [t11_, leftover2] = add64(0n, D, C) + t[11] = t11_ + // leftover2 not stored + } + } + } + + // ------------------------------- + // Final subtract => res[i] = t[i] - mod[i], for i=0..10 + // if borrow != 0 && t[11] == 0 => revert => keep t + { + let d_ = 0n + for (let i = 0; i < 11; i++) { + const [ri, bi] = sub64(t[i], mod[i], d_) + res[i] = ri + d_ = bi + } + + if (d_ !== 0n && t[11] === 0n) { + for (let i = 0; i < 11; i++) { + out[i] = t[i] & MASK_64 + } + } else { + for (let i = 0; i < 11; i++) { + out[i] = res[i] & MASK_64 + } + } + } +} + +function montMulMod768( + out: bigint[], // [12], final 768-bit result + x: bigint[], // [12], input x + y: bigint[], // [12], input y + mod: bigint[], // [12], the modulus + modInv: bigint, // single-limb “magic factor” => -mod^-1 mod 2^64 +): void { + // t => partial sums, 13 limbs for overflow from 12-limb multiply + const t = new Array(13).fill(0n) + let D = 0n + let C = 0n + let m = 0n + + // final subtract result buffer + const res = new Array(12).fill(0n) + + // -------------------------------- + // 1) “first outer loop” => j=0 + { + // multiply x[0]*y[0..11], accumulate in t + + // C, t[0] = bits.Mul64(x[0], y[0]) + { + const [hi0, lo0] = mul64(x[0], y[0]) + C = hi0 + t[0] = lo0 + } + // repeat madd1 for y[1..11] + { + const [hi1, lo1] = madd1(x[0], y[1], C) + C = hi1 + t[1] = lo1 + } + { + const [hi2, lo2] = madd1(x[0], y[2], C) + C = hi2 + t[2] = lo2 + } + { + const [hi3, lo3] = madd1(x[0], y[3], C) + C = hi3 + t[3] = lo3 + } + { + const [hi4, lo4] = madd1(x[0], y[4], C) + C = hi4 + t[4] = lo4 + } + { + const [hi5, lo5] = madd1(x[0], y[5], C) + C = hi5 + t[5] = lo5 + } + { + const [hi6, lo6] = madd1(x[0], y[6], C) + C = hi6 + t[6] = lo6 + } + { + const [hi7, lo7] = madd1(x[0], y[7], C) + C = hi7 + t[7] = lo7 + } + { + const [hi8, lo8] = madd1(x[0], y[8], C) + C = hi8 + t[8] = lo8 + } + { + const [hi9, lo9] = madd1(x[0], y[9], C) + C = hi9 + t[9] = lo9 + } + { + const [hi10, lo10] = madd1(x[0], y[10], C) + C = hi10 + t[10] = lo10 + } + { + const [hi11, lo11] = madd1(x[0], y[11], C) + C = hi11 + t[11] = lo11 + } + + // t[12], D = bits.Add64(t[12], C, 0) + { + const [t12_, d2] = add64(t[12], C, 0n) + t[12] = t12_ + D = d2 + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // partial reduce => "one limb at a time" + { + // C = madd0(m, mod[0], t[0]) + C = madd0(m, mod[0], t[0]) + + { + const [c2, t1_] = madd2(m, mod[1], t[1], C) + C = c2 + t[0] = t1_ + } 
+ { + const [c3, t2_] = madd2(m, mod[2], t[2], C) + C = c3 + t[1] = t2_ + } + { + const [c4, t3_] = madd2(m, mod[3], t[3], C) + C = c4 + t[2] = t3_ + } + { + const [c5, t4_] = madd2(m, mod[4], t[4], C) + C = c5 + t[3] = t4_ + } + { + const [c6, t5_] = madd2(m, mod[5], t[5], C) + C = c6 + t[4] = t5_ + } + { + const [c7, t6_] = madd2(m, mod[6], t[6], C) + C = c7 + t[5] = t6_ + } + { + const [c8, t7_] = madd2(m, mod[7], t[7], C) + C = c8 + t[6] = t7_ + } + { + const [c9, t8_] = madd2(m, mod[8], t[8], C) + C = c9 + t[7] = t8_ + } + { + const [c10, t9_] = madd2(m, mod[9], t[9], C) + C = c10 + t[8] = t9_ + } + { + const [c11, t10_] = madd2(m, mod[10], t[10], C) + C = c11 + t[9] = t10_ + } + { + const [c12, t11_] = madd2(m, mod[11], t[11], C) + C = c12 + t[10] = t11_ + } + + { + const [t12_, c13] = add64(t[12], C, 0n) + t[11] = t12_ + C = c13 + } + + { + const [t12_, leftover] = add64(0n, D, C) + t[12] = t12_ + // leftover not stored + } + } + } + + // -------------------------------- + // 2) For j=1..11 + for (let j = 1; j < 12; j++) { + // multiply x[j]*y[0..11], accumulate in t + + // C, t[0] = madd1(x[j], y[0], t[0]) + { + const [c4, t0_] = madd1(x[j], y[0], t[0]) + C = c4 + t[0] = t0_ + } + { + const [c5, t1_] = madd2(x[j], y[1], t[1], C) + C = c5 + t[1] = t1_ + } + { + const [c6, t2_] = madd2(x[j], y[2], t[2], C) + C = c6 + t[2] = t2_ + } + { + const [c7, t3_] = madd2(x[j], y[3], t[3], C) + C = c7 + t[3] = t3_ + } + { + const [c8, t4_] = madd2(x[j], y[4], t[4], C) + C = c8 + t[4] = t4_ + } + { + const [c9, t5_] = madd2(x[j], y[5], t[5], C) + C = c9 + t[5] = t5_ + } + { + const [c10, t6_] = madd2(x[j], y[6], t[6], C) + C = c10 + t[6] = t6_ + } + { + const [c11, t7_] = madd2(x[j], y[7], t[7], C) + C = c11 + t[7] = t7_ + } + { + const [c12, t8_] = madd2(x[j], y[8], t[8], C) + C = c12 + t[8] = t8_ + } + { + const [c13, t9_] = madd2(x[j], y[9], t[9], C) + C = c13 + t[9] = t9_ + } + { + const [c14, t10_] = madd2(x[j], y[10], t[10], C) + C = c14 + t[10] = t10_ + } + { + const [c15, t11_] = madd2(x[j], y[11], t[11], C) + C = c15 + t[11] = t11_ + } + + { + const [t12_, d_] = add64(t[12], C, 0n) + t[12] = t12_ + D = d_ + } + + // m = (t[0]*modInv) mod 2^64 + m = (t[0] * modInv) & MASK_64 + + // partial reduce => 1 limb at a time + { + C = madd0(m, mod[0], t[0]) + + { + const [c16, t1_] = madd2(m, mod[1], t[1], C) + C = c16 + t[0] = t1_ + } + { + const [c17, t2_] = madd2(m, mod[2], t[2], C) + C = c17 + t[1] = t2_ + } + { + const [c18, t3_] = madd2(m, mod[3], t[3], C) + C = c18 + t[2] = t3_ + } + { + const [c19, t4_] = madd2(m, mod[4], t[4], C) + C = c19 + t[3] = t4_ + } + { + const [c20, t5_] = madd2(m, mod[5], t[5], C) + C = c20 + t[4] = t5_ + } + { + const [c21, t6_] = madd2(m, mod[6], t[6], C) + C = c21 + t[5] = t6_ + } + { + const [c22, t7_] = madd2(m, mod[7], t[7], C) + C = c22 + t[6] = t7_ + } + { + const [c23, t8_] = madd2(m, mod[8], t[8], C) + C = c23 + t[7] = t8_ + } + { + const [c24, t9_] = madd2(m, mod[9], t[9], C) + C = c24 + t[8] = t9_ + } + { + const [c25, t10_] = madd2(m, mod[10], t[10], C) + C = c25 + t[9] = t10_ + } + { + const [c26, t11_] = madd2(m, mod[11], t[11], C) + C = c26 + t[10] = t11_ + } + + { + const [t12_, c27] = add64(t[12], C, 0n) + t[11] = t12_ + C = c27 + } + + { + const [t12_, leftover2] = add64(0n, D, C) + t[12] = t12_ + // leftover2 not stored + } + } + } + + // -------------------------------- + // Final subtract => res = t[0..11] - mod[0..11] + // if borrow != 0 && t[12] == 0 => revert => keep t + { + let d_ = 0n + for (let i = 0; i < 12; i++) { + const [ri, bi] = 
sub64(t[i], mod[i], d_) + res[i] = ri + d_ = bi + } + + if (d_ !== 0n && t[12] === 0n) { + // revert => keep t + for (let i = 0; i < 12; i++) { + out[i] = t[i] & MASK_64 + } + } else { + for (let i = 0; i < 12; i++) { + out[i] = res[i] & MASK_64 + } + } + } +} + +export const mulModPreset: Function[] = [ + montMulMod64, + montMulMod128, + montMulMod192, + montMulMod256, + montMulMod320, + montMulMod384, + montMulMod448, + montMulMod512, + montMulMod576, + montMulMod640, + montMulMod704, + montMulMod768, +] diff --git a/packages/evm/src/evmmax/subMulMod.ts b/packages/evm/src/evmmax/subMulMod.ts new file mode 100644 index 00000000000..b2b94b87584 --- /dev/null +++ b/packages/evm/src/evmmax/subMulMod.ts @@ -0,0 +1,1598 @@ +import { MASK_64, add64, sub64 } from './index.js' + +export function subMod64(z: bigint[], x: bigint[], y: bigint[], mod: bigint[]): void { + let c = 0n + let tmpVal = 0n + + { + const [subLow, subBorrow] = sub64(x[0] & MASK_64, y[0] & MASK_64, c) + tmpVal = subLow + c = subBorrow + } + + let outVal = 0n + let c1 = 0n + { + const [addLow, addCarry] = add64(tmpVal, mod[0] & MASK_64, 0n) + outVal = addLow + c1 = addCarry + } + + let src: bigint + if (c === 0n) { + src = tmpVal + } else { + src = outVal + } + + z[0] = src & MASK_64 +} + +export function subMod128(z: bigint[], x: bigint[], y: bigint[], mod: bigint[]): void { + function add64(a: bigint, b: bigint, carryIn: bigint): [bigint, bigint] { + const sum = a + b + carryIn + const low = sum & MASK_64 + const carryOut = sum >> 64n + return [low, carryOut] + } + + function sub64(a: bigint, b: bigint, borrowIn: bigint): [bigint, bigint] { + let diff = a - b - borrowIn + let borrowOut = 0n + if (diff < 0n) { + diff &= MASK_64 + borrowOut = 1n + } + return [diff & MASK_64, borrowOut] + } + + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + + { + const [diffLow, borrowOut] = sub64(x[0], y[0], c) + tmp0 = diffLow + c = borrowOut + } + { + const [diffLow, borrowOut] = sub64(x[1], y[1], c) + tmp1 = diffLow + c = borrowOut + } + + let c1 = 0n + let out0 = 0n + let out1 = 0n + + { + const [sumLow, carryOut] = add64(tmp0, mod[0], c1) + out0 = sumLow + c1 = carryOut + } + { + const [sumLow, carryOut] = add64(tmp1, mod[1], c1) + out1 = sumLow + c1 = carryOut + } + + if (c === 0n) { + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + } else { + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + } +} + +/** + * subMod192: + * z = (x - y) mod modulus + * + * x, y, modulus, and z are each 3-limb arrays => [3] + * (192 bits total). 
We: + * 1) subtract y from x => tmp0..2 + * 2) add modulus => out0..2 + * 3) if no borrow in step 1 => z=tmp, else => z=out + */ +export function subMod192( + z: bigint[], // [3], final result + x: bigint[], // [3], minuend + y: bigint[], // [3], subtrahend + mod: bigint[], // [3], modulus +): void { + // Step 1: Subtract y from x across 3 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + + { + const [diffLow, borrowOut] = sub64(x[0] & MASK_64, y[0] & MASK_64, c) + tmp0 = diffLow + c = borrowOut + } + { + const [diffLow, borrowOut] = sub64(x[1] & MASK_64, y[1] & MASK_64, c) + tmp1 = diffLow + c = borrowOut + } + { + const [diffLow, borrowOut] = sub64(x[2] & MASK_64, y[2] & MASK_64, c) + tmp2 = diffLow + c = borrowOut + } + + // Step 2: Add modulus => out0..2 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + + { + const [sumLow, carryOut] = add64(tmp0, mod[0] & MASK_64, c1) + out0 = sumLow + c1 = carryOut + } + { + const [sumLow, carryOut] = add64(tmp1, mod[1] & MASK_64, c1) + out1 = sumLow + c1 = carryOut + } + { + const [sumLow, carryOut] = add64(tmp2, mod[2] & MASK_64, c1) + out2 = sumLow + c1 = carryOut + } + + // Step 3: If no borrow from subtract => x>=y => z=tmp, else z=out + if (c === 0n) { + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + } else { + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + } +} + +/** + * subMod256: + * z = (x - y) mod modulus + * + * Each of x, y, mod, z are 4 limbs (256 bits). We unroll the limb-by-limb + * subtract and the subsequent add of the modulus. + */ +export function subMod256( + z: bigint[], // [4] => final 256-bit result + x: bigint[], // [4] => minuend + y: bigint[], // [4] => subtrahend + mod: bigint[], // [4] => 256-bit modulus +): void { + // Step 1: Subtract y from x across 4 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(x[0] & MASK_64, y[0] & MASK_64, c) + tmp0 = diff + c = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(x[1] & MASK_64, y[1] & MASK_64, c) + tmp1 = diff + c = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(x[2] & MASK_64, y[2] & MASK_64, c) + tmp2 = diff + c = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(x[3] & MASK_64, y[3] & MASK_64, c) + tmp3 = diff + c = borrow + } + + // Step 2: Add the modulus => out0..3 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(tmp0, mod[0] & MASK_64, c1) + out0 = sumLow + c1 = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(tmp1, mod[1] & MASK_64, c1) + out1 = sumLow + c1 = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(tmp2, mod[2] & MASK_64, c1) + out2 = sumLow + c1 = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(tmp3, mod[3] & MASK_64, c1) + out3 = sumLow + c1 = carry + } + + // Step 3: If no borrow from initial subtract => x>=y => z=tmp, else => z=out + // Then mask each limb to 64 bits + if (c === 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + } else { + // keep out + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + } +} + +/** + * subMod320: + * z = (x - y) mod modulus + * + * Each of x, y, mod, z is 5 limbs => 320 bits. We: + * - subtract y from x (unrolled), + * - add modulus, + * - if no borrow => keep subtract result, else keep the sum with modulus. 
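+ *
+ * One-limb analogue for intuition (illustrative numbers, not taken from the source):
+ * with mod = 7, x = 2, y = 5 the subtract wraps to 2^64 - 3 with the borrow set, and
+ * adding the modulus yields 2^64 + 4, whose low 64 bits give 4 = (2 - 5) mod 7.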
+ */ +export function subMod320( + z: bigint[], // [5], final result + x: bigint[], // [5], minuend + y: bigint[], // [5], subtrahend + mod: bigint[], // [5], 320-bit modulus +): void { + // Step 1: Subtract y from x across 5 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(x[0] & MASK_64, y[0] & MASK_64, c) + tmp0 = diff + c = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(x[1] & MASK_64, y[1] & MASK_64, c) + tmp1 = diff + c = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(x[2] & MASK_64, y[2] & MASK_64, c) + tmp2 = diff + c = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(x[3] & MASK_64, y[3] & MASK_64, c) + tmp3 = diff + c = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(x[4] & MASK_64, y[4] & MASK_64, c) + tmp4 = diff + c = borrow + } + + // Step 2: Add modulus => out0..4 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(tmp0, mod[0] & MASK_64, c1) + out0 = sumLow + c1 = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(tmp1, mod[1] & MASK_64, c1) + out1 = sumLow + c1 = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(tmp2, mod[2] & MASK_64, c1) + out2 = sumLow + c1 = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(tmp3, mod[3] & MASK_64, c1) + out3 = sumLow + c1 = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(tmp4, mod[4] & MASK_64, c1) + out4 = sumLow + c1 = carry + } + + // Step 3: If no borrow => x>=y => z=tmp, else => z=out. Then mask each limb. + if (c === 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + } else { + // keep out + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + } +} + +/** + * subMod384: + * z = (x - y) mod mod + * + * x, y, mod, z each have 6 limbs => 384 bits total. 
+ * Unrolled approach: + * 1) subtract y from x => store in tmp + * 2) add mod => store in out + * 3) if no borrow => keep tmp, else keep out + */ +export function subMod384( + z: bigint[], // [6] => final 384-bit result + x: bigint[], // [6], minuend + y: bigint[], // [6], subtrahend + mod: bigint[], // [6], the 384-bit modulus +): void { + // Step 1: Subtract y from x across 6 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(x[0] & MASK_64, y[0] & MASK_64, c) + tmp0 = diff + c = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(x[1] & MASK_64, y[1] & MASK_64, c) + tmp1 = diff + c = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(x[2] & MASK_64, y[2] & MASK_64, c) + tmp2 = diff + c = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(x[3] & MASK_64, y[3] & MASK_64, c) + tmp3 = diff + c = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(x[4] & MASK_64, y[4] & MASK_64, c) + tmp4 = diff + c = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(x[5] & MASK_64, y[5] & MASK_64, c) + tmp5 = diff + c = borrow + } + + // Step 2: Add the modulus => out0..5 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(tmp0, mod[0] & MASK_64, c1) + out0 = sumLow + c1 = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(tmp1, mod[1] & MASK_64, c1) + out1 = sumLow + c1 = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(tmp2, mod[2] & MASK_64, c1) + out2 = sumLow + c1 = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(tmp3, mod[3] & MASK_64, c1) + out3 = sumLow + c1 = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(tmp4, mod[4] & MASK_64, c1) + out4 = sumLow + c1 = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(tmp5, mod[5] & MASK_64, c1) + out5 = sumLow + c1 = carry + } + + // Step 3: If no borrow => x>=y => z=tmp, else => z=out. Then mask each limb + if (c === 0n) { + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + } else { + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + } +} + +/** + * subMod448: + * z = (x - y) mod modulus + * + * x, y, mod, z each are 7 limbs => 448 bits total. + * We: + * 1) subtract y from x (unrolled), + * 2) add modulus, + * 3) if no borrow => keep tmp, else keep out + * 4) mask each limb to 64 bits. 
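+ *
+ * Call shape, for illustration only (toLimbs448 is a hypothetical helper splitting a
+ * bigint into 7 little-endian 64-bit limbs; it is not provided by this module):
+ *   const z = new Array(7).fill(0n)
+ *   subMod448(z, toLimbs448(x), toLimbs448(y), toLimbs448(m))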
+ */ +export function subMod448( + z: bigint[], // [7] => final 448-bit result + x: bigint[], // [7], minuend + y: bigint[], // [7], subtrahend + mod: bigint[], // [7], the 448-bit modulus +): void { + // Step 1: Subtract y from x across 7 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(x[0], y[0], c) + tmp0 = diff + c = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(x[1], y[1], c) + tmp1 = diff + c = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(x[2], y[2], c) + tmp2 = diff + c = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(x[3], y[3], c) + tmp3 = diff + c = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(x[4], y[4], c) + tmp4 = diff + c = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(x[5], y[5], c) + tmp5 = diff + c = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(x[6], y[6], c) + tmp6 = diff + c = borrow + } + + // Step 2: Add the modulus => out0..6 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(tmp0, mod[0], c1) + out0 = sumLow + c1 = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(tmp1, mod[1], c1) + out1 = sumLow + c1 = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(tmp2, mod[2], c1) + out2 = sumLow + c1 = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(tmp3, mod[3], c1) + out3 = sumLow + c1 = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(tmp4, mod[4], c1) + out4 = sumLow + c1 = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(tmp5, mod[5], c1) + out5 = sumLow + c1 = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(tmp6, mod[6], c1) + out6 = sumLow + c1 = carry + } + + // Step 3: If no borrow => x >= y => revert to tmp, else => out + if (c === 0n) { + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + } else { + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + } +} + +/** + * subMod512: + * z = (x - y) mod modulus + * + * x, y, mod, z each are arrays of length 8 => 512 bits total. + * We unroll the subtract of y from x, then add the modulus, + * deciding which result to keep based on whether we borrowed. 
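+ *
+ * Why the corrective add works: when x < y the limbwise subtract wraps, leaving
+ * tmp = x - y + 2^512 with the borrow set. Adding the modulus gives
+ * x - y + mod + 2^512, and dropping the carry out of the top limb (c1 is never
+ * read) removes the 2^512 term, so z ends up as the reduced value x - y + mod.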
+ */ +export function subMod512( + z: bigint[], // [8], final 512-bit result + x: bigint[], // [8], minuend + y: bigint[], // [8], subtrahend + mod: bigint[], // [8], the 512-bit modulus +): void { + // Step 1: Subtract y from x across 8 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(x[0], y[0], c) + tmp0 = diff + c = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(x[1], y[1], c) + tmp1 = diff + c = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(x[2], y[2], c) + tmp2 = diff + c = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(x[3], y[3], c) + tmp3 = diff + c = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(x[4], y[4], c) + tmp4 = diff + c = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(x[5], y[5], c) + tmp5 = diff + c = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(x[6], y[6], c) + tmp6 = diff + c = borrow + } + // Limb 7 + { + const [diff, borrow] = sub64(x[7], y[7], c) + tmp7 = diff + c = borrow + } + + // Step 2: Add the modulus => out0..7 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(tmp0, mod[0], c1) + out0 = sumLow + c1 = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(tmp1, mod[1], c1) + out1 = sumLow + c1 = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(tmp2, mod[2], c1) + out2 = sumLow + c1 = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(tmp3, mod[3], c1) + out3 = sumLow + c1 = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(tmp4, mod[4], c1) + out4 = sumLow + c1 = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(tmp5, mod[5], c1) + out5 = sumLow + c1 = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(tmp6, mod[6], c1) + out6 = sumLow + c1 = carry + } + // Limb 7 + { + const [sumLow, carry] = add64(tmp7, mod[7], c1) + out7 = sumLow + c1 = carry + } + + // Step 3: If no borrow => x >= y => revert to tmp, else => out + // Then mask each limb to 64 bits + if (c === 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + } else { + // keep out + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + } +} + +/** + * subMod576: + * z = (x - y) mod mod + * + * Each of x, y, mod, z is 9 limbs => 576 bits total. + * We: + * 1) subtract y from x (unrolled), + * 2) add modulus, + * 3) if no borrow => keep tmp, else keep out + * 4) mask each limb to 64 bits. 
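+ *
+ * Note: as at the other widths, a single corrective add is only sufficient when the
+ * inputs are already fully reduced (x, y < mod); callers are assumed to maintain
+ * that invariant.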
+ */ +export function subMod576( + z: bigint[], // [9] => final 576-bit result + x: bigint[], // [9], minuend + y: bigint[], // [9], subtrahend + mod: bigint[], // [9], the 576-bit modulus +): void { + // Step 1: Subtract y from x across 9 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + let tmp8 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(x[0], y[0], c) + tmp0 = diff + c = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(x[1], y[1], c) + tmp1 = diff + c = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(x[2], y[2], c) + tmp2 = diff + c = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(x[3], y[3], c) + tmp3 = diff + c = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(x[4], y[4], c) + tmp4 = diff + c = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(x[5], y[5], c) + tmp5 = diff + c = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(x[6], y[6], c) + tmp6 = diff + c = borrow + } + // Limb 7 + { + const [diff, borrow] = sub64(x[7], y[7], c) + tmp7 = diff + c = borrow + } + // Limb 8 + { + const [diff, borrow] = sub64(x[8], y[8], c) + tmp8 = diff + c = borrow + } + + // Step 2: Add the modulus => out0..8 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + let out8 = 0n + + { + const [sumLow, carry] = add64(tmp0, mod[0], c1) + out0 = sumLow + c1 = carry + } + { + const [sumLow, carry] = add64(tmp1, mod[1], c1) + out1 = sumLow + c1 = carry + } + { + const [sumLow, carry] = add64(tmp2, mod[2], c1) + out2 = sumLow + c1 = carry + } + { + const [sumLow, carry] = add64(tmp3, mod[3], c1) + out3 = sumLow + c1 = carry + } + { + const [sumLow, carry] = add64(tmp4, mod[4], c1) + out4 = sumLow + c1 = carry + } + { + const [sumLow, carry] = add64(tmp5, mod[5], c1) + out5 = sumLow + c1 = carry + } + { + const [sumLow, carry] = add64(tmp6, mod[6], c1) + out6 = sumLow + c1 = carry + } + { + const [sumLow, carry] = add64(tmp7, mod[7], c1) + out7 = sumLow + c1 = carry + } + { + const [sumLow, carry] = add64(tmp8, mod[8], c1) + out8 = sumLow + c1 = carry + } + + // Step 3: If no borrow => x >= y => revert to tmp, else => out + // Then mask each limb to 64 bits + if (c === 0n) { + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + z[8] = tmp8 & MASK_64 + } else { + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + z[8] = out8 & MASK_64 + } +} + +/** + * subMod640: + * z = (x - y) mod mod + * + * x, y, mod, z each have 10 limbs => 640 bits total. 
+ * Unrolled approach: + * 1) subtract y from x across 10 limbs => tmp + * 2) add modulus => out + * 3) if no borrow => keep tmp, else keep out + * 4) mask each limb to 64 bits + */ +export function subMod640( + z: bigint[], // [10] => final 640-bit result + x: bigint[], // [10], minuend + y: bigint[], // [10], subtrahend + mod: bigint[], // [10], the 640-bit modulus +): void { + // Step 1: Subtract y from x (10 limbs) + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + let tmp8 = 0n + let tmp9 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(x[0], y[0], c) + tmp0 = diff + c = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(x[1], y[1], c) + tmp1 = diff + c = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(x[2], y[2], c) + tmp2 = diff + c = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(x[3], y[3], c) + tmp3 = diff + c = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(x[4], y[4], c) + tmp4 = diff + c = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(x[5], y[5], c) + tmp5 = diff + c = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(x[6], y[6], c) + tmp6 = diff + c = borrow + } + // Limb 7 + { + const [diff, borrow] = sub64(x[7], y[7], c) + tmp7 = diff + c = borrow + } + // Limb 8 + { + const [diff, borrow] = sub64(x[8], y[8], c) + tmp8 = diff + c = borrow + } + // Limb 9 + { + const [diff, borrow] = sub64(x[9], y[9], c) + tmp9 = diff + c = borrow + } + + // Step 2: Add the modulus => out0..9 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + let out8 = 0n + let out9 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(tmp0, mod[0], c1) + out0 = sumLow + c1 = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(tmp1, mod[1], c1) + out1 = sumLow + c1 = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(tmp2, mod[2], c1) + out2 = sumLow + c1 = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(tmp3, mod[3], c1) + out3 = sumLow + c1 = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(tmp4, mod[4], c1) + out4 = sumLow + c1 = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(tmp5, mod[5], c1) + out5 = sumLow + c1 = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(tmp6, mod[6], c1) + out6 = sumLow + c1 = carry + } + // Limb 7 + { + const [sumLow, carry] = add64(tmp7, mod[7], c1) + out7 = sumLow + c1 = carry + } + // Limb 8 + { + const [sumLow, carry] = add64(tmp8, mod[8], c1) + out8 = sumLow + c1 = carry + } + // Limb 9 + { + const [sumLow, carry] = add64(tmp9, mod[9], c1) + out9 = sumLow + c1 = carry + } + + // Step 3: If c=0 => x>=y => revert to tmp, else => out + // Then mask each limb to 64 bits + if (c === 0n) { + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + z[8] = tmp8 & MASK_64 + z[9] = tmp9 & MASK_64 + } else { + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + z[8] = out8 & MASK_64 + z[9] = out9 & MASK_64 + } +} + +/** + * subMod704: + * z = (x - y) mod mod + * + * x, y, mod, z each have 11 limbs => 704 bits total. 
+ * We: + * 1) subtract y from x (unrolled), + * 2) add modulus, + * 3) if no borrow => keep tmp, else keep out + * 4) mask each limb to 64 bits + */ +export function subMod704( + z: bigint[], // [11] => final 704-bit result + x: bigint[], // [11], minuend + y: bigint[], // [11], subtrahend + mod: bigint[], // [11], the 704-bit modulus +): void { + // Step 1: Subtract y from x across 11 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + let tmp8 = 0n + let tmp9 = 0n + let tmp10 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(x[0], y[0], c) + tmp0 = diff + c = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(x[1], y[1], c) + tmp1 = diff + c = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(x[2], y[2], c) + tmp2 = diff + c = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(x[3], y[3], c) + tmp3 = diff + c = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(x[4], y[4], c) + tmp4 = diff + c = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(x[5], y[5], c) + tmp5 = diff + c = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(x[6], y[6], c) + tmp6 = diff + c = borrow + } + // Limb 7 + { + const [diff, borrow] = sub64(x[7], y[7], c) + tmp7 = diff + c = borrow + } + // Limb 8 + { + const [diff, borrow] = sub64(x[8], y[8], c) + tmp8 = diff + c = borrow + } + // Limb 9 + { + const [diff, borrow] = sub64(x[9], y[9], c) + tmp9 = diff + c = borrow + } + // Limb 10 + { + const [diff, borrow] = sub64(x[10], y[10], c) + tmp10 = diff + c = borrow + } + + // Step 2: Add the modulus => out0..10 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + let out8 = 0n + let out9 = 0n + let out10 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(tmp0, mod[0], c1) + out0 = sumLow + c1 = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(tmp1, mod[1], c1) + out1 = sumLow + c1 = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(tmp2, mod[2], c1) + out2 = sumLow + c1 = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(tmp3, mod[3], c1) + out3 = sumLow + c1 = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(tmp4, mod[4], c1) + out4 = sumLow + c1 = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(tmp5, mod[5], c1) + out5 = sumLow + c1 = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(tmp6, mod[6], c1) + out6 = sumLow + c1 = carry + } + // Limb 7 + { + const [sumLow, carry] = add64(tmp7, mod[7], c1) + out7 = sumLow + c1 = carry + } + // Limb 8 + { + const [sumLow, carry] = add64(tmp8, mod[8], c1) + out8 = sumLow + c1 = carry + } + // Limb 9 + { + const [sumLow, carry] = add64(tmp9, mod[9], c1) + out9 = sumLow + c1 = carry + } + // Limb 10 + { + const [sumLow, carry] = add64(tmp10, mod[10], c1) + out10 = sumLow + c1 = carry + } + + // Step 3: If no borrow => x>=y => revert to tmp, else => out + if (c === 0n) { + // revert to tmp + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 & MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + z[8] = tmp8 & MASK_64 + z[9] = tmp9 & MASK_64 + z[10] = tmp10 & MASK_64 + } else { + // keep out + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + z[8] = out8 & MASK_64 
+ z[9] = out9 & MASK_64 + z[10] = out10 & MASK_64 + } +} + +/** + * subMod768: + * z = (x - y) mod mod + * + * x, y, mod, z each have 12 limbs => 768 bits total. + * Unrolled approach: + * 1) subtract y from x (12 limbs) => tmp + * 2) add modulus => out + * 3) if no borrow => x>=y => revert to tmp, else => out + * 4) mask each limb to 64 bits + */ +export function subMod768( + z: bigint[], // [12] => final 768-bit result + x: bigint[], // [12], minuend + y: bigint[], // [12], subtrahend + mod: bigint[], // [12], the 768-bit modulus +): void { + // Step 1: Subtract y from x across 12 limbs + let c = 0n + let tmp0 = 0n + let tmp1 = 0n + let tmp2 = 0n + let tmp3 = 0n + let tmp4 = 0n + let tmp5 = 0n + let tmp6 = 0n + let tmp7 = 0n + let tmp8 = 0n + let tmp9 = 0n + let tmp10 = 0n + let tmp11 = 0n + + // Limb 0 + { + const [diff, borrow] = sub64(x[0], y[0], c) + tmp0 = diff + c = borrow + } + // Limb 1 + { + const [diff, borrow] = sub64(x[1], y[1], c) + tmp1 = diff + c = borrow + } + // Limb 2 + { + const [diff, borrow] = sub64(x[2], y[2], c) + tmp2 = diff + c = borrow + } + // Limb 3 + { + const [diff, borrow] = sub64(x[3], y[3], c) + tmp3 = diff + c = borrow + } + // Limb 4 + { + const [diff, borrow] = sub64(x[4], y[4], c) + tmp4 = diff + c = borrow + } + // Limb 5 + { + const [diff, borrow] = sub64(x[5], y[5], c) + tmp5 = diff + c = borrow + } + // Limb 6 + { + const [diff, borrow] = sub64(x[6], y[6], c) + tmp6 = diff + c = borrow + } + // Limb 7 + { + const [diff, borrow] = sub64(x[7], y[7], c) + tmp7 = diff + c = borrow + } + // Limb 8 + { + const [diff, borrow] = sub64(x[8], y[8], c) + tmp8 = diff + c = borrow + } + // Limb 9 + { + const [diff, borrow] = sub64(x[9], y[9], c) + tmp9 = diff + c = borrow + } + // Limb 10 + { + const [diff, borrow] = sub64(x[10], y[10], c) + tmp10 = diff + c = borrow + } + // Limb 11 + { + const [diff, borrow] = sub64(x[11], y[11], c) + tmp11 = diff + c = borrow + } + + // Step 2: Add the modulus => out0..11 + let c1 = 0n + let out0 = 0n + let out1 = 0n + let out2 = 0n + let out3 = 0n + let out4 = 0n + let out5 = 0n + let out6 = 0n + let out7 = 0n + let out8 = 0n + let out9 = 0n + let out10 = 0n + let out11 = 0n + + // Limb 0 + { + const [sumLow, carry] = add64(tmp0, mod[0], c1) + out0 = sumLow + c1 = carry + } + // Limb 1 + { + const [sumLow, carry] = add64(tmp1, mod[1], c1) + out1 = sumLow + c1 = carry + } + // Limb 2 + { + const [sumLow, carry] = add64(tmp2, mod[2], c1) + out2 = sumLow + c1 = carry + } + // Limb 3 + { + const [sumLow, carry] = add64(tmp3, mod[3], c1) + out3 = sumLow + c1 = carry + } + // Limb 4 + { + const [sumLow, carry] = add64(tmp4, mod[4], c1) + out4 = sumLow + c1 = carry + } + // Limb 5 + { + const [sumLow, carry] = add64(tmp5, mod[5], c1) + out5 = sumLow + c1 = carry + } + // Limb 6 + { + const [sumLow, carry] = add64(tmp6, mod[6], c1) + out6 = sumLow + c1 = carry + } + // Limb 7 + { + const [sumLow, carry] = add64(tmp7, mod[7], c1) + out7 = sumLow + c1 = carry + } + // Limb 8 + { + const [sumLow, carry] = add64(tmp8, mod[8], c1) + out8 = sumLow + c1 = carry + } + // Limb 9 + { + const [sumLow, carry] = add64(tmp9, mod[9], c1) + out9 = sumLow + c1 = carry + } + // Limb 10 + { + const [sumLow, carry] = add64(tmp10, mod[10], c1) + out10 = sumLow + c1 = carry + } + // Limb 11 + { + const [sumLow, carry] = add64(tmp11, mod[11], c1) + out11 = sumLow + c1 = carry + } + + // Step 3: If c=0 => x>=y => revert to tmp, else => out + // Then mask each limb to 64 bits + if (c === 0n) { + z[0] = tmp0 & MASK_64 + z[1] = tmp1 & MASK_64 + z[2] = tmp2 
& MASK_64 + z[3] = tmp3 & MASK_64 + z[4] = tmp4 & MASK_64 + z[5] = tmp5 & MASK_64 + z[6] = tmp6 & MASK_64 + z[7] = tmp7 & MASK_64 + z[8] = tmp8 & MASK_64 + z[9] = tmp9 & MASK_64 + z[10] = tmp10 & MASK_64 + z[11] = tmp11 & MASK_64 + } else { + z[0] = out0 & MASK_64 + z[1] = out1 & MASK_64 + z[2] = out2 & MASK_64 + z[3] = out3 & MASK_64 + z[4] = out4 & MASK_64 + z[5] = out5 & MASK_64 + z[6] = out6 & MASK_64 + z[7] = out7 & MASK_64 + z[8] = out8 & MASK_64 + z[9] = out9 & MASK_64 + z[10] = out10 & MASK_64 + z[11] = out11 & MASK_64 + } +} + +export const subModPreset: Function[] = [ + subMod64, + subMod128, + subMod192, + subMod256, + subMod320, + subMod384, + subMod448, + subMod512, + subMod576, + subMod640, + subMod704, + subMod768, +] diff --git a/packages/evm/src/evmmax/util.ts b/packages/evm/src/evmmax/util.ts new file mode 100644 index 00000000000..5783feb0dcf --- /dev/null +++ b/packages/evm/src/evmmax/util.ts @@ -0,0 +1,329 @@ +import { bytesToHex } from '@ethereumjs/util' + +export const MASK_64 = (1n << 64n) - 1n + +/** + * Places the lower 64 bits of a bigint interpreted as a byte array in a destination byte array starting at offset + * + * @param dst destination byte array to put bytes into + * @param offset offset to start putting bytes from + * @param value bigint whose lower 64 bits are to be interpreted as big endian bytes and put into destination from offset + */ +export function putUint64BE(dst: Uint8Array, offset: number, value: bigint): void { + value = BigInt.asUintN(64, value) + const hex = value.toString(16).padStart(16, '0') + for (let i = 0; i < 8; i++) { + dst[offset + i] = parseInt(hex.slice(i * 2, i * 2 + 2), 16) + } +} + +/** + * Computes the negative modular inverse of mod modulo 2^64, + * i.e. returns (-mod^-1) mod 2^64. + * + * This uses a Newton-like iteration (based on the Go standard library + * approach) to find the 64-bit inverse of mod, then negates it + * modulo 2^64. Commonly used in single-limb Montgomery multiplication + * to get (-mod^-1) mod 2^64. + * + * @param mod A 64-bit bigint value to invert (mod must be < 2^64). + * @returns The negative inverse of mod, i.e. (-1 * mod^-1) mod 2^64. + */ +export function negModInverse(mod: bigint): bigint { + let k0 = (2n - mod) & MASK_64 + let t = (mod - 1n) & MASK_64 + + for (let i = 1; i < 64; i <<= 1) { + t = (t * t) & MASK_64 + k0 = (k0 * ((t + 1n) & MASK_64)) & MASK_64 + } + k0 = -k0 & MASK_64 + + return k0 +} + +/** + * Converts a big-endian byte array into an array of 64-bit limbs (bigints) + * in little-endian limb order. + * + * @param b A Uint8Array of bytes in big-endian order (most significant byte first). + * @returns An array of 64-bit bigints (limbs) in little-endian order. 
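+ *
+ * For example, the 9 bytes 0x01 02 03 04 05 06 07 08 09 are left-padded to 16 bytes and decoded
+ * as limbs = [0x0203040506070809n, 0x01n], i.e. the least-significant limb comes first.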
+ */ +export function bytesToLimbs(b: Uint8Array): bigint[] { + // Determine how many 64-bit words (limbs) are needed to hold b + const wordCount = Math.ceil(b.length / 8) + const paddedSize = wordCount * 8 + + // Zero-pads b on the left (most-significant bytes) if needed + const paddedBytes = new Uint8Array(paddedSize) + paddedBytes.set(b, paddedSize - b.length) + + // Reads each 8-byte block as a 64-bit big-endian integer + const limbs: bigint[] = new Array(wordCount) + for (let i = 0; i < wordCount; i++) { + const offset = i * 8 + // Construct the 64-bit limb as a bigint + const limb = + (BigInt(paddedBytes[offset]) << 56n) | + (BigInt(paddedBytes[offset + 1]) << 48n) | + (BigInt(paddedBytes[offset + 2]) << 40n) | + (BigInt(paddedBytes[offset + 3]) << 32n) | + (BigInt(paddedBytes[offset + 4]) << 24n) | + (BigInt(paddedBytes[offset + 5]) << 16n) | + (BigInt(paddedBytes[offset + 6]) << 8n) | + BigInt(paddedBytes[offset + 7]) + limbs[i] = limb + } + + // Reverse the limbs to get little-endian + limbs.reverse() + + return limbs +} + +/** + * Converts an array of 64-bit limbs (bigints) in little-endian limb order + * into a big-endian byte array, then removes leading zeros. + * + * @param limbs An array of 64-bit bigints in little-endian limb order. + * @returns A Uint8Array in big-endian order with no leading zeros. + */ +export function limbsToBytes(limbs: bigint[]): Uint8Array { + const limbCount = limbs.length + const result = new Uint8Array(limbCount * 8) + + for (let i = 0; i < limbCount; i++) { + const limb = limbs[limbCount - 1 - i] + // Extract 8 bytes in big-endian order + const offset = i * 8 + result[offset] = Number((limb >> 56n) & 0xffn) + result[offset + 1] = Number((limb >> 48n) & 0xffn) + result[offset + 2] = Number((limb >> 40n) & 0xffn) + result[offset + 3] = Number((limb >> 32n) & 0xffn) + result[offset + 4] = Number((limb >> 24n) & 0xffn) + result[offset + 5] = Number((limb >> 16n) & 0xffn) + result[offset + 6] = Number((limb >> 8n) & 0xffn) + result[offset + 7] = Number(limb & 0xffn) + } + + // Remove leading zeros + let firstNonZero = 0 + while (firstNonZero < result.length && result[firstNonZero] === 0) { + firstNonZero++ + } + + return firstNonZero === result.length ? 
new Uint8Array([0]) : result.slice(firstNonZero) +} + +/** + * Helper function to convert a Uint8Array (big-endian) to bigint + * + * @param arr big endian byte array + * @returns bigint representation of input bytes + */ +export function uint8ArrayToBigint(arr: Uint8Array): bigint { + if (arr.length === 0) return 0n + const hex = '0x' + Array.from(arr, (byte) => byte.toString(16).padStart(2, '0')).join('') + return BigInt(hex) +} + +export function limbsToInt(limbs: bigint[]): bigint { + const numBytes = limbsToBytes(limbs) + return uint8ArrayToBigint(numBytes) +} + +/** + * Compares two limb arrays and returns true iff x < y + * + * @param x first limb array being compared + * @param y second limb array being compared + * @returns x < y + */ +export function lt(x: bigint[], y: bigint[]): boolean { + for (let i = x.length; i > 0; i--) { + if (x[i - 1] < y[i - 1]) { + return true + } + } + return false +} + +/** + * Interprets the provided big-endian byte array b as a 64-bit limb sequence + * and places it into the given out array of bigints, each representing 64 bits + * + * @param out An array of bigints, each 64 bits in size, to hold the result + * @param b A big-endian byte array to be interpreted and placed into out + */ +export function placeBEBytesInOutput(out: bigint[], b: Uint8Array): void { + const padded = new Uint8Array(out.length * 8) + padded.set(b, padded.length - b.length) + + const resultLimbs = out.length + for (let i = 0; i < resultLimbs; i++) { + const offset = i * 8 + let limb = 0n + limb |= BigInt(padded[offset]) << 56n + limb |= BigInt(padded[offset + 1]) << 48n + limb |= BigInt(padded[offset + 2]) << 40n + limb |= BigInt(padded[offset + 3]) << 32n + limb |= BigInt(padded[offset + 4]) << 24n + limb |= BigInt(padded[offset + 5]) << 16n + limb |= BigInt(padded[offset + 6]) << 8n + limb |= BigInt(padded[offset + 7]) + + out[resultLimbs - 1 - i] = limb + } +} + +/** + * Converts a non-negative bigint value into a big-endian byte array, + * removing any leading zeros + * + * @param value A non-negative bigint to convert to a big-endian byte array. + * @returns A big-endian Uint8Array representation of value with no leading zeros + */ +export function bigIntToBEBytes(value: bigint): Uint8Array { + if (value === 0n) return new Uint8Array([0]) + let hex = value.toString(16) + + // prepend '0' if needed to make the hex length even + if (hex.length % 2 !== 0) { + hex = '0' + hex + } + + // parse pairs of hex chars into a byte array in big-endian order + const arr = new Uint8Array(hex.length / 2) + for (let i = 0; i < arr.length; i++) { + arr[i] = parseInt(hex.slice(i * 2, i * 2 + 2), 16) + } + return arr +} + +// +// odd modulus arithmetic helpers, below +// + +/** + * Performs a 64-bit multiplication of a * b and returns [hi, lo], + * where lo is the low 64 bits, and hi is the high 64 bits. + * + * @param a A 64-bit bigint + * @param b A 64-bit bigint + * @returns [hi, lo] => two 64-bit bigints: high part and low part of the product + */ +export function mul64(a: bigint, b: bigint): [bigint, bigint] { + const product = a * b + const lo = product & MASK_64 + const hi = product >> 64n + return [hi, lo] +} + +/** + * Adds three 64-bit bigints (x, y, carryIn) and returns [low, carryOut], + * where low is the sum masked to 64 bits, and carryOut is the overflow. 
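+ * For example, add64(2n ** 64n - 1n, 1n, 0n) returns [0n, 1n]: the sum wraps to zero and the
+ * overflow surfaces as carryOut = 1.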
+ * + * @param x A 64-bit bigint + * @param y A 64-bit bigint + * @param carryIn A 64-bit bigint carry-in + * @returns [low, carryOut] => sum's low 64 bits and carry-out + */ +export function add64(x: bigint, y: bigint, carryIn: bigint): [bigint, bigint] { + const sum = x + y + carryIn + const low = sum & MASK_64 + const carryOut = sum >> 64n + return [low, carryOut] +} + +/** + * Subtracts y + borrowIn from x in 64-bit space, returning [diff, borrowOut]. + * If the difference is negative, diff is masked to 64 bits and borrowOut = 1. + * + * @param x A 64-bit bigint operand + * @param y A 64-bit bigint operand + * @param borrowIn A 64-bit bigint borrow going into the difference + * @returns [diff, borrowOut] => 64-bit difference and borrow-out + */ +export function sub64(x: bigint, y: bigint, borrowIn: bigint): [bigint, bigint] { + let diff = x - y - borrowIn + let borrowOut = 0n + if (diff < 0n) { + diff &= MASK_64 + borrowOut = 1n + } + return [diff & MASK_64, borrowOut] +} + +/** + * Multiplies a and b in 64-bit space (hi, lo) then adds c to lo, + * returning the high part of the result, masked to 64 bits. + * + * @param a A 64-bit bigint operand + * @param b A 64-bit bigint operand + * @param c A 64-bit bigint addend to lo + * @returns A 64-bit bigint representing (hi + carry) + */ +export function madd0(a: bigint, b: bigint, c: bigint): bigint { + const [hi, lo] = mul64(a, b) + const sum = lo + c + const carry = sum >> 64n + const newHi = (hi + carry) & MASK_64 + return newHi +} + +/** + * Multiplies a and b (64 bits), then adds c to the low part. + * Returns [updatedHi, updatedLo], both masked to 64 bits. + * + * @param a A 64-bit bigint operand + * @param b A 64-bit bigint operand + * @param c A 64-bit bigint carry added to the product's low part + * @returns [hi2, lo2] => two 64-bit bigints: new high and new low + */ +export function madd1(a: bigint, b: bigint, c: bigint): [bigint, bigint] { + const [hi, lo] = mul64(a, b) + const [lo2, carry] = add64(lo, c, 0n) + const [hi2, _] = add64(hi, 0n, carry) + return [hi2 & MASK_64, lo2 & MASK_64] +} + +/** + * Multiplies a and b (64 bits), adds c and d to the partial sums, + * carrying through intermediate steps. Returns [updatedHi, updatedLo]. 
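+ * In effect this computes a * b + c + d as a 128-bit value split into 64-bit halves, e.g.
+ * madd2(2n ** 32n, 2n ** 32n, 1n, 1n) returns [1n, 2n] since the full result is 2^64 + 2.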
+ * + * @param a A 64-bit bigint operand + * @param b A 64-bit bigint operand + * @param c A 64-bit bigint carry to add + * @param d A 64-bit bigint carry to add + * @returns [hi, lo] => final high and low 64-bit bigints after all adds + */ +export function madd2(a: bigint, b: bigint, c: bigint, d: bigint): [bigint, bigint] { + const [hi, lo] = mul64(a, b) + const [c2, carry] = add64(c, d, 0n) + const [hi2, _] = add64(hi, 0n, carry) + const [lo2, carry2] = add64(lo, c2, 0n) + const [hi3, __] = add64(hi2, 0n, carry2) + return [hi3 & MASK_64, lo2 & MASK_64] +} + +export function extractEVMMAXImmediateInputs(pc: number, code: Uint8Array) { + const out = code[pc + 1] + const outStride = code[pc + 2] + const x = code[pc + 3] + const xStride = code[pc + 4] + const y = code[pc + 5] + const yStride = code[pc + 6] + const count = code[pc + 7] + + console.log('dbg600') + console.log(pc) + console.log(out) + console.log(outStride) + console.log(x) + console.log(xStride) + console.log(y) + console.log(yStride) + console.log(count) + + return [out, outStride, x, xStride, y, yStride, count] +} diff --git a/packages/evm/src/index.ts b/packages/evm/src/index.ts index df97e14cdc1..9bddf3b3845 100644 --- a/packages/evm/src/index.ts +++ b/packages/evm/src/index.ts @@ -58,6 +58,7 @@ export { validateEOF, } +export * from './evmmax/fieldContext.ts' export * from './binaryTreeAccessWitness.ts' export * from './constructors.ts' export * from './params.ts' diff --git a/packages/evm/src/interpreter.ts b/packages/evm/src/interpreter.ts index a56ad9d09d8..714a2bc7ea7 100644 --- a/packages/evm/src/interpreter.ts +++ b/packages/evm/src/interpreter.ts @@ -19,6 +19,7 @@ import { EOFContainerMode, validateEOF } from './eof/container.ts' import { setupEOF } from './eof/setup.ts' import { ContainerSectionType } from './eof/verify.ts' import { EVMError, EVMErrorTypeString } from './errors.ts' +import { FieldAllocs } from './evmmax/index.ts' import { type EVMPerformanceLogger, type Timer } from './logger.ts' import { Memory } from './memory.ts' import { Message } from './message.ts' @@ -109,6 +110,7 @@ export interface RunState { gasRefund: bigint // Tracks the current refund gasLeft: bigint // Current gas left returnBytes: Uint8Array /* Current bytes in the return Uint8Array. Cleared each time a CALL/CREATE is made in the current frame. 
*/ + evmmaxState: FieldAllocs } export interface InterpreterResult { @@ -200,6 +202,7 @@ export class Interpreter { stateManager: this._stateManager, blockchain, env, + evmmaxState: new FieldAllocs(), shouldDoJumpAnalysis: true, interpreter: this, gasRefund: env.gasRefund, diff --git a/packages/evm/src/opcodes/codes.ts b/packages/evm/src/opcodes/codes.ts index 0059b7b0310..8576f0b4e77 100644 --- a/packages/evm/src/opcodes/codes.ts +++ b/packages/evm/src/opcodes/codes.ts @@ -344,6 +344,19 @@ const eipOpcodes: { eip: number; opcodes: OpcodeEntry }[] = [ 0xe5: defaultOp('JUMPF'), }, }, + { + eip: 6690, + opcodes: { + // control & i/o + 0xc0: dynamicGasOp('SETMODX'), + 0xc1: dynamicGasOp('LOADX'), + 0xc2: dynamicGasOp('STOREX'), + // arithmetic + 0xc3: dynamicGasOp('ADDMODX'), + 0xc4: dynamicGasOp('SUBMODX'), + 0xc5: dynamicGasOp('MULMODX'), + }, + }, { eip: 7069, opcodes: { diff --git a/packages/evm/src/opcodes/functions.ts b/packages/evm/src/opcodes/functions.ts index e686fbd2faa..7c7ee15596b 100644 --- a/packages/evm/src/opcodes/functions.ts +++ b/packages/evm/src/opcodes/functions.ts @@ -47,6 +47,7 @@ import { } from './util.ts' import type { Common } from '@ethereumjs/common' +import { extractEVMMAXImmediateInputs } from '../evmmax/index.ts' import type { RunState } from '../interpreter.ts' export interface SyncOpHandler { @@ -989,6 +990,82 @@ export const handlers: Map = new Map([ runState.interpreter.log(mem, topicsCount, topicsBuf) }, ], + // 0xc0: SETMODX + [ + 0xc0, + function (runState, _common) { + const [id, modOffset, modSize, allocCount] = runState.stack.popN(4) + const modulus = runState.memory.read(Number(modOffset), Number(modSize)) + // console.log('dbg600') + // console.log(modulus) + runState.evmmaxState.allocAndSetActive(Number(id), modulus, allocCount) + }, + ], + // 0xc1: LOADX + [ + 0xc1, + function (runState, _common) { + console.log('dbg900') + const [dest, source, count] = runState.stack.popN(3) + const copySize = Number(count) * runState.evmmaxState.getActive()?.getElemSize() + const destBuf = new Uint8Array(copySize) + runState.evmmaxState.getActive()?.load(destBuf, Number(source), Number(count)) + console.log(copySize) + console.log(count) + console.log(runState.evmmaxState.getActive()?.getElemSize()) + console.log(destBuf) + console.log(runState.memory._store) + runState.memory.write(Number(dest), copySize, destBuf) + console.log(runState.memory._store) + }, + ], + // 0xc2: STOREX + [ + 0xc2, + function (runState, _common) { + // TODO figure out if we need to use extend(), _store(), or or just write() + const [dest, source, count] = runState.stack.popN(3) + const copySize = Number(count) * runState.evmmaxState.getActive()?.getElemSize() + const srcBuf = runState.memory.read(Number(source), Number(count) * copySize) + runState.evmmaxState.getActive()?.store(Number(dest), Number(count), srcBuf) + }, + ], + // 0xc3: ADDMODX + [ + 0xc3, + function (runState, _common) { + const [out, outStride, x, xStride, y, yStride, count] = extractEVMMAXImmediateInputs( + runState.programCounter - 1, + runState.code, + ) + runState.programCounter += 7 + runState.evmmaxState.getActive().addM(out, outStride, x, xStride, y, yStride, count) + }, + ], + // 0xc4: SUBMODX + [ + 0xc4, + function (runState, _common) { + const [out, outStride, x, xStride, y, yStride, count] = extractEVMMAXImmediateInputs( + runState.programCounter - 1, + runState.code, + ) + runState.programCounter += 7 + runState.evmmaxState.getActive().subM(out, outStride, x, xStride, y, yStride, count) + }, + ], + // 
0xc5: MULMODX + [ + 0xc5, + function (runState, _common) { + const [out, outStride, x, xStride, y, yStride, count] = extractEVMMAXImmediateInputs( + runState.programCounter - 1, + runState.code, + ) + runState.programCounter += 7 + runState.evmmaxState.getActive().mulM(out, outStride, x, xStride, y, yStride, count) + }, + ], // 0xd0: DATALOAD [ 0xd0, diff --git a/packages/evm/src/opcodes/gas.ts b/packages/evm/src/opcodes/gas.ts index 7d3660b0374..350d0de3ecf 100644 --- a/packages/evm/src/opcodes/gas.ts +++ b/packages/evm/src/opcodes/gas.ts @@ -7,6 +7,7 @@ import { BIGINT_32, BIGINT_64, bigIntToBytes, + bytesToBigInt, equalsBytes, setLengthLeft, } from '@ethereumjs/util' @@ -21,6 +22,9 @@ import { accessAddressEIP2929, accessStorageEIP2929 } from './EIP2929.ts' import { createAddressFromStackBigInt, divCeil, + evmmaxMemoryGasCost, + isPowerOfTwo, + makeEVMMAXArithGasFunc, maxCallGas, setLengthLeftStorage, subMemUsage, @@ -30,6 +34,13 @@ import { import type { Common } from '@ethereumjs/common' import type { Address } from '@ethereumjs/util' +import { + ADD_OR_SUB_COST, + MAX_ALLOC_SIZE, + MULMODX_COST, + SETMODX_ODD_MODULUS_COST, +} from '../evmmax/index.ts' +import { add64, mul64 } from '../evmmax/util.ts' import type { RunState } from '../interpreter.ts' const EXTCALL_TARGET_MAX = BigInt(2) ** BigInt(8 * 20) - BigInt(1) @@ -47,6 +58,11 @@ async function eip7702GasCost( return BIGINT_0 } +const MAX_UINT64 = 2n ** 64n - 1n +function isUint64(value: bigint): boolean { + return value >= 0n && value <= MAX_UINT64 +} + /** * This file returns the dynamic parts of opcodes which have dynamic gas * These are not pure functions: some edit the size of the memory @@ -768,6 +784,142 @@ export const dynamicGasHandlers: Map { + const [modId, modOffset, modSize, allocCount] = runState.stack.peek(4) + + if (!isUint64(modId) || !isUint64(modSize) || !isUint64(allocCount)) { + trap('one or more parameters overflows 64 bits') + } + if (runState.evmmaxState.getAlloced().get(Number(modId)) !== undefined) { + return gas + } + if (modSize > 96n) { + trap('modulus cannot exceed 768 bits in width') + } + if (!isUint64(modOffset + modSize)) { + trap('modulus offset + size overflows uint64') + } + if (allocCount > 256) { + trap('cannot allocate more than 256 field elements per modulus id') + } + const paddedModSize = (modSize + 7n) / 8n + const precompCost = SETMODX_ODD_MODULUS_COST[Number(paddedModSize)] + + const allocSize = paddedModSize * allocCount + if (runState.evmmaxState.allocSize() + allocSize > MAX_ALLOC_SIZE) { + trap('call context evmmax allocation threshold exceeded') + } + + const memCost = evmmaxMemoryGasCost(runState, common, allocSize, 0n, 0n) // TODO should I be setting length and offset to 0? 
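+ // The SETMODX_ODD_MODULUS_COST term below is only charged for non power-of-two moduli;
+ // power-of-two moduli skip the precompute and pay just the memory expansion cost.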
+ const modBytes = runState.memory.read(Number(modOffset), Number(modSize)) + if (!isPowerOfTwo(bytesToBigInt(modBytes))) { + return (gas += BigInt(precompCost) + memCost) + } + return gas + memCost + }, + ], + [ + /* LOADX */ + 0xc1, + async function (runState, gas, common): Promise { + const [dst, src, count] = runState.stack.peek(3) + + if (!isUint64(src) || src >= runState.evmmaxState.getActive().getNumElems()) { + trap('src index out of bounds') + } + if (!isUint64(count) || count >= runState.evmmaxState.getActive().getNumElems()) { + trap('count must be less than number of field elements in the active space') + } + const [last1, overflow1] = add64(src, count, 0n) + if (overflow1 !== 0n || last1 > runState.evmmaxState.getActive().getNumElems()) { + trap('out of bounds copy source') + } + if (!isUint64(dst)) { + trap('destination of copy out of bounds') + } + + const [loadSize, overflow2] = mul64( + count, + BigInt(runState.evmmaxState.getActive().getElemSize()), + ) + if (overflow2 !== 0n) { + trap('overflow') + } + const [last2, overflow3] = add64(dst, loadSize, 0n) + if (overflow3 !== 0n || last2 > runState.memoryWordCount) { + trap('out of bounds destination') + } + + if (runState.evmmaxState.getActive().isModulusBinary) { + return gas + loadSize * common.param('copyGas') // TODO check if this translates from go: toWordSize(storeSize) * params.copyGas + } else { + return ( + gas + + count * + BigInt(MULMODX_COST[Number(runState.evmmaxState.getActive().getElemSize() / 8) - 1]) + ) + } + }, + ], + [ + /* STOREX */ + 0xc2, + async function (runState, gas, common): Promise { + const [dst, src, count] = runState.stack.peek(3) + + if (!isUint64(src) || src >= runState.memory._store.length) { + trap('src index out of bounds') + } + if (!isUint64(dst) || dst >= runState.evmmaxState.getActive().getNumElems()) { + trap('destination of copy out of bounds') + } + if (!isUint64(count) || count >= runState.evmmaxState.getActive().getNumElems()) { + trap('count must be less than number of field elements in the active space') + } + const storeSize = count * runState.evmmaxState.getActive().getNumElems() + if (src + storeSize > runState.memory._store.length) { + trap('source of copy out of bounds of EVM memory') + } + + if (runState.evmmaxState.getActive().isModulusBinary) { + return gas + storeSize * common.param('copyGas') // TODO check if this translates from go: toWordSize(storeSize) * params.copyGas + } else { + return ( + gas + + count * + BigInt( + MULMODX_COST[ + Number(Math.ceil(runState.evmmaxState.getActive().getElemSize() / 8)) - 1 + ], + ) + ) + } + }, + ], + [ + /* ADDMODX */ + 0xc3, + async function (runState, gas, common): Promise { + return makeEVMMAXArithGasFunc(ADD_OR_SUB_COST)(runState, gas, common) + }, + ], + [ + /* SUBMODX */ + 0xc4, + async function (runState, gas, common): Promise { + return makeEVMMAXArithGasFunc(ADD_OR_SUB_COST)(runState, gas, common) + }, + ], + [ + /* MULMODX */ + 0xc5, + async function (runState, gas, common): Promise { + return makeEVMMAXArithGasFunc(MULMODX_COST)(runState, gas, common) + }, + ], /* EXTCALL */ [ 0xf8, diff --git a/packages/evm/src/opcodes/util.ts b/packages/evm/src/opcodes/util.ts index 5b4af7775dd..6395d2d518d 100644 --- a/packages/evm/src/opcodes/util.ts +++ b/packages/evm/src/opcodes/util.ts @@ -10,6 +10,7 @@ import { bytesToHex, createAddressFromBigInt, equalsBytes, + hexToBigInt, setLengthLeft, setLengthRight, } from '@ethereumjs/util' @@ -20,6 +21,7 @@ import { EVMError } from '../errors.ts' import type { Common } from 
'@ethereumjs/common' import type { Address } from '@ethereumjs/util' import type { EVMErrorType } from '../errors.ts' +import { extractEVMMAXImmediateInputs } from '../evmmax/util.ts' import type { RunState } from '../interpreter.ts' const MASK_160 = (BIGINT_1 << BIGINT_160) - BIGINT_1 @@ -197,31 +199,100 @@ export function maxCallGas( } } -/** - * Subtracts the amount needed for memory usage from `runState.gasLeft` - */ -export function subMemUsage(runState: RunState, offset: bigint, length: bigint, common: Common) { +export function isPowerOfTwo(val: bigint): boolean { + if (val <= 0n) return false + + const bin = val.toString(2) + const topBitIndex = bin.length - 1 + const cleared = val - (1n << BigInt(topBitIndex)) + + return cleared === 0n +} + +export function makeEVMMAXArithGasFunc(opCosts: number[]): Function { + return function (runState: RunState, gas: bigint, common: Common) { + const [out, outStride, x, xStride, y, yStride, count] = extractEVMMAXImmediateInputs( + runState.programCounter, + runState.code, + ) + const values = [x + xStride * count, y + yStride * count, out + outStride * count] + const maxOffset = values.reduce((max, current) => (current > max ? current : max), 0) + if ( + count === 0 || + outStride === 0 || + maxOffset > runState.evmmaxState.getActive().getNumElems() + ) { + trap('bad parameters') + } + + // console.log('dbg200') + // console.log(gas) + // console.log(count) + // console.log(opCosts[runState.evmmaxState.getActive().modulus.length - 1]) + return ( + gas + BigInt(count) * BigInt(opCosts[runState.evmmaxState.getActive().modulus.length - 1]) + ) + } +} + +export function evmmaxMemoryGasCost( + runState: RunState, + common: Common, + newEVMMAXMemSize: bigint, + offset: bigint, + length: bigint, +) { + if (runState.memoryWordCount === 0n && newEVMMAXMemSize === 0n) { + return 0n + } + // YP (225): access with zero length will not extend the memory if (length === BIGINT_0) return BIGINT_0 const newMemoryWordCount = divCeil(offset + length, BIGINT_32) if (newMemoryWordCount <= runState.memoryWordCount) return BIGINT_0 - const words = newMemoryWordCount - const fee = common.param('memoryGas') - const quadCoefficient = common.param('quadCoefficientDivGas') - // words * 3 + words ^2 / 512 - let cost = words * fee + (words * words) / quadCoefficient + let newMemSize = newMemoryWordCount - if (cost > runState.highestMemCost) { - const currentHighestMemCost = runState.highestMemCost - runState.highestMemCost = cost - cost -= currentHighestMemCost + if (newMemSize > hexToBigInt('0x1FFFFFFFE0')) { + trap('gas uint64 overflow') // TODO is there an error code for gas overflow? } + const newMemSizePadded = newMemSize * 32n - runState.memoryWordCount = newMemoryWordCount + const curEVMMAXMemSizePadded = runState.evmmaxState.getActive().getAllocatedSize() * 32 + const newEVMMAXMemSizePadded = Number(newEVMMAXMemSize) * 32 + + if ( + newMemSizePadded > BigInt(runState.memory._store.length) || + newEVMMAXMemSizePadded > curEVMMAXMemSizePadded + ) { + if (newMemSize <= BigInt(runState.memory._store.length)) { + newMemSize = BigInt(runState.memory._store.length) + } + const newEffectiveMemSizeWords = newEVMMAXMemSize + newMemSize // toWordSize? 
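+ // Charge the standard quadratic memory cost (words * memoryGas + words^2 / quadCoefficientDivGas)
+ // over the combined EVM + EVMMAX footprint, billing only the delta above the highest memory cost
+ // already paid in this frame.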
+ const words = newEffectiveMemSizeWords + const fee = common.param('memoryGas') + const quadCoefficient = common.param('quadCoefficientDivGas') + // words * 3 + words ^2 / 512 + let cost = words * fee + (words * words) / quadCoefficient + if (cost > runState.highestMemCost) { + const currentHighestMemCost = runState.highestMemCost + runState.highestMemCost = cost + cost -= currentHighestMemCost + } + runState.memoryWordCount = newMemoryWordCount - return cost + return cost + } + + return 0n +} + +/** + * Subtracts the amount needed for memory usage from `runState.gasLeft` + */ +export function subMemUsage(runState: RunState, offset: bigint, length: bigint, common: Common) { + return evmmaxMemoryGasCost(runState, common, runState.evmmaxState.allocSize(), offset, length) } /** diff --git a/packages/evm/test/eips/eip-6990.spec.ts b/packages/evm/test/eips/eip-6990.spec.ts new file mode 100644 index 00000000000..e05c2cf2fe2 --- /dev/null +++ b/packages/evm/test/eips/eip-6990.spec.ts @@ -0,0 +1,16 @@ +import { Common, Hardfork, Mainnet } from '@ethereumjs/common' +import { assert, describe, it } from 'vitest' +import { createEVM } from '../../src/index.ts' + +describe('EIP 6690 tests', async () => { + it(`evmmax instantiation`, async () => { + const evm = await createEVM({ + common: new Common({ + hardfork: Hardfork.Prague, + eips: [6690], + chain: Mainnet, + }), + }) + console.log(evm) + }) +}) diff --git a/packages/evm/test/evmmax/fieldContext.spec.ts b/packages/evm/test/evmmax/fieldContext.spec.ts new file mode 100644 index 00000000000..59d1fe8bc06 --- /dev/null +++ b/packages/evm/test/evmmax/fieldContext.spec.ts @@ -0,0 +1,79 @@ +import { randomBytes } from 'crypto' +import { bigIntToBytes, bytesToBigInt } from '@ethereumjs/util' +import { assert, describe, it } from 'vitest' + +import { FieldContext } from '../../src/index.js' + +function padBigIntBytes(val: bigint, byteLen: number): Uint8Array { + const raw = bigIntToBytes(val) + if (raw.length === byteLen) return raw + const out = new Uint8Array(byteLen) + out.set(raw, byteLen - raw.length) + return out +} + +function randomBigInt(size: number, limit: bigint): bigint { + return bytesToBigInt(randomBytes(size)) % limit +} + +function randomBinaryModulus(size: number): bigint { + return 1n << BigInt(size * 8) +} + +export function randomOddModulus(size: number): bigint { + let num + let bytes + while (true) { + bytes = randomBytes(size) + num = bytesToBigInt(bytes) + if (bytes[bytes.length - 1] % 2 !== 0) return num + } +} + +function testModulus(mod: bigint) { + const modBytes = bigIntToBytes(mod) + const fieldCtx = new FieldContext(modBytes, 256n) + + const xInt = randomBigInt(modBytes.length, mod) + const yInt = randomBigInt(modBytes.length, mod) + + // convert operands to padded bytes for storing + const elemByteLen = Number(fieldCtx.elemSize) + const xBytes = padBigIntBytes(xInt, elemByteLen * 8) + const yBytes = padBigIntBytes(yInt, elemByteLen * 8) + const outBytes = new Uint8Array(elemByteLen * 8) + + fieldCtx.store(1, 1, xBytes) + fieldCtx.store(2, 1, yBytes) + + fieldCtx.addM(0, 1, 1, 1, 2, 1, 1) + fieldCtx.load(outBytes, 0, 1) + const expectedAdd = (xInt + yInt) % mod + const actualAdd = bytesToBigInt(outBytes) + assert.deepEqual(actualAdd, expectedAdd) + + fieldCtx.subM(0, 1, 1, 1, 2, 1, 1) + fieldCtx.load(outBytes, 0, 1) + let expectedSub = (xInt - yInt) % mod + if (expectedSub < 0n) expectedSub += mod + const actualSub = bytesToBigInt(outBytes) + assert.deepEqual(actualSub, expectedSub) + + fieldCtx.mulM(0, 1, 1, 1, 2, 1, 1) 
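+ // read back slot 0 and check that it equals (xInt * yInt) % mod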
+ fieldCtx.load(outBytes, 0, 1) + const expectedMul = (xInt * yInt) % mod + const actualMul = bytesToBigInt(outBytes) + assert.deepEqual(actualMul, expectedMul) +} + +describe('FieldContext modular arithmetic', () => { + for (let i = 1; i < 96; i++) { + it(`should do add, sub, mul under a random modulus of size ${i} bytes`, () => { + const binaryMod = randomBinaryModulus(i) + testModulus(binaryMod) + + const oddMod = randomOddModulus(i) + testModulus(oddMod) + }) + } +}) diff --git a/packages/evm/test/evmmax/opcodes.spec.ts b/packages/evm/test/evmmax/opcodes.spec.ts new file mode 100644 index 00000000000..35f440dd417 --- /dev/null +++ b/packages/evm/test/evmmax/opcodes.spec.ts @@ -0,0 +1,241 @@ +import { Common, Mainnet } from '@ethereumjs/common' +import { assert, describe, it } from 'vitest' + +import { type PrefixedHexString, createAddressFromString, hexToBytes } from '@ethereumjs/util' +import { bigIntToBytes } from '@ethereumjs/util' +import { LOADX_BASE_COST, SETMODX_BASE_COST, STOREX_BASE_COST } from '../../src/evmmax/index.ts' +import { createEVM } from '../../src/index.ts' + +const MSTORE8 = '53' +const RETURN = 'f3' +const PUSH1 = '60' + +const SETMODX = 'c0' +const LOADX = 'c1' +const STOREX = 'c2' + +const ADDMODX = 'c3' +const SUBMODX = 'c4' +const MULMODX = 'c5' + +function numToOpcode(num: number) { + if (num > 255) throw 'unsupported: > 255 cannot fit in one byte' + return num.toString(16).padStart(2, '0') +} + +function mstore8(index: number, value: number) { + // return numToOpcode(value) + numToOpcode(index) + MSTORE8 + return PUSH1 + numToOpcode(value) + PUSH1 + numToOpcode(index) + MSTORE8 +} + +function ret(index: number, size: number) { + return size + index + RETURN +} + +function setupx(id: number, mod_offset: number, mod_size: number, alloc_count: number) { + return ( + PUSH1 + + numToOpcode(alloc_count) + + PUSH1 + + numToOpcode(mod_size) + + PUSH1 + + numToOpcode(mod_offset) + + PUSH1 + + numToOpcode(id) + + SETMODX + ) +} + +function storex(dst: number, source: number, count: number) { + return ( + PUSH1 + numToOpcode(count) + PUSH1 + numToOpcode(source) + PUSH1 + numToOpcode(dst) + STOREX + ) +} + +function loadx(dest: number, source: number, count: number) { + return ( + PUSH1 + numToOpcode(count) + PUSH1 + numToOpcode(source) + PUSH1 + numToOpcode(dest) + LOADX + ) +} + +function addmodx( + result_slot_idx: number, + result_stride: number, + x_slot_idx: number, + x_stride: number, + y_slot_idx: number, + y_stride: number, + count: number, +) { + return ( + ADDMODX + + numToOpcode(result_slot_idx) + + numToOpcode(result_stride) + + numToOpcode(x_slot_idx) + + numToOpcode(x_stride) + + numToOpcode(y_slot_idx) + + numToOpcode(y_stride) + + numToOpcode(count) + ) +} + +function submodx( + result_slot_idx: number, + result_stride: number, + x_slot_idx: number, + x_stride: number, + y_slot_idx: number, + y_stride: number, + count: number, +) { + return ( + SUBMODX + + numToOpcode(result_slot_idx) + + numToOpcode(result_stride) + + numToOpcode(x_slot_idx) + + numToOpcode(x_stride) + + numToOpcode(y_slot_idx) + + numToOpcode(y_stride) + + numToOpcode(count) + ) +} + +function mulmodx( + result_slot_idx: number, + result_stride: number, + x_slot_idx: number, + x_stride: number, + y_slot_idx: number, + y_stride: number, + count: number, +) { + return ( + MULMODX + + numToOpcode(result_slot_idx) + + numToOpcode(result_stride) + + numToOpcode(x_slot_idx) + + numToOpcode(x_stride) + + numToOpcode(y_slot_idx) + + numToOpcode(y_stride) + + numToOpcode(count) + ) +} + 
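+// The *MODX helpers above append the seven one-byte immediates that follow each arithmetic opcode
+// (result slot, result stride, x slot, x stride, y slot, y stride, count), in the same order that
+// extractEVMMAXImmediateInputs reads them back; e.g. addmodx(0, 1, 1, 1, 2, 1, 1) assembles to
+// 'c300010101020101'.
+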
+describe('should be able to perform modular arithmetic with evmmax opcodes', () => { + // it('1-byte modulus test', async () => { + // const common = new Common({ + // chain: Mainnet, + // eips: [6690], + // params: { + // 6690: { + // setmodxGas: SETMODX_BASE_COST, + // loadxGas: LOADX_BASE_COST, + // storexGas: STOREX_BASE_COST, + // addmodxGas: 0, + // submodxGas: 0, + // mulmodxGas: 0, + // }, + // }, + // }) + // const evm = await createEVM({ common }) + // evm.events.on('step', (e) => { + // console.log(e.opcode.name) + // }) + + // const modulus = 8n + // const modBytes = bigIntToBytes(modulus) + + // console.log('dbg100') + // console.log(addmodx(0, 1, 1, 1, 2, 1, 1)) + // console.log(modBytes) + // // create bytecode + // const bytecode = + // '0x' + + // mstore8(0, 8) + // store value 0x08 at index 0 in memory + // setupx(0, 1, 0, 3) + + // mstore8(1, 3) + + // mstore8(2, 6) + + // storex(1, 1, 1) + + // storex(2, 2, 1) + + // addmodx(0, 1, 1, 1, 2, 1, 1) + + // mulmodx(0, 1, 1, 1, 2, 1, 1) + + // submodx(0, 1, 1, 1, 2, 1, 1) + + // loadx(0, 0, 1) + // // + ret(96, 32) + + // const ADDR_TO_CALL = createAddressFromString('0x' + '20'.repeat(20)) + + // await evm.stateManager.putCode(ADDR_TO_CALL, hexToBytes(bytecode as PrefixedHexString)) + + // const result = await evm.runCall({ + // to: ADDR_TO_CALL, + // gasLimit: BigInt(0xffffff), + // }) + + // // const result1 = await evm.runCall({ + // // data: hexToBytes(bytecode as PrefixedHexString), + // // gasLimit: BigInt(0xffffff), + // // }) + + // console.log(result) + // }) + + it('2-byte modulus test', async () => { + const common = new Common({ + chain: Mainnet, + eips: [6690], + params: { + 6690: { + setmodxGas: SETMODX_BASE_COST, + loadxGas: LOADX_BASE_COST, + storexGas: STOREX_BASE_COST, + addmodxGas: 0, + submodxGas: 0, + mulmodxGas: 0, + }, + }, + }) + const evm = await createEVM({ common }) + const modulus = 500n + const modBytes = bigIntToBytes(modulus) + + // const elemByteLen = Number(fieldCtx.elemSize) + // const xBytes = padBigIntBytes(xInt, elemByteLen * 8) + // const yBytes = padBigIntBytes(yInt, elemByteLen * 8) + // const outBytes = new Uint8Array(elemByteLen * 8) + + console.log('dbg100') + console.log(modBytes) + // create bytecode + const bytecode = + '0x' + + mstore8(0, modBytes[1]) + + mstore8(1, modBytes[0]) + + setupx(0, 0, 2, 3) + + mstore8(2, 220) + + mstore8(4, 230) + + storex(1, 1, 1) + + storex(2, 2, 1) + + addmodx(0, 1, 1, 1, 2, 1, 1) + + mulmodx(0, 1, 1, 1, 2, 1, 1) + + submodx(0, 1, 1, 1, 2, 1, 1) + + loadx(0, 0, 1) + // + ret(96, 32) + + const ADDR_TO_CALL = createAddressFromString('0x' + '20'.repeat(20)) + + await evm.stateManager.putCode(ADDR_TO_CALL, hexToBytes(bytecode as PrefixedHexString)) + + const result = await evm.runCall({ + to: ADDR_TO_CALL, + gasLimit: BigInt(0xffffff), + }) + + // const result1 = await evm.runCall({ + // data: hexToBytes(bytecode as PrefixedHexString), + // gasLimit: BigInt(0xffffff), + // }) + + console.log(result) + }) +})