From 811f8d8d5e6944ec708763a0450ffe94aebdd55d Mon Sep 17 00:00:00 2001 From: Robin Salen Date: Wed, 18 Mar 2026 18:03:47 +0900 Subject: [PATCH 1/5] perf: fold on the fly --- .../src/prover/constraints/folder.rs | 135 +++++------------- .../src/prover/constraints/mod.rs | 11 +- 2 files changed, 43 insertions(+), 103 deletions(-) diff --git a/p3-miden-lifted-stark/src/prover/constraints/folder.rs b/p3-miden-lifted-stark/src/prover/constraints/folder.rs index e3e44cf2..8c281caf 100644 --- a/p3-miden-lifted-stark/src/prover/constraints/folder.rs +++ b/p3-miden-lifted-stark/src/prover/constraints/folder.rs @@ -1,82 +1,27 @@ //! SIMD-optimized constraint folder for prover evaluation. //! -//! [`ProverConstraintFolder`] collects base and extension constraints during `air.eval()`, -//! then combines them via [`Self::finalize_constraints`] using decomposed alpha powers -//! and batched linear combinations. +//! [`ProverConstraintFolder`] accumulates base and extension constraints on-the-fly during +//! `air.eval()`, folding each constraint with its alpha power directly into a running sum. use alloc::vec::Vec; use core::marker::PhantomData; -use p3_field::{ - Algebra, BasedVectorSpace, ExtensionField, Field, PackedField, PrimeCharacteristicRing, -}; +use p3_field::{Algebra, BasedVectorSpace, ExtensionField, Field, PackedField}; use p3_miden_lifted_air::{ AirBuilder, EmptyWindow, ExtensionBuilder, PeriodicAirBuilder, PermutationAirBuilder, RowWindow, }; use crate::selectors::Selectors; -/// Batch size for constraint linear-combination chunks in [`finalize_constraints`]. -const CONSTRAINT_BATCH: usize = 8; - -/// Batched linear combination of packed extension field values with EF coefficients. -/// -/// Extension-field analogue of [`PackedField::packed_linear_combination`]. Processes -/// `coeffs` and `values` in chunks of [`CONSTRAINT_BATCH`], then handles the remainder. -#[inline] -fn batched_ext_linear_combination(coeffs: &[EF], values: &[PE]) -> PE -where - EF: Field, - PE: PrimeCharacteristicRing + Algebra + Copy, -{ - debug_assert_eq!(coeffs.len(), values.len()); - let len = coeffs.len(); - let mut acc = PE::ZERO; - let mut start = 0; - while start + CONSTRAINT_BATCH <= len { - let batch: [PE; CONSTRAINT_BATCH] = - core::array::from_fn(|i| values[start + i] * coeffs[start + i]); - acc += PE::sum_array::(&batch); - start += CONSTRAINT_BATCH; - } - for (&coeff, &val) in coeffs[start..].iter().zip(&values[start..]) { - acc += val * coeff; - } - acc -} - -/// Batched linear combination of packed base field values with F coefficients. -/// -/// Wraps [`PackedField::packed_linear_combination`] with batched chunking -/// and remainder handling, mirroring [`batched_ext_linear_combination`]. -#[inline] -fn batched_base_linear_combination(coeffs: &[P::Scalar], values: &[P]) -> P { - debug_assert_eq!(coeffs.len(), values.len()); - let len = coeffs.len(); - let mut acc = P::ZERO; - let mut start = 0; - while start + CONSTRAINT_BATCH <= len { - acc += P::packed_linear_combination::( - &coeffs[start..start + CONSTRAINT_BATCH], - &values[start..start + CONSTRAINT_BATCH], - ); - start += CONSTRAINT_BATCH; - } - for (&coeff, &val) in coeffs[start..].iter().zip(&values[start..]) { - acc += val * coeff; - } - acc -} - /// Packed constraint folder for SIMD-optimized prover evaluation. /// /// Uses packed types to evaluate constraints on multiple domain points simultaneously: /// - `P`: Packed base field (e.g., `PackedBabyBear`) /// - `PE`: Packed extension field - must be `Algebra + Algebra

+ BasedVectorSpace

` /// -/// Collects constraints during `air.eval()` into separate base/ext vectors, then -/// combines them in [`Self::finalize_constraints`] using decomposed alpha powers and -/// `packed_linear_combination` for efficient SIMD accumulation. +/// Accumulates constraints on-the-fly during `air.eval()` by folding each constraint +/// with its pre-computed alpha power directly into running accumulators (`base_acc` for +/// base-field constraints, `ext_acc` for extension-field constraints). /// /// # Type Parameters /// - `F`: Base field scalar @@ -109,14 +54,16 @@ where pub base_alpha_powers: &'a [Vec], /// Extension-field alpha powers, reordered to match ext constraint emission order. pub ext_alpha_powers: &'a [EF], - /// Current constraint index (debug-only bookkeeping) - pub constraint_index: usize, - /// Total expected constraint count (debug-only bookkeeping) + /// Running accumulator for base-field constraints (folded into PE via alpha powers). + pub base_acc: PE, + /// Running accumulator for extension-field constraints (folded via alpha powers). + pub ext_acc: PE, + /// Index of the next base constraint to be emitted. + pub base_constraint_index: usize, + /// Index of the next extension constraint to be emitted. + pub ext_constraint_index: usize, + /// Total expected constraint count (debug-only bookkeeping). pub constraint_count: usize, - /// Collected base-field constraints for this row - pub base_constraints: Vec

, - /// Collected extension-field constraints for this row - pub ext_constraints: Vec, pub _phantom: PhantomData, } @@ -127,35 +74,25 @@ where P: PackedField, PE: Algebra + Algebra

+ BasedVectorSpace

+ Copy + Send + Sync, { - /// Combine all collected constraints with their pre-computed alpha powers. + /// Return the accumulated constraint folding result. /// - /// Base constraints use `batched_base_linear_combination` per basis dimension, - /// decomposing the extension-field multiply into D base-field SIMD dot products. - /// Extension constraints use `batched_ext_linear_combination` with scalar EF - /// coefficients. Both process in chunks of `CONSTRAINT_BATCH`. - /// - /// We keep base and extension constraints separate because the base constraints can - /// stay in the base field and use packed SIMD arithmetic. Decomposing EF powers of - /// `alpha` into base-field coordinates turns the base-field fold into a small number - /// of packed dot-products, avoiding repeated cross-field promotions. + /// Constraints were folded on-the-fly during `air.eval()`: each `assert_zero` / + /// `assert_zero_ext` call multiplied by the corresponding alpha power and + /// accumulated into `base_acc` (base-field constraints) or `ext_acc` (extension- + /// field constraints). #[inline] pub fn finalize_constraints(self) -> PE { - debug_assert_eq!(self.constraint_index, self.constraint_count); debug_assert_eq!( - self.base_constraints.len(), + self.base_constraint_index + self.ext_constraint_index, + self.constraint_count + ); + debug_assert_eq!( + self.base_constraint_index, self.base_alpha_powers.first().map_or(0, Vec::len) ); - debug_assert_eq!(self.ext_constraints.len(), self.ext_alpha_powers.len()); + debug_assert_eq!(self.ext_constraint_index, self.ext_alpha_powers.len()); - // Base constraints: D independent base-field dot products - let base = &self.base_constraints; - let base_powers = self.base_alpha_powers; - let acc = PE::from_basis_coefficients_fn(|d| { - batched_base_linear_combination(&base_powers[d], base) - }); - - // Extension constraints: EF-coefficient dot product - acc + batched_ext_linear_combination(self.ext_alpha_powers, &self.ext_constraints) + self.base_acc + self.ext_acc } } @@ -203,15 +140,18 @@ where #[inline] fn assert_zero>(&mut self, x: I) { - self.base_constraints.push(x.into()); - self.constraint_index += 1; + let val: P = x.into(); + let idx = self.base_constraint_index; + let delta = PE::from_basis_coefficients_fn(|d| val * self.base_alpha_powers[d][idx]); + self.base_acc += delta; + self.base_constraint_index += 1; } #[inline] fn assert_zeros>(&mut self, array: [I; N]) { - let expr_array = array.map(Into::into); - self.base_constraints.extend(expr_array); - self.constraint_index += N; + for x in array { + self.assert_zero(x); + } } #[inline] @@ -236,8 +176,9 @@ where where I: Into, { - self.ext_constraints.push(x.into()); - self.constraint_index += 1; + let val: PE = x.into(); + self.ext_acc += val * self.ext_alpha_powers[self.ext_constraint_index]; + self.ext_constraint_index += 1; } } diff --git a/p3-miden-lifted-stark/src/prover/constraints/mod.rs b/p3-miden-lifted-stark/src/prover/constraints/mod.rs index d5299357..69226fdd 100644 --- a/p3-miden-lifted-stark/src/prover/constraints/mod.rs +++ b/p3-miden-lifted-stark/src/prover/constraints/mod.rs @@ -45,7 +45,7 @@ type PackedExt = >::ExtensionPacking; /// Uses SIMD-packed parallel iteration via rayon for optimal performance: /// - Processes `WIDTH` points simultaneously using packed field types /// - Main trace stays in base field, only aux trace uses extension field -/// - Constraints are collected then finalized in batches via decomposed alpha powers +/// - Constraints are folded on-the-fly into running accumulators via decomposed alpha powers /// /// Why we fold with `alpha`: the prover does not want to carry K separate constraint /// polynomials through the rest of the protocol. A random linear combination @@ -98,8 +98,6 @@ pub(crate) fn evaluate_constraints_into( // ─── Decompose alpha powers by constraint layout ─── let aux_ef_width = air.aux_width(); let constraint_count = layout.total_constraints(); - let base_count = layout.base_indices.len(); - let ext_count = layout.ext_indices.len(); let (base_alpha_powers, ext_alpha_powers) = layout.decompose_alpha(alpha); // Main trace width @@ -158,10 +156,11 @@ pub(crate) fn evaluate_constraints_into( selectors, base_alpha_powers: &base_alpha_powers, ext_alpha_powers: &ext_alpha_powers, - constraint_index: 0, + base_acc: Default::default(), + ext_acc: Default::default(), + base_constraint_index: 0, + ext_constraint_index: 0, constraint_count, - base_constraints: Vec::with_capacity(base_count), - ext_constraints: Vec::with_capacity(ext_count), _phantom: PhantomData, }; From 4e5acfed628c91348964aec0e420ba43caf5b4d4 Mon Sep 17 00:00:00 2001 From: Robin Salen Date: Thu, 19 Mar 2026 12:45:31 +0900 Subject: [PATCH 2/5] chore: update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21ab7c90..1775dfad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## Unreleased + +- perf: fold constraints on the fly ([#55](https://github.com/0xMiden/p3-miden/pull/55)) + ## 0.5.0 (2026-03-10) - Fixed periodic column evaluation on LDE/quotient domains. From 528ab87c77ada8966296d6249008a2d6ca808d0c Mon Sep 17 00:00:00 2001 From: Robin Salen Date: Tue, 24 Mar 2026 16:56:36 +0900 Subject: [PATCH 3/5] chore: use P::ZERO --- p3-miden-lifted-stark/src/prover/constraints/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/p3-miden-lifted-stark/src/prover/constraints/mod.rs b/p3-miden-lifted-stark/src/prover/constraints/mod.rs index 69226fdd..94d07dae 100644 --- a/p3-miden-lifted-stark/src/prover/constraints/mod.rs +++ b/p3-miden-lifted-stark/src/prover/constraints/mod.rs @@ -14,7 +14,7 @@ pub(crate) use folder::ProverConstraintFolder; pub(crate) use layout::{ConstraintLayout, get_constraint_layout}; use p3_field::{ Algebra, BasedVectorSpace, ExtensionField, Field, PackedFieldExtension, PackedValue, - TwoAdicField, + PrimeCharacteristicRing, TwoAdicField, }; use p3_matrix::{Matrix, dense::RowMajorMatrix}; use p3_maybe_rayon::prelude::*; @@ -156,7 +156,7 @@ pub(crate) fn evaluate_constraints_into( selectors, base_alpha_powers: &base_alpha_powers, ext_alpha_powers: &ext_alpha_powers, - base_acc: Default::default(), + base_acc: PE::::ZERO, ext_acc: Default::default(), base_constraint_index: 0, ext_constraint_index: 0, From f09686272a8259864ca58fa183f3a39979c6456c Mon Sep 17 00:00:00 2001 From: Robin Salen Date: Thu, 26 Mar 2026 23:27:33 +0900 Subject: [PATCH 4/5] perf: override assert_zeros --- p3-miden-lifted-stark/src/prover/constraints/folder.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/p3-miden-lifted-stark/src/prover/constraints/folder.rs b/p3-miden-lifted-stark/src/prover/constraints/folder.rs index f5f9be2d..641a09c6 100644 --- a/p3-miden-lifted-stark/src/prover/constraints/folder.rs +++ b/p3-miden-lifted-stark/src/prover/constraints/folder.rs @@ -149,9 +149,15 @@ where #[inline] fn assert_zeros>(&mut self, array: [I; N]) { - for x in array { - self.assert_zero(x); + let idx = self.base_constraint_index; + let mut delta = PE::ZERO; + for (j, x) in array.into_iter().enumerate() { + let val: P = x.into(); + let term = PE::from_basis_coefficients_fn(|d| val * self.base_alpha_powers[d][idx + j]); + delta += term; } + self.base_acc += delta; + self.base_constraint_index += N; } #[inline] From f5b22900b2febe4a2a85301b760cedfab30de38e Mon Sep 17 00:00:00 2001 From: Robin Salen Date: Fri, 27 Mar 2026 00:11:20 +0900 Subject: [PATCH 5/5] refactor: use packed_linear_combination --- .../src/prover/constraints/folder.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/p3-miden-lifted-stark/src/prover/constraints/folder.rs b/p3-miden-lifted-stark/src/prover/constraints/folder.rs index 641a09c6..fdf19388 100644 --- a/p3-miden-lifted-stark/src/prover/constraints/folder.rs +++ b/p3-miden-lifted-stark/src/prover/constraints/folder.rs @@ -150,12 +150,12 @@ where #[inline] fn assert_zeros>(&mut self, array: [I; N]) { let idx = self.base_constraint_index; - let mut delta = PE::ZERO; - for (j, x) in array.into_iter().enumerate() { - let val: P = x.into(); - let term = PE::from_basis_coefficients_fn(|d| val * self.base_alpha_powers[d][idx + j]); - delta += term; - } + let vals = array.map(Into::into); + let powers = self.base_alpha_powers; + let delta = PE::from_basis_coefficients_fn(|d| { + let coeffs: [F; N] = core::array::from_fn(|j| powers[d][idx + j]); + P::packed_linear_combination::(&coeffs, &vals) + }); self.base_acc += delta; self.base_constraint_index += N; }