Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- [BREAKING] LMCS tree now indexed by domain order; `Lmcs::build_tree`/`build_aligned_tree` require `BitReversibleMatrix` inputs and store `M::BitRev` ([#52](https://github.com/0xMiden/p3-miden/pull/52)).
- Removed `reverse_bits_len` from PCS query sampling, DEEP verifier, and FRI verifier ([#52](https://github.com/0xMiden/p3-miden/pull/52)).
- perf: faster constraint evaluation for wide matrices ([#57](https://github.com/0xMiden/p3-miden/pull/57)).
- perf: fold constraints on the fly ([#55](https://github.com/0xMiden/p3-miden/pull/55)).

## 0.5.0 (2026-03-10)

Expand Down
141 changes: 44 additions & 97 deletions p3-miden-lifted-stark/src/prover/constraints/folder.rs
Original file line number Diff line number Diff line change
@@ -1,82 +1,27 @@
//! SIMD-optimized constraint folder for prover evaluation.
//!
//! [`ProverConstraintFolder`] collects base and extension constraints during `air.eval()`,
//! then combines them via [`Self::finalize_constraints`] using decomposed alpha powers
//! and batched linear combinations.
//! [`ProverConstraintFolder`] accumulates base and extension constraints on-the-fly during
//! `air.eval()`, folding each constraint with its alpha power directly into a running sum.

use alloc::vec::Vec;
use core::marker::PhantomData;

use p3_field::{
Algebra, BasedVectorSpace, ExtensionField, Field, PackedField, PrimeCharacteristicRing,
};
use p3_field::{Algebra, BasedVectorSpace, ExtensionField, Field, PackedField};
use p3_miden_lifted_air::{
AirBuilder, EmptyWindow, ExtensionBuilder, PeriodicAirBuilder, PermutationAirBuilder, RowWindow,
};

use crate::selectors::Selectors;

/// Batch size for constraint linear-combination chunks in [`finalize_constraints`].
///
/// Both batched linear-combination helpers in this module consume their inputs in
/// chunks of this many (coefficient, value) pairs and fall back to a one-at-a-time
/// loop for whatever is left over.
const CONSTRAINT_BATCH: usize = 8;

/// Dot product of extension-field coefficients with packed extension-field values.
///
/// Extension-field analogue of [`PackedField::packed_linear_combination`]: returns
/// the sum of `values[i] * coeffs[i]` over all `i`.
///
/// The leading part of the input whose length is a multiple of [`CONSTRAINT_BATCH`]
/// is processed chunk by chunk (element-wise products reduced with `sum_array`);
/// the remaining tail is accumulated one pair at a time.
///
/// Debug builds assert that `coeffs` and `values` have the same length.
#[inline]
fn batched_ext_linear_combination<PE, EF>(coeffs: &[EF], values: &[PE]) -> PE
where
    EF: Field,
    PE: PrimeCharacteristicRing + Algebra<EF> + Copy,
{
    debug_assert_eq!(coeffs.len(), values.len());

    // Length of the prefix that splits evenly into full batches.
    let batched_len = coeffs.len() - coeffs.len() % CONSTRAINT_BATCH;
    let (coeff_head, coeff_tail) = coeffs.split_at(batched_len);
    let (value_head, value_tail) = values.split_at(batched_len);

    let mut total = PE::ZERO;

    // Full batches: multiply pairwise into a fixed-size array, then reduce it.
    let full_batches = coeff_head
        .chunks_exact(CONSTRAINT_BATCH)
        .zip(value_head.chunks_exact(CONSTRAINT_BATCH));
    for (coeff_chunk, value_chunk) in full_batches {
        let products: [PE; CONSTRAINT_BATCH] =
            core::array::from_fn(|i| value_chunk[i] * coeff_chunk[i]);
        total += PE::sum_array::<CONSTRAINT_BATCH>(&products);
    }

    // Leftover pairs that did not fill a whole batch.
    for (&coeff, &value) in coeff_tail.iter().zip(value_tail) {
        total += value * coeff;
    }

    total
}

/// Dot product of base-field scalar coefficients with packed base-field values.
///
/// Thin wrapper over [`PackedField::packed_linear_combination`] that adds chunking
/// and tail handling, mirroring [`batched_ext_linear_combination`]: the leading part
/// of the input whose length is a multiple of [`CONSTRAINT_BATCH`] is fed to
/// `packed_linear_combination` one chunk at a time, and the remaining tail is
/// accumulated one pair at a time.
///
/// Debug builds assert that `coeffs` and `values` have the same length.
#[inline]
fn batched_base_linear_combination<P: PackedField>(coeffs: &[P::Scalar], values: &[P]) -> P {
    debug_assert_eq!(coeffs.len(), values.len());

    // Length of the prefix that splits evenly into full batches.
    let batched_len = coeffs.len() - coeffs.len() % CONSTRAINT_BATCH;
    let (coeff_head, coeff_tail) = coeffs.split_at(batched_len);
    let (value_head, value_tail) = values.split_at(batched_len);

    let mut total = P::ZERO;

    // Full batches go through the packed SIMD linear-combination primitive.
    let full_batches = coeff_head
        .chunks_exact(CONSTRAINT_BATCH)
        .zip(value_head.chunks_exact(CONSTRAINT_BATCH));
    for (coeff_chunk, value_chunk) in full_batches {
        total += P::packed_linear_combination::<CONSTRAINT_BATCH>(coeff_chunk, value_chunk);
    }

    // Leftover pairs that did not fill a whole batch.
    for (&coeff, &value) in coeff_tail.iter().zip(value_tail) {
        total += value * coeff;
    }

    total
}

/// Packed constraint folder for SIMD-optimized prover evaluation.
///
/// Uses packed types to evaluate constraints on multiple domain points simultaneously:
/// - `P`: Packed base field (e.g., `PackedGoldilocks`)
/// - `PE`: Packed extension field - must be `Algebra<EF> + Algebra<P> + BasedVectorSpace<P>`
///
/// Collects constraints during `air.eval()` into separate base/ext vectors, then
/// combines them in [`Self::finalize_constraints`] using decomposed alpha powers and
/// `packed_linear_combination` for efficient SIMD accumulation.
/// Accumulates constraints on-the-fly during `air.eval()` by folding each constraint
/// with its pre-computed alpha power directly into running accumulators (`base_acc` for
/// base-field constraints, `ext_acc` for extension-field constraints).
///
/// # Type Parameters
/// - `F`: Base field scalar
Expand Down Expand Up @@ -109,14 +54,16 @@ where
pub base_alpha_powers: &'a [Vec<F>],
/// Extension-field alpha powers, reordered to match ext constraint emission order.
pub ext_alpha_powers: &'a [EF],
/// Current constraint index (debug-only bookkeeping)
pub constraint_index: usize,
/// Total expected constraint count (debug-only bookkeeping)
/// Running accumulator for base-field constraints (folded into PE via alpha powers).
pub base_acc: PE,
/// Running accumulator for extension-field constraints (folded via alpha powers).
pub ext_acc: PE,
/// Index of the next base constraint to be emitted.
pub base_constraint_index: usize,
/// Index of the next extension constraint to be emitted.
pub ext_constraint_index: usize,
/// Total expected constraint count (debug-only bookkeeping).
pub constraint_count: usize,
/// Collected base-field constraints for this row
pub base_constraints: Vec<P>,
/// Collected extension-field constraints for this row
pub ext_constraints: Vec<PE>,
pub _phantom: PhantomData<EF>,
}

Expand All @@ -127,35 +74,25 @@ where
P: PackedField<Scalar = F>,
PE: Algebra<EF> + Algebra<P> + BasedVectorSpace<P> + Copy + Send + Sync,
{
/// Combine all collected constraints with their pre-computed alpha powers.
///
/// Base constraints use `batched_base_linear_combination` per basis dimension,
/// decomposing the extension-field multiply into D base-field SIMD dot products.
/// Extension constraints use `batched_ext_linear_combination` with scalar EF
/// coefficients. Both process in chunks of `CONSTRAINT_BATCH`.
/// Return the accumulated constraint folding result.
///
/// We keep base and extension constraints separate because the base constraints can
/// stay in the base field and use packed SIMD arithmetic. Decomposing EF powers of
/// `alpha` into base-field coordinates turns the base-field fold into a small number
/// of packed dot-products, avoiding repeated cross-field promotions.
/// Constraints were folded on-the-fly during `air.eval()`: each `assert_zero` /
/// `assert_zero_ext` call multiplied its constraint by the corresponding alpha power
/// and accumulated the product into `base_acc` (base-field constraints) or `ext_acc`
/// (extension-field constraints).
#[inline]
pub fn finalize_constraints(self) -> PE {
debug_assert_eq!(self.constraint_index, self.constraint_count);
debug_assert_eq!(
self.base_constraints.len(),
self.base_constraint_index + self.ext_constraint_index,
self.constraint_count
);
debug_assert_eq!(
self.base_constraint_index,
self.base_alpha_powers.first().map_or(0, Vec::len)
);
debug_assert_eq!(self.ext_constraints.len(), self.ext_alpha_powers.len());
debug_assert_eq!(self.ext_constraint_index, self.ext_alpha_powers.len());

// Base constraints: D independent base-field dot products
let base = &self.base_constraints;
let base_powers = self.base_alpha_powers;
let acc = PE::from_basis_coefficients_fn(|d| {
batched_base_linear_combination(&base_powers[d], base)
});

// Extension constraints: EF-coefficient dot product
acc + batched_ext_linear_combination(self.ext_alpha_powers, &self.ext_constraints)
self.base_acc + self.ext_acc
}
}

Expand Down Expand Up @@ -203,15 +140,24 @@ where

#[inline]
fn assert_zero<I: Into<Self::Expr>>(&mut self, x: I) {
self.base_constraints.push(x.into());
self.constraint_index += 1;
let val: P = x.into();
let idx = self.base_constraint_index;
let delta = PE::from_basis_coefficients_fn(|d| val * self.base_alpha_powers[d][idx]);
self.base_acc += delta;
self.base_constraint_index += 1;
}

#[inline]
fn assert_zeros<const N: usize, I: Into<Self::Expr>>(&mut self, array: [I; N]) {
let expr_array = array.map(Into::into);
self.base_constraints.extend(expr_array);
self.constraint_index += N;
let idx = self.base_constraint_index;
let vals = array.map(Into::into);
let powers = self.base_alpha_powers;
let delta = PE::from_basis_coefficients_fn(|d| {
let coeffs: [F; N] = core::array::from_fn(|j| powers[d][idx + j]);
P::packed_linear_combination::<N>(&coeffs, &vals)
});
self.base_acc += delta;
self.base_constraint_index += N;
}

#[inline]
Expand All @@ -236,8 +182,9 @@ where
where
I: Into<Self::ExprEF>,
{
self.ext_constraints.push(x.into());
self.constraint_index += 1;
let val: PE = x.into();
self.ext_acc += val * self.ext_alpha_powers[self.ext_constraint_index];
self.ext_constraint_index += 1;
}
}

Expand Down
13 changes: 6 additions & 7 deletions p3-miden-lifted-stark/src/prover/constraints/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub(crate) use folder::ProverConstraintFolder;
pub(crate) use layout::{ConstraintLayout, get_constraint_layout};
use p3_field::{
Algebra, BasedVectorSpace, ExtensionField, Field, PackedFieldExtension, PackedValue,
TwoAdicField,
PrimeCharacteristicRing, TwoAdicField,
};
use p3_matrix::{Matrix, bitrev::BitReversedMatrixView, dense::RowMajorMatrixView};
use p3_maybe_rayon::prelude::*;
Expand Down Expand Up @@ -51,7 +51,7 @@ type PackedExt<F, EF> = <EF as ExtensionField<F>>::ExtensionPacking;
/// Uses SIMD-packed parallel iteration via rayon for optimal performance:
/// - Processes `WIDTH` points simultaneously using packed field types
/// - Main trace stays in base field, only aux trace uses extension field
/// - Constraints are collected then finalized in batches via decomposed alpha powers
/// - Constraints are folded on-the-fly into running accumulators via decomposed alpha powers
///
/// Why we fold with `alpha`: the prover does not want to carry K separate constraint
/// polynomials through the rest of the protocol. A random linear combination
Expand Down Expand Up @@ -103,8 +103,6 @@ pub(crate) fn evaluate_constraints_into<F, EF, A>(
// ─── Decompose alpha powers by constraint layout ───
let aux_ef_width = air.aux_width();
let constraint_count = layout.total_constraints();
let base_count = layout.base_indices.len();
let ext_count = layout.ext_indices.len();
let (base_alpha_powers, ext_alpha_powers) = layout.decompose_alpha(alpha);

// Main trace width
Expand Down Expand Up @@ -178,10 +176,11 @@ pub(crate) fn evaluate_constraints_into<F, EF, A>(
selectors,
base_alpha_powers: &base_alpha_powers,
ext_alpha_powers: &ext_alpha_powers,
constraint_index: 0,
base_acc: PE::<F, EF>::ZERO,
ext_acc: Default::default(),
base_constraint_index: 0,
ext_constraint_index: 0,
constraint_count,
base_constraints: Vec::with_capacity(base_count),
ext_constraints: Vec::with_capacity(ext_count),
_phantom: PhantomData,
};

Expand Down
Loading