Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## Unreleased

- perf: fold constraints on the fly ([#55](https://github.com/0xMiden/p3-miden/pull/55))

## 0.5.0 (2026-03-10)

- Fixed periodic column evaluation on LDE/quotient domains.
Expand Down
135 changes: 38 additions & 97 deletions p3-miden-lifted-stark/src/prover/constraints/folder.rs
Original file line number Diff line number Diff line change
@@ -1,82 +1,27 @@
//! SIMD-optimized constraint folder for prover evaluation.
//!
//! [`ProverConstraintFolder`] collects base and extension constraints during `air.eval()`,
//! then combines them via [`Self::finalize_constraints`] using decomposed alpha powers
//! and batched linear combinations.
//! [`ProverConstraintFolder`] accumulates base and extension constraints on-the-fly during
//! `air.eval()`, folding each constraint with its alpha power directly into a running sum.

use alloc::vec::Vec;
use core::marker::PhantomData;

use p3_field::{
Algebra, BasedVectorSpace, ExtensionField, Field, PackedField, PrimeCharacteristicRing,
};
use p3_field::{Algebra, BasedVectorSpace, ExtensionField, Field, PackedField};
use p3_miden_lifted_air::{
AirBuilder, EmptyWindow, ExtensionBuilder, PeriodicAirBuilder, PermutationAirBuilder, RowWindow,
};

use crate::selectors::Selectors;

/// Number of constraint terms combined per chunk by the batched linear-combination
/// helpers that [`ProverConstraintFolder::finalize_constraints`] relies on.
const CONSTRAINT_BATCH: usize = 8;

/// Dot product of packed extension-field `values` with scalar extension-field `coeffs`.
///
/// Extension-field counterpart of [`PackedField::packed_linear_combination`]: full groups
/// of [`CONSTRAINT_BATCH`] products are summed with `sum_array`, and whatever is left over
/// is accumulated one term at a time.
///
/// Both slices are expected to have the same length (checked in debug builds only).
#[inline]
fn batched_ext_linear_combination<PE, EF>(coeffs: &[EF], values: &[PE]) -> PE
where
    EF: Field,
    PE: PrimeCharacteristicRing + Algebra<EF> + Copy,
{
    debug_assert_eq!(coeffs.len(), values.len());
    let total = coeffs.len();
    // Largest multiple of the batch size that fits; everything past it is the tail.
    let batched_len = total - total % CONSTRAINT_BATCH;

    let mut sum = PE::ZERO;
    for offset in (0..batched_len).step_by(CONSTRAINT_BATCH) {
        let products: [PE; CONSTRAINT_BATCH] =
            core::array::from_fn(|k| values[offset + k] * coeffs[offset + k]);
        sum += PE::sum_array::<CONSTRAINT_BATCH>(&products);
    }

    // Leftover terms that do not fill a whole batch.
    let tail = coeffs[batched_len..].iter().zip(&values[batched_len..]);
    for (&c, &v) in tail {
        sum += v * c;
    }
    sum
}

/// Dot product of packed base-field `values` with scalar base-field `coeffs`.
///
/// Full groups of [`CONSTRAINT_BATCH`] terms are delegated to
/// [`PackedField::packed_linear_combination`]; the leftover terms are accumulated one at
/// a time. Same structure as [`batched_ext_linear_combination`].
///
/// Both slices are expected to have the same length (checked in debug builds only).
#[inline]
fn batched_base_linear_combination<P: PackedField>(coeffs: &[P::Scalar], values: &[P]) -> P {
    debug_assert_eq!(coeffs.len(), values.len());
    let total = coeffs.len();
    // Largest multiple of the batch size that fits; everything past it is the tail.
    let batched_len = total - total % CONSTRAINT_BATCH;

    let mut sum = P::ZERO;
    for offset in (0..batched_len).step_by(CONSTRAINT_BATCH) {
        let end = offset + CONSTRAINT_BATCH;
        sum += P::packed_linear_combination::<CONSTRAINT_BATCH>(
            &coeffs[offset..end],
            &values[offset..end],
        );
    }

    // Leftover terms that do not fill a whole batch.
    let tail = coeffs[batched_len..].iter().zip(&values[batched_len..]);
    for (&c, &v) in tail {
        sum += v * c;
    }
    sum
}

/// Packed constraint folder for SIMD-optimized prover evaluation.
///
/// Uses packed types to evaluate constraints on multiple domain points simultaneously:
/// - `P`: Packed base field (e.g., `PackedBabyBear`)
/// - `PE`: Packed extension field - must be `Algebra<EF> + Algebra<P> + BasedVectorSpace<P>`
///
/// Collects constraints during `air.eval()` into separate base/ext vectors, then
/// combines them in [`Self::finalize_constraints`] using decomposed alpha powers and
/// `packed_linear_combination` for efficient SIMD accumulation.
/// Accumulates constraints on-the-fly during `air.eval()` by folding each constraint
/// with its pre-computed alpha power directly into running accumulators (`base_acc` for
/// base-field constraints, `ext_acc` for extension-field constraints).
///
/// # Type Parameters
/// - `F`: Base field scalar
Expand Down Expand Up @@ -109,14 +54,16 @@ where
pub base_alpha_powers: &'a [Vec<F>],
/// Extension-field alpha powers, reordered to match ext constraint emission order.
pub ext_alpha_powers: &'a [EF],
/// Current constraint index (debug-only bookkeeping)
pub constraint_index: usize,
/// Total expected constraint count (debug-only bookkeeping)
/// Running accumulator for base-field constraints (folded into PE via alpha powers).
pub base_acc: PE,
/// Running accumulator for extension-field constraints (folded via alpha powers).
pub ext_acc: PE,
/// Index of the next base constraint to be emitted.
pub base_constraint_index: usize,
/// Index of the next extension constraint to be emitted.
pub ext_constraint_index: usize,
/// Total expected constraint count (debug-only bookkeeping).
pub constraint_count: usize,
/// Collected base-field constraints for this row
pub base_constraints: Vec<P>,
/// Collected extension-field constraints for this row
pub ext_constraints: Vec<PE>,
pub _phantom: PhantomData<EF>,
}

Expand All @@ -127,35 +74,25 @@ where
P: PackedField<Scalar = F>,
PE: Algebra<EF> + Algebra<P> + BasedVectorSpace<P> + Copy + Send + Sync,
{
/// Combine all collected constraints with their pre-computed alpha powers.
/// Return the accumulated constraint folding result.
///
/// Base constraints use `batched_base_linear_combination` per basis dimension,
/// decomposing the extension-field multiply into D base-field SIMD dot products.
/// Extension constraints use `batched_ext_linear_combination` with scalar EF
/// coefficients. Both process in chunks of `CONSTRAINT_BATCH`.
///
/// We keep base and extension constraints separate because the base constraints can
/// stay in the base field and use packed SIMD arithmetic. Decomposing EF powers of
/// `alpha` into base-field coordinates turns the base-field fold into a small number
/// of packed dot-products, avoiding repeated cross-field promotions.
/// Constraints were folded on-the-fly during `air.eval()`: each `assert_zero` /
/// `assert_zero_ext` call multiplied its constraint by the corresponding alpha power
/// and added the product to `base_acc` (base-field constraints) or `ext_acc`
/// (extension-field constraints).
#[inline]
pub fn finalize_constraints(self) -> PE {
debug_assert_eq!(self.constraint_index, self.constraint_count);
debug_assert_eq!(
self.base_constraints.len(),
self.base_constraint_index + self.ext_constraint_index,
self.constraint_count
);
debug_assert_eq!(
self.base_constraint_index,
self.base_alpha_powers.first().map_or(0, Vec::len)
);
debug_assert_eq!(self.ext_constraints.len(), self.ext_alpha_powers.len());
debug_assert_eq!(self.ext_constraint_index, self.ext_alpha_powers.len());

// Base constraints: D independent base-field dot products
let base = &self.base_constraints;
let base_powers = self.base_alpha_powers;
let acc = PE::from_basis_coefficients_fn(|d| {
batched_base_linear_combination(&base_powers[d], base)
});

// Extension constraints: EF-coefficient dot product
acc + batched_ext_linear_combination(self.ext_alpha_powers, &self.ext_constraints)
self.base_acc + self.ext_acc
}
}

Expand Down Expand Up @@ -203,15 +140,18 @@ where

#[inline]
fn assert_zero<I: Into<Self::Expr>>(&mut self, x: I) {
self.base_constraints.push(x.into());
self.constraint_index += 1;
let val: P = x.into();
let idx = self.base_constraint_index;
let delta = PE::from_basis_coefficients_fn(|d| val * self.base_alpha_powers[d][idx]);
self.base_acc += delta;
self.base_constraint_index += 1;
}

#[inline]
fn assert_zeros<const N: usize, I: Into<Self::Expr>>(&mut self, array: [I; N]) {
let expr_array = array.map(Into::into);
self.base_constraints.extend(expr_array);
self.constraint_index += N;
for x in array {
self.assert_zero(x);
}
}

#[inline]
Expand All @@ -236,8 +176,9 @@ where
where
I: Into<Self::ExprEF>,
{
self.ext_constraints.push(x.into());
self.constraint_index += 1;
let val: PE = x.into();
self.ext_acc += val * self.ext_alpha_powers[self.ext_constraint_index];
self.ext_constraint_index += 1;
}
}

Expand Down
11 changes: 5 additions & 6 deletions p3-miden-lifted-stark/src/prover/constraints/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ type PackedExt<F, EF> = <EF as ExtensionField<F>>::ExtensionPacking;
/// Uses SIMD-packed parallel iteration via rayon for optimal performance:
/// - Processes `WIDTH` points simultaneously using packed field types
/// - Main trace stays in base field, only aux trace uses extension field
/// - Constraints are collected then finalized in batches via decomposed alpha powers
/// - Constraints are folded on-the-fly into running accumulators via decomposed alpha powers
///
/// Why we fold with `alpha`: the prover does not want to carry K separate constraint
/// polynomials through the rest of the protocol. A random linear combination
Expand Down Expand Up @@ -98,8 +98,6 @@ pub(crate) fn evaluate_constraints_into<F, EF, A, M>(
// ─── Decompose alpha powers by constraint layout ───
let aux_ef_width = air.aux_width();
let constraint_count = layout.total_constraints();
let base_count = layout.base_indices.len();
let ext_count = layout.ext_indices.len();
let (base_alpha_powers, ext_alpha_powers) = layout.decompose_alpha(alpha);

// Main trace width
Expand Down Expand Up @@ -158,10 +156,11 @@ pub(crate) fn evaluate_constraints_into<F, EF, A, M>(
selectors,
base_alpha_powers: &base_alpha_powers,
ext_alpha_powers: &ext_alpha_powers,
constraint_index: 0,
base_acc: Default::default(),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I think this could start from PE::ZERO instead of Default::default(). PE already comes from Algebra, so zero is part of the contract here, while Default is only a convention on the current packed types.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would favor the solution in this PR for its simplicity unless the more involved solution with buffers provides a clear and consistent advantage.

ext_acc: Default::default(),
base_constraint_index: 0,
ext_constraint_index: 0,
constraint_count,
base_constraints: Vec::with_capacity(base_count),
ext_constraints: Vec::with_capacity(ext_count),
_phantom: PhantomData,
};

Expand Down
Loading