Skip to content

Commit d40352e

Browse files
committed
Collect empirical constraints
1 parent bee4c27 commit d40352e

File tree

7 files changed

+355
-0
lines changed

7 files changed

+355
-0
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
use std::collections::BTreeMap;
2+
3+
use serde::{Deserialize, Serialize};
4+
5+
/// "Constraints" that were inferred from execution statistics.
6+
#[derive(Serialize, Deserialize, Clone, Default)]
7+
pub struct EmpiricalConstraints {
8+
/// For each program counter, the range constraints for each column.
9+
/// The range might not hold in 100% of cases.
10+
pub column_ranges_by_pc: BTreeMap<u32, Vec<(u32, u32)>>,
11+
/// For each basic block (identified by its starting PC), the equivalence classes of columns.
12+
/// Each equivalence class is a list of (instruction index in block, column index).
13+
pub equivalence_classes_by_block: BTreeMap<u64, Vec<Vec<(usize, usize)>>>,
14+
}
15+
16+
/// Debug information mapping AIR ids to program counters and column names.
17+
#[derive(Serialize, Deserialize)]
18+
pub struct DebugInfo {
19+
/// Mapping from program counter to AIR id.
20+
pub air_id_by_pc: BTreeMap<u32, usize>,
21+
/// Mapping from AIR id to column names.
22+
pub column_names_by_air_id: BTreeMap<usize, Vec<String>>,
23+
}
24+
25+
#[derive(Serialize, Deserialize)]
26+
pub struct EmpiricalConstraintsJson {
27+
pub empirical_constraints: EmpiricalConstraints,
28+
pub debug_info: DebugInfo,
29+
}

autoprecompiles/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub mod adapter;
2626
pub mod blocks;
2727
pub mod bus_map;
2828
pub mod constraint_optimizer;
29+
pub mod empirical_constraints;
2930
pub mod evaluation;
3031
pub mod execution_profile;
3132
pub mod expression;

cli-openvm/src/main.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use metrics_util::{debugging::DebuggingRecorder, layers::Layer};
44
use openvm_sdk::StdIn;
55
use openvm_stark_sdk::bench::serialize_metric_snapshot;
66
use powdr_autoprecompiles::pgo::{pgo_config, PgoType};
7+
use powdr_openvm::detect_empirical_constraints;
78
use powdr_openvm::{compile_openvm, default_powdr_openvm_config, CompiledProgram, GuestOptions};
89

910
#[cfg(feature = "metrics")]
@@ -144,6 +145,11 @@ fn run_command(command: Commands) {
144145
let execution_profile =
145146
powdr_openvm::execution_profile_from_guest(&guest_program, stdin_from(input));
146147

148+
let _empirical_constraints = detect_empirical_constraints(
149+
&guest_program,
150+
powdr_config.degree_bound,
151+
stdin_from(input),
152+
);
147153
let pgo_config = pgo_config(pgo, max_columns, execution_profile);
148154
let program =
149155
powdr_openvm::compile_exe(guest_program, powdr_config, pgo_config).unwrap();

openvm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ derive_more = { version = "2.0.1", default-features = false, features = [
6161
"from",
6262
] }
6363
itertools = "0.14.0"
64+
serde_json = "1.0.140"
6465

6566
tracing = "0.1.40"
6667
tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] }
Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
use itertools::Itertools;
2+
use openvm_circuit::arch::VmCircuitConfig;
3+
use openvm_sdk::StdIn;
4+
use openvm_stark_backend::p3_matrix::dense::DenseMatrix;
5+
use openvm_stark_sdk::openvm_stark_backend::p3_field::PrimeField32;
6+
use openvm_stark_sdk::p3_baby_bear::BabyBear;
7+
use powdr_autoprecompiles::blocks::BasicBlock;
8+
use powdr_autoprecompiles::empirical_constraints::{
9+
DebugInfo, EmpiricalConstraints, EmpiricalConstraintsJson,
10+
};
11+
use powdr_autoprecompiles::DegreeBound;
12+
use std::collections::hash_map::Entry;
13+
use std::collections::BTreeMap;
14+
use std::{collections::HashMap, sync::Arc};
15+
16+
use crate::trace_generation::do_with_trace;
17+
use crate::{CompiledProgram, Instr, OriginalCompiledProgram};
18+
19+
use std::collections::HashSet;
20+
use std::hash::Hash;
21+
22+
/// Materialized execution trace, Indexed by time and by PC
///
/// Padding rows (identified by `pc == 0` in `collect_trace`) are kept in
/// `rows` and `rows_by_time` but are excluded from `rows_by_pc`.
#[derive(Default)]
struct Trace {
    /// The raw rows, in any order
    rows: Vec<Vec<u32>>,
    /// Mapping (segment_idx, timestamp) -> row index in `rows`
    rows_by_time: BTreeMap<(usize, u32), usize>,
    /// PC value -> List of row indices in `rows` with that PC
    rows_by_pc: HashMap<u32, Vec<usize>>,
}
32+
33+
pub fn detect_empirical_constraints(
34+
program: &OriginalCompiledProgram,
35+
degree_bound: DegreeBound,
36+
inputs: StdIn,
37+
) -> EmpiricalConstraints {
38+
let blocks = program.collect_basic_blocks(degree_bound.identities);
39+
40+
// Collect trace, without any autoprecompiles.
41+
let program = program.compiled_program(Vec::new(), degree_bound.identities);
42+
let (trace, debug_info) = collect_trace(&program, inputs);
43+
let empirical_constraints = generate_empirical_constraints(&blocks, trace);
44+
45+
// Export to disk
46+
let export = EmpiricalConstraintsJson {
47+
empirical_constraints: empirical_constraints.clone(),
48+
debug_info,
49+
};
50+
let json = serde_json::to_string_pretty(&export).unwrap();
51+
std::fs::write("empirical_constraints.json", json).unwrap();
52+
53+
empirical_constraints
54+
}
55+
56+
/// Runs `program` on `inputs` and materializes the main trace of every
/// instruction circuit, together with debug information mapping program
/// counters to AIR ids and AIR ids to column names.
///
/// Non-instruction circuits are skipped: chips with cached mains, AIRs without
/// column names, and AIRs without a `from_state__pc` column.
fn collect_trace(program: &CompiledProgram, inputs: StdIn) -> (Trace, DebugInfo) {
    let mut trace = Trace::default();
    let mut debug_info = DebugInfo {
        air_id_by_pc: BTreeMap::new(),
        column_names_by_air_id: BTreeMap::new(),
    };
    // Incremented once per callback invocation; disambiguates timestamps
    // across segments in `rows_by_time`.
    let mut seg_idx = 0;

    do_with_trace(program, inputs, |vm, _pk, ctx| {
        // AIR id -> AIR, so we can look up column names and AIR names below.
        let global_airs = vm
            .config()
            .create_airs()
            .unwrap()
            .into_airs()
            .enumerate()
            .collect::<HashMap<_, _>>();

        for (air_id, proving_context) in &ctx.per_air {
            if !proving_context.cached_mains.is_empty() {
                // Not the case for instruction circuits
                continue;
            }
            let main: &Arc<DenseMatrix<BabyBear>> = proving_context.common_main.as_ref().unwrap();

            let air = &global_airs[air_id];
            let Some(column_names) = air.columns() else {
                continue;
            };
            // Each trace column must have a name; otherwise the debug info
            // would be inconsistent with the rows we record.
            assert_eq!(main.width, column_names.len());

            // This is the case for all instruction circuits
            let Some(pc_index) = column_names
                .iter()
                .position(|name| name == "from_state__pc")
            else {
                continue;
            };
            // NOTE(review): assumes the timestamp is always column 1 — unlike
            // the PC, it is not located by name; confirm against the AIR layout.
            let ts_index = 1;

            for row in main.row_slices() {
                let row = row.iter().map(|v| v.as_canonical_u32()).collect::<Vec<_>>();
                let pc_value = row[pc_index];
                let ts_value = row[ts_index];
                trace.rows.push(row);
                let row_index = trace.rows.len() - 1;
                trace.rows_by_time.insert((seg_idx, ts_value), row_index);

                if pc_value == 0 {
                    // Padding row!
                    // NOTE(review): assumes pc == 0 only ever occurs on padding
                    // rows — confirm that no real instruction executes at PC 0.
                    continue;
                }

                match trace.rows_by_pc.entry(pc_value) {
                    Entry::Vacant(e) => {
                        // First time we see this PC, initialize the column -> values map
                        e.insert(vec![row_index]);
                        debug_info
                            .column_names_by_air_id
                            .insert(*air_id, column_names.clone());
                        debug_info.air_id_by_pc.insert(pc_value, *air_id);
                    }
                    Entry::Occupied(mut o) => {
                        let rows = o.get_mut();
                        // A given PC must always be executed by the same chip.
                        assert_eq!(
                            debug_info.air_id_by_pc[&pc_value],
                            *air_id,
                            "Mismatched air IDs for PC {}: {} vs {}",
                            pc_value,
                            global_airs[&debug_info.air_id_by_pc[&pc_value]].name(),
                            air.name()
                        );
                        rows.push(row_index);
                    }
                }
            }
        }

        seg_idx += 1;
    })
    .unwrap();
    (trace, debug_info)
}
138+
139+
fn generate_empirical_constraints(
140+
blocks: &[BasicBlock<Instr<BabyBear>>],
141+
trace: Trace,
142+
) -> EmpiricalConstraints {
143+
// Block ID -> instruction count mapping
144+
let instruction_counts = blocks
145+
.iter()
146+
.map(|block| (block.start_pc, block.statements.len()))
147+
.collect::<HashMap<_, _>>();
148+
149+
// Block ID -> Vec<Vec<Row>>
150+
let mut block_rows = BTreeMap::new();
151+
let mut i = 0;
152+
let rows_by_time = trace.rows_by_time.values().collect::<Vec<_>>();
153+
while i < rows_by_time.len() {
154+
let row = &trace.rows[*rows_by_time[i]];
155+
let pc_value = row[0] as u64;
156+
157+
if instruction_counts.contains_key(&pc_value) {
158+
let instruction_count = instruction_counts[&pc_value];
159+
let block_row_slice = &rows_by_time[i..i + instruction_count];
160+
block_rows
161+
.entry(pc_value)
162+
.or_insert(Vec::new())
163+
.push(block_row_slice.to_vec());
164+
i += instruction_count;
165+
} else {
166+
i += 1;
167+
}
168+
}
169+
170+
// Block ID -> Vec<Vec<Vec<(instruction_index, col_index)>>>:
171+
// Indices: block ID, instance idx, equivalence class idx, cell
172+
let equivalence_classes = block_rows
173+
.into_iter()
174+
.map(|(block_id, blocks)| {
175+
let classes = blocks
176+
.into_iter()
177+
.map(|rows| {
178+
let value_to_cells = rows
179+
.into_iter()
180+
.enumerate()
181+
.flat_map(|(instruction_index, row_index)| {
182+
trace.rows[*row_index]
183+
.iter()
184+
.enumerate()
185+
.map(|(col_index, v)| (*v, (instruction_index, col_index)))
186+
.collect::<Vec<_>>()
187+
})
188+
.into_group_map();
189+
value_to_cells.values().cloned().collect::<Vec<_>>()
190+
})
191+
.collect::<Vec<_>>();
192+
(block_id, classes)
193+
})
194+
.collect::<HashMap<_, _>>();
195+
196+
// Intersect equivalence classes across all instances
197+
let intersected_equivalence_classes = equivalence_classes
198+
.into_iter()
199+
.map(|(block_id, classes)| {
200+
let intersected = intersect_partitions(&classes);
201+
202+
// Remove singleton classes
203+
let intersected = intersected
204+
.into_iter()
205+
.filter(|class| class.len() > 1)
206+
.collect::<Vec<_>>();
207+
208+
(block_id, intersected)
209+
})
210+
.collect::<BTreeMap<_, _>>();
211+
212+
// Map all column values to their range (1st and 99th percentile) for each pc
213+
let column_ranges_by_pc: HashMap<u32, Vec<(u32, u32)>> = trace
214+
.rows_by_pc
215+
.into_iter()
216+
.map(|(pc, pc_rows)| {
217+
let rows = pc_rows
218+
.into_iter()
219+
.map(|row_index| &trace.rows[row_index])
220+
.collect::<Vec<_>>();
221+
for row in &rows {
222+
// All rows for a given PC should be in the same chip
223+
assert_eq!(row.len(), rows[0].len());
224+
}
225+
let column_ranges = (0..rows[0].len())
226+
.map(|col_index| {
227+
let mut values = rows.iter().map(|row| row[col_index]).collect::<Vec<_>>();
228+
values.sort_unstable();
229+
let len = values.len();
230+
let p1_index = len / 100; // 1st percentile
231+
let p99_index = len * 99 / 100; // 99th percentile
232+
(values[p1_index], values[p99_index])
233+
})
234+
.collect();
235+
(pc, column_ranges)
236+
})
237+
.collect();
238+
239+
EmpiricalConstraints {
240+
column_ranges_by_pc: column_ranges_by_pc.into_iter().collect(),
241+
equivalence_classes_by_block: intersected_equivalence_classes,
242+
}
243+
}
244+
245+
/// Computes the coarsest common refinement of a set of partitions.
///
/// Two elements end up in the same output class iff they share a class in
/// *every* input partition. Elements that are absent from at least one
/// partition form singleton classes and are omitted from the output entirely.
/// The order of the returned classes is unspecified.
fn intersect_partitions<Id>(partitions: &[Vec<Vec<Id>>]) -> Vec<Vec<Id>>
where
    Id: Eq + Hash + Copy,
{
    if partitions.is_empty() {
        return Vec::new();
    }

    // Per-partition lookup tables: element -> index of its class within that
    // partition.
    let class_lookup: Vec<HashMap<Id, usize>> = partitions
        .iter()
        .map(|partition| {
            partition
                .iter()
                .enumerate()
                .flat_map(|(class_idx, class)| class.iter().map(move |&id| (id, class_idx)))
                .collect()
        })
        .collect();

    // All elements occurring anywhere in any partition.
    let universe: HashSet<Id> = partitions
        .iter()
        .flat_map(|partition| partition.iter().flatten())
        .copied()
        .collect();

    // Group elements by their "signature": the vector of class indices across
    // all partitions. A `None` lookup means the element is missing from some
    // partition, i.e. it is its own singleton class and can be dropped.
    let mut by_signature: HashMap<Vec<usize>, Vec<Id>> = HashMap::new();
    for &id in &universe {
        let signature: Option<Vec<usize>> = class_lookup
            .iter()
            .map(|lookup| lookup.get(&id).copied())
            .collect();
        if let Some(signature) = signature {
            by_signature.entry(signature).or_default().push(id);
        }
    }

    // The grouped values are the resulting equivalence classes.
    by_signature.into_values().collect()
}

openvm/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ use crate::powdr_extension::{PowdrExtensionExecutor, PowdrPrecompile};
6666
mod air_builder;
6767
pub mod bus_map;
6868
pub mod cuda_abi;
69+
mod empirical_constraints;
6970
pub mod extraction_utils;
7071
pub mod opcode;
7172
mod program;
@@ -76,6 +77,8 @@ pub use opcode::instruction_allowlist;
7677
pub use powdr_autoprecompiles::DegreeBound;
7778
pub use powdr_autoprecompiles::PgoConfig;
7879

80+
pub use crate::empirical_constraints::detect_empirical_constraints;
81+
7982
pub type BabyBearSC = BabyBearPoseidon2Config;
8083

8184
cfg_if::cfg_if! {

0 commit comments

Comments
 (0)