Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion autoprecompiles/src/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::{fmt::Display, sync::Arc};
use powdr_number::FieldElement;
use serde::{Deserialize, Serialize};

use crate::EmpiricalConstraints;
use crate::{
blocks::{BasicBlock, Instruction, Program},
constraint_optimizer::IsBusStateful,
Expand Down Expand Up @@ -45,12 +46,19 @@ pub trait PgoAdapter {
config: &PowdrConfig,
vm_config: AdapterVmConfig<Self::Adapter>,
labels: BTreeMap<u64, Vec<String>>,
empirical_constraints: EmpiricalConstraints,
) -> Vec<AdapterApcWithStats<Self::Adapter>> {
let filtered_blocks = blocks
.into_iter()
.filter(|block| !Self::Adapter::should_skip_block(block))
.collect();
self.create_apcs_with_pgo(filtered_blocks, config, vm_config, labels)
self.create_apcs_with_pgo(
filtered_blocks,
config,
vm_config,
labels,
empirical_constraints,
)
}

fn create_apcs_with_pgo(
Expand All @@ -59,6 +67,7 @@ pub trait PgoAdapter {
config: &PowdrConfig,
vm_config: AdapterVmConfig<Self::Adapter>,
labels: BTreeMap<u64, Vec<String>>,
empirical_constraints: EmpiricalConstraints,
) -> Vec<AdapterApcWithStats<Self::Adapter>>;

fn pc_execution_count(&self, _pc: u64) -> Option<u32> {
Expand Down
137 changes: 137 additions & 0 deletions autoprecompiles/src/empirical_constraints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ use std::hash::Hash;
use itertools::Itertools;
use serde::{Deserialize, Serialize};

use crate::{
adapter::Adapter,
blocks::BasicBlock,
expression::{AlgebraicExpression, AlgebraicReference},
SymbolicConstraint,
};

/// "Constraints" that were inferred from execution statistics. They hold empirically
/// (most of the time), but are not guaranteed to hold in all cases.
#[derive(Serialize, Deserialize, Clone, Default, Debug)]
Expand All @@ -16,6 +23,9 @@ pub struct EmpiricalConstraints {
/// For each basic block (identified by its starting PC), the equivalence classes of columns.
/// Each equivalence class is a list of (instruction index in block, column index).
pub equivalence_classes_by_block: BTreeMap<u64, BTreeSet<BTreeSet<(usize, usize)>>>,
/// Count of how many times each program counter was executed in the sampled executions.
/// This can be used to set a threshold for applying constraints only to frequently executed PCs.
pub pc_counts: BTreeMap<u32, u64>,
}

/// Debug information mapping AIR ids to program counters and column names.
Expand Down Expand Up @@ -61,6 +71,32 @@ impl EmpiricalConstraints {
})
.or_insert(classes);
}

// Combine pc counts
for (pc, count) in other.pc_counts {
*self.pc_counts.entry(pc).or_insert(0) += count;
}
}

pub fn apply_pc_threshold(&self, threshold: u64) -> Self {
EmpiricalConstraints {
column_ranges_by_pc: self
.column_ranges_by_pc
.iter()
.filter(|(pc, _)| self.pc_counts.get(pc).cloned().unwrap_or(0) >= threshold)
.map(|(pc, ranges)| (*pc, ranges.clone()))
.collect(),
equivalence_classes_by_block: self
.equivalence_classes_by_block
.iter()
.filter(|(&block_pc, _)| {
// For equivalence classes, we check the pc_counts of the first instruction in the block
self.pc_counts.get(&(block_pc as u32)).cloned().unwrap_or(0) >= threshold
})
.map(|(block_pc, classes)| (*block_pc, classes.clone()))
.collect(),
pc_counts: self.pc_counts.clone(),
}
}
}

Expand Down Expand Up @@ -88,6 +124,107 @@ fn merge_maps<K: Ord, V: Eq + Debug>(map1: &mut BTreeMap<K, V>, map2: BTreeMap<K
}
}

/// For any program line that was not executed at least this many times in the traces,
/// discard any empirical constraints associated with it.
const EXECUTION_COUNT_THRESHOLD: u64 = 100;

pub struct ConstraintGenerator<'a, A: Adapter> {
empirical_constraints: EmpiricalConstraints,
algebraic_references: BTreeMap<(usize, usize), AlgebraicReference>,
block: &'a BasicBlock<A::Instruction>,
}

impl<'a, A: Adapter> ConstraintGenerator<'a, A> {
pub fn new(
empirical_constraints: &EmpiricalConstraints,
subs: &[Vec<u64>],
columns: impl Iterator<Item = AlgebraicReference>,
block: &'a BasicBlock<A::Instruction>,
) -> Self {
let reverse_subs = subs
.iter()
.enumerate()
.flat_map(|(instr_index, subs)| {
subs.iter()
.enumerate()
.map(move |(col_index, &poly_id)| (poly_id, (instr_index, col_index)))
})
.collect::<BTreeMap<_, _>>();
let algebraic_references = columns
.map(|r| (*reverse_subs.get(&r.id).unwrap(), r.clone()))
.collect::<BTreeMap<_, _>>();

Self {
empirical_constraints: empirical_constraints
.apply_pc_threshold(EXECUTION_COUNT_THRESHOLD),
algebraic_references,
block,
}
}

fn get_algebraic_reference(&self, instr_index: usize, col_index: usize) -> AlgebraicReference {
self.algebraic_references
.get(&(instr_index, col_index))
.cloned()
.unwrap_or_else(|| {
panic!(
"Missing reference for (i: {}, col_index: {}, block_id: {})",
instr_index, col_index, self.block.start_pc
)
})
}

pub fn range_constraints(&self) -> Vec<SymbolicConstraint<<A as Adapter>::PowdrField>> {
let mut constraints = Vec::new();

for i in 0..self.block.statements.len() {
let pc = (self.block.start_pc + (i * 4) as u64) as u32;
let Some(range_constraints) = self.empirical_constraints.column_ranges_by_pc.get(&pc)
else {
continue;
};
for (col_index, range) in range_constraints.iter().enumerate() {
// TODO: We could also enforce looser range constraints.
// This is a bit more complicated though, because we'd have to add bus interactions
// to actually enforce them.
if range.0 == range.1 {
let value = A::PowdrField::from(range.0 as u64);
let reference = self.get_algebraic_reference(i, col_index);
let constraint = AlgebraicExpression::Reference(reference)
- AlgebraicExpression::Number(value);

constraints.push(SymbolicConstraint { expr: constraint });
}
}
}

constraints
}

pub fn equivalence_constraints(&self) -> Vec<SymbolicConstraint<<A as Adapter>::PowdrField>> {
let mut constraints = Vec::new();

if let Some(equivalence_classes) = self
.empirical_constraints
.equivalence_classes_by_block
.get(&self.block.start_pc)
{
for equivalence_class in equivalence_classes {
let first = equivalence_class.first().unwrap();
let first_ref = self.get_algebraic_reference(first.0, first.1);
for other in equivalence_class.iter().skip(1) {
let other_ref = self.get_algebraic_reference(other.0, other.1);
let constraint = AlgebraicExpression::Reference(first_ref.clone())
- AlgebraicExpression::Reference(other_ref.clone());
constraints.push(SymbolicConstraint { expr: constraint });
}
}
}

constraints
}
}

/// Intersects multiple partitions of the same universe into a single partition.
/// In other words, two elements are in the same equivalence class in the resulting partition
/// if and only if they are in the same equivalence class in all input partitions.
Expand Down
78 changes: 67 additions & 11 deletions autoprecompiles/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::adapter::{Adapter, AdapterApc, AdapterVmConfig};
use crate::blocks::BasicBlock;
use crate::bus_map::{BusMap, BusType};
use crate::empirical_constraints::{ConstraintGenerator, EmpiricalConstraints};
use crate::evaluation::AirStats;
use crate::expression_conversion::algebraic_to_grouped_expression;
use crate::symbolic_machine_generator::convert_machine_field_type;
Expand Down Expand Up @@ -429,6 +430,7 @@ pub fn build<A: Adapter>(
vm_config: AdapterVmConfig<A>,
degree_bound: DegreeBound,
apc_candidates_dir_path: Option<&Path>,
empirical_constraints: &EmpiricalConstraints,
) -> Result<AdapterApc<A>, crate::constraint_optimizer::Error> {
let start = std::time::Instant::now();

Expand All @@ -438,6 +440,16 @@ pub fn build<A: Adapter>(
&vm_config.bus_map,
);

// Generate constraints for optimistic precompiles.
let constraint_generator = ConstraintGenerator::<A>::new(
empirical_constraints,
&column_allocator.subs,
machine.main_columns(),
&block,
);
let range_analyzer_constraints = constraint_generator.range_constraints();
let equivalence_analyzer_constraints = constraint_generator.equivalence_constraints();

let labels = [("apc_start_pc", block.start_pc.to_string())];
metrics::counter!("before_opt_cols", &labels)
.absolute(machine.unique_references().count() as u64);
Expand All @@ -446,13 +458,44 @@ pub fn build<A: Adapter>(
metrics::counter!("before_opt_interactions", &labels)
.absolute(machine.unique_references().count() as u64);

let mut baseline = machine;

// Optimize once without empirical constraints
let (machine, column_allocator) = optimizer::optimize::<A>(
machine,
vm_config.bus_interaction_handler,
baseline.clone(),
vm_config.bus_interaction_handler.clone(),
degree_bound,
&vm_config.bus_map,
column_allocator,
)?;
)
.unwrap();
// Get the precompile that is guaranteed to always work
let guaranteed_precompile = machine.render(&vm_config.bus_map);

let (machine, column_allocator, optimistic_precompile) =
if !range_analyzer_constraints.is_empty() || !equivalence_analyzer_constraints.is_empty() {
// Add empirical constraints
baseline.constraints.extend(range_analyzer_constraints);
baseline
.constraints
.extend(equivalence_analyzer_constraints);

// Optimize again with empirical constraints
// TODO: Calling optimize twice is needed; otherwise the solver fails.
let (machine, column_allocator) = optimizer::optimize::<A>(
baseline,
vm_config.bus_interaction_handler,
degree_bound,
&vm_config.bus_map,
column_allocator,
)
.unwrap();
let optimistic_precompile = machine.render(&vm_config.bus_map);
(machine, column_allocator, Some(optimistic_precompile))
} else {
// If there are no empirical constraints, we can skip optimizing twice.
(machine, column_allocator, None)
};

// add guards to constraints that are not satisfied by zeroes
let (machine, column_allocator) = add_guards(machine, column_allocator);
Expand All @@ -469,14 +512,27 @@ pub fn build<A: Adapter>(
let apc = Apc::new(block, machine, column_allocator);

if let Some(path) = apc_candidates_dir_path {
let ser_path = path
.join(format!("apc_candidate_{}", apc.start_pc()))
.with_extension("cbor");
std::fs::create_dir_all(path).expect("Failed to create directory for APC candidates");
let file =
std::fs::File::create(&ser_path).expect("Failed to create file for APC candidate");
let writer = BufWriter::new(file);
serde_cbor::to_writer(writer, &apc).expect("Failed to write APC candidate to file");
if let Some(optimistic_precompile) = &optimistic_precompile {
// For debugging purposes, serialize the APC candidate to a file
let ser_path = path
.join(format!("apc_candidate_{}", apc.start_pc()))
.with_extension("cbor");
std::fs::create_dir_all(path).expect("Failed to create directory for APC candidates");
let file =
std::fs::File::create(&ser_path).expect("Failed to create file for APC candidate");
let writer = BufWriter::new(file);
serde_cbor::to_writer(writer, &apc).expect("Failed to write APC candidate to file");

let dumb_path = path
.join(format!("apc_candidate_{}_guaranteed.txt", apc.start_pc()))
.with_extension("txt");
std::fs::write(dumb_path, guaranteed_precompile).unwrap();

let ai_path = path
.join(format!("apc_candidate_{}_optimistic.txt", apc.start_pc()))
.with_extension("txt");
std::fs::write(ai_path, optimistic_precompile).unwrap();
}
}

metrics::gauge!("apc_gen_time_ms", &labels).set(start.elapsed().as_millis() as f64);
Expand Down
14 changes: 14 additions & 0 deletions autoprecompiles/src/memory_optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,20 @@ fn redundant_memory_interactions_indices<
// In that case, we can replace both bus interactions with equality constraints
// between the data that would have been sent and received.
if let Some((previous_send, existing_values)) = memory_contents.remove(&addr) {
// TODO: This can happen which optimistic precompiles, because basic block can include
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be fixed before merging. I suggest to black-list big-word instructions.

// big-word branching instructions. This hot fix is NOT sounds, because it does not handle
// a situation where we write a (small-word) limb, accesses the large word, and then the
// small-word limb again.
if existing_values.len() != mem_int.data().len() {
log::error!(
"Memory interaction data length mismatch: existing values = {}, new values = {}. Resetting memory.",
existing_values.iter().map(ToString::to_string).join(", "),
mem_int.data().iter().map(ToString::to_string).join(", ")
);
memory_contents.clear();
continue;
}
Comment on lines +194 to +202
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appears because some of OpenVM's 256-Bit branch instructions can appear in a basic block. These instruction read larger words.

I think we should fix this by not including these instructions inside a basic block, like we do for other precompiles (= non RISC-V instructions). Doing this properly would really complicate the memory optimizer and I don't think it's worth it.


for (existing, new) in existing_values.iter().zip_eq(mem_int.data().iter()) {
new_constraints.push(AlgebraicConstraint::assert_zero(
existing.clone() - new.clone(),
Expand Down
4 changes: 3 additions & 1 deletion autoprecompiles/src/pgo/cell/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::{
blocks::BasicBlock,
evaluation::EvaluationResult,
pgo::cell::selection::parallel_fractional_knapsack,
PowdrConfig,
EmpiricalConstraints, PowdrConfig,
};

mod selection;
Expand Down Expand Up @@ -92,6 +92,7 @@ impl<A: Adapter + Send + Sync, C: Candidate<A> + Send + Sync> PgoAdapter for Cel
config: &PowdrConfig,
vm_config: AdapterVmConfig<Self::Adapter>,
labels: BTreeMap<u64, Vec<String>>,
empirical_constraints: EmpiricalConstraints,
) -> Vec<AdapterApcWithStats<Self::Adapter>> {
tracing::info!(
"Generating autoprecompiles with cell PGO for {} blocks",
Expand Down Expand Up @@ -131,6 +132,7 @@ impl<A: Adapter + Send + Sync, C: Candidate<A> + Send + Sync> PgoAdapter for Cel
vm_config.clone(),
config.degree_bound,
config.apc_candidates_dir_path.as_deref(),
&empirical_constraints,
)
.ok()?;
let candidate = C::create(
Expand Down
10 changes: 8 additions & 2 deletions autoprecompiles/src/pgo/instruction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{
adapter::{Adapter, AdapterApcWithStats, AdapterVmConfig, PgoAdapter},
blocks::BasicBlock,
pgo::create_apcs_for_all_blocks,
PowdrConfig,
EmpiricalConstraints, PowdrConfig,
};

pub struct InstructionPgo<A> {
Expand All @@ -30,6 +30,7 @@ impl<A: Adapter> PgoAdapter for InstructionPgo<A> {
config: &PowdrConfig,
vm_config: AdapterVmConfig<Self::Adapter>,
_labels: BTreeMap<u64, Vec<String>>,
empirical_constraints: EmpiricalConstraints,
) -> Vec<AdapterApcWithStats<Self::Adapter>> {
tracing::info!(
"Generating autoprecompiles with instruction PGO for {} blocks",
Expand Down Expand Up @@ -70,7 +71,12 @@ impl<A: Adapter> PgoAdapter for InstructionPgo<A> {
);
}

create_apcs_for_all_blocks::<Self::Adapter>(blocks, config, vm_config)
create_apcs_for_all_blocks::<Self::Adapter>(
blocks,
config,
vm_config,
empirical_constraints,
)
}

fn pc_execution_count(&self, pc: u64) -> Option<u32> {
Expand Down
Loading
Loading