Skip to content

Commit 316ea10

Browse files
committed
Build optimistic precompiles
1 parent bac0ffb commit 316ea10

File tree

13 files changed

+351
-40
lines changed

13 files changed

+351
-40
lines changed

autoprecompiles/src/adapter.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::{fmt::Display, sync::Arc};
66
use powdr_number::FieldElement;
77
use serde::{Deserialize, Serialize};
88

9+
use crate::EmpiricalConstraints;
910
use crate::{
1011
blocks::{BasicBlock, Instruction, Program},
1112
constraint_optimizer::IsBusStateful,
@@ -45,12 +46,19 @@ pub trait PgoAdapter {
4546
config: &PowdrConfig,
4647
vm_config: AdapterVmConfig<Self::Adapter>,
4748
labels: BTreeMap<u64, Vec<String>>,
49+
empirical_constraints: EmpiricalConstraints,
4850
) -> Vec<AdapterApcWithStats<Self::Adapter>> {
4951
let filtered_blocks = blocks
5052
.into_iter()
5153
.filter(|block| !Self::Adapter::should_skip_block(block))
5254
.collect();
53-
self.create_apcs_with_pgo(filtered_blocks, config, vm_config, labels)
55+
self.create_apcs_with_pgo(
56+
filtered_blocks,
57+
config,
58+
vm_config,
59+
labels,
60+
empirical_constraints,
61+
)
5462
}
5563

5664
fn create_apcs_with_pgo(
@@ -59,6 +67,7 @@ pub trait PgoAdapter {
5967
config: &PowdrConfig,
6068
vm_config: AdapterVmConfig<Self::Adapter>,
6169
labels: BTreeMap<u64, Vec<String>>,
70+
empirical_constraints: EmpiricalConstraints,
6271
) -> Vec<AdapterApcWithStats<Self::Adapter>>;
6372

6473
fn pc_execution_count(&self, _pc: u64) -> Option<u32> {

autoprecompiles/src/empirical_constraints.rs

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@ use std::hash::Hash;
66
use itertools::Itertools;
77
use serde::{Deserialize, Serialize};
88

9+
use crate::{
10+
adapter::Adapter,
11+
blocks::BasicBlock,
12+
expression::{AlgebraicExpression, AlgebraicReference},
13+
SymbolicConstraint,
14+
};
15+
916
/// "Constraints" that were inferred from execution statistics. They hold empirically
1017
/// (most of the time), but are not guaranteed to hold in all cases.
1118
#[derive(Serialize, Deserialize, Clone, Default, Debug)]
@@ -16,6 +23,9 @@ pub struct EmpiricalConstraints {
1623
/// For each basic block (identified by its starting PC), the equivalence classes of columns.
1724
/// Each equivalence class is a list of (instruction index in block, column index).
1825
pub equivalence_classes_by_block: BTreeMap<u64, BTreeSet<BTreeSet<(usize, usize)>>>,
26+
/// Count of how many times each program counter was executed in the sampled executions.
27+
/// This can be used to set a threshold for applying constraints only to frequently executed PCs.
28+
pub pc_counts: BTreeMap<u32, u64>,
1929
}
2030

2131
/// Debug information mapping AIR ids to program counters and column names.
@@ -61,6 +71,32 @@ impl EmpiricalConstraints {
6171
})
6272
.or_insert(classes);
6373
}
74+
75+
// Combine pc counts
76+
for (pc, count) in other.pc_counts {
77+
*self.pc_counts.entry(pc).or_insert(0) += count;
78+
}
79+
}
80+
81+
pub fn apply_pc_threshold(&self, threshold: u64) -> Self {
82+
EmpiricalConstraints {
83+
column_ranges_by_pc: self
84+
.column_ranges_by_pc
85+
.iter()
86+
.filter(|(pc, _)| self.pc_counts.get(pc).cloned().unwrap_or(0) >= threshold)
87+
.map(|(pc, ranges)| (*pc, ranges.clone()))
88+
.collect(),
89+
equivalence_classes_by_block: self
90+
.equivalence_classes_by_block
91+
.iter()
92+
.filter(|(&block_pc, _)| {
93+
// For equivalence classes, we check the pc_counts of the first instruction in the block
94+
self.pc_counts.get(&(block_pc as u32)).cloned().unwrap_or(0) >= threshold
95+
})
96+
.map(|(block_pc, classes)| (*block_pc, classes.clone()))
97+
.collect(),
98+
pc_counts: self.pc_counts.clone(),
99+
}
64100
}
65101
}
66102

@@ -88,6 +124,107 @@ fn merge_maps<K: Ord, V: Eq + Debug>(map1: &mut BTreeMap<K, V>, map2: BTreeMap<K
88124
}
89125
}
90126

127+
/// For any program line that was not executed at least this many times in the traces,
128+
/// discard any empirical constraints associated with it.
129+
const EXECUTION_COUNT_THRESHOLD: u64 = 100;
130+
131+
pub struct ConstraintGenerator<'a, A: Adapter> {
132+
empirical_constraints: EmpiricalConstraints,
133+
algebraic_references: BTreeMap<(usize, usize), AlgebraicReference>,
134+
block: &'a BasicBlock<A::Instruction>,
135+
}
136+
137+
impl<'a, A: Adapter> ConstraintGenerator<'a, A> {
138+
pub fn new(
139+
empirical_constraints: &EmpiricalConstraints,
140+
subs: &[Vec<u64>],
141+
columns: impl Iterator<Item = AlgebraicReference>,
142+
block: &'a BasicBlock<A::Instruction>,
143+
) -> Self {
144+
let reverse_subs = subs
145+
.iter()
146+
.enumerate()
147+
.flat_map(|(instr_index, subs)| {
148+
subs.iter()
149+
.enumerate()
150+
.map(move |(col_index, &poly_id)| (poly_id, (instr_index, col_index)))
151+
})
152+
.collect::<BTreeMap<_, _>>();
153+
let algebraic_references = columns
154+
.map(|r| (*reverse_subs.get(&r.id).unwrap(), r.clone()))
155+
.collect::<BTreeMap<_, _>>();
156+
157+
Self {
158+
empirical_constraints: empirical_constraints
159+
.apply_pc_threshold(EXECUTION_COUNT_THRESHOLD),
160+
algebraic_references,
161+
block,
162+
}
163+
}
164+
165+
fn get_algebraic_reference(&self, instr_index: usize, col_index: usize) -> AlgebraicReference {
166+
self.algebraic_references
167+
.get(&(instr_index, col_index))
168+
.cloned()
169+
.unwrap_or_else(|| {
170+
panic!(
171+
"Missing reference for (i: {}, col_index: {}, block_id: {})",
172+
instr_index, col_index, self.block.start_pc
173+
)
174+
})
175+
}
176+
177+
pub fn range_constraints(&self) -> Vec<SymbolicConstraint<<A as Adapter>::PowdrField>> {
178+
let mut constraints = Vec::new();
179+
180+
for i in 0..self.block.statements.len() {
181+
let pc = (self.block.start_pc + (i * 4) as u64) as u32;
182+
let Some(range_constraints) = self.empirical_constraints.column_ranges_by_pc.get(&pc)
183+
else {
184+
continue;
185+
};
186+
for (col_index, range) in range_constraints.iter().enumerate() {
187+
// TODO: We could also enforce looser range constraints.
188+
// This is a bit more complicated though, because we'd have to add bus interactions
189+
// to actually enforce them.
190+
if range.0 == range.1 {
191+
let value = A::PowdrField::from(range.0 as u64);
192+
let reference = self.get_algebraic_reference(i, col_index);
193+
let constraint = AlgebraicExpression::Reference(reference)
194+
- AlgebraicExpression::Number(value);
195+
196+
constraints.push(SymbolicConstraint { expr: constraint });
197+
}
198+
}
199+
}
200+
201+
constraints
202+
}
203+
204+
pub fn equivalence_constraints(&self) -> Vec<SymbolicConstraint<<A as Adapter>::PowdrField>> {
205+
let mut constraints = Vec::new();
206+
207+
if let Some(equivalence_classes) = self
208+
.empirical_constraints
209+
.equivalence_classes_by_block
210+
.get(&self.block.start_pc)
211+
{
212+
for equivalence_class in equivalence_classes {
213+
let first = equivalence_class.first().unwrap();
214+
let first_ref = self.get_algebraic_reference(first.0, first.1);
215+
for other in equivalence_class.iter().skip(1) {
216+
let other_ref = self.get_algebraic_reference(other.0, other.1);
217+
let constraint = AlgebraicExpression::Reference(first_ref.clone())
218+
- AlgebraicExpression::Reference(other_ref.clone());
219+
constraints.push(SymbolicConstraint { expr: constraint });
220+
}
221+
}
222+
}
223+
224+
constraints
225+
}
226+
}
227+
91228
/// Intersects multiple partitions of the same universe into a single partition.
92229
/// In other words, two elements are in the same equivalence class in the resulting partition
93230
/// if and only if they are in the same equivalence class in all input partitions.

autoprecompiles/src/lib.rs

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::adapter::{Adapter, AdapterApc, AdapterVmConfig};
22
use crate::blocks::BasicBlock;
33
use crate::bus_map::{BusMap, BusType};
4+
use crate::empirical_constraints::{ConstraintGenerator, EmpiricalConstraints};
45
use crate::evaluation::AirStats;
56
use crate::expression_conversion::algebraic_to_grouped_expression;
67
use crate::symbolic_machine_generator::convert_machine_field_type;
@@ -429,6 +430,7 @@ pub fn build<A: Adapter>(
429430
vm_config: AdapterVmConfig<A>,
430431
degree_bound: DegreeBound,
431432
apc_candidates_dir_path: Option<&Path>,
433+
empirical_constraints: &EmpiricalConstraints,
432434
) -> Result<AdapterApc<A>, crate::constraint_optimizer::Error> {
433435
let start = std::time::Instant::now();
434436

@@ -438,6 +440,16 @@ pub fn build<A: Adapter>(
438440
&vm_config.bus_map,
439441
);
440442

443+
// Generate constraints for optimistic precompiles.
444+
let constraint_generator = ConstraintGenerator::<A>::new(
445+
empirical_constraints,
446+
&column_allocator.subs,
447+
machine.main_columns(),
448+
&block,
449+
);
450+
let range_analyzer_constraints = constraint_generator.range_constraints();
451+
let equivalence_analyzer_constraints = constraint_generator.equivalence_constraints();
452+
441453
let labels = [("apc_start_pc", block.start_pc.to_string())];
442454
metrics::counter!("before_opt_cols", &labels)
443455
.absolute(machine.unique_references().count() as u64);
@@ -446,13 +458,44 @@ pub fn build<A: Adapter>(
446458
metrics::counter!("before_opt_interactions", &labels)
447459
.absolute(machine.unique_references().count() as u64);
448460

461+
let mut baseline = machine;
462+
463+
// Optimize once without empirical constraints
449464
let (machine, column_allocator) = optimizer::optimize::<A>(
450-
machine,
451-
vm_config.bus_interaction_handler,
465+
baseline.clone(),
466+
vm_config.bus_interaction_handler.clone(),
452467
degree_bound,
453468
&vm_config.bus_map,
454469
column_allocator,
455-
)?;
470+
)
471+
.unwrap();
472+
// Get the precompile that is guaranteed to always work
473+
let guaranteed_precompile = machine.render(&vm_config.bus_map);
474+
475+
let (machine, column_allocator, optimistic_precompile) =
476+
if !range_analyzer_constraints.is_empty() || !equivalence_analyzer_constraints.is_empty() {
477+
// Add empirical constraints
478+
baseline.constraints.extend(range_analyzer_constraints);
479+
baseline
480+
.constraints
481+
.extend(equivalence_analyzer_constraints);
482+
483+
// Optimize again with empirical constraints
484+
// TODO: Calling optimize twice is needed; otherwise the solver fails.
485+
let (machine, column_allocator) = optimizer::optimize::<A>(
486+
baseline,
487+
vm_config.bus_interaction_handler,
488+
degree_bound,
489+
&vm_config.bus_map,
490+
column_allocator,
491+
)
492+
.unwrap();
493+
let optimistic_precompile = machine.render(&vm_config.bus_map);
494+
(machine, column_allocator, Some(optimistic_precompile))
495+
} else {
496+
// If there are no empirical constraints, we can skip optimizing twice.
497+
(machine, column_allocator, None)
498+
};
456499

457500
// add guards to constraints that are not satisfied by zeroes
458501
let (machine, column_allocator) = add_guards(machine, column_allocator);
@@ -469,14 +512,27 @@ pub fn build<A: Adapter>(
469512
let apc = Apc::new(block, machine, column_allocator);
470513

471514
if let Some(path) = apc_candidates_dir_path {
472-
let ser_path = path
473-
.join(format!("apc_candidate_{}", apc.start_pc()))
474-
.with_extension("cbor");
475-
std::fs::create_dir_all(path).expect("Failed to create directory for APC candidates");
476-
let file =
477-
std::fs::File::create(&ser_path).expect("Failed to create file for APC candidate");
478-
let writer = BufWriter::new(file);
479-
serde_cbor::to_writer(writer, &apc).expect("Failed to write APC candidate to file");
515+
if let Some(optimistic_precompile) = &optimistic_precompile {
516+
// For debugging purposes, serialize the APC candidate to a file
517+
let ser_path = path
518+
.join(format!("apc_candidate_{}", apc.start_pc()))
519+
.with_extension("cbor");
520+
std::fs::create_dir_all(path).expect("Failed to create directory for APC candidates");
521+
let file =
522+
std::fs::File::create(&ser_path).expect("Failed to create file for APC candidate");
523+
let writer = BufWriter::new(file);
524+
serde_cbor::to_writer(writer, &apc).expect("Failed to write APC candidate to file");
525+
526+
let dumb_path = path
527+
.join(format!("apc_candidate_{}_guaranteed.txt", apc.start_pc()))
528+
.with_extension("txt");
529+
std::fs::write(dumb_path, guaranteed_precompile).unwrap();
530+
531+
let ai_path = path
532+
.join(format!("apc_candidate_{}_optimistic.txt", apc.start_pc()))
533+
.with_extension("txt");
534+
std::fs::write(ai_path, optimistic_precompile).unwrap();
535+
}
480536
}
481537

482538
metrics::gauge!("apc_gen_time_ms", &labels).set(start.elapsed().as_millis() as f64);

autoprecompiles/src/memory_optimizer.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,20 @@ fn redundant_memory_interactions_indices<
187187
// In that case, we can replace both bus interactions with equality constraints
188188
// between the data that would have been sent and received.
189189
if let Some((previous_send, existing_values)) = memory_contents.remove(&addr) {
190+
// TODO: This can happen which optimistic precompiles, because basic block can include
191+
// big-word branching instructions. This hot fix is NOT sounds, because it does not handle
192+
// a situation where we write a (small-word) limb, accesses the large word, and then the
193+
// small-word limb again.
194+
if existing_values.len() != mem_int.data().len() {
195+
log::error!(
196+
"Memory interaction data length mismatch: existing values = {}, new values = {}. Resetting memory.",
197+
existing_values.iter().map(ToString::to_string).join(", "),
198+
mem_int.data().iter().map(ToString::to_string).join(", ")
199+
);
200+
memory_contents.clear();
201+
continue;
202+
}
203+
190204
for (existing, new) in existing_values.iter().zip_eq(mem_int.data().iter()) {
191205
new_constraints.push(AlgebraicConstraint::assert_zero(
192206
existing.clone() - new.clone(),

autoprecompiles/src/pgo/cell/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::{
1313
blocks::BasicBlock,
1414
evaluation::EvaluationResult,
1515
pgo::cell::selection::parallel_fractional_knapsack,
16-
PowdrConfig,
16+
EmpiricalConstraints, PowdrConfig,
1717
};
1818

1919
mod selection;
@@ -92,6 +92,7 @@ impl<A: Adapter + Send + Sync, C: Candidate<A> + Send + Sync> PgoAdapter for Cel
9292
config: &PowdrConfig,
9393
vm_config: AdapterVmConfig<Self::Adapter>,
9494
labels: BTreeMap<u64, Vec<String>>,
95+
empirical_constraints: EmpiricalConstraints,
9596
) -> Vec<AdapterApcWithStats<Self::Adapter>> {
9697
tracing::info!(
9798
"Generating autoprecompiles with cell PGO for {} blocks",
@@ -131,6 +132,7 @@ impl<A: Adapter + Send + Sync, C: Candidate<A> + Send + Sync> PgoAdapter for Cel
131132
vm_config.clone(),
132133
config.degree_bound,
133134
config.apc_candidates_dir_path.as_deref(),
135+
&empirical_constraints,
134136
)
135137
.ok()?;
136138
let candidate = C::create(

autoprecompiles/src/pgo/instruction.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::{
44
adapter::{Adapter, AdapterApcWithStats, AdapterVmConfig, PgoAdapter},
55
blocks::BasicBlock,
66
pgo::create_apcs_for_all_blocks,
7-
PowdrConfig,
7+
EmpiricalConstraints, PowdrConfig,
88
};
99

1010
pub struct InstructionPgo<A> {
@@ -30,6 +30,7 @@ impl<A: Adapter> PgoAdapter for InstructionPgo<A> {
3030
config: &PowdrConfig,
3131
vm_config: AdapterVmConfig<Self::Adapter>,
3232
_labels: BTreeMap<u64, Vec<String>>,
33+
empirical_constraints: EmpiricalConstraints,
3334
) -> Vec<AdapterApcWithStats<Self::Adapter>> {
3435
tracing::info!(
3536
"Generating autoprecompiles with instruction PGO for {} blocks",
@@ -70,7 +71,12 @@ impl<A: Adapter> PgoAdapter for InstructionPgo<A> {
7071
);
7172
}
7273

73-
create_apcs_for_all_blocks::<Self::Adapter>(blocks, config, vm_config)
74+
create_apcs_for_all_blocks::<Self::Adapter>(
75+
blocks,
76+
config,
77+
vm_config,
78+
empirical_constraints,
79+
)
7480
}
7581

7682
fn pc_execution_count(&self, pc: u64) -> Option<u32> {

0 commit comments

Comments
 (0)