Skip to content

Commit 1913d1c

Browse files
committed
Build optimistic precompiles
1 parent 661a429 commit 1913d1c

File tree

13 files changed

+320
-31
lines changed

13 files changed

+320
-31
lines changed

autoprecompiles/src/adapter.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::{fmt::Display, sync::Arc};
66
use powdr_number::FieldElement;
77
use serde::{Deserialize, Serialize};
88

9+
use crate::EmpiricalConstraints;
910
use crate::{
1011
blocks::{BasicBlock, Instruction, Program},
1112
constraint_optimizer::IsBusStateful,
@@ -45,12 +46,19 @@ pub trait PgoAdapter {
4546
config: &PowdrConfig,
4647
vm_config: AdapterVmConfig<Self::Adapter>,
4748
labels: BTreeMap<u64, Vec<String>>,
49+
empirical_constraints: EmpiricalConstraints,
4850
) -> Vec<AdapterApcWithStats<Self::Adapter>> {
4951
let filtered_blocks = blocks
5052
.into_iter()
5153
.filter(|block| !Self::Adapter::should_skip_block(block))
5254
.collect();
53-
self.create_apcs_with_pgo(filtered_blocks, config, vm_config, labels)
55+
self.create_apcs_with_pgo(
56+
filtered_blocks,
57+
config,
58+
vm_config,
59+
labels,
60+
empirical_constraints,
61+
)
5462
}
5563

5664
fn create_apcs_with_pgo(
@@ -59,6 +67,7 @@ pub trait PgoAdapter {
5967
config: &PowdrConfig,
6068
vm_config: AdapterVmConfig<Self::Adapter>,
6169
labels: BTreeMap<u64, Vec<String>>,
70+
empirical_constraints: EmpiricalConstraints,
6271
) -> Vec<AdapterApcWithStats<Self::Adapter>>;
6372

6473
fn pc_execution_count(&self, _pc: u64) -> Option<u32> {

autoprecompiles/src/empirical_constraints.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@ use std::hash::Hash;
66
use itertools::Itertools;
77
use serde::{Deserialize, Serialize};
88

9+
use crate::{
10+
adapter::Adapter,
11+
blocks::BasicBlock,
12+
expression::{AlgebraicExpression, AlgebraicReference},
13+
SymbolicConstraint,
14+
};
15+
916
/// "Constraints" that were inferred from execution statistics. They hold empirically
1017
/// (most of the time), but are not guaranteed to hold in all cases.
1118
#[derive(Serialize, Deserialize, Clone, Default, Debug)]
@@ -16,6 +23,9 @@ pub struct EmpiricalConstraints {
1623
/// For each basic block (identified by its starting PC), the equivalence classes of columns.
1724
/// Each equivalence class is a list of (instruction index in block, column index).
1825
pub equivalence_classes_by_block: BTreeMap<u64, BTreeSet<BTreeSet<(usize, usize)>>>,
26+
/// Count of how many times each program counter was executed in the sampled executions.
27+
/// This can be used to set a threshold for applying constraints only to frequently executed PCs.
28+
pub pc_counts: BTreeMap<u32, u64>,
1929
}
2030

2131
/// Debug information mapping AIR ids to program counters and column names.
@@ -61,6 +71,32 @@ impl EmpiricalConstraints {
6171
})
6272
.or_insert(classes);
6373
}
74+
75+
// Combine pc counts
76+
for (pc, count) in other.pc_counts {
77+
*self.pc_counts.entry(pc).or_insert(0) += count;
78+
}
79+
}
80+
81+
pub fn apply_pc_threshold(&self, threshold: u64) -> Self {
82+
EmpiricalConstraints {
83+
column_ranges_by_pc: self
84+
.column_ranges_by_pc
85+
.iter()
86+
.filter(|(pc, _)| self.pc_counts.get(pc).cloned().unwrap_or(0) >= threshold)
87+
.map(|(pc, ranges)| (*pc, ranges.clone()))
88+
.collect(),
89+
equivalence_classes_by_block: self
90+
.equivalence_classes_by_block
91+
.iter()
92+
.filter(|(&block_pc, _)| {
93+
// For equivalence classes, we check the pc_counts of the first instruction in the block
94+
self.pc_counts.get(&(block_pc as u32)).cloned().unwrap_or(0) >= threshold
95+
})
96+
.map(|(block_pc, classes)| (*block_pc, classes.clone()))
97+
.collect(),
98+
pc_counts: self.pc_counts.clone(),
99+
}
64100
}
65101
}
66102

@@ -88,6 +124,104 @@ fn merge_maps<K: Ord, V: Eq + Debug>(map1: &mut BTreeMap<K, V>, map2: BTreeMap<K
88124
}
89125
}
90126

127+
/// For any program line that was not executed at least this many times in the traces,
128+
/// discard any empirical constraints associated with it.
129+
const EXECUTION_COUNT_THRESHOLD: u64 = 100;
130+
131+
pub struct ConstraintGenerator<'a, A: Adapter> {
132+
empirical_constraints: EmpiricalConstraints,
133+
algebraic_references: BTreeMap<(usize, usize), AlgebraicReference>,
134+
block: &'a BasicBlock<A::Instruction>,
135+
}
136+
137+
impl<'a, A: Adapter> ConstraintGenerator<'a, A> {
138+
pub fn new(
139+
empirical_constraints: &EmpiricalConstraints,
140+
subs: &[Vec<u64>],
141+
columns: impl Iterator<Item = AlgebraicReference>,
142+
block: &'a BasicBlock<A::Instruction>,
143+
) -> Self {
144+
let reverse_subs = subs
145+
.iter()
146+
.enumerate()
147+
.flat_map(|(instr_index, subs)| {
148+
subs.iter()
149+
.enumerate()
150+
.map(move |(col_index, &poly_id)| (poly_id, (instr_index, col_index)))
151+
})
152+
.collect::<BTreeMap<_, _>>();
153+
let algebraic_references = columns
154+
.map(|r| (*reverse_subs.get(&r.id).unwrap(), r.clone()))
155+
.collect::<BTreeMap<_, _>>();
156+
157+
Self {
158+
empirical_constraints: empirical_constraints
159+
.apply_pc_threshold(EXECUTION_COUNT_THRESHOLD),
160+
algebraic_references,
161+
block,
162+
}
163+
}
164+
165+
fn get_algebraic_reference(&self, instr_index: usize, col_index: usize) -> AlgebraicReference {
166+
self.algebraic_references
167+
.get(&(instr_index, col_index))
168+
.cloned()
169+
.unwrap_or_else(|| {
170+
panic!(
171+
"Missing reference for (i: {}, col_index: {}, block_id: {})",
172+
instr_index, col_index, self.block.start_pc
173+
)
174+
})
175+
}
176+
177+
pub fn range_constraints(&self) -> Vec<SymbolicConstraint<<A as Adapter>::PowdrField>> {
178+
let mut constraints = Vec::new();
179+
180+
for i in 0..self.block.statements.len() {
181+
let pc = (self.block.start_pc + (i * 4) as u64) as u32;
182+
let Some(range_constraints) = self.empirical_constraints.column_ranges_by_pc.get(&pc)
183+
else {
184+
continue;
185+
};
186+
for (col_index, range) in range_constraints.iter().enumerate() {
187+
if range.0 == range.1 {
188+
let value = A::PowdrField::from(range.0 as u64);
189+
let reference = self.get_algebraic_reference(i, col_index);
190+
let constraint = AlgebraicExpression::Reference(reference)
191+
- AlgebraicExpression::Number(value);
192+
193+
constraints.push(SymbolicConstraint { expr: constraint });
194+
}
195+
}
196+
}
197+
198+
constraints
199+
}
200+
201+
pub fn equivalence_constraints(&self) -> Vec<SymbolicConstraint<<A as Adapter>::PowdrField>> {
202+
let mut constraints = Vec::new();
203+
204+
if let Some(equivalence_classes) = self
205+
.empirical_constraints
206+
.equivalence_classes_by_block
207+
.get(&self.block.start_pc)
208+
{
209+
for equivalence_class in equivalence_classes {
210+
let first = equivalence_class.first().unwrap();
211+
let first_ref = self.get_algebraic_reference(first.0, first.1);
212+
for other in equivalence_class.iter().skip(1) {
213+
let other_ref = self.get_algebraic_reference(other.0, other.1);
214+
let constraint = AlgebraicExpression::Reference(first_ref.clone())
215+
- AlgebraicExpression::Reference(other_ref.clone());
216+
constraints.push(SymbolicConstraint { expr: constraint });
217+
}
218+
}
219+
}
220+
221+
constraints
222+
}
223+
}
224+
91225
/// Intersects multiple partitions of the same universe into a single partition.
92226
/// In other words, two elements are in the same equivalence class in the resulting partition
93227
/// if and only if they are in the same equivalence class in all input partitions.

autoprecompiles/src/lib.rs

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::adapter::{Adapter, AdapterApc, AdapterVmConfig};
22
use crate::blocks::BasicBlock;
33
use crate::bus_map::{BusMap, BusType};
4+
use crate::empirical_constraints::{ConstraintGenerator, EmpiricalConstraints};
45
use crate::evaluation::AirStats;
56
use crate::expression_conversion::algebraic_to_grouped_expression;
67
use crate::symbolic_machine_generator::convert_machine_field_type;
@@ -429,6 +430,7 @@ pub fn build<A: Adapter>(
429430
vm_config: AdapterVmConfig<A>,
430431
degree_bound: DegreeBound,
431432
apc_candidates_dir_path: Option<&Path>,
433+
empirical_constraints: &EmpiricalConstraints,
432434
) -> Result<AdapterApc<A>, crate::constraint_optimizer::Error> {
433435
let start = std::time::Instant::now();
434436

@@ -438,6 +440,16 @@ pub fn build<A: Adapter>(
438440
&vm_config.bus_map,
439441
);
440442

443+
let constraint_generator = ConstraintGenerator::<A>::new(
444+
empirical_constraints,
445+
&column_allocator.subs,
446+
machine.main_columns(),
447+
&block,
448+
);
449+
450+
let range_analyzer_constraints = constraint_generator.range_constraints();
451+
let equivalence_analyzer_constraints = constraint_generator.equivalence_constraints();
452+
441453
let labels = [("apc_start_pc", block.start_pc.to_string())];
442454
metrics::counter!("before_opt_cols", &labels)
443455
.absolute(machine.unique_references().count() as u64);
@@ -446,13 +458,32 @@ pub fn build<A: Adapter>(
446458
metrics::counter!("before_opt_interactions", &labels)
447459
.absolute(machine.unique_references().count() as u64);
448460

461+
let mut baseline = machine;
462+
463+
let (machine, column_allocator) = optimizer::optimize::<A>(
464+
baseline.clone(),
465+
vm_config.bus_interaction_handler.clone(),
466+
degree_bound,
467+
&vm_config.bus_map,
468+
column_allocator,
469+
)
470+
.unwrap();
471+
let dumb_precompile = machine.render(&vm_config.bus_map);
472+
473+
baseline.constraints.extend(range_analyzer_constraints);
474+
baseline
475+
.constraints
476+
.extend(equivalence_analyzer_constraints);
477+
449478
let (machine, column_allocator) = optimizer::optimize::<A>(
450-
machine,
479+
baseline,
451480
vm_config.bus_interaction_handler,
452481
degree_bound,
453482
&vm_config.bus_map,
454483
column_allocator,
455-
)?;
484+
)
485+
.unwrap();
486+
let ai_precompile = machine.render(&vm_config.bus_map);
456487

457488
// add guards to constraints that are not satisfied by zeroes
458489
let (machine, column_allocator) = add_guards(machine, column_allocator);
@@ -477,6 +508,16 @@ pub fn build<A: Adapter>(
477508
std::fs::File::create(&ser_path).expect("Failed to create file for APC candidate");
478509
let writer = BufWriter::new(file);
479510
serde_cbor::to_writer(writer, &apc).expect("Failed to write APC candidate to file");
511+
512+
let dumb_path = path
513+
.join(format!("apc_candidate_{}_dumb.txt", apc.start_pc()))
514+
.with_extension("txt");
515+
std::fs::write(dumb_path, dumb_precompile).unwrap();
516+
517+
let ai_path = path
518+
.join(format!("apc_candidate_{}_ai.txt", apc.start_pc()))
519+
.with_extension("txt");
520+
std::fs::write(ai_path, ai_precompile).unwrap();
480521
}
481522

482523
metrics::gauge!("apc_gen_time_ms", &labels).set(start.elapsed().as_millis() as f64);

autoprecompiles/src/memory_optimizer.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,16 @@ fn redundant_memory_interactions_indices<
187187
// In that case, we can replace both bus interactions with equality constraints
188188
// between the data that would have been sent and received.
189189
if let Some((previous_send, existing_values)) = memory_contents.remove(&addr) {
190+
if existing_values.len() != mem_int.data().len() {
191+
log::error!(
192+
"Memory interaction data length mismatch: existing values = {}, new values = {}. Resetting memory.",
193+
existing_values.iter().map(ToString::to_string).join(", "),
194+
mem_int.data().iter().map(ToString::to_string).join(", ")
195+
);
196+
memory_contents.clear();
197+
continue;
198+
}
199+
190200
for (existing, new) in existing_values.iter().zip_eq(mem_int.data().iter()) {
191201
new_constraints.push(AlgebraicConstraint::assert_zero(
192202
existing.clone() - new.clone(),

autoprecompiles/src/pgo/cell/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::{
1313
blocks::BasicBlock,
1414
evaluation::EvaluationResult,
1515
pgo::cell::selection::parallel_fractional_knapsack,
16-
PowdrConfig,
16+
EmpiricalConstraints, PowdrConfig,
1717
};
1818

1919
mod selection;
@@ -92,6 +92,7 @@ impl<A: Adapter + Send + Sync, C: Candidate<A> + Send + Sync> PgoAdapter for Cel
9292
config: &PowdrConfig,
9393
vm_config: AdapterVmConfig<Self::Adapter>,
9494
labels: BTreeMap<u64, Vec<String>>,
95+
empirical_constraints: EmpiricalConstraints,
9596
) -> Vec<AdapterApcWithStats<Self::Adapter>> {
9697
tracing::info!(
9798
"Generating autoprecompiles with cell PGO for {} blocks",
@@ -131,6 +132,7 @@ impl<A: Adapter + Send + Sync, C: Candidate<A> + Send + Sync> PgoAdapter for Cel
131132
vm_config.clone(),
132133
config.degree_bound,
133134
config.apc_candidates_dir_path.as_deref(),
135+
&empirical_constraints,
134136
)
135137
.ok()?;
136138
let candidate = C::create(

autoprecompiles/src/pgo/instruction.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::{
44
adapter::{Adapter, AdapterApcWithStats, AdapterVmConfig, PgoAdapter},
55
blocks::BasicBlock,
66
pgo::create_apcs_for_all_blocks,
7-
PowdrConfig,
7+
EmpiricalConstraints, PowdrConfig,
88
};
99

1010
pub struct InstructionPgo<A> {
@@ -30,6 +30,7 @@ impl<A: Adapter> PgoAdapter for InstructionPgo<A> {
3030
config: &PowdrConfig,
3131
vm_config: AdapterVmConfig<Self::Adapter>,
3232
_labels: BTreeMap<u64, Vec<String>>,
33+
empirical_constraints: EmpiricalConstraints,
3334
) -> Vec<AdapterApcWithStats<Self::Adapter>> {
3435
tracing::info!(
3536
"Generating autoprecompiles with instruction PGO for {} blocks",
@@ -70,7 +71,12 @@ impl<A: Adapter> PgoAdapter for InstructionPgo<A> {
7071
);
7172
}
7273

73-
create_apcs_for_all_blocks::<Self::Adapter>(blocks, config, vm_config)
74+
create_apcs_for_all_blocks::<Self::Adapter>(
75+
blocks,
76+
config,
77+
vm_config,
78+
empirical_constraints,
79+
)
7480
}
7581

7682
fn pc_execution_count(&self, pc: u64) -> Option<u32> {

autoprecompiles/src/pgo/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use strum::{Display, EnumString};
66
use crate::{
77
adapter::{Adapter, AdapterApcWithStats, AdapterVmConfig, ApcWithStats},
88
blocks::BasicBlock,
9-
PowdrConfig,
9+
EmpiricalConstraints, PowdrConfig,
1010
};
1111

1212
mod cell;
@@ -77,6 +77,7 @@ fn create_apcs_for_all_blocks<A: Adapter>(
7777
blocks: Vec<BasicBlock<A::Instruction>>,
7878
config: &PowdrConfig,
7979
vm_config: AdapterVmConfig<A>,
80+
empirical_constraints: EmpiricalConstraints,
8081
) -> Vec<AdapterApcWithStats<A>> {
8182
let n_acc = config.autoprecompiles as usize;
8283
tracing::info!("Generating {n_acc} autoprecompiles in parallel");
@@ -97,6 +98,7 @@ fn create_apcs_for_all_blocks<A: Adapter>(
9798
vm_config.clone(),
9899
config.degree_bound,
99100
config.apc_candidates_dir_path.as_deref(),
101+
&empirical_constraints,
100102
)
101103
.unwrap()
102104
})

0 commit comments

Comments
 (0)