@@ -6,6 +6,13 @@ use std::hash::Hash;
66use itertools:: Itertools ;
77use serde:: { Deserialize , Serialize } ;
88
9+ use crate :: {
10+ adapter:: Adapter ,
11+ blocks:: BasicBlock ,
12+ expression:: { AlgebraicExpression , AlgebraicReference } ,
13+ SymbolicConstraint ,
14+ } ;
15+
916/// "Constraints" that were inferred from execution statistics. They hold empirically
1017/// (most of the time), but are not guaranteed to hold in all cases.
1118#[ derive( Serialize , Deserialize , Clone , Default , Debug ) ]
@@ -16,6 +23,9 @@ pub struct EmpiricalConstraints {
1623 /// For each basic block (identified by its starting PC), the equivalence classes of columns.
1724 /// Each equivalence class is a list of (instruction index in block, column index).
1825 pub equivalence_classes_by_block : BTreeMap < u64 , BTreeSet < BTreeSet < ( usize , usize ) > > > ,
26+ /// Count of how many times each program counter was executed in the sampled executions.
27+ /// This can be used to set a threshold for applying constraints only to frequently executed PCs.
28+ pub pc_counts : BTreeMap < u32 , u64 > ,
1929}
2030
2131/// Debug information mapping AIR ids to program counters and column names.
@@ -61,6 +71,24 @@ impl EmpiricalConstraints {
6171 } )
6272 . or_insert ( classes) ;
6373 }
74+
75+ // Combine pc counts
76+ for ( pc, count) in other. pc_counts {
77+ * self . pc_counts . entry ( pc) . or_insert ( 0 ) += count;
78+ }
79+ }
80+
81+ pub fn with_thresholded_pc_count ( mut self , threshold : u64 ) -> Self {
82+ self . column_ranges_by_pc
83+ . retain ( |pc, _| self . pc_counts . get ( pc) . cloned ( ) . unwrap_or ( 0 ) >= threshold) ;
84+ self . equivalence_classes_by_block . retain ( |block_id, _| {
85+ self . pc_counts
86+ . get ( & ( * block_id as u32 ) )
87+ . cloned ( )
88+ . unwrap_or ( 0 )
89+ >= threshold
90+ } ) ;
91+ self
6492 }
6593}
6694
@@ -88,6 +116,99 @@ fn merge_maps<K: Ord, V: Eq + Debug>(map1: &mut BTreeMap<K, V>, map2: BTreeMap<K
88116 }
89117}
90118
119+ type ConstraintList < A > = Vec < SymbolicConstraint < <A as Adapter >:: PowdrField > > ;
120+
121+ /// For any program line that was not executed at least this many times in the traces,
122+ /// discard any empirical constraints associated with it.
123+ const EXECUTION_COUNT_THRESHOLD : u64 = 100 ;
124+
125+ pub fn add_empirical_constraints < A : Adapter > (
126+ empirical_constraints : & EmpiricalConstraints ,
127+ subs : & [ Vec < u64 > ] ,
128+ block : & BasicBlock < A :: Instruction > ,
129+ columns : impl Iterator < Item = AlgebraicReference > ,
130+ ) -> ( ConstraintList < A > , ConstraintList < A > ) {
131+ // Apply execution count threshold to avoid overfitting on rarely executed code
132+ let empirical_constraints = empirical_constraints
133+ . clone ( )
134+ . with_thresholded_pc_count ( EXECUTION_COUNT_THRESHOLD ) ;
135+
136+ let range_constraints = & empirical_constraints. column_ranges_by_pc ;
137+ let equivalence_classes_by_block = & empirical_constraints. equivalence_classes_by_block ;
138+
139+ let mut range_analyzer_constraints = Vec :: new ( ) ;
140+ let mut equivalence_analyzer_constraints = Vec :: new ( ) ;
141+
142+ // Mapping (instruction index, column index) -> AlgebraicReference
143+ let reverse_subs = subs
144+ . iter ( )
145+ . enumerate ( )
146+ . flat_map ( |( instr_index, subs) | {
147+ subs. iter ( )
148+ . enumerate ( )
149+ . map ( move |( col_index, & poly_id) | ( poly_id, ( instr_index, col_index) ) )
150+ } )
151+ . collect :: < BTreeMap < _ , _ > > ( ) ;
152+ let algebraic_references = columns
153+ . map ( |r| ( * reverse_subs. get ( & r. id ) . unwrap ( ) , r. clone ( ) ) )
154+ . collect :: < BTreeMap < _ , _ > > ( ) ;
155+
156+ for i in 0 ..block. statements . len ( ) {
157+ let pc = ( block. start_pc + ( i * 4 ) as u64 ) as u32 ;
158+ let Some ( range_constraints) = range_constraints. get ( & pc) else {
159+ continue ;
160+ } ;
161+ for ( col_index, range) in range_constraints. iter ( ) . enumerate ( ) {
162+ if range. 0 == range. 1 {
163+ let value = A :: PowdrField :: from ( range. 0 as u64 ) ;
164+ let Some ( reference) = algebraic_references. get ( & ( i, col_index) ) . cloned ( ) else {
165+ panic ! (
166+ "Missing reference for (i: {}, col_index: {}, block_id: {})" ,
167+ i, col_index, block. start_pc
168+ ) ;
169+ } ;
170+ let constraint =
171+ AlgebraicExpression :: Reference ( reference) - AlgebraicExpression :: Number ( value) ;
172+
173+ range_analyzer_constraints. push ( SymbolicConstraint { expr : constraint } ) ;
174+ }
175+ }
176+ }
177+
178+ if let Some ( equivalence_classes) = equivalence_classes_by_block. get ( & block. start_pc ) {
179+ for equivalence_class in equivalence_classes {
180+ let first = equivalence_class. first ( ) . unwrap ( ) ;
181+ let Some ( first_ref) = algebraic_references. get ( first) . cloned ( ) else {
182+ // TODO: This fails in some blocks. For now, just return no extra constraints.
183+ tracing:: warn!(
184+ "Missing reference for (i: {}, col_index: {}, block_id: {})" ,
185+ first. 0 ,
186+ first. 1 ,
187+ block. start_pc
188+ ) ;
189+ return ( range_analyzer_constraints, vec ! [ ] ) ;
190+ } ;
191+ for other in equivalence_class. iter ( ) . skip ( 1 ) {
192+ let Some ( other_ref) = algebraic_references. get ( other) . cloned ( ) else {
193+ // TODO: This fails in some blocks. For now, just return no extra constraints.
194+ tracing:: warn!(
195+ "Missing reference for (i: {}, col_index: {}, block_id: {})" ,
196+ other. 0 ,
197+ other. 1 ,
198+ block. start_pc
199+ ) ;
200+ return ( range_analyzer_constraints, vec ! [ ] ) ;
201+ } ;
202+ let constraint = AlgebraicExpression :: Reference ( first_ref. clone ( ) )
203+ - AlgebraicExpression :: Reference ( other_ref. clone ( ) ) ;
204+ equivalence_analyzer_constraints. push ( SymbolicConstraint { expr : constraint } ) ;
205+ }
206+ }
207+ }
208+
209+ ( range_analyzer_constraints, equivalence_analyzer_constraints)
210+ }
211+
91212/// Intersects multiple partitions of the same universe into a single partition.
92213/// In other words, two elements are in the same equivalence class in the resulting partition
93214/// if and only if they are in the same equivalence class in all input partitions.
0 commit comments