@@ -150,6 +150,11 @@ impl TestOutput {
150150 self . metric_value ( "row_groups_matched_statistics" )
151151 }
152152
153+ /// The number of row_groups fully matched by statistics
154+ fn row_groups_fully_matched_statistics ( & self ) -> Option < usize > {
155+ self . metric_value ( "row_groups_fully_matched_statistics" )
156+ }
157+
153158 /// The number of row_groups pruned by statistics
154159 fn row_groups_pruned_statistics ( & self ) -> Option < usize > {
155160 self . metric_value ( "row_groups_pruned_statistics" )
@@ -178,6 +183,11 @@ impl TestOutput {
178183 self . metric_value ( "page_index_rows_pruned" )
179184 }
180185
186+ /// The number of row groups pruned by limit pruning
187+ fn limit_pruned_row_groups ( & self ) -> Option < usize > {
188+ self . metric_value ( "limit_pruned_row_groups" )
189+ }
190+
181191 fn description ( & self ) -> String {
182192 format ! (
183193 "Input:\n {}\n Query:\n {}\n Output:\n {}\n Metrics:\n {}" ,
@@ -191,20 +201,41 @@ impl TestOutput {
191201/// and the appropriate scenario
192202impl ContextWithParquet {
193203 async fn new ( scenario : Scenario , unit : Unit ) -> Self {
194- Self :: with_config ( scenario, unit, SessionConfig :: new ( ) ) . await
204+ Self :: with_config ( scenario, unit, SessionConfig :: new ( ) , None , None ) . await
205+ }
206+
207+ /// Set custom schema and batches for the test
208+ pub async fn with_custom_data (
209+ scenario : Scenario ,
210+ unit : Unit ,
211+ schema : Arc < Schema > ,
212+ batches : Vec < RecordBatch > ,
213+ ) -> Self {
214+ Self :: with_config (
215+ scenario,
216+ unit,
217+ SessionConfig :: new ( ) ,
218+ Some ( schema) ,
219+ Some ( batches) ,
220+ )
221+ . await
195222 }
196223
197224 async fn with_config (
198225 scenario : Scenario ,
199226 unit : Unit ,
200227 mut config : SessionConfig ,
228+ custom_schema : Option < Arc < Schema > > ,
229+ custom_batches : Option < Vec < RecordBatch > > ,
201230 ) -> Self {
202231 // Use a single partition for deterministic results no matter how many CPUs the host has
203232 config = config. with_target_partitions ( 1 ) ;
204233 let file = match unit {
205234 Unit :: RowGroup ( row_per_group) => {
206235 config = config. with_parquet_bloom_filter_pruning ( true ) ;
207- make_test_file_rg ( scenario, row_per_group) . await
236+ config. options_mut ( ) . execution . parquet . pushdown_filters = true ;
237+ make_test_file_rg ( scenario, row_per_group, custom_schema, custom_batches)
238+ . await
208239 }
209240 Unit :: Page ( row_per_page) => {
210241 config = config. with_parquet_page_index_pruning ( true ) ;
@@ -1030,7 +1061,12 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
10301061}
10311062
10321063/// Create a test parquet file with various data types
1033- async fn make_test_file_rg ( scenario : Scenario , row_per_group : usize ) -> NamedTempFile {
1064+ async fn make_test_file_rg (
1065+ scenario : Scenario ,
1066+ row_per_group : usize ,
1067+ custom_schema : Option < Arc < Schema > > ,
1068+ custom_batches : Option < Vec < RecordBatch > > ,
1069+ ) -> NamedTempFile {
10341070 let mut output_file = tempfile:: Builder :: new ( )
10351071 . prefix ( "parquet_pruning" )
10361072 . suffix ( ".parquet" )
@@ -1043,8 +1079,14 @@ async fn make_test_file_rg(scenario: Scenario, row_per_group: usize) -> NamedTem
10431079 . set_statistics_enabled ( EnabledStatistics :: Page )
10441080 . build ( ) ;
10451081
1046- let batches = create_data_batch ( scenario) ;
1047- let schema = batches[ 0 ] . schema ( ) ;
1082+ let ( batches, schema) =
1083+ if let ( Some ( schema) , Some ( batches) ) = ( custom_schema, custom_batches) {
1084+ ( batches, schema)
1085+ } else {
1086+ let batches = create_data_batch ( scenario) ;
1087+ let schema = batches[ 0 ] . schema ( ) ;
1088+ ( batches, schema)
1089+ } ;
10481090
10491091 let mut writer = ArrowWriter :: try_new ( & mut output_file, schema, Some ( props) ) . unwrap ( ) ;
10501092
0 commit comments