@@ -39,7 +39,7 @@ mod tests {
     use crate::test::object_store::local_unpartitioned_file;
     use arrow::array::{
         ArrayRef, AsArray, Date64Array, Int32Array, Int64Array, Int8Array, StringArray,
-        StructArray,
+        StringViewArray, StructArray,
     };
     use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaBuilder};
     use arrow::record_batch::RecordBatch;
@@ -100,6 +100,7 @@ mod tests {
         predicate: Option<Expr>,
         pushdown_predicate: bool,
         page_index_predicate: bool,
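+        /// if true, read parquet bloom filters and use them to prune row groups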
+        bloom_filters: bool,
     }
 
     impl RoundTrip {
@@ -132,6 +133,11 @@ mod tests {
             self
         }
 
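+        /// enable reading parquet bloom filters during the round trip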
+        fn with_bloom_filters(mut self) -> Self {
+            self.bloom_filters = true;
+            self
+        }
+
         /// run the test, returning only the resulting RecordBatches
         async fn round_trip_to_batches(
             self,
@@ -156,10 +162,20 @@ mod tests {
                 source = source
                     .with_pushdown_filters(true)
                     .with_reorder_filters(true);
+            } else {
+                source = source.with_pushdown_filters(false);
             }
 
             if self.page_index_predicate {
                 source = source.with_enable_page_index(true);
+            } else {
+                source = source.with_enable_page_index(false);
+            }
+
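+            // configure bloom filter reading explicitly in both branches so the
+            // test does not depend on the source's default setting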
+            if self.bloom_filters {
+                source = source.with_bloom_filter_on_read(true);
+            } else {
+                source = source.with_bloom_filter_on_read(false);
             }
 
             source.with_schema(Arc::clone(&file_schema))
@@ -817,7 +833,7 @@ mod tests {
     }
 
     #[tokio::test]
-    async fn evolved_schema_filter() {
+    async fn evolved_schema_column_order_filter() {
         let c1: ArrayRef =
             Arc::new(StringArray::from(vec![Some("Foo"), None, Some("bar")]));
 
@@ -848,6 +864,88 @@ mod tests {
         assert_eq!(read.len(), 0);
     }
 
+    #[tokio::test]
+    async fn evolved_schema_column_type_filter_strings() {
+        // The table schema and the filter use Utf8, but the file stores
+        // Utf8View, so pruning must adapt the predicate to the file's type
+        let c1: ArrayRef =
+            Arc::new(StringViewArray::from(vec![Some("foo"), Some("bar")]));
+        let batch = create_batch(vec![("c1", c1.clone())]);
+
+        let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Utf8, false)]));
+
+        // Predicate should prune all row groups
+        let filter = col("c1").eq(lit(ScalarValue::Utf8(Some("aaa".to_string()))));
+        let rt = RoundTrip::new()
+            .with_predicate(filter)
+            .with_schema(schema.clone())
+            .round_trip(vec![batch.clone()])
+            .await;
+        // There should be no predicate evaluation errors
+        let metrics = rt.parquet_exec.metrics().unwrap();
+        assert_eq!(get_value(&metrics, "predicate_evaluation_errors"), 0);
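+        // pushdown filtering is disabled by default in this round trip,
+        // so no rows should be reported as matched by pushdown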
+        assert_eq!(get_value(&metrics, "pushdown_rows_matched"), 0);
+        assert_eq!(rt.batches.unwrap().len(), 0);
+
+        // Predicate should prune no row groups
+        let filter = col("c1").eq(lit(ScalarValue::Utf8(Some("foo".to_string()))));
+        let rt = RoundTrip::new()
+            .with_predicate(filter)
+            .with_schema(schema)
+            .round_trip(vec![batch])
+            .await;
+        // There should be no predicate evaluation errors
+        let metrics = rt.parquet_exec.metrics().unwrap();
+        assert_eq!(get_value(&metrics, "predicate_evaluation_errors"), 0);
+        assert_eq!(get_value(&metrics, "pushdown_rows_matched"), 0);
+        let read = rt
+            .batches
+            .unwrap()
+            .iter()
+            .map(|b| b.num_rows())
+            .sum::<usize>();
+        assert_eq!(read, 2, "Expected 2 rows to match the predicate");
+    }
+
+    #[tokio::test]
+    async fn evolved_schema_column_type_filter_ints() {
+        // The table schema and the filter use UInt64, but the file stores
+        // Int8, so pruning must adapt the predicate to the file's type
+        let c1: ArrayRef = Arc::new(Int8Array::from(vec![Some(1), Some(2)]));
+        let batch = create_batch(vec![("c1", c1.clone())]);
+
+        let schema =
+            Arc::new(Schema::new(vec![Field::new("c1", DataType::UInt64, false)]));
+
+        // Predicate should prune all row groups
+        let filter = col("c1").eq(lit(ScalarValue::UInt64(Some(5))));
+        let rt = RoundTrip::new()
+            .with_predicate(filter)
+            .with_schema(schema.clone())
+            .round_trip(vec![batch.clone()])
+            .await;
+        // There should be no predicate evaluation errors
+        let metrics = rt.parquet_exec.metrics().unwrap();
+        assert_eq!(get_value(&metrics, "predicate_evaluation_errors"), 0);
+        assert_eq!(rt.batches.unwrap().len(), 0);
+
+        // Predicate should prune no row groups
+        let filter = col("c1").eq(lit(ScalarValue::UInt64(Some(1))));
+        let rt = RoundTrip::new()
+            .with_predicate(filter)
+            .with_schema(schema)
+            .round_trip(vec![batch])
+            .await;
+        // There should be no predicate evaluation errors
+        let metrics = rt.parquet_exec.metrics().unwrap();
+        assert_eq!(get_value(&metrics, "predicate_evaluation_errors"), 0);
+        let read = rt
+            .batches
+            .unwrap()
+            .iter()
+            .map(|b| b.num_rows())
+            .sum::<usize>();
+        assert_eq!(read, 2, "Expected 2 rows to match the predicate");
+    }
+
     #[tokio::test]
     async fn evolved_schema_disjoint_schema_filter() {
         let c1: ArrayRef =
@@ -1748,6 +1846,7 @@ mod tests {
         let rt = RoundTrip::new()
             .with_predicate(filter.clone())
             .with_pushdown_predicate()
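+            // also exercise bloom filter pruning in this round trip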
+            .with_bloom_filters()
             .round_trip(vec![batch1])
             .await;
 