@@ -34,6 +34,7 @@ struct RowGroupPruningTest {
3434 query : String ,
3535 expected_errors : Option < usize > ,
3636 expected_row_group_matched_by_statistics : Option < usize > ,
37+ expected_row_group_fully_matched_by_statistics : Option < usize > ,
3738 expected_row_group_pruned_by_statistics : Option < usize > ,
3839 expected_files_pruned_by_statistics : Option < usize > ,
3940 expected_row_group_matched_by_bloom_filter : Option < usize > ,
@@ -50,6 +51,7 @@ impl RowGroupPruningTest {
5051 expected_errors : None ,
5152 expected_row_group_matched_by_statistics : None ,
5253 expected_row_group_pruned_by_statistics : None ,
54+ expected_row_group_fully_matched_by_statistics : None ,
5355 expected_files_pruned_by_statistics : None ,
5456 expected_row_group_matched_by_bloom_filter : None ,
5557 expected_row_group_pruned_by_bloom_filter : None ,
@@ -82,6 +84,15 @@ impl RowGroupPruningTest {
8284 self
8385 }
8486
87+ // Set the expected fully matched row groups by statistics
88+ fn with_fully_matched_by_stats (
89+ mut self ,
90+ fully_matched_by_stats : Option < usize > ,
91+ ) -> Self {
92+ self . expected_row_group_fully_matched_by_statistics = fully_matched_by_stats;
93+ self
94+ }
95+
8596 // Set the expected pruned row groups by statistics
8697 fn with_pruned_by_stats ( mut self , pruned_by_stats : Option < usize > ) -> Self {
8798 self . expected_row_group_pruned_by_statistics = pruned_by_stats;
@@ -197,6 +208,11 @@ impl RowGroupPruningTest {
197208 self . expected_row_group_matched_by_statistics,
198209 "mismatched row_groups_matched_statistics" ,
199210 ) ;
211+ assert_eq ! (
212+ output. row_groups_fully_matched_statistics( ) ,
213+ self . expected_row_group_fully_matched_by_statistics,
214+ "mismatched row_groups_fully_matched_statistics" ,
215+ ) ;
200216 assert_eq ! (
201217 output. row_groups_pruned_statistics( ) ,
202218 self . expected_row_group_pruned_by_statistics,
@@ -1719,8 +1735,24 @@ fn make_i32_batch(
17191735 RecordBatch :: try_new ( schema, vec ! [ array] ) . map_err ( DataFusionError :: from)
17201736}
17211737
1738+ // Helper function to create a batch with two Int32 columns
1739+ fn make_two_col_i32_batch (
1740+ name_a : & str ,
1741+ name_b : & str ,
1742+ values_a : Vec < i32 > ,
1743+ values_b : Vec < i32 > ,
1744+ ) -> datafusion_common:: error:: Result < RecordBatch > {
1745+ let schema = Arc :: new ( Schema :: new ( vec ! [
1746+ Field :: new( name_a, DataType :: Int32 , false ) ,
1747+ Field :: new( name_b, DataType :: Int32 , false ) ,
1748+ ] ) ) ;
1749+ let array_a: ArrayRef = Arc :: new ( Int32Array :: from ( values_a) ) ;
1750+ let array_b: ArrayRef = Arc :: new ( Int32Array :: from ( values_b) ) ;
1751+ RecordBatch :: try_new ( schema, vec ! [ array_a, array_b] ) . map_err ( DataFusionError :: from)
1752+ }
1753+
17221754#[ tokio:: test]
1723- async fn test_limit_pruning ( ) -> datafusion_common:: error:: Result < ( ) > {
1755+ async fn test_limit_pruning_basic ( ) -> datafusion_common:: error:: Result < ( ) > {
17241756 // Scenario: Simple integer column, multiple row groups
17251757 // Query: SELECT c1 FROM t WHERE c1 = 0 LIMIT 2
17261758 // We expect 2 rows in total.
@@ -1754,6 +1786,7 @@ async fn test_limit_pruning() -> datafusion_common::error::Result<()> {
17541786 . with_expected_rows ( 2 )
17551787 . with_pruned_files ( Some ( 0 ) )
17561788 . with_matched_by_stats ( Some ( 4 ) )
1789+ . with_fully_matched_by_stats ( Some ( 3 ) )
17571790 . with_pruned_by_stats ( Some ( 1 ) )
17581791 . with_limit_pruned_row_groups ( Some ( 3 ) )
17591792 . test_row_group_prune_with_custom_data ( schema, batches, 2 )
@@ -1762,22 +1795,6 @@ async fn test_limit_pruning() -> datafusion_common::error::Result<()> {
17621795 Ok ( ( ) )
17631796}
17641797
1765- // Helper function to create a batch with two Int32 columns
1766- fn make_two_col_i32_batch (
1767- name_a : & str ,
1768- name_b : & str ,
1769- values_a : Vec < i32 > ,
1770- values_b : Vec < i32 > ,
1771- ) -> datafusion_common:: error:: Result < RecordBatch > {
1772- let schema = Arc :: new ( Schema :: new ( vec ! [
1773- Field :: new( name_a, DataType :: Int32 , false ) ,
1774- Field :: new( name_b, DataType :: Int32 , false ) ,
1775- ] ) ) ;
1776- let array_a: ArrayRef = Arc :: new ( Int32Array :: from ( values_a) ) ;
1777- let array_b: ArrayRef = Arc :: new ( Int32Array :: from ( values_b) ) ;
1778- RecordBatch :: try_new ( schema, vec ! [ array_a, array_b] ) . map_err ( DataFusionError :: from)
1779- }
1780-
17811798#[ tokio:: test]
17821799async fn test_limit_pruning_complex_filter ( ) -> datafusion_common:: error:: Result < ( ) > {
17831800 // Test Case 1: Complex filter with two columns (a = 1 AND b > 1 AND b < 4)
@@ -1815,6 +1832,7 @@ async fn test_limit_pruning_complex_filter() -> datafusion_common::error::Result
18151832 . with_expected_rows ( 5 )
18161833 . with_pruned_files ( Some ( 0 ) )
18171834 . with_matched_by_stats ( Some ( 4 ) ) // RG0,1,2,3 are matched
1835+ . with_fully_matched_by_stats ( Some ( 3 ) )
18181836 . with_pruned_by_stats ( Some ( 2 ) ) // RG4,5 are pruned
18191837 . with_limit_pruned_row_groups ( Some ( 2 ) ) // RG0 and RG3 are pruned by limit
18201838 . test_row_group_prune_with_custom_data ( schema, batches, 3 )
@@ -1855,6 +1873,7 @@ async fn test_limit_pruning_multiple_fully_matched(
18551873 . with_expected_rows ( 8 )
18561874 . with_pruned_files ( Some ( 0 ) )
18571875 . with_matched_by_stats ( Some ( 4 ) ) // RG0,1,2,3 matched
1876+ . with_fully_matched_by_stats ( Some ( 4 ) )
18581877 . with_pruned_by_stats ( Some ( 1 ) ) // RG4 pruned
18591878 . with_limit_pruned_row_groups ( Some ( 2 ) ) // RG2,3 pruned by limit
18601879 . test_row_group_prune_with_custom_data ( schema, batches, 4 )
@@ -1894,6 +1913,7 @@ async fn test_limit_pruning_no_fully_matched() -> datafusion_common::error::Resu
18941913 . with_expected_rows ( 3 )
18951914 . with_pruned_files ( Some ( 0 ) )
18961915 . with_matched_by_stats ( Some ( 4 ) ) // RG0,1,2,3 matched
1916+ . with_fully_matched_by_stats ( Some ( 0 ) )
18971917 . with_pruned_by_stats ( Some ( 1 ) ) // RG4 pruned
18981918 . with_limit_pruned_row_groups ( Some ( 0 ) ) // No row groups pruned by limit (expected count is 0)
18991919 . test_row_group_prune_with_custom_data ( schema, batches, 3 )
@@ -1934,6 +1954,7 @@ async fn test_limit_pruning_exceeds_fully_matched() -> datafusion_common::error:
19341954 . with_expected_rows ( 10 ) // Total: 1 + 3 + 4 + 1 = 9 (less than limit)
19351955 . with_pruned_files ( Some ( 0 ) )
19361956 . with_matched_by_stats ( Some ( 4 ) ) // RG0,1,2,3 matched
1957+ . with_fully_matched_by_stats ( Some ( 2 ) )
19371958 . with_pruned_by_stats ( Some ( 1 ) ) // RG4 pruned
19381959 . with_limit_pruned_row_groups ( Some ( 0 ) ) // No limit pruning since we need all RGs
19391960 . test_row_group_prune_with_custom_data ( schema, batches, 4 )
0 commit comments