@@ -1761,3 +1761,183 @@ async fn test_limit_pruning() -> datafusion_common::error::Result<()> {
17611761
17621762 Ok ( ( ) )
17631763}
1764+
1765+ // Helper function to create a batch with two Int32 columns
1766+ fn make_two_col_i32_batch (
1767+ name_a : & str ,
1768+ name_b : & str ,
1769+ values_a : Vec < i32 > ,
1770+ values_b : Vec < i32 > ,
1771+ ) -> datafusion_common:: error:: Result < RecordBatch > {
1772+ let schema = Arc :: new ( Schema :: new ( vec ! [
1773+ Field :: new( name_a, DataType :: Int32 , false ) ,
1774+ Field :: new( name_b, DataType :: Int32 , false ) ,
1775+ ] ) ) ;
1776+ let array_a: ArrayRef = Arc :: new ( Int32Array :: from ( values_a) ) ;
1777+ let array_b: ArrayRef = Arc :: new ( Int32Array :: from ( values_b) ) ;
1778+ RecordBatch :: try_new ( schema, vec ! [ array_a, array_b] ) . map_err ( DataFusionError :: from)
1779+ }
1780+
1781+ #[ tokio:: test]
1782+ async fn test_limit_pruning_complex_filter ( ) -> datafusion_common:: error:: Result < ( ) > {
1783+ // Test Case 1: Complex filter with two columns (a = 1 AND b > 1 AND b < 4)
1784+ // Row Group 0: a=[1,1,1], b=[0,2,3] -> Partially matched, 2 rows match (b=2,3)
1785+ // Row Group 1: a=[1,1,1], b=[2,2,2] -> Fully matched, 3 rows
1786+ // Row Group 2: a=[1,1,1], b=[2,3,3] -> Fully matched, 3 rows
1787+ // Row Group 3: a=[1,1,1], b=[2,2,3] -> Fully matched, 3 rows
1788+ // Row Group 4: a=[2,2,2], b=[2,2,2] -> Not matched (a != 1)
1789+ // Row Group 5: a=[1,1,1], b=[5,6,7] -> Not matched (b >= 4)
1790+
1791+ // With LIMIT 5, we need RG1 (3 rows) + RG2 (2 rows from 3) = 5 rows
1792+ // RG4 and RG5 should be pruned by statistics
1793+ // RG3 should be pruned by limit
1794+ // RG0 is partially matched, so it depends on the order
1795+
1796+ let schema = Arc :: new ( Schema :: new ( vec ! [
1797+ Field :: new( "a" , DataType :: Int32 , false ) ,
1798+ Field :: new( "b" , DataType :: Int32 , false ) ,
1799+ ] ) ) ;
1800+ let query = "SELECT a, b FROM t WHERE a = 1 AND b > 1 AND b < 4 LIMIT 5" ;
1801+
1802+ let batches = vec ! [
1803+ make_two_col_i32_batch( "a" , "b" , vec![ 1 , 1 , 1 ] , vec![ 0 , 2 , 3 ] ) ?,
1804+ make_two_col_i32_batch( "a" , "b" , vec![ 1 , 1 , 1 ] , vec![ 2 , 2 , 2 ] ) ?,
1805+ make_two_col_i32_batch( "a" , "b" , vec![ 1 , 1 , 1 ] , vec![ 2 , 3 , 3 ] ) ?,
1806+ make_two_col_i32_batch( "a" , "b" , vec![ 1 , 1 , 1 ] , vec![ 2 , 2 , 3 ] ) ?,
1807+ make_two_col_i32_batch( "a" , "b" , vec![ 2 , 2 , 2 ] , vec![ 2 , 2 , 2 ] ) ?,
1808+ make_two_col_i32_batch( "a" , "b" , vec![ 1 , 1 , 1 ] , vec![ 5 , 6 , 7 ] ) ?,
1809+ ] ;
1810+
1811+ RowGroupPruningTest :: new ( )
1812+ . with_scenario ( Scenario :: Int )
1813+ . with_query ( query)
1814+ . with_expected_errors ( Some ( 0 ) )
1815+ . with_expected_rows ( 5 )
1816+ . with_pruned_files ( Some ( 0 ) )
1817+ . with_matched_by_stats ( Some ( 4 ) ) // RG0,1,2,3 are matched
1818+ . with_pruned_by_stats ( Some ( 2 ) ) // RG4,5 are pruned
1819+ . with_limit_pruned_row_groups ( Some ( 2 ) ) // RG0, RG3 is pruned by limit
1820+ . test_row_group_prune_with_custom_data ( schema, batches, 3 )
1821+ . await ;
1822+
1823+ Ok ( ( ) )
1824+ }
1825+
1826+ #[ tokio:: test]
1827+ async fn test_limit_pruning_multiple_fully_matched (
1828+ ) -> datafusion_common:: error:: Result < ( ) > {
1829+ // Test Case 2: Limit requires multiple fully matched row groups
1830+ // Row Group 0: a=[5,5,5,5] -> Fully matched, 4 rows
1831+ // Row Group 1: a=[5,5,5,5] -> Fully matched, 4 rows
1832+ // Row Group 2: a=[5,5,5,5] -> Fully matched, 4 rows
1833+ // Row Group 3: a=[5,5,5,5] -> Fully matched, 4 rows
1834+ // Row Group 4: a=[1,2,3,4] -> Not matched
1835+
1836+ // With LIMIT 8, we need RG0 (4 rows) + RG1 (4 rows) 8 rows
1837+ // RG2,3 should be pruned by limit
1838+ // RG4 should be pruned by statistics
1839+
1840+ let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Int32 , false ) ] ) ) ;
1841+ let query = "SELECT a FROM t WHERE a = 5 LIMIT 8" ;
1842+
1843+ let batches = vec ! [
1844+ make_i32_batch( "a" , vec![ 5 , 5 , 5 , 5 ] ) ?,
1845+ make_i32_batch( "a" , vec![ 5 , 5 , 5 , 5 ] ) ?,
1846+ make_i32_batch( "a" , vec![ 5 , 5 , 5 , 5 ] ) ?,
1847+ make_i32_batch( "a" , vec![ 5 , 5 , 5 , 5 ] ) ?,
1848+ make_i32_batch( "a" , vec![ 1 , 2 , 3 , 4 ] ) ?,
1849+ ] ;
1850+
1851+ RowGroupPruningTest :: new ( )
1852+ . with_scenario ( Scenario :: Int )
1853+ . with_query ( query)
1854+ . with_expected_errors ( Some ( 0 ) )
1855+ . with_expected_rows ( 8 )
1856+ . with_pruned_files ( Some ( 0 ) )
1857+ . with_matched_by_stats ( Some ( 4 ) ) // RG0,1,2,3 matched
1858+ . with_pruned_by_stats ( Some ( 1 ) ) // RG4 pruned
1859+ . with_limit_pruned_row_groups ( Some ( 2 ) ) // RG2,3 pruned by limit
1860+ . test_row_group_prune_with_custom_data ( schema, batches, 4 )
1861+ . await ;
1862+
1863+ Ok ( ( ) )
1864+ }
1865+
1866+ #[ tokio:: test]
1867+ async fn test_limit_pruning_no_fully_matched ( ) -> datafusion_common:: error:: Result < ( ) > {
1868+ // Test Case 3: No fully matched row groups - all are partially matched
1869+ // Row Group 0: a=[1,2,3] -> Partially matched, 1 row (a=2)
1870+ // Row Group 1: a=[2,3,4] -> Partially matched, 1 row (a=2)
1871+ // Row Group 2: a=[2,5,6] -> Partially matched, 1 row (a=2)
1872+ // Row Group 3: a=[2,7,8] -> Partially matched, 1 row (a=2)
1873+ // Row Group 4: a=[9,10,11] -> Not matched
1874+
1875+ // With LIMIT 3, we need to scan RG0,1,2 to get 3 matching rows
1876+ // Cannot prune much by limit since all matching RGs are partial
1877+ // RG4 should be pruned by statistics
1878+
1879+ let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Int32 , false ) ] ) ) ;
1880+ let query = "SELECT a FROM t WHERE a = 2 LIMIT 3" ;
1881+
1882+ let batches = vec ! [
1883+ make_i32_batch( "a" , vec![ 1 , 2 , 3 ] ) ?,
1884+ make_i32_batch( "a" , vec![ 2 , 3 , 4 ] ) ?,
1885+ make_i32_batch( "a" , vec![ 2 , 5 , 6 ] ) ?,
1886+ make_i32_batch( "a" , vec![ 2 , 7 , 8 ] ) ?,
1887+ make_i32_batch( "a" , vec![ 9 , 10 , 11 ] ) ?,
1888+ ] ;
1889+
1890+ RowGroupPruningTest :: new ( )
1891+ . with_scenario ( Scenario :: Int )
1892+ . with_query ( query)
1893+ . with_expected_errors ( Some ( 0 ) )
1894+ . with_expected_rows ( 3 )
1895+ . with_pruned_files ( Some ( 0 ) )
1896+ . with_matched_by_stats ( Some ( 4 ) ) // RG0,1,2,3 matched
1897+ . with_pruned_by_stats ( Some ( 1 ) ) // RG4 pruned
1898+ . with_limit_pruned_row_groups ( Some ( 0 ) ) // RG3 pruned by limit
1899+ . test_row_group_prune_with_custom_data ( schema, batches, 3 )
1900+ . await ;
1901+
1902+ Ok ( ( ) )
1903+ }
1904+
1905+ #[ tokio:: test]
1906+ async fn test_limit_pruning_exceeds_fully_matched ( ) -> datafusion_common:: error:: Result < ( ) >
1907+ {
1908+ // Test Case 4: Limit exceeds all fully matched rows, need partially matched
1909+ // Row Group 0: a=[10,11,12,12] -> Partially matched, 1 row (a=10)
1910+ // Row Group 1: a=[10,10,10,10] -> Fully matched, 4 rows
1911+ // Row Group 2: a=[10,10,10,10] -> Fully matched, 4 rows
1912+ // Row Group 3: a=[10,13,14,11] -> Partially matched, 1 row (a=10)
1913+ // Row Group 4: a=[20,21,22,22] -> Not matched
1914+
1915+ // With LIMIT 10, we need RG1 (4) + RG2 (4) = 8 from fully matched
1916+ // Still need 2 more, so we need to scan partially matched RG0 and RG3
1917+ // All matching row groups should be scanned, only RG4 pruned by statistics
1918+
1919+ let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new( "a" , DataType :: Int32 , false ) ] ) ) ;
1920+ let query = "SELECT a FROM t WHERE a = 10 LIMIT 10" ;
1921+
1922+ let batches = vec ! [
1923+ make_i32_batch( "a" , vec![ 10 , 11 , 12 , 12 ] ) ?,
1924+ make_i32_batch( "a" , vec![ 10 , 10 , 10 , 10 ] ) ?,
1925+ make_i32_batch( "a" , vec![ 10 , 10 , 10 , 10 ] ) ?,
1926+ make_i32_batch( "a" , vec![ 10 , 13 , 14 , 11 ] ) ?,
1927+ make_i32_batch( "a" , vec![ 20 , 21 , 22 , 22 ] ) ?,
1928+ ] ;
1929+
1930+ RowGroupPruningTest :: new ( )
1931+ . with_scenario ( Scenario :: Int )
1932+ . with_query ( query)
1933+ . with_expected_errors ( Some ( 0 ) )
1934+ . with_expected_rows ( 10 ) // Total: 1 + 3 + 4 + 1 = 9 (less than limit)
1935+ . with_pruned_files ( Some ( 0 ) )
1936+ . with_matched_by_stats ( Some ( 4 ) ) // RG0,1,2,3 matched
1937+ . with_pruned_by_stats ( Some ( 1 ) ) // RG4 pruned
1938+ . with_limit_pruned_row_groups ( Some ( 0 ) ) // No limit pruning since we need all RGs
1939+ . test_row_group_prune_with_custom_data ( schema, batches, 4 )
1940+ . await ;
1941+
1942+ Ok ( ( ) )
1943+ }
0 commit comments