@@ -1879,6 +1879,41 @@ def test_parquet_non_null_column_to_pyarrow(ctx, tmp_path):
18791879 assert pyarrow_table .to_pydict () == {"m" : [3 ]}
18801880
18811881
def test_parquet_empty_batch_to_pyarrow(ctx, tmp_path):
    """A zero-row query over a Parquet file keeps the non-nullable schema.

    Writes a NOT NULL int column to Parquet, reads it back with ``limit 0``,
    and checks that the empty Arrow table still reports the column as
    non-nullable int32.
    """
    parquet_path = tmp_path.joinpath("t.parquet")

    # Persist a tiny single-column NOT NULL table as a Parquet file.
    ctx.sql("create table t_(a int not null)").collect()
    ctx.sql("insert into t_ values (1), (2), (3)").collect()
    ctx.sql(f"copy (select * from t_) to '{parquet_path}'").collect()

    # Read it back through a registered Parquet table with an empty result set.
    ctx.register_parquet("t", parquet_path)
    empty_table = ctx.sql("select * from t limit 0").to_arrow_table()

    expected = pa.schema([pa.field("a", pa.int32(), nullable=False)])
    assert empty_table.schema == expected
1897+
def test_parquet_null_aggregation_to_pyarrow(ctx, tmp_path):
    """Aggregating an empty subquery yields one nullable NULL row in Arrow.

    Even though the source Parquet column is declared NOT NULL, ``max(a)``
    over a filter that matches no rows must produce a single NULL value and
    a nullable int32 output column.
    """
    parquet_path = tmp_path.joinpath("t.parquet")

    # Persist a tiny single-column NOT NULL table as a Parquet file.
    ctx.sql("create table t_(a int not null)").collect()
    ctx.sql("insert into t_ values (1), (2), (3)").collect()
    ctx.sql(f"copy (select * from t_) to '{parquet_path}'").collect()

    ctx.register_parquet("t", parquet_path)
    # The inner filter matches nothing, so max() aggregates zero rows.
    result = ctx.sql(
        "select max(a) as m from (select * from t where a < 0)"
    ).to_arrow_table()

    assert result.to_pydict() == {"m": [None]}
    # Aggregation over an empty input relaxes the NOT NULL constraint.
    assert result.schema == pa.schema([pa.field("m", pa.int32(), nullable=True)])
1916+
18821917def test_execute_stream (df ):
18831918 stream = df .execute_stream ()
18841919 assert all (batch is not None for batch in stream )
0 commit comments