@@ -225,3 +225,37 @@ def test_filename_pattern_with_uuid(self, pd):
225225 result = duckdb .sql (f"FROM read_parquet('{ temp_file_name } /*/*.parquet', hive_partitioning=TRUE)" )
226226 expected = [("rei" , 321.0 , "a" ), ("shinji" , 123.0 , "a" ), ("asuka" , 23.0 , "b" ), ("kaworu" , 340.0 , "c" )]
227227 assert result .execute ().fetchall () == expected
228+
@pytest.mark.parametrize("file_size_bytes", [1000, "1k"])
def test_file_size_bytes_basic(self, file_size_bytes):
    """Write 10000 rows with a small ``file_size_bytes`` limit and check the output.

    The limit is supplied once as an integer and once as a size string. In both
    cases the test expects more than one entry under the target path and all
    10000 rows to be readable back through a ``*.parquet`` glob.
    """
    temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))  # noqa: PTH118

    # use same test data as external/duckdb/test/sql/copy/file_size_bytes.test
    rel = duckdb.from_query("SELECT i AS col_a, i AS col_b FROM range(0,10000) tbl(i);")
    rel.to_parquet(temp_file_name, file_size_bytes=file_size_bytes, row_group_size=2000)

    # Check that multiple files were created
    files = list(pathlib.Path(temp_file_name).iterdir())
    assert len(files) > 1, f"Expected multiple files, got {len(files)}"

    # Verify data integrity. The directory and the glob pattern must be joined
    # with no whitespace in between, otherwise the pattern points at a
    # different (non-existent) path.
    result = duckdb.read_parquet(f"{temp_file_name}/*.parquet")
    assert len(result.execute().fetchall()) == 10000
244+
@pytest.mark.parametrize("pd", [NumpyPandas(), ArrowPandas()])
@pytest.mark.parametrize("file_size_bytes", ["256MB", "1G"])
def test_file_size_bytes_human_readable(self, pd, file_size_bytes):
    """Round-trip a four-row DataFrame through ``to_parquet`` with a size-string limit.

    ``file_size_bytes`` is given as a human-readable string; the rows read back
    from the written path must equal the rows of the source relation.
    """
    scratch_dir = tempfile.mkdtemp()
    target_path = os.path.join(scratch_dir, next(tempfile._get_candidate_names()))  # noqa: PTH118

    columns = {
        "name": ["rei", "shinji", "asuka", "kaworu"],
        "float": [321.0, 123.0, 23.0, 340.0],
        "category": ["a", "a", "b", "c"],
    }
    source_rel = duckdb.from_df(pd.DataFrame(columns))
    source_rel.to_parquet(target_path, file_size_bytes=file_size_bytes)

    # The limits are far larger than the data, so presumably only a single
    # file is produced; the test itself only compares the row contents.
    written_rel = duckdb.read_parquet(target_path)
    expected_rows = source_rel.execute().fetchall()
    actual_rows = written_rel.execute().fetchall()
    assert expected_rows == actual_rows
0 commit comments