 import os
+import pathlib
+import re
 import tempfile

 import pytest
@@ -184,12 +186,15 @@ def test_filename_pattern_with_index(self, pd):
         rel = duckdb.from_df(df)
         rel.to_parquet(temp_file_name, partition_by=["category"], filename_pattern="orders_{i}")
         # Check that files follow the pattern with {i}
-        files_a = os.listdir(f"{temp_file_name}/category=a")
-        files_b = os.listdir(f"{temp_file_name}/category=b")
-        files_c = os.listdir(f"{temp_file_name}/category=c")
-        assert all("orders_" in f and f.endswith(".parquet") for f in files_a)
-        assert all("orders_" in f and f.endswith(".parquet") for f in files_b)
-        assert all("orders_" in f and f.endswith(".parquet") for f in files_c)
+        files_a = list(pathlib.Path(f"{temp_file_name}/category=a").iterdir())
+        files_b = list(pathlib.Path(f"{temp_file_name}/category=b").iterdir())
+        files_c = list(pathlib.Path(f"{temp_file_name}/category=c").iterdir())
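+        # {i} in the filename pattern is replaced by a numeric index, so names should look like orders_0.parquet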
+        filename_pattern = re.compile(r"^orders_[0-9]+\.parquet$")
+        assert all(filename_pattern.search(f.name) for f in files_a)
+        assert all(filename_pattern.search(f.name) for f in files_b)
+        assert all(filename_pattern.search(f.name) for f in files_c)
+
         # Verify data integrity
         result = duckdb.sql(f"FROM read_parquet('{temp_file_name}/*/*.parquet', hive_partitioning=TRUE)")
         expected = [("rei", 321.0, "a"), ("shinji", 123.0, "a"), ("asuka", 23.0, "b"), ("kaworu", 340.0, "c")]
@@ -202,14 +207,23 @@ def test_filename_pattern_with_uuid(self, pd):
             {
                 "name": ["rei", "shinji", "asuka", "kaworu"],
                 "float": [321.0, 123.0, 23.0, 340.0],
+                "category": ["a", "a", "b", "c"],
             }
         )
         rel = duckdb.from_df(df)
-        rel.to_parquet(temp_file_name, filename_pattern="file_{uuid}")
+        rel.to_parquet(temp_file_name, partition_by=["category"], filename_pattern="file_{uuid}")
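+        # partition_by writes one hive-style subdirectory per category value (category=a, category=b, category=c)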
         # Check that files follow the pattern with {uuid}
-        files = [f for f in os.listdir(temp_file_name) if f.endswith(".parquet")]
-        assert len(files) > 0
-        assert all(f.startswith("file_") and f.endswith(".parquet") for f in files)
+        files_a = list(pathlib.Path(f"{temp_file_name}/category=a").iterdir())
+        files_b = list(pathlib.Path(f"{temp_file_name}/category=b").iterdir())
+        files_c = list(pathlib.Path(f"{temp_file_name}/category=c").iterdir())
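+        # {uuid} is replaced by a hex UUID, so names should match file_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.parquet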
+        filename_pattern = re.compile(r"^file_[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}\.parquet$")
+        assert all(filename_pattern.search(f.name) for f in files_a)
+        assert all(filename_pattern.search(f.name) for f in files_b)
+        assert all(filename_pattern.search(f.name) for f in files_c)
+
         # Verify data integrity
-        result = duckdb.read_parquet(f"{temp_file_name}/*.parquet")
-        assert rel.execute().fetchall() == result.execute().fetchall()
+        result = duckdb.sql(f"FROM read_parquet('{temp_file_name}/*/*.parquet', hive_partitioning=TRUE)")
+        expected = [("rei", 321.0, "a"), ("shinji", 123.0, "a"), ("asuka", 23.0, "b"), ("kaworu", 340.0, "c")]
+        assert result.execute().fetchall() == expected