Skip to content

Commit 5cb2afb

Browse files
committed
Fix linting and test
1 parent 8ab292c commit 5cb2afb

File tree

1 file changed

+24
-12
lines changed

1 file changed

+24
-12
lines changed

tests/fast/api/test_to_parquet.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import os
2+
import pathlib
3+
import re
24
import tempfile
35

46
import pytest
@@ -184,12 +186,14 @@ def test_filename_pattern_with_index(self, pd):
184186
rel = duckdb.from_df(df)
185187
rel.to_parquet(temp_file_name, partition_by=["category"], filename_pattern="orders_{i}")
186188
# Check that files follow the pattern with {i}
187-
files_a = os.listdir(f"{temp_file_name}/category=a")
188-
files_b = os.listdir(f"{temp_file_name}/category=b")
189-
files_c = os.listdir(f"{temp_file_name}/category=c")
190-
assert all("orders_" in f and f.endswith(".parquet") for f in files_a)
191-
assert all("orders_" in f and f.endswith(".parquet") for f in files_b)
192-
assert all("orders_" in f and f.endswith(".parquet") for f in files_c)
189+
files_a = list(pathlib.Path(f"{temp_file_name}/category=a").iterdir())
190+
files_b = list(pathlib.Path(f"{temp_file_name}/category=b").iterdir())
191+
files_c = list(pathlib.Path(f"{temp_file_name}/category=c").iterdir())
192+
filename_pattern = re.compile(r"^orders_[0-9]+\.parquet$")
193+
assert all(filename_pattern.search(str(f.name)) for f in files_a)
194+
assert all(filename_pattern.search(str(f.name)) for f in files_b)
195+
assert all(filename_pattern.search(str(f.name)) for f in files_c)
196+
193197
# Verify data integrity
194198
result = duckdb.sql(f"FROM read_parquet('{temp_file_name}/*/*.parquet', hive_partitioning=TRUE)")
195199
expected = [("rei", 321.0, "a"), ("shinji", 123.0, "a"), ("asuka", 23.0, "b"), ("kaworu", 340.0, "c")]
@@ -202,14 +206,22 @@ def test_filename_pattern_with_uuid(self, pd):
202206
{
203207
"name": ["rei", "shinji", "asuka", "kaworu"],
204208
"float": [321.0, 123.0, 23.0, 340.0],
209+
"category": ["a", "a", "b", "c"],
205210
}
206211
)
207212
rel = duckdb.from_df(df)
208-
rel.to_parquet(temp_file_name, filename_pattern="file_{uuid}")
213+
rel.to_parquet(temp_file_name, partition_by=["category"], filename_pattern="file_{uuid}")
209214
# Check that files follow the pattern with {uuid}
210-
files = [f for f in os.listdir(temp_file_name) if f.endswith(".parquet")]
211-
assert len(files) > 0
212-
assert all(f.startswith("file_") and f.endswith(".parquet") for f in files)
215+
files_a = list(pathlib.Path(f"{temp_file_name}/category=a").iterdir())
216+
files_b = list(pathlib.Path(f"{temp_file_name}/category=b").iterdir())
217+
files_c = list(pathlib.Path(f"{temp_file_name}/category=c").iterdir())
218+
filename_pattern = re.compile(r"^file_[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}\.parquet$")
219+
print(files_a)
220+
assert all(filename_pattern.search(str(f.name)) for f in files_a)
221+
assert all(filename_pattern.search(str(f.name)) for f in files_b)
222+
assert all(filename_pattern.search(str(f.name)) for f in files_c)
223+
213224
# Verify data integrity
214-
result = duckdb.read_parquet(f"{temp_file_name}/*.parquet")
215-
assert rel.execute().fetchall() == result.execute().fetchall()
225+
result = duckdb.sql(f"FROM read_parquet('{temp_file_name}/*/*.parquet', hive_partitioning=TRUE)")
226+
expected = [("rei", 321.0, "a"), ("shinji", 123.0, "a"), ("asuka", 23.0, "b"), ("kaworu", 340.0, "c")]
227+
assert result.execute().fetchall() == expected

0 commit comments

Comments
 (0)