Skip to content

Commit f1063a1

Browse files
Copilot and joocer committed
Use existing parquet test files instead of creating temp files
Co-authored-by: joocer <[email protected]>
1 parent 124726a commit f1063a1

File tree

1 file changed

+50 -77 lines changed

tests/unit/connectors/test_wildcard_paths.py

Lines changed: 50 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44

55
import os
66
import sys
7-
import tempfile
87

98
sys.path.insert(1, os.path.join(sys.path[0], "../../.."))
109

@@ -60,94 +59,68 @@ def test_path_traversal_protection():
6059

6160
def test_wildcard_expansion():
6261
"""Test that wildcards are properly expanded to matching files"""
63-
# Create temporary test files
64-
with tempfile.TemporaryDirectory() as tmpdir:
65-
# Create some test files
66-
test_files = [
67-
os.path.join(tmpdir, "file1.txt"),
68-
os.path.join(tmpdir, "file2.txt"),
69-
os.path.join(tmpdir, "file3.txt"),
70-
]
71-
for f in test_files:
72-
with open(f, "w") as fp:
73-
fp.write("test content")
74-
75-
stats = MockStatistics()
76-
pattern = os.path.join(tmpdir, "*.txt")
77-
78-
connector = FileConnector(dataset=pattern, statistics=stats)
79-
80-
# Check that all files were found
81-
assert len(connector.files) == 3
82-
assert connector.has_wildcards is True
83-
84-
# Check files are sorted
85-
assert connector.files == sorted(test_files)
62+
stats = MockStatistics()
63+
pattern = "testdata/wildcard_test/*.parquet"
64+
65+
connector = FileConnector(dataset=pattern, statistics=stats)
66+
67+
# Check that all files were found
68+
assert len(connector.files) == 3
69+
assert connector.has_wildcards is True
70+
71+
# Check files are sorted
72+
expected_files = sorted([
73+
"testdata/wildcard_test/file1.parquet",
74+
"testdata/wildcard_test/file2.parquet",
75+
"testdata/wildcard_test/file3.parquet"
76+
])
77+
assert connector.files == expected_files
8678

8779

8880
def test_single_file_no_wildcard():
8981
"""Test that single files still work without wildcards"""
90-
with tempfile.TemporaryDirectory() as tmpdir:
91-
test_file = os.path.join(tmpdir, "test.txt")
92-
with open(test_file, "w") as fp:
93-
fp.write("test content")
94-
95-
stats = MockStatistics()
96-
connector = FileConnector(dataset=test_file, statistics=stats)
97-
98-
assert connector.has_wildcards is False
99-
assert connector.files == [test_file]
82+
stats = MockStatistics()
83+
test_file = "testdata/wildcard_test/file1.parquet"
84+
85+
connector = FileConnector(dataset=test_file, statistics=stats)
86+
87+
assert connector.has_wildcards is False
88+
assert connector.files == [test_file]
10089

10190

10291
def test_wildcard_range_pattern():
10392
"""Test wildcard with range patterns like [0-9]"""
104-
with tempfile.TemporaryDirectory() as tmpdir:
105-
# Create files matching a range pattern
106-
test_files = []
107-
for i in range(5):
108-
f = os.path.join(tmpdir, f"file{i}.txt")
109-
with open(f, "w") as fp:
110-
fp.write("test")
111-
test_files.append(f)
112-
113-
# Create a file that shouldn't match
114-
non_match = os.path.join(tmpdir, "fileX.txt")
115-
with open(non_match, "w") as fp:
116-
fp.write("test")
117-
118-
stats = MockStatistics()
119-
pattern = os.path.join(tmpdir, "file[0-9].txt")
120-
121-
connector = FileConnector(dataset=pattern, statistics=stats)
122-
123-
# Should match only files with digits
124-
assert len(connector.files) == 5
125-
assert all("file" in f and any(str(i) in f for i in range(5)) for f in connector.files)
126-
assert non_match not in connector.files
93+
stats = MockStatistics()
94+
pattern = "testdata/wildcard_test/file[1-3].parquet"
95+
96+
connector = FileConnector(dataset=pattern, statistics=stats)
97+
98+
# Should match files 1, 2, 3 (all 3 files)
99+
assert len(connector.files) == 3
100+
expected_files = sorted([
101+
"testdata/wildcard_test/file1.parquet",
102+
"testdata/wildcard_test/file2.parquet",
103+
"testdata/wildcard_test/file3.parquet"
104+
])
105+
assert connector.files == expected_files
127106

128107

129108
def test_wildcard_question_mark():
130109
"""Test wildcard with ? (single character match)"""
131-
with tempfile.TemporaryDirectory() as tmpdir:
132-
# Create files
133-
file1 = os.path.join(tmpdir, "fileA.txt")
134-
file2 = os.path.join(tmpdir, "fileB.txt")
135-
file_no_match = os.path.join(tmpdir, "fileAB.txt")
136-
137-
for f in [file1, file2, file_no_match]:
138-
with open(f, "w") as fp:
139-
fp.write("test")
140-
141-
stats = MockStatistics()
142-
pattern = os.path.join(tmpdir, "file?.txt")
143-
144-
connector = FileConnector(dataset=pattern, statistics=stats)
145-
146-
# Should match only single-character files
147-
assert len(connector.files) == 2
148-
assert file1 in connector.files
149-
assert file2 in connector.files
150-
assert file_no_match not in connector.files
110+
stats = MockStatistics()
111+
# Use ? to match single digit in filename
112+
pattern = "testdata/wildcard_test/file?.parquet"
113+
114+
connector = FileConnector(dataset=pattern, statistics=stats)
115+
116+
# Should match all 3 files (file1, file2, file3)
117+
assert len(connector.files) == 3
118+
expected_files = sorted([
119+
"testdata/wildcard_test/file1.parquet",
120+
"testdata/wildcard_test/file2.parquet",
121+
"testdata/wildcard_test/file3.parquet"
122+
])
123+
assert connector.files == expected_files
151124

152125

153126
if __name__ == "__main__": # pragma: no cover

0 commit comments

Comments (0)