|  | 
| 4 | 4 | 
 | 
| 5 | 5 | import os | 
| 6 | 6 | import sys | 
| 7 |  | -import tempfile | 
| 8 | 7 | 
 | 
| 9 | 8 | sys.path.insert(1, os.path.join(sys.path[0], "../../..")) | 
| 10 | 9 | 
 | 
| @@ -60,94 +59,68 @@ def test_path_traversal_protection(): | 
| 60 | 59 | 
 | 
| 61 | 60 | def test_wildcard_expansion(): | 
| 62 | 61 |     """Test that wildcards are properly expanded to matching files""" | 
| 63 |  | -    # Create temporary test files | 
| 64 |  | -    with tempfile.TemporaryDirectory() as tmpdir: | 
| 65 |  | -        # Create some test files | 
| 66 |  | -        test_files = [ | 
| 67 |  | -            os.path.join(tmpdir, "file1.txt"), | 
| 68 |  | -            os.path.join(tmpdir, "file2.txt"), | 
| 69 |  | -            os.path.join(tmpdir, "file3.txt"), | 
| 70 |  | -        ] | 
| 71 |  | -        for f in test_files: | 
| 72 |  | -            with open(f, "w") as fp: | 
| 73 |  | -                fp.write("test content") | 
| 74 |  | -         | 
| 75 |  | -        stats = MockStatistics() | 
| 76 |  | -        pattern = os.path.join(tmpdir, "*.txt") | 
| 77 |  | -         | 
| 78 |  | -        connector = FileConnector(dataset=pattern, statistics=stats) | 
| 79 |  | -         | 
| 80 |  | -        # Check that all files were found | 
| 81 |  | -        assert len(connector.files) == 3 | 
| 82 |  | -        assert connector.has_wildcards is True | 
| 83 |  | -         | 
| 84 |  | -        # Check files are sorted | 
| 85 |  | -        assert connector.files == sorted(test_files) | 
|  | 62 | +    stats = MockStatistics() | 
|  | 63 | +    pattern = "testdata/wildcard_test/*.parquet" | 
|  | 64 | +     | 
|  | 65 | +    connector = FileConnector(dataset=pattern, statistics=stats) | 
|  | 66 | +     | 
|  | 67 | +    # Check that all files were found | 
|  | 68 | +    assert len(connector.files) == 3 | 
|  | 69 | +    assert connector.has_wildcards is True | 
|  | 70 | +     | 
|  | 71 | +    # Check files are sorted | 
|  | 72 | +    expected_files = sorted([ | 
|  | 73 | +        "testdata/wildcard_test/file1.parquet", | 
|  | 74 | +        "testdata/wildcard_test/file2.parquet", | 
|  | 75 | +        "testdata/wildcard_test/file3.parquet" | 
|  | 76 | +    ]) | 
|  | 77 | +    assert connector.files == expected_files | 
| 86 | 78 | 
 | 
| 87 | 79 | 
 | 
| 88 | 80 | def test_single_file_no_wildcard(): | 
| 89 | 81 |     """Test that single files still work without wildcards""" | 
| 90 |  | -    with tempfile.TemporaryDirectory() as tmpdir: | 
| 91 |  | -        test_file = os.path.join(tmpdir, "test.txt") | 
| 92 |  | -        with open(test_file, "w") as fp: | 
| 93 |  | -            fp.write("test content") | 
| 94 |  | -         | 
| 95 |  | -        stats = MockStatistics() | 
| 96 |  | -        connector = FileConnector(dataset=test_file, statistics=stats) | 
| 97 |  | -         | 
| 98 |  | -        assert connector.has_wildcards is False | 
| 99 |  | -        assert connector.files == [test_file] | 
|  | 82 | +    stats = MockStatistics() | 
|  | 83 | +    test_file = "testdata/wildcard_test/file1.parquet" | 
|  | 84 | +     | 
|  | 85 | +    connector = FileConnector(dataset=test_file, statistics=stats) | 
|  | 86 | +     | 
|  | 87 | +    assert connector.has_wildcards is False | 
|  | 88 | +    assert connector.files == [test_file] | 
| 100 | 89 | 
 | 
| 101 | 90 | 
 | 
| 102 | 91 | def test_wildcard_range_pattern(): | 
| 103 | 92 |     """Test wildcard with range patterns like [0-9]""" | 
| 104 |  | -    with tempfile.TemporaryDirectory() as tmpdir: | 
| 105 |  | -        # Create files matching a range pattern | 
| 106 |  | -        test_files = [] | 
| 107 |  | -        for i in range(5): | 
| 108 |  | -            f = os.path.join(tmpdir, f"file{i}.txt") | 
| 109 |  | -            with open(f, "w") as fp: | 
| 110 |  | -                fp.write("test") | 
| 111 |  | -            test_files.append(f) | 
| 112 |  | -         | 
| 113 |  | -        # Create a file that shouldn't match | 
| 114 |  | -        non_match = os.path.join(tmpdir, "fileX.txt") | 
| 115 |  | -        with open(non_match, "w") as fp: | 
| 116 |  | -            fp.write("test") | 
| 117 |  | -         | 
| 118 |  | -        stats = MockStatistics() | 
| 119 |  | -        pattern = os.path.join(tmpdir, "file[0-9].txt") | 
| 120 |  | -         | 
| 121 |  | -        connector = FileConnector(dataset=pattern, statistics=stats) | 
| 122 |  | -         | 
| 123 |  | -        # Should match only files with digits | 
| 124 |  | -        assert len(connector.files) == 5 | 
| 125 |  | -        assert all("file" in f and any(str(i) in f for i in range(5)) for f in connector.files) | 
| 126 |  | -        assert non_match not in connector.files | 
|  | 93 | +    stats = MockStatistics() | 
|  | 94 | +    pattern = "testdata/wildcard_test/file[1-3].parquet" | 
|  | 95 | +     | 
|  | 96 | +    connector = FileConnector(dataset=pattern, statistics=stats) | 
|  | 97 | +     | 
|  | 98 | +    # Should match files 1, 2, 3 (all 3 files) | 
|  | 99 | +    assert len(connector.files) == 3 | 
|  | 100 | +    expected_files = sorted([ | 
|  | 101 | +        "testdata/wildcard_test/file1.parquet", | 
|  | 102 | +        "testdata/wildcard_test/file2.parquet", | 
|  | 103 | +        "testdata/wildcard_test/file3.parquet" | 
|  | 104 | +    ]) | 
|  | 105 | +    assert connector.files == expected_files | 
| 127 | 106 | 
 | 
| 128 | 107 | 
 | 
| 129 | 108 | def test_wildcard_question_mark(): | 
| 130 | 109 |     """Test wildcard with ? (single character match)""" | 
| 131 |  | -    with tempfile.TemporaryDirectory() as tmpdir: | 
| 132 |  | -        # Create files | 
| 133 |  | -        file1 = os.path.join(tmpdir, "fileA.txt") | 
| 134 |  | -        file2 = os.path.join(tmpdir, "fileB.txt") | 
| 135 |  | -        file_no_match = os.path.join(tmpdir, "fileAB.txt") | 
| 136 |  | -         | 
| 137 |  | -        for f in [file1, file2, file_no_match]: | 
| 138 |  | -            with open(f, "w") as fp: | 
| 139 |  | -                fp.write("test") | 
| 140 |  | -         | 
| 141 |  | -        stats = MockStatistics() | 
| 142 |  | -        pattern = os.path.join(tmpdir, "file?.txt") | 
| 143 |  | -         | 
| 144 |  | -        connector = FileConnector(dataset=pattern, statistics=stats) | 
| 145 |  | -         | 
| 146 |  | -        # Should match only single-character files | 
| 147 |  | -        assert len(connector.files) == 2 | 
| 148 |  | -        assert file1 in connector.files | 
| 149 |  | -        assert file2 in connector.files | 
| 150 |  | -        assert file_no_match not in connector.files | 
|  | 110 | +    stats = MockStatistics() | 
|  | 111 | +    # Use ? to match single digit in filename | 
|  | 112 | +    pattern = "testdata/wildcard_test/file?.parquet" | 
|  | 113 | +     | 
|  | 114 | +    connector = FileConnector(dataset=pattern, statistics=stats) | 
|  | 115 | +     | 
|  | 116 | +    # Should match all 3 files (file1, file2, file3) | 
|  | 117 | +    assert len(connector.files) == 3 | 
|  | 118 | +    expected_files = sorted([ | 
|  | 119 | +        "testdata/wildcard_test/file1.parquet", | 
|  | 120 | +        "testdata/wildcard_test/file2.parquet", | 
|  | 121 | +        "testdata/wildcard_test/file3.parquet" | 
|  | 122 | +    ]) | 
|  | 123 | +    assert connector.files == expected_files | 
| 151 | 124 | 
 | 
| 152 | 125 | 
 | 
| 153 | 126 | if __name__ == "__main__":  # pragma: no cover | 
|  | 
0 commit comments