@@ -36,7 +36,7 @@ def _write_file(df, path, file_type):
 
 @pytest.mark.parametrize("file_type", ["parquet", "json", "jsonl", "csv"])
 def test_get_file_column_names_basic_parquet(tmp_path, file_type):
-    """Test _get_file_column_names with basic parquet file."""
+    """Test get_file_column_names with basic parquet file."""
     test_data = {
         "id": [1, 2, 3],
         "name": ["Alice", "Bob", "Charlie"],
@@ -51,7 +51,7 @@ def test_get_file_column_names_basic_parquet(tmp_path, file_type):
 
 
 def test_get_file_column_names_nested_fields(tmp_path):
-    """Test _get_file_column_names with nested fields in parquet."""
+    """Test get_file_column_names with nested fields in parquet."""
     schema = pa.schema(
         [
             pa.field(
@@ -72,7 +72,7 @@ def test_get_file_column_names_nested_fields(tmp_path):
 
 @pytest.mark.parametrize("file_type", ["parquet", "json", "jsonl", "csv"])
 def test_get_file_column_names_empty_parquet(tmp_path, file_type):
-    """Test _get_file_column_names with empty parquet file."""
+    """Test get_file_column_names with empty parquet file."""
     empty_df = pd.DataFrame()
     empty_path = tmp_path / f"empty.{file_type}"
     _write_file(empty_df, empty_path, file_type)
@@ -83,7 +83,7 @@ def test_get_file_column_names_empty_parquet(tmp_path, file_type):
 
 @pytest.mark.parametrize("file_type", ["parquet", "json", "jsonl", "csv"])
 def test_get_file_column_names_large_schema(tmp_path, file_type):
-    """Test _get_file_column_names with many columns."""
+    """Test get_file_column_names with many columns."""
     num_columns = 50
     test_data = {f"col_{i}": np.random.randn(10) for i in range(num_columns)}
     df = pd.DataFrame(test_data)
@@ -98,7 +98,7 @@ def test_get_file_column_names_large_schema(tmp_path, file_type):
 
 @pytest.mark.parametrize("file_type", ["parquet", "json", "jsonl", "csv"])
 def test_get_file_column_names_special_characters(tmp_path, file_type):
-    """Test _get_file_column_names with special characters in column names."""
+    """Test get_file_column_names with special characters in column names."""
     special_data = {
         "column with spaces": [1],
         "column-with-dashes": [2],
@@ -117,7 +117,7 @@ def test_get_file_column_names_special_characters(tmp_path, file_type):
 
 @pytest.mark.parametrize("file_type", ["parquet", "json", "jsonl", "csv"])
 def test_get_file_column_names_unicode(tmp_path, file_type):
-    """Test _get_file_column_names with unicode column names."""
+    """Test get_file_column_names with unicode column names."""
     unicode_data = {"café": [1], "résumé": [2], "naïve": [3], "façade": [4], "garçon": [5], "über": [6], "schön": [7]}
     df_unicode = pd.DataFrame(unicode_data)
 
@@ -126,6 +126,22 @@ def test_get_file_column_names_unicode(tmp_path, file_type):
     assert get_file_column_names(str(unicode_path), file_type) == df_unicode.columns.tolist()
 
 
+@pytest.mark.parametrize("file_type", ["parquet", "csv", "json", "jsonl"])
+def test_get_file_column_names_with_glob_pattern(tmp_path, file_type):
+    df = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
+    for i in range(5):
+        _write_file(df, tmp_path / f"{i}.{file_type}", file_type)
+    assert get_file_column_names(f"{tmp_path}/*.{file_type}", file_type) == ["col1", "col2"]
+
+
+def test_get_file_column_names_with_glob_pattern_error(tmp_path):
+    df = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
+    for i in range(5):
+        _write_file(df, tmp_path / f"{i}.parquet", "parquet")
+    with pytest.raises(InvalidFilePathError, match="No files found matching pattern"):
+        get_file_column_names(f"{tmp_path}/*.csv", "csv")
+
+
 def test_get_file_column_names_error_handling():
     with pytest.raises(InvalidFilePathError, match="🛑 Unsupported file type: 'txt'"):
         get_file_column_names("test.txt", "txt")
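For readers skimming the new tests: the behavior they pin down is that a pattern such as "{tmp_path}/*.parquet" resolves to the matching files, and that an empty match raises InvalidFilePathError with "No files found matching pattern". Below is a minimal sketch of how such glob-aware column-name resolution could look. It is not this repository's implementation; the function name, the locally defined error class, and the choice to read only the first matched file via pyarrow/pandas are assumptions made for illustration.

# Sketch only: names, error class, and read strategy are assumptions inferred
# from the tests above, not the project's actual get_file_column_names.
import glob

import pandas as pd
import pyarrow.parquet as pq


class InvalidFilePathError(ValueError):
    """Stand-in for the error type the tests import."""


def get_file_column_names_sketch(path: str, file_type: str) -> list[str]:
    if file_type not in ("parquet", "csv", "json", "jsonl"):
        raise InvalidFilePathError(f"🛑 Unsupported file type: '{file_type}'")

    # Expand glob patterns such as "/tmp/data/*.parquet"; a literal path
    # simply matches itself when the file exists.
    matches = sorted(glob.glob(path))
    if not matches:
        raise InvalidFilePathError(f"No files found matching pattern: {path}")

    first = matches[0]
    if file_type == "parquet":
        # Read only the footer schema, not the data pages.
        return pq.read_schema(first).names
    if file_type == "csv":
        # nrows=0 parses just the header row.
        return pd.read_csv(first, nrows=0).columns.tolist()
    # json / jsonl: read a single record (or the whole file for plain json)
    # to infer the columns.
    if file_type == "jsonl":
        return pd.read_json(first, lines=True, nrows=1).columns.tolist()
    return pd.read_json(first).columns.tolist()

Reading only the first match keeps the call cheap on large globbed datasets, at the cost of assuming every matched file shares the same schema, which is exactly what the parametrized glob test above relies on.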