@@ -136,10 +136,10 @@ def __init__(self, *args, **kwargs):
136136 if ".." in self .dataset or self .dataset [0 ] in ("\\ " , "/" , "~" ):
137137 # Don't find any datasets which look like path traversal
138138 raise DatasetNotFoundError (dataset = self .dataset )
139-
139+
140140 # Check if dataset contains wildcards
141- self .has_wildcards = any (char in self .dataset for char in ['*' , '?' , '[' ])
142-
141+ self .has_wildcards = any (char in self .dataset for char in ["*" , "?" , "[" ])
142+
143143 if self .has_wildcards :
144144 # Expand wildcards to get list of files
145145 self .files = self ._expand_wildcards (self .dataset )
@@ -150,43 +150,43 @@ def __init__(self, *args, **kwargs):
150150 else :
151151 self .files = [self .dataset ]
152152 self .decoder = get_decoder (self .dataset )
153-
153+
def _expand_wildcards(self, pattern: str) -> List[str]:
    """
    Resolve a glob-style file pattern into a sorted list of existing files.

    Wildcards understood (expansion is delegated to ``glob.glob``):
    - * matches any number of characters
    - ? matches a single character
    - [range] matches a range of characters (e.g., [0-9], [a-z])

    Args:
        pattern: File path pattern with wildcards

    Returns:
        Sorted list of matching paths; matches that are not files, or whose
        path contains "..", are left out

    Raises:
        DatasetNotFoundError: if the pattern itself contains ".."
    """
    # Refuse traversal-looking patterns before anything is expanded
    if ".." in pattern:
        raise DatasetNotFoundError(dataset=pattern)

    # recursive=False: "**" is given no special recursive meaning
    candidates = glob.glob(pattern, recursive=False)

    # Extra safety net: re-check every expanded path for ".." and keep
    # only entries that are actual files (directories are dropped)
    accepted = [
        candidate
        for candidate in candidates
        if ".." not in candidate and os.path.isfile(candidate)
    ]
    accepted.sort()
    return accepted
184184
185185 def read_dataset (
186186 self , columns : list = None , predicates : list = None , limit : int = None , ** kwargs
187187 ) -> pyarrow .Table :
188188 rows_read = 0
189-
189+
190190 # Iterate over all matched files
191191 for file_path in self .files :
192192 morsel = read_blob (
@@ -221,7 +221,7 @@ def get_dataset_schema(self) -> RelationSchema:
221221
222222 # Use the first file to get the schema
223223 first_file = self .files [0 ]
224-
224+
225225 try :
226226 file_descriptor = os .open (first_file , os .O_RDONLY | os .O_BINARY )
227227 size = os .path .getsize (first_file )
0 commit comments