@@ -136,10 +136,10 @@ def __init__(self, *args, **kwargs):
136136        if  ".."  in  self .dataset  or  self .dataset [0 ] in  ("\\ " , "/" , "~" ):
137137            # Don't find any datasets which look like path traversal 
138138            raise  DatasetNotFoundError (dataset = self .dataset )
139-          
139+ 
140140        # Check if dataset contains wildcards 
141-         self .has_wildcards  =  any (char  in  self .dataset  for  char  in  ['*' ,  '?' ,  '[' ])
142-          
141+         self .has_wildcards  =  any (char  in  self .dataset  for  char  in  ["*" ,  "?" ,  "[" ])
142+ 
143143        if  self .has_wildcards :
144144            # Expand wildcards to get list of files 
145145            self .files  =  self ._expand_wildcards (self .dataset )
@@ -150,43 +150,43 @@ def __init__(self, *args, **kwargs):
150150        else :
151151            self .files  =  [self .dataset ]
152152            self .decoder  =  get_decoder (self .dataset )
153-      
153+ 
154154    def  _expand_wildcards (self , pattern : str ) ->  List [str ]:
155155        """ 
156156        Expand wildcard patterns in file paths while preventing path traversal. 
157-          
157+ 
158158        Supports wildcards: 
159159        - * matches any number of characters 
160-         - ? matches a single character    
160+         - ? matches a single character 
161161        - [range] matches a range of characters (e.g., [0-9], [a-z]) 
162-          
162+ 
163163        Args: 
164164            pattern: File path pattern with wildcards 
165-              
165+ 
166166        Returns: 
167167            List of matching file paths 
168168        """ 
169169        # Additional path traversal check after expansion 
170170        if  ".."  in  pattern :
171171            raise  DatasetNotFoundError (dataset = pattern )
172-          
172+ 
173173        # Use glob to expand the pattern 
174174        matched_files  =  glob .glob (pattern , recursive = False )
175-          
175+ 
176176        # Filter out any results that might have path traversal 
177177        # This is an extra safety check 
178178        safe_files  =  []
179179        for  file_path  in  matched_files :
180180            if  ".."  not  in file_path  and  os .path .isfile (file_path ):
181181                safe_files .append (file_path )
182-          
182+ 
183183        return  sorted (safe_files )
184184
185185    def  read_dataset (
186186        self , columns : list  =  None , predicates : list  =  None , limit : int  =  None , ** kwargs 
187187    ) ->  pyarrow .Table :
188188        rows_read  =  0 
189-          
189+ 
190190        # Iterate over all matched files 
191191        for  file_path  in  self .files :
192192            morsel  =  read_blob (
@@ -221,7 +221,7 @@ def get_dataset_schema(self) -> RelationSchema:
221221
222222        # Use the first file to get the schema 
223223        first_file  =  self .files [0 ]
224-          
224+ 
225225        try :
226226            file_descriptor  =  os .open (first_file , os .O_RDONLY  |  os .O_BINARY )
227227            size  =  os .path .getsize (first_file )
0 commit comments