@@ -286,66 +286,3 @@ def _load_local_file(self) -> Generator[str, None, None]:
286286 f'Unexpected error reading file "{ f } ": { str (e )} . '
287287 f'Please check if the file exists and is readable.'
288288 )
289-
290-
def load_local_file(path: str, by_line: bool = True) -> Generator[str, None, None]:
    """
    Load a local file (or every file under a directory) and yield its contents.

    This is a standalone helper function for loading local files without needing
    to create a full LocalDataSource instance.

    Because this is a generator, nothing is checked or read until the first
    item is requested; all errors listed below surface during iteration.

    Args:
        path: Path to the file or directory to load. Directories are walked
            recursively and their files are read in sorted order.
        by_line: If True, yield content line by line. If False, yield the entire
            content of each file as a single string.

    Yields:
        str: One line (``by_line=True``) or one whole file (``by_line=False``).
            Files ending in ``.gz`` are decompressed and decoded as UTF-8 text;
            all other files are read as UTF-8 text.

    Raises:
        RuntimeError: If the path doesn't exist, a file is not readable, or a
            file has an unsupported format/encoding. The underlying exception
            is attached as ``__cause__``.
    """
    import gzip

    if not os.path.exists(path):
        raise RuntimeError(f'"{path}" is not a valid path')

    f_list = []
    if os.path.isfile(path):
        f_list = [path]
    elif os.path.isdir(path):
        # Find all files recursively. Sorting ``dirs`` in place steers the
        # top-down walk, and sorting ``files`` fixes the order within each
        # directory, so the output does not depend on filesystem listing order.
        for root, dirs, files in os.walk(path):
            dirs.sort()
            f_list.extend(os.path.join(root, name) for name in sorted(files))

    for f in f_list:
        if f.endswith('.gz'):
            # Reading is lazy, so the ``try`` has to span the whole iteration:
            # corrupt gzip data or bad UTF-8 may only show up mid-file.
            try:
                with gzip.open(f, 'rt', encoding='utf-8') as _f:
                    if by_line:
                        yield from _f
                    else:
                        yield _f.read()
            except Exception as gz_error:
                # Deliberately broad: any failure on a gzip file is reported
                # as RuntimeError, which is what callers are documented to catch.
                raise RuntimeError(
                    f'Failed to read gzipped file "{f}": {str(gz_error)}. '
                    f'Please ensure the file is a valid gzip-compressed text file.'
                ) from gz_error
        else:
            # For regular files, try UTF-8 encoding
            try:
                with open(f, "r", encoding="utf-8") as _f:
                    if by_line:
                        yield from _f
                    else:
                        yield _f.read()
            except UnicodeDecodeError as decode_error:
                raise RuntimeError(
                    f'Failed to read file "{f}": Unsupported file format or encoding. '
                    f'Dingo only supports UTF-8 text files (.jsonl, .json, .txt), Excel files (.xlsx, .xls) and .gz compressed text files. '
                    f'Original error: {str(decode_error)}'
                ) from decode_error
            except OSError as read_error:
                # Honour the documented contract: unreadable files (permissions,
                # file removed after listing, ...) are reported as RuntimeError.
                raise RuntimeError(
                    f'Unexpected error reading file "{f}": {str(read_error)}. '
                    f'Please check if the file exists and is readable.'
                ) from read_error
0 commit comments