Skip to content

Commit 10a90f2

Browse files
committed
chore: cleanup docs and local.py to focus on examples and tests
1 parent 160beee commit 10a90f2

20 files changed

+537
-776
lines changed

dingo/data/datasource/local.py

Lines changed: 0 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -286,66 +286,3 @@ def _load_local_file(self) -> Generator[str, None, None]:
286286
f'Unexpected error reading file "{f}": {str(e)}. '
287287
f'Please check if the file exists and is readable.'
288288
)
289-
290-
291-
def load_local_file(path: str, by_line: bool = True) -> Generator[str, None, None]:
    """
    Load a local file (or every file under a directory) and yield its contents.

    This is a standalone helper function for loading local files without needing
    to create a full LocalDataSource instance.

    Because this is a generator, nothing is validated or read until the first
    item is requested; errors surface during iteration, not at call time.

    Args:
        path: Path to the file or directory to load. Directories are walked
            recursively in sorted order so the output is reproducible.
        by_line: If True, yield content line by line. If False, yield the
            entire content of each file as a single string.

    Yields:
        str: Either one line (line ending preserved) or one whole file's text.

    Raises:
        RuntimeError: If the path doesn't exist, a file is not readable, or a
            file has an unsupported format/encoding. The underlying exception
            is attached as ``__cause__``.
    """
    import gzip

    if not os.path.exists(path):
        raise RuntimeError(f'"{path}" is not a valid path')

    f_list = []
    if os.path.isfile(path):
        f_list = [path]
    elif os.path.isdir(path):
        # Find all files recursively. os.walk order is filesystem-dependent,
        # so sort both the sub-directories (in place, which steers the walk)
        # and the file names to make the traversal deterministic.
        for root, dirs, files in os.walk(path):
            dirs.sort()
            f_list.extend(os.path.join(root, name) for name in sorted(files))

    for f in f_list:
        if f.endswith('.gz'):
            # Gzipped input: decompress transparently and decode as UTF-8 text.
            try:
                with gzip.open(f, 'rt', encoding='utf-8') as _f:
                    if by_line:
                        yield from _f
                    else:
                        yield _f.read()
            except Exception as gz_error:
                raise RuntimeError(
                    f'Failed to read gzipped file "{f}": {str(gz_error)}. '
                    f'Please ensure the file is a valid gzip-compressed text file.'
                ) from gz_error
        else:
            # For regular files, try UTF-8 encoding
            try:
                with open(f, "r", encoding="utf-8") as _f:
                    if by_line:
                        yield from _f
                    else:
                        yield _f.read()
            except UnicodeDecodeError as decode_error:
                raise RuntimeError(
                    f'Failed to read file "{f}": Unsupported file format or encoding. '
                    f'Dingo only supports UTF-8 text files (.jsonl, .json, .txt), Excel files (.xlsx, .xls) and .gz compressed text files. '
                    f'Original error: {str(decode_error)}'
                ) from decode_error
            except OSError as os_error:
                # Honour the documented contract: unreadable files (permissions,
                # vanished file, I/O failure) are reported as RuntimeError too.
                raise RuntimeError(
                    f'Unexpected error reading file "{f}": {str(os_error)}. '
                    f'Please check if the file exists and is readable.'
                ) from os_error

0 commit comments

Comments
 (0)