Skip to content

Commit a36d12a

Browse files
fix: add pi-heif as direct dependency and improve markdown file handling
Co-Authored-By: Aaron <AJ> Steers <[email protected]>
1 parent be3c07b commit a36d12a

File tree

3 files changed

+79
-9
lines changed

3 files changed

+79
-9
lines changed

airbyte_cdk/sources/file_based/file_types/unstructured_parser.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -218,9 +218,13 @@ def _read_file(
218218
self._get_file_type_error_message(filetype),
219219
)
220220
if filetype in {FileType.MD, FileType.TXT}:
221-
file_content: bytes = file_handle.read()
222-
decoded_content: str = optional_decode(file_content)
223-
return decoded_content
221+
try:
222+
file_content: bytes = file_handle.read()
223+
decoded_content: str = optional_decode(file_content)
224+
return decoded_content
225+
except Exception as e:
226+
logger.error(f"Error reading {filetype} file: {str(e)}")
227+
raise self._create_parse_error(remote_file, str(e))
224228
if format.processing.mode == "local":
225229
return self._read_file_locally(
226230
file_handle,

poetry.lock

Lines changed: 69 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@ sqlalchemy = {version = "^2.0,!=2.0.36", optional = true }
8484
xmltodict = ">=0.13,<0.15"
8585
anyascii = "^0.3.2"
8686
whenever = "^0.6.16"
87+
pi-heif = "^0.22.0"
8788

8889
[tool.poetry.group.dev.dependencies]
8990
freezegun = "*"
@@ -106,6 +107,7 @@ types-python-dateutil = "^2.9.0.20241003"
106107
types-pyyaml = "^6.0.12.20240917"
107108
types-cachetools = "^5.5.0.20240820"
108109
deptry = "^0.23.0"
110+
pi-heif = "^0.22.0"
109111

110112
[tool.poetry.extras]
111113
file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy", "pi-heif"]
@@ -154,7 +156,7 @@ lint-fix = { cmd = "poetry run ruff check --fix .", help = "Auto-fix any lint is
154156
lint-fix-unsafe = { cmd = "poetry run ruff check --fix --unsafe-fixes .", help = "Lint-fix modified files, including 'unsafe' fixes. It is recommended to first commit any pending changes and then always manually review any unsafe changes applied." }
155157

156158
# ruff fix everything (ignoring non-Python fixes)
157-
ruff-fix = { sequence = ["lint-fix", "_format-fix-ruff"] , help = "Lint-fix and format-fix all code." }
159+
ruff-fix = { sequence = ["lint-fix", "_format-fix-ruff"], help = "Lint-fix and format-fix all code." }
158160

159161
# Combined Check and Fix tasks
160162

0 commit comments

Comments
 (0)