Skip to content

Commit 8199546

Browse files
fix: update partition_pdf import path and add pi-heif dependency
Co-Authored-By: Aaron <AJ> Steers <[email protected]>
1 parent 91f34da commit 8199546

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

airbyte_cdk/sources/file_based/file_types/unstructured_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ def _import_unstructured() -> None:
8080
global unstructured_partition_docx
8181
global unstructured_partition_pptx
8282
from unstructured.partition.docx import partition_docx
83-
from unstructured.partition.pdf import partition_pdf
83+
from unstructured.partition.pdf_image.pdf import partition_pdf
8484
from unstructured.partition.pptx import partition_pptx
8585

8686
# separate global variables to properly propagate typing

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ types-cachetools = "^5.5.0.20240820"
108108
deptry = "^0.23.0"
109109

110110
[tool.poetry.extras]
111-
file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"]
111+
file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy", "pi-heif"]
112112
vector-db-based = ["langchain", "openai", "cohere", "tiktoken"]
113113
sql = ["sqlalchemy"]
114114

0 commit comments

Comments
 (0)