Skip to content
Closed
1,033 changes: 915 additions & 118 deletions docs/docs/integrations/document_loaders/zeroxpdfloader.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion libs/community/extended_testing_deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jq>=1.4.1,<2
jsonschema>1
keybert>=0.8.5
langchain_openai>=0.2.1
litellm>=1.30,<=1.39.5
litellm>=1.30
lxml>=4.9.3,<6.0
markdownify>=0.11.6,<0.12
motor>=3.3.1,<4
Expand All @@ -62,6 +62,7 @@ pandas>=2.0.1,<3
pdfminer-six==20231228
pdfplumber>=0.11
pgvector>=0.1.6,<0.2
pillow>=11.1
playwright>=1.48.0,<2
praw>=7.7.1,<8
premai>=0.3.25,<0.4,!=0.3.100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@
PyPDFium2Loader,
PyPDFLoader,
UnstructuredPDFLoader,
ZeroxPDFLoader,
)
from langchain_community.document_loaders.pebblo import (
PebbloSafeLoader,
Expand Down Expand Up @@ -732,6 +733,7 @@
"YoutubeAudioLoader": "langchain_community.document_loaders.blob_loaders",
"YoutubeLoader": "langchain_community.document_loaders.youtube",
"YuqueLoader": "langchain_community.document_loaders.yuque",
"ZeroxPDFLoader": "langchain_community.document_loaders.pdf",
}


Expand Down Expand Up @@ -940,4 +942,5 @@ def __getattr__(name: str) -> Any:
"YoutubeAudioLoader",
"YoutubeLoader",
"YuqueLoader",
"ZeroxPDFLoader",
]
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
PyMuPDFParser,
PyPDFium2Parser,
PyPDFParser,
ZeroxPDFParser,
)
from langchain_community.document_loaders.parsers.vsdx import (
VsdxParser,
Expand All @@ -55,6 +56,7 @@
"RapidOCRBlobParser": "langchain_community.document_loaders.parsers.images",
"TesseractBlobParser": "langchain_community.document_loaders.parsers.images",
"VsdxParser": "langchain_community.document_loaders.parsers.vsdx",
"ZeroxPDFParser": "langchain_community.document_loaders.parsers.pdf",
}


Expand Down Expand Up @@ -82,4 +84,5 @@ def __getattr__(name: str) -> Any:
"RapidOCRBlobParser",
"TesseractBlobParser",
"VsdxParser",
"ZeroxPDFParser",
]
Loading
Loading