-
Notifications
You must be signed in to change notification settings - Fork 46
Open
Description
Link to the original PDF:
http://media.turuz.com/users/kesli-2017/Bolum_1_Files_0-499/0207-CHaghatay_lugatleri_uzre_notlar.pdf
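Logged error (the same traceback appears twice: first as logged by the pipeline's warning, then as the uncaught exception raised in the calling script):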
INFO:docling.pipeline.base_pipeline:Processing document 8107320367458434831.pdf
WARNING:docling.pipeline.base_pipeline:Encountered an error during conversion of document 1fdabc0b786a08982baa89fa258d542e682449ab31140d917082fceb76a63f0f:
Traceback (most recent call last):
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/pipeline/base_pipeline.py", line 164, in _build_document
for p in pipeline_pages: # Must exhaust!
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/pipeline/base_pipeline.py", line 129, in _apply_on_pages
yield from page_batch
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/page_assemble_model.py", line 70, in __call__
for page in page_batch:
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/table_structure_model.py", line 177, in __call__
for page in page_batch:
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/layout_model.py", line 152, in __call__
pages = list(page_batch)
^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/easyocr_model.py", line 130, in __call__
yield from page_batch
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/page_preprocessing_model.py", line 48, in __call__
page = self._parse_page_cells(conv_res, page)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/page_preprocessing_model.py", line 72, in _parse_page_cells
page.parsed_page = page._backend.get_segmented_page()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/backend/docling_parse_v4_backend.py", line 96, in get_segmented_page
self._ensure_parsed()
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/backend/docling_parse_v4_backend.py", line 48, in _ensure_parsed
seg_page = self._dp_doc.get_page(
^^^^^^^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling_parse/pdf_parser.py", line 136, in get_page
doc_dict = self._parser.parse_pdf_from_key_on_page(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: font_name [NULL] is not known: /F0, /F1, /F2, /F24, /F28, /F29, /F3, /F30, /F31, /F5, /F52, /F53, /F54, /F55, /F78
Traceback (most recent call last):
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/convert-batch-s3.py", line 133, in <module>
for item in result_processor.process_documents(
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/local_utils/results_processor.py", line 63, in process_documents
for i, conv_res in enumerate(results):
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/document_converter.py", line 267, in convert_all
for conv_res in conv_res_iter:
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/document_converter.py", line 339, in _convert
for item in map(
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/document_converter.py", line 386, in _process_document
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/document_converter.py", line 409, in _execute_pipeline
conv_res = pipeline.execute(in_doc, raises_on_error=raises_on_error)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/pipeline/base_pipeline.py", line 57, in execute
raise e
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/pipeline/base_pipeline.py", line 49, in execute
conv_res = self._build_document(conv_res)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/pipeline/base_pipeline.py", line 204, in _build_document
raise e
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/pipeline/base_pipeline.py", line 164, in _build_document
for p in pipeline_pages: # Must exhaust!
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/pipeline/base_pipeline.py", line 129, in _apply_on_pages
yield from page_batch
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/page_assemble_model.py", line 70, in __call__
for page in page_batch:
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/table_structure_model.py", line 177, in __call__
for page in page_batch:
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/layout_model.py", line 152, in __call__
pages = list(page_batch)
^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/easyocr_model.py", line 130, in __call__
yield from page_batch
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/page_preprocessing_model.py", line 48, in __call__
page = self._parse_page_cells(conv_res, page)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/models/page_preprocessing_model.py", line 72, in _parse_page_cells
page.parsed_page = page._backend.get_segmented_page()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/backend/docling_parse_v4_backend.py", line 96, in get_segmented_page
self._ensure_parsed()
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling/backend/docling_parse_v4_backend.py", line 48, in _ensure_parsed
seg_page = self._dp_doc.get_page(
^^^^^^^^^^^^^^^^^^^^^^
File "/Users/vku/Documents/cloud/bluevela-stuff/docling-convert-s3-to-s3/.venv/lib/python3.11/site-packages/docling_parse/pdf_parser.py", line 136, in get_page
doc_dict = self._parser.parse_pdf_from_key_on_page(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: font_name [NULL] is not known: /F0, /F1, /F2, /F24, /F28, /F29, /F3, /F30, /F31, /F5, /F52, /F53, /F54, /F55, /F78
Metadata
Assignees
Labels
No labels