Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions docling/experimental/pipeline/threaded_layout_vlm_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@
from docling.experimental.datamodel.threaded_layout_vlm_pipeline_options import (
ThreadedLayoutVlmPipelineOptions,
)
from docling.models.api_vlm_model import ApiVlmModel
from docling.models.base_model import BaseVlmPageModel
from docling.models.layout_model import LayoutModel
from docling.models.vlm_models_inline.hf_transformers_model import (
from docling.models.stages.layout.layout_model import LayoutModel
from docling.models.vlm_pipeline_models.api_vlm_model import ApiVlmModel
from docling.models.vlm_pipeline_models.hf_transformers_model import (
HuggingFaceTransformersVlmModel,
)
from docling.models.vlm_models_inline.mlx_model import HuggingFaceMlxModel
from docling.models.vlm_pipeline_models.mlx_model import HuggingFaceMlxModel
from docling.pipeline.base_pipeline import BasePipeline
from docling.pipeline.standard_pdf_pipeline import (
ProcessingResult,
Expand Down Expand Up @@ -162,7 +162,7 @@ def build_prompt(
vlm_options=vlm_options,
)
elif vlm_options.inference_framework == InferenceFramework.VLLM:
from docling.models.vlm_models_inline.vllm_model import VllmVlmModel
from docling.models.vlm_pipeline_models.vllm_model import VllmVlmModel

self.vlm_model = VllmVlmModel(
enabled=True,
Expand Down
Empty file.
26 changes: 16 additions & 10 deletions docling/models/plugins/defaults.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
def ocr_engines():
from docling.models.auto_ocr_model import OcrAutoModel
from docling.models.easyocr_model import EasyOcrModel
from docling.models.ocr_mac_model import OcrMacModel
from docling.models.rapid_ocr_model import RapidOcrModel
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
from docling.models.tesseract_ocr_model import TesseractOcrModel
from docling.models.stages.ocr.auto_ocr_model import OcrAutoModel
from docling.models.stages.ocr.easyocr_model import EasyOcrModel
from docling.models.stages.ocr.ocr_mac_model import OcrMacModel
from docling.models.stages.ocr.rapid_ocr_model import RapidOcrModel
from docling.models.stages.ocr.tesseract_ocr_cli_model import TesseractOcrCliModel
from docling.models.stages.ocr.tesseract_ocr_model import TesseractOcrModel

return {
"ocr_engines": [
Expand All @@ -19,8 +19,12 @@ def ocr_engines():


def picture_description():
from docling.models.picture_description_api_model import PictureDescriptionApiModel
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
from docling.models.stages.picture_description.picture_description_api_model import (
PictureDescriptionApiModel,
)
from docling.models.stages.picture_description.picture_description_vlm_model import (
PictureDescriptionVlmModel,
)

return {
"picture_description": [
Expand All @@ -34,7 +38,7 @@ def layout_engines():
from docling.experimental.models.table_crops_layout_model import (
TableCropsLayoutModel,
)
from docling.models.layout_model import LayoutModel
from docling.models.stages.layout.layout_model import LayoutModel

return {
"layout_engines": [
Expand All @@ -45,7 +49,9 @@ def layout_engines():


def table_structure_engines():
from docling.models.table_structure_model import TableStructureModel
from docling.models.stages.table_structure.table_structure_model import (
TableStructureModel,
)

return {
"table_structure_engines": [
Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
RapidOcrOptions,
)
from docling.models.base_ocr_model import BaseOcrModel
from docling.models.easyocr_model import EasyOcrModel
from docling.models.ocr_mac_model import OcrMacModel
from docling.models.rapid_ocr_model import RapidOcrModel
from docling.models.stages.ocr.easyocr_model import EasyOcrModel
from docling.models.stages.ocr.ocr_mac_model import OcrMacModel
from docling.models.stages.ocr.rapid_ocr_model import RapidOcrModel

_log = logging.getLogger(__name__)

Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)
from docling.datamodel.document import ConversionResult
from docling.models.base_model import BasePageModel
from docling.models.layout_model import LayoutModel
from docling.models.stages.layout.layout_model import LayoutModel
from docling.utils.profiling import TimeRecorder

_log = logging.getLogger(__name__)
Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
6 changes: 3 additions & 3 deletions docling/pipeline/base_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@
)
from docling.datamodel.settings import settings
from docling.models.base_model import GenericEnrichmentModel
from docling.models.document_picture_classifier import (
from docling.models.factories import get_picture_description_factory
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
from docling.models.stages.picture_classifier.document_picture_classifier import (
DocumentPictureClassifier,
DocumentPictureClassifierOptions,
)
from docling.models.factories import get_picture_description_factory
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
from docling.utils.profiling import ProfilingScope, TimeRecorder
from docling.utils.utils import chunkify

Expand Down
2 changes: 1 addition & 1 deletion docling/pipeline/extraction_vlm_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
VlmExtractionPipelineOptions,
)
from docling.datamodel.settings import settings
from docling.models.vlm_models_inline.nuextract_transformers_model import (
from docling.models.extraction.nuextract_transformers_model import (
NuExtractTransformersModel,
)
from docling.pipeline.base_extraction_pipeline import BaseExtractionPipeline
Expand Down
17 changes: 13 additions & 4 deletions docling/pipeline/legacy_standard_pdf_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,27 @@
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel
from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
from docling.models.factories import (
get_layout_factory,
get_ocr_factory,
get_table_structure_factory,
)
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
from docling.models.page_preprocessing_model import (
from docling.models.stages.code_formula.code_formula_model import (
CodeFormulaModel,
CodeFormulaModelOptions,
)
from docling.models.stages.page_assemble.page_assemble_model import (
PageAssembleModel,
PageAssembleOptions,
)
from docling.models.stages.page_preprocessing.page_preprocessing_model import (
PagePreprocessingModel,
PagePreprocessingOptions,
)
from docling.models.readingorder_model import ReadingOrderModel, ReadingOrderOptions
from docling.models.stages.reading_order.readingorder_model import (
ReadingOrderModel,
ReadingOrderOptions,
)
from docling.pipeline.base_pipeline import PaginatedPipeline
from docling.utils.model_downloader import download_models
from docling.utils.profiling import ProfilingScope, TimeRecorder
Expand Down
17 changes: 13 additions & 4 deletions docling/pipeline/standard_pdf_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,27 @@
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions
from docling.datamodel.settings import settings
from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
from docling.models.factories import (
get_layout_factory,
get_ocr_factory,
get_table_structure_factory,
)
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
from docling.models.page_preprocessing_model import (
from docling.models.stages.code_formula.code_formula_model import (
CodeFormulaModel,
CodeFormulaModelOptions,
)
from docling.models.stages.page_assemble.page_assemble_model import (
PageAssembleModel,
PageAssembleOptions,
)
from docling.models.stages.page_preprocessing.page_preprocessing_model import (
PagePreprocessingModel,
PagePreprocessingOptions,
)
from docling.models.readingorder_model import ReadingOrderModel, ReadingOrderOptions
from docling.models.stages.reading_order.readingorder_model import (
ReadingOrderModel,
ReadingOrderOptions,
)
from docling.pipeline.base_pipeline import ConvertPipeline
from docling.utils.profiling import ProfilingScope, TimeRecorder
from docling.utils.utils import chunkify
Expand Down
8 changes: 4 additions & 4 deletions docling/pipeline/vlm_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@
ResponseFormat,
)
from docling.datamodel.settings import settings
from docling.models.api_vlm_model import ApiVlmModel
from docling.models.vlm_models_inline.hf_transformers_model import (
from docling.models.vlm_pipeline_models.api_vlm_model import ApiVlmModel
from docling.models.vlm_pipeline_models.hf_transformers_model import (
HuggingFaceTransformersVlmModel,
)
from docling.models.vlm_models_inline.mlx_model import HuggingFaceMlxModel
from docling.models.vlm_pipeline_models.mlx_model import HuggingFaceMlxModel
from docling.pipeline.base_pipeline import PaginatedPipeline
from docling.utils.profiling import ProfilingScope, TimeRecorder

Expand Down Expand Up @@ -93,7 +93,7 @@ def __init__(self, pipeline_options: VlmPipelineOptions):
),
]
elif vlm_options.inference_framework == InferenceFramework.VLLM:
from docling.models.vlm_models_inline.vllm_model import VllmVlmModel
from docling.models.vlm_pipeline_models.vllm_model import VllmVlmModel

self.build_pipe = [
VllmVlmModel(
Expand Down
20 changes: 13 additions & 7 deletions docling/utils/model_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,19 @@
SMOLDOCLING_MLX,
SMOLDOCLING_TRANSFORMERS,
)
from docling.models.code_formula_model import CodeFormulaModel
from docling.models.document_picture_classifier import DocumentPictureClassifier
from docling.models.easyocr_model import EasyOcrModel
from docling.models.layout_model import LayoutModel
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
from docling.models.rapid_ocr_model import RapidOcrModel
from docling.models.table_structure_model import TableStructureModel
from docling.models.stages.code_formula.code_formula_model import CodeFormulaModel
from docling.models.stages.layout.layout_model import LayoutModel
from docling.models.stages.ocr.easyocr_model import EasyOcrModel
from docling.models.stages.ocr.rapid_ocr_model import RapidOcrModel
from docling.models.stages.picture_classifier.document_picture_classifier import (
DocumentPictureClassifier,
)
from docling.models.stages.picture_description.picture_description_vlm_model import (
PictureDescriptionVlmModel,
)
from docling.models.stages.table_structure.table_structure_model import (
TableStructureModel,
)
from docling.models.utils.hf_model_download import download_hf_model

_log = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/enrich_doclingdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from docling.datamodel.base_models import InputFormat, ItemAndImageEnrichmentElement
from docling.datamodel.document import InputDocument
from docling.models.base_model import BaseItemAndImageEnrichmentModel
from docling.models.document_picture_classifier import (
from docling.models.stages.picture_classifier.document_picture_classifier import (
DocumentPictureClassifier,
DocumentPictureClassifierOptions,
)
Expand Down