diff --git a/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py b/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py index db73db8dbb..ecaf8fd5eb 100644 --- a/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py +++ b/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py @@ -32,13 +32,13 @@ from docling.experimental.datamodel.threaded_layout_vlm_pipeline_options import ( ThreadedLayoutVlmPipelineOptions, ) -from docling.models.api_vlm_model import ApiVlmModel from docling.models.base_model import BaseVlmPageModel -from docling.models.layout_model import LayoutModel -from docling.models.vlm_models_inline.hf_transformers_model import ( +from docling.models.stages.layout.layout_model import LayoutModel +from docling.models.vlm_pipeline_models.api_vlm_model import ApiVlmModel +from docling.models.vlm_pipeline_models.hf_transformers_model import ( HuggingFaceTransformersVlmModel, ) -from docling.models.vlm_models_inline.mlx_model import HuggingFaceMlxModel +from docling.models.vlm_pipeline_models.mlx_model import HuggingFaceMlxModel from docling.pipeline.base_pipeline import BasePipeline from docling.pipeline.standard_pdf_pipeline import ( ProcessingResult, @@ -162,7 +162,7 @@ def build_prompt( vlm_options=vlm_options, ) elif vlm_options.inference_framework == InferenceFramework.VLLM: - from docling.models.vlm_models_inline.vllm_model import VllmVlmModel + from docling.models.vlm_pipeline_models.vllm_model import VllmVlmModel self.vlm_model = VllmVlmModel( enabled=True, diff --git a/docling/models/extraction/__init__.py b/docling/models/extraction/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/vlm_models_inline/nuextract_transformers_model.py b/docling/models/extraction/nuextract_transformers_model.py similarity index 100% rename from docling/models/vlm_models_inline/nuextract_transformers_model.py rename to docling/models/extraction/nuextract_transformers_model.py diff --git a/docling/models/plugins/defaults.py b/docling/models/plugins/defaults.py index 11c8d7b323..f390cf5536 100644 --- a/docling/models/plugins/defaults.py +++ b/docling/models/plugins/defaults.py @@ -1,10 +1,10 @@ def ocr_engines(): - from docling.models.auto_ocr_model import OcrAutoModel - from docling.models.easyocr_model import EasyOcrModel - from docling.models.ocr_mac_model import OcrMacModel - from docling.models.rapid_ocr_model import RapidOcrModel - from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel - from docling.models.tesseract_ocr_model import TesseractOcrModel + from docling.models.stages.ocr.auto_ocr_model import OcrAutoModel + from docling.models.stages.ocr.easyocr_model import EasyOcrModel + from docling.models.stages.ocr.ocr_mac_model import OcrMacModel + from docling.models.stages.ocr.rapid_ocr_model import RapidOcrModel + from docling.models.stages.ocr.tesseract_ocr_cli_model import TesseractOcrCliModel + from docling.models.stages.ocr.tesseract_ocr_model import TesseractOcrModel return { "ocr_engines": [ @@ -19,8 +19,12 @@ def ocr_engines(): def picture_description(): - from docling.models.picture_description_api_model import PictureDescriptionApiModel - from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel + from docling.models.stages.picture_description.picture_description_api_model import ( + PictureDescriptionApiModel, + ) + from docling.models.stages.picture_description.picture_description_vlm_model import ( + PictureDescriptionVlmModel, + ) return { "picture_description": [ @@ -34,7 +38,7 @@ def layout_engines(): from docling.experimental.models.table_crops_layout_model import ( TableCropsLayoutModel, ) - from docling.models.layout_model import LayoutModel + from docling.models.stages.layout.layout_model import LayoutModel return { "layout_engines": [ @@ -45,7 +49,9 @@ def layout_engines(): def table_structure_engines(): - from docling.models.table_structure_model import TableStructureModel + from docling.models.stages.table_structure.table_structure_model import ( + TableStructureModel, + ) return { "table_structure_engines": [ diff --git a/docling/models/stages/__init__.py b/docling/models/stages/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/stages/code_formula/__init__.py b/docling/models/stages/code_formula/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/code_formula_model.py b/docling/models/stages/code_formula/code_formula_model.py similarity index 100% rename from docling/models/code_formula_model.py rename to docling/models/stages/code_formula/code_formula_model.py diff --git a/docling/models/stages/layout/__init__.py b/docling/models/stages/layout/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/layout_model.py b/docling/models/stages/layout/layout_model.py similarity index 100% rename from docling/models/layout_model.py rename to docling/models/stages/layout/layout_model.py diff --git a/docling/models/stages/ocr/__init__.py b/docling/models/stages/ocr/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/auto_ocr_model.py b/docling/models/stages/ocr/auto_ocr_model.py similarity index 96% rename from docling/models/auto_ocr_model.py rename to docling/models/stages/ocr/auto_ocr_model.py index 3723df0b03..ef2dcbb151 100644 --- a/docling/models/auto_ocr_model.py +++ b/docling/models/stages/ocr/auto_ocr_model.py @@ -15,9 +15,9 @@ RapidOcrOptions, ) from docling.models.base_ocr_model import BaseOcrModel -from docling.models.easyocr_model import EasyOcrModel -from docling.models.ocr_mac_model import OcrMacModel -from docling.models.rapid_ocr_model import RapidOcrModel +from docling.models.stages.ocr.easyocr_model import EasyOcrModel +from docling.models.stages.ocr.ocr_mac_model import OcrMacModel +from docling.models.stages.ocr.rapid_ocr_model import RapidOcrModel _log = logging.getLogger(__name__) diff --git a/docling/models/easyocr_model.py b/docling/models/stages/ocr/easyocr_model.py similarity index 100% rename from docling/models/easyocr_model.py rename to docling/models/stages/ocr/easyocr_model.py diff --git a/docling/models/ocr_mac_model.py b/docling/models/stages/ocr/ocr_mac_model.py similarity index 100% rename from docling/models/ocr_mac_model.py rename to docling/models/stages/ocr/ocr_mac_model.py diff --git a/docling/models/rapid_ocr_model.py b/docling/models/stages/ocr/rapid_ocr_model.py similarity index 100% rename from docling/models/rapid_ocr_model.py rename to docling/models/stages/ocr/rapid_ocr_model.py diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/stages/ocr/tesseract_ocr_cli_model.py similarity index 100% rename from docling/models/tesseract_ocr_cli_model.py rename to docling/models/stages/ocr/tesseract_ocr_cli_model.py diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/stages/ocr/tesseract_ocr_model.py similarity index 100% rename from docling/models/tesseract_ocr_model.py rename to docling/models/stages/ocr/tesseract_ocr_model.py diff --git a/docling/models/stages/page_assemble/__init__.py b/docling/models/stages/page_assemble/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/page_assemble_model.py b/docling/models/stages/page_assemble/page_assemble_model.py similarity index 98% rename from docling/models/page_assemble_model.py rename to docling/models/stages/page_assemble/page_assemble_model.py index bc5589eb71..6534101a33 100644 --- a/docling/models/page_assemble_model.py +++ b/docling/models/stages/page_assemble/page_assemble_model.py @@ -17,7 +17,7 @@ ) from docling.datamodel.document import ConversionResult from docling.models.base_model import BasePageModel -from docling.models.layout_model import LayoutModel +from docling.models.stages.layout.layout_model import LayoutModel from docling.utils.profiling import TimeRecorder _log = logging.getLogger(__name__) diff --git a/docling/models/stages/page_preprocessing/__init__.py b/docling/models/stages/page_preprocessing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/page_preprocessing_model.py b/docling/models/stages/page_preprocessing/page_preprocessing_model.py similarity index 100% rename from docling/models/page_preprocessing_model.py rename to docling/models/stages/page_preprocessing/page_preprocessing_model.py diff --git a/docling/models/stages/picture_classifier/__init__.py b/docling/models/stages/picture_classifier/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/document_picture_classifier.py b/docling/models/stages/picture_classifier/document_picture_classifier.py similarity index 100% rename from docling/models/document_picture_classifier.py rename to docling/models/stages/picture_classifier/document_picture_classifier.py diff --git a/docling/models/stages/picture_description/__init__.py b/docling/models/stages/picture_description/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/picture_description_api_model.py b/docling/models/stages/picture_description/picture_description_api_model.py similarity index 100% rename from docling/models/picture_description_api_model.py rename to docling/models/stages/picture_description/picture_description_api_model.py diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/stages/picture_description/picture_description_vlm_model.py similarity index 100% rename from docling/models/picture_description_vlm_model.py rename to docling/models/stages/picture_description/picture_description_vlm_model.py diff --git a/docling/models/stages/reading_order/__init__.py b/docling/models/stages/reading_order/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/readingorder_model.py b/docling/models/stages/reading_order/readingorder_model.py similarity index 100% rename from docling/models/readingorder_model.py rename to docling/models/stages/reading_order/readingorder_model.py diff --git a/docling/models/stages/table_structure/__init__.py b/docling/models/stages/table_structure/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docling/models/table_structure_model.py b/docling/models/stages/table_structure/table_structure_model.py similarity index 100% rename from docling/models/table_structure_model.py rename to docling/models/stages/table_structure/table_structure_model.py diff --git a/docling/models/vlm_models_inline/__init__.py b/docling/models/vlm_pipeline_models/__init__.py similarity index 100% rename from docling/models/vlm_models_inline/__init__.py rename to docling/models/vlm_pipeline_models/__init__.py diff --git a/docling/models/api_vlm_model.py b/docling/models/vlm_pipeline_models/api_vlm_model.py similarity index 100% rename from docling/models/api_vlm_model.py rename to docling/models/vlm_pipeline_models/api_vlm_model.py diff --git a/docling/models/vlm_models_inline/hf_transformers_model.py b/docling/models/vlm_pipeline_models/hf_transformers_model.py similarity index 100% rename from docling/models/vlm_models_inline/hf_transformers_model.py rename to docling/models/vlm_pipeline_models/hf_transformers_model.py diff --git a/docling/models/vlm_models_inline/mlx_model.py b/docling/models/vlm_pipeline_models/mlx_model.py similarity index 100% rename from docling/models/vlm_models_inline/mlx_model.py rename to docling/models/vlm_pipeline_models/mlx_model.py diff --git a/docling/models/vlm_models_inline/vllm_model.py b/docling/models/vlm_pipeline_models/vllm_model.py similarity index 100% rename from docling/models/vlm_models_inline/vllm_model.py rename to docling/models/vlm_pipeline_models/vllm_model.py diff --git a/docling/pipeline/base_pipeline.py b/docling/pipeline/base_pipeline.py index 1ee9ab89c6..5fc1b64ffe 100644 --- a/docling/pipeline/base_pipeline.py +++ b/docling/pipeline/base_pipeline.py @@ -28,12 +28,12 @@ ) from docling.datamodel.settings import settings from docling.models.base_model import GenericEnrichmentModel -from docling.models.document_picture_classifier import ( +from docling.models.factories import get_picture_description_factory +from docling.models.picture_description_base_model import PictureDescriptionBaseModel +from docling.models.stages.picture_classifier.document_picture_classifier import ( DocumentPictureClassifier, DocumentPictureClassifierOptions, ) -from docling.models.factories import get_picture_description_factory -from docling.models.picture_description_base_model import PictureDescriptionBaseModel from docling.utils.profiling import ProfilingScope, TimeRecorder from docling.utils.utils import chunkify diff --git a/docling/pipeline/extraction_vlm_pipeline.py b/docling/pipeline/extraction_vlm_pipeline.py index b995b013fd..ba652ae957 100644 --- a/docling/pipeline/extraction_vlm_pipeline.py +++ b/docling/pipeline/extraction_vlm_pipeline.py @@ -20,7 +20,7 @@ VlmExtractionPipelineOptions, ) from docling.datamodel.settings import settings -from docling.models.vlm_models_inline.nuextract_transformers_model import ( +from docling.models.extraction.nuextract_transformers_model import ( NuExtractTransformersModel, ) from docling.pipeline.base_extraction_pipeline import BaseExtractionPipeline diff --git a/docling/pipeline/legacy_standard_pdf_pipeline.py b/docling/pipeline/legacy_standard_pdf_pipeline.py index 55c2703cd9..548f38128e 100644 --- a/docling/pipeline/legacy_standard_pdf_pipeline.py +++ b/docling/pipeline/legacy_standard_pdf_pipeline.py @@ -14,18 +14,27 @@ from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.datamodel.settings import settings from docling.models.base_ocr_model import BaseOcrModel -from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions from docling.models.factories import ( get_layout_factory, get_ocr_factory, get_table_structure_factory, ) -from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions -from docling.models.page_preprocessing_model import ( +from docling.models.stages.code_formula.code_formula_model import ( + CodeFormulaModel, + CodeFormulaModelOptions, +) +from docling.models.stages.page_assemble.page_assemble_model import ( + PageAssembleModel, + PageAssembleOptions, +) +from docling.models.stages.page_preprocessing.page_preprocessing_model import ( PagePreprocessingModel, PagePreprocessingOptions, ) -from docling.models.readingorder_model import ReadingOrderModel, ReadingOrderOptions +from docling.models.stages.reading_order.readingorder_model import ( + ReadingOrderModel, + ReadingOrderOptions, +) from docling.pipeline.base_pipeline import PaginatedPipeline from docling.utils.model_downloader import download_models from docling.utils.profiling import ProfilingScope, TimeRecorder diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 5845625039..ffaa38ed45 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -40,18 +40,27 @@ from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions from docling.datamodel.settings import settings -from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions from docling.models.factories import ( get_layout_factory, get_ocr_factory, get_table_structure_factory, ) -from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions -from docling.models.page_preprocessing_model import ( +from docling.models.stages.code_formula.code_formula_model import ( + CodeFormulaModel, + CodeFormulaModelOptions, +) +from docling.models.stages.page_assemble.page_assemble_model import ( + PageAssembleModel, + PageAssembleOptions, +) +from docling.models.stages.page_preprocessing.page_preprocessing_model import ( PagePreprocessingModel, PagePreprocessingOptions, ) -from docling.models.readingorder_model import ReadingOrderModel, ReadingOrderOptions +from docling.models.stages.reading_order.readingorder_model import ( + ReadingOrderModel, + ReadingOrderOptions, +) from docling.pipeline.base_pipeline import ConvertPipeline from docling.utils.profiling import ProfilingScope, TimeRecorder from docling.utils.utils import chunkify diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index ab919c4d9f..89ce3cc4ec 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -37,11 +37,11 @@ ResponseFormat, ) from docling.datamodel.settings import settings -from docling.models.api_vlm_model import ApiVlmModel -from docling.models.vlm_models_inline.hf_transformers_model import ( +from docling.models.vlm_pipeline_models.api_vlm_model import ApiVlmModel +from docling.models.vlm_pipeline_models.hf_transformers_model import ( HuggingFaceTransformersVlmModel, ) -from docling.models.vlm_models_inline.mlx_model import HuggingFaceMlxModel +from docling.models.vlm_pipeline_models.mlx_model import HuggingFaceMlxModel from docling.pipeline.base_pipeline import PaginatedPipeline from docling.utils.profiling import ProfilingScope, TimeRecorder @@ -93,7 +93,7 @@ def __init__(self, pipeline_options: VlmPipelineOptions): ), ] elif vlm_options.inference_framework == InferenceFramework.VLLM: - from docling.models.vlm_models_inline.vllm_model import VllmVlmModel + from docling.models.vlm_pipeline_models.vllm_model import VllmVlmModel self.build_pipe = [ VllmVlmModel( diff --git a/docling/utils/model_downloader.py b/docling/utils/model_downloader.py index cbcf68789d..1b6c72c666 100644 --- a/docling/utils/model_downloader.py +++ b/docling/utils/model_downloader.py @@ -15,13 +15,19 @@ SMOLDOCLING_MLX, SMOLDOCLING_TRANSFORMERS, ) -from docling.models.code_formula_model import CodeFormulaModel -from docling.models.document_picture_classifier import DocumentPictureClassifier -from docling.models.easyocr_model import EasyOcrModel -from docling.models.layout_model import LayoutModel -from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel -from docling.models.rapid_ocr_model import RapidOcrModel -from docling.models.table_structure_model import TableStructureModel +from docling.models.stages.code_formula.code_formula_model import CodeFormulaModel +from docling.models.stages.layout.layout_model import LayoutModel +from docling.models.stages.ocr.easyocr_model import EasyOcrModel +from docling.models.stages.ocr.rapid_ocr_model import RapidOcrModel +from docling.models.stages.picture_classifier.document_picture_classifier import ( + DocumentPictureClassifier, +) +from docling.models.stages.picture_description.picture_description_vlm_model import ( + PictureDescriptionVlmModel, +) +from docling.models.stages.table_structure.table_structure_model import ( + TableStructureModel, +) from docling.models.utils.hf_model_download import download_hf_model _log = logging.getLogger(__name__) diff --git a/docs/examples/enrich_doclingdocument.py b/docs/examples/enrich_doclingdocument.py index d227b4f1b4..ad08147904 100644 --- a/docs/examples/enrich_doclingdocument.py +++ b/docs/examples/enrich_doclingdocument.py @@ -35,7 +35,7 @@ from docling.datamodel.base_models import InputFormat, ItemAndImageEnrichmentElement from docling.datamodel.document import InputDocument from docling.models.base_model import BaseItemAndImageEnrichmentModel -from docling.models.document_picture_classifier import ( +from docling.models.stages.picture_classifier.document_picture_classifier import ( DocumentPictureClassifier, DocumentPictureClassifierOptions, )