docling-project · cau-git · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -50,7 +50,7 @@ jobs:
             pre-commit|${{ env.PY }}|
 
       - name: Install Python Dependencies
-        run: uv sync --frozen --all-extras
+        run: uv sync --frozen --all-extras --no-extra nemotron-ocr
 
       - name: Check style
         run: |
@@ -92,7 +92,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       - name: Install Python Dependencies
-        run: uv sync --frozen --all-extras
+        run: uv sync --frozen --all-extras --no-extra nemotron-ocr
 
       - name: Cache Models
         uses: actions/cache@v5
@@ -159,7 +159,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       - name: Install Python Dependencies
-        run: uv sync --frozen --all-extras
+        run: uv sync --frozen --all-extras --no-extra nemotron-ocr
 
       - name: Cache Models
         uses: actions/cache@v5
@@ -231,7 +231,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       - name: Install Python Dependencies
-        run: uv sync --frozen --all-extras
+        run: uv sync --frozen --all-extras --no-extra nemotron-ocr
 
       - name: Cache Models
         uses: actions/cache@v5
@@ -410,7 +410,7 @@ jobs:
           enable-cache: true
 
       - name: Install dependencies
-        run: uv sync --all-extras
+        run: uv sync --all-extras --no-extra nemotron-ocr
 
       - name: Build package
         run: uv build

diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
@@ -29,7 +29,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
           enable-cache: true
       - name: Install dependencies
-        run: uv sync --all-extras
+        run: uv sync --all-extras --no-extra nemotron-ocr
       - name: Build package
         run: uv build
       - name: Publish distribution 📦 to PyPI

diff --git a/docling/cli/models.py b/docling/cli/models.py
@@ -42,6 +42,7 @@ class _AvailableModels(str, Enum):
     GRANITE_CHART_EXTRACTION = "granite_chart_extraction"
     RAPIDOCR = "rapidocr"
     EASYOCR = "easyocr"
+    NEMOTRON_OCR = "nemotron_ocr"
 
 
 _default_models = [
@@ -123,6 +124,7 @@ def download(
         in to_download,
         with_rapidocr=_AvailableModels.RAPIDOCR in to_download,
         with_easyocr=_AvailableModels.EASYOCR in to_download,
+        with_nemotron_ocr=_AvailableModels.NEMOTRON_OCR in to_download,
     )
 
     if quiet:

diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
@@ -158,7 +158,8 @@ class OcrOptions(BaseOptions):
     See Also:
         `OcrAutoOptions`: Automatic engine selection based on availability.
         `EasyOcrOptions`, `TesseractCliOcrOptions`, `TesseractOcrOptions`,
-        `RapidOcrOptions`, `OcrMacOptions`: Engine-specific configurations.
+        `RapidOcrOptions`, `OcrMacOptions`, `NemotronOcrOptions`: Engine-specific
+        configurations.
     """
 
     lang: Annotated[
@@ -322,6 +323,40 @@ class RapidOcrOptions(OcrOptions):
     )
 
 
+class NemotronOcrOptions(OcrOptions):
+    """Configuration for NVIDIA Nemotron OCR.
+
+    Notes:
+        Nemotron OCR does not expose runtime language selection through its public
+        API. The `lang` field is kept only for compatibility with the shared OCR
+        options interface. Use the pipeline-level `artifacts_path` to point to
+        pre-downloaded checkpoint artifacts.
+    """
+
+    kind: ClassVar[Literal["nemotron-ocr"]] = "nemotron-ocr"
+    lang: Annotated[
+        list[str],
+        Field(
+            description=(
+                "Reserved for interface compatibility. Nemotron OCR does not expose "
+                "runtime language selection through its public API."
+            )
+        ),
+    ] = []
+    merge_level: Annotated[
+        Literal["word", "sentence", "paragraph"],
+        Field(
+            description=(
+                "Granularity requested from Nemotron OCR. `word` is the default "
+                "because it maps most directly to Docling OCR cells."
+            )
+        ),
+    ] = "sentence"
+    model_config = ConfigDict(
+        extra="forbid",
+    )
+
+
 class EasyOcrOptions(OcrOptions):
     """Configuration for EasyOCR engine."""
 

diff --git a/docling/models/plugins/defaults.py b/docling/models/plugins/defaults.py
@@ -1,6 +1,7 @@
 def ocr_engines():
     from docling.models.stages.ocr.auto_ocr_model import OcrAutoModel
     from docling.models.stages.ocr.easyocr_model import EasyOcrModel
+    from docling.models.stages.ocr.nemotron_ocr_model import NemotronOcrModel
     from docling.models.stages.ocr.ocr_mac_model import OcrMacModel
     from docling.models.stages.ocr.rapid_ocr_model import RapidOcrModel
     from docling.models.stages.ocr.tesseract_ocr_cli_model import TesseractOcrCliModel
@@ -10,6 +11,7 @@ def ocr_engines():
         "ocr_engines": [
             OcrAutoModel,
             EasyOcrModel,
+            NemotronOcrModel,
             OcrMacModel,
             RapidOcrModel,
             TesseractOcrModel,

diff --git a/docling/models/stages/ocr/nemotron_ocr_model.py b/docling/models/stages/ocr/nemotron_ocr_model.py
@@ -0,0 +1,253 @@
+import logging
+import platform
+import sys
+from collections.abc import Iterable, Sequence
+from pathlib import Path
+from typing import Any, Optional, Type, TypedDict, cast
+
+import numpy
+from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling_core.types.doc.page import BoundingRectangle, TextCell
+
+from docling.datamodel.accelerator_options import AcceleratorOptions
+from docling.datamodel.base_models import Page
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import (
+    NemotronOcrOptions,
+    OcrOptions,
+)
+from docling.datamodel.settings import settings
+from docling.models.base_ocr_model import BaseOcrModel
+from docling.models.utils.hf_model_download import download_hf_model
+from docling.utils.accelerator_utils import decide_device
+from docling.utils.profiling import TimeRecorder
+
+_log = logging.getLogger(__name__)
+
+
+class NemotronOcrPrediction(TypedDict):
+    """Exact prediction schema returned by `nemotron_ocr`."""
+
+    text: str
+    confidence: float
+    left: float
+    upper: float
+    right: float
+    lower: float
+
+
+class NemotronOcrModel(BaseOcrModel):
+    _repo_id = "nvidia/nemotron-ocr-v1"
+
+    def __init__(
+        self,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        options: NemotronOcrOptions,
+        accelerator_options: AcceleratorOptions,
+    ):
+        super().__init__(
+            enabled=enabled,
+            artifacts_path=artifacts_path,
+            options=options,
+            accelerator_options=accelerator_options,
+        )
+        self.options: NemotronOcrOptions
+        self.scale = 3  # multiplier for 72 dpi == 216 dpi.
+
+        if self.enabled:
+            self._validate_runtime(accelerator_options=accelerator_options)
+
+            try:
+                from nemotron_ocr.inference.pipeline import NemotronOCR
+            except ImportError as exc:
+                raise ImportError(
+                    "Nemotron OCR is not installed. Install the optional dependency "
+                    'via `pip install "docling[nemotron-ocr]"` on Linux x86_64 with '
+                    "Python 3.12 and CUDA 13.x."
+                ) from exc
+
+            model_dir = self._resolve_model_dir(artifacts_path=artifacts_path)
+            self.reader = NemotronOCR(
+                model_dir=None if model_dir is None else str(model_dir)
+            )
+
+    @staticmethod
+    def _fail_runtime(message: str) -> None:
+        _log.warning(message)
+        raise RuntimeError(message)
+
+    @classmethod
+    def _validate_runtime(cls, accelerator_options: AcceleratorOptions) -> None:
+        if sys.platform != "linux":
+            cls._fail_runtime("Nemotron OCR is only supported on Linux.")
+
+        if platform.machine() != "x86_64":
+            cls._fail_runtime("Nemotron OCR is only supported on x86_64 machines.")
+
+        if sys.version_info[:2] != (3, 12):
+            cls._fail_runtime("Nemotron OCR requires Python 3.12.")
+
+        requested_device = decide_device(accelerator_options.device)
+        if not requested_device.startswith("cuda"):
+            cls._fail_runtime(
+                "Nemotron OCR requires a CUDA accelerator. Set "
+                "`pipeline_options.accelerator_options.device` to CUDA or AUTO on a "
+                "CUDA-enabled machine."
+            )
+
+        import torch
+
+        if not torch.cuda.is_available():
+            cls._fail_runtime(
+                "Nemotron OCR requires CUDA at initialization time, but "
+                "`torch.cuda.is_available()` is false."
+            )
+
+        cuda_version = torch.version.cuda
+        if cuda_version is None or not cuda_version.startswith("13."):
+            cls._fail_runtime(
+                "Nemotron OCR requires CUDA 13.x, but the current PyTorch runtime "
+                f"reports CUDA {cuda_version!r}."
+            )
+
+    @classmethod
+    def _resolve_model_dir(cls, artifacts_path: Optional[Path]) -> Optional[Path]:
+        if artifacts_path is None:
+            return None
+
+        repo_cache_folder = cls._repo_id.replace("/", "--")
+        if (artifacts_path / repo_cache_folder).exists():
+            return artifacts_path / repo_cache_folder / "checkpoints"
+
+        available_dirs = []
+        if artifacts_path.exists():
+            available_dirs = sorted(
+                path.name for path in artifacts_path.iterdir() if path.is_dir()
+            )
+
+        raise FileNotFoundError(
+            "Nemotron OCR artifacts not found in artifacts_path.\n"
+            f"Expected location: {artifacts_path / repo_cache_folder / 'checkpoints'}\n"
+            f"Available directories in {artifacts_path}: {available_dirs}\n"
+            "Use `docling-tools models download nemotron_ocr` to pre-download "
+            "the checkpoints or unset artifacts_path to allow the upstream "
+            "package to download them."
+        )
+
+    @staticmethod
+    def download_models(
+        local_dir: Optional[Path] = None,
+        force: bool = False,
+        progress: bool = False,
+    ) -> Path:
+        if local_dir is None:
+            local_dir = (
+                settings.cache_dir
+                / "models"
+                / NemotronOcrModel._repo_id.replace("/", "--")
+            )
+
+        local_dir.mkdir(parents=True, exist_ok=True)
+        return download_hf_model(
+            repo_id=NemotronOcrModel._repo_id,
+            local_dir=local_dir,
+            force=force,
+            progress=progress,
+        )
+
+    @staticmethod
+    def _prediction_to_cell(
+        prediction: NemotronOcrPrediction,
+        index: int,
+        ocr_rect: BoundingBox,
+        image_width: int,
+        image_height: int,
+        scale: int,
+    ) -> TextCell:
+        # `nemotron_ocr` returns normalized `left/right` and an inverted
+        # pair `lower/upper`, where `lower` is the top Y and `upper` is the
+        # bottom Y in image coordinates.
+        left = (prediction["left"] * image_width) / scale + ocr_rect.l
+        top = (prediction["lower"] * image_height) / scale + ocr_rect.t
+        right = (prediction["right"] * image_width) / scale + ocr_rect.l
+        bottom = (prediction["upper"] * image_height) / scale + ocr_rect.t
+        text = prediction["text"]
+
+        return TextCell(
+            index=index,
+            text=text,
+            orig=text,
+            from_ocr=True,
+            confidence=float(prediction["confidence"]),
+            rect=BoundingRectangle.from_bounding_box(
+                BoundingBox(
+                    l=left,
+                    t=top,
+                    r=right,
+                    b=bottom,
+                    coord_origin=CoordOrigin.TOPLEFT,
+                )
+            ),
+        )
+
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
+        if not self.enabled:
+            yield from page_batch
+            return
+
+        for page in page_batch:
+            assert page._backend is not None
+            if not page._backend.is_valid():
+                yield page
+            else:
+                with TimeRecorder(conv_res, "ocr"):
+                    ocr_rects = self.get_ocr_rects(page)
+
+                    all_ocr_cells = []
+                    for ocr_rect in ocr_rects:
+                        if ocr_rect.area() == 0:
+                            continue
+
+                        high_res_image = page._backend.get_page_image(
+                            scale=self.scale, cropbox=ocr_rect
+                        )
+                        image_width, image_height = high_res_image.size
+                        image_array = numpy.array(high_res_image)
+                        raw_predictions = cast(
+                            Sequence[NemotronOcrPrediction],
+                            self.reader(
+                                image_array,
+                                merge_level=self.options.merge_level,
+                                visualize=False,
+                            ),
+                        )
+
+                        del high_res_image
+                        del image_array
+
+                        cells = [
+                            self._prediction_to_cell(
+                                prediction=prediction,
+                                index=index,
+                                ocr_rect=ocr_rect,
+                                image_width=image_width,
+                                image_height=image_height,
+                                scale=self.scale,
+                            )
+                            for index, prediction in enumerate(raw_predictions)
+                        ]
+                        all_ocr_cells.extend(cells)
+
+                    self.post_process_cells(all_ocr_cells, page)
+
+                if settings.debug.visualize_ocr:
+                    self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
+
+                yield page
+
+    @classmethod
+    def get_options_type(cls) -> Type[OcrOptions]:
+        return NemotronOcrOptions