feat: add rocm image build support and fix cuda (#292)

rmdg88 · Rui-Dias-Gomes · web-flow · commit fd1b987e8dc1 · 2025-07-31T14:22:42.000+02:00
Signed-off-by: rmdg88 <rmdg88@gmail.com>
Signed-off-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com>
Co-authored-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com>
diff --git a/.github/styles/config/vocabularies/Docling/accept.txt b/.github/styles/config/vocabularies/Docling/accept.txt
@@ -19,6 +19,7 @@ Kubeflow
 (?i)PyTorch
 (?i)CUDA
 (?i)NVIDIA
+(?i)ROCm
 (?i)env
 Gradio
 bool
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -15,7 +15,7 @@ jobs:
         with:
           fetch-depth: 0  # for fetching tags, required for semantic-release
       - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
         with:
           enable-cache: true
       - name: Install dependencies
@@ -45,7 +45,7 @@ jobs:
           token: ${{ steps.app-token.outputs.token }}
           fetch-depth: 0  # for fetching tags, required for semantic-release
       - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
         with:
           enable-cache: true
       - name: Install dependencies
diff --git a/.github/workflows/ci-images-dryrun.yml b/.github/workflows/ci-images-dryrun.yml
@@ -21,10 +21,10 @@ jobs:
             build_args: |
               UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
             platforms: linux/amd64, linux/arm64
-          - name: docling-project/docling-serve-cu124
-            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
-            platforms: linux/amd64
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
           - name: docling-project/docling-serve-cu126
             build_args: |
               UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
@@ -33,6 +33,10 @@ jobs:
             build_args: |
               UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
             platforms: linux/amd64
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64
 
     permissions:
       packages: write
diff --git a/.github/workflows/images.yml b/.github/workflows/images.yml
@@ -25,10 +25,10 @@ jobs:
             build_args: |
               UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
             platforms: linux/amd64, linux/arm64
-          - name: docling-project/docling-serve-cu124
-            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
-            platforms: linux/amd64
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
           - name: docling-project/docling-serve-cu126
             build_args: |
               UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
@@ -37,7 +37,10 @@ jobs:
             build_args: |
               UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
             platforms: linux/amd64
-
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64
     permissions:
       packages: write
       contents: read
diff --git a/.github/workflows/job-build.yml b/.github/workflows/job-build.yml
@@ -12,7 +12,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
         with:
           python-version: ${{ matrix.python-version }}
           enable-cache: true
diff --git a/.github/workflows/job-checks.yml b/.github/workflows/job-checks.yml
@@ -12,7 +12,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
         with:
           python-version: ${{ matrix.python-version }}
           enable-cache: true
@@ -28,7 +28,7 @@ jobs:
         run: uv sync --frozen --all-extras --no-extra flash-attn
 
       - name: Run styling check
-        run: pre-commit run --all-files
+        run: uv run pre-commit run --all-files
 
   build-package:
     uses: ./.github/workflows/job-build.yml
@@ -47,14 +47,16 @@ jobs:
           name: python-package-distributions
           path: dist/
       - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
         with:
           python-version: ${{ matrix.python-version }}
           enable-cache: true
+      - name: Create virtual environment
+        run: uv venv
       - name: Install package
         run: uv pip install dist/*.whl
       - name: Create the server
-        run: python -c 'from docling_serve.app import create_app; create_app()'
+        run: .venv/bin/python -c 'from docling_serve.app import create_app; create_app()'
 
   markdown-lint:
     runs-on: ubuntu-latest
@@ -64,4 +66,3 @@ jobs:
         uses: DavidAnson/markdownlint-cli2-action@v16
         with:
           globs: "**/*.md"
-
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -33,7 +33,7 @@ repos:
         args: ["--config=.github/vale.ini"]
         files: \.md$
   - repo: https://github.com/astral-sh/uv-pre-commit
-    # uv version.
-    rev: 0.7.13
+    # uv version, https://github.com/astral-sh/uv-pre-commit/releases
+    rev: 0.8.3
     hooks:
       - id: uv-lock
diff --git a/Containerfile b/Containerfile
@@ -1,13 +1,17 @@
 ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s
 
-FROM ${BASE_IMAGE}
+ARG UV_VERSION=0.8.3
 
-USER 0
+ARG UV_SYNC_EXTRA_ARGS=""
+
+FROM ${BASE_IMAGE} AS docling-base
 
 ###################################################################################################
 # OS Layer                                                                                        #
 ###################################################################################################
 
+USER 0
+
 RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
     dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
     dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
@@ -21,16 +25,19 @@ RUN /usr/bin/fix-permissions /opt/app-root/src/.cache
 
 ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
 
+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage
+
 ###################################################################################################
 # Docling layer                                                                                   #
 ###################################################################################################
 
+FROM docling-base
+
 USER 1001
 
 WORKDIR /opt/app-root/src
 
 ENV \
-    # On container environments, always set a thread budget to avoid undesired thread congestion.
     OMP_NUM_THREADS=4 \
     LANG=en_US.UTF-8 \
     LC_ALL=en_US.UTF-8 \
@@ -40,9 +47,9 @@ ENV \
     UV_PROJECT_ENVIRONMENT=/opt/app-root \
     DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
 
-ARG UV_SYNC_EXTRA_ARGS=""
+ARG UV_SYNC_EXTRA_ARGS
 
-RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
     --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
     --mount=type=bind,source=uv.lock,target=uv.lock \
     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
@@ -61,7 +68,8 @@ RUN echo "Downloading models..." && \
     chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}
 
 COPY --chown=1001:0 ./docling_serve ./docling_serve
-RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
+
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
     --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
     --mount=type=bind,source=uv.lock,target=uv.lock \
     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
diff --git a/Makefile b/Makefile
@@ -60,6 +60,13 @@ docling-serve-cu128-image: Containerfile ## Build docling-serve container image
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
 
+.PHONY: docling-serve-rocm-image
+docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-rocm:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) ghcr.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) quay.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
+
 .PHONY: action-lint
 action-lint: .action-lint ##      Lint GitHub Action workflows
 .action-lint: $(shell find .github -type f) | action-lint-file
@@ -107,3 +114,24 @@ run-docling-cu124: ## Run the docling-serve container with GPU support and assig
 	$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
 	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
 	$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main
+
+.PHONY: run-docling-cu126
+run-docling-cu126: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu126 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.6]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu126 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu126:main
+
+.PHONY: run-docling-cu128
+run-docling-cu128: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu128 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
+
+.PHONY: run-docling-rocm
+run-docling-rocm: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-rocm 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN ROCm 6.3]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-rocm -p 5001:5001 ghcr.io/docling-project/docling-serve-rocm:main
diff --git a/README.md b/README.md
@@ -50,17 +50,32 @@ curl -X 'POST' \
   }'
 ```
 
-### Container images
+### Container Images
 
-Available container images:
+The following container images are available for running **Docling Serve** with different hardware and PyTorch configurations:
 
-| Name | Description | Arch | Size |
-| -----|-------------|------|------|
-| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB (arm64) <br /> 8.7 GB (amd64) |
-| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br /> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | Cpu-only image which installs `torch` from the pytorch cpu index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
-| [`ghcr.io/docling-project/docling-serve-cu124`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu124) <br /> [`quay.io/docling-project/docling-serve-cu124`](https://quay.io/repository/docling-project/docling-serve-cu124) | Cuda 12.4 image which installs `torch` from the pytorch cu124 index. | `linux/amd64` | 8.7 GB |
-| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br /> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | Cuda 12.6 image which installs `torch` from the pytorch cu126 index. | `linux/amd64` | 8.7 GB |
-| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br /> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | Cuda 12.8 image which installs `torch` from the pytorch cu128 index. | `linux/amd64` | 8.7 GB |
+#### 📦 Distributed Images
+
+| Image | Description | Architectures | Size |
+|-------|-------------|----------------|------|
+| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
+| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
+| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
+| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
+
+#### 🚫 Not Distributed
+
+An image for AMD ROCm 6.3 (`docling-serve-rocm`) is supported but **not published** due to its large size.
+
+To build it locally:
+
+```bash
+git clone --branch main git@github.com:docling-project/docling-serve.git
+cd docling-serve/
+make docling-serve-rocm-image
+```
+
+For deployment using Docker Compose, see [docs/deployment.md](docs/deployment.md).
 
 Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.
 
diff --git a/docs/deploy-examples/compose-amd.yaml b/docs/deploy-examples/compose-amd.yaml
@@ -0,0 +1,21 @@
+# AMD ROCm deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-rocm:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      ROCR_VISIBLE_DEVICES: "0" # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#rocr-visible-devices
+      ## This section is for compatibility with older cards
+      # HSA_OVERRIDE_GFX_VERSION: "11.0.0"
+      # HSA_ENABLE_SDMA: "0"
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      - 44    # video group GID from host
+      - 992   # render group GID from host
+    restart: always
diff --git a/docs/deploy-examples/compose-gpu.yaml b/docs/deploy-examples/compose-gpu.yaml
diff --git a/docs/deploy-examples/compose-nvidia.yaml b/docs/deploy-examples/compose-nvidia.yaml
@@ -0,0 +1,20 @@
+# NVIDIA CUDA deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-cu126:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      NVIDIA_VISIBLE_DEVICES: "all" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
+    # deploy:  # This section is for compatibility with Swarm
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+    runtime: nvidia
+    restart: always
diff --git a/docs/deployment.md b/docs/deployment.md
diff --git a/pyproject.toml b/pyproject.toml
diff --git a/uv.lock b/uv.lock