Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/styles/config/vocabularies/Docling/accept.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Kubeflow
(?i)PyTorch
(?i)CUDA
(?i)NVIDIA
(?i)ROCm
(?i)env
Gradio
bool
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
with:
fetch-depth: 0 # for fetching tags, required for semantic-release
- name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
- name: Install dependencies
Expand Down Expand Up @@ -45,7 +45,7 @@ jobs:
token: ${{ steps.app-token.outputs.token }}
fetch-depth: 0 # for fetching tags, required for semantic-release
- name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
uses: astral-sh/setup-uv@v6
with:
enable-cache: true
- name: Install dependencies
Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/ci-images-dryrun.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ jobs:
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cu124
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
platforms: linux/amd64
# - name: docling-project/docling-serve-cu124
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
# platforms: linux/amd64
- name: docling-project/docling-serve-cu126
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
Expand All @@ -33,6 +33,10 @@ jobs:
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
platforms: linux/amd64
# - name: docling-project/docling-serve-rocm
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
# platforms: linux/amd64

permissions:
packages: write
Expand Down
13 changes: 8 additions & 5 deletions .github/workflows/images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ jobs:
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cu124
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
platforms: linux/amd64
# - name: docling-project/docling-serve-cu124
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
# platforms: linux/amd64
- name: docling-project/docling-serve-cu126
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
Expand All @@ -37,7 +37,10 @@ jobs:
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
platforms: linux/amd64

# - name: docling-project/docling-serve-rocm
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
# platforms: linux/amd64
permissions:
packages: write
contents: read
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/job-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
uses: astral-sh/setup-uv@v6
with:
python-version: ${{ matrix.python-version }}
enable-cache: true
Expand Down
11 changes: 6 additions & 5 deletions .github/workflows/job-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
uses: astral-sh/setup-uv@v6
with:
python-version: ${{ matrix.python-version }}
enable-cache: true
Expand All @@ -28,7 +28,7 @@ jobs:
run: uv sync --frozen --all-extras --no-extra flash-attn

- name: Run styling check
run: pre-commit run --all-files
run: uv run pre-commit run --all-files

build-package:
uses: ./.github/workflows/job-build.yml
Expand All @@ -47,14 +47,16 @@ jobs:
name: python-package-distributions
path: dist/
- name: Install uv and set the python version
uses: astral-sh/setup-uv@v5
uses: astral-sh/setup-uv@v6
with:
python-version: ${{ matrix.python-version }}
enable-cache: true
- name: Create virtual environment
run: uv venv
- name: Install package
run: uv pip install dist/*.whl
- name: Create the server
run: python -c 'from docling_serve.app import create_app; create_app()'
run: .venv/bin/python -c 'from docling_serve.app import create_app; create_app()'

markdown-lint:
runs-on: ubuntu-latest
Expand All @@ -64,4 +66,3 @@ jobs:
uses: DavidAnson/markdownlint-cli2-action@v16
with:
globs: "**/*.md"

4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ repos:
args: ["--config=.github/vale.ini"]
files: \.md$
- repo: https://github.com/astral-sh/uv-pre-commit
# uv version.
rev: 0.7.13
# uv version, https://github.com/astral-sh/uv-pre-commit/releases
rev: 0.8.3
hooks:
- id: uv-lock
20 changes: 14 additions & 6 deletions Containerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

FROM ${BASE_IMAGE}
ARG UV_VERSION=0.8.3

USER 0
ARG UV_SYNC_EXTRA_ARGS=""

FROM ${BASE_IMAGE} AS docling-base

###################################################################################################
# OS Layer #
###################################################################################################

USER 0

RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
Expand All @@ -21,16 +25,19 @@ RUN /usr/bin/fix-permissions /opt/app-root/src/.cache

ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/

FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage

###################################################################################################
# Docling layer #
###################################################################################################

FROM docling-base

USER 1001

WORKDIR /opt/app-root/src

ENV \
# On container environments, always set a thread budget to avoid undesired thread congestion.
OMP_NUM_THREADS=4 \
LANG=en_US.UTF-8 \
LC_ALL=en_US.UTF-8 \
Expand All @@ -40,9 +47,9 @@ ENV \
UV_PROJECT_ENVIRONMENT=/opt/app-root \
DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models

ARG UV_SYNC_EXTRA_ARGS=""
ARG UV_SYNC_EXTRA_ARGS

RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
Expand All @@ -61,7 +68,8 @@ RUN echo "Downloading models..." && \
chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}

COPY --chown=1001:0 ./docling_serve ./docling_serve
RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \

RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
Expand Down
28 changes: 28 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ docling-serve-cu128-image: Containerfile ## Build docling-serve container image
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)

.PHONY: docling-serve-rocm-image
docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-rocm:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) ghcr.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) quay.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)

.PHONY: action-lint
action-lint: .action-lint ## Lint GitHub Action workflows
.action-lint: $(shell find .github -type f) | action-lint-file
Expand Down Expand Up @@ -107,3 +114,24 @@ run-docling-cu124: ## Run the docling-serve container with GPU support and assig
$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main

.PHONY: run-docling-cu126
run-docling-cu126: ## Run the docling-serve container with GPU support and assign a container name
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
$(CMD_PREFIX) docker rm -f docling-serve-cu126 2>/dev/null || true
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.6]"
$(CMD_PREFIX) docker run -it --name docling-serve-cu126 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu126:main

.PHONY: run-docling-cu128
run-docling-cu128: ## Run the docling-serve container with GPU support and assign a container name
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
$(CMD_PREFIX) docker rm -f docling-serve-cu128 2>/dev/null || true
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
$(CMD_PREFIX) docker run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main

.PHONY: run-docling-rocm
run-docling-rocm: ## Run the docling-serve container with GPU support and assign a container name
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
$(CMD_PREFIX) docker rm -f docling-serve-rocm 2>/dev/null || true
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN ROCm 6.3]"
$(CMD_PREFIX) docker run -it --name docling-serve-rocm -p 5001:5001 ghcr.io/docling-project/docling-serve-rocm:main
33 changes: 24 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,32 @@ curl -X 'POST' \
}'
```

### Container images
### Container Images

Available container images:
The following container images are available for running **Docling Serve** with different hardware and PyTorch configurations:

| Name | Description | Arch | Size |
| -----|-------------|------|------|
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB (arm64) <br /> 8.7 GB (amd64) |
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br /> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | Cpu-only image which installs `torch` from the pytorch cpu index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
| [`ghcr.io/docling-project/docling-serve-cu124`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu124) <br /> [`quay.io/docling-project/docling-serve-cu124`](https://quay.io/repository/docling-project/docling-serve-cu124) | Cuda 12.4 image which installs `torch` from the pytorch cu124 index. | `linux/amd64` | 8.7 GB |
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br /> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | Cuda 12.6 image which installs `torch` from the pytorch cu126 index. | `linux/amd64` | 8.7 GB |
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br /> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | Cuda 12.8 image which installs `torch` from the pytorch cu128 index. | `linux/amd64` | 8.7 GB |
#### 📦 Distributed Images

| Image | Description | Architectures | Size |
|-------|-------------|----------------|------|
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |

#### 🚫 Not Distributed

An image for AMD ROCm 6.3 (`docling-serve-rocm`) is supported but **not published** due to its large size.

To build it locally:

```bash
git clone --branch main git@github.com:docling-project/docling-serve.git
cd docling-serve/
make docling-serve-rocm-image
```

For deployment using Docker Compose, see [docs/deployment.md](docs/deployment.md).

Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.

Expand Down
21 changes: 21 additions & 0 deletions docs/deploy-examples/compose-amd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# AMD ROCm deployment

services:
docling-serve:
image: ghcr.io/docling-project/docling-serve-rocm:main
container_name: docling-serve
ports:
- "5001:5001"
environment:
DOCLING_SERVE_ENABLE_UI: "true"
ROCR_VISIBLE_DEVICES: "0" # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#rocr-visible-devices
## This section is for compatibility with older cards
# HSA_OVERRIDE_GFX_VERSION: "11.0.0"
# HSA_ENABLE_SDMA: "0"
devices:
- /dev/kfd:/dev/kfd
- /dev/dri:/dev/dri
group_add:
- 44 # video group GID from host
- 992 # render group GID from host
restart: always
15 changes: 0 additions & 15 deletions docs/deploy-examples/compose-gpu.yaml

This file was deleted.

20 changes: 20 additions & 0 deletions docs/deploy-examples/compose-nvidia.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# NVIDIA CUDA deployment

services:
docling-serve:
image: ghcr.io/docling-project/docling-serve-cu126:main
container_name: docling-serve
ports:
- "5001:5001"
environment:
DOCLING_SERVE_ENABLE_UI: "true"
NVIDIA_VISIBLE_DEVICES: "all" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
# deploy: # This section is for compatibility with Swarm
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: all
# capabilities: [gpu]
runtime: nvidia
restart: always
Loading
Loading