# Universal image Dockerfile
#
# Base image:
# - Minimal Jupyter CUDA workbench with CUDA 12.8 and Python 3.12
# - Provides JupyterLab, Elyra integration, addons, and default ENTRYPOINT start-notebook.sh
# - Source: quay.io/opendatahub/workbench-images:cuda-jupyter-minimal-ubi9-python-3.12-2025a_20250903
#
# Design intent:
# - Preserve workbench behavior by default (no args → start-notebook.sh)
# - Add runtime capabilities on top (Python ML/training stack, RDMA/IB packages)
# - Avoid duplicating dependencies provided by the base image
# - Allow headless runtime mode when a command is provided (args → exec that command), as illustrated below
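#
# Example invocations (illustrative; the tag follows the LABEL below, and podman
# is interchangeable with docker here):
#   podman run --rm -p 8888:8888 universal:py312-cuda128-torch280
#       # no command → wrapper falls through to start-notebook.sh (workbench)
#   podman run --rm universal:py312-cuda128-torch280 python train.py
#       # command given → wrapper exec's it (headless runtime)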

FROM quay.io/opendatahub/workbench-images:cuda-jupyter-minimal-ubi9-python-3.12-2025a_20250903

LABEL name="universal:py312-cuda128-torch280" \
      summary="Universal CUDA 12.8 Python 3.12 image with PyTorch 2.8.0" \
      description="Universal image combining minimal Jupyter workbench and runtime ML stack (CUDA 12.8, PyTorch 2.8.0, FlashAttention 2.8.3) on UBI9" \
      io.k8s.display-name="Universal CUDA 12.8 Python 3.12 (Workbench + Runtime)" \
      io.k8s.description="Universal image: Jupyter workbench by default; runtime when command provided. Includes RDMA/IB libs, Torch 2.8.0 cu128, FlashAttention 2.8.3."

## TODO: Add license file
# COPY LICENSE.md /licenses/cuda-license.md

# For OS installs we need elevated privileges; the base image may default to user 1001
USER 0
WORKDIR /opt/app-root/bin

# Keep NVIDIA driver capability constraints consistent with runtime image behavior
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    CUDA_VERSION=12.8 \
    PIP_DEFAULT_TIMEOUT=600 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
# Mirror the runtime image: enable the CUDA and Mellanox OFED repositories for RDMA/IB packages.
# Note: The base image already includes the CUDA 12.8 runtime; we only add missing components (e.g., RDMA libs).
RUN dnf config-manager \
        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
    && dnf config-manager \
        --add-repo https://linux.mellanox.com/public/repo/mlnx_ofed/latest/rhel9.5/mellanox_mlnx_ofed.repo \
    && dnf install -y --disablerepo="*" --enablerepo="cuda-rhel9-x86_64,mlnx_ofed_24.10-1.1.4.0_base,ubi-9-appstream-rpms,ubi-9-baseos-rpms" \
        libibverbs-utils \
        infiniband-diags \
        libibumad3 \
        librdmacm \
        librdmacm-utils \
        rdma-core \
        mlnx-tools \
    && dnf clean all \
    && rm -rf /var/cache/dnf/*
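
# A quick runtime smoke test for the RDMA stack (illustrative; needs IB/RoCE hardware
# exposed to the container, so it is not run at build time):
#   ibv_devices && ibv_devinfo   # from libibverbs-utils
#   ibstat                       # from infiniband-diags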

# Install CUDA NVCC and build toolchain required to build FlashAttention from source
# NOTE: Use command-line CUDA packages to avoid Nsight GUI deps (X11 libs) not available in UBI
RUN dnf install -y --disablerepo="*" --enablerepo="cuda-rhel9-x86_64,ubi-9-appstream-rpms,ubi-9-baseos-rpms" \
        cuda-command-line-tools-12-8 \
        cuda-cudart-devel-12-8 \
        cuda-nvcc-12-8-12.8.93-1 \
        gcc \
        gcc-c++ \
        make \
        python3-devel \
        cmake \
        git \
    && dnf clean all \
    && rm -rf /var/cache/dnf/*

# Ensure CUDA_HOME points to the toolkit and nvcc is discoverable, then sanity check nvcc
ENV CUDA_HOME=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
    TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0"
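
# TORCH_CUDA_ARCH_LIST above scopes the flash-attn source build to Ampere (8.0/8.6),
# Ada (8.9), and Hopper (9.0) GPUs; extend the list if you target other architectures.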

# NOTE: Optional build-time CUDA checks (remove if not needed for faster builds)
# Verify CUDA toolkit present and nvcc available
RUN /usr/local/cuda/bin/nvcc -V
# Verify key CUDA libs are discoverable
RUN ldconfig -p | grep -E 'libcudart|libcublas|libcudnn' || (echo "[fail-fast] CUDA libs not found in ldconfig" >&2; exit 1)

# Quick preflight: verify the torch wheel and flash-attn index are reachable, to fail fast before large downloads
ARG TORCH_WHEEL_FILE=https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl
RUN curl -IfsS --connect-timeout 10 --max-time 20 "$TORCH_WHEEL_FILE" > /dev/null || (echo "[fail-fast] Torch cu128 wheel not reachable: $TORCH_WHEEL_FILE" >&2; exit 1)
RUN curl -IfsS --connect-timeout 10 --max-time 20 https://pypi.org/simple/flash-attn/ > /dev/null || (echo "[fail-fast] PyPI flash-attn index not reachable" >&2; exit 1)

# Switch back to the non-root user for Python environment changes
USER 1001

WORKDIR /opt/app-root/src

# Add runtime Python dependencies on top of the minimal Jupyter stack.
# We intentionally avoid re-installing minimal-provided packages (e.g., jupyterlab) to prevent downgrades.
# Torch/cu128 must match CUDA 12.8. FlashAttention is mandatory and currently supported only on amd64.
ARG TARGETARCH
# Enforce amd64 for FlashAttention wheel availability
RUN if [ "$TARGETARCH" != "amd64" ]; then echo "FlashAttention is mandatory and requires amd64 prebuilt wheels. Build with --platform linux/amd64." >&2; exit 1; fi
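
# Example build (illustrative; the tag is an assumption):
#   podman build --platform linux/amd64 -t universal:py312-cuda128-torch280 .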

# Install torch from the PyTorch CUDA index separately to avoid affecting other packages' index resolution
RUN pip install --retries 5 --timeout 300 --no-cache-dir torch==2.8.0 --index-url https://download.pytorch.org/whl/cu128

# NOTE: Optional build-time check (remove if not needed): verify the torch build has CUDA enabled
RUN python - <<'PY'
import torch, sys
print("[check] torch", torch.__version__, "cuda build:", torch.version.cuda)
sys.exit(0 if torch.backends.cuda.is_built() else 1)
PY

# Install numpy ahead of building extensions that expect it
RUN pip install --retries 5 --timeout 300 --no-cache-dir numpy==2.3.3

# Install the build backend for the VCS package, then the SDK itself (no build isolation so the backend is visible)
RUN pip install --retries 5 --timeout 300 --no-cache-dir hatchling hatch-vcs
RUN pip install --retries 5 --timeout 300 --no-cache-dir --no-build-isolation "git+https://github.com/briangallagher/sdk@training-hub"

# Provide ninja via pip (RHEL/UBI repo ninja-build may be unavailable)
RUN pip install --retries 5 --timeout 300 --no-cache-dir ninja
# Install remaining runtime packages (resolved from default PyPI), including FlashAttention
# Note: We intentionally do not use a Pipfile/lock here to avoid mixing resolvers with the base (uv lock),
# to control CUDA/FA install order and indexes, and to reduce lock churn across arches/ABI-specific wheels.
# Unpinned specifiers are quoted so the shell does not treat ">" as a redirection.
RUN pip install --retries 5 --timeout 300 --no-cache-dir --no-build-isolation \
    flash-attn==2.8.3 \
    accelerate==1.10.0 \
    transformers==4.55.2 \
    peft==0.17.0 \
    tqdm==4.67.1 \
    datasets==4.0.0 \
    "pydantic>=2.11.7" \
    aiofiles==24.1.0 \
    "protobuf>=5.28.0,<6.0.0" \
    "simpleeval>=0.9.13,<1.0" \
    safetensors==0.6.2 \
    packaging==25.0 \
    pyyaml==6.0.2 \
    py-cpuinfo==9.0.0 \
    numba==0.61.2 \
    rich==14.1.0 \
    tensorboard==2.19.0 \
    "bitsandbytes>=0.45.3" \
    liger-kernel==0.5.10 \
    "sentencepiece>=0.1.99,<0.3" \
    tokenizers==0.21.4 \
    training-hub==0.2.0 \
    trl==0.21.0 \
    "deepspeed>=0.14.3" \
    async-timeout==4.0.3 \
    aiohttp==3.12.15 \
    hf-xet==1.1.8 \
    huggingface-hub==0.34.4 \
    mlflow==3.4.0 \
    psutil==7.0.0 \
    && chmod -R g+w /opt/app-root/lib/python3.12/site-packages \
    && fix-permissions /opt/app-root -P
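
# NOTE: Optional build-time check (remove if not needed): confirm flash-attn imports
# against the installed torch (importing should not require a GPU device)
RUN python - <<'PY'
import flash_attn
print("[check] flash-attn", flash_attn.__version__)
PY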

# Provide a POSIX entrypoint wrapper to choose behavior based on invocation
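# A minimal sketch of the dispatch such a wrapper can implement (the actual script
# ships alongside this Dockerfile and may differ):
#   #!/bin/sh
#   # With the default CMD below, "$@" expands to start-notebook.sh (workbench mode);
#   # any user-supplied command replaces it and runs headless.
#   exec "$@"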
COPY --chmod=0755 entrypoint-universal.sh /usr/local/bin/entrypoint-universal.sh

# Set ENTRYPOINT to the wrapper so that providing a command runs headless.
# Default CMD maintains workbench behavior (no args → start-notebook.sh)
ENTRYPOINT ["/usr/local/bin/entrypoint-universal.sh"]
CMD ["start-notebook.sh"]