Refactor Dockerfile for CUDA and Python dependencies

Smartappli · web-flow · commit 5e57238344ae · 2025-11-08T20:51:06.000+01:00
Updated system dependencies and installation steps for Python and CUDA. Improved comments and organized the Dockerfile structure.
diff --git a/Docker/cuda/cuda.Dockerfile b/Docker/cuda/cuda.Dockerfile
@@ -1,47 +1,49 @@
 ARG CUDA_IMAGE=13.0.2-devel-ubuntu24.04
 FROM nvidia/cuda:${CUDA_IMAGE}
 
-# Variables d'environnement de base
 ENV DEBIAN_FRONTEND=noninteractive \
     PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     HOST=0.0.0.0
 
-# Répertoire de travail
 WORKDIR /app
 
-# Dépendances système
+# System dependencies for Python + C/C++ build + OpenCL/BLAS
 RUN apt-get update && \
     apt-get upgrade -y && \
     apt-get install -y --no-install-recommends \
         git build-essential \
-        python3 python3-pip python3-venv \
-        gcc wget \
+        python3 python3-pip python3-dev \
+        cmake ninja-build \
+        gcc g++ wget \
         ocl-icd-opencl-dev opencl-headers clinfo \
         libclblast-dev libopenblas-dev \
     && mkdir -p /etc/OpenCL/vendors \
     && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \
     && rm -rf /var/lib/apt/lists/*
 
-# Copie du code
+# Copy the project (build context) into the working directory
 COPY . .
 
-# Variables pour la build CUDA de llama-cpp
+# Variables for the CUDA build of llama-cpp-python
 ENV CUDA_DOCKER_ARCH=all \
     GGML_CUDA=1 \
-    CMAKE_ARGS="-DGGML_CUDA=on" \
-    FORCE_CMAKE=1
+    FORCE_CMAKE=1 \
+    CMAKE_ARGS="-DGGML_CUDA=on"
 
-# Installation des dépendances Python
-RUN python3 -m pip install --upgrade --no-cache-dir pip && \
+# Install the general Python dependencies (NOTE(review): confirm PEP 668 on Ubuntu 24.04 does not block this system-wide pip install)
+RUN python3 -m pip install --upgrade --no-cache-dir pip wheel && \
     pip install --no-cache-dir \
-        pytest cmake scikit-build setuptools \
+        pytest scikit-build setuptools \
         fastapi uvicorn sse-starlette \
-        pydantic-settings starlette-context && \
-    pip install --no-cache-dir llama-cpp-python
+        pydantic-settings starlette-context
+
+# Install llama-cpp-python with CUDA
+# (you can pin a version if needed, e.g. ==0.3.2)
+RUN pip install --no-cache-dir \
+        "llama-cpp-python" \
+        --verbose
 
-# Port du serveur (optionnel mais pratique pour la doc)
 EXPOSE 8000
 
-# Démarrage du serveur llama-cpp
 CMD ["python3", "-m", "llama_cpp.server", "--config_file", "config-cuda.json"]