@@ -16,23 +16,33 @@ ENV TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.2;7.5;8.0;8.6"
# NOTE(review): TORCH_DONT_CHECK_COMPILER_ABI is presumably read by
# torch.utils.cpp_extension to skip its compiler ABI check; confirm it is
# still required.
ENV TORCH_DONT_CHECK_COMPILER_ABI=1

# wget is needed to download the NVIDIA repository keyring in the next step.
# apt-get is used rather than apt: apt's command-line interface is intended
# for interactive use and is not guaranteed to be stable in scripts.
RUN apt-get update \
    && apt-get install -y --no-install-recommends wget
2020
# Download the cuda-keyring package from NVIDIA's Debian 12 / x86_64 CUDA
# repository, install it with dpkg, and delete the downloaded .deb in the
# same layer.
RUN keyring_deb=cuda-keyring_1.1-1_all.deb \
    && wget "https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/${keyring_deb}" \
    && dpkg -i "${keyring_deb}" \
    && rm -rf "${keyring_deb}"
2424
# Install the CUDA 12.9 library and compiler packages, clear apt's caches and
# package lists in the same layer, and append the /usr/local/nvidia library
# directories to the dynamic linker configuration.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        cuda-compiler-12-9 \
        cuda-libraries-12-9 \
    && apt-get clean \
    && rm -rf /var/cache/apt/* /var/lib/apt/lists/* \
    && echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \
    && echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
33+
# Delete selected files from the CUDA 12.9 library directory: the ".alt"
# NVRTC variants, libcusolverMg and the static libcufilt archive.
# NOTE(review): presumably these are unused by the trainer and are removed to
# shrink the /usr/local tree that the final stage copies; confirm.
RUN cuda_lib=/usr/local/cuda-12.9/targets/x86_64-linux/lib \
    && rm -rf \
        "${cuda_lib}/libcufilt.a" \
        "${cuda_lib}/libcusolverMg.so.11" \
        "${cuda_lib}/libcusolverMg.so.11.7.5.82" \
        "${cuda_lib}/libnvrtc-builtins.alt.so.12.9" \
        "${cuda_lib}/libnvrtc-builtins.alt.so.12.9.86" \
        "${cuda_lib}/libnvrtc.alt.so.12" \
        "${cuda_lib}/libnvrtc.alt.so.12.9.86"
3341
# step 2: get python dependencies
# File-descriptor limits cannot be raised from a RUN step: `ulimit` only
# changes the limit of the shell that executes that single step and nothing
# persists into later layers. If the steps below need a higher limit, pass
# `--ulimit nofile=65536:65536` to `docker build` instead.
FROM base AS python_dependencies
3646
# uv configuration for the dependency installation in this stage.
# NOTE(review): per uv's documentation UV_COMPILE_BYTECODE=1 compiles .pyc
# files at install time and UV_LINK_MODE=copy copies packages out of the
# cache instead of linking them; confirm against the uv version in use.
ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy
@@ -62,25 +72,60 @@ RUN --mount=type=cache,target=/root/.cache/uv \
6272 --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
6373 uv sync --frozen --no-dev --no-editable
6474
# Remove the packaging tools (setuptools, pip, wheel) and pip's cache.
# NOTE(review): this runs in the python_dependencies stage only, so it does
# not alter any other stage's filesystem; verify that the final image does
# not still need the same cleanup.
RUN pip3 uninstall -y setuptools pip wheel \
    && rm -rf /root/.cache/pip
78+
# Prune files from the trainer's virtual environment before it is copied into
# the final image: CUDA header directories, several CUPTI helper libraries,
# the ".alt" NVRTC variants, libcusolverMg, cuDNN package stubs/bytecode and
# Triton's NVIDIA backend directory.
# NOTE(review): the two cuda_nvrtc entries without a lib/ component, and the
# 12.9 vs 12.8 suffix on libnvrtc-builtins.alt.so, look inconsistent with the
# lib/ entries at the end of the list. `rm -rf` silently ignores paths that do
# not exist, so check which of them actually match files in the installed wheels.
RUN site_packages=/interactive_ai/workflows/train/trainer/.venv/lib/python3.10/site-packages \
    && rm -rf \
        "${site_packages}/nvidia/cublas/include" \
        "${site_packages}/nvidia/cuda_cupti/include" \
        "${site_packages}/nvidia/cuda_cupti/lib/libcheckpoint.so" \
        "${site_packages}/nvidia/cuda_cupti/lib/libnvperf_host.so" \
        "${site_packages}/nvidia/cuda_cupti/lib/libnvperf_target.so" \
        "${site_packages}/nvidia/cuda_cupti/lib/libpcsamplingutil.so" \
        "${site_packages}/nvidia/cuda_nvrtc/libnvrtc.alt.so.12" \
        "${site_packages}/nvidia/cuda_nvrtc/libnvrtc-builtins.alt.so.12.9" \
        "${site_packages}/nvidia/cuda_runtime/include" \
        "${site_packages}/nvidia/cufft/include" \
        "${site_packages}/nvidia/curand/include" \
        "${site_packages}/nvidia/cusolver/include" \
        "${site_packages}/nvidia/cusolver/lib/libcusolverMg.so.11" \
        "${site_packages}/nvidia/cusparse/include" \
        "${site_packages}/nvidia/nvjitlink/include" \
        "${site_packages}/nvidia/cudnn/include" \
        "${site_packages}/nvidia/cudnn/lib/__init__.py" \
        "${site_packages}/nvidia/cudnn/lib/__pycache__" \
        "${site_packages}/nvidia/cudnn/__pycache__" \
        "${site_packages}/triton/backends/nvidia" \
        "${site_packages}/nvidia/cuda_nvrtc/lib/libnvrtc.alt.so.12" \
        "${site_packages}/nvidia/cuda_nvrtc/lib/libnvrtc-builtins.alt.so.12.8"
102+
# Final runtime image; the base is pinned by digest so rebuilds are reproducible.
# Open-file limits have to be set when the container is started (for example
# `docker run --ulimit nofile=65536:65536`): a `RUN ulimit` step only affects
# the shell of that one build step and leaves no trace in the image.
FROM python:3.10-slim-bookworm@sha256:cda0e2fa3894f9ada2c652534e6de6b2fe65687c63208a77e9e8d88231547902 AS runtime
106+
# Install runtime dependencies (one package per line, sorted) and remove the
# apt package lists in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        libgl1=1.6.* \
        libglib2.0-0=2.74.* \
    && rm -rf /var/lib/apt/lists/*
114+
# Create the unprivileged account with a fixed UID of 10001 and make it the
# user for the remainder of this stage.
RUN useradd --no-log-init --uid 10001 non-root

USER non-root
# useradd is invoked without a home-creation flag above; this WORKDIR creates
# /home/non-root if it does not already exist.
WORKDIR /home/non-root
75119
# Bring in the CUDA installation and the prepared application tree from the
# earlier build stages, owned by UID 10001.
COPY --link --from=cuda --chown=10001 /usr/local /usr/local
COPY --link --from=python_dependencies --chown=10001 /interactive_ai /interactive_ai

WORKDIR /interactive_ai/workflows/train/trainer

# Add the trainer's entry points from the build context; relative
# destinations resolve against the WORKDIR set above.
COPY --link scripts/ ./scripts
COPY --link run ./run
COPY --link download_pretrained_weights.py ./download_pretrained_weights.py
78128
# Place executables in the environment at the front of the path, make the
# trainer directory importable, and set HF_HUB_OFFLINE.
# NOTE(review): HF_HUB_OFFLINE=1 is presumably Hugging Face Hub's offline
# switch, so weights must already be present in the image; confirm.
ENV PATH="/interactive_ai/workflows/train/trainer/.venv/bin:/interactive_ai/workflows/train/trainer:$PATH" \
    PYTHONPATH="/interactive_ai/workflows/train/trainer" \
    HF_HUB_OFFLINE=1
0 commit comments