Skip to content

Commit 519add9

Browse files
committed
Document reference Dockerfile
I don't know whether the forge project actually wants to maintain a Dockerfile, but this one worked for me as of commit `bcd86f00f967b5c7d833059095ae3a8ba126e28a`, and I wanted to at least open the PR so that it's documented somewhere. Someone else may also want to run forge in a container in the future.
1 parent bcd86f0 commit 519add9

File tree

1 file changed

+176
-0
lines changed

1 file changed

+176
-0
lines changed

Dockerfile

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# ================================================================================================
# Stage 1: Download vLLM wheel and build torchtitan wheel
# ================================================================================================
FROM ubuntu:22.04 AS wheel-downloader

# Tooling for this stage: curl + jq to query the release API, git to fetch
# sources, and python3.10 + pip to build wheels. The apt lists are removed in
# the same layer so they do not add to the layer size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        jq \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*
15+
16+
# Coordinates of the GitHub release that hosts the pre-built vLLM wheel
ARG GITHUB_REPO="meta-pytorch/forge"
ARG RELEASE_TAG="v0.0.0-93025"
# torchtitan commit to build. From assets/versions.sh - compatible with PyTorch 2.9.
# This note lives on its own line on purpose: Dockerfile comments are only
# recognised at the start of a line, so a trailing "# ..." after the value
# would be parsed as additional ARG arguments rather than ignored.
ARG TORCHTITAN_COMMIT="0cfbd0b3c2d827af629a107a77a9e47229c31663"
20+
21+
WORKDIR /tmp/download

# Download the vLLM wheel attached to the ${RELEASE_TAG} release of ${GITHUB_REPO}.
#   - curl -f makes an HTTP error (rate limit, unknown tag, ...) fail the build
#     instead of handing an error page to jq.
#   - printf '%s\n' is used instead of echo because /bin/sh's echo may interpret
#     backslash escapes inside the JSON payload before jq sees it.
#   - The build stops early with a clear message if no asset name contains "vllm".
RUN echo "Fetching vLLM wheel from GitHub release ${RELEASE_TAG}..." && \
    # Get the release information from GitHub API
    RELEASE_INFO=$(curl -fsS "https://api.github.com/repos/${GITHUB_REPO}/releases/tags/${RELEASE_TAG}") && \
    # Extract the vLLM wheel download URL and file name (first matching asset)
    VLLM_URL=$(printf '%s\n' "$RELEASE_INFO" | jq -r '.assets[] | select(.name | contains("vllm")) | .browser_download_url' | head -1) && \
    VLLM_NAME=$(printf '%s\n' "$RELEASE_INFO" | jq -r '.assets[] | select(.name | contains("vllm")) | .name' | head -1) && \
    if [ -z "$VLLM_URL" ] || [ -z "$VLLM_NAME" ]; then \
        echo "No vLLM wheel asset found in release ${RELEASE_TAG} of ${GITHUB_REPO}" >&2; \
        exit 1; \
    fi && \
    echo "Downloading: $VLLM_NAME" && \
    echo "URL: $VLLM_URL" && \
    # Download the wheel (follow redirects, fail on HTTP errors)
    curl -fL -o "/tmp/download/${VLLM_NAME}" "${VLLM_URL}" && \
    echo "vLLM download complete"
35+
36+
# Build the torchtitan wheel at the pinned commit.
# The repository is cloned to /tmp/torchtitan, the commit is checked out, and
# the resulting wheel (built without dependencies) is written to /tmp/download.
RUN echo "Building torchtitan from commit ${TORCHTITAN_COMMIT}..." \
    && git clone https://github.com/pytorch/torchtitan.git /tmp/torchtitan \
    && cd /tmp/torchtitan \
    && git checkout ${TORCHTITAN_COMMIT} \
    && python3 -m pip install --upgrade pip wheel \
    && pip wheel --no-deps . -w /tmp/download \
    && echo "torchtitan build complete: $(ls -lh /tmp/download/*.whl)"
45+
46+
# ================================================================================================
# Stage 2: Main application image
# ================================================================================================
FROM nvidia/cuda:12.9.1-base-ubuntu22.04

# Image metadata, declared in a single LABEL instruction
LABEL maintainer="PyTorch Team" \
      description="Forge - A PyTorch-native agentic RL library for post-training large language models" \
      cuda.version="12.9.1" \
      python.version="3.10"
56+
57+
# Avoid interactive prompts from apt during the build.
# Declared as ARG rather than ENV: the value is visible to the RUN instructions
# of this stage but is not persisted into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
59+
60+
# ================================================================================================
# Install system dependencies
# ================================================================================================
# Two groups of packages are installed in one layer:
#   - build tools (needed for compiling Python extensions):
#       build-essential, ca-certificates, curl, git
#   - RDMA/InfiniBand libraries for distributed training:
#       libibverbs-dev, libibverbs1, libmlx5-1, rdma-core
# The apt lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        libibverbs-dev \
        libibverbs1 \
        libmlx5-1 \
        rdma-core \
    && rm -rf /var/lib/apt/lists/*
75+
76+
# ================================================================================================
# Set up CUDA environment variables
# ================================================================================================
# These environment variables match those set by cuda_env.sh in the installation script.
#
# CUDA_VERSION and CUDA_HOME each get their own ENV instruction: variable
# references inside one ENV instruction are resolved against values set by
# *earlier* instructions, so CUDA_HOME must come after CUDA_VERSION, and the
# derived variables must come after CUDA_HOME.
ENV CUDA_VERSION=12.9
ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}

# Derived settings:
#   - compiler / include / cudart locations under CUDA_HOME
#   - CUDA binaries prepended to PATH
#   - CUDA compat libs prepended to LD_LIBRARY_PATH (critical for PyTorch and
#     vLLM to find CUDA libraries)
ENV NVCC=${CUDA_HOME}/bin/nvcc \
    CUDA_NVCC_EXECUTABLE=${CUDA_HOME}/bin/nvcc \
    CUDA_INCLUDE_DIRS=${CUDA_HOME}/include \
    CUDA_CUDART_LIBRARY=${CUDA_HOME}/lib64/libcudart.so \
    PATH="${CUDA_HOME}/bin:${PATH}" \
    LD_LIBRARY_PATH="${CUDA_HOME}/compat:${LD_LIBRARY_PATH}"

# Temporary flag required by Monarch
ENV MONARCH_HOST_MESH_V1_REMOVE_ME_BEFORE_RELEASE=1
96+
97+
# Create a ${CUDA_HOME} symlink when only the unversioned /usr/local/cuda exists.
# CUDA_HOME (set via ENV earlier in this stage) is used instead of repeating the
# versioned path, so this step keeps working if CUDA_VERSION is changed.
RUN if [ ! -d "${CUDA_HOME}" ] && [ -d "/usr/local/cuda" ]; then \
        ln -s /usr/local/cuda "${CUDA_HOME}"; \
    fi
101+
102+
# ================================================================================================
# Install uv package manager
# ================================================================================================
# Install uv - a fast Python package installer.
# The installer script is downloaded to a file and then executed, rather than
# piped straight into sh: with the default /bin/sh (no pipefail) a failed
# download in "curl | sh" would be masked by sh exiting successfully on empty input.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
    sh /tmp/uv-install.sh && \
    rm -f /tmp/uv-install.sh && \
    # Make uv available in PATH
    ln -s /root/.local/bin/uv /usr/local/bin/uv && \
    ln -s /root/.local/bin/uvx /usr/local/bin/uvx
110+
111+
# Set working directory early so venv is created in project directory
WORKDIR /workspace

# Install Python 3.10 via uv (required by monarch wheel cp310) and create the
# project virtual environment from it.
# The patch version is pinned to 3.10.19 because LD_LIBRARY_PATH later in this
# file hard-codes the uv install directory "cpython-3.10.19-linux-x86_64-gnu";
# a floating "3.10" could resolve to a different patch release and leave that
# path pointing at a directory that does not exist. Keep the two in sync.
RUN uv python install 3.10.19 && \
    uv venv --python 3.10.19 .venv
118+
119+
# "Activate" the virtual environment for every later instruction and for
# running containers:
#   - VIRTUAL_ENV / PATH point at /workspace/.venv
#   - LD_LIBRARY_PATH gains the lib directory of the uv-managed interpreter,
#     because Monarch needs libpython3.10.so.1.0, which is provided by uv's
#     Python installation. The directory name embeds the exact interpreter
#     version and platform (3.10.19, linux-x86_64-gnu).
ENV VIRTUAL_ENV="/workspace/.venv" \
    PATH="/workspace/.venv/bin:${PATH}" \
    LD_LIBRARY_PATH="/root/.local/share/uv/python/cpython-3.10.19-linux-x86_64-gnu/lib:${LD_LIBRARY_PATH}"
126+
127+
# ================================================================================================
# Install PyTorch nightly with uv
# ================================================================================================
# PyTorch nightly built for CUDA 12.9, pinned through the PYTORCH_VERSION build arg.
# Kept in a dedicated layer: the download is large and benefits from layer caching.
ARG PYTORCH_VERSION="2.9.0.dev20250905"
RUN uv pip install --no-cache \
        --index-url https://download.pytorch.org/whl/nightly/cu129 \
        torch==${PYTORCH_VERSION}
136+
137+
# ================================================================================================
# Install pre-built wheels
# ================================================================================================
# The wheels are bind-mounted for the duration of this single RUN instead of
# being COPY'd into the image and removed afterwards: files added by COPY stay
# in their layer even if a later RUN deletes them, so the previous
# "COPY ... && rm -rf /tmp/wheels" sequence still shipped every wheel in the image.
# (RUN --mount requires BuildKit.)
#
# Sources:
#   - /tmp/wheels/local : assets/wheels from the build context; only the monarch
#                         and torchstore wheels are installed from here
#                         (torchtitan is excluded - it is built fresh in stage 1)
#   - /tmp/wheels/built : vLLM (downloaded) and torchtitan (built) wheels from
#                         the wheel-downloader stage
RUN --mount=type=bind,source=assets/wheels,target=/tmp/wheels/local \
    --mount=type=bind,from=wheel-downloader,source=/tmp/download,target=/tmp/wheels/built \
    uv pip install --no-cache \
        /tmp/wheels/local/monarch*.whl \
        /tmp/wheels/local/torchstore*.whl \
        /tmp/wheels/built/*.whl
153+
154+
# ================================================================================================
# Install Forge
# ================================================================================================
# Bring the whole build context into /workspace; anything listed in
# .dockerignore is left out.
COPY . /workspace/

# Regular (non-editable) install of the forge package together with its
# dependencies into the active virtual environment.
RUN uv pip install --no-cache .
164+
165+
# ================================================================================================
# Final setup
# ================================================================================================
# Smoke-test the installation with basic imports that do not require a GPU.
# "python" resolves to the virtual environment's interpreter because the venv's
# bin directory is first on PATH.
RUN python -c "import torch; print(f'PyTorch version: {torch.__version__}')" \
    && python -c "import vllm; print('vLLM imported successfully')" \
    && python -c "import forge; print('Forge imported successfully')"

# Default to an interactive bash shell; override the command at `docker run`
# time to launch something else.
CMD ["/bin/bash"]

0 commit comments

Comments
 (0)