Skip to content

Commit 7718981

Browse files
committed
build(trainer): add base image for vllm
1 parent 8ec0741 commit 7718981

File tree

5 files changed

+71
-3
lines changed

5 files changed

+71
-3
lines changed

.github/workflows/build-training-base-image.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ on:
88
required: true
99
type: choice
1010
options:
11+
- 25.11-cu130-torch290-vllm0112
1112
- 25.06-cu129-torch280-sglang055
1213
- 25.03-cu128-torch271-sglang048
1314

extra/docker/training-base/25.03-cu128-torch271-sglang048.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,4 @@ RUN echo 'set -g default-terminal "tmux-256color"' > /root/.tmux.conf && \
5959
COPY . /workspace/agentrl
6060
RUN --mount=type=cache,target=/root/.cache/uv \
6161
uv pip install --system --no-deps \
62-
-e /workspace/agentrl/trainer[megatron]
62+
-e "/workspace/agentrl/trainer[sglang,megatron]"

extra/docker/training-base/25.06-cu129-torch280-sglang055.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,4 @@ RUN echo 'set -g default-terminal "tmux-256color"' > /root/.tmux.conf && \
5757
COPY . /workspace/agentrl
5858
RUN --mount=type=cache,target=/root/.cache/uv \
5959
uv pip install --system --no-deps \
60-
-e /workspace/agentrl/trainer[megatron]
60+
-e "/workspace/agentrl/trainer[sglang,megatron]"
extra/docker/training-base/25.11-cu130-torch290-vllm0112.Dockerfile

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
### Common dependencies for the training environment
2+
# May not be up to date, double-check before using
3+
4+
FROM nvcr.io/nvidia/cuda-dl-base:25.11-cuda13.0-devel-ubuntu24.04

# Environment shared by every later build step and by the running container,
# declared in a single instruction:
#   - non-interactive apt front end, UTF-8 locale, unbuffered Python output
#   - uv settings (system-package override, copy link mode, no build isolation)
#   - /root/.local/bin prepended to PATH
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    PYTHONUNBUFFERED=1 \
    UV_BREAK_SYSTEM_PACKAGES=1 \
    UV_LINK_MODE=copy \
    UV_NO_BUILD_ISOLATION=1 \
    PATH="/root/.local/bin:${PATH}"

WORKDIR /workspace
16+
17+
### 1. install python and base tooling
18+
# Install the Python interpreter and headers together with the command-line
# tooling used in the image, then drop the apt caches within the same layer.
RUN apt-get update \
 && apt-get install -y \
        ca-certificates \
        curl \
        git \
        htop \
        ncurses-term \
        parallel \
        python-is-python3 \
        python3 \
        python3-dev \
        tmux \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
24+
25+
### 2. install uv and python dependencies
26+
# Download the uv installer to a file and run it from there. With
# `curl ... | sh` the exit status of the pipeline is that of `sh` only, so a
# failed download would be masked and this layer would "succeed" without uv.
RUN curl -fsSL https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
    sh /tmp/uv-install.sh && \
    rm -f /tmp/uv-install.sh
27+
28+
# Upgrade the generic Python build helpers in the system environment.
# NOTE(review): presumably installed up front because UV_NO_BUILD_ISOLATION=1
# is set, so source builds rely on what is already installed; confirm.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --upgrade \
        ninja \
        packaging \
        psutil \
        pybind11 \
        setuptools
30+
# Pinned PyTorch stack; the PyTorch cu130 wheel index is added as an extra index.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system \
        --extra-index-url https://download.pytorch.org/whl/cu130 \
        torch==2.9.0 \
        torchaudio==2.9.0 \
        torchvision==0.24.0
34+
# Install vLLM, flash-attn, Megatron and the remaining trainer dependencies.
# vLLM and flash-attn are taken from prebuilt wheels whose file names are tagged
# cu130 (flash-attn additionally torch2.9 / cp312); the PyTorch cu130 index is
# kept as an extra index.
# Requirements that carry extras are quoted so the shell can never interpret
# "[...]" as a glob pattern, matching the quoted editable trainer install.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system \
        --extra-index-url https://download.pytorch.org/whl/cu130 \
        https://github.com/vllm-project/vllm/releases/download/v0.11.2/vllm-0.11.2+cu130-cp38-abi3-manylinux1_x86_64.whl \
        https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.22/flash_attn-2.8.1+cu130torch2.9-cp312-cp312-linux_x86_64.whl \
        megatron-core "transformer-engine[pytorch]" \
        accelerate aiohttp binpacking filelock numpy Pillow \
        PyYAML "ray[rllib]" requests tensordict transformers \
        wandb nvitop py-spy
43+
44+
### 3. configure utils
45+
# Write the tmux configuration (file is created/truncated) and append the
# shell aliases to root's .bashrc, one entry per line.
RUN printf '%s\n' \
        'set -g default-terminal "tmux-256color"' \
        "set -ga terminal-overrides ',*:Tc'" \
        'set-environment -g LANG "C.UTF-8"' \
        'set-environment -g LC_ALL "C.UTF-8"' \
        'set-option -g history-limit 50000' \
        'set-option -g mouse on' \
        > /root/.tmux.conf && \
    printf '%s\n' \
        'alias pip="uv pip"' \
        'alias tt="tmux attach -t"' \
        'alias tn="tmux new -s"' \
        'alias dp="ls -A | parallel du -sh 2>/dev/null | sort -h"' \
        'alias ds="du -sh .[!.]* * 2>/dev/null | sort -h"' \
        'alias pd="py-spy dump --pid"' \
        >> /root/.bashrc
57+
58+
### 4. install current agentrl trainer
59+
# Copy the build context into the image and install the trainer from it in
# editable mode. --no-deps: no dependencies are resolved or installed here.
COPY . /workspace/agentrl
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --no-deps -e "/workspace/agentrl/trainer[vllm,megatron]"

trainer/pyproject.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,19 @@ dependencies = [
2929
"PyYAML",
3030
"ray[rllib]",
3131
"requests",
32-
"sglang[all]>=0.4.8",
3332
"tensordict",
3433
"torch>=2.6.0",
3534
"transformers",
3635
"wandb"
3736
]
3837

3938
[project.optional-dependencies]
39+
sglang = [
40+
"sglang[all]>=0.4.8"
41+
]
42+
vllm = [
43+
"vllm>=0.11.2"
44+
]
4045
megatron = [
4146
"megatron-core",
4247
"transformer-engine[pytorch]",

0 commit comments

Comments
 (0)