55# docker run -it --gpus all --shm-size="64g" --rm -v $PWD:/workspace -v <root_path_of_data_and_checkpoints>:/data trinity-rft-megatron:latest
66
77
8- FROM nvcr.io/nvidia/cuda:12.6.3 -cudnn-devel-ubuntu22.04
8+ FROM nvcr.io/nvidia/cuda:12.8.1 -cudnn-devel-ubuntu22.04
99
1010WORKDIR /workspace
1111
12- # copy the Trinity-RFT dir into the workspace
13- COPY . .
14-
1512RUN apt update && apt install -y \
1613 build-essential \
1714 curl git wget vim tmux net-tools \
@@ -22,17 +19,21 @@ RUN apt update && apt install -y \
2219 && ln -sf /usr/bin/pip3 /usr/bin/pip
2320
2421# For Aliyun users: update pip mirror to aliyun to speed up pip install
25- RUN pip config set global.index-url http://mirrors.cloud.aliyuncs.com/pypi/simple/ \
26- && pip config set install.trusted-host mirrors.cloud.aliyuncs.com
22+ # RUN pip config set global.index-url http://mirrors.cloud.aliyuncs.com/pypi/simple/ \
23+ # && pip config set install.trusted-host mirrors.cloud.aliyuncs.com
24+
25+ # copy the Trinity-RFT dir into the workspace
26+ COPY . .
2727
2828# Install Trinity-RFT with Megatron
2929RUN pip install --upgrade pip \
30+ && pip install -e .[dev] \
31+ && pip install flash_attn==2.8.1 --no-build-isolation \
3032 && pip install -e .[megatron] \
31- && pip install flash-attn==2.8.1 \
3233 && pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
3334 --config-settings "--build-option=--cpp_ext" \
3435 --config-settings "--build-option=--cuda_ext" \
35- --resume-retries 999 git+https://github.com/NVIDIA/apex.git
36+ --resume-retries 20 git+https://github.com/NVIDIA/apex.git
3637
3738# Set Env variables
3839
0 commit comments