diff --git a/.devcontainer/cu126/devcontainer.json b/.devcontainer/cu126/devcontainer.json index 8c0e885fbb..3edf2bf3ea 100644 --- a/.devcontainer/cu126/devcontainer.json +++ b/.devcontainer/cu126/devcontainer.json @@ -1,8 +1,14 @@ { "name": "CUDA Development Container", "build": { - "dockerfile": "../../docker/Dockerfile.cu126.dev", - "context": "../../" + "dockerfile": "../../docker/Dockerfile", + "context": "../../", + "target": "dev", + "args": { + "CUDA_BASE_IMAGE": "nvidia/cuda:12.6.0-devel-ubuntu24.04", + "CUDA_VERSION": "cu126", + "PYTORCH_INDEX": "cu126" + } }, "runArgs": [ "--gpus=all" diff --git a/.devcontainer/cu128/devcontainer.json b/.devcontainer/cu128/devcontainer.json index b1afbf30a5..5bfa17dae1 100644 --- a/.devcontainer/cu128/devcontainer.json +++ b/.devcontainer/cu128/devcontainer.json @@ -1,8 +1,14 @@ { "name": "CUDA Development Container", "build": { - "dockerfile": "../../docker/Dockerfile.cu128.dev", - "context": "../../" + "dockerfile": "../../docker/Dockerfile", + "context": "../../", + "target": "dev", + "args": { + "CUDA_BASE_IMAGE": "nvidia/cuda:12.8.0-devel-ubuntu24.04", + "CUDA_VERSION": "cu128", + "PYTORCH_INDEX": "cu128" + } }, "runArgs": [ "--gpus=all" diff --git a/.devcontainer/cu129/devcontainer.json b/.devcontainer/cu129/devcontainer.json index 5829d9eae6..d25f365329 100644 --- a/.devcontainer/cu129/devcontainer.json +++ b/.devcontainer/cu129/devcontainer.json @@ -1,8 +1,14 @@ { "name": "CUDA Development Container", "build": { - "dockerfile": "../../docker/Dockerfile.cu129.dev", - "context": "../../" + "dockerfile": "../../docker/Dockerfile", + "context": "../../", + "target": "dev", + "args": { + "CUDA_BASE_IMAGE": "nvidia/cuda:12.9.0-devel-ubuntu24.04", + "CUDA_VERSION": "cu129", + "PYTORCH_INDEX": "cu129" + } }, "runArgs": [ "--gpus=all" diff --git a/.devcontainer/cu130/devcontainer.json b/.devcontainer/cu130/devcontainer.json index 08b8d763fd..4894bf67cb 100644 --- a/.devcontainer/cu130/devcontainer.json +++ 
b/.devcontainer/cu130/devcontainer.json @@ -1,8 +1,14 @@ { "name": "CUDA Development Container", "build": { - "dockerfile": "../../docker/Dockerfile.cu130.dev", - "context": "../../" + "dockerfile": "../../docker/Dockerfile", + "context": "../../", + "target": "dev", + "args": { + "CUDA_BASE_IMAGE": "nvidia/cuda:13.0.1-devel-ubuntu24.04", + "CUDA_VERSION": "cu130", + "PYTORCH_INDEX": "cu130" + } }, "runArgs": [ "--gpus=all" diff --git a/.github/workflows/release-ci-docker.yml b/.github/workflows/release-ci-docker.yml index 944d35c730..7f27ad9e15 100644 --- a/.github/workflows/release-ci-docker.yml +++ b/.github/workflows/release-ci-docker.yml @@ -36,7 +36,35 @@ jobs: needs: generate-tag strategy: matrix: cuda: [cu126, cu128, cu129, cu130, cu131] + # Keep `cuda` as a real matrix axis: each `include` entry below only decorates the + # combinations whose `cuda` value it matches, so every cuda x arch job is generated. + include: + - cuda: cu126 + base_image: nvidia/cuda:12.6.0-devel-ubuntu24.04 + nvidia_lib_path: nvidia/cublas/lib + pytorch_index: cu126 + install_tilelang: "false" + - cuda: cu128 + base_image: nvidia/cuda:12.8.0-devel-ubuntu24.04 + nvidia_lib_path: nvidia/cublas/lib + pytorch_index: cu128 + install_tilelang: "false" + - cuda: cu129 + base_image: nvidia/cuda:12.9.0-devel-ubuntu24.04 + nvidia_lib_path: nvidia/cublas/lib + pytorch_index: cu129 + install_tilelang: "false" + - cuda: cu130 + base_image: nvidia/cuda:13.0.1-devel-ubuntu24.04 + nvidia_lib_path: nvidia/cu13/lib + pytorch_index: cu130 + install_tilelang: "false" + - cuda: cu131 + base_image: nvidia/cuda:13.1.1-cudnn-devel-ubuntu24.04 + nvidia_lib_path: nvidia/cu13/lib + pytorch_index: cu130 # TODO: update to cu131 when PyTorch publishes cu131 wheels + install_tilelang: "true" arch: [amd64, arm64] steps: - uses: actions/checkout@v4 @@ -58,7 +86,14 @@ jobs: uses: docker/build-push-action@v5 with: context: .
- file: docker/Dockerfile.${{ matrix.cuda }} + file: docker/Dockerfile + target: test + build-args: | + CUDA_BASE_IMAGE=${{ matrix.base_image }} + CUDA_VERSION=${{ matrix.cuda }} + PYTORCH_INDEX=${{ matrix.pytorch_index }} + NVIDIA_LIB_PATH=${{ matrix.nvidia_lib_path }} + INSTALL_TILELANG=${{ matrix.install_tilelang }} platforms: linux/${{ matrix.arch }} push: ${{ github.event_name != 'pull_request' }} pull: true # Always pull the latest base image diff --git a/docker/Dockerfile.cu131.dev b/docker/Dockerfile similarity index 54% rename from docker/Dockerfile.cu131.dev rename to docker/Dockerfile index a9c6e32c90..60fb95cdd4 100644 --- a/docker/Dockerfile.cu131.dev +++ b/docker/Dockerfile @@ -1,12 +1,56 @@ -FROM nvidia/cuda:13.1.1-cudnn-devel-ubuntu24.04 +ARG CUDA_BASE_IMAGE +FROM ${CUDA_BASE_IMAGE} AS base ENV DEBIAN_FRONTEND=noninteractive -# Update package lists and install system dependencies RUN apt-get update && apt-get install -y \ curl \ git \ - wget \ + wget + +# ---- test target ---- +FROM base AS test + +# Install python +COPY docker/install/install_python.sh /install/install_python.sh +RUN bash /install/install_python.sh /opt/conda py312 + +WORKDIR /workspace + +RUN echo "source activate py312" >> ~/.bashrc +ENV PATH="/opt/conda/bin:$PATH" +ENV PATH="/opt/conda/envs/py312/bin:$PATH" + +# Set LD_LIBRARY_PATH to ensure pip-installed nvidia libs take precedence over system libraries +ARG NVIDIA_LIB_PATH +ENV LD_LIBRARY_PATH="/opt/conda/envs/py312/lib/python3.12/site-packages/${NVIDIA_LIB_PATH}/:$LD_LIBRARY_PATH" + +# Triton +ENV TRITON_PTXAS_PATH="/usr/local/cuda/bin/ptxas" + +# Install torch and other python packages +COPY requirements.txt /install/requirements.txt +COPY docker/install/install_python_packages.sh /install/install_python_packages.sh +ARG CUDA_VERSION +ARG PYTORCH_INDEX=${CUDA_VERSION} +RUN bash /install/install_python_packages.sh ${PYTORCH_INDEX} + +ARG INSTALL_TILELANG=false +RUN if [ "$INSTALL_TILELANG" = "true" ]; then pip install 
tilelang cuda-tile; fi + +# Install mpi4py in the conda environment +RUN conda install -n py312 -y mpi4py mpich + +# Configure pip for user-site installations (allows arbitrary users to install packages) +# This enables 'pip install --user' and 'pip install -e .' to work for any user +RUN mkdir -p /opt/pip-user && chmod 1777 /opt/pip-user +ENV PYTHONUSERBASE=/opt/pip-user +ENV PATH="/opt/pip-user/bin:$PATH" + +# ---- dev target ---- +FROM base AS dev + +RUN apt-get update && apt-get install -y \ clang-format \ clangd-19 \ vim \ @@ -45,13 +89,13 @@ ENV PATH="/home/$USERNAME/conda/bin:$PATH" ENV PATH="/home/$USERNAME/conda/envs/py312/bin:$PATH" # Install torch and other python packages -# TODO: update cu130 -> cu131 when PyTorch starts publishing cu131 wheels COPY requirements.txt /install/requirements.txt COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu130 && pip3 install pre-commit - -# Install tilelang and cuda-tile -RUN pip install tilelang cuda-tile +ARG CUDA_VERSION +ARG PYTORCH_INDEX=${CUDA_VERSION} +ARG INSTALL_TILELANG=false +RUN bash /install/install_python_packages.sh ${PYTORCH_INDEX} && pip3 install pre-commit && \ + if [ "$INSTALL_TILELANG" = "true" ]; then pip install tilelang cuda-tile; fi # Install mpi4py in the conda environment RUN conda install -n py312 -y mpi4py mpich diff --git a/docker/Dockerfile.cu126 b/docker/Dockerfile.cu126 deleted file mode 100644 index fda2f23b91..0000000000 --- a/docker/Dockerfile.cu126 +++ /dev/null @@ -1,37 +0,0 @@ -FROM nvidia/cuda:12.6.0-devel-ubuntu24.04 - -ENV DEBIAN_FRONTEND=noninteractive - -# Update package lists and install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - wget - -# Install python -COPY docker/install/install_python.sh /install/install_python.sh -RUN bash /install/install_python.sh /opt/conda py312 - -# Set home directory -WORKDIR /workspace - -RUN echo "source activate py312" >> 
~/.bashrc -ENV PATH="/opt/conda/bin:$PATH" -ENV PATH="/opt/conda/envs/py312/bin:$PATH" - -# Ensure pip-installed nvidia-cublas takes precedence over system libraries -ENV LD_LIBRARY_PATH="/opt/conda/envs/py312/lib/python3.12/site-packages/nvidia/cublas/lib/:$LD_LIBRARY_PATH" - -# Install torch and other python packages -COPY requirements.txt /install/requirements.txt -COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu126 - -# Install mpi4py in the conda environment -RUN conda install -n py312 -y mpi4py mpich - -# Configure pip for user-site installations (allows arbitrary users to install packages) -# This enables 'pip install --user' and 'pip install -e .' to work for any user -RUN mkdir -p /opt/pip-user && chmod 1777 /opt/pip-user -ENV PYTHONUSERBASE=/opt/pip-user -ENV PATH="/opt/pip-user/bin:$PATH" diff --git a/docker/Dockerfile.cu126.dev b/docker/Dockerfile.cu126.dev deleted file mode 100644 index 816d5af619..0000000000 --- a/docker/Dockerfile.cu126.dev +++ /dev/null @@ -1,73 +0,0 @@ -FROM nvidia/cuda:12.6.0-devel-ubuntu24.04 - -ENV DEBIAN_FRONTEND=noninteractive - -# Update package lists and install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - wget \ - clang-format \ - clangd-19 \ - vim \ - zsh \ - && rm -rf /var/lib/apt/lists/* - -# Create a non-root user -ARG USERNAME=devuser -ARG USER_UID=1003 -ARG USER_GID=$USER_UID - -# Create the user -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ - # [Optional] Add sudo support - && apt-get update \ - && apt-get install -y sudo \ - && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ - && chmod 0440 /etc/sudoers.d/$USERNAME \ - && rm -rf /var/lib/apt/lists/* - -# Remove default 'ubuntu' user (UID 1000) to prevent devcontainer permission conflicts -# Ref: https://github.com/rapidsai/devcontainers/pull/373 -RUN if grep 
ubuntu:x:1000:1000 /etc/passwd >/dev/null; then userdel -f -r ubuntu; fi - -# Switch to non-root user -USER $USERNAME -WORKDIR /home/$USERNAME - -# Install python -COPY docker/install/install_python.sh /install/install_python.sh -RUN bash /install/install_python.sh /home/$USERNAME/conda py312 - -RUN echo "source activate py312" >> ~/.bashrc -ENV PATH="/home/$USERNAME/conda/bin:$PATH" -ENV PATH="/home/$USERNAME/conda/envs/py312/bin:$PATH" - -# Install torch and other python packages -COPY requirements.txt /install/requirements.txt -COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu126 && pip3 install pre-commit - -# Install mpi4py in the conda environment -RUN conda install -n py312 -y mpi4py mpich - -# Install oh-my-zsh -RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended - -# Install zsh-autosuggestions -RUN git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions - -# Configure zsh -RUN sed -i 's/ZSH_THEME="robbyrussell"/ZSH_THEME="fino-time"/' ~/.zshrc && \ - sed -i 's/plugins=(git)/plugins=(git zsh-autosuggestions)/' ~/.zshrc - -# clangd -ENV PATH="/usr/lib/llvm-19/bin:$PATH" - -# Triton -ENV TRITON_PTXAS_PATH="/usr/local/cuda/bin/ptxas" - -# Set zsh as default shell -ENV SHELL=/bin/zsh -CMD [ "zsh" ] diff --git a/docker/Dockerfile.cu128 b/docker/Dockerfile.cu128 deleted file mode 100644 index b43ecfbcda..0000000000 --- a/docker/Dockerfile.cu128 +++ /dev/null @@ -1,37 +0,0 @@ -FROM nvidia/cuda:12.8.0-devel-ubuntu24.04 - -ENV DEBIAN_FRONTEND=noninteractive - -# Update package lists and install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - wget - -# Install python -COPY docker/install/install_python.sh /install/install_python.sh -RUN bash /install/install_python.sh /opt/conda py312 - -# Set home directory -WORKDIR 
/workspace - -RUN echo "source activate py312" >> ~/.bashrc -ENV PATH="/opt/conda/bin:$PATH" -ENV PATH="/opt/conda/envs/py312/bin:$PATH" - -# Ensure pip-installed nvidia-cublas takes precedence over system libraries -ENV LD_LIBRARY_PATH="/opt/conda/envs/py312/lib/python3.12/site-packages/nvidia/cublas/lib/:$LD_LIBRARY_PATH" - -# Install torch and other python packages -COPY requirements.txt /install/requirements.txt -COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu128 - -# Install mpi4py in the conda environment -RUN conda install -n py312 -y mpi4py mpich - -# Configure pip for user-site installations (allows arbitrary users to install packages) -# This enables 'pip install --user' and 'pip install -e .' to work for any user -RUN mkdir -p /opt/pip-user && chmod 1777 /opt/pip-user -ENV PYTHONUSERBASE=/opt/pip-user -ENV PATH="/opt/pip-user/bin:$PATH" diff --git a/docker/Dockerfile.cu128.dev b/docker/Dockerfile.cu128.dev deleted file mode 100644 index 4fb71d9eeb..0000000000 --- a/docker/Dockerfile.cu128.dev +++ /dev/null @@ -1,73 +0,0 @@ -FROM nvidia/cuda:12.8.0-devel-ubuntu24.04 - -ENV DEBIAN_FRONTEND=noninteractive - -# Update package lists and install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - wget \ - clang-format \ - clangd-19 \ - vim \ - zsh \ - && rm -rf /var/lib/apt/lists/* - -# Create a non-root user -ARG USERNAME=devuser -ARG USER_UID=1003 -ARG USER_GID=$USER_UID - -# Create the user -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ - # [Optional] Add sudo support - && apt-get update \ - && apt-get install -y sudo \ - && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ - && chmod 0440 /etc/sudoers.d/$USERNAME \ - && rm -rf /var/lib/apt/lists/* - -# Remove default 'ubuntu' user (UID 1000) to prevent devcontainer permission conflicts -# Ref: 
https://github.com/rapidsai/devcontainers/pull/373 -RUN if grep ubuntu:x:1000:1000 /etc/passwd >/dev/null; then userdel -f -r ubuntu; fi - -# Switch to non-root user -USER $USERNAME -WORKDIR /home/$USERNAME - -# Install python -COPY docker/install/install_python.sh /install/install_python.sh -RUN bash /install/install_python.sh /home/$USERNAME/conda py312 - -RUN echo "source activate py312" >> ~/.bashrc -ENV PATH="/home/$USERNAME/conda/bin:$PATH" -ENV PATH="/home/$USERNAME/conda/envs/py312/bin:$PATH" - -# Install torch and other python packages -COPY requirements.txt /install/requirements.txt -COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu128 && pip3 install pre-commit - -# Install mpi4py in the conda environment -RUN conda install -n py312 -y mpi4py mpich - -# Install oh-my-zsh -RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended - -# Install zsh-autosuggestions -RUN git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions - -# Configure zsh -RUN sed -i 's/ZSH_THEME="robbyrussell"/ZSH_THEME="fino-time"/' ~/.zshrc && \ - sed -i 's/plugins=(git)/plugins=(git zsh-autosuggestions)/' ~/.zshrc - -# clangd -ENV PATH="/usr/lib/llvm-19/bin:$PATH" - -# Triton -ENV TRITON_PTXAS_PATH="/usr/local/cuda/bin/ptxas" - -# Set zsh as default shell -ENV SHELL=/bin/zsh -CMD [ "zsh" ] diff --git a/docker/Dockerfile.cu129 b/docker/Dockerfile.cu129 deleted file mode 100644 index e5607416ff..0000000000 --- a/docker/Dockerfile.cu129 +++ /dev/null @@ -1,40 +0,0 @@ -FROM nvidia/cuda:12.9.0-devel-ubuntu24.04 - -ENV DEBIAN_FRONTEND=noninteractive - -# Update package lists and install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - wget - -# Install python -COPY docker/install/install_python.sh /install/install_python.sh -RUN bash 
/install/install_python.sh /opt/conda py312 - -# Set home directory -WORKDIR /workspace - -RUN echo "source activate py312" >> ~/.bashrc -ENV PATH="/opt/conda/bin:$PATH" -ENV PATH="/opt/conda/envs/py312/bin:$PATH" - -# Ensure pip-installed nvidia-cublas takes precedence over system libraries -ENV LD_LIBRARY_PATH="/opt/conda/envs/py312/lib/python3.12/site-packages/nvidia/cublas/lib/:$LD_LIBRARY_PATH" - -# Triton -ENV TRITON_PTXAS_PATH="/usr/local/cuda/bin/ptxas" - -# Install torch and other python packages -COPY requirements.txt /install/requirements.txt -COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu129 - -# Install mpi4py in the conda environment -RUN conda install -n py312 -y mpi4py mpich - -# Configure pip for user-site installations (allows arbitrary users to install packages) -# This enables 'pip install --user' and 'pip install -e .' to work for any user -RUN mkdir -p /opt/pip-user && chmod 1777 /opt/pip-user -ENV PYTHONUSERBASE=/opt/pip-user -ENV PATH="/opt/pip-user/bin:$PATH" diff --git a/docker/Dockerfile.cu129.dev b/docker/Dockerfile.cu129.dev deleted file mode 100644 index bfba3d95e6..0000000000 --- a/docker/Dockerfile.cu129.dev +++ /dev/null @@ -1,73 +0,0 @@ -FROM nvidia/cuda:12.9.0-devel-ubuntu24.04 - -ENV DEBIAN_FRONTEND=noninteractive - -# Update package lists and install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - wget \ - clang-format \ - clangd-19 \ - vim \ - zsh \ - && rm -rf /var/lib/apt/lists/* - -# Create a non-root user -ARG USERNAME=devuser -ARG USER_UID=1003 -ARG USER_GID=$USER_UID - -# Create the user -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ - # [Optional] Add sudo support - && apt-get update \ - && apt-get install -y sudo \ - && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ - && chmod 0440 /etc/sudoers.d/$USERNAME \ - && rm -rf 
/var/lib/apt/lists/* - -# Remove default 'ubuntu' user (UID 1000) to prevent devcontainer permission conflicts -# Ref: https://github.com/rapidsai/devcontainers/pull/373 -RUN if grep ubuntu:x:1000:1000 /etc/passwd >/dev/null; then userdel -f -r ubuntu; fi - -# Switch to non-root user -USER $USERNAME -WORKDIR /home/$USERNAME - -# Install python -COPY docker/install/install_python.sh /install/install_python.sh -RUN bash /install/install_python.sh /home/$USERNAME/conda py312 - -RUN echo "source activate py312" >> ~/.bashrc -ENV PATH="/home/$USERNAME/conda/bin:$PATH" -ENV PATH="/home/$USERNAME/conda/envs/py312/bin:$PATH" - -# Install torch and other python packages -COPY requirements.txt /install/requirements.txt -COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu129 && pip3 install pre-commit - -# Install mpi4py in the conda environment -RUN conda install -n py312 -y mpi4py mpich - -# Install oh-my-zsh -RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended - -# Install zsh-autosuggestions -RUN git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions - -# Configure zsh -RUN sed -i 's/ZSH_THEME="robbyrussell"/ZSH_THEME="fino-time"/' ~/.zshrc && \ - sed -i 's/plugins=(git)/plugins=(git zsh-autosuggestions)/' ~/.zshrc - -# clangd -ENV PATH="/usr/lib/llvm-19/bin:$PATH" - -# Triton -ENV TRITON_PTXAS_PATH="/usr/local/cuda/bin/ptxas" - -# Set zsh as default shell -ENV SHELL=/bin/zsh -CMD [ "zsh" ] diff --git a/docker/Dockerfile.cu130 b/docker/Dockerfile.cu130 deleted file mode 100644 index fb68419e5a..0000000000 --- a/docker/Dockerfile.cu130 +++ /dev/null @@ -1,40 +0,0 @@ -FROM nvidia/cuda:13.0.1-devel-ubuntu24.04 - -ENV DEBIAN_FRONTEND=noninteractive - -# Update package lists and install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - 
wget - -# Install python -COPY docker/install/install_python.sh /install/install_python.sh -RUN bash /install/install_python.sh /opt/conda py312 - -# Set home directory -WORKDIR /workspace - -RUN echo "source activate py312" >> ~/.bashrc -ENV PATH="/opt/conda/bin:$PATH" -ENV PATH="/opt/conda/envs/py312/bin:$PATH" - -# Set LD_LIBRARY_PATH to ensure pip-installed nvidia-cublas takes precedence over system libraries -ENV LD_LIBRARY_PATH="/opt/conda/envs/py312/lib/python3.12/site-packages/nvidia/cu13/lib/:$LD_LIBRARY_PATH" - -# Triton -ENV TRITON_PTXAS_PATH="/usr/local/cuda/bin/ptxas" - -# Install torch and other python packages -COPY requirements.txt /install/requirements.txt -COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu130 - -# Install mpi4py in the conda environment -RUN conda install -n py312 -y mpi4py mpich - -# Configure pip for user-site installations (allows arbitrary users to install packages) -# This enables 'pip install --user' and 'pip install -e .' 
to work for any user -RUN mkdir -p /opt/pip-user && chmod 1777 /opt/pip-user -ENV PYTHONUSERBASE=/opt/pip-user -ENV PATH="/opt/pip-user/bin:$PATH" diff --git a/docker/Dockerfile.cu130.dev b/docker/Dockerfile.cu130.dev deleted file mode 100644 index 56762e242b..0000000000 --- a/docker/Dockerfile.cu130.dev +++ /dev/null @@ -1,73 +0,0 @@ -FROM nvidia/cuda:13.0.0-devel-ubuntu24.04 - -ENV DEBIAN_FRONTEND=noninteractive - -# Update package lists and install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - wget \ - clang-format \ - clangd-19 \ - vim \ - zsh \ - && rm -rf /var/lib/apt/lists/* - -# Create a non-root user -ARG USERNAME=devuser -ARG USER_UID=1003 -ARG USER_GID=$USER_UID - -# Create the user -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ - # [Optional] Add sudo support - && apt-get update \ - && apt-get install -y sudo \ - && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ - && chmod 0440 /etc/sudoers.d/$USERNAME \ - && rm -rf /var/lib/apt/lists/* - -# Remove default 'ubuntu' user (UID 1000) to prevent devcontainer permission conflicts -# Ref: https://github.com/rapidsai/devcontainers/pull/373 -RUN if grep ubuntu:x:1000:1000 /etc/passwd >/dev/null; then userdel -f -r ubuntu; fi - -# Switch to non-root user -USER $USERNAME -WORKDIR /home/$USERNAME - -# Install python -COPY docker/install/install_python.sh /install/install_python.sh -RUN bash /install/install_python.sh /home/$USERNAME/conda py312 - -RUN echo "source activate py312" >> ~/.bashrc -ENV PATH="/home/$USERNAME/conda/bin:$PATH" -ENV PATH="/home/$USERNAME/conda/envs/py312/bin:$PATH" - -# Install torch and other python packages -COPY requirements.txt /install/requirements.txt -COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu130 && pip3 install pre-commit - -# Install mpi4py in the conda environment -RUN conda 
install -n py312 -y mpi4py mpich - -# Install oh-my-zsh -RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended - -# Install zsh-autosuggestions -RUN git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions - -# Configure zsh -RUN sed -i 's/ZSH_THEME="robbyrussell"/ZSH_THEME="fino-time"/' ~/.zshrc && \ - sed -i 's/plugins=(git)/plugins=(git zsh-autosuggestions)/' ~/.zshrc - -# clangd -ENV PATH="/usr/lib/llvm-19/bin:$PATH" - -# Triton -ENV TRITON_PTXAS_PATH="/usr/local/cuda/bin/ptxas" - -# Set zsh as default shell -ENV SHELL=/bin/zsh -CMD [ "zsh" ] diff --git a/docker/Dockerfile.cu131 b/docker/Dockerfile.cu131 deleted file mode 100644 index 7b18404d81..0000000000 --- a/docker/Dockerfile.cu131 +++ /dev/null @@ -1,44 +0,0 @@ -FROM nvidia/cuda:13.1.1-cudnn-devel-ubuntu24.04 - -ENV DEBIAN_FRONTEND=noninteractive - -# Update package lists and install system dependencies -RUN apt-get update && apt-get install -y \ - curl \ - git \ - wget - -# Install python -COPY docker/install/install_python.sh /install/install_python.sh -RUN bash /install/install_python.sh /opt/conda py312 - -# Set home directory -WORKDIR /workspace - -RUN echo "source activate py312" >> ~/.bashrc -ENV PATH="/opt/conda/bin:$PATH" -ENV PATH="/opt/conda/envs/py312/bin:$PATH" - -# Set LD_LIBRARY_PATH to ensure pip-installed nvidia-cublas takes precedence over system libraries -ENV LD_LIBRARY_PATH="/opt/conda/envs/py312/lib/python3.12/site-packages/nvidia/cu13/lib/:$LD_LIBRARY_PATH" - -# Triton -ENV TRITON_PTXAS_PATH="/usr/local/cuda/bin/ptxas" - -# Install torch and other python packages -# TODO: update cu130 -> cu131 when PyTorch starts publishing cu131 wheels -COPY requirements.txt /install/requirements.txt -COPY docker/install/install_python_packages.sh /install/install_python_packages.sh -RUN bash /install/install_python_packages.sh cu130 - -# Install tilelang and cuda-tile -RUN 
pip install tilelang cuda-tile - -# Install mpi4py in the conda environment -RUN conda install -n py312 -y mpi4py mpich - -# Configure pip for user-site installations (allows arbitrary users to install packages) -# This enables 'pip install --user' and 'pip install -e .' to work for any user -RUN mkdir -p /opt/pip-user && chmod 1777 /opt/pip-user -ENV PYTHONUSERBASE=/opt/pip-user -ENV PATH="/opt/pip-user/bin:$PATH"