diff --git a/build/Dockerfile b/build/Dockerfile
index c8aaab5fc..fbf66209c 100644
--- a/build/Dockerfile
+++ b/build/Dockerfile
@@ -88,7 +88,8 @@ ENV NV_CUDA_CUDART_DEV_VERSION=12.1.55-1 \
     NV_NVML_DEV_VERSION=12.1.55-1 \
     NV_LIBCUBLAS_DEV_VERSION=12.1.0.26-1 \
     NV_LIBNPP_DEV_VERSION=12.0.2.50-1 \
-    NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.3-1+cuda12.1
+    NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.3-1+cuda12.1 \
+    NV_CUDNN9_CUDA_VERSION=9.6.0.74-1
 
 RUN dnf config-manager \
     --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
@@ -103,6 +104,15 @@ RUN dnf config-manager \
     libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
     && dnf clean all
 
+# opening connection for too long in one go was resulting in timeouts
+RUN dnf config-manager \
+    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
+    && dnf clean packages \
+    && dnf install -y \
+    libcusparselt0 libcusparselt-devel \
+    cudnn9-cuda-12-6-${NV_CUDNN9_CUDA_VERSION} \
+    && dnf clean all
+
 ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
 
 FROM cuda-devel AS python-installations
@@ -138,7 +148,8 @@ RUN if [[ -z "${WHEEL_VERSION}" ]]; \
 RUN --mount=type=cache,target=/home/${USER}/.cache/pip,uid=${USER_UID} \
     python -m pip install --user wheel && \
     python -m pip install --user "$(head bdist_name)" && \
-    python -m pip install --user "$(head bdist_name)[flash-attn]"
+    python -m pip install --user "$(head bdist_name)[flash-attn]" && \
+    python -m pip install --user "$(head bdist_name)[mamba]"
 
 # fms_acceleration_peft = PEFT-training, e.g., 4bit QLoRA
 # fms_acceleration_foak = Fused LoRA and triton kernels
diff --git a/pyproject.toml b/pyproject.toml
index bbb3c4d0c..fe0726494 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,7 @@ aim = ["aim>=3.19.0,<4.0"]
 mlflow = ["mlflow"]
 fms-accel = ["fms-acceleration>=0.6"]
 gptq-dev = ["auto_gptq>0.4.2", "optimum>=1.15.0"]
+mamba = ["mamba_ssm[causal-conv1d] @ git+https://github.com/state-spaces/mamba.git"]
 scanner-dev = ["HFResourceScanner>=0.1.0"]