build(deps): changes needed to support mamba/jamba model (#400)

anhuong · Ssukriti · aluu317 · web-flow · commit c963595e696f · 2025-03-18T14:56:15.000-06:00
* enable mamba deps

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* fix deps

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* try dockerfile

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* trial 2 dockerfile

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* dockerfile trial 3

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* install cudnn9-cuda-12 and mamba separately

Signed-off-by: Anh Uong &lt;anh.uong@ibm.com&gt;

* deps: update transformers fork to 4.46

Signed-off-by: Anh Uong &lt;anh.uong@ibm.com&gt;

* update to latest transformers as PR merged

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* clean up Dockerfile

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* test commit

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* install cudnn

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* test

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* remove extra dep

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* version cleanup

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* test

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* Try Joe's freeup-disk-space runner

Signed-off-by: Angel Luu &lt;angel.luu@us.ibm.com&gt;

* test try for images to pass

Signed-off-by: Sukriti-Sharma4 &lt;sukriti.sharma4@ibm.com&gt;

* Remove versioning for libcusparselt

Signed-off-by: Angel Luu &lt;angel.luu@us.ibm.com&gt;

---------

Signed-off-by: Sukriti-Sharma4 <sukriti.sharma4@ibm.com>
Signed-off-by: Anh Uong <anh.uong@ibm.com>
Signed-off-by: Angel Luu <angel.luu@us.ibm.com>
Co-authored-by: Sukriti-Sharma4 <sukriti.sharma4@ibm.com>
Co-authored-by: Angel Luu <angel.luu@us.ibm.com>
Co-authored-by: Sukriti Sharma <Ssukriti@users.noreply.github.com>
diff --git a/build/Dockerfile b/build/Dockerfile
@@ -88,7 +88,8 @@ ENV NV_CUDA_CUDART_DEV_VERSION=12.1.55-1 \
     NV_NVML_DEV_VERSION=12.1.55-1 \
     NV_LIBCUBLAS_DEV_VERSION=12.1.0.26-1 \
     NV_LIBNPP_DEV_VERSION=12.0.2.50-1 \
-    NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.3-1+cuda12.1
+    NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.3-1+cuda12.1 \
+    NV_CUDNN9_CUDA_VERSION=9.6.0.74-1
 
 RUN dnf config-manager \
        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
@@ -103,6 +104,15 @@ RUN dnf config-manager \
         libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
     && dnf clean all
 
+# Install these packages in a separate RUN step: keeping one connection open for a single long install was resulting in timeouts
+RUN dnf config-manager \
+       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
+    && dnf clean packages \
+    && dnf install -y \
+        libcusparselt0 libcusparselt-devel \
+        cudnn9-cuda-12-6-${NV_CUDNN9_CUDA_VERSION} \
+    && dnf clean all
+
 ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
 
 FROM cuda-devel AS python-installations
@@ -138,7 +148,8 @@ RUN if [[ -z "${WHEEL_VERSION}" ]]; \
 RUN --mount=type=cache,target=/home/${USER}/.cache/pip,uid=${USER_UID} \
     python -m pip install --user wheel && \
     python -m pip install --user "$(head bdist_name)" && \
-    python -m pip install --user "$(head bdist_name)[flash-attn]"
+    python -m pip install --user "$(head bdist_name)[flash-attn]" && \
+    python -m pip install --user "$(head bdist_name)[mamba]"
 
 # fms_acceleration_peft = PEFT-training, e.g., 4bit QLoRA
 # fms_acceleration_foak = Fused LoRA and triton kernels
diff --git a/pyproject.toml b/pyproject.toml
@@ -48,6 +48,7 @@ aim = ["aim>=3.19.0,<4.0"]
 mlflow = ["mlflow"]
 fms-accel = ["fms-acceleration>=0.6"]
 gptq-dev = ["auto_gptq>0.4.2", "optimum>=1.15.0"]
+mamba = ["mamba_ssm[causal-conv1d] @ git+https://github.com/state-spaces/mamba.git"]
 scanner-dev = ["HFResourceScanner>=0.1.0"]