Changes from all commits (20 commits)
61acfa9  CI: Update cuda ver for GPU tests to 13 (Alexey-Rivkin, Nov 5, 2025)
25da816  Update CUDA ver validation in tests and readme (Alexey-Rivkin, Nov 6, 2025)
993b9af  Switch default Python package dependency to nixl-cu13 (Alexey-Rivkin, Nov 9, 2025)
e0160d3  Fix POSIX shell compatibility in common.sh (Alexey-Rivkin, Nov 9, 2025)
cbf9de4  Update cuda ver in toml to align with Dockerfile.manylinux (Alexey-Rivkin, Nov 9, 2025)
e49f13b  Revert PyPi defaults back to cuda 12 (Alexey-Rivkin, Nov 10, 2025)
be23419  Revert the GPUNETIO package default (Alexey-Rivkin, Nov 11, 2025)
d4910f4  Merge remote-tracking branch 'origin/main' into cuda_update-gpu_test (Alexey-Rivkin, Nov 11, 2025)
7d43f22  Pass NPROC var to Docker commands (Alexey-Rivkin, Nov 11, 2025)
30433be  Add both CUDA 12 and 13 to the GPU test (Alexey-Rivkin, Nov 11, 2025)
419fc77  Improve visibility by showing cuda ver in job name (Alexey-Rivkin, Nov 11, 2025)
bedbff8  Merge remote-tracking branch 'origin/main' into cuda_update-gpu_test (Alexey-Rivkin, Nov 11, 2025)
bedfa8b  Revert "Improve visibility by showing cuda ver in job name" (Alexey-Rivkin, Nov 11, 2025)
32da639  Revert "Improve visibility by showing cuda ver in job name" (Alexey-Rivkin, Nov 11, 2025)
8d3ee0b  Update libfabric ver to 1.21 (Alexey-Rivkin, Nov 12, 2025)
a759810  Merge remote-tracking branch 'origin/main' into cuda_update-gpu_test (Alexey-Rivkin, Nov 12, 2025)
ddfdc9a  Merge remote-tracking branch 'origin/main' into cuda_update-gpu_test (Alexey-Rivkin, Nov 13, 2025)
1c3324b  Revert CUDA upgrade for AWS EFA tests (Alexey-Rivkin, Nov 13, 2025)
45e0116  Add CUDA12 image to the build matrix (Alexey-Rivkin, Nov 13, 2025)
d2c6b0b  Merge branch 'main' into cuda_update-gpu_test (brminich, Nov 13, 2025)
4 changes: 2 additions & 2 deletions .ci/dockerfiles/Dockerfile.gpu_test
@@ -13,7 +13,7 @@
 # docker run --gpus all --privileged -it nixl-gpu-test
 #
 # Build arguments:
-# BASE_IMAGE: Base NVIDIA cuda-dl-base image (default: nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04)
+# BASE_IMAGE: Base NVIDIA cuda-dl-base image (default: nvcr.io/nvidia/cuda-dl-base:25.10-cuda13.0-devel-ubuntu24.04)
 # _UID: User ID for the non-root user (default: 148069)
 # _GID: Group ID for the user (default: 30)
 # _LOGIN: Username (default: svc-nixl)
@@ -22,7 +22,7 @@
 # WORKSPACE: Workspace directory path
 #

-ARG BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04
+ARG BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base:25.10-cuda13.0-devel-ubuntu24.04

 FROM ${BASE_IMAGE}
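The default base image above can still be overridden at build time via `--build-arg`. A minimal sketch of picking the tag per CUDA major version (the helper function is illustrative, not part of the repo; the tag values are the ones used in this PR):

```shell
# Map a CUDA major version to the cuda-dl-base tag used in this PR.
# cuda_dl_base_tag is an illustrative helper, not part of the repo.
cuda_dl_base_tag() {
    case "$1" in
        12) echo "25.06-cuda12.9-devel-ubuntu24.04" ;;
        13) echo "25.10-cuda13.0-devel-ubuntu24.04" ;;
        *)  echo "unsupported CUDA major version: $1" >&2; return 1 ;;
    esac
}

# Example use (not executed here):
#   docker build -f .ci/dockerfiles/Dockerfile.gpu_test \
#     --build-arg BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base:$(cuda_dl_base_tag 13)" \
#     -t nixl-gpu-test .
```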
2 changes: 1 addition & 1 deletion .ci/docs/setup_nvidia_gpu_with_rdma_support_on_ubuntu.md
@@ -137,7 +137,7 @@ sudo nvidia-ctk runtime configure --runtime=docker
 sudo systemctl restart docker
 ```

-Verify GPU access in containers using `docker run --gpus all nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04 nvidia-smi`[^1_3].
+Verify GPU access in containers using `docker run --gpus all nvcr.io/nvidia/cuda-dl-base:25.10-cuda13.0-devel-ubuntu24.04 nvidia-smi`[^1_3].

 ### 9. **Validation and Troubleshooting**
7 changes: 4 additions & 3 deletions .ci/jenkins/lib/build-matrix.yaml
@@ -6,7 +6,7 @@
 # Key Components:
 # - Job Configuration: Defines timeout, failure behavior, and Kubernetes resources
 # - Docker Images: Specifies the container images used for different build stages
-# - cuda-dl-base images (25.06 for Ubuntu 24.04, 24.10 for Ubuntu 22.04) for building and testing
+# - cuda-dl-base images (25.10 for Ubuntu 24.04, 13.0.1 for Ubuntu 22.04) for building and testing
 # - Podman image for container builds
 # - Matrix Axes: Defines build variations (currently x86_64 architecture)
 # - Build Steps: Sequential steps for building, testing, and container creation
@@ -34,8 +34,9 @@ kubernetes:
 requests: "{memory: 8Gi, cpu: 8000m}"

 runs_on_dockers:
-- { name: "ubuntu24.04-cuda-dl-base", url: "nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04" }
-- { name: "ubuntu22.04-cuda-dl-base", url: "nvcr.io/nvidia/cuda-dl-base:24.10-cuda12.6-devel-ubuntu22.04" }
+- { name: "ubuntu24.04-cuda12-dl-base", url: "nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04" }
+- { name: "ubuntu24.04-cuda13-dl-base", url: "nvcr.io/nvidia/cuda-dl-base:25.10-cuda13.0-devel-ubuntu24.04" }
+- { name: "ubuntu22.04-cuda-dl-base", url: "nvidia/cuda:13.0.1-devel-ubuntu22.04" }
 - { name: "podman-v5.0.2", url: "quay.io/podman/stable:v5.0.2", category: 'tool', privileged: true }

 matrix:
1 change: 1 addition & 0 deletions .ci/jenkins/lib/test-matrix.yaml
@@ -30,6 +30,7 @@ runs_on_agents:
 matrix:
 axes:
 image:
+- nvcr.io/nvidia/cuda-dl-base:25.10-cuda13.0-devel-ubuntu24.04
 - nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04
 arch:
 - x86_64
4 changes: 2 additions & 2 deletions .ci/jenkins/pipeline/proj-jjb.yaml
@@ -280,7 +280,7 @@
 description: "Base Docker image for the container build"
 - string:
 name: "BASE_IMAGE_TAG"
-default: "25.06-cuda12.9-devel-ubuntu24.04"
+default: "25.10-cuda13.0-devel-ubuntu24.04"
 description: "Tag for the base Docker image"
 - string:
 name: "TAG_SUFFIX"
@@ -294,7 +294,7 @@
 description: >
 Update the latest tag for this architecture.<br/>
 When enabled, also creates: <code>&lt;base-image-tag&gt;-&lt;arch&gt;-latest</code><br/>
-Example: <code>25.06-cuda12.9-devel-ubuntu24.04-aarch64-latest</code><br/>
+Example: <code>25.10-cuda13.0-devel-ubuntu24.04-aarch64-latest</code><br/>
 - string:
 name: "MAIL_TO"
 default: "[email protected]"
2 changes: 1 addition & 1 deletion .gitlab/test_rust.sh
@@ -36,7 +36,7 @@ which cargo
 cargo --version

 export LD_LIBRARY_PATH=${INSTALL_DIR}/lib:${INSTALL_DIR}/lib/$ARCH-linux-gnu:${INSTALL_DIR}/lib/$ARCH-linux-gnu/plugins:/usr/local/lib:${INSTALL_DIR}/lib64:$LD_LIBRARY_PATH
-export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cuda/lib64:/usr/local/cuda-12.8/compat:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/local/cuda/lib64:/usr/local/cuda-13.0/compat:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=/usr/local/cuda/compat/lib.real:$LD_LIBRARY_PATH
 export CPATH=${INSTALL_DIR}/include:$CPATH
 export PATH=${INSTALL_DIR}/bin:$PATH
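The compat path is now hard-coded to `/usr/local/cuda-13.0/compat`. A defensive variant would prepend compat directories only when they exist; a minimal sketch (the helper and its root-directory parameter are illustrative, not part of test_rust.sh):

```shell
# Prepend CUDA compat directories (mirroring the ones used in test_rust.sh)
# to a library path, but only those that exist under the given root.
# prepend_cuda_compat is an illustrative helper, not part of the repo.
prepend_cuda_compat() {
    root="$1"
    path="$2"
    for d in "$root/cuda-13.0/compat" "$root/cuda/compat/lib.real"; do
        if [ -d "$d" ]; then
            path="$d${path:+:$path}"
        fi
    done
    echo "$path"
}

# Usage: LD_LIBRARY_PATH=$(prepend_cuda_compat /usr/local "$LD_LIBRARY_PATH")
```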
12 changes: 6 additions & 6 deletions benchmark/nixlbench/README.md
@@ -65,7 +65,7 @@ A comprehensive benchmarking tool for the NVIDIA Inference Xfer Library (NIXL) t
 - **Operating System**: Ubuntu 22.04/24.04 LTS (recommended) or RHEL-based
 - **Docker**: Version 20.10+ (for container builds)
 - **Git**: For source code management
-- **CUDA Toolkit**: 12.8+ (for GPU features)
+- **CUDA Toolkit**: 13.0+ (for GPU features)

Contributor review comment on this line: Not sure if we want to document 13.0 as a hard requirement already @aranadive WDYT?

 - **Python**: 3.12+ (for benchmark utilities)

 ## Quick Start
@@ -172,7 +172,7 @@ cd nixl/benchmark/nixlbench/contrib
 | `--ucx <path>` | Path to custom UCX source (optional) | Uses base image UCX |
 | `--build-type <type>` | Build type: `debug` or `release` | `release` |
 | `--base-image <image>` | Base Docker image | `nvcr.io/nvidia/cuda-dl-base` |
-| `--base-image-tag <tag>` | Base image tag | `25.06-cuda12.9-devel-ubuntu24.04` |
+| `--base-image-tag <tag>` | Base image tag | `25.10-cuda13.0-devel-ubuntu24.04` |
 | `--arch <arch>` | Target architecture: `x86_64` or `aarch64` | Auto-detected |
 | `--python-versions <versions>` | Python versions (comma-separated) | `3.12` |
 | `--tag <tag>` | Custom Docker image tag | Auto-generated |
@@ -187,7 +187,7 @@ For development environments or when Docker is not available.
 **Required:**
 - **NIXL**: Core communication library
 - **UCX**: Unified Communication X library
-- **CUDA**: NVIDIA CUDA Toolkit (≥12.8)
+- **CUDA**: NVIDIA CUDA Toolkit (≥13.0)
 - **CMake**: Build system (≥3.20)
 - **Meson**: Build system for NIXL/NIXLBench
 - **Ninja**: Build backend
@@ -234,9 +234,9 @@ sudo apt-get reinstall -y --no-install-recommends \

 #### CUDA Toolkit Installation
 ```bash
-# Download and install CUDA 12.8
-wget https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_550.54.15_linux.run
-sudo sh cuda_12.8.0_550.54.15_linux.run
+# Download and install CUDA 13.0
+wget https://developer.download.nvidia.com/compute/cuda/13.0.2/local_installers/cuda_13.0.2_580.95.05_linux.run
+sudo sh cuda_13.0.2_580.95.05_linux.run

 # Set environment variables
 export PATH=/usr/local/cuda/bin:$PATH
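After installing the toolkit, it can help to confirm that the `nvcc` on PATH really is 13.x before building. A small sketch (the helper is illustrative; it assumes the usual `release <major>.<minor>` wording in the `nvcc --version` banner):

```shell
# Extract the CUDA major version from an `nvcc --version` banner on stdin.
# cuda_major_from_nvcc is an illustrative helper, not part of the repo.
cuda_major_from_nvcc() {
    sed -n 's/.*release \([0-9][0-9]*\)\..*/\1/p' | head -n 1
}

# Typical check (requires a CUDA install):
#   major=$(nvcc --version | cuda_major_from_nvcc)
#   [ "$major" -ge 13 ] || echo "CUDA 13.0+ required" >&2
```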
2 changes: 1 addition & 1 deletion benchmark/nixlbench/contrib/Dockerfile
@@ -14,7 +14,7 @@
 # limitations under the License.

 ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-ARG BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
+ARG BASE_IMAGE_TAG="25.10-cuda13.0-devel-ubuntu24.04"

 # UCX argument is either "upstream" (default installed in base image) or "custom" (build from source)
 ARG UCX="upstream"
2 changes: 1 addition & 1 deletion benchmark/nixlbench/contrib/build.sh
@@ -35,7 +35,7 @@ if [ -z ${latest_tag} ]; then
 fi

 BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base
-BASE_IMAGE_TAG=25.06-cuda12.9-devel-ubuntu24.04
+BASE_IMAGE_TAG=25.10-cuda13.0-devel-ubuntu24.04
 ARCH=$(uname -m)
 [ "$ARCH" = "arm64" ] && ARCH="aarch64"
 WHL_BASE=manylinux_2_39
2 changes: 1 addition & 1 deletion contrib/Dockerfile
@@ -14,7 +14,7 @@
 # limitations under the License.

 ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-ARG BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
+ARG BASE_IMAGE_TAG="25.10-cuda13.0-devel-ubuntu24.04"
 ARG OS

 FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}
5 changes: 3 additions & 2 deletions contrib/build-container.sh
@@ -29,7 +29,7 @@ fi
 VERSION=v$latest_tag.dev.$commit_id

 BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base
-BASE_IMAGE_TAG=25.06-cuda12.9-devel-ubuntu24.04
+BASE_IMAGE_TAG=25.10-cuda13.0-devel-ubuntu24.04
 ARCH=$(uname -m)
 [ "$ARCH" = "arm64" ] && ARCH="aarch64"
 WHL_BASE=manylinux_2_39
@@ -148,7 +148,8 @@ get_options() {
 done

 if [[ $OS == "ubuntu22" ]]; then
-BASE_IMAGE_TAG=24.10-cuda12.6-devel-ubuntu22.04
+BASE_IMAGE=nvidia/cuda
+BASE_IMAGE_TAG=13.0.1-devel-ubuntu22.04
 WHL_BASE=${WHL_BASE:-manylinux_2_34}
 fi
2 changes: 1 addition & 1 deletion src/utils/libfabric/libfabric_common.cpp
@@ -50,7 +50,7 @@ getAvailableNetworkDevices() {
 hints->mode = FI_CONTEXT;
 hints->ep_attr->type = FI_EP_RDM;

-int ret = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, hints, &info);
+int ret = fi_getinfo(FI_VERSION(1, 21), NULL, NULL, 0, hints, &info);
 if (ret) {
 NIXL_ERROR << "fi_getinfo failed " << fi_strerror(-ret);
 fi_freeinfo(hints);
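The bump from FI_VERSION(1, 18) to FI_VERSION(1, 21) requests the libfabric 1.21 API from fi_getinfo. In libfabric's headers, FI_VERSION packs the major version into the high 16 bits and the minor into the low 16 bits; a quick shell sketch of that encoding:

```shell
# Encode a libfabric API version the way the FI_VERSION(major, minor)
# macro does: major in the high 16 bits, minor in the low 16 bits.
fi_version() {
    echo $(( ($1 << 16) | $2 ))
}
```

So the integer passed to fi_getinfo rises from 65554 (1.18) to 65557 (1.21), matching the libfabric 1.21 update in this PR.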
4 changes: 2 additions & 2 deletions src/utils/libfabric/libfabric_rail.cpp
@@ -431,7 +431,7 @@ nixlLibfabricRail::nixlLibfabricRail(const std::string &device,
 hints->domain_attr->threading = FI_THREAD_SAFE;
 try {
 // Get fabric info for this specific device - first try with FI_HMEM
-int ret = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, hints, &info);
+int ret = fi_getinfo(FI_VERSION(1, 21), NULL, NULL, 0, hints, &info);

 // If no provider found with FI_HMEM, retry without it
 if (ret || !info) {
@@ -442,7 +442,7 @@ nixlLibfabricRail::nixlLibfabricRail(const std::string &device,
 hints->caps = FI_MSG | FI_RMA;
 hints->caps |= FI_LOCAL_COMM | FI_REMOTE_COMM;

-ret = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, hints, &info);
+ret = fi_getinfo(FI_VERSION(1, 21), NULL, NULL, 0, hints, &info);
 if (ret) {
 NIXL_ERROR << "fi_getinfo failed for rail " << rail_id << ": " << fi_strerror(-ret);
 throw std::runtime_error("fi_getinfo failed for rail " + std::to_string(rail_id));
2 changes: 1 addition & 1 deletion src/utils/libfabric/libfabric_topology.cpp
@@ -381,7 +381,7 @@ nixlLibfabricTopology::buildPcieToLibfabricMapping() {
 // This ensures consistency between device discovery and PCIe mapping
 hints->fabric_attr->prov_name = strdup(provider_name.c_str());

-int ret = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, hints, &info);
+int ret = fi_getinfo(FI_VERSION(1, 21), NULL, NULL, 0, hints, &info);
 if (ret) {
 NIXL_ERROR << "fi_getinfo failed for PCIe mapping with provider " << provider_name << ": "
 << fi_strerror(-ret);