Commit 46dad85

docs: update KVBM diagram and bump container image tags to 1.0.0 (#7365)

Signed-off-by: Dan Gil <dagil@nvidia.com>

1 parent: 6b62df6

File tree

13 files changed: +122 additions, −29 deletions


docs/assets/img/architecture.png

188 KB

docs/assets/img/kvbm-components.svg

Lines changed: 97 additions & 4 deletions

docs/backends/trtllm/README.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -44,7 +44,7 @@ docker compose -f deploy/docker-compose.yml up -d
 **Step 2 (host terminal):** Pull and run the prebuilt container:
 
 ```bash
-DYNAMO_VERSION=0.9.0
+DYNAMO_VERSION=1.0.0
 docker pull nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:$DYNAMO_VERSION
 docker run --gpus all -it --network host --ipc host \
   nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:$DYNAMO_VERSION
````
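With the bump applied, the quick-start pull/run sequence keeps the tag in one place. A minimal sketch of the updated commands (the `IMAGE` helper variable is illustrative, not part of the docs):

```shell
# Pin the release once; both pull and run reuse it.
DYNAMO_VERSION=1.0.0
IMAGE="nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:${DYNAMO_VERSION}"
echo "$IMAGE"   # prints nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0

# The actual pull/run steps (require Docker and NGC access):
# docker pull "$IMAGE"
# docker run --gpus all -it --network host --ipc host "$IMAGE"
```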

docs/backends/trtllm/multinode/trtllm-multinode-examples.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -80,7 +80,7 @@ following environment variables based:
 ```bash
 # NOTE: IMAGE must be set manually for now
 # Use the prebuilt container from NGC (see ../README.md#quick-start):
-# export IMAGE="nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.9.0"
+# export IMAGE="nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0"
 # Or build a custom one (see ../trtllm-building-custom-container.md)
 # Or you can also download the image to shared storage and point
 # IMAGE to the local path.
````

docs/benchmarks/kv-router-ab-testing.md

Lines changed: 5 additions & 5 deletions
```diff
@@ -121,7 +121,7 @@ spec:
       replicas: 1
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.9.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           env:
             - name: POD_UID
               valueFrom:
@@ -146,7 +146,7 @@ spec:
                   values:
                     - gpu-h100-sxm # Adjust to your GPU node type
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.9.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           workingDir: /workspace
           command:
             - /bin/sh
@@ -212,7 +212,7 @@ spec:
       replicas: 1
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.9.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           env:
             - name: POD_UID
               valueFrom:
@@ -240,7 +240,7 @@ spec:
                   values:
                     - gpu-h100-sxm # Adjust to your GPU node type
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.9.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           workingDir: /workspace
           command:
             - /bin/sh
@@ -438,7 +438,7 @@ spec:
       restartPolicy: Never
       containers:
         - name: benchmark
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.9.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           securityContext:
             runAsUser: 0 # Required: apt-get and pip install need root in ephemeral benchmark pod
           command:
```
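The hunks above are identical one-line tag bumps; edits of this shape across many manifests are commonly scripted rather than hand-edited. A hedged sketch of such a rewrite (the sample file and regex are illustrative, not part of this commit), normalizing any `ai-dynamo` runtime tag to 1.0.0:

```shell
# Create a stand-in docs file containing two of the old tags seen in this commit.
tmp=$(mktemp)
cat > "$tmp" <<'EOF'
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.9.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.0
EOF

# One regex covers every old tag (0.6.0, 0.8.0, 0.8.1, 0.9.0) in a single pass.
sed -E -i 's#(nvcr\.io/nvidia/ai-dynamo/[a-z]+-runtime):[0-9]+\.[0-9]+\.[0-9]+#\1:1.0.0#g' "$tmp"

cat "$tmp"   # both lines now end in :1.0.0
```

Running the same `sed` over `docs/` (via `grep -rl ... | xargs`) would reproduce the bulk of this commit's tag changes.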

docs/components/profiler/README.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -37,7 +37,7 @@ metadata:
 spec:
   model: "Qwen/Qwen3-0.6B"
   backend: vllm
-  image: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.9.0"
+  image: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0"
 
   workload:
     isl: 3000 # Average input sequence length
```

docs/components/profiler/profiler-guide.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -200,7 +200,7 @@ Each DGDR requires a container image for profiling and deployment:
 
 ```yaml
 spec:
-  image: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.9.0"
+  image: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0"
 ```
 
 #### Quick Start: Deploy with DGDR
@@ -371,7 +371,7 @@ metadata:
 spec:
   model: "Qwen/Qwen3-0.6B"
   backend: vllm
-  image: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.9.0"
+  image: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0"
 
   searchStrategy: rapid # or thorough
   autoApply: true
````

docs/components/router/router-examples.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -130,7 +130,7 @@ spec:
             value: "16"
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
 ```
 
 ### Alternative: Using Command Args in K8s
@@ -140,7 +140,7 @@ You can also pass CLI arguments directly in the container command:
 ```yaml
 extraPodSpec:
   mainContainer:
-    image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.0
+    image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
     command:
       - /bin/sh
       - -c
````

docs/features/disaggregated-serving/README.md

Lines changed: 6 additions & 6 deletions
```diff
@@ -75,7 +75,7 @@ aiconfigurator cli default \
   --tpot 25 \
   --backend vllm \
   --backend-version 0.12.0 \
-  --generator-dynamo-version 0.8.0 \
+  --generator-dynamo-version 1.0.0 \
   --generator-set K8sConfig.k8s_namespace=$YOUR_NAMESPACE \
   --generator-set K8sConfig.k8s_pvc_name=$YOUR_PVC \
   --save-dir ./results_vllm
@@ -272,7 +272,7 @@ spec:
             value: /opt/models
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           imagePullPolicy: IfNotPresent
 
     VLLMWorker:
@@ -292,7 +292,7 @@ spec:
             value: /opt/models
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           workingDir: /workspace
           imagePullPolicy: IfNotPresent
           command:
@@ -506,7 +506,7 @@ spec:
             value: /opt/models
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           imagePullPolicy: IfNotPresent
 
     VLLMPrefillWorker:
@@ -533,7 +533,7 @@ spec:
             value: "0"
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           workingDir: /workspace
           imagePullPolicy: IfNotPresent
           securityContext:
@@ -581,7 +581,7 @@ spec:
             value: "0"
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
           workingDir: /workspace
           imagePullPolicy: IfNotPresent
           securityContext:
```

docs/getting-started/quickstart.md

Lines changed: 3 additions & 3 deletions
````diff
@@ -20,13 +20,13 @@ Containers have all dependencies pre-installed. No setup required.
 
 ```bash
 # SGLang
-docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.1
+docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
 
 # TensorRT-LLM
-docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1
+docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
 
 # vLLM
-docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.1
+docker run --gpus all --network host --rm -it nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
 ```
 
 <Tip>
````
