Skip to content

Commit a9cfe50

Browse files
authored
[Misc] Updating default LMCache docker image for remote KV cache sharing (#384)
* Update helm chart template for newest vllm version

  Signed-off-by: YuhanLiu11 <yliu738@wisc.edu>

* Update shared storage yaml file

  Signed-off-by: YuhanLiu11 <yliu738@wisc.edu>

---------

Signed-off-by: YuhanLiu11 <yliu738@wisc.edu>
1 parent 0d4564c commit a9cfe50

File tree

2 files changed

+10
-4
lines changed

2 files changed

+10
-4
lines changed

helm/templates/deployment-vllm-multi.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,11 @@ spec:
7777
{{- end }}
7878
{{- with $modelSpec.vllmConfig }}
7979
{{- if hasKey . "enableChunkedPrefill" }}
80+
{{- if .enableChunkedPrefill }}
8081
- "--enable-chunked-prefill"
81-
- {{ .enableChunkedPrefill | quote }}
82+
{{- else }}
83+
- "--no-enable-chunked-prefill"
84+
{{- end }}
8285
{{- end }}
8386
{{- if .enablePrefixCaching }}
8487
- "--enable-prefix-caching"

tutorials/assets/values-06-shared-storage.yaml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ servingEngineSpec:
33
modelSpec:
44
- name: "mistral"
55
repository: "lmcache/vllm-openai"
6-
tag: "latest"
6+
tag: "2025-04-18"
77
modelURL: "mistralai/Mistral-7B-Instruct-v0.2"
88
replicaCount: 2
99
requestCPU: 10
@@ -14,11 +14,14 @@ servingEngineSpec:
1414
enableChunkedPrefill: false
1515
enablePrefixCaching: false
1616
maxModelLen: 16384
17+
v1: 0
1718

1819
lmcacheConfig:
1920
enabled: true
2021
cpuOffloadingBufferSize: "20"
21-
22+
env:
23+
- name: LMCACHE_LOG_LEVEL
24+
value: "DEBUG"
2225
hf_token: <YOUR HF TOKEN>
2326

2427
cacheserverSpec:
@@ -36,7 +39,7 @@ cacheserverSpec:
3639

3740
# -- Cache server image (reusing the vllm image)
3841
repository: "lmcache/vllm-openai"
39-
tag: "latest"
42+
tag: "2025-04-18"
4043

4144
# TODO (Jiayi): please adjust this once we have evictor
4245
# -- router resource requests and limits

0 commit comments

Comments (0)