Merged
37 changes: 0 additions & 37 deletions .github/workflows/unifiedcache_test.yml
@@ -18,40 +18,3 @@ jobs:

  call-lint:
    uses: ./.github/workflows/pre-commit.yml
-
-  unit-test:
-    needs: call-lint
-    name: Run Unittests
-    runs-on: ubuntu-latest
-    steps:
-      - name: Free disk space
-        run: |
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /opt/ghc
-          sudo rm -rf "/usr/local/share/boost"
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          docker system prune -af
-          df -h
-
-      - name: Checkout unified-cache-management repo
-        uses: actions/checkout@v4
-
-      - name: Run unit test inside vLLM container
-        run: |
-          docker run --rm \
-            -e VLLM_USE_PRECOMPILED=1 \
-            -e PLATFORM=cuda \
-            -v ${{ github.workspace }}:/workspace/unified-cache-management \
-            -w /workspace/unified-cache-management \
-            --entrypoint /bin/bash \
-            vllm/vllm-openai:v0.9.2 \
-            -c "
-              set -euo pipefail
-              pip install -v -e . --no-build-isolation
-              cd \$(pip show vllm | grep Location | awk '{print \$2}') &&
-              git apply /workspace/unified-cache-management/ucm/integration/vllm/patch/0.9.2/vllm-adapt-pc.patch
-              git apply /workspace/unified-cache-management/ucm/integration/vllm/patch/0.9.2/vllm-adapt-aggre.patch
-              git apply /workspace/unified-cache-management/ucm/integration/vllm/patch/0.9.2/vllm-adapt-sparse.patch
-              cd /workspace/unified-cache-management
-              python3 -m unittest discover -s test
-            "
2 changes: 1 addition & 1 deletion docs/source/getting-started/installation_gpu.md
@@ -51,7 +51,7 @@ export PLATFORM=cuda
pip install -v -e . --no-build-isolation
```

-**Note:** Patches are now applied automatically via dynamic patching when you import the unified-cache-management package. You no longer need to manually apply patches using `git apply`. The patches are automatically applied when you use the `UnifiedCacheConnectorV1` connector.
+**Note:** Patches are now applied automatically via dynamic patching when you import the unified-cache-management package. You no longer need to manually apply patches using `git apply`. The patches are automatically applied when you use the `UCMConnector` connector.


## Setup from docker
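Since the patch workflow changed, a minimal offline-inference sketch of the renamed connector may help; it is illustrative only and not part of this PR. The model path, the `kv_role` choice, and the bare `ucm_connector_name` entry are assumptions; store-specific options (truncated in the doc hunks below) would live in the same `kv_connector_extra_config` dict.

```python
# Sketch only: the renamed UCMConnector wired up through vLLM's Python API.
# Loading the connector from the ucm package is what triggers the dynamic
# patching described in the note above; no manual `git apply` step is needed.
from vllm import LLM
from vllm.config import KVTransferConfig

ktc = KVTransferConfig(
    kv_connector="UCMConnector",
    kv_connector_module_path="ucm.integration.vllm.ucm_connector",
    kv_role="kv_both",
    kv_connector_extra_config={
        "ucm_connector_name": "UcmNfsStore",
        # Store-specific settings (elided in the hunks below) go here as well.
    },
)

# Placeholder model path; any locally available model works.
llm = LLM(model="/home/models/Qwen2.5-7B-Instruct", kv_transfer_config=ktc)
print(llm.generate(["Hello, UCM!"])[0].outputs[0].text)
```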
8 changes: 4 additions & 4 deletions docs/source/user-guide/pd-disaggregation/1p1d.md
@@ -26,8 +26,8 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
--block-size 128 \
--kv-transfer-config \
'{
"kv_connector": "UnifiedCacheConnectorV1",
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector": "UCMConnector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_producer",
"kv_connector_extra_config": {
"ucm_connector_name": "UcmNfsStore",
@@ -55,8 +55,8 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
--block-size 128 \
--kv-transfer-config \
'{
"kv_connector": "UnifiedCacheConnectorV1",
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector": "UCMConnector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_consumer",
"kv_connector_extra_config": {
"ucm_connector_name": "UcmNfsStore",
8 changes: 4 additions & 4 deletions docs/source/user-guide/pd-disaggregation/npgd.md
@@ -33,8 +33,8 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
--dtype bfloat16 \
--kv-transfer-config \
'{
"kv_connector": "UnifiedCacheConnectorV1",
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector": "UCMConnector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_producer",
"kv_connector_extra_config": {
"ucm_connector_name": "UcmNfsStore",
@@ -63,8 +63,8 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
--dtype bfloat16 \
--kv-transfer-config \
'{
"kv_connector": "UnifiedCacheConnectorV1",
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector": "UCMConnector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_consumer",
"kv_connector_extra_config": {
"ucm_connector_name": "UcmNfsStore",
16 changes: 8 additions & 8 deletions docs/source/user-guide/pd-disaggregation/xpyd.md
@@ -26,8 +26,8 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
--block-size 128 \
--kv-transfer-config \
'{
"kv_connector": "UnifiedCacheConnectorV1",
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector": "UCMConnector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_producer",
"kv_connector_extra_config": {
"ucm_connector_name": "UcmNfsStore",
@@ -54,8 +54,8 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
--block-size 128 \
--kv-transfer-config \
'{
"kv_connector": "UnifiedCacheConnectorV1",
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector": "UCMConnector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_producer",
"kv_connector_extra_config": {
"ucm_connector_name": "UcmNfsStore",
@@ -83,8 +83,8 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
--block-size 128 \
--kv-transfer-config \
'{
"kv_connector": "UnifiedCacheConnectorV1",
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector": "UCMConnector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_consumer",
"kv_connector_extra_config": {
"ucm_connector_name": "UcmNfsStore",
@@ -110,8 +110,8 @@ vllm serve /home/models/Qwen2.5-7B-Instruct \
--block-size 128 \
--kv-transfer-config \
'{
"kv_connector": "UnifiedCacheConnectorV1",
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector": "UCMConnector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_consumer",
"kv_connector_extra_config": {
"ucm_connector_name": "UcmNfsStore",
4 changes: 2 additions & 2 deletions docs/source/user-guide/prefix-cache/nfs_store.md
@@ -135,8 +135,8 @@ vllm serve /home/models/Qwen2.5-14B-Instruct \
--port 7800 \
--kv-transfer-config \
'{
"kv_connector": "UnifiedCacheConnectorV1",
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector": "UCMConnector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_both",
"kv_connector_extra_config": {"UCM_CONFIG_FILE": "/workspace/unified-cache-management/examples/ucm_config_example.yaml"}
}'
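For contrast with the inline configuration sketched earlier, here is a hypothetical Python equivalent of the file-based setup in the hunk above; the YAML path, model path, and `kv_role` are copied from the hunk, everything else is assumed.

```python
# Sketch only: the same UCMConnector, configured through a UCM YAML config file
# rather than inline connector options, mirroring the `vllm serve` command above.
from vllm import LLM
from vllm.config import KVTransferConfig

ktc = KVTransferConfig(
    kv_connector="UCMConnector",
    kv_connector_module_path="ucm.integration.vllm.ucm_connector",
    kv_role="kv_both",
    kv_connector_extra_config={
        "UCM_CONFIG_FILE": "/workspace/unified-cache-management/examples/ucm_config_example.yaml"
    },
)

llm = LLM(model="/home/models/Qwen2.5-14B-Instruct", kv_transfer_config=ktc)
```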
4 changes: 2 additions & 2 deletions docs/source/user-guide/sparse-attention/gsa.md
@@ -88,7 +88,7 @@ Similar to UCM's `offline_inference_esa.py` examples. We only need to specify `u
...
ktc = KVTransferConfig(
kv_connector=name,
-kv_connector_module_path="ucm.integration.vllm.uc_connector",
+kv_connector_module_path="ucm.integration.vllm.ucm_connector",
kv_role="kv_both",
kv_connector_extra_config={
"ucm_connector_name": "UcmNfsStore",
@@ -121,7 +121,7 @@ vllm serve /home/models/DeepSeek-R1-Distill-Qwen-32B \
--kv-transfer-config \
'{
"kv_connector": name,
"kv_connector_module_path": "ucm.integration.vllm.uc_connector",
"kv_connector_module_path": "ucm.integration.vllm.ucm_connector",
"kv_role": "kv_both",
"kv_connector_extra_config": {
"ucm_connector_name": "UcmNfsStore",
2 changes: 1 addition & 1 deletion examples/ucm_config_example.yaml
@@ -32,7 +32,7 @@ load_only_first_rank: false
# GSA: {}


-# Whether to use layerwise loading/saving (optional, default: True for UnifiedCacheConnectorV1)
+# Whether to use layerwise loading/saving (optional, default: True for UCMConnector)
# use_layerwise: true
# hit_ratio: 0.9
