Skip to content

Commit cfdf958

Browse files
committed
fix ascend patch and change version
1 parent 59d00f4 commit cfdf958

File tree

2 files changed: +23 −13 lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def build_cmake(self, ext: CMakeExtension):
 
 setup(
     name="uc-manager",
-    version="0.2.0rc2",
+    version="0.2.0",
     description="Unified Cache Management",
     author="Unified Cache Team",
     packages=find_packages(),

ucm/integration/vllm/patch/0.9.2/vllm-ascend-adapt.patch

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
-From c92cb68fd1fa6215cd6d5b207b95c841ac20dbe1 Mon Sep 17 00:00:00 2001
-From: wenxinwang <wangwenxin21@huawei.com>
-Date: Tue, 23 Dec 2025 19:21:33 -0800
-Subject: [PATCH] sparse patch for vllm-ascend
+From 57681500369b33dc3ac9a2cc97ad10980bab56fc Mon Sep 17 00:00:00 2001
+From: qyh <qiuyuhao1@huawei.com>
+Date: Wed, 31 Dec 2025 17:15:08 +0800
+Subject: [PATCH] modify ascend patch for register_kv_cache
 
 ---
- vllm_ascend/attention/attention_v1.py | 80 ++++++++++++++++++++++
- vllm_ascend/attention/mla_v1.py | 14 +++-
- vllm_ascend/worker/model_runner_v1.py | 98 ++++++++++++++++++++++++---
- vllm_ascend/worker/worker_v1.py | 25 +++++--
- 4 files changed, 201 insertions(+), 16 deletions(-)
+ vllm_ascend/attention/attention_v1.py | 80 ++++++++++++++++++++
+ vllm_ascend/attention/mla_v1.py | 14 +++-
+ vllm_ascend/worker/model_runner_v1.py | 101 +++++++++++++++++++++++---
+ vllm_ascend/worker/worker_v1.py | 25 ++++++-
+ 4 files changed, 204 insertions(+), 16 deletions(-)
 
 diff --git a/vllm_ascend/attention/attention_v1.py b/vllm_ascend/attention/attention_v1.py
 index 7d7f488f..ea982244 100644
@@ -185,7 +185,7 @@ index f50fe56e..ae8f50bf 100644
185185

186186
return output_padded
187187
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
188-
index eabcdbcc..782b9a3b 100644
188+
index eabcdbcc..2762fbc7 100644
189189
--- a/vllm_ascend/worker/model_runner_v1.py
190190
+++ b/vllm_ascend/worker/model_runner_v1.py
191191
@@ -39,7 +39,10 @@ from vllm.config import CompilationLevel, VllmConfig
@@ -335,7 +335,17 @@ index eabcdbcc..782b9a3b 100644
335335

336336
use_spec_decode = len(
337337
scheduler_output.scheduled_spec_decode_tokens) > 0
338-
@@ -2369,3 +2402,48 @@ class NPUModelRunner(LoRAModelRunnerMixin):
338+
@@ -1965,6 +1998,9 @@ class NPUModelRunner(LoRAModelRunnerMixin):
339+
self.vllm_config.compilation_config.static_forward_context,
340+
self.kv_caches)
341+
342+
+ if has_kv_transfer_group():
343+
+ get_kv_transfer_group().register_kv_caches(kv_caches)
344+
+
345+
def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]:
346+
"""
347+
Generates the KVCacheSpec by parsing the kv cache format from each
348+
@@ -2369,3 +2405,48 @@ class NPUModelRunner(LoRAModelRunnerMixin):
339349
if batch_size <= padded_batch_size < selected_batch_size:
340350
selected_batch_size = padded_batch_size
341351
return selected_batch_size
@@ -458,5 +468,5 @@ index df03d508..5d5d9b5a 100644
458468
def _init_profiler(self):
459469
# Torch profiler. Enabled and configured through env vars:
460470
--
461-
2.34.1
471+
2.50.1.windows.1
462472

0 commit comments

Comments
 (0)