Skip to content

Commit 1e1c3e6

Browse files
[bugfix] kvcomp config (#584)
1 parent d608be8 commit 1e1c3e6

File tree

4 files changed

+49
-16
lines changed

4 files changed

+49
-16
lines changed

examples/offline_inference_kvcomphbm.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,7 @@ def build_llm_with_uc(module_path: str, name: str, model: str):
7777
},
7878
}
7979
],
80-
"ucm_sparse_config": {
81-
"GSA": {
82-
"kvcompOnDevice_config_path": "/workspace/unified-cache-management/ucm/sparse/kvcomp/configs/kvcomp_deepseek_v2_lite_config.json"
83-
}
84-
},
80+
"ucm_sparse_config": {"GSA": {}},
8581
},
8682
)
8783

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,4 +148,5 @@ def build_cmake(self, ext: CMakeExtension):
148148
cmdclass={"build_ext": CMakeBuild},
149149
zip_safe=False,
150150
include_package_data=False,
151+
package_data={"ucm": ["sparse/kvcomp/configs/**/*.json"]},
151152
)

ucm/sparse/kvcomp/kvcomp_hbm.py

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from importlib import resources
2+
from pathlib import Path
13
from typing import Any, Dict, List, Optional, Union
24

35
import torch
@@ -24,6 +26,49 @@
2426
ReqType = Union[str, int]
2527

2628

29+
def kvcomp_config_path_for_model(vllm_config) -> str:
    """Return the filesystem path of the bundled kvcomp JSON config for a model.

    The config file is selected by substring match on the lower-cased
    ``vllm_config.model_config.model`` string. It is looked up first in a
    source checkout (the nearest ancestor of this file that contains
    ``pyproject.toml``, ``setup.cfg`` or ``.git``) and otherwise among the
    resources of the installed ``ucm`` package.

    Args:
        vllm_config: object exposing ``model_config.model`` as a string.

    Returns:
        The path of the selected config file, as a string.

    Raises:
        ValueError: the model name matches none of the supported models.
        FileNotFoundError: the config exists neither in the source tree nor
            among the ``ucm`` package resources.
    """
    # Local imports: only the package-resource fallback below needs them.
    import atexit
    from contextlib import ExitStack

    model = vllm_config.model_config.model.lower()
    logger.info("[KvComp] model name: %s", model)

    if "deepseek" in model and "r1" in model:
        rel = "ucm/sparse/kvcomp/configs/kvcomp_deepseek_r1_awq_config.json"
    elif "qwen3" in model and "32b" in model:
        rel = "ucm/sparse/kvcomp/configs/kvcomp_qwen3_32B_config.json"
    else:
        raise ValueError(f"[KvCompOnDevice] Unsupported model for kvcomp: {model}")

    logger.info("[KvComp] target relative path: %s", rel)

    # Walk upwards from this file (bounded to 30 levels) looking for a
    # repository root marker; only the first root found is consulted.
    repo = Path(__file__).resolve()
    for depth in range(30):
        if (
            (repo / "pyproject.toml").is_file()
            or (repo / "setup.cfg").is_file()
            or (repo / ".git").exists()
        ):
            p = repo / rel
            logger.info("[KvComp] repo root detected at depth=%d: %s", depth, repo)
            if p.is_file():
                logger.info("[KvComp] config loaded from SOURCE tree: %s", p)
                return str(p)
            logger.warning("[KvComp] repo root found but config missing: %s", p)
            break
        if repo.parent == repo:
            logger.debug("[KvComp] reached filesystem root, stop searching")
            break

        repo = repo.parent

    # Fall back to the installed package. Resource paths are relative to the
    # "ucm" package, so drop the leading "ucm/" component.
    sub = rel[len("ucm/") :] if rel.startswith("ucm/") else rel
    res = resources.files("ucm")
    # Join one segment at a time: multi-argument joinpath() is not accepted
    # by every Traversable implementation on older Python versions.
    for part in sub.split("/"):
        res = res / part

    if not res.is_file():
        raise FileNotFoundError(
            f"[KvComp] config not found in source tree or package resources: {rel}"
        )

    # as_file() may extract the resource to a temporary file that is removed
    # when its context exits. The path is handed back to the caller, so keep
    # the context open until interpreter shutdown instead of closing it here.
    file_manager = ExitStack()
    atexit.register(file_manager.close)
    p = file_manager.enter_context(resources.as_file(res))
    logger.info("[KvComp] config loaded from PACKAGE resource (wheel): %s", p)
    return str(p)
70+
71+
2772
class KvCompOnDevice(UcmSparseBase):
2873
# handle batch
2974
def __init__(self, vllm_config: VllmConfig, role: UcmSparseRole):
@@ -39,16 +84,7 @@ def __init__(self, vllm_config: VllmConfig, role: UcmSparseRole):
3984
)
4085
self.block_size = vllm_config.cache_config.block_size
4186

42-
self.kvcompOnDevice_cfg = (
43-
Config(vllm_config.kv_transfer_config)
44-
.get_config()
45-
.get("ucm_sparse_config")
46-
.get("GSA")
47-
)
48-
49-
kvcompOnDevice_config_path = self.kvcompOnDevice_cfg[
50-
"kvcompOnDevice_config_path"
51-
]
87+
kvcompOnDevice_config_path = kvcomp_config_path_for_model(vllm_config)
5288
self.kvcompOnDevice_config = KvCompConfig.from_json(kvcompOnDevice_config_path)
5389
logger.info(f"read kvcomp config file : {kvcompOnDevice_config_path} ")
5490
self.hash_topk_tokens = self.kvcompOnDevice_config.vllm_hash_attention_topk

ucm/sparse/state.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def ensure_ucm_sparse_initialized(
4141

4242
# Check if UCM sparse is enabled
4343
ucm_config = Config(vllm_config.kv_transfer_config)
44-
ucm_sparse_config = ucm_config.get_config().get("ucm_sparse_config")
44+
ucm_sparse_config = ucm_config.get_config().get("ucm_sparse_method")
4545
if not ucm_sparse_config:
4646
return
4747

0 commit comments

Comments
 (0)