Skip to content

Commit 306e554

Browse files
Tabrizian authored and Shixiaowei02 committed
Enable KVCache reuse for disagg
Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
1 parent 6cec630 commit 306e554

File tree

1 file changed: 0 additions (+0) and 2 deletions (-2)

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -960,7 +960,6 @@ def test_auto_dtype(self, overlap_scheduler):
960 960   ctx_server_config["cache_transceiver_config"] = {"backend": "DEFAULT"}
961 961   gen_server_config["cache_transceiver_config"] = {"backend": "DEFAULT"}
962 962   ctx_server_config["kv_cache_config"] = {
963     - "enable_block_reuse": False,
964 963   "free_gpu_memory_fraction": 0.7,
965 964   "tokens_per_block": 64,
966 965   "dtype": "fp8"
@@ -977,7 +976,6 @@ def test_auto_dtype(self, overlap_scheduler):
977 976   ctx_server_config["enable_attention_dp"] = True
978 977   ctx_server_config["enable_autotuner"] = False
979 978   gen_server_config["kv_cache_config"] = {
980     - "enable_block_reuse": False,
981 979   "tokens_per_block": 64,
982 980   "free_gpu_memory_fraction": 0.7,
983 981   "dtype": "fp8"

0 commit comments

Comments
 (0)