Skip to content

Commit a2225b9

Browse files
committed
Enable partial reuse for disaggregated serving tests on Gemma3 and GPT-OSS
Signed-off-by: eopXD <[email protected]>
1 parent 0982516 commit a2225b9

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1035,12 +1035,12 @@ def test_auto_dtype(self, block_reuse):
10351035
ctx_server_config["kv_cache_config"] = {
10361036
"max_attention_window": [512, 512, 512, 512, 512, 32768],
10371037
"enable_block_reuse": block_reuse,
1038-
"enable_partial_reuse": False,
1038+
"enable_partial_reuse": True,
10391039
}
10401040
gen_server_config["kv_cache_config"] = {
10411041
"max_attention_window": [512, 512, 512, 512, 512, 32768],
10421042
"enable_block_reuse": block_reuse,
1043-
"enable_partial_reuse": False,
1043+
"enable_partial_reuse": True,
10441044
}
10451045
disaggregated_server_config = {
10461046
"hostname": "localhost",
@@ -1097,13 +1097,13 @@ def test_auto_dtype(self, block_reuse, mocker):
10971097
ctx_server_config["kv_cache_config"] = {
10981098
"max_attention_window": [128, 32768],
10991099
"enable_block_reuse": block_reuse,
1100-
"enable_partial_reuse": False,
1100+
"enable_partial_reuse": True,
11011101
"free_gpu_memory_fraction": 0.5,
11021102
}
11031103
gen_server_config["kv_cache_config"] = {
11041104
"max_attention_window": [128, 32768],
11051105
"enable_block_reuse": block_reuse,
1106-
"enable_partial_reuse": False,
1106+
"enable_partial_reuse": True,
11071107
"free_gpu_memory_fraction": 0.5,
11081108
}
11091109
disaggregated_server_config = {

0 commit comments

Comments
 (0)