Skip to content

Commit 32fdea2

Browse files
committed
Enable partial reuse for disaggregated-serving (dis-agg) tests on Gemma3 and GPT-OSS
Signed-off-by: eopXD <[email protected]>
1 parent ed3a309 commit 32fdea2

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1028,12 +1028,12 @@ def test_auto_dtype(self, block_reuse):
10281028
ctx_server_config["kv_cache_config"] = {
10291029
"max_attention_window": [512, 512, 512, 512, 512, 32768],
10301030
"enable_block_reuse": block_reuse,
1031-
"enable_partial_reuse": False,
1031+
"enable_partial_reuse": True,
10321032
}
10331033
gen_server_config["kv_cache_config"] = {
10341034
"max_attention_window": [512, 512, 512, 512, 512, 32768],
10351035
"enable_block_reuse": block_reuse,
1036-
"enable_partial_reuse": False,
1036+
"enable_partial_reuse": True,
10371037
}
10381038
disaggregated_server_config = {
10391039
"hostname": "localhost",
@@ -1090,13 +1090,13 @@ def test_auto_dtype(self, block_reuse, mocker):
10901090
ctx_server_config["kv_cache_config"] = {
10911091
"max_attention_window": [128, 32768],
10921092
"enable_block_reuse": block_reuse,
1093-
"enable_partial_reuse": False,
1093+
"enable_partial_reuse": True,
10941094
"free_gpu_memory_fraction": 0.5,
10951095
}
10961096
gen_server_config["kv_cache_config"] = {
10971097
"max_attention_window": [128, 32768],
10981098
"enable_block_reuse": block_reuse,
1099-
"enable_partial_reuse": False,
1099+
"enable_partial_reuse": True,
11001100
"free_gpu_memory_fraction": 0.5,
11011101
}
11021102
disaggregated_server_config = {

0 commit comments

Comments
 (0)