diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py index 8c7111f4490..27dd2175976 100644 --- a/tests/integration/defs/accuracy/test_disaggregated_serving.py +++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py @@ -1035,12 +1035,12 @@ def test_auto_dtype(self, block_reuse): ctx_server_config["kv_cache_config"] = { "max_attention_window": [512, 512, 512, 512, 512, 32768], "enable_block_reuse": block_reuse, - "enable_partial_reuse": False, + "enable_partial_reuse": True, } gen_server_config["kv_cache_config"] = { "max_attention_window": [512, 512, 512, 512, 512, 32768], "enable_block_reuse": block_reuse, - "enable_partial_reuse": False, + "enable_partial_reuse": True, } disaggregated_server_config = { "hostname": "localhost", @@ -1097,13 +1097,13 @@ def test_auto_dtype(self, block_reuse, mocker): ctx_server_config["kv_cache_config"] = { "max_attention_window": [128, 32768], "enable_block_reuse": block_reuse, - "enable_partial_reuse": False, + "enable_partial_reuse": True, "free_gpu_memory_fraction": 0.5, } gen_server_config["kv_cache_config"] = { "max_attention_window": [128, 32768], "enable_block_reuse": block_reuse, - "enable_partial_reuse": False, + "enable_partial_reuse": True, "free_gpu_memory_fraction": 0.5, } disaggregated_server_config = {