diff --git a/ucm/integration/vllm/patch/patch_funcs/v092/vllm_rerope_patch.py b/ucm/integration/vllm/patch/patch_funcs/v092/vllm_rerope_patch.py index f23c6ac2b..e7bc75ed7 100644 --- a/ucm/integration/vllm/patch/patch_funcs/v092/vllm_rerope_patch.py +++ b/ucm/integration/vllm/patch/patch_funcs/v092/vllm_rerope_patch.py @@ -36,7 +36,6 @@ def _apply_rerope_adapt_patches() -> None: try: _patch_attention_spec() - _patch_request_succeed_dumped_blocks() _patch_utils() _patch_gpu_model_runner() _patch_qwen2_model() @@ -83,23 +82,6 @@ def _page_size_bytes_rerope(self: "AttentionSpec") -> int: ) -# ==================== vllm/v1/request.py ==================== -def _patch_request_succeed_dumped_blocks() -> None: - """Patch Request to add succeed_dumped_blocks field.""" - try: - from vllm.v1.request import Request - - original_init = Request.__init__ - - def __init__(self, *args, **kwargs): - original_init(self, *args, **kwargs) - self.succeed_dumped_blocks = [] - - Request.__init__ = __init__ - except ImportError: - logger.warning("Could not patch Request.__init__ - module not found") - - # ==================== vllm/v1/attention/backends/utils.py ==================== def _patch_utils() -> None: """Patch common metadata"""