Skip to content

Commit 4a48d6b

Browse files
committed
adapt long sequence input when rerope is disabled
1 parent 2c033a8 commit 4a48d6b

File tree

2 files changed

+9
-2
lines changed

2 files changed

+9
-2
lines changed

ucm/integration/vllm/patch/0.9.2/vllm-rerope-adapt.patch

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1430,7 +1430,7 @@ index 5a26e88db..df86287c8 100644
14301430
max_num_scheduled_tokens = max(tokens)
14311431

14321432
+ # Get use_rerope
1433-
+ if len(scheduler_output.scheduled_new_reqs) > 0:
1433+
+ if envs.VLLM_USE_REROPE and len(scheduler_output.scheduled_new_reqs) > 0:
14341434
+ self.use_rerope = max([len(req.prompt_token_ids)
14351435
+ for req in scheduler_output.scheduled_new_reqs]) > envs.REROPE_WINDOW
14361436
+

ucm/integration/vllm/patch/patch_funcs/v092/vllm_rerope_patch.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,7 @@ def add_init(
164164

165165
from typing import TYPE_CHECKING, Any, Optional
166166

167+
import os
167168
import numpy as np
168169
from vllm.v1.attention.backends.utils import CommonAttentionMetadata
169170
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
@@ -202,7 +203,13 @@ def _prepare_inputs_modify(
202203

203204
###################### rerope patch ###############
204205
# Get use_rerope
205-
if len(scheduler_output.scheduled_new_reqs) > 0:
206+
vllm_use_rerope = os.getenv("VLLM_USE_REROPE", "0").lower() in (
207+
"1",
208+
"true",
209+
"yes",
210+
"on",
211+
)
212+
if vllm_use_rerope and len(scheduler_output.scheduled_new_reqs) > 0:
206213
self.use_rerope = (
207214
max(
208215
[

0 commit comments

Comments
 (0)