2 files changed: +9 -2 lines changed
ucm/integration/vllm/patch: 2 files changed, +9 -2 lines changed
@@ -1430,7 +1430,7 @@ index 5a26e88db..df86287c8 100644
14301430 max_num_scheduled_tokens = max(tokens)
14311431
14321432+ # Get use_rerope
1433- + if len(scheduler_output.scheduled_new_reqs) > 0:
1433+ + if envs.VLLM_USE_REROPE and len(scheduler_output.scheduled_new_reqs) > 0:
14341434+ self.use_rerope = max([len(req.prompt_token_ids)
14351435+ for req in scheduler_output.scheduled_new_reqs]) > envs.REROPE_WINDOW
14361436+
@@ -164,6 +164,7 @@ def add_init(
164164
165165 from typing import TYPE_CHECKING , Any , Optional
166166
167+ import os
167168 import numpy as np
168169 from vllm .v1 .attention .backends .utils import CommonAttentionMetadata
169170 from vllm .v1 .spec_decode .metadata import SpecDecodeMetadata
@@ -202,7 +203,13 @@ def _prepare_inputs_modify(
202203
203204 ###################### rerope patch ###############
204205 # Get use_rerope
205- if len (scheduler_output .scheduled_new_reqs ) > 0 :
206+ vllm_use_rerope = os .getenv ("VLLM_USE_REROPE" , "0" ).lower () in (
207+ "1" ,
208+ "true" ,
209+ "yes" ,
210+ "on" ,
211+ )
212+ if vllm_use_rerope and len (scheduler_output .scheduled_new_reqs ) > 0 :
206213 self .use_rerope = (
207214 max (
208215 [
You can’t perform that action at this time.
0 commit comments