Skip to content

Commit 716f3fc

Browse files
authored
Introduce VLLM_WARMUP_WITH_PENALTY for internVL warmup (#1967)
Introduce VLLM_WARMUP_WITH_PENALTY so that the penalty-application code in the sampler is also exercised during warmup. The code at https://github.com/HabanaAI/vllm-fork/blob/libint/intervl_bucket/vllm/model_executor/layers/sampler.py#L280 is not executed during warmup, which causes an extra graph compilation at runtime when the condition becomes True for a real run.
1 parent bb96123 commit 716f3fc

File tree

1 file changed

+15
-1
lines changed

1 file changed

+15
-1
lines changed

vllm/worker/hpu_model_runner.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2784,11 +2784,15 @@ def create_dummy_seq_group_metadata(self,
27842784
lora_request=None,
27852785
img_args=None,
27862786
temperature=0,
2787+
presence_penalty=0.0,
2788+
top_p=1.0,
27872789
ctx=0):
27882790
if self.is_pooler:
27892791
sampling_params = None
27902792
else:
2791-
sampling_params = SamplingParams(temperature=temperature)
2793+
sampling_params = SamplingParams(temperature=temperature,
2794+
presence_penalty=presence_penalty,
2795+
top_p=top_p)
27922796
num_blocks = math.ceil(seq_len / self.block_size)
27932797
seq_len = max(seq_len, 1)
27942798
computed_block_nums = None
@@ -2945,6 +2949,12 @@ def warmup_scenario(self,
29452949
]
29462950
self.profiler.start('internal', scenario_name)
29472951
times = num_iters if use_graphs or is_pt_profiler_run else 1
2952+
presence_penalty = 1.0 if os.getenv('VLLM_WARMUP_WITH_PENALTY',
2953+
'0') == '1' else 0.0
2954+
top_p = 0.1 if os.getenv('VLLM_WARMUP_WITH_PENALTY',
2955+
'0') == '1' else 1.0
2956+
temperature = 1.0 if os.getenv('VLLM_WARMUP_WITH_PENALTY',
2957+
'0') == '1' else 0.0
29482958
if is_prompt:
29492959
seqs = [
29502960
self.create_dummy_seq_group_metadata(
@@ -2955,6 +2965,8 @@ def warmup_scenario(self,
29552965
if dummy_lora_requests_per_seq else None,
29562966
img_args=img_args,
29572967
temperature=temperature,
2968+
presence_penalty=presence_penalty,
2969+
top_p=top_p,
29582970
ctx=ctx) for i in range(batch_size)
29592971
]
29602972
else:
@@ -2968,6 +2980,8 @@ def warmup_scenario(self,
29682980
lora_request=dummy_lora_requests_per_seq[i]
29692981
if dummy_lora_requests_per_seq else None,
29702982
temperature=temperature,
2983+
presence_penalty=presence_penalty,
2984+
top_p=top_p,
29712985
ctx=ctx) for i, b in enumerate(blocks)
29722986
]
29732987
if not is_dummy_run:

0 commit comments

Comments
 (0)