@@ -2784,11 +2784,15 @@ def create_dummy_seq_group_metadata(self,
27842784 lora_request = None ,
27852785 img_args = None ,
27862786 temperature = 0 ,
2787+ presence_penalty = 0.0 ,
2788+ top_p = 1.0 ,
27872789 ctx = 0 ):
27882790 if self .is_pooler :
27892791 sampling_params = None
27902792 else :
2791- sampling_params = SamplingParams (temperature = temperature )
2793+ sampling_params = SamplingParams (temperature = temperature ,
2794+ presence_penalty = presence_penalty ,
2795+ top_p = top_p )
27922796 num_blocks = math .ceil (seq_len / self .block_size )
27932797 seq_len = max (seq_len , 1 )
27942798 computed_block_nums = None
@@ -2945,6 +2949,12 @@ def warmup_scenario(self,
29452949 ]
29462950 self .profiler .start ('internal' , scenario_name )
29472951 times = num_iters if use_graphs or is_pt_profiler_run else 1
2952+ presence_penalty = 1.0 if os .getenv ('VLLM_WARMUP_WITH_PENALTY' ,
2953+ '0' ) == '1' else 0.0
2954+ top_p = 0.1 if os .getenv ('VLLM_WARMUP_WITH_PENALTY' ,
2955+ '0' ) == '1' else 1.0
2956+ temperature = 1.0 if os .getenv ('VLLM_WARMUP_WITH_PENALTY' ,
2957+ '0' ) == '1' else 0.0
29482958 if is_prompt :
29492959 seqs = [
29502960 self .create_dummy_seq_group_metadata (
@@ -2955,6 +2965,8 @@ def warmup_scenario(self,
29552965 if dummy_lora_requests_per_seq else None ,
29562966 img_args = img_args ,
29572967 temperature = temperature ,
2968+ presence_penalty = presence_penalty ,
2969+ top_p = top_p ,
29582970 ctx = ctx ) for i in range (batch_size )
29592971 ]
29602972 else :
@@ -2968,6 +2980,8 @@ def warmup_scenario(self,
29682980 lora_request = dummy_lora_requests_per_seq [i ]
29692981 if dummy_lora_requests_per_seq else None ,
29702982 temperature = temperature ,
2983+ presence_penalty = presence_penalty ,
2984+ top_p = top_p ,
29712985 ctx = ctx ) for i , b in enumerate (blocks )
29722986 ]
29732987 if not is_dummy_run :
0 commit comments