@@ -178,21 +178,18 @@ def get_model_answers(
     nim_model=None,
 ):
     # Model Optimizer modification
+    tokenizer = get_tokenizer(model_path, trust_remote_code=args.trust_remote_code)
     if checkpoint_dir:
-        tokenizer = get_tokenizer(model_path, trust_remote_code=args.trust_remote_code)
-        if checkpoint_dir:
-            # get model type
-            last_part = os.path.basename(checkpoint_dir)
-            model_type = last_part.split("_")[0]
-            # Some models require setting pad_token and eos_token based on external config (e.g., qwen)
-            if model_type == "qwen":
-                tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
-                tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)
-
-            assert LLM is not None, "tensorrt_llm APIs could not be imported."
-            model = LLM(checkpoint_dir, tokenizer=tokenizer)
-        else:
-            raise ValueError("checkpoint_dir is required for TensorRT LLM inference.")
+        # get model type
+        last_part = os.path.basename(checkpoint_dir)
+        model_type = last_part.split("_")[0]
+        # Some models require setting pad_token and eos_token based on external config (e.g., qwen)
+        if model_type == "qwen":
+            tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
+            tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)
+
+        assert LLM is not None, "tensorrt_llm APIs could not be imported."
+        model = LLM(checkpoint_dir, tokenizer=tokenizer)
     elif not nim_model:
         model, _ = load_model(
             model_path,
@@ -205,7 +202,6 @@ def get_model_answers(
             cpu_offloading=False,
             debug=False,
         )
-        tokenizer = get_tokenizer(model_path, trust_remote_code=args.trust_remote_code)
         if args.quant_cfg:
             quantize_model(
                 model,
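In summary: the first hunk hoists `get_tokenizer` above the backend dispatch so it runs exactly once, and drops the redundant nested `if checkpoint_dir:` (whose `else: raise` branch was unreachable, since the outer check already guarantees `checkpoint_dir` is set); the second hunk removes the now-duplicate call in the Hugging Face branch. Below is a minimal, runnable sketch of the post-patch control flow. The `build_model` wrapper, the stub helpers, and the NIM fallback branch are illustrative assumptions, not the real FastChat or TensorRT-LLM APIs; only the branch structure and the Qwen token id come from the diff above.

```python
import os

QWEN_ENDOFTEXT_ID = 151643  # id of Qwen's "<|endoftext|>" token, as hard-coded in the patch


class _StubTokenizer:
    """Illustrative stand-in for the Hugging Face tokenizer (assumption)."""

    pad_token = None
    eos_token = None

    def convert_ids_to_tokens(self, token_id):
        return "<|endoftext|>" if token_id == QWEN_ENDOFTEXT_ID else f"<id:{token_id}>"


def get_tokenizer(model_path, trust_remote_code=False):
    # Stub for the real helper used in the patch (assumption).
    return _StubTokenizer()


def load_model(model_path):
    # Stub for FastChat's loader, which returns a (model, tokenizer) pair (assumption).
    return f"hf-model:{model_path}", None


def LLM(checkpoint_dir, tokenizer=None):
    # Stub for tensorrt_llm's LLM class (assumption).
    return f"trtllm-engine:{checkpoint_dir}"


def build_model(model_path, checkpoint_dir=None, nim_model=None):
    # Post-patch shape: one tokenizer, created up front, shared by every backend.
    tokenizer = get_tokenizer(model_path)
    if checkpoint_dir:
        # The checkpoint directory name encodes the model type as "<model_type>_...".
        model_type = os.path.basename(checkpoint_dir).split("_")[0]
        if model_type == "qwen":
            # Qwen leaves pad/eos unset; both map to the <|endoftext|> token.
            tokenizer.pad_token = tokenizer.convert_ids_to_tokens(QWEN_ENDOFTEXT_ID)
            tokenizer.eos_token = tokenizer.convert_ids_to_tokens(QWEN_ENDOFTEXT_ID)
        model = LLM(checkpoint_dir, tokenizer=tokenizer)  # TensorRT-LLM path
    elif not nim_model:
        model, _ = load_model(model_path)  # local Hugging Face path
    else:
        model = nim_model  # NIM endpoint path (assumed handling)
    return model, tokenizer


if __name__ == "__main__":
    model, tok = build_model("Qwen/Qwen-7B", checkpoint_dir="/tmp/qwen_fp8")
    print(model, tok.eos_token)  # trtllm-engine:/tmp/qwen_fp8 <|endoftext|>
```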