
Commit 340eb7a

Remove Qwen tokenizer modification (#390)
Signed-off-by: Chenjie Luo <[email protected]>
1 parent adcb1a1 commit 340eb7a

5 files changed: 0 additions, 29 deletions
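All five files drop the same hand-rolled Qwen special case: deriving a model type from the checkpoint directory name (or the tokenizer class name) and then forcing pad_token and eos_token to token id 151643. The sketch below shows the post-change shape of such a tokenizer helper, assuming the Hugging Face AutoTokenizer API; load_tokenizer and its pad-token fallback are illustrative, not the repository's exact get_tokenizer.

from transformers import AutoTokenizer

def load_tokenizer(ckpt_path: str, trust_remote_code: bool = False):
    """Illustrative helper without the removed Qwen branch.

    The deleted code hard-coded pad/eos to token id 151643 for Qwen
    checkpoints; newer Qwen tokenizer configs declare their special
    tokens themselves, which is presumably why the override could go.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        ckpt_path,
        padding_side="left",
        trust_remote_code=trust_remote_code,
    )
    # Generic fallback instead of a per-model hard-coded id: if the
    # checkpoint declares no pad token, reuse its eos token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return tokenizer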


examples/llm_eval/gen_model_answer.py

Lines changed: 0 additions & 8 deletions
@@ -180,14 +180,6 @@ def get_model_answers(
     # Model Optimizer modification
     tokenizer = get_tokenizer(model_path, trust_remote_code=args.trust_remote_code)
     if checkpoint_dir:
-        # get model type
-        last_part = os.path.basename(checkpoint_dir)
-        model_type = last_part.split("_")[0]
-        # Some models require to set pad_token and eos_token based on external config (e.g., qwen)
-        if model_type == "qwen":
-            tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
-            tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)
-
         assert LLM is not None, "tensorrt_llm APIs could not be imported."
         model = LLM(checkpoint_dir, tokenizer=tokenizer)
     elif not nim_model:

examples/llm_eval/mmlu.py

Lines changed: 0 additions & 8 deletions
@@ -253,14 +253,6 @@ def main(
     model_path = kwargs["model_path"]
     tokenizer = get_tokenizer(model_path, trust_remote_code=kwargs.get("trust_remote_code", False))
     if kwargs.get("checkpoint_dir"):
-        # get model type
-        last_part = os.path.basename(kwargs["checkpoint_dir"])
-        model_type = last_part.split("_")[0]
-        # Some models require to set pad_token and eos_token based on external config (e.g., qwen)
-        if model_type == "qwen":
-            tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
-            tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)
-
         assert LLM is not None, "tensorrt_llm APIs could not be imported."
         medusa_choices = kwargs.get("medusa_choices")
         model = LLM(

examples/llm_eval/quantization_utils.py

Lines changed: 0 additions & 4 deletions
@@ -53,10 +53,6 @@ def get_tokenizer(ckpt_path, max_seq_len=MAX_SEQ_LEN, trust_remote_code=False):
         padding_side="left",
         trust_remote_code=trust_remote_code,
     )
-    if type(tokenizer).__name__ == "QWenTokenizer":
-        # qwen use token id 151643 as pad and eos tokens
-        tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
-        tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)

     # can't set attribute 'pad_token' for "<unk>"
     if tokenizer.pad_token != "<unk>":
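This get_tokenizer helper keeps its generic "<unk>" handling; only the QWenTokenizer branch goes away. A quick way to check that this is safe for a given checkpoint is to load its tokenizer and inspect what the config itself declares; the checkpoint name below is a placeholder, not one used by these scripts.

from transformers import AutoTokenizer

# Placeholder checkpoint: substitute whichever Qwen model you evaluate.
tok = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct", trust_remote_code=True)
print("eos:", tok.eos_token, tok.eos_token_id)
print("pad:", tok.pad_token, tok.pad_token_id)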

examples/llm_ptq/example_utils.py

Lines changed: 0 additions & 5 deletions
@@ -54,11 +54,6 @@ def get_tokenizer(ckpt_path, trust_remote_code=False, **kwargs):
         ckpt_path, trust_remote_code=trust_remote_code, **kwargs
     )

-    if "qwen" in type(tokenizer).__name__.lower():
-        # qwen use token id 151643 as pad and eos tokens
-        tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
-        tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)
-
     # can't set attribute 'pad_token' for "<unk>"
     # We skip this step for Nemo models
     if tokenizer.pad_token != "<unk>" or tokenizer.pad_token is None:

examples/windows/accuracy_benchmark/quantization_utils.py

Lines changed: 0 additions & 4 deletions
@@ -37,10 +37,6 @@ def get_tokenizer(ckpt_path, max_seq_len=MAX_SEQ_LEN, trust_remote_code=False):
         padding_side="left",
         trust_remote_code=trust_remote_code,
     )
-    if type(tokenizer).__name__ == "QWenTokenizer":
-        # qwen use token id 151643 as pad and eos tokens
-        tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
-        tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)

     # can't set attribute 'pad_token' for "<unk>"
     if tokenizer.pad_token != "<unk>":
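For readers auditing the change, a small sanity check (a sketch, with a placeholder checkpoint name) compares what the deleted override would have set against what the tokenizer config already provides; if they agree, dropping the branch is a no-op at evaluation time.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B", trust_remote_code=True)  # placeholder checkpoint
legacy = tok.convert_ids_to_tokens(151643)  # token the removed code forced pad/eos to
print("legacy override token:", legacy)
print("config-declared eos/pad:", tok.eos_token, tok.pad_token)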
