add tokens to special_tokens_dict (#436)

Abhishek-TAMU · web-flow · commit 24f7e42bb73d · 2025-01-09T16:26:24.000-05:00
Signed-off-by: Abhishek <maurya.abhishek@ibm.com>
diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py
@@ -290,8 +290,10 @@ def train(
             )
             if tokenizer.eos_token != configs.DEFAULT_PAD_TOKEN:
                 tokenizer.pad_token = configs.DEFAULT_PAD_TOKEN
+                special_tokens_dict["pad_token"] = configs.DEFAULT_PAD_TOKEN
             else:
                 tokenizer.eos_token = configs.DEFAULT_EOS_TOKEN
+                special_tokens_dict["eos_token"] = configs.DEFAULT_EOS_TOKEN
 
     # TODO: lower priority but understand if resizing impacts inference quality and why its needed.
     # It makes sense if we manipulate tokenizer that we also save it and provide it to inference.