Skip to content

Commit 6fa9816

Browse files
committed
chat_template.jinja file
1 parent d4f0f78 commit 6fa9816

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

apps/sft/main.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,9 @@ def setup_data(self):
154154
generation_config_path=os.path.join(
155155
self.job_config.model.hf_assets_path, "generation_config.json"
156156
),
157+
chat_template_path=os.path.join(
158+
self.job_config.model.hf_assets_path, "chat_template.jinja"
159+
),
157160
)
158161

159162
dataset = sft_iterable_dataset(

src/forge/data/tokenizer.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ def __init__(
4242
*,
4343
tokenizer_config_json_path: str | None = None,
4444
generation_config_path: str | None = None,
45+
chat_template_path: str | None = None,
4546
):
4647
self.tokenizer = Tokenizer.from_file(tokenizer_json_path)
4748
if not (tokenizer_config_json_path or generation_config_path):
@@ -51,6 +52,10 @@ def __init__(
5152
if tokenizer_config_json_path:
5253
with open(tokenizer_config_json_path, "rb") as f:
5354
self.config = json.load(f)
55+
if chat_template_path:
56+
with open(chat_template_path, "r") as f:
57+
# TODO: warning in the case of overwrite?
58+
self.config["chat_template"] = f.read()
5459
else:
5560
self.config = None
5661
if generation_config_path:
@@ -227,12 +232,14 @@ def __init__(
227232
*,
228233
tokenizer_config_json_path: str | None = None,
229234
generation_config_path: str | None = None,
235+
chat_template_path: str | None = None,
230236
truncation_type: str = "right",
231237
):
232238
self.base_tokenizer = HuggingFaceBaseTokenizer(
233239
tokenizer_json_path=tokenizer_json_path,
234240
tokenizer_config_json_path=tokenizer_config_json_path,
235241
generation_config_path=generation_config_path,
242+
chat_template_path=chat_template_path
236243
)
237244

238245
# Contents of the tokenizer_config.json

0 commit comments

Comments
 (0)