Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit c9f8a71

Browse files
committed
feat(generate): Make prepending BOS model-configurable
And disable it for Granite Code models. Branch: GraniteCodeSupport. Signed-off-by: Gabe Goodhart <[email protected]>
1 parent 526ce15 commit c9f8a71

File tree

4 files changed

+17
-3
lines changed

4 files changed

+17
-3
lines changed

torchchat/generate.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,7 @@ def encode_tokens(self, string, bos=True, device="cpu"):
746746
if bos:
747747
tokens = [self.tokenizer.bos_id()] + tokens
748748
logger.debug("Size after encode_tokens: %d", len(tokens))
749+
logger.debug("Token IDs: %s", tokens)
749750
return torch.tensor(tokens, dtype=torch.int, device=device)
750751

751752
def _callback(self, x, *, buffer, done_generating):
@@ -794,7 +795,7 @@ def _gen_model_input(
794795
# Single String prompt
795796
if isinstance(prompt, str):
796797
encoded = self.encode_tokens(
797-
prompt, bos=True, device=self.builder_args.device
798+
prompt, bos=self.model.config.tokenizer_prepend_bos, device=self.builder_args.device
798799
)
799800
# List of dialog
800801
else:
@@ -1048,7 +1049,7 @@ def chat(
10481049
else:
10491050
prompt = f"{B_INST} {prompt.strip()} {E_INST}"
10501051
encoded = self.encode_tokens(
1051-
prompt, bos=True, device=self.builder_args.device
1052+
prompt, bos=self.model.config.tokenizer_prepend_bos, device=self.builder_args.device
10521053
)
10531054
else:
10541055
if self.system_prompt:

torchchat/model.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ class TransformerArgs:
276276
# Select the desired tokenizer. Defaults to sentencepiece
277277
use_tiktoken: bool = False
278278
use_hf_tokenizer: bool = False
279+
tokenizer_prepend_bos: bool = True
279280
max_seq_length: int = 8192
280281
rope_scaling: Optional[Dict[str, Any]] = None
281282
# For pipeline parallel
@@ -333,13 +334,15 @@ class ModelArgs:
333334
transformer_args: Dict[str, Dict[str, Any]]
334335
use_tiktoken: bool
335336
use_hf_tokenizer: bool
337+
tokenizer_prepend_bos: bool
336338

337339
def __init__(
338340
self,
339341
transformer_args: Dict[str, Dict[str, Any]],
340342
model_type: ModelType = ModelType.TextOnly,
341343
use_tiktoken: bool = False,
342344
use_hf_tokenizer: bool = False,
345+
tokenizer_prepend_bos: bool = True,
343346
) -> None:
344347
self._sanity_check(transformer_args, model_type)
345348

@@ -349,6 +352,7 @@ def __init__(
349352
# Model-level attributes
350353
self.use_tiktoken = use_tiktoken
351354
self.use_hf_tokenizer = use_hf_tokenizer
355+
self.tokenizer_prepend_bos = tokenizer_prepend_bos
352356

353357
def _sanity_check(
354358
self,
@@ -376,7 +380,14 @@ def from_params(cls, params_path):
376380

377381
use_tiktoken = loaded_params.get("use_tiktoken", False)
378382
use_hf_tokenizer = loaded_params.get("use_hf_tokenizer", False)
379-
return cls(transformer_args, model_type, use_tiktoken, use_hf_tokenizer)
383+
tokenizer_prepend_bos = loaded_params.get("tokenizer_prepend_bos", True)
384+
return cls(
385+
transformer_args=transformer_args,
386+
model_type=model_type,
387+
use_tiktoken=use_tiktoken,
388+
use_hf_tokenizer=use_hf_tokenizer,
389+
tokenizer_prepend_bos=tokenizer_prepend_bos,
390+
)
380391

381392
@classmethod
382393
def from_table(cls, name: str):

torchchat/model_params/Granite-3B-Code.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
"rope_base": 10000000,
99
"vocab_size": 49152,
1010
"use_hf_tokenizer": true,
11+
"tokenizer_prepend_bos": false,
1112
"norm_eps": 0.00001,
1213
"rope_scaling": null,
1314
"attention_bias": true,

torchchat/model_params/Granite-8B-Code.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
"rope_base": 10000000,
99
"vocab_size": 49152,
1010
"use_hf_tokenizer": true,
11+
"tokenizer_prepend_bos": false,
1112
"norm_eps": 0.00001,
1213
"rope_scaling": null,
1314
"attention_bias": true,

0 commit comments

Comments
 (0)