Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 06566d9

Browse files
committed
feat(granite3): Add config plumbing for granite3-2b
This does not yet apply the new multipliers in the model architecture, so the model's output is garbage at the moment.

NOTE: There is currently a bug where this model is missing tokenizer.json on Hugging Face, but that should be resolved soon.

Branch: GraniteThreeDenseSupport

Signed-off-by: Gabe Goodhart <[email protected]>
1 parent cc0ffce commit 06566d9

File tree

3 files changed

+33
-0
lines changed

3 files changed

+33
-0
lines changed

torchchat/model.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,11 @@ class TransformerArgs:
287287
feed_forward_bias: bool = False
288288
# Whether or not to tie the input word embeddings to the output
289289
tie_word_embeddings: bool = False
290+
# Granite architecture multipliers
291+
embedding_multiplier: Optional[float] = None
292+
attention_multiplier: Optional[float] = None
293+
residual_multiplier: Optional[float] = None
294+
logits_scaling: Optional[float] = None
290295

291296
def __post_init__(self):
292297
if self.n_local_heads == -1:

torchchat/model_config/models.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,5 +178,12 @@
178178
"distribution_path": "ibm-granite/granite-8b-code-instruct-128k",
179179
"transformer_params_key": "Granite-8B-Code",
180180
"tokenizer_file": "tokenizer.json"
181+
},
182+
"ibm-granite/granite-3.0-2b-instruct": {
183+
"aliases": ["granite3-2b"],
184+
"distribution_channel": "HuggingFaceSnapshot",
185+
"distribution_path": "ibm-granite/granite-3.0-2b-instruct",
186+
"transformer_params_key": "Granite-3.0-2B-Instruct",
187+
"tokenizer_file": "tokenizer.json"
181188
}
182189
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"block_size": 8192,
3+
"dim": 2048,
4+
"hidden_dim": 8192,
5+
"n_heads": 32,
6+
"n_local_heads": 8,
7+
"n_layers": 40,
8+
"rope_base": 10000,
9+
"vocab_size": 49155,
10+
"use_hf_tokenizer": true,
11+
"tokenizer_prepend_bos": false,
12+
"norm_eps": 0.00001,
13+
"rope_scaling": null,
14+
"attention_bias": false,
15+
"feed_forward_bias": false,
16+
"tie_word_embeddings": true,
17+
"embedding_multiplier": 12.0,
18+
"attention_multiplier": 0.015625,
19+
"residual_multiplier": 0.22,
20+
"logits_scaling": 8.0
21+
}

0 commit comments

Comments
 (0)