Skip to content

Commit ddd61eb

Browse files
JRD971000, Ali Taghibakhshi, Ali Taghibakhshi
authored
Alit/nano v2 (#14464)
* add nano v2 support
* add convertor fix
* minor change to test script
* remove use_mamba_mem_eff_path
* Apply isort and black reformatting

Signed-off-by: JRD971000 <JRD971000@users.noreply.github.com>

---------

Signed-off-by: JRD971000 <JRD971000@users.noreply.github.com>
Co-authored-by: Ali Taghibakhshi <ataghibakhsh@login-eos02.eos.clusters.nvidia.com>
Co-authored-by: Ali Taghibakhshi <ataghibakhsh@login-eos01.eos.clusters.nvidia.com>
Co-authored-by: JRD971000 <JRD971000@users.noreply.github.com>
1 parent bb43d32 commit ddd61eb

File tree

8 files changed

+497
-0
lines changed

8 files changed

+497
-0
lines changed

nemo/collections/llm/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,7 @@
153153
NemotronHConfig47B,
154154
NemotronHConfig56B,
155155
NemotronModel,
156+
NemotronNano9Bv2,
156157
NVIDIAMambaConfig8B,
157158
NVIDIAMambaHybridConfig8B,
158159
Phi3Config,
@@ -279,6 +280,7 @@
279280
"NemotronHConfig8B",
280281
"NemotronHConfig47B",
281282
"NemotronHConfig56B",
283+
"NemotronNano9Bv2",
282284
"MambaModel",
283285
"LlamaConfig",
284286
"Llama2Config7B",

nemo/collections/llm/gpt/model/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,7 @@
166166
NemotronHConfig8B,
167167
NemotronHConfig47B,
168168
NemotronHConfig56B,
169+
NemotronNano9Bv2,
169170
NVIDIAMambaConfig8B,
170171
NVIDIAMambaHybridConfig8B,
171172
SSMConfig,
@@ -304,6 +305,7 @@
304305
"NemotronHConfig8B",
305306
"NemotronHConfig47B",
306307
"NemotronHConfig56B",
308+
"NemotronNano9Bv2",
307309
"MambaModel",
308310
"DeepSeekModel",
309311
"DeepSeekV2Config",

nemo/collections/llm/gpt/model/ssm.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -575,6 +575,8 @@ def make_vocab_size_divisible_by(vocab_size):
575575
nemotron_h_config = NemotronHConfig47B()
576576
elif "56B" in source._name_or_path:
577577
nemotron_h_config = NemotronHConfig56B()
578+
elif "Nano-9B-v2" in source._name_or_path:
579+
nemotron_h_config = NemotronNano9Bv2()
578580
else:
579581
raise ValueError(f"Unsupported model size: {source._name_or_path}")
580582

@@ -702,6 +704,9 @@ def config(self):
702704
elif type(source) == NemotronHConfig56B:
703705
model_path = local_model_path if local_model_path else "nvidia/Nemotron-H-56B-Base-8K"
704706
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
707+
elif type(source) == NemotronNano9Bv2:
708+
model_path = local_model_path if local_model_path else "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
709+
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
705710
else:
706711
raise ValueError(f"Unsupported model size: {source}")
707712

@@ -1039,6 +1044,21 @@ class NemotronHConfig56B(NemotronHConfigBase):
10391044
num_attention_heads: int = 64
10401045

10411046

1047+
@dataclass
1048+
class NemotronNano9Bv2(NemotronHConfigBase):
1049+
"""NemotronNano9Bv2"""
1050+
1051+
hybrid_override_pattern: str = "M-M-M-MM-M-M-M*-M-M-M*-M-M-M-M*-M-M-M-M*-M-MM-M-M-M-M-M-"
1052+
num_layers: int = 56
1053+
hidden_size: int = 4480
1054+
mamba_num_heads: int = 128
1055+
kv_channels: int = 128
1056+
mamba_state_dim: int = 128
1057+
ffn_hidden_size: int = 15680
1058+
num_attention_heads: int = 40
1059+
mamba_head_dim: int = 80
1060+
1061+
10421062
__all__ = [
10431063
"SSMConfig",
10441064
"BaseMambaConfig130M",
@@ -1053,4 +1073,5 @@ class NemotronHConfig56B(NemotronHConfigBase):
10531073
"NemotronHConfig8B",
10541074
"NemotronHConfig47B",
10551075
"NemotronHConfig56B",
1076+
"NemotronNano9Bv2",
10561077
]

nemo/collections/llm/recipes/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
nemotron4_15b_16k,
8282
nemotron4_15b_64k,
8383
nemotron4_340b,
84+
nemotron_nano_9b_v2,
8485
nemotronh_4b,
8586
nemotronh_8b,
8687
nemotronh_47b,
@@ -163,6 +164,7 @@
163164
"nemotronh_8b",
164165
"nemotronh_47b",
165166
"nemotronh_56b",
167+
"nemotron_nano_9b_v2",
166168
"mistral_7b",
167169
"mistral_nemo_12b",
168170
"hyena_base",

0 commit comments

Comments
 (0)