Addresses #15409 - Support for NVIDIA Nemotron-H hybrid architecture models (Draft PR #15572)
Changes from 16 commits
@@ -1076,9 +1076,15 @@ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: G
             kv_data += self._pack("Q", len(encoded_val))
             kv_data += encoded_val
         elif vtype == GGUFValueType.ARRAY:
+            # Convert numpy arrays to lists for serialization
+            if hasattr(val, 'tolist'):
+                val = val.tolist()
+
             if not isinstance(val, Sequence):
-                raise ValueError("Invalid GGUF metadata array, expecting sequence")
+                print(f"DEBUG: Failed metadata key type: {type(val)}")
+                print(f"DEBUG: Failed metadata value: {val}")
+                print(f"DEBUG: Caller info available in stack trace")
+                raise ValueError(f"Invalid GGUF metadata array, expecting sequence but got {type(val)}: {val}")
Review comment: more debug, didn't mean to commit. Will clean up.
 
             if len(val) == 0:
                 raise ValueError("Invalid GGUF metadata array. Empty array")
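Note on the change above: numpy arrays (presumably the per-layer metadata produced during conversion) do not pass an isinstance check against Sequence, which is why the tolist() conversion is needed before the existing validation. The extra DEBUG prints are, per the review comment above, temporary. A minimal sketch of the failure mode, assuming the Sequence used by the writer is collections.abc.Sequence (or the equivalent typing alias, which behaves the same for isinstance checks):

```python
# Minimal sketch of why the tolist() conversion above is needed.
from collections.abc import Sequence

import numpy as np

layer_types = np.array([0, 1, 0, 2], dtype=np.int32)  # hypothetical per-layer codes

print(isinstance(layer_types, Sequence))           # False: ndarray is not a Sequence
print(isinstance(layer_types.tolist(), Sequence))  # True: a plain Python list passes
```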
@@ -48,6 +48,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_MAMBA,       "mamba"      },
     { LLM_ARCH_MAMBA2,      "mamba2"     },
     { LLM_ARCH_JAMBA,       "jamba"      },
+    { LLM_ARCH_NEMOTRON_H,  "nemotron_h" },
     { LLM_ARCH_FALCON_H1,   "falcon-h1"  },
     { LLM_ARCH_XVERSE,      "xverse"     },
     { LLM_ARCH_COMMAND_R,   "command-r"  },
@@ -200,6 +201,9 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
 
     { LLM_KV_CLASSIFIER_OUTPUT_LABELS,  "%s.classifier.output_labels" },
 
+    // Nemotron-H specific
+    { LLM_KV_LAYER_TYPES,               "%s.layer_types"              },
Review comment: I think we can get away with not adding this new […]
 
     { LLM_KV_SHORTCONV_L_CACHE,         "%s.shortconv.l_cache"        },
 
     { LLM_KV_TOKENIZER_MODEL,           "tokenizer.ggml.model"        },
@@ -1101,6 +1105,31 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP_EXPS,   "blk.%d.ffn_up_exps" },
         },
     },
+    {
+        LLM_ARCH_NEMOTRON_H,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,    "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,   "output_norm" },
+            { LLM_TENSOR_OUTPUT,        "output" },
+            { LLM_TENSOR_ATTN_NORM,     "blk.%d.attn_norm" },
+            // Mamba2 layers
+            { LLM_TENSOR_SSM_IN,        "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D,    "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT,        "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A,         "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D,         "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_OUT,       "blk.%d.ssm_out" },
+            { LLM_TENSOR_SSM_NORM,      "blk.%d.ssm_norm" },
+            // Attention layers
+            { LLM_TENSOR_ATTN_Q,        "blk.%d.wq" },
+            { LLM_TENSOR_ATTN_K,        "blk.%d.wk" },
+            { LLM_TENSOR_ATTN_V,        "blk.%d.wv" },
+            { LLM_TENSOR_ATTN_OUT,      "blk.%d.wo" },
+            // MLP layers
+            { LLM_TENSOR_FFN_DOWN,      "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,        "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_FALCON_H1,
         {
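Likewise, the %d placeholder in the per-block tensor names above is the layer index, so a loader-side lookup would expand to names like blk.0.ssm_in or blk.3.wq depending on the layer type. A small illustrative sketch of that expansion:

```python
# Illustrative expansion of a few of the Nemotron-H per-block tensor name
# templates listed above; "%d" is the block (layer) index.
templates = ["blk.%d.ssm_in", "blk.%d.ssm_conv1d", "blk.%d.wq", "blk.%d.ffn_up"]
for i in (0, 3):
    print([t % i for t in templates])
# ['blk.0.ssm_in', 'blk.0.ssm_conv1d', 'blk.0.wq', 'blk.0.ffn_up']
# ['blk.3.ssm_in', 'blk.3.ssm_conv1d', 'blk.3.wq', 'blk.3.ffn_up']
```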
@@ -2334,6 +2363,7 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
 bool llm_arch_is_hybrid(const llm_arch & arch) {
     switch (arch) {
         case LLM_ARCH_JAMBA:
+        case LLM_ARCH_NEMOTRON_H:
         case LLM_ARCH_FALCON_H1:
         case LLM_ARCH_PLAMO2:
         case LLM_ARCH_GRANITE_HYBRID:
Review comment: I got tired of typing it. Temporary.
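As background for the hybrid classification in the last hunk: Nemotron-H interleaves Mamba2 (SSM), attention and MLP blocks in a single stack, which is what the mixed tensor table above reflects. The sketch below illustrates that idea with hypothetical layer-type codes; it is not the llama.cpp implementation or the converter's actual encoding:

```python
# Hedged sketch, not llama.cpp code: what "hybrid" means for this architecture.
# The integer codes and the helper below are assumptions for illustration; the
# tensor table above only establishes that Mamba2 (ssm_*), attention
# (wq/wk/wv/wo) and MLP (ffn_*) blocks coexist in one model.
LAYER_KINDS = {0: "mamba2", 1: "attention", 2: "mlp"}  # hypothetical encoding

def is_hybrid(layer_types: list[int]) -> bool:
    kinds = {LAYER_KINDS[t] for t in layer_types}
    return "mamba2" in kinds and "attention" in kinds

print(is_hybrid([0, 0, 2, 1, 0, 2]))  # True: SSM and attention interleaved
print(is_hybrid([0, 0, 2, 0, 0, 2]))  # False: no attention layers
```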