2 changes: 1 addition & 1 deletion convert_hf_to_gguf.py

@@ -7648,7 +7648,7 @@ def set_vocab(self):
 @ModelBase.register("NemotronHForCausalLM")
 class NemotronHModel(GraniteHybridModel):
     """Hybrid mamba2/attention model from NVIDIA"""
-    model_arch = gguf.MODEL_ARCH.NEMOTRONH
+    model_arch = gguf.MODEL_ARCH.NEMOTRON_H

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
6 changes: 3 additions & 3 deletions gguf-py/gguf/constants.py

@@ -367,7 +367,7 @@ class MODEL_ARCH(IntEnum):
     T5ENCODER = auto()
     JAIS = auto()
     NEMOTRON = auto()
-    NEMOTRONH = auto()
+    NEMOTRON_H = auto()
     EXAONE = auto()
     EXAONE4 = auto()
     GRANITE = auto()
@@ -701,7 +701,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.T5ENCODER: "t5encoder",
     MODEL_ARCH.JAIS: "jais",
     MODEL_ARCH.NEMOTRON: "nemotron",
-    MODEL_ARCH.NEMOTRONH: "nemotronh",
+    MODEL_ARCH.NEMOTRON_H: "nemotron_h",
     MODEL_ARCH.EXAONE: "exaone",
     MODEL_ARCH.EXAONE4: "exaone4",
     MODEL_ARCH.GRANITE: "granite",
@@ -2299,7 +2299,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
-    MODEL_ARCH.NEMOTRONH: [
+    MODEL_ARCH.NEMOTRON_H: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
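Why the Python and C++ tables have to move together: the string from this map ("nemotron_h" after the change) is written into the GGUF file as general.architecture, and the runtime resolves it back to an enum by reverse lookup over the matching table in src/llama-arch.cpp. A minimal sketch of that lookup, using simplified stand-in types rather than the real llm_arch machinery:

#include <map>
#include <string>

// Simplified stand-ins for llm_arch / LLM_ARCH_NAMES in src/llama-arch.cpp.
enum arch_id { ARCH_NEMOTRON, ARCH_NEMOTRON_H, ARCH_UNKNOWN };

static const std::map<arch_id, const char *> ARCH_NAMES = {
    { ARCH_NEMOTRON,   "nemotron"   },
    { ARCH_NEMOTRON_H, "nemotron_h" },
};

// Resolve the general.architecture string read from a GGUF file back to the
// enum; this reverse lookup is why the converter and the runtime must emit
// and expect exactly the same string.
arch_id arch_from_string(const std::string & name) {
    for (const auto & kv : ARCH_NAMES) {
        if (name == kv.second) {
            return kv.first;
        }
    }
    return ARCH_UNKNOWN; // a file converted before the rename ("nemotronh") lands here
}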
6 changes: 3 additions & 3 deletions src/llama-arch.cpp

@@ -69,7 +69,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_T5ENCODER, "t5encoder" },
     { LLM_ARCH_JAIS, "jais" },
     { LLM_ARCH_NEMOTRON, "nemotron" },
-    { LLM_ARCH_NEMOTRONH, "nemotronh" },
+    { LLM_ARCH_NEMOTRON_H, "nemotron_h" },
     { LLM_ARCH_EXAONE, "exaone" },
     { LLM_ARCH_EXAONE4, "exaone4" },
     { LLM_ARCH_RWKV6, "rwkv6" },
@@ -1552,7 +1552,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         },
     },
     {
-        LLM_ARCH_NEMOTRONH,
+        LLM_ARCH_NEMOTRON_H,
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
@@ -2381,7 +2381,7 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
         case LLM_ARCH_PLAMO2:
         case LLM_ARCH_GRANITE_HYBRID:
         case LLM_ARCH_LFM2:
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             return true;
         default:
             return false;
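The per-architecture table above pairs tensor enums with base names; a formatter then combines the base name with a layer index and a suffix to produce the tensor names stored in the GGUF file. A rough sketch of the pattern, with an illustrative helper that is not llama.cpp's actual formatter:

#include <cstdio>
#include <string>

// Illustrative helper only: the base name from the table (which may carry a
// %d placeholder for the layer index) is formatted, then a suffix appended,
// yielding the final GGUF tensor name.
std::string tensor_name(const char * base, const char * suffix, int il = -1) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), base, il); // fills %d when present
    return std::string(buf) + "." + suffix;
}

// tensor_name("token_embd", "weight")       -> "token_embd.weight"
// tensor_name("blk.%d.ssm_in", "weight", 3) -> "blk.3.ssm_in.weight"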
2 changes: 1 addition & 1 deletion src/llama-arch.h

@@ -73,7 +73,7 @@ enum llm_arch {
     LLM_ARCH_T5ENCODER,
     LLM_ARCH_JAIS,
     LLM_ARCH_NEMOTRON,
-    LLM_ARCH_NEMOTRONH,
+    LLM_ARCH_NEMOTRON_H,
     LLM_ARCH_EXAONE,
     LLM_ARCH_EXAONE4,
     LLM_ARCH_RWKV6,
18 changes: 9 additions & 9 deletions src/llama-model.cpp

@@ -1570,7 +1570,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             {
                 ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv);
                 ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner);
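The LLM_KV_SSM_* keys expand to "{arch}.ssm.*" names, so after the rename they resolve under the "nemotron_h." prefix. As a standalone illustration, the same metadata can be read with ggml's public gguf API; the file path is a placeholder and the exact key string is inferred from the gguf-py key templates:

#include "gguf.h" // ggml's gguf API; header location varies across versions
#include <cstdio>

int main() {
    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    gguf_context * ctx = gguf_init_from_file("nemotron-h.gguf", params); // placeholder path

    if (ctx) {
        // "{arch}.ssm.conv_kernel" per the gguf-py key templates -> with the
        // renamed arch string this becomes "nemotron_h.ssm.conv_kernel".
        auto id = gguf_find_key(ctx, "nemotron_h.ssm.conv_kernel");
        if (id >= 0) {
            std::printf("ssm_d_conv = %u\n", gguf_get_val_u32(ctx, id));
        }
        gguf_free(ctx);
    }
    return 0;
}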
@@ -4709,7 +4709,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, TENSOR_NOT_REQUIRED);
                 }
             } break;
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             {
                 // mamba2 Mixer SSM params
                 // NOTE: int64_t for tensor dimensions
@@ -5953,7 +5953,7 @@ void llama_model::print_info() const {
         arch == LLM_ARCH_FALCON_H1 ||
         arch == LLM_ARCH_PLAMO2 ||
         arch == LLM_ARCH_GRANITE_HYBRID ||
-        arch == LLM_ARCH_NEMOTRONH) {
+        arch == LLM_ARCH_NEMOTRON_H) {
         LLAMA_LOG_INFO("%s: ssm_d_conv = %u\n", __func__, hparams.ssm_d_conv);
         LLAMA_LOG_INFO("%s: ssm_d_inner = %u\n", __func__, hparams.ssm_d_inner);
         LLAMA_LOG_INFO("%s: ssm_d_state = %u\n", __func__, hparams.ssm_d_state);
@@ -14220,8 +14220,8 @@ struct llm_build_nemotron : public llm_graph_context {
     }
 };

-struct llm_build_nemotronh : public llm_graph_context_mamba {
-    llm_build_nemotronh(
+struct llm_build_nemotron_h : public llm_graph_context_mamba {
+    llm_build_nemotron_h(
         const llama_model & model,
         const llm_graph_params & params) :
             llm_graph_context_mamba(params) {
@@ -18508,7 +18508,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
         if (arch == LLM_ARCH_FALCON_H1) {
             filter_attn = [&](int32_t) { return true; };
             filter_recr = [&](int32_t) { return true; };
-        } else if (arch == LLM_ARCH_NEMOTRONH) {
+        } else if (arch == LLM_ARCH_NEMOTRON_H) {
             filter_attn = [&](int32_t il) {
                 return !hparams.is_recurrent(il) && hparams.n_ff(il) == 0;
             };
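These filters are what route each layer of the hybrid model to the right cache at memory-creation time: recurrent (mamba2) layers get SSM state, attention layers get KV cache, and the n_ff(il) == 0 test works because Nemotron-H attention layers carry no FFN of their own, MLP blocks being separate layers. A self-contained sketch of the partitioning, with a simplified stand-in for hparams:

#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-in for the relevant hparams queries.
struct hparams_sketch {
    std::vector<bool>     recurrent; // true -> mamba2 layer
    std::vector<uint32_t> n_ff;      // 0 -> layer has no FFN
};

int main() {
    // Four layers: mamba2, attention, MLP, mamba2.
    hparams_sketch hp = {
        { true, false, false, true },
        { 0,    0,     4096,  0    },
    };

    // Mirrors the NEMOTRON_H filters: attention = not recurrent and no FFN.
    auto filter_attn = [&](int32_t il) { return !hp.recurrent[il] && hp.n_ff[il] == 0; };
    auto filter_recr = [&](int32_t il) { return (bool) hp.recurrent[il]; };

    for (int32_t il = 0; il < 4; ++il) {
        std::printf("layer %d -> %s\n", il,
            filter_recr(il) ? "recurrent cache" :
            filter_attn(il) ? "attention KV cache" : "no cache (MLP)");
    }
    return 0;
}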
@@ -18865,9 +18865,9 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
             {
                 llm = std::make_unique<llm_build_nemotron>(*this, params);
             } break;
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             {
-                llm = std::make_unique<llm_build_nemotronh>(*this, params);
+                llm = std::make_unique<llm_build_nemotron_h>(*this, params);
             } break;
         case LLM_ARCH_EXAONE:
             {
@@ -19104,7 +19104,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_RWKV7:
         case LLM_ARCH_ARWKV7:
         case LLM_ARCH_WAVTOKENIZER_DEC:
-        case LLM_ARCH_NEMOTRONH:
+        case LLM_ARCH_NEMOTRON_H:
             return LLAMA_ROPE_TYPE_NONE;

         // use what we call a normal RoPE, operating on pairs of consecutive head values