
Commit a986efb

wip
1 parent b498594 commit a986efb

3 files changed: 51 additions & 24 deletions

convert_hf_to_gguf.py

Lines changed: 0 additions & 7 deletions
@@ -6691,13 +6691,6 @@ def prepare_tensors(self):
 class SmolLM3Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.SMOLLM3
 
-    def set_gguf_parameters(self):
-        super().set_gguf_parameters()
-
-        no_rope_layer_interval = self.hparams.get("no_rope_layer_interval")
-        if no_rope_layer_interval is not None:
-            self.gguf_writer.add_uint32("no_rope_layer_interval", no_rope_layer_interval)
-
 ###### CONVERSION LOGIC ######
 
src/llama-arch.cpp

Lines changed: 12 additions & 13 deletions
@@ -1728,19 +1728,18 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
     {
         LLM_ARCH_SMOLLM3,
         {
-            { LLM_TENSOR_TOKEN_EMBD, "token_embd.weight" },
-            { LLM_TENSOR_OUTPUT_NORM, "output_norm.weight" },
-            { LLM_TENSOR_OUTPUT, "output.weight" },
-            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm.weight" },
-            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q.weight" },
-            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k.weight" },
-            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v.weight" },
-            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output.weight" },
-            { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate.weight" },
-            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down.weight" },
-            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up.weight" },
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
 };
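
Note on the tensor table above: the base names no longer carry a ".weight" suffix because the loader composes the final tensor name from the base pattern, the layer index, and a suffix (the llama-model.cpp hunk below calls tn(LLM_TENSOR_ATTN_Q, "weight", i), for example). A minimal standalone sketch of that composition, using a hypothetical tensor_name() helper rather than llama.cpp's actual name formatter:

// Sketch only: approximates how a base pattern from the table above, a layer
// index, and a suffix combine into the name that load_tensors looks up.
#include <cstdio>
#include <string>

static std::string tensor_name(const char * base_fmt, int layer, const char * suffix) {
    char buf[256];
    // substitute the %d layer placeholder if the pattern has one; the extra
    // argument is simply ignored for patterns without a placeholder
    snprintf(buf, sizeof(buf), base_fmt, layer);
    std::string name(buf);
    if (suffix && suffix[0] != '\0') {
        name += ".";
        name += suffix;
    }
    return name;
}

int main() {
    printf("%s\n", tensor_name("blk.%d.attn_q", 3, "weight").c_str()); // blk.3.attn_q.weight
    printf("%s\n", tensor_name("output_norm",   0, "weight").c_str()); // output_norm.weight
    return 0;
}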

src/llama-model.cpp

Lines changed: 39 additions & 4 deletions
@@ -452,10 +452,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         return;
     }
 
-    if (arch == LLM_ARCH_SMOLLM3) {
-        ml.get_key("no_rope_layer_interval", hparams.no_rope_layer_interval);
-    }
-
     ml.get_key(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train);
     ml.get_key(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd);
     ml.get_key(LLM_KV_BLOCK_COUNT, hparams.n_layer);
@@ -1565,6 +1561,15 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                         default: type = LLM_TYPE_UNKNOWN;
                     }
                 } break;
+            case LLM_ARCH_SMOLLM3:
+                {
+                    ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+
+                    switch (hparams.n_layer) {
+                        case 36: type = LLM_TYPE_3B; break;
+                        default: type = LLM_TYPE_UNKNOWN;
+                    }
+                } break;
             default: throw std::runtime_error("unsupported model architecture");
         }
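
The SmolLM3 case reads only one architecture-specific hyperparameter here, the RMSNorm epsilon, and maps the 36-layer configuration to the 3B size label. As a plain reference sketch of where f_norm_rms_eps and the per-layer norm weights enter the computation (not the ggml kernel llama.cpp actually runs):

// Reference RMSNorm: y[i] = x[i] * weight[i] / sqrt(mean(x^2) + eps)
#include <cmath>
#include <cstddef>

void rms_norm(const float * x, const float * weight, float * out, size_t n, float eps) {
    double sum_sq = 0.0;
    for (size_t i = 0; i < n; ++i) {
        sum_sq += (double) x[i] * x[i];
    }
    const float mean_sq = (float)(sum_sq / (double) n);
    const float scale   = 1.0f / std::sqrt(mean_sq + eps); // eps = hparams.f_norm_rms_eps
    for (size_t i = 0; i < n; ++i) {
        out[i] = x[i] * scale * weight[i]; // weight = the blk.%d.attn_norm / ffn_norm tensor
    }
}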

@@ -4528,6 +4533,35 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.ffn_down_shexp = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "weight", i), {hparams.n_ff_shexp, n_embd}, 0);
                     }
                 } break;
+            case LLM_ARCH_SMOLLM3:
+                {
+                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+
+                    // output
+                    output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
+                    output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    // if output is NULL, init from the input tok embed
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }
+
+                    for (int i = 0; i < n_layer; ++i) {
+                        auto & layer = layers[i];
+
+                        layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
+
+                        layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q,   "weight", i), {n_embd, n_embd_head_k * n_head}, 0);
+                        layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K,   "weight", i), {n_embd, n_embd_k_gqa}, 0);
+                        layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V,   "weight", i), {n_embd, n_embd_v_gqa}, 0);
+                        layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0);
+
+                        layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
+                        layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
+                        layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {  n_ff, n_embd}, 0);
+                        layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff}, 0);
+                    }
+                } break;
             default:
                 throw std::runtime_error("unknown architecture");
         }
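
In the shapes above, the Q and output projections span all attention heads (n_embd_head_k * n_head), while the K and V projections use the grouped-query widths n_embd_k_gqa and n_embd_v_gqa, i.e. the head dimension times the number of KV heads. A quick arithmetic check; the numbers below are illustrative placeholders, not SmolLM3's published configuration:

// Back-of-the-envelope GQA shape check with assumed example values.
#include <cstdio>

int main() {
    const int n_embd        = 2048;                       // hypothetical hidden size
    const int n_head        = 16;                         // hypothetical query heads
    const int n_head_kv     = 4;                          // hypothetical KV heads
    const int n_embd_head_k = n_embd / n_head;            // 128 per-head dim
    const int n_embd_k_gqa  = n_embd_head_k * n_head_kv;  // 512, K projection width
    const int n_embd_v_gqa  = n_embd_head_k * n_head_kv;  // 512, V width (same head dim assumed)

    printf("wq: {%d, %d}\n", n_embd, n_embd_head_k * n_head); // {2048, 2048}
    printf("wk: {%d, %d}\n", n_embd, n_embd_k_gqa);           // {2048, 512}
    printf("wv: {%d, %d}\n", n_embd, n_embd_v_gqa);           // {2048, 512}
    printf("wo: {%d, %d}\n", n_embd_head_k * n_head, n_embd); // {2048, 2048}
    return 0;
}
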
@@ -14962,6 +14996,7 @@ llm_graph_result_ptr llama_model::build_graph(
                 llm = std::make_unique<llm_build_llama>(*this, params, gf);
             } break;
         case LLM_ARCH_LLAMA4:
+        case LLM_ARCH_SMOLLM3:
             {
                 llm = std::make_unique<llm_build_llama_iswa>(*this, params, gf);
             } break;
