Commit 9961952

up.
1 parent 97c64a0 commit 9961952

3 files changed: +12 -17 lines changed


convert_hf_to_gguf.py

Lines changed: 3 additions & 5 deletions
```diff
@@ -6305,11 +6305,9 @@ class SmolLM3Model(LlamaModel):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
 
-        # if self.model.config.no_rope_layers is not None:
-        #     self.gguf_writer.add_array("smollm3.no_rope_layers", self.model.config.no_rope_layers, gguf.GGUFValueType.INT32)
-        no_rope_layers = self.hparams.get("no_rope_layers")
-        if no_rope_layers is not None:
-            self.gguf_writer.add_array("smollm3.no_rope_layers", no_rope_layers)
+        no_rope_layer_interval = self.hparams.get("no_rope_layer_interval")
+        if no_rope_layer_interval is not None:
+            self.gguf_writer.add_uint32("no_rope_layer_interval", no_rope_layer_interval)
 
 ###### CONVERSION LOGIC ######
 
```
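
In effect, the converter now writes a single scalar `no_rope_layer_interval` instead of the explicit `smollm3.no_rope_layers` list. As a rough standalone sketch (not part of the commit; the helper name and the layer count/interval values are invented for illustration), the old list can be recovered from the scalar with the same 0-based modulo rule the C++ side applies:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical helper: expand the scalar interval back into the explicit
// NoPE-layer list that the removed "smollm3.no_rope_layers" array carried.
// Assumes 0-based layer indices and the modulo rule used in llm_build_smollm3.
static std::vector<int32_t> expand_no_rope_layers(uint32_t n_layer, uint32_t interval) {
    std::vector<int32_t> layers;
    if (interval == 0) {
        return layers; // 0 means no NoPE layers: RoPE stays on everywhere
    }
    for (uint32_t il = 0; il < n_layer; ++il) {
        if (il % interval == 0) {
            layers.push_back((int32_t) il); // this layer skips RoPE
        }
    }
    return layers;
}

int main() {
    // Invented values for illustration; real ones come from the model's hparams.
    for (int32_t il : expand_no_rope_layers(/*n_layer=*/36, /*interval=*/4)) {
        printf("layer %d skips RoPE\n", il); // prints layers 0, 4, 8, ..., 32
    }
    return 0;
}
```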

src/llama-hparams.h

Lines changed: 3 additions & 0 deletions
```diff
@@ -186,6 +186,9 @@ struct llama_hparams {
     // dimension of the recurrent state embeddings
     uint32_t n_embd_v_s() const;
 
+    // for NoPE interval
+    uint32_t no_rope_layer_interval = 0;
+
     bool is_swa(uint32_t il) const;
 };
 
```

src/llama-model.cpp

Lines changed: 6 additions & 12 deletions
```diff
@@ -443,6 +443,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         return;
     }
 
+    if (arch == LLM_ARCH_SMOLLM3) {
+        ml.get_key("no_rope_layer_interval", hparams.no_rope_layer_interval);
+    }
+
     ml.get_key(LLM_KV_CONTEXT_LENGTH,    hparams.n_ctx_train);
     ml.get_key(LLM_KV_EMBEDDING_LENGTH,  hparams.n_embd);
     ml.get_key(LLM_KV_BLOCK_COUNT,       hparams.n_layer);
@@ -13740,17 +13744,7 @@ struct llm_build_smollm3 : public llm_graph_context {
        GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
        GGML_ASSERT(n_embd_head == hparams.n_rot);
 
-       // collect layers for which RoPE is disabled (metadata key: "smollm3.no_rope_layers")
-       std::vector<int32_t> no_rope_layers;
-       if (arch == LLM_ARCH_SMOLLM3) {
-           const int kid = gguf_find_key(model.meta, "smollm3.no_rope_layers");
-           if (kid != -1) {
-               const uint32_t n = gguf_get_arr_n(model.meta, kid);
-               no_rope_layers.resize(n);
-               const int nb = gguf_get_arr_data(model.meta, kid, no_rope_layers.data(), n * sizeof(int32_t));
-               GGML_ASSERT(nb == int(n * sizeof(int32_t)));
-           }
-       }
+       const uint32_t interval = hparams.no_rope_layer_interval;
 
        // token embeddings
        ggml_tensor * inpL = build_inp_embd(model.tok_embd);
@@ -13793,7 +13787,7 @@ struct llm_build_smollm3 : public llm_graph_context {
            Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
            Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
 
-           if (std::find(no_rope_layers.begin(), no_rope_layers.end(), il) == no_rope_layers.end()) {
+           if (interval == 0 || il % interval != 0) {
                ggml_tensor * rope_factors = model.get_rope_factors(cparams, il);
                Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, rope_factors,
                        n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
```
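
The builder now applies the inverse rule directly: RoPE runs on every layer except exact multiples of the interval, and an interval of 0 (the field's default in `llama_hparams`) leaves RoPE enabled on all layers. A minimal sketch of that decision logic, using invented values:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    // Invented for illustration: a 12-layer model with the interval set to 4.
    const uint32_t n_layer  = 12;
    const uint32_t interval = 4;

    for (uint32_t il = 0; il < n_layer; ++il) {
        // Same condition as in llm_build_smollm3: apply RoPE unless the
        // interval is set and this layer index is a multiple of it.
        const bool apply_rope = (interval == 0 || il % interval != 0);
        printf("layer %2u: %s\n", il, apply_rope ? "RoPE" : "NoPE");
    }
    return 0;
}
```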
