@@ -443,6 +443,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         return;
     }
 
+    if (arch == LLM_ARCH_SMOLLM3) {
+        ml.get_key("no_rope_layer_interval", hparams.no_rope_layer_interval);
+    }
+
     ml.get_key(LLM_KV_CONTEXT_LENGTH,    hparams.n_ctx_train);
     ml.get_key(LLM_KV_EMBEDDING_LENGTH,  hparams.n_embd);
     ml.get_key(LLM_KV_BLOCK_COUNT,       hparams.n_layer);
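For quick inspection outside the loader, the new metadata key can also be read with the public gguf C API that the removed code below relies on. This is a minimal standalone sketch, not part of the patch: the header name, the assumption that the converter writes the key as a u32, and the unprefixed key string "no_rope_layer_interval" are taken from or inferred from the hunk above.

#include <cstdint>
#include <cstdio>

#include "gguf.h" // public gguf C API shipped with ggml (header location may differ by version)

int main(int argc, char ** argv) {
    if (argc < 2) {
        std::fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
        return 1;
    }

    // read metadata only, without allocating tensor data
    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    struct gguf_context * meta = gguf_init_from_file(argv[1], params);
    if (meta == nullptr) {
        std::fprintf(stderr, "failed to read %s\n", argv[1]);
        return 1;
    }

    uint32_t interval = 0; // 0 = key absent, i.e. RoPE on every layer

    // assumes the key was written as a u32; other integer types would need their own accessor
    const auto kid = gguf_find_key(meta, "no_rope_layer_interval");
    if (kid >= 0 && gguf_get_kv_type(meta, kid) == GGUF_TYPE_UINT32) {
        interval = gguf_get_val_u32(meta, kid);
    }

    std::printf("no_rope_layer_interval = %u\n", interval);

    gguf_free(meta);
    return 0;
}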
@@ -13740,17 +13744,7 @@ struct llm_build_smollm3 : public llm_graph_context {
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
         GGML_ASSERT(n_embd_head == hparams.n_rot);
 
-        // collect layers for which RoPE is disabled (metadata key: "smollm3.no_rope_layers")
-        std::vector<int32_t> no_rope_layers;
-        if (arch == LLM_ARCH_SMOLLM3) {
-            const int kid = gguf_find_key(model.meta, "smollm3.no_rope_layers");
-            if (kid != -1) {
-                const uint32_t n = gguf_get_arr_n(model.meta, kid);
-                no_rope_layers.resize(n);
-                const int nb = gguf_get_arr_data(model.meta, kid, no_rope_layers.data(), n * sizeof(int32_t));
-                GGML_ASSERT(nb == int(n * sizeof(int32_t)));
-            }
-        }
+        const uint32_t interval = hparams.no_rope_layer_interval; // layers where il % interval == 0 skip RoPE; 0 disables the rule
 
         // token embeddings
         ggml_tensor * inpL = build_inp_embd(model.tok_embd);
@@ -13793,7 +13787,7 @@ struct llm_build_smollm3 : public llm_graph_context {
                 Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
                 Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
 
-                if (std::find(no_rope_layers.begin(), no_rope_layers.end(), il) == no_rope_layers.end()) {
+                if (interval == 0 || il % interval != 0) {
                     ggml_tensor * rope_factors = model.get_rope_factors(cparams, il);
                     Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, rope_factors,
                             n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
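The net effect of the change: instead of listing the no-RoPE layers in an explicit metadata array, a layer skips RoPE whenever its index is a multiple of no_rope_layer_interval, and an interval of 0 keeps RoPE on every layer. Below is a small standalone sketch of that rule only; the helper name layer_uses_rope and the interval value 4 are illustrative, not part of the patch.

#include <cstdint>
#include <cstdio>

// Same predicate as the replacement condition in the diff:
// RoPE is applied unless il is a multiple of the interval; interval == 0 disables NoPE.
static bool layer_uses_rope(uint32_t il, uint32_t interval) {
    return interval == 0 || il % interval != 0;
}

int main() {
    const uint32_t interval = 4; // illustrative value of no_rope_layer_interval

    // with interval == 4, layers 0, 4, 8, ... are built without RoPE
    for (uint32_t il = 0; il < 12; ++il) {
        std::printf("layer %2u: %s\n", il, layer_uses_rope(il, interval) ? "RoPE" : "NoPE");
    }
    return 0;
}

Note that under this formulation layer 0 (il == 0) also falls on the interval and is therefore built without RoPE.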