Commit e28d2c5

fix 4b rope bug
1 parent 8e2cb21 commit e28d2c5

2 files changed: +10 -35 lines

convert_hf_to_gguf.py

Lines changed: 5 additions & 23 deletions
@@ -7590,8 +7590,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
 
 
-@ModelBase.register("SmallthinkerForCausalLM")
-class SmallthinkerModel(TextModel):
+@ModelBase.register("SmallThinkerForCausalLM")
+class SmallThinkerModel(TextModel):
     model_arch = gguf.MODEL_ARCH.SMALLTHINKER
 
     def set_gguf_parameters(self):
@@ -7602,10 +7602,8 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_used_count(n_experts_used)
         if (moe_intermediate_size := self.hparams.get("moe_ffn_hidden_size")) is not None:
             self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+            self.gguf_writer.add_feed_forward_length(moe_intermediate_size)
             logger.info(f"gguf: expert feed forward length = {moe_intermediate_size}")
-        if (shared_expert_intermediate_size := self.hparams.get('shared_expert_intermediate_size')) is not None:
-            self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size)
-            logger.info(f"gguf: expert shared feed forward length = {shared_expert_intermediate_size}")
         if (self.hparams.get('moe_primary_router_apply_softmax')):
             self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
         else:
@@ -7618,29 +7616,13 @@ def set_gguf_parameters(self):
             self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
             self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
-        sliding_window = self.hparams.get("sliding_window")
         sliding_window_layout = self.hparams.get("sliding_window_layout")
-        if sliding_window and sliding_window_layout:
+        if sliding_window_layout:
             for i in sliding_window_layout:
                 if i != 0:
+                    sliding_window = self.hparams.get("sliding_window_size")
                     self.gguf_writer.add_sliding_window(sliding_window)
                     break
-        elif sliding_window:
-            self.gguf_writer.add_sliding_window(sliding_window)
-
-        intermediate_size = self.hparams.get("ffn_hidden_size")
-        moe_intermediate_size = self.hparams.get("moe_ffn_hidden_size")
-        moe_layer_layout = self.hparams.get("moe_layer_layout")
-        ffn_layout = []
-        for i, layout in enumerate(moe_layer_layout):
-            if layout == 0:
-                ffn_layout.append(intermediate_size)
-            elif layout == 1:
-                ffn_layout.append(moe_intermediate_size)
-            else:
-                raise ValueError(f"Unknown moe layer layout: {layout}")
-        self.gguf_writer.add_feed_forward_length(ffn_layout)
-        # def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
 
     _experts: list[dict[str, Tensor]] | None = None
 
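For reference, a minimal sketch of what the simplified hparam export above now does. A plain dict stands in for self.hparams and a list records what would go to the GGUF writer; the values and the `calls` helper are made up for illustration, only the key names and writer call names come from the diff. After this commit the dense feed-forward length is written straight from moe_ffn_hidden_size instead of a per-layer list built from moe_layer_layout, and the sliding-window size is read from sliding_window_size only when sliding_window_layout marks at least one layer.

    # Illustrative sketch only: `hparams` stands in for self.hparams and
    # `calls` records what self.gguf_writer would receive.
    hparams = {
        "moe_ffn_hidden_size": 768,             # example value, not from the commit
        "sliding_window_layout": [0, 1, 0, 1],  # assumed: non-zero marks a sliding-window layer
        "sliding_window_size": 4096,            # example value, not from the commit
    }

    calls = []

    # Dense and expert feed-forward length both come from moe_ffn_hidden_size now.
    if (moe_intermediate_size := hparams.get("moe_ffn_hidden_size")) is not None:
        calls.append(("expert_feed_forward_length", moe_intermediate_size))
        calls.append(("feed_forward_length", moe_intermediate_size))

    # The sliding window is written only if the layout marks at least one layer,
    # and its size is read from "sliding_window_size".
    if (layout := hparams.get("sliding_window_layout")):
        for flag in layout:
            if flag != 0:
                calls.append(("sliding_window", hparams.get("sliding_window_size")))
                break

    print(calls)
    # [('expert_feed_forward_length', 768), ('feed_forward_length', 768), ('sliding_window', 4096)]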

src/llama-model.cpp

Lines changed: 5 additions & 12 deletions
@@ -5200,7 +5200,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];
-                        const int64_t n_ff_cur = hparams.n_ff_arr[i];
 
                         layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), { n_embd }, 0);
 
@@ -5220,16 +5219,10 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
                         // MoE branch
                         const int64_t n_ff_exp = hparams.n_ff_exp;
-                        if (n_ff_exp == n_ff_cur) {
-                            layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), { n_embd, n_expert }, 0);
-                            layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert }, 0);
-                            layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), { n_ff_exp, n_embd, n_expert }, 0);
-                            layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert }, 0);
-                        } else {
-                            layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), { n_embd, n_ff_cur }, 0);
-                            layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff_cur,n_embd }, 0);
-                            layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), { n_embd, n_ff_cur }, 0);
-                        }
+                        layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), { n_embd, n_expert }, 0);
+                        layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert }, 0);
+                        layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), { n_ff_exp, n_embd, n_expert }, 0);
+                        layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert }, 0);
                     }
                 } break;
             default:
@@ -17134,7 +17127,7 @@ struct llm_build_smallthinker : public llm_graph_context{
                 Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
                 Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
 
-                if(il % hparams.n_no_rope_layer_step) {
+                if(hparams.n_no_rope_layer_step == n_layer || il % hparams.n_no_rope_layer_step != 0) {
                     Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                                          ext_factor, attn_factor, beta_fast, beta_slow);
 
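The one-line condition change above is the actual rope fix. The old check `il % hparams.n_no_rope_layer_step` skips rope on every layer whose index is a multiple of the step, which is the intended NoPE pattern, but it misfires when the step equals the total layer count, which judging from the new check is how a config with no NoPE layers (apparently the 4B model) is encoded: layer 0 then still skips rope. The new check ropes every layer in that case and is unchanged otherwise. A small sketch of the layer selection under both checks, with values chosen purely for illustration:

    def rope_layers(n_layer: int, n_no_rope_layer_step: int, fixed: bool) -> list[int]:
        """Return the layer indices that would receive rope under the old vs. fixed check."""
        layers = []
        for il in range(n_layer):
            if fixed:
                # new check: if the step equals n_layer there are no NoPE layers,
                # so every layer (including layer 0) is roped
                apply = n_no_rope_layer_step == n_layer or il % n_no_rope_layer_step != 0
            else:
                # old check: every step-th layer, including layer 0, skips rope
                apply = il % n_no_rope_layer_step != 0
            if apply:
                layers.append(il)
        return layers

    # Hypothetical 4-layer model with the step equal to the layer count:
    print(rope_layers(4, 4, fixed=False))  # [1, 2, 3]     -> layer 0 wrongly un-roped
    print(rope_layers(4, 4, fixed=True))   # [0, 1, 2, 3]  -> every layer roped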

0 commit comments
