
Commit 140eb29

gguf-py, llama : rename expert_weights to exp_probs in tensor and variable names
1 parent: d2f784d

File tree

3 files changed (+13, -13 lines)

gguf-py/gguf/constants.py

Lines changed: 3 additions & 3 deletions
@@ -314,7 +314,7 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE_SHEXP = auto()
     FFN_DOWN_SHEXP = auto()
     FFN_UP_SHEXP = auto()
-    FFN_EXPERT_WEIGHTS_B = auto()
+    FFN_EXP_PROBS_B = auto()
     ATTN_Q_NORM = auto()
     ATTN_K_NORM = auto()
     LAYER_OUT_NORM = auto()
@@ -499,7 +499,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
     MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
-    MODEL_TENSOR.FFN_EXPERT_WEIGHTS_B: "blk.{bid}.expert_weights_b",
+    MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
     MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
@@ -1280,7 +1280,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
-        MODEL_TENSOR.FFN_EXPERT_WEIGHTS_B,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
     ],
     MODEL_ARCH.CHATGLM : [
         MODEL_TENSOR.TOKEN_EMBD,

gguf-py/gguf/tensor_mapping.py

Lines changed: 1 addition & 1 deletion
@@ -276,7 +276,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
         ),
 
-        MODEL_TENSOR.FFN_EXPERT_WEIGHTS_B: (
+        MODEL_TENSOR.FFN_EXP_PROBS_B: (
             "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
         ),
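The entry above is what lets the gguf-py converter translate the Hugging Face tensor name into the renamed GGUF tensor. As a rough, self-contained sketch of that lookup (simplified and hypothetical; the real TensorNameMap covers many more patterns and architectures), the rename boils down to:

# Hypothetical, simplified sketch of the renamed lookup; not the actual gguf-py API.
# It covers only the single deepseek-v3 pattern touched by this commit.

FFN_EXP_PROBS_B_SOURCES = (
    "model.layers.{bid}.mlp.gate.e_score_correction",  # deepseek-v3
)

def map_exp_probs_b(hf_name, n_layers):
    """Return the GGUF name "blk.{bid}.exp_probs_b" for a matching HF tensor, else None."""
    for bid in range(n_layers):
        for pattern in FFN_EXP_PROBS_B_SOURCES:
            if hf_name == pattern.format(bid=bid):
                return "blk.{}.exp_probs_b".format(bid)
    return None

# e.g. the layer-0 correction bias now lands in "blk.0.exp_probs_b"
assert map_exp_probs_b("model.layers.0.mlp.gate.e_score_correction", n_layers=4) == "blk.0.exp_probs_b"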

src/llama.cpp

Lines changed: 9 additions & 9 deletions
@@ -564,7 +564,7 @@ enum llm_tensor {
     LLM_TENSOR_FFN_DOWN_SHEXP,
     LLM_TENSOR_FFN_GATE_SHEXP,
     LLM_TENSOR_FFN_UP_SHEXP,
-    LLM_TENSOR_FFN_EXPERT_WEIGHTS_B,
+    LLM_TENSOR_FFN_EXP_PROBS_B,
     LLM_TENSOR_ATTN_Q_NORM,
     LLM_TENSOR_ATTN_K_NORM,
     LLM_TENSOR_LAYER_OUT_NORM,
@@ -1434,7 +1434,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
-            { LLM_TENSOR_FFN_EXPERT_WEIGHTS_B, "blk.%d.expert_weights_b" },
+            { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
         },
     },
     {
@@ -2934,7 +2934,7 @@ struct llama_layer {
     struct ggml_tensor * ffn_down_b = nullptr; // b2
     struct ggml_tensor * ffn_up_b = nullptr; // b3
     struct ggml_tensor * ffn_act = nullptr;
-    struct ggml_tensor * ffn_expert_weights_bias = nullptr;
+    struct ggml_tensor * ffn_exp_probs_b = nullptr;
 
     // mamba proj
     struct ggml_tensor * ssm_in = nullptr;
@@ -7480,7 +7480,7 @@ static const std::map<llm_tensor, llm_tensor_info> llm_tensor_info_mapping = {
     {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
-    {LLM_TENSOR_FFN_EXPERT_WEIGHTS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+    {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
     // this tensor is loaded for T5, but never used
     {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
     {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, GGML_OP_IM2COL}},
@@ -9283,7 +9283,7 @@ static bool llm_load_tensors(
         layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
     } else {
         layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, 0);
-        layer.ffn_expert_weights_bias = create_tensor(tn(LLM_TENSOR_FFN_EXPERT_WEIGHTS_B, "bias", i), {n_expert}, llama_model_loader::TENSOR_NOT_REQUIRED);
+        layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), {n_expert}, llama_model_loader::TENSOR_NOT_REQUIRED);
 
         if (n_expert == 0) {
             throw std::runtime_error("n_expert must be > 0");
@@ -10285,22 +10285,22 @@ llm_expert_gating_func_type gating_op,
         case LLM_EXPERT_GATING_FUNC_SOFTMAX:
             {
                 probs = ggml_soft_max(ctx, logits); // [n_expert, n_tokens]
-                cb(probs, "ffn_moe_probs", il);
             } break;
         case LLM_EXPERT_GATING_FUNC_SIGMOID:
             {
                 probs = ggml_sigmoid(ctx, logits); // [n_expert, n_tokens]
-                cb(probs, "ffn_moe_sigm", il);
             } break;
         default:
             GGML_ABORT("fatal error");
     }
+    cb(probs, "ffn_moe_probs", il);
 
     // add experts selection bias - introduced in DeepSeek V3
+    // leave probs unbiased as it's later used to get expert weights
     ggml_tensor * selection_probs = probs;
     if (expert_weights_b != nullptr) {
         selection_probs = ggml_add(ctx, probs, expert_weights_b);
-        cb(selection_probs, "ffn_moe_sigm_biased", il);
+        cb(selection_probs, "ffn_moe_probs_biased", il);
     }
 
     // select experts
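The comments added in this hunk carry the reasoning behind the rename: the loaded bias (now ffn_exp_probs_b) is added only to the copy of the probabilities used to select the top-k experts, while the unbiased probabilities are kept as the actual expert weights. A minimal Python sketch of that idea, assuming sigmoid gating as in the branch above (illustrative only, not llama.cpp code; weight normalization and scaling are omitted):

import math

def select_experts(logits, exp_probs_b, n_expert_used):
    """Pick top-k experts using biased scores, but weight them with unbiased probs.

    logits        : per-expert router logits for one token
    exp_probs_b   : per-expert selection bias (DeepSeek V3 e_score_correction), or None
    n_expert_used : how many experts to route the token to
    """
    # sigmoid gating, as in the LLM_EXPERT_GATING_FUNC_SIGMOID branch
    probs = [1.0 / (1.0 + math.exp(-x)) for x in logits]

    # the bias is applied only to the scores used for selection ...
    selection_probs = probs
    if exp_probs_b is not None:
        selection_probs = [p + b for p, b in zip(probs, exp_probs_b)]

    # ... so the chosen experts may change, but their weights stay unbiased
    chosen = sorted(range(len(probs)), key=lambda i: selection_probs[i], reverse=True)[:n_expert_used]
    weights = [probs[i] for i in chosen]
    return chosen, weights

# toy example: the bias nudges expert 2 into the top-2 set,
# yet its weight is still the plain sigmoid probability
print(select_experts([2.0, 1.5, 1.4], exp_probs_b=[0.0, 0.0, 0.5], n_expert_used=2))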
@@ -16241,7 +16241,7 @@ struct llm_build_context {
             model.layers[il].ffn_up_exps,
             model.layers[il].ffn_gate_exps,
             model.layers[il].ffn_down_exps,
-            model.layers[il].ffn_expert_weights_bias,
+            model.layers[il].ffn_exp_probs_b,
             n_expert, n_expert_used,
             LLM_FFN_SILU, hparams.expert_weights_norm,
             true, hparams.expert_weights_scale,
