
Commit 81f95c0

committed
We're through to the generation stage.
1 parent 300e537 commit 81f95c0

2 files changed: 3 additions (+) and 6 deletions (−)

src/llama-arch.cpp

Lines changed: 1 addition & 1 deletion
@@ -2076,11 +2076,11 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
     { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
     { LLM_TENSOR_OUTPUT, "output" },
     { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-    { LLM_TENSOR_POST_ATTN_NORM, "blk.%d.post_attn_norm" },
     { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
     { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
     { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
     { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+    { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
     { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
     { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
     { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
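
For context, the "%d" placeholder in these name templates is filled with the block (layer) index when a tensor is looked up. A minimal sketch of that expansion, assuming nothing about the real llama.cpp helper (expand_tensor_name below is a made-up function for illustration):

    #include <cstdio>
    #include <string>

    // Hypothetical helper: expand a per-layer tensor name template such as
    // "blk.%d.post_attention_norm" and append a suffix like "weight".
    static std::string expand_tensor_name(const char * tmpl, int layer, const char * suffix) {
        char buf[256];
        std::snprintf(buf, sizeof(buf), tmpl, layer);   // substitute the layer index
        return std::string(buf) + "." + suffix;
    }

    int main() {
        // e.g. the renamed post-attention norm tensor of layer 3
        std::printf("%s\n", expand_tensor_name("blk.%d.post_attention_norm", 3, "weight").c_str());
        // prints: blk.3.post_attention_norm.weight
        return 0;
    }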

src/llama-model.cpp

Lines changed: 2 additions & 5 deletions
@@ -3977,9 +3977,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_SEED_OSS:
             {
-                const uint32_t num_heads = hparams.ssm_dt_rank;
-                const uint32_t intermediate_size = hparams.ssm_d_inner;
-                const uint32_t head_dim = intermediate_size / num_heads;
+                const uint32_t head_dim = hparams.n_embd_head_k;
                 const int64_t n_qo_dim = n_head * head_dim;
                 const int64_t n_kv_dim = n_head_kv * head_dim;

@@ -4007,7 +4005,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
                     layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
                     layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0);
-                    layer.ffn_post_norm = create_tensor(tn(LLM_TENSOR_FFN_POST_NORM, "weight", i), {n_embd}, 0);
                 }
             } break;

@@ -18862,6 +18859,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_ARCEE:
         case LLM_ARCH_ERNIE4_5:
         case LLM_ARCH_ERNIE4_5_MOE:
+        case LLM_ARCH_SEED_OSS:
             return LLAMA_ROPE_TYPE_NORM;

         // the pairs of head values are offset by n_rot/2
@@ -18882,7 +18880,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_QWEN3MOE:
         case LLM_ARCH_OLMO2:
         case LLM_ARCH_OLMOE:
-        case LLM_ARCH_SEED_OSS:
         case LLM_ARCH_PHI2:
         case LLM_ARCH_PHI3:
         case LLM_ARCH_PHIMOE:
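
As a side note on the load_tensors hunk above: with head_dim taken directly from hparams.n_embd_head_k, the Q/output and K/V projection widths follow from the head counts, as in this minimal sketch (the numeric values are made-up examples, not SEED_OSS's actual configuration):

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Illustrative hyperparameters only; real values come from the model metadata.
        const uint32_t n_embd_head_k = 128; // per-head dimension (hparams.n_embd_head_k)
        const int64_t  n_head        = 32;  // number of query heads
        const int64_t  n_head_kv     = 8;   // number of key/value heads (grouped-query attention)

        const uint32_t head_dim = n_embd_head_k;
        const int64_t  n_qo_dim = n_head    * head_dim; // width of attn_q / attn_output
        const int64_t  n_kv_dim = n_head_kv * head_dim; // width of attn_k / attn_v

        std::printf("head_dim=%u n_qo_dim=%lld n_kv_dim=%lld\n",
                    head_dim, (long long) n_qo_dim, (long long) n_kv_dim);
        return 0;
    }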
