
Commit 1d59acd

fix: resolve remaining merge conflicts and remove duplicate case statements
1 parent b27e51c · commit 1d59acd


src/llama-model.cpp

Lines changed: 2 additions & 50 deletions
@@ -1099,17 +1099,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0)))
                     : 1.0f / std::sqrt(float(hparams.n_embd_head_k));
             } break;
-        case LLM_ARCH_GEMMA_EMBEDDING:
-            {
-                // EmbeddingGemma is an embedding model based on GEMMA architecture
-                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
-                ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
-                ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn, false);
-
-                // Set embedding-specific defaults
-                hparams.causal_attn = false; // Embeddings use bidirectional attention
-                type = LLM_TYPE_300M; // EmbeddingGemma is 300M params
-            } break;
         case LLM_ARCH_GEMMA3:
             {
                 hparams.swa_type = LLAMA_SWA_TYPE_STANDARD;
@@ -3515,36 +3504,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_post_norm = create_tensor(tn(LLM_TENSOR_FFN_POST_NORM, "weight", i), {n_embd}, 0);
                 }
             } break;
-        case LLM_ARCH_GEMMA_EMBEDDING:
-            {
-                // EmbeddingGemma uses similar structure to GEMMA3
-                tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
-
-                // output norm for embeddings
-                output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
-
-                // layers (similar to GEMMA3 but for embeddings)
-                for (int i = 0; i < n_layer; ++i) {
-                    auto & layer = layers[i];
-
-                    layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
-
-                    layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0);
-                    layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}, 0);
-                    layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}, 0);
-                    layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0);
-
-                    layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), {n_embd}, 0);
-                    layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd_head_k}, 0);
-                    layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd_head_k}, 0);
-
-                    layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
-                    layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
-                    layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
-                    layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
-                    layer.ffn_post_norm = create_tensor(tn(LLM_TENSOR_FFN_POST_NORM, "weight", i), {n_embd}, 0);
-                }
-            } break;
         case LLM_ARCH_GEMMA3:
         case LLM_ARCH_GEMMA_EMBEDDING:
             {
@@ -10644,8 +10603,6 @@ struct llm_build_gemma3_iswa : public llm_graph_context {
 struct llm_build_gemma_embedding : public llm_graph_context {
     llm_build_gemma_embedding(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
         const int64_t n_embd_head = hparams.n_embd_head_k;
-        const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa();
-        const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa();
 
         ggml_tensor * cur;
         ggml_tensor * inpL;
@@ -19008,18 +18965,14 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
             {
                 llm = std::make_unique<llm_build_gemma3_iswa>(*this, params);
             } break;
-        case LLM_ARCH_GEMMA_EMBEDDING:
-            {
-                // EmbeddingGemma uses custom embedding builder
-                llm = std::make_unique<llm_build_gemma_embedding>(*this, params);
-            } break;
         case LLM_ARCH_GEMMA3N:
             {
                 llm = std::make_unique<llm_build_gemma3n_iswa>(*this, params);
             } break;
         case LLM_ARCH_GEMMA_EMBEDDING:
             {
-                llm = std::make_unique<llm_build_gemma_embedding_iswa>(*this, params);
+                // EmbeddingGemma uses custom embedding builder
+                llm = std::make_unique<llm_build_gemma_embedding>(*this, params);
             } break;
         case LLM_ARCH_STARCODER2:
             {
@@ -19420,7 +19373,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_GEMMA:
         case LLM_ARCH_GEMMA2:
         case LLM_ARCH_GEMMA3:
-        case LLM_ARCH_GEMMA_EMBEDDING:
         case LLM_ARCH_GEMMA3N:
         case LLM_ARCH_GEMMA_EMBEDDING:
         case LLM_ARCH_STARCODER2:
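
For context, the sketch below is a minimal, self-contained illustration (not the actual llama.cpp code; the reduced enum, stand-in builder structs, and the free function build_graph are assumptions made for demonstration) of the switch layout this commit restores: each architecture appears as a case label exactly once, and LLM_ARCH_GEMMA_EMBEDDING is routed to the llm_build_gemma_embedding builder rather than the removed llm_build_gemma_embedding_iswa variant. Duplicate case labels like the ones left behind by the merge conflict are a hard compile error in C++, which is why the stray copies had to be removed.

```cpp
#include <memory>
#include <stdexcept>

// Reduced stand-ins for the real llama.cpp types; for illustration only.
enum llm_arch {
    LLM_ARCH_GEMMA3,
    LLM_ARCH_GEMMA3N,
    LLM_ARCH_GEMMA_EMBEDDING,
    LLM_ARCH_STARCODER2,
};

struct llm_graph_context { virtual ~llm_graph_context() = default; };
struct llm_build_gemma3_iswa     : llm_graph_context {};
struct llm_build_gemma3n_iswa    : llm_graph_context {};
struct llm_build_gemma_embedding : llm_graph_context {};

// One case label per architecture; a second case LLM_ARCH_GEMMA_EMBEDDING
// in the same switch would not compile.
std::unique_ptr<llm_graph_context> build_graph(llm_arch arch) {
    switch (arch) {
        case LLM_ARCH_GEMMA3:
            return std::make_unique<llm_build_gemma3_iswa>();
        case LLM_ARCH_GEMMA3N:
            return std::make_unique<llm_build_gemma3n_iswa>();
        case LLM_ARCH_GEMMA_EMBEDDING:
            // EmbeddingGemma uses the custom embedding builder
            return std::make_unique<llm_build_gemma_embedding>();
        default:
            throw std::runtime_error("unsupported architecture");
    }
}

int main() {
    // Selects llm_build_gemma_embedding, mirroring the retained branch above.
    auto graph = build_graph(LLM_ARCH_GEMMA_EMBEDDING);
    return graph ? 0 : 1;
}
```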
