@@ -1099,17 +1099,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0)))
                     : 1.0f / std::sqrt(float(hparams.n_embd_head_k));
             } break;
-        case LLM_ARCH_GEMMA_EMBEDDING:
-            {
-                // EmbeddingGemma is an embedding model based on GEMMA architecture
-                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
-                ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
-                ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn, false);
-
-                // Set embedding-specific defaults
-                hparams.causal_attn = false; // Embeddings use bidirectional attention
-                type = LLM_TYPE_300M; // EmbeddingGemma is 300M params
-            } break;
         case LLM_ARCH_GEMMA3:
             {
                 hparams.swa_type = LLAMA_SWA_TYPE_STANDARD;
@@ -3515,36 +3504,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.ffn_post_norm = create_tensor(tn(LLM_TENSOR_FFN_POST_NORM, "weight", i), {n_embd}, 0);
                 }
             } break;
-        case LLM_ARCH_GEMMA_EMBEDDING:
-            {
-                // EmbeddingGemma uses similar structure to GEMMA3
-                tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
-
-                // output norm for embeddings
-                output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
-
-                // layers (similar to GEMMA3 but for embeddings)
-                for (int i = 0; i < n_layer; ++i) {
-                    auto & layer = layers[i];
-
-                    layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
-
-                    layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0);
-                    layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}, 0);
-                    layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}, 0);
-                    layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0);
-
-                    layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), {n_embd}, 0);
-                    layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd_head_k}, 0);
-                    layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd_head_k}, 0);
-
-                    layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
-                    layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
-                    layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
-                    layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
-                    layer.ffn_post_norm = create_tensor(tn(LLM_TENSOR_FFN_POST_NORM, "weight", i), {n_embd}, 0);
-                }
-            } break;
         case LLM_ARCH_GEMMA3:
         case LLM_ARCH_GEMMA_EMBEDDING:
             {
@@ -10644,8 +10603,6 @@ struct llm_build_gemma3_iswa : public llm_graph_context {
 struct llm_build_gemma_embedding : public llm_graph_context {
     llm_build_gemma_embedding(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
         const int64_t n_embd_head = hparams.n_embd_head_k;
-        const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa();
-        const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa();

         ggml_tensor * cur;
         ggml_tensor * inpL;
@@ -19008,18 +18965,14 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
             {
                 llm = std::make_unique<llm_build_gemma3_iswa>(*this, params);
             } break;
-        case LLM_ARCH_GEMMA_EMBEDDING:
-            {
-                // EmbeddingGemma uses custom embedding builder
-                llm = std::make_unique<llm_build_gemma_embedding>(*this, params);
-            } break;
         case LLM_ARCH_GEMMA3N:
             {
                 llm = std::make_unique<llm_build_gemma3n_iswa>(*this, params);
             } break;
         case LLM_ARCH_GEMMA_EMBEDDING:
             {
-                llm = std::make_unique<llm_build_gemma_embedding_iswa>(*this, params);
+                // EmbeddingGemma uses custom embedding builder
+                llm = std::make_unique<llm_build_gemma_embedding>(*this, params);
             } break;
         case LLM_ARCH_STARCODER2:
             {
@@ -19420,7 +19373,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_GEMMA:
         case LLM_ARCH_GEMMA2:
         case LLM_ARCH_GEMMA3:
-        case LLM_ARCH_GEMMA_EMBEDDING:
         case LLM_ARCH_GEMMA3N:
         case LLM_ARCH_GEMMA_EMBEDDING:
         case LLM_ARCH_STARCODER2: