Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion ggml/src/ggml-cuda/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2806,6 +2806,8 @@ static bool check_node_graph_compatibility(ggml_cgraph * cgraph,
const std::string ffn_moe_down_bias_prefix = "ffn_moe_down_biased";
const std::string nemotron_h_block_out_prefix = "nemotron_h_block_out";
const std::string mamba2_y_add_d_prefix = "mamba2_y_add_d";
const std::string emGemma_sa_out_prefix = "emGemma_sa_out";
const std::string emGemma_l_out_prefix = "emGemma_l_out";

for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor * node = cgraph->nodes[i];
Expand Down Expand Up @@ -2836,7 +2838,9 @@ static bool check_node_graph_compatibility(ggml_cgraph * cgraph,
strncmp(node->name, ffn_moe_up_bias_prefix.c_str(), ffn_moe_up_bias_prefix.size()) != 0 &&
strncmp(node->name, ffn_moe_down_bias_prefix.c_str(), ffn_moe_down_bias_prefix.size()) != 0 &&
strncmp(node->name, nemotron_h_block_out_prefix.c_str(), nemotron_h_block_out_prefix.size()) != 0 &&
strncmp(node->name, mamba2_y_add_d_prefix.c_str(), mamba2_y_add_d_prefix.size()) != 0) {
strncmp(node->name, mamba2_y_add_d_prefix.c_str(), mamba2_y_add_d_prefix.size()) != 0 &&
strncmp(node->name, emGemma_sa_out_prefix.c_str(), emGemma_sa_out_prefix.size()) != 0 &&
strncmp(node->name, emGemma_l_out_prefix.c_str(), emGemma_l_out_prefix.size()) != 0) {
// Disable CUDA graphs for batch size > 1 for now. Nodes whose names match one of the prefixes checked above are exempt,
// e.g. the matrix-matrix addition that is part of Gemma3n's `project_per_layer_input` operation. See
// https://github.com/ggml-org/llama.cpp/blob/f9a31eea06a859e34cecb88b4d020c7f03d86cc4/src/llama-model.cpp#L10199-L10241 and
Expand Down
4 changes: 2 additions & 2 deletions src/llama-model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11573,7 +11573,7 @@ struct llm_build_gemma_embedding : public llm_graph_context {
cb(cur, "attn_post_norm", il);

ggml_tensor * sa_out = ggml_add(ctx0, cur, inpL);
cb(sa_out, "sa_out", il);
cb(sa_out, "emGemma_sa_out", il);

cur = build_norm(sa_out,
model.layers[il].ffn_norm, NULL,
Expand All @@ -11599,7 +11599,7 @@ struct llm_build_gemma_embedding : public llm_graph_context {
cur = ggml_add(ctx0, cur, sa_out);

cur = build_cvec(cur, il);
cb(cur, "l_out", il);
cb(cur, "emGemma_l_out", il);

// input for next layer
inpL = cur;
Expand Down
Loading