Skip to content

Commit 2a1c750

Browse files
committed
ubatch issues: the assert checking for equal seqs in llama-graph.cpp keeps failing when building attention. Setting the ubatch size to 1 (running llama-embedding with --ubatch-size 1) works around it, but this needs further investigation.
1 parent 853f344 commit 2a1c750

File tree

2 files changed

+3
-6
lines changed

2 files changed

+3
-6
lines changed

src/llama-graph.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -644,8 +644,6 @@ ggml_tensor * llm_graph_context::build_ffn(
644644
llm_ffn_op_type type_op,
645645
llm_ffn_gate_type type_gate,
646646
int il) const {
647-
648-
649647
ggml_tensor * tmp = up ? build_lora_mm(up, cur) : cur;
650648
cb(tmp, "ffn_up", il);
651649

@@ -1377,9 +1375,9 @@ ggml_tensor * llm_graph_context::build_attn(
13771375

13781376
// [TAG_NO_CACHE_PAD]
13791377
// TODO: if ubatch.equal_seqs() == true, we can split the three tensors below into ubatch.n_seqs_unq streams
1380-
LLAMA_LOG_INFO("ubatch.equal_seqs() = %d, n_seqs = %d\n", ubatch.equal_seqs(), ubatch.n_seqs);
1381-
1382-
// sassert(!ubatch.equal_seqs());
1378+
if (ubatch.n_seqs > 1) {
1379+
assert(!ubatch.equal_seqs());
1380+
}
13831381

13841382
ggml_tensor * q = q_cur;
13851383
ggml_tensor * k = k_cur;

src/llama-model.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7589,7 +7589,6 @@ struct llm_build_modern_bert : public llm_graph_context {
75897589
LLM_NORM, il);
75907590
cb(x_attn_in, "attn_pre_norm", il);
75917591
} else {
7592-
LLAMA_LOG_INFO("Identity Tensor\n");
75937592
cb(x_attn_in, "attn_pre_norm_identity", il);
75947593
}
75957594

0 commit comments

Comments
 (0)