Skip to content

Commit 2a1c750

Browse files
committed
ubatch issues: the assert checking for equal seqs in llama-graph.cpp keeps failing when building attention. Setting the ubatch size to 1 (running llama-embedding with --ubatch-size 1) works around it, but this needs further investigation.
1 parent 853f344 commit 2a1c750

File tree

2 files changed

+3
-6
lines changed

2 files changed

+3
-6
lines changed

src/llama-graph.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -644,8 +644,6 @@ ggml_tensor * llm_graph_context::build_ffn(
644644
llm_ffn_op_type type_op,
645645
llm_ffn_gate_type type_gate,
646646
int il) const {
647-
648-
649647
ggml_tensor * tmp = up ? build_lora_mm(up, cur) : cur;
650648
cb(tmp, "ffn_up", il);
651649

@@ -1377,9 +1375,9 @@ ggml_tensor * llm_graph_context::build_attn(
13771375

13781376
// [TAG_NO_CACHE_PAD]
13791377
// TODO: if ubatch.equal_seqs() == true, we can split the three tensors below into ubatch.n_seqs_unq streams
1380-
LLAMA_LOG_INFO("ubatch.equal_seqs() = %d, n_seqs = %d\n", ubatch.equal_seqs(), ubatch.n_seqs);
1381-
1382-
// sassert(!ubatch.equal_seqs());
1378+
if (ubatch.n_seqs > 1) {
1379+
assert(!ubatch.equal_seqs());
1380+
}
13831381

13841382
ggml_tensor * q = q_cur;
13851383
ggml_tensor * k = k_cur;

src/llama-model.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7589,7 +7589,6 @@ struct llm_build_modern_bert : public llm_graph_context {
75897589
LLM_NORM, il);
75907590
cb(x_attn_in, "attn_pre_norm", il);
75917591
} else {
7592-
LLAMA_LOG_INFO("Identity Tensor\n");
75937592
cb(x_attn_in, "attn_pre_norm_identity", il);
75947593
}
75957594

0 commit comments

Comments
 (0)