Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions src/llama-batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,10 +669,8 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u

auto udata = std::make_shared<llama_ubatch::data_t>();

const int32_t n_pos_cur = batch.embd ? n_pos_per_embd : 1;

const int64_t n_embd_all = batch.embd ? (int64_t) n_tokens*n_embd : 0;
const int64_t n_pos_all = (int64_t) n_tokens*n_pos_cur;
const int64_t n_pos_all = (int64_t) n_tokens*n_pos_per_embd;

udata->token .resize(n_tokens);
udata->embd .resize(n_embd_all);
Expand All @@ -694,8 +692,13 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
memcpy(udata->embd.data() + i*n_embd, batch.embd + (int64_t) idxs[i]*n_embd, n_embd*sizeof(float));
}

for (int j = 0; j < n_pos_cur; ++j) {
udata->pos[j*n_tokens + i] = batch.pos[j*batch.n_tokens + idxs[i]];
for (size_t j = 0; j < (size_t)n_pos_per_embd; ++j) {
// if we are using M-RoPE
// if the current batch is text, we need to broadcast the same position across all RoPE sections
Copy link
Collaborator Author

@ngxson ngxson Oct 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please note that although only 3 dims are used in QwenVL (while n_pos_per_embd = 4), the last dim is never actually processed by ggml, since its hparams.mrope_section will be 0. Therefore, it is safe to broadcast the position across all dims here.

// otherwise, the input batch is image embeddings, we copy the positions as-is
// if we are not using M-RoPE, there is only one position per token (this loop runs only once)
size_t src_off = batch.token ? 0 : j*batch.n_tokens;
udata->pos[j*n_tokens + i] = batch.pos[src_off + idxs[i]];
}

udata->n_seq_id[i] = batch.n_seq_id[idxs[i]];
Expand Down
Loading