Skip to content

Commit ee3a9fc

Browse files
authored
context : fix index overflow on huge outputs (#15080)
* context : fix overflow when re-ordering huge outputs
* context : fix logits size overflow for huge batches
1 parent ec428b0 commit ee3a9fc

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

src/llama-context.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -786,7 +786,7 @@ int llama_context::encode(const llama_batch & batch_inp) {
786786
const auto & hparams = model.hparams;
787787

788788
const int64_t n_embd = hparams.n_embd;
789-
const int32_t n_vocab = model.vocab.n_tokens();
789+
const int64_t n_vocab = model.vocab.n_tokens();
790790

791791
// note: during encode, we always pass the full sequence starting from pos = 0
792792
if (!balloc->init(batch_inp, model.vocab, nullptr, n_embd, cparams.kv_unified ? LLAMA_MAX_SEQ : cparams.n_seq_max, true)) {
@@ -959,7 +959,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
959959
const auto & vocab = model.vocab;
960960
const auto & hparams = model.hparams;
961961

962-
const int32_t n_vocab = vocab.n_tokens();
962+
const int64_t n_vocab = vocab.n_tokens();
963963
const int64_t n_embd = hparams.n_embd;
964964

965965
// when computing embeddings, all tokens are output
@@ -1328,21 +1328,21 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
13281328
}
13291329

13301330
void llama_context::output_reorder() {
1331-
const uint32_t n_vocab = model.vocab.n_tokens();
1331+
const uint64_t n_vocab = model.vocab.n_tokens();
13321332
const uint64_t n_embd = model.hparams.n_embd;
13331333

1334-
for (uint32_t s = 0; s < output_swaps.size(); ++s) {
1335-
const uint32_t i0 = output_swaps[s].i0;
1336-
const uint32_t i1 = output_swaps[s].i1;
1334+
for (size_t s = 0; s < output_swaps.size(); ++s) {
1335+
const uint64_t i0 = output_swaps[s].i0;
1336+
const uint64_t i1 = output_swaps[s].i1;
13371337

13381338
if (logits_size > 0) {
1339-
for (uint32_t k = 0; k < n_vocab; k++) {
1339+
for (uint64_t k = 0; k < n_vocab; k++) {
13401340
std::swap(logits[i0*n_vocab + k], logits[i1*n_vocab + k]);
13411341
}
13421342
}
13431343

13441344
if (embd_size > 0) {
1345-
for (uint32_t k = 0; k < n_embd; k++) {
1345+
for (uint64_t k = 0; k < n_embd; k++) {
13461346
std::swap(embd[i0*n_embd + k], embd[i1*n_embd + k]);
13471347
}
13481348
}

0 commit comments

Comments
 (0)