Skip to content

Commit d396b43

Browse files
authored
server : fix "can batch with" bug (ggml-org#17263)
1 parent 45c6ef7 commit d396b43

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

tools/server/server.cpp

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3591,13 +3591,13 @@ struct server_context {
         // next, batch any pending prompts without exceeding n_batch
         if (params_base.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
+                if (!slot.is_processing()) {
+                    continue;
+                }
+
                 // check if we can batch this slot with the previous one
-                if (slot.is_processing()) {
-                    if (!slot_batched) {
-                        slot_batched = &slot;
-                    } else if (!slot_batched->can_batch_with(slot)) {
-                        continue;
-                    }
+                if (slot_batched && !slot_batched->can_batch_with(slot)) {
+                    continue;
                 }
 
                 // this slot still has a prompt to be processed
@@ -4028,6 +4028,10 @@ struct server_context {
                     }
                 }
 
+                if (!slot_batched) {
+                    slot_batched = &slot;
+                }
+
                 if (batch.n_tokens >= n_batch) {
                     break;
                 }

0 commit comments

Comments
 (0)