Skip to content

Commit b9b2b63

Browse files
committed
move can_batch_with check
1 parent 9947b07 commit b9b2b63

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

examples/server/server.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2588,6 +2588,13 @@ struct server_context {
25882588
// next, batch any pending prompts without exceeding n_batch
25892589
if (params_base.cont_batching || batch.n_tokens == 0) {
25902590
for (auto & slot : slots) {
2591+
// check if we can batch this slot with the previous one
2592+
if (!slot_batched) {
2593+
slot_batched = &slot;
2594+
} else if (slot_batched && !slot_batched->can_batch_with(slot)) {
2595+
continue;
2596+
}
2597+
25912598
// this slot still has a prompt to be processed
25922599
if (slot.state == SLOT_STATE_PROCESSING_PROMPT || slot.state == SLOT_STATE_STARTED) {
25932600
auto & prompt_tokens = slot.prompt_tokens;
@@ -2748,13 +2755,6 @@ struct server_context {
27482755
}
27492756
}
27502757

2751-
// check if we can batch this slot with the previous one
2752-
if (!slot_batched) {
2753-
slot_batched = &slot;
2754-
} else if (slot_batched && !slot_batched->can_batch_with(slot)) {
2755-
continue;
2756-
}
2757-
27582758
// keep only the common part
27592759
if (!llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1)) {
27602760
// could not partially delete (likely using a non-Transformer model)

examples/server/tests/unit/test_lora.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,9 @@ def test_lora_per_request():
6868
"temperature": 0.0,
6969
"cache_prompt": False, # TODO: remove this once test_cache_vs_nocache_prompt is fixed
7070
})
71-
) for lora, re_test in lora_config]
71+
) for lora, _ in lora_config]
7272
results = parallel_function_calls(tasks)
7373

74-
print(results)
7574
assert all([res.status_code == 200 for res in results])
7675
for res, (_, re_test) in zip(results, lora_config):
7776
assert match_regex(re_test, res.body["content"])

0 commit comments

Comments
 (0)