Skip to content

Commit 076346d

Browse files
committed
fix condition
1 parent b9b2b63 commit 076346d

File tree

3 files changed

+24
-10
lines changed

3 files changed

+24
-10
lines changed

examples/server/server.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2558,12 +2558,22 @@ struct server_context {
25582558
// start populating the batch for this iteration
25592559
common_batch_clear(batch);
25602560

2561+
// track if given slot can be batched with slots already in the batch
2562+
server_slot * slot_batched = nullptr;
2563+
25612564
// first, add sampled tokens from any ongoing sequences
25622565
for (auto & slot : slots) {
25632566
if (slot.state != SLOT_STATE_GENERATING) {
25642567
continue;
25652568
}
25662569

2570+
// check if we can batch this slot with the previous one
2571+
if (!slot_batched) {
2572+
slot_batched = &slot;
2573+
} else if (slot_batched && !slot_batched->can_batch_with(slot)) {
2574+
continue;
2575+
}
2576+
25672577
slot.i_batch = batch.n_tokens;
25682578

25692579
common_batch_add(batch, slot.sampled, slot.n_past, { slot.id }, true);
@@ -2582,17 +2592,16 @@ struct server_context {
25822592
int32_t n_batch = llama_n_batch(ctx);
25832593
int32_t n_ubatch = llama_n_ubatch(ctx);
25842594

2585-
// track if given slot can be batched with slots already in the batch
2586-
server_slot * slot_batched = nullptr;
2587-
25882595
// next, batch any pending prompts without exceeding n_batch
25892596
if (params_base.cont_batching || batch.n_tokens == 0) {
25902597
for (auto & slot : slots) {
25912598
// check if we can batch this slot with the previous one
2592-
if (!slot_batched) {
2593-
slot_batched = &slot;
2594-
} else if (slot_batched && !slot_batched->can_batch_with(slot)) {
2595-
continue;
2599+
if (slot.is_processing()) {
2600+
if (!slot_batched) {
2601+
slot_batched = &slot;
2602+
} else if (slot_batched && !slot_batched->can_batch_with(slot)) {
2603+
continue;
2604+
}
25962605
}
25972606

25982607
// this slot still has a prompt to be processed

examples/server/tests/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ To run with stdout/stderr display in real time (verbose output, but useful for d
4444
DEBUG=1 ./tests.sh -s -v -x
4545
```
4646

47+
To run a single unit test:
48+
49+
```shell
50+
./tests.sh unit/test_{name of test case here}.py -v -x
51+
```
52+
4753
Hint: You can compile and run the tests in a single command, which is useful for local development:
4854
4955
```shell

examples/server/tests/unit/test_lora.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,11 @@ def test_lora_per_request():
5252
lora_config = [
5353
( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
5454
( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
55-
( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
56-
( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
55+
( [{"id": 0, "scale": 0.3}], "(special|thing|gifted)+" ),
56+
( [{"id": 0, "scale": 0.7}], "(far|from|home|away)+" ),
5757
( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
5858
( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
5959
]
60-
# FIXME: testing with a scale between 0.0 and 1.0 (e.g. 0.2, 0.5, 0.7) produces unreliable results
6160

6261
tasks = [(
6362
server.make_request,

0 commit comments

Comments
 (0)