@@ -2558,12 +2558,22 @@ struct server_context {
25582558 // start populating the batch for this iteration
25592559 common_batch_clear (batch);
25602560
2561+ // track if given slot can be batched with slots already in the batch
2562+ server_slot * slot_batched = nullptr ;
2563+
25622565 // first, add sampled tokens from any ongoing sequences
25622565 for (auto & slot : slots) {
25632566 if (slot.state != SLOT_STATE_GENERATING) {
25642567 continue ;
25652568 }
25662569
2570+ // check if we can batch this slot with the previous one
2571+ if (!slot_batched) {
2572+ slot_batched = &slot;
2573+ } else if (slot_batched && !slot_batched->can_batch_with (slot)) {
2574+ continue ;
2575+ }
2576+
25672577 slot.i_batch = batch.n_tokens ;
25682578
25692579 common_batch_add (batch, slot.sampled , slot.n_past , { slot.id }, true );
@@ -2582,17 +2592,16 @@ struct server_context {
25822592 int32_t n_batch = llama_n_batch (ctx);
25832593 int32_t n_ubatch = llama_n_ubatch (ctx);
25842594
2585- // track if given slot can be batched with slots already in the batch
2586- server_slot * slot_batched = nullptr ;
2587-
25882595 // next, batch any pending prompts without exceeding n_batch
25892596 if (params_base.cont_batching || batch.n_tokens == 0 ) {
25902597 for (auto & slot : slots) {
25912598 // check if we can batch this slot with the previous one
2592- if (!slot_batched) {
2593- slot_batched = &slot;
2594- } else if (slot_batched && !slot_batched->can_batch_with (slot)) {
2595- continue ;
2599+ if (slot.is_processing ()) {
2600+ if (!slot_batched) {
2601+ slot_batched = &slot;
2602+ } else if (slot_batched && !slot_batched->can_batch_with (slot)) {
2603+ continue ;
2604+ }
25962605 }
25972606
25982607 // this slot still has a prompt to be processed
0 commit comments