2 files changed, +8 -9 lines changed
@@ -2588,6 +2588,13 @@ struct server_context {
         // next, batch any pending prompts without exceeding n_batch
         if (params_base.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
+                // check if we can batch this slot with the previous one
+                if (!slot_batched) {
+                    slot_batched = &slot;
+                } else if (slot_batched && !slot_batched->can_batch_with(slot)) {
+                    continue;
+                }
+
                 // this slot still has a prompt to be processed
                 if (slot.state == SLOT_STATE_PROCESSING_PROMPT || slot.state == SLOT_STATE_STARTED) {
                     auto & prompt_tokens = slot.prompt_tokens;
@@ -2748,13 +2755,6 @@ struct server_context {
                     }
                 }

-                // check if we can batch this slot with the previous one
-                if (!slot_batched) {
-                    slot_batched = &slot;
-                } else if (slot_batched && !slot_batched->can_batch_with(slot)) {
-                    continue;
-                }
-
                 // keep only the common part
                 if (!llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1)) {
                     // could not partially delete (likely using a non-Transformer model)
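In the hunks above, the `can_batch_with` compatibility check moves from late in the slot-processing path (old lines 2751-2757) to the very top of the per-slot loop, so a slot that cannot share a batch with the current anchor slot is skipped before any prompt handling runs on it. Below is a minimal, self-contained sketch of the resulting control flow; the `Slot` type and its `lora_id` field are hypothetical stand-ins for whatever per-slot state the real `can_batch_with()` compares:

#include <cstdio>
#include <vector>

// Hypothetical stand-in for the server's slot state; in the real code,
// can_batch_with() compares whatever per-slot settings must match for two
// slots to share a batch (e.g. the active LoRA configuration)
struct Slot {
    int id;
    int lora_id;
    bool can_batch_with(const Slot & other) const {
        return lora_id == other.lora_id;
    }
};

int main() {
    std::vector<Slot> slots = { {0, 1}, {1, 2}, {2, 1} };
    const Slot * slot_batched = nullptr;

    for (const auto & slot : slots) {
        // the check runs first, mirroring the moved block in the diff:
        // the first slot becomes the batch anchor, and any slot that cannot
        // batch with it is skipped before any per-slot work is done
        if (!slot_batched) {
            slot_batched = &slot;
        } else if (!slot_batched->can_batch_with(slot)) {
            continue;
        }
        std::printf("batching slot %d\n", slot.id);
    }
    return 0;
}

With these inputs the sketch batches slots 0 and 2 and skips slot 1. Under the old ordering visible in the second hunk, that skip decision appears to have come only after the prompt-processing branch had already started work on the slot.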
@@ -68,10 +68,9 @@ def test_lora_per_request():
         "temperature": 0.0,
         "cache_prompt": False,  # TODO: remove this once test_cache_vs_nocache_prompt is fixed
     })
-    ) for lora, re_test in lora_config]
+    ) for lora, _ in lora_config]
     results = parallel_function_calls(tasks)

-    print(results)
     assert all([res.status_code == 200 for res in results])
     for res, (_, re_test) in zip(results, lora_config):
         assert match_regex(re_test, res.body["content"])