Commit 60d4194

fix incorrect if branch
1 parent 3abc339 commit 60d4194
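
Reading the hunk below: before the change, a closing brace sat directly after slot.n_prompt_tokens_processed = 0; and ended the enclosing if branch early, so the batch-type check, KV-cache trimming, and prompt batching that follow ran for every slot on every pass. The fix removes that brace and closes the branch only after the final "prompt done" log, re-indenting the block one level. The branch condition itself sits above the hunk, so it is not visible here.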

File tree

1 file changed: 50 additions, 50 deletions


examples/server/server.cpp

Lines changed: 50 additions & 50 deletions
@@ -2009,75 +2009,75 @@ struct server_context {
                     }

                     slot.n_prompt_tokens_processed = 0;
-                }

-                // non-causal tasks require to fit the entire prompt in the physical batch
-                if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) {
-                    // cannot fit the prompt in the current batch - will try next iter
-                    if (batch.n_tokens + slot.n_prompt_tokens > n_batch) {
-                        continue;
+                    // non-causal tasks require to fit the entire prompt in the physical batch
+                    if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING || slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK) {
+                        // cannot fit the prompt in the current batch - will try next iter
+                        if (batch.n_tokens + slot.n_prompt_tokens > n_batch) {
+                            continue;
+                        }
                     }
-                }

-                // check that we are in the right batch_type, if not defer the slot
-                const bool slot_type =
-                    slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING ||
-                    slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK ? 1 : 0;
+                    // check that we are in the right batch_type, if not defer the slot
+                    const bool slot_type =
+                        slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING ||
+                        slot.cmpl_type == SERVER_TASK_CMPL_TYPE_RERANK ? 1 : 0;

-                if (batch_type == -1) {
-                    batch_type = slot_type;
-                } else if (batch_type != slot_type) {
-                    continue;
-                }
+                    if (batch_type == -1) {
+                        batch_type = slot_type;
+                    } else if (batch_type != slot_type) {
+                        continue;
+                    }

-                // keep only the common part
-                if (!llama_kv_cache_seq_rm(ctx, slot.id + 1, slot.n_past, -1)) {
-                    // could not partially delete (likely using a non-Transformer model)
-                    llama_kv_cache_seq_rm(ctx, slot.id + 1, -1, -1);
+                    // keep only the common part
+                    if (!llama_kv_cache_seq_rm(ctx, slot.id + 1, slot.n_past, -1)) {
+                        // could not partially delete (likely using a non-Transformer model)
+                        llama_kv_cache_seq_rm(ctx, slot.id + 1, -1, -1);

-                    // there is no common part left
-                    slot.n_past = 0;
+                        // there is no common part left
+                        slot.n_past = 0;

-                    common_sampler_reset(slot.smpl);
-                }
+                        common_sampler_reset(slot.smpl);
+                    }

-                SLT_INF(slot, "kv cache rm [%d, end)\n", slot.n_past);
+                    SLT_INF(slot, "kv cache rm [%d, end)\n", slot.n_past);

-                // remove the non-common part from the cache
-                slot.cache_tokens.resize(slot.n_past);
+                    // remove the non-common part from the cache
+                    slot.cache_tokens.resize(slot.n_past);

-                // add prompt tokens for processing in the current batch
-                while (slot.n_past < slot.n_prompt_tokens && batch.n_tokens < n_batch) {
-                    common_batch_add(batch, slot.prompt_tokens[slot.n_past], slot.n_past, { slot.id + 1 }, false);
+                    // add prompt tokens for processing in the current batch
+                    while (slot.n_past < slot.n_prompt_tokens && batch.n_tokens < n_batch) {
+                        common_batch_add(batch, slot.prompt_tokens[slot.n_past], slot.n_past, { slot.id + 1 }, false);

-                    if (slot.params.cache_prompt) {
-                        slot.cache_tokens.push_back(slot.prompt_tokens[slot.n_past]);
-                    }
+                        if (slot.params.cache_prompt) {
+                            slot.cache_tokens.push_back(slot.prompt_tokens[slot.n_past]);
+                        }

-                    slot.n_prompt_tokens_processed++;
-                    slot.n_past++;
-                }
+                        slot.n_prompt_tokens_processed++;
+                        slot.n_past++;
+                    }

-                SLT_INF(slot, "prompt processing progress, n_past = %d, n_tokens = %d, progress = %f\n", slot.n_past, batch.n_tokens, (float) slot.n_prompt_tokens_processed / slot.n_prompt_tokens);
+                    SLT_INF(slot, "prompt processing progress, n_past = %d, n_tokens = %d, progress = %f\n", slot.n_past, batch.n_tokens, (float) slot.n_prompt_tokens_processed / slot.n_prompt_tokens);

-                // entire prompt has been processed
-                if (slot.n_past == slot.n_prompt_tokens) {
-                    slot.state = SLOT_STATE_DONE_PROMPT;
+                    // entire prompt has been processed
+                    if (slot.n_past == slot.n_prompt_tokens) {
+                        slot.state = SLOT_STATE_DONE_PROMPT;

-                    GGML_ASSERT(batch.n_tokens > 0);
+                        GGML_ASSERT(batch.n_tokens > 0);

-                    // Process all prompt tokens through sampler system
-                    for (int i = 0; i < slot.n_prompt_tokens; ++i) {
-                        common_sampler_accept(slot.smpl, slot.prompt_tokens[i], false);
-                    }
+                        // Process all prompt tokens through sampler system
+                        for (int i = 0; i < slot.n_prompt_tokens; ++i) {
+                            common_sampler_accept(slot.smpl, slot.prompt_tokens[i], false);
+                        }

-                    // extract the logits only for the last token
-                    batch.logits[batch.n_tokens - 1] = true;
+                        // extract the logits only for the last token
+                        batch.logits[batch.n_tokens - 1] = true;

-                    slot.n_decoded = 0;
-                    slot.i_batch = batch.n_tokens - 1;
+                        slot.n_decoded = 0;
+                        slot.i_batch = batch.n_tokens - 1;

-                    SLT_INF(slot, "prompt done, n_past = %d, n_tokens = %d\n", slot.n_past, batch.n_tokens);
+                        SLT_INF(slot, "prompt done, n_past = %d, n_tokens = %d\n", slot.n_past, batch.n_tokens);
+                    }
                 }

                 if (batch.n_tokens >= n_batch) {
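
For illustration, here is a minimal, self-contained sketch of the misplaced-brace pattern this commit fixes. All names in it (slot_state, slot, process_prompt) are hypothetical stand-ins invented for the example, not the server's real API:

// Minimal sketch of the bug pattern fixed by this commit. The types and
// process_prompt are illustrative stand-ins, not the actual server.cpp code.
#include <cstdio>
#include <vector>

enum slot_state { SLOT_STATE_IDLE, SLOT_STATE_PROCESSING_PROMPT };

struct slot {
    slot_state state;
    int n_prompt_tokens_processed = -1;
};

// stand-in for the block the commit moves inside the branch
// (batch-type check, KV-cache trimming, prompt batching, logging)
static void process_prompt(slot & s) {
    s.n_prompt_tokens_processed = 0;
    std::printf("processing prompt\n");
}

int main() {
    std::vector<slot> slots = { { SLOT_STATE_IDLE }, { SLOT_STATE_PROCESSING_PROMPT } };

    for (auto & s : slots) {
        // Before the fix, the brace closed the branch too early:
        //
        //     if (s.state == SLOT_STATE_PROCESSING_PROMPT) {
        //         s.n_prompt_tokens_processed = 0;
        //     }
        //     process_prompt(s);   // ran for EVERY slot, idle ones included
        //
        // After the fix, the work stays inside the branch:
        if (s.state == SLOT_STATE_PROCESSING_PROMPT) {
            process_prompt(s);
        }
    }
    return 0;
}

With the brace in the early position, process_prompt runs even for the idle slot; moving the brace past the block restores the intended per-state behavior, which is the shape of the change above.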
