From 568886416dcb9bd5e904ef07e052c017fc6008ce Mon Sep 17 00:00:00 2001 From: VJHack Date: Wed, 18 Sep 2024 19:34:05 -0500 Subject: [PATCH 1/5] allow disabling context shift for server --- common/arg.cpp | 2 +- examples/server/README.md | 1 + examples/server/server.cpp | 13 +++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/common/arg.cpp b/common/arg.cpp index 922391069d32a..c1ec3c4f99c37 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -691,7 +691,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex, [](gpt_params & params) { params.ctx_shift = false; } - ).set_examples({LLAMA_EXAMPLE_MAIN})); + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER})); add_opt(llama_arg( {"--chunks"}, "N", format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks), diff --git a/examples/server/README.md b/examples/server/README.md index 326e05e1e3ea1..c54f5b20344ca 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -37,6 +37,7 @@ The project is under active development, and we are [looking for feedback and co | `--poll-batch <0\|1>` | use polling to wait for work (default: same as --poll) | | `-c, --ctx-size N` | size of the prompt context (default: 0, 0 = loaded from model)
(env: LLAMA_ARG_CTX_SIZE) | | `-n, --predict, --n-predict N` | number of tokens to predict (default: -1, -1 = infinity, -2 = until context filled)
(env: LLAMA_ARG_N_PREDICT) | +| `--no-context-shift` | stop generation when context window is full (default: context shift is enabled)
| | `-b, --batch-size N` | logical maximum batch size (default: 2048)
(env: LLAMA_ARG_BATCH) | | `-ub, --ubatch-size N` | physical maximum batch size (default: 512)
(env: LLAMA_ARG_UBATCH) | | `--keep N` | number of tokens to keep from the initial prompt (default: 0, -1 = all) | diff --git a/examples/server/server.cpp b/examples/server/server.cpp index dce69f832e8bd..cf395830bcb73 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1815,6 +1815,13 @@ struct server_context { for (server_slot & slot : slots) { if (slot.ga_n == 1) { if (slot.is_processing() && (int) system_tokens.size() + slot.n_past >= slot.n_ctx - 1) { + if (!params.ctx_shift){ + slot.release(); + slot.print_timings(); + send_final_response(slot); + metrics.on_prediction(slot); + continue; + } // Shift context const int n_keep = slot.params.n_keep + add_bos_token; const int n_left = (int) system_tokens.size() + slot.n_past - n_keep; @@ -1940,6 +1947,12 @@ struct server_context { send_final_response(slot); continue; } + // context shift is disabled and prompt is too large - discard it + if (!params.ctx_shift && slot.n_prompt_tokens > slot.n_ctx ){ + slot.release(); + send_error(slot, "Input is too large to process. Either disable context shift or increase context length. ", ERROR_TYPE_SERVER); + continue; + } if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING) { // this prompt is too large to process - discard it From 2f2e4b35a65f54dd3f264ff5b456d76d7e39403a Mon Sep 17 00:00:00 2001 From: VJHack Date: Fri, 20 Sep 2024 08:48:59 -0500 Subject: [PATCH 2/5] Fixed error message to say 'enable context shift' --- examples/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index cf395830bcb73..2b10469b196c1 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1950,7 +1950,7 @@ struct server_context { // context shift is disabled and prompt is too large - discard it if (!params.ctx_shift && slot.n_prompt_tokens > slot.n_ctx ){ slot.release(); - send_error(slot, "Input is too large to process. 
Either disable context shift or increase context length. ", ERROR_TYPE_SERVER); + send_error(slot, "input is too large to process. enable context shift or increase the context length", ERROR_TYPE_SERVER); continue; } From 0cabcbe588ae0055dfc0fc4880797cf05618df49 Mon Sep 17 00:00:00 2001 From: VJHack Date: Fri, 20 Sep 2024 14:54:08 -0500 Subject: [PATCH 3/5] fixed server 200 null response when context is exceeded --- examples/server/server.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 2b10469b196c1..79acbaccaf60f 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1464,11 +1464,10 @@ struct server_context { std::vector results(id_tasks.size()); for (size_t i = 0; i < id_tasks.size(); i++) { server_task_result result = queue_results.recv(id_tasks); - if (result.error) { error_handler(result.data); cancel_tasks(id_tasks); - break; + return; } size_t idx = result.data["index"]; @@ -1948,9 +1947,9 @@ struct server_context { continue; } // context shift is disabled and prompt is too large - discard it - if (!params.ctx_shift && slot.n_prompt_tokens > slot.n_ctx ){ + if (!params.ctx_shift && (slot.n_prompt_tokens > slot.n_ctx) ){ slot.release(); - send_error(slot, "input is too large to process. enable context shift or increase the context length", ERROR_TYPE_SERVER); + send_error(slot, "Input is too large to process. 
Enable context shift or increase the context length", ERROR_TYPE_SERVER); continue; } From 9880e3a0696430b0673c45ff3a57e63bd880f4b4 Mon Sep 17 00:00:00 2001 From: VJHack Date: Fri, 20 Sep 2024 14:56:03 -0500 Subject: [PATCH 4/5] changed error message wording --- examples/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 79acbaccaf60f..36db183b3aa21 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1949,7 +1949,7 @@ struct server_context { // context shift is disabled and prompt is too large - discard it if (!params.ctx_shift && (slot.n_prompt_tokens > slot.n_ctx) ){ slot.release(); - send_error(slot, "Input is too large to process. Enable context shift or increase the context length", ERROR_TYPE_SERVER); + send_error(slot, "Input is too large to process. Either enable context shift or increase the context length.", ERROR_TYPE_SERVER); continue; } From 4af076b4946abf70bb5b0869c2528c93e1214170 Mon Sep 17 00:00:00 2001 From: VJHack Date: Sat, 21 Sep 2024 00:35:11 -0500 Subject: [PATCH 5/5] updated context shift error to ERROR_TYPE_INVALID_REQUEST --- examples/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 36db183b3aa21..531c0dddf487d 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1949,7 +1949,7 @@ struct server_context { // context shift is disabled and prompt is too large - discard it if (!params.ctx_shift && (slot.n_prompt_tokens > slot.n_ctx) ){ slot.release(); - send_error(slot, "Input is too large to process. Either enable context shift or increase the context length.", ERROR_TYPE_SERVER); + send_error(slot, "Input is too large to process. Either enable context shift or increase the context length.", ERROR_TYPE_INVALID_REQUEST); continue; }