You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
if (slot.is_processing() && slot.n_past >= slot.n_ctx - 1) {
1801
+
if (slot.is_processing() && slot.n_past+ 1>= slot.n_ctx) {
1801
1802
if (!params.ctx_shift) {
1802
1803
// this check is redundant (for good)
1803
1804
// we should never get here, because generation should have already stopped in process_token()
@@ -1960,6 +1961,8 @@ struct server_context {
1960
1961
} else {
1961
1962
if (!params.ctx_shift) {
1962
1963
// if context shift is disabled, we make sure prompt size is smaller than KV size
1964
+
// TODO: there should be a separate parameter that controls prompt truncation
1965
+
// context shift should be applied only during the generation phase
1963
1966
if (slot.n_prompt_tokens >= slot.n_ctx) {
1964
1967
slot.release();
1965
1968
send_error(slot, "the request exceeds the available context size. try increasing the context size or enable context shift", ERROR_TYPE_INVALID_REQUEST);
0 commit comments