Skip to content

Commit faff4b1

Browse files
committed
DRY: Cleaned up server sampling fix in preparation for rebase with separate PR
1 parent: 1cdcf36 · commit: faff4b1

File tree

1 file changed: +0 additions, -14 deletions

examples/server/server.cpp

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2192,19 +2192,9 @@ struct server_context {
21922192
GGML_ASSERT(slot.n_prompt_tokens < slot.n_ctx);
21932193
}
21942194

2195-
// Should this be (re-)moved?
2196-
//common_sampler_reset(slot.smpl);
2197-
21982195
if (slot.params.cache_prompt) {
21992196
// reuse any previously computed tokens that are common with the new prompt
22002197
slot.n_past = longest_common_prefix(slot.cache_tokens, prompt_tokens);
2201-
// Not sure if the for loop below should happen in multiple places but for now I moved it
2202-
// until after the entire prompt is processed so that sampling would happen consistently.
2203-
2204-
// push the prompt into the sampling context (do not apply grammar)
2205-
// for (int i = 0; i < slot.n_past; ++i) {
2206-
// common_sampler_accept(slot.smpl, slot.cache_tokens[i], false);
2207-
// }
22082198

22092199
// reuse chunks from the cached prompt by shifting their KV cache in the new position
22102200
if (params.n_cache_reuse > 0) {
@@ -2238,8 +2228,6 @@ struct server_context {
22382228
for (size_t i = 0; i < n_match; i++) {
22392229
slot.cache_tokens[head_p + i] = slot.cache_tokens[head_c + i];
22402230

2241-
//common_sampler_accept(slot.smpl, slot.cache_tokens[head_p + i], false);
2242-
22432231
slot.n_past++;
22442232
}
22452233

@@ -2291,8 +2279,6 @@ struct server_context {
22912279

22922280
// there is no common part left
22932281
slot.n_past = 0;
2294-
2295-
//common_sampler_reset(slot.smpl);
22962282
}
22972283

22982284
SLT_INF(slot, "kv cache rm [%d, end)\n", slot.n_past);

0 commit comments

Comments
 (0)