@@ -2192,19 +2192,9 @@ struct server_context {
21922192                                GGML_ASSERT (slot.n_prompt_tokens  < slot.n_ctx );
21932193                            }
21942194
2195-                             //  Should this be (re-)moved?
2196-                             // common_sampler_reset(slot.smpl);
2197- 
21982195                            if  (slot.params .cache_prompt ) {
21992196                                //  reuse any previously computed tokens that are common with the new prompt
22002197                                slot.n_past  = longest_common_prefix (slot.cache_tokens , prompt_tokens);
2201-                                 //  Not sure if the for loop below should happen in multiple places but for now I moved it
2202-                                 //  until after the entire prompt is processed so that sampling would happen consistently.
2203- 
2204-                                 //  push the prompt into the sampling context (do not apply grammar)
2205-                                 //  for (int i = 0; i < slot.n_past; ++i) {
2206-                                 //      common_sampler_accept(slot.smpl, slot.cache_tokens[i], false);
2207-                                 //  }
22082198
22092199                                //  reuse chunks from the cached prompt by shifting their KV cache in the new position
22102200                                if  (params.n_cache_reuse  > 0 ) {
@@ -2238,8 +2228,6 @@ struct server_context {
22382228                                            for  (size_t  i = 0 ; i < n_match; i++) {
22392229                                                slot.cache_tokens [head_p + i] = slot.cache_tokens [head_c + i];
22402230
2241-                                                 // common_sampler_accept(slot.smpl, slot.cache_tokens[head_p + i], false);
2242- 
22432231                                                slot.n_past ++;
22442232                                            }
22452233
@@ -2291,8 +2279,6 @@ struct server_context {
22912279
22922280                        //  there is no common part left
22932281                        slot.n_past  = 0 ;
2294- 
2295-                         // common_sampler_reset(slot.smpl);
22962282                    }
22972283
22982284                    SLT_INF (slot, " kv cache rm [%d, end)\n "  , slot.n_past );
0 commit comments