@@ -247,6 +247,7 @@ struct server_slot {
247247 if (is_processing ()) {
248248 SLT_INF (*this , " stop processing: n_past = %d, truncated = %d\n " , n_past, truncated);
249249
250+ t_last_used = ggml_time_us ();
250251 t_token_generation = (ggml_time_us () - t_start_generation) / 1e3 ;
251252 state = SLOT_STATE_IDLE;
252253 callback_on_release (id);
@@ -730,7 +731,7 @@ struct server_context {
730731
731732 // find the slot with the longest common subsequence whose similarity exceeds slot_prompt_similarity
732733 if (ret == nullptr && slot_prompt_similarity != 0 .0f ) {
733- int max_lcs_len = 0 ;
734+ int lcs_len = 0 ;
734735 float similarity = 0 ;
735736
736737 for (server_slot & slot : slots) {
@@ -745,20 +746,21 @@ struct server_context {
745746 }
746747
747748 // length of the Longest Common Subsequence between the current slot's prompt and the input prompt
748- int lcs_len = longest_common_subsequence (slot.cache_tokens , task.prompt_tokens );
749+ int cur_lcs_len = longest_common_subsequence (slot.cache_tokens , task.prompt_tokens );
749750
750751 // fraction of the common subsequence length compared to the current slot's prompt length
751- similarity = static_cast <float >(lcs_len ) / static_cast <int >(slot.cache_tokens .size ());
752+ float cur_similarity = static_cast <float >(cur_lcs_len ) / static_cast <int >(slot.cache_tokens .size ());
752753
753754 // select the current slot if the criteria match
754- if (lcs_len > max_lcs_len && similarity > slot_prompt_similarity) {
755- max_lcs_len = lcs_len;
755+ if (cur_lcs_len > lcs_len && cur_similarity > slot_prompt_similarity) {
756+ lcs_len = cur_lcs_len;
757+ similarity = cur_similarity;
756758 ret = &slot;
757759 }
758760 }
759761
760762 if (ret != nullptr ) {
761- SLT_DBG (*ret, " selected slot by lcs similarity, max_lcs_len = %d, similarity = %f\n " , max_lcs_len , similarity);
763+ SLT_DBG (*ret, " selected slot by lcs similarity, lcs_len = %d, similarity = %f\n " , lcs_len , similarity);
762764 }
763765 }
764766
0 commit comments