@@ -1312,7 +1312,7 @@ struct server_slot {
13121312        return  task_type == SERVER_TASK_TYPE_EMBEDDING || task_type == SERVER_TASK_TYPE_RERANK;
13131313    }
13141314
1315-     bool  can_batch_with (server_slot & other_slot) {
1315+     bool  can_batch_with (server_slot & other_slot) const   {
13161316        return  is_non_causal () == other_slot.is_non_causal ()
13171317            && are_lora_equal (lora, other_slot.lora );
13181318    }
@@ -2157,14 +2157,6 @@ struct server_context {
21572157        }
21582158
21592159        if  (slot.has_new_line ) {
2160-             //  if we have already seen a new line, we stop after a certain time limit
2161-             if  (slot.params .t_max_predict_ms  > 0  && (ggml_time_us () - slot.t_start_generation  > 1000 .0f *slot.params .t_max_predict_ms )) {
2162-                 slot.stop            = STOP_TYPE_LIMIT;
2163-                 slot.has_next_token  = false ;
2164- 
2165-                 SLT_DBG (slot, " stopped by time limit, n_decoded = %d, t_max_predict_ms = %d ms\n "  , slot.n_decoded , (int ) slot.params .t_max_predict_ms );
2166-             }
2167- 
21682160            //  require that each new line has a whitespace prefix (i.e. indentation) of at least slot.params.n_indent
21692161            if  (slot.params .n_indent  > 0 ) {
21702162                //  check the current indentation
@@ -2203,6 +2195,14 @@ struct server_context {
22032195        //  check if there is a new line in the generated text
22042196        if  (result.text_to_send .find (' \n '  ) != std::string::npos) {
22052197            slot.has_new_line  = true ;
2198+ 
2199+             //  if we have seen a new line, we stop after a certain time limit, but only upon another new line
2200+             if  (slot.params .t_max_predict_ms  > 0  && (ggml_time_us () - slot.t_start_generation  > 1000 .0f *slot.params .t_max_predict_ms )) {
2201+                 slot.stop            = STOP_TYPE_LIMIT;
2202+                 slot.has_next_token  = false ;
2203+ 
2204+                 SLT_DBG (slot, " stopped by time limit, n_decoded = %d, t_max_predict_ms = %d ms\n "  , slot.n_decoded , (int ) slot.params .t_max_predict_ms );
2205+             }
22062206        }
22072207
22082208        //  if context shift is disabled, we stop when it reaches the context limit
0 commit comments