1 file changed: +2 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -2699,7 +2699,7 @@ struct server_context {
26992699
27002700    //  return true if at least one slot has been purged
27012701    //  TODO: improve logic
2702-     //        - smarter decision which slot to purge
2702+     //        - smarter decision which slot to purge (LRU or longest prompt?) 
27032703    //        - move slot to level 2 cache instead of removing?
27042704    //        - instead of purging, try to store and resume later?
27052705    bool  try_purge_idle_slots () {
@@ -4159,7 +4159,7 @@ struct server_context {
41594159                    std::string err;
41604160
41614161                    if  (n_batch == 1  && ret == 1 ) {
4162-                         //  TODO: try to terminate only the largest active slot and continue
4162+                         //  TODO: try to terminate only the largest active slot/sequence  and continue with the rest 
41634163                        //        need to remove the tokens from the current batch too
41644164                        err = " Context size has been exceeded."  ;
41654165                    }
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments