3 files changed: +37 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -244,7 +244,7 @@ struct server_slot {
244244        if  (params.n_predict  != -1 ) {
245245            n_remaining = params.n_predict  - n_decoded;
246246        } else  if  (global_params.n_predict  == -2 ) {
247-             n_remaining = n_ctx - n_past;
247+             n_remaining = n_ctx - n_past -  1 ;
248248        } else  if  (global_params.n_predict  != -1 ) {
249249            n_remaining = global_params.n_predict  - n_decoded;
250250        }
Original file line number | Diff line number | Diff line change
1+ @llama.cpp 
2+ @n_predict 
3+ Feature : llama.cpp server 
4+ 
5+   Background : Server startup 
6+     Given  a server listening on localhost:8080
7+     And    a model file test-model.gguf
8+     And    a model alias tinyllama-2
9+     And    42 as server seed
10+     And    64 KV cache size
11+ 
12+   Scenario : Generate N tokens 
13+     And    12 max tokens to predict
14+     Then   the server is starting
15+     Then   the server is healthy
16+     Given  a prompt:
17+     """ 
18+     Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
19+     """ 
20+     And    a completion request with no api error
21+     Then   12 tokens are predicted
22+ 
23+   Scenario : Generate tokens until context is full 
24+     And    -2 server max tokens to predict
25+     Then   the server is starting
26+     Then   the server is healthy
27+     Given  a prompt:
28+     """ 
29+     Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
30+     """ 
31+     And    a completion request with no api error
32+     Then   11 tokens are predicted
Original file line number | Diff line number | Diff line change
@@ -154,8 +154,10 @@ def step_n_slots(context, n_slots: int):
154154
155155@step ('{n_predict:d} server max tokens to predict' ) 
156156def  step_server_n_predict (context , n_predict : int ):
157-     context .n_server_predict  =  n_predict  if  n_predict  >  0  else  None 
158- 
157+     if  n_predict  >  0  or  n_predict  in  (- 1 , - 2 ):
158+         context .n_server_predict  =  n_predict 
159+     else :
160+         context .n_server_predict  =  None 
159161
160162@step ('{slot_save_path} as slot save path' ) 
161163def  step_slot_save_path (context , slot_save_path : str ):
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments