File tree Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -77,7 +77,7 @@ def __init__(
7777        mul_mat_q : bool  =  True ,
7878        logits_all : bool  =  False ,
7979        embedding : bool  =  False ,
80-         offload_kqv : bool  =  False ,
80+         offload_kqv : bool  =  True ,
8181        # Sampling Params 
8282        last_n_tokens_size : int  =  64 ,
8383        # LoRA Params 
Original file line number Diff line number Diff line change @@ -90,7 +90,7 @@ class ModelSettings(BaseSettings):
9090    logits_all : bool  =  Field (default = True , description = "Whether to return logits." )
9191    embedding : bool  =  Field (default = True , description = "Whether to use embeddings." )
9292    offload_kqv : bool  =  Field (
93-         default = False , description = "Whether to offload kqv to the GPU." 
93+         default = True , description = "Whether to offload kqv to the GPU." 
9494    )
9595    # Sampling Params 
9696    last_n_tokens_size : int  =  Field (
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments