This repository was archived by the owner on Sep 10, 2025. It is now read-only.
3 files changed: +13 -3 lines changed

@@ -731,6 +731,7 @@ jobs:
           git clone https://github.com/ggerganov/llama.cpp.git
           pushd llama.cpp
+          git checkout 64ed2091b24b2f9747148fdf49a34ed5938762c3
           make
           popd
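The workflow change above pins the llama.cpp build to a fixed commit so the CI job is reproducible rather than tracking the moving tip of master. As a minimal sketch only, the same pinning could be scripted in Python (the URL and SHA come from the diff; the use of subprocess is illustrative and assumes git and make are on PATH):

import subprocess

# Clone llama.cpp, check out the pinned commit from the workflow step above, then build.
LLAMA_CPP_URL = "https://github.com/ggerganov/llama.cpp.git"
PINNED_SHA = "64ed2091b24b2f9747148fdf49a34ed5938762c3"

subprocess.run(["git", "clone", LLAMA_CPP_URL], check=True)
subprocess.run(["git", "-C", "llama.cpp", "checkout", PINNED_SHA], check=True)
subprocess.run(["make", "-C", "llama.cpp"], check=True)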
@@ -373,6 +373,8 @@ def _load_model_gguf(builder_args: BuilderArgs) -> Model:
         kwargs = {}
     else:
         kwargs = builder_args.gguf_kwargs
+
+    kwargs.setdefault("device", builder_args.device)
     model = Model.from_gguf(builder_args.gguf_path, **kwargs)
     return model
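A note on the setdefault call added above: it fills in the device from builder_args only when the caller's gguf_kwargs did not already specify one, so an explicit device in gguf_kwargs still wins. A minimal sketch of that behavior with illustrative values:

# Caller already chose a device in gguf_kwargs: setdefault leaves it alone.
kwargs = {"device": "cuda"}
kwargs.setdefault("device", "cpu")
assert kwargs["device"] == "cuda"

# No device in gguf_kwargs: the builder's device is used as the fallback.
kwargs = {}
kwargs.setdefault("device", "cpu")
assert kwargs["device"] == "cpu"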
@@ -570,6 +570,7 @@ def load_model_and_state_dict(
     load_state_dict: bool = True,
     load_as_quantized: bool = True,
     inner_k_tiles=8,
+    device="cpu",
 ) -> torch.nn.Module:
     """
     Parses the GGUF file and returns an nn.Module on meta device along with a state_dict
@@ -609,9 +610,15 @@ def load_model_and_state_dict(
                 q, s, z = Q4_0.unpack(t)
                 scales_and_zeros = pack_scales_and_zeros(s, z)
                 q_uint8 = (q[::, ::2] << 4 | q[::, 1::2]).to(torch.uint8)
-                weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
-                    q_uint8, inner_k_tiles
-                )
+
+                if torch.device(device).type == "cpu":
+                    weight_int4pack = torch.ops.aten._convert_weight_to_int4pack_for_cpu(
+                        q_uint8, inner_k_tiles
+                    )
+                else:
+                    weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
+                        q_uint8, inner_k_tiles
+                    )
                 state_dict[f"{fqn}.weight"] = weight_int4pack
                 state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros
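The second hunk routes the int4 weight packing through the CPU-specific aten op when the requested device is CPU, and keeps the original op for any other device. A minimal sketch of that dispatch, factored into a standalone helper (the function name is hypothetical; the packed q_uint8 layout and the op arguments are taken from the diff above):

import torch

def pack_int4_weight(q_uint8: torch.Tensor, inner_k_tiles: int, device: str = "cpu") -> torch.Tensor:
    # Sketch of the dispatch in the diff: CPU gets the *_for_cpu packing op,
    # every other device type falls back to the original op.
    if torch.device(device).type == "cpu":
        return torch.ops.aten._convert_weight_to_int4pack_for_cpu(q_uint8, inner_k_tiles)
    return torch.ops.aten._convert_weight_to_int4pack(q_uint8, inner_k_tiles)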