File tree Expand file tree Collapse file tree 1 file changed +8
-1
lines changed 
examples/models/llama/source_transformation Expand file tree Collapse file tree 1 file changed +8
-1
lines changed Original file line number Diff line number Diff line change @@ -73,6 +73,13 @@ def quantize(  # noqa C901
7373        # Add quantization mode options here: group size, bit width, etc. 
7474        return  WeightOnlyInt8QuantHandler (model ).quantized_model ()
7575    elif  qmode .startswith ("torchao:" ):
76+         import  os 
77+         import  glob 
78+         libs  =  glob .glob (os .path .abspath (os .path .join (os .path .dirname (__file__ ), "../../../../cmake-out/lib/libtorchao_ops_aten.*" )))
79+         assert  len (libs ) ==  1 , f"Expected 1 library but got { len (libs )}  
80+         logging .info (f"Loading custom ops library: { libs [0 ]}  )
81+         torch .ops .load_library (libs [0 ])
82+ 
7683        logging .warning (
7784            "When qmode is torchao, the groupsize is obtained from the qmode string with regex parse; blocksize is ignored." 
7885        )
@@ -107,7 +114,7 @@ def quantize(  # noqa C901
107114        from  torchao .quantization .quant_api  import  Int8DynActInt4WeightQuantizer 
108115
109116        model  =  Int8DynActInt4WeightQuantizer (
110-             precision = torch_dtype , groupsize = group_size ,  bitwidth = 4 
117+             precision = torch_dtype , groupsize = group_size 
111118        ).quantize (model )
112119
113120        if  verbose :
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments