@@ -207,7 +207,6 @@ def quantize_q4(
207207 block_size : int ,
208208 is_symmetric : bool ,
209209 accuracy_level : int ,
210- op_block_list : List [str ] = [],
211210):
212211 """
213212 Quantize the weights of the model from float32 to 4-bit int
@@ -289,6 +288,7 @@ def quantize(input_folder, output_folder, quantization_args: QuantizationArgumen
289288 quantize_fp16 (
290289 model ,
291290 save_path ,
291+ quantization_args .op_block_list
292292 )
293293
294294 elif mode in (QuantMode .Q4 , QuantMode .Q4F16 ):
@@ -300,12 +300,12 @@ def quantize(input_folder, output_folder, quantization_args: QuantizationArgumen
300300 block_size = block_size ,
301301 is_symmetric = quantization_args .is_symmetric ,
302302 accuracy_level = quantization_args .accuracy_level ,
303- op_block_list = quantization_args .op_block_list ,
304303 )
305304 if mode == QuantMode .Q4F16 :
306305 quantize_fp16 (
307306 q4_model ,
308307 save_path ,
308+ quantization_args .op_block_list ,
309309 )
310310
311311 elif mode == QuantMode .BNB4 :
@@ -318,7 +318,6 @@ def quantize(input_folder, output_folder, quantization_args: QuantizationArgumen
318318 if quantization_args .quant_type is not None
319319 else MatMulBnb4Quantizer .NF4
320320 ),
321- op_block_list = quantization_args .op_block_list ,
322321 )
323322
324323 elif mode in (QuantMode .Q8 , QuantMode .QI8 , QuantMode .QU8 ):
0 commit comments