@@ -49,31 +49,31 @@ def check_quantization_setting(model: nn.Module = None):
     logger.info("Validating config settings")
     if quant_config["quant_method"] == "compressed-tensors":
         if quant_config["format"] != "float-quantized":
-            raise Exception(
+            raise ValueError(
                 "The input activation and weight quantization dtypes are not supported"
             )
 
         if (
             quant_config["config_groups"]["group_0"]["input_activations"]["num_bits"]
             != 8
         ):
-            raise Exception("Only 8 bit FP input activation quantization is supported")
+            raise ValueError("Only 8 bit FP input activation quantization is supported")
 
         if quant_config["config_groups"]["group_0"]["weights"]["num_bits"] != 8:
-            raise Exception("Only 8-bit FP weight quantization is supported")
+            raise ValueError("Only 8-bit FP weight quantization is supported")
 
         if quant_config["kv_cache_scheme"] is None:
             pass
         else:
             if quant_config["kv_cache_scheme"]["type"] is not float:
-                raise Exception("The KV-Cache quantization dtype is not supported")
+                raise ValueError("The KV-Cache quantization dtype is not supported")
 
             if quant_config["kv_cache_scheme"]["num_bits"] != 8:
-                raise Exception("Only 8-bit KV-Cache quantization dtype is supported")
+                raise ValueError("Only 8-bit KV-Cache quantization dtype is supported")
 
         return True
 
-    raise Exception("This quantization method is not supported for inferencing")
+    raise ValueError("This quantization method is not supported for inferencing")
 
 
 def load_inference_qconfig_file(model_args, fms_mo_args):
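For reference, a minimal sketch of a `quant_config` dict that would pass the checks in the hunk above. The key names (`quant_method`, `format`, `config_groups`, `kv_cache_scheme`, `num_bits`) are taken from the validator itself; the concrete values shown are illustrative assumptions, not taken from the repository.

```python
# Hypothetical quant_config that satisfies check_quantization_setting();
# only the keys the validator actually inspects are shown.
quant_config = {
    "quant_method": "compressed-tensors",
    "format": "float-quantized",
    "config_groups": {
        "group_0": {
            "input_activations": {"num_bits": 8},
            "weights": {"num_bits": 8},
        }
    },
    # None skips the KV-cache branch; a dict would also need to satisfy the
    # "type" and "num_bits" == 8 checks to get past the remaining branches.
    "kv_cache_scheme": None,
}
```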
@@ -115,7 +115,7 @@ def update_qcfg_from_model_config(model_args, qcfg):
     ):
         qcfg["qa_mode"] = "fp8_e4m3_scale_perToken"
     else:
-        raise Exception("Only perToken Fp8 activation quantizer is supported")
+        raise ValueError("Only perToken Fp8 activation quantizer is supported")
 
     if (
         config["quantization_config"]["config_groups"]["group_0"]["weights"]["strategy"]
@@ -128,7 +128,7 @@ def update_qcfg_from_model_config(model_args, qcfg):
     ):
         qcfg["qw_mode"] = "fp8_e4m3_scale"
     else:
-        raise Exception(
+        raise ValueError(
             "Only perChannel or pertensor FP8 quantizers are currently supported"
         )
 
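A small, hypothetical call-site sketch of what the `Exception` → `ValueError` change buys callers: the specific subclass can be caught without also swallowing unrelated failures, while any existing `except Exception` handlers keep working because `ValueError` is a subclass of `Exception`. Only `check_quantization_setting` comes from the diff; the surrounding function and names are illustrative.

```python
import logging

logger = logging.getLogger(__name__)


def validate_or_die(model):
    # Hypothetical caller; check_quantization_setting() is the validator
    # touched in the first hunk above.
    try:
        check_quantization_setting(model)
    except ValueError as err:
        # A handler this narrow is only possible now that the validator
        # raises ValueError instead of a bare Exception.
        logger.error("Unsupported quantization config: %s", err)
        raise
```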