
Commit 4878ba1

fix: corrected the inference file
Signed-off-by: Omobayode Fagbohungbe <[email protected]>
1 parent: 31dd8c7 · commit: 4878ba1

3 files changed: +10 / -10 lines changed

fms_mo/prep.py

Lines changed: 1 addition & 1 deletion
@@ -404,7 +404,7 @@ def make_quant_module(module, curr_full_name, qcfg, verbose=False):
                 f"{curr_full_name} {type(module)} seems to be a wrapper of Linear."
                 "Please make sure it doesn't wrap BN and activ func. Otherwise"
                 "please create an equivalent Linear wrapper and change qcfg['mapping']."
-            )
+            )
         QLin = mapping.get(nn.Linear, None)
         if QLin is None:
             if verbose:

(The removed and added lines both contain only the closing parenthesis, so this hunk appears to be a whitespace-only adjustment.)
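For context, the warning text in this hunk points at qcfg['mapping'], which associates float module types with their quantized replacements. Below is a minimal, hypothetical sketch of that lookup; QLinearStub and the mapping contents are illustrative stand-ins, not fms_mo classes.

```python
# Hypothetical sketch of the mapping.get(nn.Linear, None) lookup shown above.
# QLinearStub is a made-up placeholder, not an fms_mo class.
import torch.nn as nn

class QLinearStub(nn.Linear):
    """Stand-in for a quantized Linear replacement."""

mapping = {nn.Linear: QLinearStub}  # illustrative qcfg['mapping'] entry

# A custom wrapper of Linear with no entry of its own falls back to the
# nn.Linear entry, which the warning message asks users to keep valid.
QLin = mapping.get(nn.Linear, None)
if QLin is None:
    print("No quantized Linear registered in qcfg['mapping']")
```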

fms_mo/utils/dq_inf.py

Lines changed: 8 additions & 8 deletions
@@ -49,31 +49,31 @@ def check_quantization_setting(model: nn.Module = None):
     logger.info("Validating config settings")
     if quant_config["quant_method"] == "compressed-tensors":
         if quant_config["format"] != "float-quantized":
-            raise Exception(
+            raise ValueError(
                 "The input activation and weight quantization dtypes are not supported"
             )

         if (
             quant_config["config_groups"]["group_0"]["input_activations"]["num_bits"]
             != 8
         ):
-            raise Exception("Only 8 bit FP input activation quantization is supported")
+            raise ValueError("Only 8 bit FP input activation quantization is supported")

         if quant_config["config_groups"]["group_0"]["weights"]["num_bits"] != 8:
-            raise Exception("Only 8-bit FP weight quantization is supported")
+            raise ValueError("Only 8-bit FP weight quantization is supported")

         if quant_config["kv_cache_scheme"] is None:
             pass
         else:
             if quant_config["kv_cache_scheme"]["type"] is not float:
-                raise Exception("The KV-Cache quantization dtype is not supported")
+                raise ValueError("The KV-Cache quantization dtype is not supported")

             if quant_config["kv_cache_scheme"]["num_bits"] != 8:
-                raise Exception("Only 8-bit KV-Cache quantization dtype is supported")
+                raise ValueError("Only 8-bit KV-Cache quantization dtype is supported")

         return True

-    raise Exception("This quantization method is not supported for inferencing")
+    raise ValueError("This quantization method is not supported for inferencing")


 def load_inference_qconfig_file(model_args, fms_mo_args):

@@ -115,7 +115,7 @@ def update_qcfg_from_model_config(model_args, qcfg):
     ):
         qcfg["qa_mode"] = "fp8_e4m3_scale_perToken"
     else:
-        raise Exception("Only perToken Fp8 activation quantizer is supported")
+        raise ValueError("Only perToken Fp8 activation quantizer is supported")

     if (
         config["quantization_config"]["config_groups"]["group_0"]["weights"]["strategy"]

@@ -128,7 +128,7 @@ def update_qcfg_from_model_config(model_args, qcfg):
     ):
         qcfg["qw_mode"] = "fp8_e4m3_scale"
     else:
-        raise Exception(
+        raise ValueError(
             "Only perChannel or pertensor FP8 quantizers are currently supported"
         )

fms_mo/utils/import_utils.py

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@
     "torchvision",
     "huggingface_hub",
     "torchao",
-    "compressed_tensors",
+    #"compressed_tensors",
 ]

 available_packages = {}
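The list above feeds an availability probe for optional dependencies, and commenting out "compressed_tensors" removes it from that probe. A rough sketch of how such a probe commonly works is shown below; the exact mechanism in fms_mo/utils/import_utils.py may differ.

```python
# Hedged sketch of a package-availability probe; names mirror the diff above,
# but the actual implementation in fms_mo/utils/import_utils.py may differ.
import importlib.util

packages_to_check = [
    "torchvision",
    "huggingface_hub",
    "torchao",
    # "compressed_tensors",  # no longer probed after this commit
]

available_packages = {
    name: importlib.util.find_spec(name) is not None for name in packages_to_check
}

print(available_packages)  # e.g. {"torchvision": True, ...} depending on the environment
```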
