
Commit adb7f38

fix: re-naming of qcfg inference parameter

Signed-off-by: Omobayode Fagbohungbe <[email protected]>

1 parent: b458d18

File tree: 4 files changed (+6, -6 lines)

fms_mo/dq.py

Lines changed: 1 addition & 1 deletion
@@ -176,7 +176,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
         loading fms_mo_args and recipe"
     )
     qcfg = qconfig_init(recipe="dq", args=fms_mo_args)
-    qcfg["inference"] = True
+    qcfg["fp8_inference"] = True

     model_size = model_size_Wb(model, unit="GB")
     gpu_mem_util_per = model_size / total_gpu_memory
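
The only change in run_dq is the key name; the flag is still forced on before quantization prep. A qcfg serialized with the old "inference" key would silently lose this setting after the rename, so a caller-side shim along these lines (a hypothetical sketch, not part of this commit) can bridge older configs:

def migrate_qcfg(qcfg: dict) -> dict:
    """Hypothetical helper: map the pre-rename key onto the new one."""
    if "inference" in qcfg and "fp8_inference" not in qcfg:
        qcfg["fp8_inference"] = qcfg.pop("inference")
    return qcfg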

fms_mo/prep.py

Lines changed: 1 addition & 1 deletion
@@ -701,7 +701,7 @@ def qmodel_prep(
     Returns:
         nn.Module: quantized model ready for further PTQ/QAT
     """
-    if qcfg["inference"]:
+    if qcfg["fp8_inference"]:
         if qcfg.get("QBmm"):
             swap_qbmm(model, qcfg)
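
qmodel_prep indexes qcfg["fp8_inference"] directly, so a qcfg built before this rename would raise KeyError here; the commit keeps things consistent by renaming the default in config_defaults() as well (see fms_mo/utils/qconfig_utils.py below). A more defensive variant of this gate, shown as an assumption rather than the committed code, would fall back to the old key:

def fp8_swap_gate(model, qcfg):
    # Hypothetical defensive read: prefer the new key, fall back to the
    # old one, default to False (swap_qbmm as imported in fms_mo/prep.py).
    if qcfg.get("fp8_inference", qcfg.get("inference", False)):
        if qcfg.get("QBmm"):
            swap_qbmm(model, qcfg)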

fms_mo/recipes/dq.json

Lines changed: 1 addition & 1 deletion
@@ -11,6 +11,6 @@
     "nbits_bmm1" : 32,
     "nbits_bmm2" : 32,
     "nbits_kvcache" : 32,
-    "inference": false,
+    "fp8_inference": false,
     "output_folder": null
 }
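
The recipe keeps the flag off by default; run_dq turns it on explicitly after loading the recipe. A minimal usage sketch, assuming qconfig_init merges the recipe's keys into the returned dict as the dq.py hunk above suggests:

from fms_mo.utils.qconfig_utils import qconfig_init

qcfg = qconfig_init(recipe="dq")       # loads fms_mo/recipes/dq.json
assert qcfg["fp8_inference"] is False  # recipe default after this commit
qcfg["fp8_inference"] = True           # what run_dq now sets before prep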

fms_mo/utils/qconfig_utils.py

Lines changed: 3 additions & 3 deletions
@@ -88,8 +88,6 @@ def config_defaults() -> dict:
         "nbits_w_lstm": None,
         "nbits_i_lstm": None,
         "nbits_h_lstm": None,
-        "inference": False,
-        "output_folder": None,
         # qmodes vars
         "qa_mode": "pact+",
         "qw_mode": "sawb+",
@@ -152,6 +150,8 @@
         "smoothq_scale_layers": [],
         "smoothq_act_scale_path": None,
         # Other vars
+        "fp8_inference": False,
+        "output_folder": None,
         "which2patch_contextmanager": None,
         "force_stop_if_qbmm_auto_check_failed": False,
         "world_size": max(1, torch.cuda.device_count()),
@@ -301,7 +301,7 @@ def qconfig_init(recipe: str = None, args: Any = None, use_mx: bool = False) ->
     qcfg["w_init_method"] = "sawb"
     qcfg["a_init_method"] = "percentile"
     qcfg["clip_val_asst_percentile"] = (0.1, 99.9)
-    qcfg["inference"] = False
+    qcfg["fp8_inference"] = False
     qcfg["output_folder"] = None

     # ways to control which layers to be quantized/skipped
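
The same key is renamed in four places (the dq recipe, the dq driver, qmodel_prep, and the defaults here), and the two defaults are also regrouped under the "Other vars" comment. A quick consistency check, sketched under the assumption that the repo layout matches the paths in this diff, can guard against the old key resurfacing:

import json

from fms_mo.utils.qconfig_utils import config_defaults

defaults = config_defaults()
assert "inference" not in defaults           # old key fully retired
assert defaults["fp8_inference"] is False    # new default, per this commit

with open("fms_mo/recipes/dq.json") as f:    # path from this diff
    recipe = json.load(f)
assert "fp8_inference" in recipe and "inference" not in recipe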
