Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions examples/llm_ptq/hf_ptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,11 @@ def main(args):
model = model.language_model
model_type = get_model_type(model)

if model_type == "phi4mm":
warnings.warn(
"Please set the default input_mode to InputMode.LANGUAGE before quantizing."
)

if args.sparsity_fmt != "dense":
if args.batch_size == 0:
# Sparse algorithm takes more GPU memory so we reduce the batch_size by 4.
Expand Down Expand Up @@ -478,9 +483,6 @@ def main(args):
quant_cfg["quant_cfg"]["*audio*"] = {"enable": False}
quant_cfg["quant_cfg"]["*image*"] = {"enable": False}
quant_cfg["quant_cfg"]["*vision*"] = {"enable": False}
warnings.warn(
"Please set the default input_mode to InputMode.LANGUAGE before quantizing."
)

if not model_is_already_quantized or calibration_only:
# Only run single sample for preview
Expand Down
Loading