Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions torchchat/utils/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,9 @@ def quantize_model(
if not support_tensor_subclass:
unwrap_tensor_subclass(model)
continue
# Use dtype precision specified in user config, else fallback on global precision.
if "precision" in quantize_options:
dtype = quantize_options["precision"].get("dtype", str(get_precision()))
precision = name_to_dtype(dtype, device)
else:
precision = get_precision()
# We set the global precision from the quantize options (when specified) in cli.py:485,
# so the precision returned by get_precision() is always the authoritative precision/dtype in torchchat.
precision = get_precision()

try:
if quantizer == "linear:a8wxdq":
Expand Down
Loading