[Forward Call] fake quant fix (#145)

George · web-flow · commit 4328b41b6206 · 2024-09-03T15:14:19.000-04:00
* fake quant fix: pass the updated scale and zero point to fake_quantize

* fix

* comment

* comment
diff --git a/src/compressed_tensors/quantization/lifecycle/forward.py b/src/compressed_tensors/quantization/lifecycle/forward.py
@@ -348,7 +348,16 @@ def maybe_calibrate_or_quantize(
             update_parameter_data(module, updated_scale, f"{base_name}_scale")
             update_parameter_data(module, updated_zero_point, f"{base_name}_zero_point")
 
-    return fake_quantize(value, scale, zero_point, args, g_idx=g_idx)
+            scale = updated_scale
+            zero_point = updated_zero_point
+
+    return fake_quantize(
+        x=value,
+        scale=scale,
+        zero_point=zero_point,
+        args=args,
+        g_idx=g_idx,
+    )
 
 
 @torch.no_grad()