Skip to content

Commit daf9f97

Browse files
committed
feat: Added guards for save_for_aiu and added a test for 0 clip vals
Signed-off-by: Brandon Groth <[email protected]>
1 parent ae67c8c commit daf9f97

File tree

3 files changed

+34
-5
lines changed

3 files changed

+34
-5
lines changed

fms_mo/quant/quantizers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3029,7 +3029,7 @@ def __init__(
30293029
self.register_buffer("clip_valn", torch.zeros(perGp[0]))
30303030
else:
30313031
self.register_buffer(
3032-
"clip_val", torch.zeros(perCh) if perCh else torch.Tensor([1.0])
3032+
"clip_val", torch.zeros(perCh) if perCh else torch.Tensor([0.0])
30333033
)
30343034
self.register_buffer(
30353035
"clip_valn", torch.zeros(perCh) if perCh else torch.Tensor([0.0])

fms_mo/utils/aiu_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,14 @@ def process_weight(
232232
is_w_recomputed = False
233233
if layer_name + ".quantize_weight.clip_val" in model.state_dict():
234234
w_cv = model.state_dict()[layer_name + ".quantize_weight.clip_val"]
235+
236+
# Check that clip values are initialized
237+
if torch.any(w_cv.isclose(torch.tensor(0.0))):
238+
raise ValueError(
239+
f"Quantization clip values for {layer_name=} have near-zero values and "
240+
"are likely uninitialized."
241+
)
242+
235243
if w_cv.numel() > 1:
236244
w_cv = w_cv.unsqueeze(dim=1)
237245
weight_int_as_fp = torch.clamp(127 / w_cv * weight_pre_quant, -127, 127).round()

tests/models/test_save_aiu.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ def test_save_model_bert(
2929
3030
Args:
3131
model_tiny_bert (BertModel): Bert Tiny Model
32-
config_tiny_bert (BertConfig): Bert Tiny config
3332
input_tiny (BatchEncoding): Fake tiny input
34-
qcfg_bert (dict): Quantized config for Bert
33+
qcfg_bert (dict): Quantized config for Tiny Bert
34+
bert_linear_names (list): Names of linear layers for Bert
3535
"""
3636
# Quantize model and save state dict
3737
qmodel_prep(model_tiny_bert, input_tiny, qcfg_bert, use_dynamo=True)
@@ -54,8 +54,8 @@ def test_large_outlier_bert(
5454
Args:
5555
model_tiny_bert (BertModel): Bert Tiny Model
5656
input_tiny (BatchEncoding): Fake tiny input
57-
qcfg_bert (dict): Fake tiny input
58-
bert_linear_names (list): Quantized config for Bert
57+
qcfg_bert (dict): Quantized config for Tiny Bert
58+
bert_linear_names (list): Names of linear layers for Bert
5959
"""
6060
# Third Party
6161
import torch
@@ -87,6 +87,27 @@ def test_large_outlier_bert(
8787
assert torch.all(perCh_stdev_loaded >= perCh_stdev_model)
8888

8989

90+
def test_clip_vals_zero_bert(
91+
model_tiny_bert: BertModel,
92+
input_tiny: BatchEncoding,
93+
qcfg_bert: dict,
94+
):
95+
"""
96+
Test that uninitialized clip vals raise an error
97+
98+
Args:
99+
model_tiny_bert (BertModel): Bert Tiny Model
100+
input_tiny (BatchEncoding): Fake tiny input
101+
qcfg_bert (dict): Quantized config for Tiny Bert
102+
"""
103+
# Turn off calibration -> clip vals are init as 0
104+
qcfg_bert["qmodel_calibration"] = 0
105+
qmodel_prep(model_tiny_bert, input_tiny, qcfg_bert, use_dynamo=True)
106+
107+
with pytest.raises(ValueError):
108+
save_for_aiu(model_tiny_bert, qcfg=qcfg_bert, verbose=True)
109+
110+
90111
def test_save_model_llama(
91112
model_tiny_llama: LlamaModel,
92113
input_tiny: BatchEncoding,

0 commit comments

Comments
 (0)