|
3 | 3 | import pytest |
4 | 4 |
|
5 | 5 | # Local |
6 | | -from .test_model_utils import check_linear_dtypes, delete_config, load_state_dict |
| 6 | +from .test_model_utils import ( |
| 7 | + check_linear_dtypes, |
| 8 | + delete_config, |
| 9 | + load_state_dict, |
| 10 | +) |
7 | 11 | from fms_mo import qmodel_prep |
8 | 12 | from fms_mo.utils.aiu_utils import save_for_aiu |
9 | 13 |
|
@@ -42,6 +46,50 @@ def test_save_model_bert( |
42 | 46 | check_linear_dtypes(state_dict, bert_linear_names) |
43 | 47 |
|
44 | 48 |
|
def test_large_outlier_bert(
    model_tiny_bert: BertModel,
    input_tiny: BatchEncoding,
    qcfg_bert: dict,
    bert_linear_names: list,
):
    """
    Test if the recomputation mode increases standard deviation of a tensor with an outlier.

    Args:
        model_tiny_bert (BertModel): Bert Tiny model
        input_tiny (BatchEncoding): Fake tiny input for the model
        qcfg_bert (dict): Quantized config for Bert
        bert_linear_names (list): Names of the Bert linear layers to check
    """
    import torch

    # Break every tensor channel with a large magnitude outlier
    for k, v in model_tiny_bert.state_dict().items():
        if k.endswith(".weight") and any(n in k for n in bert_linear_names):
            v[:, 0] = 1.21

    # Set recomputation for narrow weights and prep
    qcfg_bert["recompute_narrow_weights"] = True
    qmodel_prep(model_tiny_bert, input_tiny, qcfg_bert, use_dynamo=True)

    # Qmax should break the quantization with an outlier to have skinny distribution.
    # Record the per-channel stdev of each quantized linear weight before saving.
    layer2stdev: dict[str, torch.Tensor] = {}
    for k, v in model_tiny_bert.state_dict().items():
        if k.endswith(".weight") and any(n in k for n in bert_linear_names):
            layer2stdev[k] = v.to(torch.float32).std(dim=-1)

    save_for_aiu(model_tiny_bert, qcfg=qcfg_bert, verbose=True)
    state_dict = load_state_dict()

    # Loaded model w/ recomputed SAWB should have widened channel quantization stdev
    for k, v in state_dict.items():
        if k.endswith(".weight") and any(n in k for n in bert_linear_names):
            # Fail with a clear message if the saved state dict contains a weight
            # key we did not record pre-save (a bare .get() would yield None and
            # the comparison below would raise an opaque TypeError instead).
            assert k in layer2stdev, f"unexpected weight key in saved state dict: {k}"
            per_ch_stdev_model = layer2stdev[k]
            per_ch_stdev_loaded = v.to(torch.float32).std(dim=-1)

            assert torch.all(per_ch_stdev_loaded >= per_ch_stdev_model)
| 92 | + |
45 | 93 | def test_save_model_llama( |
46 | 94 | model_tiny_llama: LlamaModel, |
47 | 95 | input_tiny: BatchEncoding, |
@@ -88,3 +136,4 @@ def test_save_model_granite( |
88 | 136 | # Fetch saved state dict |
89 | 137 | state_dict = load_state_dict() |
90 | 138 | check_linear_dtypes(state_dict, granite_linear_names) |
| 139 | + |
0 commit comments