Bug fix: in the DQ example, when nbits_kvcache=8, the context manager could select an incorrect stack frame (one whose function name matches but which has no `self` in its locals), causing an error.

chichun-charlie-liu · chichun-charlie-liu · commit 1f0c65c850a2 · 2025-02-28T16:32:26.000Z
Signed-off-by: cliu-us <cliu@us.ibm.com>
diff --git a/fms_mo/dq.py b/fms_mo/dq.py
@@ -207,7 +207,6 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
         else:
             act_scales = get_act_scales(model, dq_dataloader, qcfg)
         torch.save(act_scales, scale_file)
-
     qmodel_prep(
         model,
         dq_dataloader,
diff --git a/fms_mo/utils/utils.py b/fms_mo/utils/utils.py
@@ -115,7 +115,9 @@ def mockmatmul(mat1, mat2):
     while cf.f_back and qbmm_mod is None:
         # First frame is QBmm's forward itself, can start searching from previous stack
         cf = cf.f_back
-        if "forward" in cf.f_code.co_name or "_attn" in cf.f_code.co_name:
+        if (
+            "forward" in cf.f_code.co_name or "_attn" in cf.f_code.co_name
+        ) and "self" in cf.f_locals:
             mod_calling_bmm_function = cf.f_locals["self"]
             # If not found -> default to torch.bmm
             qbmm_mod = getattr(