Merge pull request #74 from chichun-charlie-liu/bug_fix

chichun-charlie-liu · web-flow · commit 2f0f78017455 · 2025-02-28T14:11:02.000-05:00
fix: in the DQ example, when nbits_kvcache=8, the context manager detects an incorrect frame
diff --git a/fms_mo/dq.py b/fms_mo/dq.py
@@ -207,7 +207,6 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
         else:
             act_scales = get_act_scales(model, dq_dataloader, qcfg)
         torch.save(act_scales, scale_file)
-
     qmodel_prep(
         model,
         dq_dataloader,
diff --git a/fms_mo/utils/utils.py b/fms_mo/utils/utils.py
@@ -115,7 +115,9 @@ def mockmatmul(mat1, mat2):
     while cf.f_back and qbmm_mod is None:
         # First frame is QBmm's forward itself, can start searching from previous stack
         cf = cf.f_back
-        if "forward" in cf.f_code.co_name or "_attn" in cf.f_code.co_name:
+        if (
+            "forward" in cf.f_code.co_name or "_attn" in cf.f_code.co_name
+        ) and "self" in cf.f_locals:
             mod_calling_bmm_function = cf.f_locals["self"]
             # If not found -> default to torch.bmm
             qbmm_mod = getattr(