Commit bbb2304

Fix issue of attention.core_attention is None (#334)
Signed-off-by: Yue <[email protected]>
1 parent: 146d1d9 · commit: bbb2304

File tree

1 file changed: +4 -1 lines changed

modelopt/torch/export/unified_export_megatron.py

Lines changed: 4 additions & 1 deletion
@@ -1085,7 +1085,10 @@ def _get_state_dict(self):
                 self.rules["k_layernorm"](layer.self_attention.k_layernorm, layer_id)
                 self.rules["linear_qkv"](layer.self_attention.linear_qkv, layer_id)
                 self.rules["linear_proj"](layer.self_attention.linear_proj, layer_id)
-                if hasattr(layer.self_attention.core_attention, "softmax_offset"):
+                if (
+                    getattr(layer.self_attention.core_attention, "softmax_offset", None)
+                    is not None
+                ):
                     self.rules["softmax_offset"](
                         layer.self_attention.core_attention.softmax_offset, layer_id
                     )

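Note on the change: hasattr returns True whenever the attribute exists, even if its value is None, so the old guard could hand a None softmax_offset to the export rule. The new guard only fires when the attribute exists and is not None, and it also tolerates the attribute being absent, since getattr falls back to the None default. A minimal standalone sketch of the difference (not from the ModelOpt codebase; the CoreAttention stand-in class is hypothetical):

# Hypothetical stand-in that only mimics the attribute in question.
class CoreAttention:
    def __init__(self, softmax_offset=None):
        self.softmax_offset = softmax_offset

attn = CoreAttention(softmax_offset=None)

# Old guard: the attribute exists, so this is True even though its value is None.
print(hasattr(attn, "softmax_offset"))  # True

# New guard: only True when the attribute exists and its value is not None.
print(getattr(attn, "softmax_offset", None) is not None)  # False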