We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 682bf6d · commit da6dc1b (copy full SHA for da6dc1b)
modelopt/torch/export/plugins/megatron_importer.py
@@ -512,7 +512,10 @@ def _import_state_dict(self):
512
self.rules["k_layernorm"](attention.k_layernorm, layer_id)
513
self.rules["linear_qkv"](attention.linear_qkv, layer_id)
514
self.rules["linear_proj"](attention.linear_proj, layer_id)
515
- if hasattr(attention.core_attention, "softmax_offset"):
+ if (
516
+ hasattr(attention.core_attention, "softmax_offset")
517
+ and attention.core_attention.softmax_offset is not None
518
+ ):
519
self.rules["softmax_offset"](
520
attention.core_attention.softmax_offset, layer_id
521
)
0 commit comments