We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 682bf6d · commit da6dc1b (copy full SHA for da6dc1b)
modelopt/torch/export/plugins/megatron_importer.py
@@ -512,7 +512,10 @@ def _import_state_dict(self):
512
self.rules["k_layernorm"](attention.k_layernorm, layer_id)
513
self.rules["linear_qkv"](attention.linear_qkv, layer_id)
514
self.rules["linear_proj"](attention.linear_proj, layer_id)
515
- if hasattr(attention.core_attention, "softmax_offset"):
+ if (
516
+ hasattr(attention.core_attention, "softmax_offset")
517
+ and attention.core_attention.softmax_offset is not None
518
+ ):
519
self.rules["softmax_offset"](
520
attention.core_attention.softmax_offset, layer_id
521
)
0 commit comments