
Commit c3be5d3

Combine Specs (#304)

* Add support for weighted train
* Combine attn_mask dropping & data format specs

Co-authored-by: thomasw21 <[email protected]>
1 parent 43ab0e0 commit c3be5d3

File tree: 1 file changed (+5, −6 lines)


megatron/model/gpt_model.py

Lines changed: 5 additions & 6 deletions
@@ -254,13 +254,12 @@ def _to_float16(inputs):
                           # TODO: Change naming of class from GPT to something that encapsulate prefix lm.
                           self_attn_mask_type=attn_mask_type))
 
-
-        if not hasattr(args, 'attn_mask'):
-            # We drop attention mask from the pipeline
-            self.specs.append(lambda x: x[0])
-
         # Undo data format change
-        self.specs.append(lambda x: x.transpose(0, 1).contiguous())
+        def undo(x):
+            if not hasattr(args, 'attn_mask'):
+                x = x[0]
+            return x.transpose(0, 1).contiguous()
+        self.specs.append(undo)
 
         # Final layernorm after transformer layers
         self.specs.append(
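For context, here is a minimal, self-contained sketch of what the new combined spec does when the pipeline applies it to the output of the transformer layers: it drops the attention mask that was carried between stages (when args has no cached 'attn_mask') and undoes the earlier [seq, batch] data format change. The SimpleNamespace stand-in for Megatron's args object and the tensor shapes are illustrative assumptions, not code from this repository.

from types import SimpleNamespace

import torch

# Stand-in for Megatron's global args; having no 'attn_mask' attribute here
# mirrors the case where the attention mask is still passed through the
# pipeline alongside the hidden states.
args = SimpleNamespace()

specs = []

def undo(x):
    # Drop the attention mask from the pipeline, keeping only the hidden states.
    if not hasattr(args, 'attn_mask'):
        x = x[0]
    # Undo the data format change: [seq, batch, hidden] -> [batch, seq, hidden].
    return x.transpose(0, 1).contiguous()

specs.append(undo)

# Illustrative shapes only.
hidden_states = torch.randn(5, 2, 8)        # [seq, batch, hidden]
attention_mask = torch.ones(2, 1, 5, 5)     # dummy mask travelling with the activations
out = specs[-1]((hidden_states, attention_mask))
print(out.shape)  # torch.Size([2, 5, 8])

Compared with the previous version, the mask-dropping lambda and the transpose lambda are folded into one callable, so the spec list ends up one entry shorter (matching the +5/−6 line change).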
