
Commit af7009e

[BAICHUAN][INF] fix attention_mask shape alignment issue (#4659) (#4673)
* fix attention_mask shape alignment issue
* fix flake8

1 parent c6a6e89

File tree

  • intel_extension_for_pytorch/transformers/models/xpu/optimize_transformers/modules/baichuan.py

1 file changed: 13 additions, 0 deletions

intel_extension_for_pytorch/transformers/models/xpu/optimize_transformers/modules/baichuan.py

Lines changed: 13 additions & 0 deletions
@@ -168,6 +168,19 @@ def forward(
             print("Unsupported input shape")
             return
 
+        # broadcast attention mask if needed
+        if attention_mask.dim() < 4:
+            attention_mask = (
+                attention_mask.unsqueeze(0)
+                .expand(
+                    bs * beam,
+                    attention_mask.shape[0],
+                    attention_mask.shape[1],
+                    attention_mask.shape[2],
+                )
+                .contiguous()
+            )
+
         IPEXTransformerAttn.beam_size = beam
         first_token = True if past_key_value is None else False
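For context, a minimal standalone sketch of the broadcast this commit adds: a mask with fewer than 4 dimensions is expanded to the 4-D shape (bs * beam, 1, query_len, key_len) that the attention kernel expects. The values of bs, beam, and the mask shape below are hypothetical example inputs, not taken from the commit.

    import torch

    bs, beam = 2, 4
    # a 3-D mask as it may arrive from the model, e.g. (1, query_len, key_len)
    attention_mask = torch.zeros(1, 8, 8)

    # broadcast the mask to 4-D, mirroring the added code
    if attention_mask.dim() < 4:
        attention_mask = (
            attention_mask.unsqueeze(0)          # (1, 1, 8, 8)
            .expand(
                bs * beam,                       # repeat across batch * beam
                attention_mask.shape[0],
                attention_mask.shape[1],
                attention_mask.shape[2],
            )
            .contiguous()
        )

    print(attention_mask.shape)  # torch.Size([8, 1, 8, 8])

Note that .expand() alone returns a zero-stride view; the trailing .contiguous() materializes the broadcast copy so that downstream kernels that assume dense memory can consume it.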
