patch 2

broxigarchen · broxigarchen · commit 86af61b2c7ee · 2025-02-19T15:48:56.000-05:00
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3076,42 +3076,43 @@ def : GCNPat <
                                         (i32 8), /* clamp */ 0, /* op_sel */ 0))
 >;
 
-// FIXME: This should have been narrowed to i32 during legalization.
-// This pattern should also be skipped for GlobalISel
-class bswapi64ExtPat<Instruction inst, bit hasTrue16> {
-  dag pattern = (i64 (bswap i64:$a));
-  dag operand1 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub1));
-  dag operand2 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub0));
-  dag ALIGNBIT32_INST1 = !if(hasTrue16,
-                            (inst 0, operand1, 0, operand1, 0, (i32 24), 0, 0),
-                            (inst operand1, operand1, (i32 24)));
-  dag ALIGNBIT32_INST2 = !if(hasTrue16,
-                            (inst 0, operand1, 0, operand1, 0, (i32 8), 0, 0),
-                            (inst operand1, operand1, (i32 8)));
-  dag ALIGNBIT32_INST3 = !if(hasTrue16,
-                            (inst 0, operand2, 0, operand2, 0, (i32 24), 0, 0),
-                            (inst operand2, operand2, (i32 24)));
-  dag ALIGNBIT32_INST4 = !if(hasTrue16,
-                            (inst 0, operand2, 0, operand2, 0, (i32 8), 0, 0),
-                            (inst operand2, operand2, (i32 8)));
-  dag result = (REG_SEQUENCE VReg_64,
-               (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
-                               ALIGNBIT32_INST1,
-                               ALIGNBIT32_INST2),
-               sub0,
-               (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
-                               ALIGNBIT32_INST3,
-                               ALIGNBIT32_INST4),
-               sub1);
+class AlignBit32Inst<dag op1, dag op2, dag op3, bit hasTrue16> { // Builds the result dag for one V_ALIGNBIT_B32 use; read it via .expr.
+  Instruction inst = !if(hasTrue16, V_ALIGNBIT_B32_fake16_e64, V_ALIGNBIT_B32_e64); // fake16 opcode iff hasTrue16 is set.
+  dag NoMods = !if(hasTrue16, (inst 0), (inst)); // One literal 0 operand for fake16; an empty operand list otherwise.
+  dag expr = !con(NoMods, (inst op1), NoMods, (inst op2), // fake16 yields (inst 0, op1, 0, op2, 0, op3, 0, 0);
+                     NoMods, (inst op3), NoMods, NoMods); // otherwise (inst op1, op2, op3). Last two 0s presumably clamp/op_sel - confirm.
+}
+
+multiclass bswapi64ExtPat<bit hasTrue16> { // Emits one anonymous GCNPat for i64 bswap; hasTrue16 picks the ALIGNBIT operand form.
+def : GCNPat <
+  (i64 (bswap i64:$a)), // Source pattern: bswap of the i64 value $a.
+  (REG_SEQUENCE VReg_64, // Result: a VReg_64 assembled from two i32 values (sub0, then sub1).
+  (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // Result sub0: computed only from source sub1.
+     AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+                     (i32 24), hasTrue16>.expr, // ALIGNBIT of sub1 with itself by 24 (presumably a rotate - confirm).
+     AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+                     (i32 8), hasTrue16>.expr), // ALIGNBIT of sub1 with itself by 8.
+  sub0,
+  (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // Result sub1: computed only from source sub0.
+     AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+                     (i32 24), hasTrue16>.expr, // ALIGNBIT of sub0 with itself by 24.
+     AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+                     (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+                     (i32 8), hasTrue16>.expr), // ALIGNBIT of sub0 with itself by 8.
+  sub1)
+>;
 }
 
+// FIXME: This should have been narrowed to i32 during legalization.
+// These patterns should also be skipped for GlobalISel.
 let True16Predicate = NotHasTrue16BitInsts in
-def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.pattern,
-              bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.result>;
+defm : bswapi64ExtPat</*hasTrue16*/0>; // Instantiates the pattern with V_ALIGNBIT_B32_e64 (three-operand form).
 
 let True16Predicate = UseFakeTrue16Insts in
-def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.pattern,
-              bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.result>;
+defm : bswapi64ExtPat</*hasTrue16*/1>; // Instantiates the pattern with V_ALIGNBIT_B32_fake16_e64 (extra 0 operands).
 
 // FIXME: The AddedComplexity should not be needed, but in GlobalISel
 // the BFI pattern ends up taking precedence without it.