Skip to content

Commit 862fea3

Browse files
committed
simplify and remove duplication
1 parent 03a61c0 commit 862fea3

File tree

1 file changed

+31
-52
lines changed

1 file changed

+31
-52
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 31 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3049,61 +3049,40 @@ def : GCNPat <
30493049

30503050
// FIXME: This should have been narrowed to i32 during legalization.
30513051
// This pattern should also be skipped for GlobalISel.
3052+
// Helper that holds the source pattern and the result DAG for the i64 bswap
// selection pattern, so each ALIGNBIT variant can share a single definition.
//   inst      - the ALIGNBIT instruction used to build the result.
//   hasTrue16 - when set, `inst` is emitted in the eight-operand form, with a
//               literal 0 before each source and two trailing 0 operands
//               (src0/src1/src2 modifiers, clamp and op_sel, per the
//               annotations on the hand-written pattern this replaces);
//               otherwise the plain three-operand form is used.
class bswapi64ExtPat<Instruction inst, bit hasTrue16> {
3053+
// Source DAG to match: a 64-bit byte swap of $a.
dag pattern = (i64 (bswap i64:$a));
3054+
// operand1 is the sub1 half of $a; operand2 is the sub0 half.
dag operand1 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub1));
3055+
dag operand2 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub0));
3056+
// INST1/INST2: `inst` applied to (operand1, operand1) with amounts 24 and 8.
dag ALIGNBIT32_INST1 = !if(hasTrue16,
3057+
(inst 0, operand1, 0, operand1, 0, (i32 24), 0, 0),
3058+
(inst operand1, operand1, (i32 24)));
3059+
dag ALIGNBIT32_INST2 = !if(hasTrue16,
3060+
(inst 0, operand1, 0, operand1, 0, (i32 8), 0, 0),
3061+
(inst operand1, operand1, (i32 8)));
3062+
// INST3/INST4: the same two amounts, applied to (operand2, operand2).
dag ALIGNBIT32_INST3 = !if(hasTrue16,
3063+
(inst 0, operand2, 0, operand2, 0, (i32 24), 0, 0),
3064+
(inst operand2, operand2, (i32 24)));
3065+
dag ALIGNBIT32_INST4 = !if(hasTrue16,
3066+
(inst 0, operand2, 0, operand2, 0, (i32 8), 0, 0),
3067+
(inst operand2, operand2, (i32 8)));
3068+
// Result: a VReg_64 assembled from two V_BFI_B32_e64 values, each using the
// mask 0x00ff00ff. The halves are exchanged: the value derived from the
// input's sub1 is placed in sub0, and the one derived from sub0 in sub1.
dag result = (REG_SEQUENCE VReg_64,
3069+
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3070+
ALIGNBIT32_INST1,
3071+
ALIGNBIT32_INST2),
3072+
sub0,
3073+
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3074+
ALIGNBIT32_INST3,
3075+
ALIGNBIT32_INST4),
3076+
sub1);
3077+
}
3078+
30523079
let True16Predicate = NotHasTrue16BitInsts in
3053-
def : GCNPat <
3054-
(i64 (bswap i64:$a)),
3055-
(REG_SEQUENCE VReg_64,
3056-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3057-
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3058-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3059-
(i32 24)),
3060-
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3061-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3062-
(i32 8))),
3063-
sub0,
3064-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3065-
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3066-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3067-
(i32 24)),
3068-
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3069-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3070-
(i32 8))),
3071-
sub1)
3072-
>;
3080+
// i64 bswap pattern built with V_ALIGNBIT_B32_e64; hasTrue16 = 0 selects the
// three-operand form of the instruction in bswapi64ExtPat.
def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.pattern,
3081+
bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.result>;
30733082

30743083
let True16Predicate = UseFakeTrue16Insts in
3075-
def : GCNPat <
3076-
(i64 (bswap i64:$a)),
3077-
(REG_SEQUENCE VReg_64,
3078-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3079-
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3080-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3081-
0, /* src1_modifiers */
3082-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3083-
0, /* src2_modifiers */
3084-
(i32 24), /* clamp */ 0, /* op_sel */ 0),
3085-
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3086-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3087-
0, /* src1_modifiers */
3088-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3089-
0, /* src2_modifiers */
3090-
(i32 8), /* clamp */ 0, /* op_sel */ 0)),
3091-
sub0,
3092-
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3093-
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3094-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3095-
0, /* src1_modifiers */
3096-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3097-
0, /* src2_modifiers */
3098-
(i32 24), /* clamp */ 0, /* op_sel */ 0),
3099-
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3100-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3101-
0, /* src1_modifiers */
3102-
(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3103-
0, /* src2_modifiers */
3104-
(i32 8), /* clamp */ 0, /* op_sel */ 0)),
3105-
sub1)
3106-
>;
3084+
// i64 bswap pattern built with V_ALIGNBIT_B32_fake16_e64; hasTrue16 = 1
// selects the eight-operand form (extra 0 operands) in bswapi64ExtPat.
def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.pattern,
3085+
bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.result>;
31073086

31083087
// FIXME: The AddedComplexity should not be needed, but in GlobalISel
31093088
// the BFI pattern ends up taking precedence without it.

0 commit comments

Comments
 (0)