@@ -3076,42 +3076,43 @@ def : GCNPat <
30763076 (i32 8), /* clamp */ 0, /* op_sel */ 0))
30773077>;
30783078
3079- // FIXME: This should have been narrowed to i32 during legalization.
3080- // This pattern should also be skipped for GlobalISel
3081- class bswapi64ExtPat<Instruction inst, bit hasTrue16> {
3082- dag pattern = (i64 (bswap i64:$a));
3083- dag operand1 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub1));
3084- dag operand2 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub0));
3085- dag ALIGNBIT32_INST1 = !if(hasTrue16,
3086- (inst 0, operand1, 0, operand1, 0, (i32 24), 0, 0),
3087- (inst operand1, operand1, (i32 24)));
3088- dag ALIGNBIT32_INST2 = !if(hasTrue16,
3089- (inst 0, operand1, 0, operand1, 0, (i32 8), 0, 0),
3090- (inst operand1, operand1, (i32 8)));
3091- dag ALIGNBIT32_INST3 = !if(hasTrue16,
3092- (inst 0, operand2, 0, operand2, 0, (i32 24), 0, 0),
3093- (inst operand2, operand2, (i32 24)));
3094- dag ALIGNBIT32_INST4 = !if(hasTrue16,
3095- (inst 0, operand2, 0, operand2, 0, (i32 8), 0, 0),
3096- (inst operand2, operand2, (i32 8)));
3097- dag result = (REG_SEQUENCE VReg_64,
3098- (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3099- ALIGNBIT32_INST1,
3100- ALIGNBIT32_INST2),
3101- sub0,
3102- (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3103- ALIGNBIT32_INST3,
3104- ALIGNBIT32_INST4),
3105- sub1);
3079+ class AlignBit32Inst<dag op1, dag op2, dag op3, bit hasTrue16> { // Helper that assembles one alignbit result dag from three operand dags; consumers read the `expr` field.
3080+ Instruction inst = !if(hasTrue16, V_ALIGNBIT_B32_fake16_e64, V_ALIGNBIT_B32_e64); // hasTrue16 selects the fake16 opcode, otherwise the plain e64 one.
3081+ dag NoMods = !if(hasTrue16, (inst 0), (inst)); // A single literal 0 operand for the fake16 form, an operand-less dag otherwise. NOTE(review): the 0s presumably fill modifier/clamp/op_sel slots of the fake16 form -- confirm against the instruction definition.
3082+ dag expr = !con(NoMods, (inst op1), NoMods, (inst op2), // !con concatenates the operand lists of its dag arguments, so this yields
3083+ NoMods, (inst op3), NoMods, NoMods); // (inst 0, op1, 0, op2, 0, op3, 0, 0) when hasTrue16 is set and (inst op1, op2, op3) otherwise.
3084+ }
3085+
3086+ multiclass bswapi64ExtPat<bit hasTrue16> { // Emits one anonymous GCNPat for a 64-bit bswap; hasTrue16 is forwarded to every AlignBit32Inst below.
3087+ def : GCNPat <
3088+ (i64 (bswap i64:$a)), // Source pattern: bswap of the i64 value $a.
3089+ (REG_SEQUENCE VReg_64, // Result: a VReg_64 assembled from two i32 pieces.
3090+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // First piece: V_BFI_B32 with the 0x00ff00ff constant over two alignbit dags built from $a's sub1.
3091+ AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3092+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3093+ (i32 24), hasTrue16>.expr, // sub1 paired with itself, third operand 24.
3094+ AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3095+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3096+ (i32 8), hasTrue16>.expr), // sub1 paired with itself, third operand 8.
3097+ sub0, // The piece computed from the input's sub1 is placed in the result's sub0.
3098+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // Second piece: same construction, this time over $a's sub0.
3099+ AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3100+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3101+ (i32 24), hasTrue16>.expr, // sub0 paired with itself, third operand 24.
3102+ AlignBit32Inst<(i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3103+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3104+ (i32 8), hasTrue16>.expr), // sub0 paired with itself, third operand 8.
3105+ sub1) // The piece computed from the input's sub0 is placed in the result's sub1, so the two halves trade places.
3106+ >;
31063107}
31073108
3109+ // FIXME: This should have been narrowed to i32 during legalization.
3110+ // This pattern should also be skipped for GlobalISel
31083111let True16Predicate = NotHasTrue16BitInsts in
3109- def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.pattern,
3110- bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.result>;
3112+ defm : bswapi64ExtPat</*hasTrue16*/0>; // hasTrue16 = 0: the pattern is built on V_ALIGNBIT_B32_e64 with three operands.
31113113
31123114let True16Predicate = UseFakeTrue16Insts in
3113- def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.pattern,
3114- bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.result>;
3115+ defm : bswapi64ExtPat</*hasTrue16*/1>; // hasTrue16 = 1: the pattern is built on V_ALIGNBIT_B32_fake16_e64 with the extra literal 0 operands.
31153116
31163117// FIXME: The AddedComplexity should not be needed, but in GlobalISel
31173118// the BFI pattern ends up taking precedence without it.
0 commit comments