@@ -3049,61 +3049,40 @@ def : GCNPat <
30493049
30503050// FIXME: This should have been narrowed to i32 during legalization.
30513051// This pattern should also be skipped for GlobalISel
3052+ class bswapi64ExtPat<Instruction inst, bit hasTrue16> { // Shared source/result DAGs for the i64 bswap pattern: `inst` is the instruction used for the four 32-bit steps, `hasTrue16` selects its operand layout.
3053+ dag pattern = (i64 (bswap i64:$a)); // Source DAG to match.
3054+ dag operand1 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)); // sub1 half of $a; used to build the result's sub0.
3055+ dag operand2 = (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)); // sub0 half of $a; used to build the result's sub1.
3056+ dag ALIGNBIT32_INST1 = !if(hasTrue16, // `inst` on (operand1, operand1) with immediate 24 -- presumably a 32-bit rotate; confirm against the instruction definition.
3057+ (inst 0, operand1, 0, operand1, 0, (i32 24), 0, 0), // hasTrue16 form: the 0 operands are src0/src1/src2 modifiers, then clamp and op_sel.
3058+ (inst operand1, operand1, (i32 24))); // Plain three-operand form.
3059+ dag ALIGNBIT32_INST2 = !if(hasTrue16, // Same as INST1 but with immediate 8.
3060+ (inst 0, operand1, 0, operand1, 0, (i32 8), 0, 0),
3061+ (inst operand1, operand1, (i32 8)));
3062+ dag ALIGNBIT32_INST3 = !if(hasTrue16, // `inst` on (operand2, operand2) with immediate 24.
3063+ (inst 0, operand2, 0, operand2, 0, (i32 24), 0, 0),
3064+ (inst operand2, operand2, (i32 24)));
3065+ dag ALIGNBIT32_INST4 = !if(hasTrue16, // Same as INST3 but with immediate 8.
3066+ (inst 0, operand2, 0, operand2, 0, (i32 8), 0, 0),
3067+ (inst operand2, operand2, (i32 8)));
3068+ dag result = (REG_SEQUENCE VReg_64, // Output DAG: two 32-bit values assembled into a VReg_64.
3069+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // Merge INST1 and INST2 with V_BFI_B32 under the constant 0x00ff00ff.
3070+ ALIGNBIT32_INST1,
3071+ ALIGNBIT32_INST2),
3072+ sub0, // Value derived from the input's sub1 is placed in the result's sub0.
3073+ (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)), // Merge INST3 and INST4 the same way.
3074+ ALIGNBIT32_INST3,
3075+ ALIGNBIT32_INST4),
3076+ sub1); // Value derived from the input's sub0 is placed in the result's sub1.
3077+ }
3078+
30523079let True16Predicate = NotHasTrue16BitInsts in
3053- def : GCNPat <
3054- (i64 (bswap i64:$a)),
3055- (REG_SEQUENCE VReg_64,
3056- (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3057- (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3058- (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3059- (i32 24)),
3060- (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3061- (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3062- (i32 8))),
3063- sub0,
3064- (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3065- (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3066- (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3067- (i32 24)),
3068- (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3069- (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3070- (i32 8))),
3071- sub1)
3072- >;
3080+ def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.pattern, // Under NotHasTrue16BitInsts: source DAG from bswapi64ExtPat, instantiated with hasTrue16 = 0.
3081+ bswapi64ExtPat<V_ALIGNBIT_B32_e64, 0>.result>; // Result DAG uses the plain three-operand V_ALIGNBIT_B32_e64 form.
30733082
30743083let True16Predicate = UseFakeTrue16Insts in
3075- def : GCNPat <
3076- (i64 (bswap i64:$a)),
3077- (REG_SEQUENCE VReg_64,
3078- (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3079- (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3080- (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3081- 0, /* src1_modifiers */
3082- (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3083- 0, /* src2_modifiers */
3084- (i32 24), /* clamp */ 0, /* op_sel */ 0),
3085- (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3086- (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3087- 0, /* src1_modifiers */
3088- (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3089- 0, /* src2_modifiers */
3090- (i32 8), /* clamp */ 0, /* op_sel */ 0)),
3091- sub0,
3092- (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00ff00ff)),
3093- (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3094- (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3095- 0, /* src1_modifiers */
3096- (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
3097- 0, /* src2_modifiers */
3098- (i32 24), /* clamp */ 0, /* op_sel */ 0),
3099- (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
3100- (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3101- 0, /* src1_modifiers */
3102- (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
3103- 0, /* src2_modifiers */
3104- (i32 8), /* clamp */ 0, /* op_sel */ 0)),
3105- sub1)
3106- >;
3084+ def : GCNPat <bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.pattern, // Under UseFakeTrue16Insts: source DAG from bswapi64ExtPat, instantiated with hasTrue16 = 1.
3085+ bswapi64ExtPat<V_ALIGNBIT_B32_fake16_e64, 1>.result>; // Result DAG uses the fake16 form with explicit modifier/clamp/op_sel zeros; each BFI takes both inputs from one subregister.
31073086
31083087// FIXME: The AddedComplexity should not be needed, but in GlobalISel
31093088// the BFI pattern ends up taking precedence without it.
0 commit comments