@@ -2334,9 +2334,9 @@ def : AMDGPUPatIgnoreCopies <
23342334 (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
23352335>;
23362336
2337- // 64-bit version
2337+ foreach vt = [i64, v2i32] in {
23382338def : AMDGPUPatIgnoreCopies <
2339- (DivergentBinFrag<xor> i64 :$z, (and i64 :$x, (xor i64 :$y, i64 :$z))),
2339+ (DivergentBinFrag<xor> vt :$z, (and vt :$x, (xor vt :$y, vt :$z))),
23402340 (REG_SEQUENCE VReg_64,
23412341 (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
23422342 (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
@@ -2345,6 +2345,7 @@ def : AMDGPUPatIgnoreCopies <
23452345 (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
23462346 (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
23472347>;
2348+ }
23482349
23492350def : AMDGPUPat <
23502351 (fcopysign f32:$src0, f32:$src1),
@@ -2378,13 +2379,45 @@ def : AMDGPUPat <
23782379let True16Predicate = NotHasTrue16BitInsts in {
23792380def : ROTRPattern <V_ALIGNBIT_B32_e64>;
23802381
2382+ def : AMDGPUPat < // Selects rotr on v2i32 by handling each 32-bit lane separately with V_ALIGNBIT_B32_e64.
2383+ (rotr v2i32:$src0, v2i32:$src1),
2384+ (REG_SEQUENCE VReg_64, // Recombines the two per-lane results into a VReg_64 value.
2385+ (V_ALIGNBIT_B32_e64
2386+ (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)), // The same lane of $src0 is passed as both data operands,
2387+ (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)), // which is how the rotr source is mapped onto the align-bit instruction.
2388+ (i32 (EXTRACT_SUBREG VReg_64:$src1, sub0))), sub0, // Low lane: amount comes from the sub0 half of $src1.
2389+ (V_ALIGNBIT_B32_e64
2390+ (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2391+ (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2392+ (i32 (EXTRACT_SUBREG VReg_64:$src1, sub1))), sub1) // High lane: amount comes from the sub1 half of $src1.
2393+ >; // NOTE(review): an identical rotr v2i32 pattern is defined again as a GCNPat further down in this same True16Predicate = NotHasTrue16BitInsts block; confirm whether both are needed.
2394+
2395+ // Matches an fneg of a select between two i32 values bitcast to f32 and selects a single V_CNDMASK_B32_e64. Prevents a regression in fneg-modifier-casting.ll together with the modifications to XorCombine().
2396+ def : AMDGPUPat <
2397+ (fneg (select i1:$src0, (f32 (bitconvert i32:$src1)), (f32 (bitconvert i32:$src2)))), // $src0 is the i1 condition; $src1/$src2 are the true/false values.
2398+ (V_CNDMASK_B32_e64 (i32 1), $src2, (i32 1), $src1, $src0)>; // False value first, then true value, then the condition. NOTE(review): each (i32 1) is presumably the NEG source-modifier bit for the operand that follows it; confirm.
2399+
23812400def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))), // Truncated i64 right shift whose amount is masked with 31: selected as one V_ALIGNBIT_B32_e64.
23822401 (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), // First data operand: the sub1 half of $src0.
23832402 (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; // Second data operand: the sub0 half; $src1 is passed through without the explicit mask.
23842403
23852404def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), // Same selection as the masked-amount pattern, but for a shift amount matched by ShiftAmt32Imm.
23862405 (V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), // First data operand: the sub1 half of $src0.
23872406 (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; // Second data operand: the sub0 half; $src1 is the matched immediate amount.
2407+
2408+ def : GCNPat < // Selects rotr on v2i32 by handling each 32-bit lane separately with V_ALIGNBIT_B32_e64.
2409+ (rotr v2i32:$src0, v2i32:$src1),
2410+ (REG_SEQUENCE VReg_64, // Recombines the two per-lane results into a VReg_64 value.
2411+ (V_ALIGNBIT_B32_e64
2412+ (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)), // The same lane of $src0 is passed as both data operands.
2413+ (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2414+ (i32 (EXTRACT_SUBREG VReg_64:$src1, sub0))), sub0, // Low lane: amount comes from the sub0 half of $src1.
2415+ (V_ALIGNBIT_B32_e64
2416+ (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2417+ (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2418+ (i32 (EXTRACT_SUBREG VReg_64:$src1, sub1))), sub1) // High lane: amount comes from the sub1 half of $src1.
2419+ >; // NOTE(review): this repeats the rotr v2i32 AMDGPUPat defined earlier in this same True16Predicate = NotHasTrue16BitInsts block; confirm whether both are needed.
2420+
23882421} // end True16Predicate = NotHasTrue16BitInsts
23892422
23902423let True16Predicate = UseRealTrue16Insts in {
@@ -2397,6 +2430,20 @@ def : GCNPat <
23972430 /* clamp */ 0, /* op_sel */ 0)
23982431>;
23992432
2433+ def : GCNPat < // Selects rotr on v2i32 by handling each 32-bit lane separately with V_ALIGNBIT_B32_t16_e64.
2434+ (rotr v2i32:$src0, v2i32:$src1),
2435+ (REG_SEQUENCE VReg_64, // Recombines the two per-lane results into a VReg_64 value.
2436+ (V_ALIGNBIT_B32_t16_e64
2437+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)), // Each leading 0 mirrors the src*_modifiers slot annotated on the neighbouring t16 patterns.
2438+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)), // The same lane of $src0 is passed as both data operands.
2439+ 0, (EXTRACT_SUBREG (i32 (EXTRACT_SUBREG VReg_64:$src1, sub0)) ,lo16),0,0), sub0, // Low lane: lo16 of $src1.sub0 is the amount; the trailing 0,0 fill the clamp and op_sel slots.
2440+ (V_ALIGNBIT_B32_t16_e64
2441+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2442+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2443+ 0, (EXTRACT_SUBREG (i32 (EXTRACT_SUBREG VReg_64:$src1, sub1)) ,lo16),0,0), sub1) // High lane: the amount must come from $src1.sub1 so that each lane uses its own rotate amount.
2444+ >;
2445+
2446+
24002447def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
24012448 (V_ALIGNBIT_B32_t16_e64 0, /* src0_modifiers */
24022449 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
@@ -2423,6 +2470,20 @@ def : GCNPat <
24232470 $src1, /* clamp */ 0, /* op_sel */ 0)
24242471>;
24252472
2473+ def : GCNPat < // Selects rotr on v2i32 by handling each 32-bit lane separately with V_ALIGNBIT_B32_fake16_e64.
2474+ (rotr v2i32:$src0, v2i32:$src1),
2475+ (REG_SEQUENCE VReg_64, // Recombines the two per-lane results into a VReg_64 value.
2476+ (V_ALIGNBIT_B32_fake16_e64
2477+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)), // Each leading 0 mirrors the src*_modifiers slot annotated on the neighbouring fake16 patterns.
2478+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)), // The same lane of $src0 is passed as both data operands.
2479+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src1, sub0)),0,0), sub0, // Low lane: amount from $src1.sub0; the trailing 0,0 fill the clamp and op_sel slots.
2480+ (V_ALIGNBIT_B32_fake16_e64
2481+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2482+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2483+ 0, (i32 (EXTRACT_SUBREG VReg_64:$src1, sub1)),0,0), sub1) // High lane: amount from $src1.sub1.
2484+ >;
2485+
2486+
24262487def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
24272488 (V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
24282489 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
@@ -2449,6 +2510,7 @@ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
24492510>;
24502511} // end True16Predicate = UseFakeTrue16Insts
24512512
2513+
24522514/********** ====================== **********/
24532515/********** Indirect addressing **********/
24542516/********** ====================== **********/
0 commit comments