Skip to content

Commit a862618

Browse files
committed
extending new uniform patterns to all fshr cases; tests updated
1 parent a46e108 commit a862618

File tree

4 files changed

+1022
-596
lines changed

4 files changed

+1022
-596
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2694,12 +2694,21 @@ def : GCNPat<pat,
26942694
$src1, /* clamp */ 0, /* op_sel */ 0)
26952695
>;
26962696

2697-
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
2697+
// Divergent i32 fshr: select V_ALIGNBIT_B32_opsel_e64, passing $src0, $src1
// and $src2 through unchanged with all source modifiers, clamp and op_sel
// set to 0.
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
26982698
(V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
26992699
/* src1_modifiers */ 0, $src1,
27002700
/* src2_modifiers */ 0,
27012701
$src2, /* clamp */ 0, /* op_sel */ 0)
27022702
>;
2703+
2704+
// Uniform i32 fshr with a register shift amount: build a 64-bit SReg_64 value
// with $src1 in sub0 and $src0 in sub1, shift it right with S_LSHR_B64 by
// ($src2 AND 31), and take sub0 of the result.
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
2705+
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), (S_AND_B32 $src2, (i32 31))), sub0))
2706+
>;
2707+
2708+
// Uniform i32 fshr whose shift amount matches ShiftAmt32Imm: same
// REG_SEQUENCE / S_LSHR_B64 / sub0 sequence as the register-amount form, but
// $src2 is passed to the shift directly, without the S_AND_B32 mask.
// NOTE(review): presumably ShiftAmt32Imm only matches immediates already in
// range for a 32-bit shift -- confirm against its definition.
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
2709+
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), $src2), sub0))
2710+
>;
2711+
27032712
} // isGFX9GFX10
27042713
} // end True16Predicate = NotHasTrue16BitInsts
27052714

@@ -2722,12 +2731,21 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
27222731
(i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
27232732
/* clamp */ 0, /* op_sel */ 0)>;
27242733

2725-
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
2734+
// Divergent i32 fshr: select V_ALIGNBIT_B32_t16_e64 with all source
// modifiers, clamp and op_sel set to 0. The shift amount operand is the lo16
// subregister extracted from $src2 (as a VGPR_32).
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
27262735
(V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
27272736
/* src1_modifiers */ 0, $src1,
27282737
/* src2_modifiers */ 0,
27292738
(EXTRACT_SUBREG VGPR_32:$src2, lo16),
27302739
/* clamp */ 0, /* op_sel */ 0)>;
2740+
2741+
// Uniform i32 fshr with a register shift amount: build a 64-bit SReg_64 value
// with $src1 in sub0 and $src0 in sub1, shift it right with S_LSHR_B64 by
// ($src2 AND 31), and take sub0 of the result.
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
2742+
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), (S_AND_B32 $src2, (i32 31))), sub0))
2743+
>;
2744+
2745+
// Uniform i32 fshr whose shift amount matches ShiftAmt32Imm: same
// REG_SEQUENCE / S_LSHR_B64 / sub0 sequence as the register-amount form, but
// $src2 is passed to the shift directly, without the S_AND_B32 mask.
// NOTE(review): presumably ShiftAmt32Imm only matches immediates already in
// range for a 32-bit shift -- confirm against its definition.
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
2746+
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), $src2), sub0))
2747+
>;
2748+
27312749
} // end True16Predicate = UseRealTrue16Insts
27322750

27332751
let True16Predicate = UseFakeTrue16Insts in {
@@ -2757,26 +2775,20 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
27572775
$src1, /* clamp */ 0, /* op_sel */ 0)
27582776
>;
27592777

2760-
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
2778+
// Divergent i32 fshr: select V_ALIGNBIT_B32_fake16_e64, passing $src0, $src1
// and $src2 through unchanged with all source modifiers, clamp and op_sel
// set to 0.
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
27612779
(V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
27622780
/* src1_modifiers */ 0, $src1,
27632781
/* src2_modifiers */ 0,
27642782
$src2, /* clamp */ 0, /* op_sel */ 0)
27652783
>;
27662784

2767-
// The commented out code has been left intentionally to aid the review process, if needed.
2768-
// Will delete before landing.
2769-
//def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
2770-
// (S_OR_B32 (S_LSHR_B32 $src1, $src2), (S_LSHL_B32 $src0, (S_SUB_I32 (i32 32), $src2)))
2771-
//>;
2772-
27732785
// Uniform i32 fshr with a register shift amount: build a 64-bit SReg_64 value
// with $src1 in sub0 and $src0 in sub1, shift it right with S_LSHR_B64 by
// ($src2 AND 31), and take sub0 of the result.
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
27742786
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), (S_AND_B32 $src2, (i32 31))), sub0))
27752787
>;
27762788

2777-
//def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
2778-
// (i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), $src2), sub0))
2779-
//>;
2789+
// Uniform i32 fshr whose shift amount matches ShiftAmt32Imm: same
// REG_SEQUENCE / S_LSHR_B64 / sub0 sequence as the register-amount form, but
// $src2 is passed to the shift directly, without the S_AND_B32 mask.
// NOTE(review): presumably ShiftAmt32Imm only matches immediates already in
// range for a 32-bit shift -- confirm against its definition.
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
2790+
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), $src2), sub0))
2791+
>;
27802792

27812793
} // end True16Predicate = UseFakeTrue16Insts
27822794

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,17 @@ class DivergentBinFrag<SDPatternOperator Op> : PatFrag <
207207
let GISelPredicateCode = [{return true;}];
208208
}
209209

210+
// PatFrag wrapper for a three-operand operator Op: matches
// (Op $src0, $src1, $src2) only when the predicate N->isDivergent() holds.
// The GlobalISel predicate is unconditionally true (see the note inside).
class DivergentTernaryFrag<SDPatternOperator Op> : PatFrag <
211+
(ops node:$src0, node:$src1, node:$src2),
212+
(Op $src0, $src1, $src2),
213+
[{ return N->isDivergent(); }]> {
214+
// This check is unnecessary as it's captured by the result register
215+
// bank constraint.
216+
//
217+
// FIXME: Should add a way for the emitter to recognize this is a
218+
// trivially true predicate to eliminate the check.
219+
let GISelPredicateCode = [{return true;}];
220+
}
210221

211222
let isMoveImm = 1 in {
212223
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {

0 commit comments

Comments
 (0)