Skip to content

Commit 3604206

Browse files
committed
Enable the uniform fshr scalar pattern for all architectures
1 parent 2de3d6b commit 3604206

23 files changed

+21228
-19067
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,15 @@ def : GCNPat<
791791
(SI_CALL_ISEL $src0, (i64 0))
792792
>;
793793

794+
// Map fshr with uniform inputs to scalar instructions: S_LSHR_B64 on the $src0:$src1 register pair, keeping sub0 of the result.
795+
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
796+
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), (S_AND_B32 $src2, (i32 31))), sub0))
797+
>;
798+
799+
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
800+
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), $src2), sub0))
801+
>;
802+
794803
// Wrapper around s_swappc_b64 with extra $callee parameter to track
795804
// the called function after regalloc.
796805
def SI_CALL : SPseudoInstSI <
@@ -2694,7 +2703,7 @@ def : GCNPat<pat,
26942703
$src1, /* clamp */ 0, /* op_sel */ 0)
26952704
>;
26962705

2697-
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
2706+
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
26982707
(V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
26992708
/* src1_modifiers */ 0, $src1,
27002709
/* src2_modifiers */ 0,
@@ -2723,7 +2732,7 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
27232732
(i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
27242733
/* clamp */ 0, /* op_sel */ 0)>;
27252734

2726-
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
2735+
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
27272736
(V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
27282737
/* src1_modifiers */ 0, $src1,
27292738
/* src2_modifiers */ 0,
@@ -2759,7 +2768,7 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
27592768
$src1, /* clamp */ 0, /* op_sel */ 0)
27602769
>;
27612770

2762-
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
2771+
def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
27632772
(V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
27642773
/* src1_modifiers */ 0, $src1,
27652774
/* src2_modifiers */ 0,
@@ -3854,15 +3863,8 @@ class PackB32Pat<Instruction inst> : GCNPat <
38543863
(inst $src0_mods, VGPR_32:$src0, $src1_mods, VGPR_32:$src1)
38553864
>;
38563865
}
3857-
let SubtargetPredicate = isGFX9Plus in {
3858-
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
3859-
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), (S_AND_B32 $src2, (i32 31))), sub0))
3860-
>;
38613866

3862-
def : GCNPat<(UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
3863-
(i32 (EXTRACT_SUBREG (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1), $src2), sub0))
3864-
>;
3865-
3867+
let SubtargetPredicate = isGFX9Plus in {
38663868
let True16Predicate = NotHasTrue16BitInsts in
38673869
def : PackB32Pat<V_PACK_B32_F16_e64>;
38683870

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -207,18 +207,6 @@ class DivergentBinFrag<SDPatternOperator Op> : PatFrag <
207207
let GISelPredicateCode = [{return true;}];
208208
}
209209

210-
class DivergentTernaryFrag<SDPatternOperator Op> : PatFrag <
211-
(ops node:$src0, node:$src1, node:$src2),
212-
(Op $src0, $src1, $src2),
213-
[{ return N->isDivergent(); }]> {
214-
// This check is unnecessary as it's captured by the result register
215-
// bank constraint.
216-
//
217-
// FIXME: Should add a way for the emitter to recognize this is a
218-
// trivially true predicate to eliminate the check.
219-
let GISelPredicateCode = [{return true;}];
220-
}
221-
222210
let isMoveImm = 1 in {
223211
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
224212
def S_MOV_B32 : SOP1_32 <"s_mov_b32">;

0 commit comments

Comments
 (0)