Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2694,12 +2694,21 @@ def : GCNPat<pat,
$src1, /* clamp */ 0, /* op_sel */ 0)
>;

def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
(V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
/* src1_modifiers */ 0, $src1,
/* src2_modifiers */ 0,
$src2, /* clamp */ 0, /* op_sel */ 0)
>;

// Uniform 32-bit funnel shift right (fshr) with a variable shift amount.
// A 64-bit SReg_64 value is assembled with $src1 in sub0 and $src0 in sub1,
// shifted right by ($src2 & 31) using S_LSHR_B64, and sub0 of the shifted
// value is taken as the i32 result.
def : GCNPat<
  (UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
  (i32 (EXTRACT_SUBREG
         (S_LSHR_B64
           (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1),
           (S_AND_B32 $src2, (i32 31))),
         sub0))
>;

// Uniform 32-bit fshr whose shift amount is an immediate accepted by
// ShiftAmt32Imm. The immediate is handed to S_LSHR_B64 as-is, without an
// S_AND_B32 mask.
// NOTE(review): presumably ShiftAmt32Imm only admits values in [0, 31], which
// is what makes skipping the mask safe; confirm against its definition.
def : GCNPat<
  (UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
  (i32 (EXTRACT_SUBREG
         (S_LSHR_B64
           (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1),
           $src2),
         sub0))
>;

} // isGFX9GFX10
} // end True16Predicate = NotHasTrue16BitInsts

Expand All @@ -2722,12 +2731,21 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
(i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
/* clamp */ 0, /* op_sel */ 0)>;

def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
(V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
/* src1_modifiers */ 0, $src1,
/* src2_modifiers */ 0,
(EXTRACT_SUBREG VGPR_32:$src2, lo16),
/* clamp */ 0, /* op_sel */ 0)>;

// Uniform 32-bit funnel shift right (fshr) with a variable shift amount,
// for the UseRealTrue16Insts predicate scope.
// $src1 goes into sub0 and $src0 into sub1 of a 64-bit SReg_64 value, which
// is shifted right by ($src2 & 31) with S_LSHR_B64; sub0 of the shifted value
// is the i32 result.
def : GCNPat<
  (UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
  (i32 (EXTRACT_SUBREG
         (S_LSHR_B64
           (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1),
           (S_AND_B32 $src2, (i32 31))),
         sub0))
>;

// Uniform 32-bit fshr with an immediate shift amount matched by
// ShiftAmt32Imm, for the UseRealTrue16Insts predicate scope. The immediate is
// used directly as the S_LSHR_B64 shift operand; no S_AND_B32 mask is emitted.
// NOTE(review): presumably ShiftAmt32Imm only admits values in [0, 31];
// confirm against its definition.
def : GCNPat<
  (UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
  (i32 (EXTRACT_SUBREG
         (S_LSHR_B64
           (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1),
           $src2),
         sub0))
>;

} // end True16Predicate = UseRealTrue16Insts

let True16Predicate = UseFakeTrue16Insts in {
Expand Down Expand Up @@ -2757,12 +2775,21 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
$src1, /* clamp */ 0, /* op_sel */ 0)
>;

def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
(V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
/* src1_modifiers */ 0, $src1,
/* src2_modifiers */ 0,
$src2, /* clamp */ 0, /* op_sel */ 0)
>;

// Uniform 32-bit funnel shift right (fshr) with a variable shift amount,
// for the UseFakeTrue16Insts predicate scope.
// Builds a 64-bit SReg_64 value from $src1 (sub0) and $src0 (sub1), shifts it
// right by ($src2 & 31) with S_LSHR_B64, and returns sub0 of the shifted
// value as the i32 result.
def : GCNPat<
  (UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
  (i32 (EXTRACT_SUBREG
         (S_LSHR_B64
           (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1),
           (S_AND_B32 $src2, (i32 31))),
         sub0))
>;

// Uniform 32-bit fshr with an immediate shift amount matched by
// ShiftAmt32Imm, for the UseFakeTrue16Insts predicate scope. $src2 is passed
// straight to S_LSHR_B64 with no S_AND_B32 mask.
// NOTE(review): presumably ShiftAmt32Imm only admits values in [0, 31];
// confirm against its definition.
def : GCNPat<
  (UniformTernaryFrag<fshr> i32:$src0, i32:$src1, (i32 ShiftAmt32Imm:$src2)),
  (i32 (EXTRACT_SUBREG
         (S_LSHR_B64
           (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1),
           $src2),
         sub0))
>;

} // end True16Predicate = UseFakeTrue16Insts

/********** ====================== **********/
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,17 @@ class DivergentBinFrag<SDPatternOperator Op> : PatFrag <
let GISelPredicateCode = [{return true;}];
}

// Pattern fragment wrapping a three-operand operator `Op`. The fragment
// forwards $src0, $src1 and $src2 to `Op` unchanged and, for SelectionDAG,
// only matches when the node reports itself as divergent
// (N->isDivergent()). For GlobalISel the predicate code below always
// returns true.
class DivergentTernaryFrag<SDPatternOperator Op> : PatFrag <
(ops node:$src0, node:$src1, node:$src2),
(Op $src0, $src1, $src2),
[{ return N->isDivergent(); }]> {
// This check is unnecessary as it's captured by the result register
// bank constraint.
//
// FIXME: Should add a way for the emitter to recognize this is a
// trivially true predicate to eliminate the check.
let GISelPredicateCode = [{return true;}];
}

let isMoveImm = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
Expand Down
Loading