Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2694,12 +2694,13 @@ def : GCNPat<pat,
$src1, /* clamp */ 0, /* op_sel */ 0)
>;

def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
(V_ALIGNBIT_B32_opsel_e64 /* src0_modifiers */ 0, $src0,
/* src1_modifiers */ 0, $src1,
/* src2_modifiers */ 0,
$src2, /* clamp */ 0, /* op_sel */ 0)
>;

} // isGFX9GFX10
} // end True16Predicate = NotHasTrue16BitInsts

Expand All @@ -2722,12 +2723,13 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
(i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
/* clamp */ 0, /* op_sel */ 0)>;

def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
(V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
/* src1_modifiers */ 0, $src1,
/* src2_modifiers */ 0,
(EXTRACT_SUBREG VGPR_32:$src2, lo16),
/* clamp */ 0, /* op_sel */ 0)>;

} // end True16Predicate = UseRealTrue16Insts

let True16Predicate = UseFakeTrue16Insts in {
Expand Down Expand Up @@ -2757,12 +2759,13 @@ def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:
$src1, /* clamp */ 0, /* op_sel */ 0)
>;

def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
def : GCNPat<(DivergentTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
(V_ALIGNBIT_B32_fake16_e64 /* src0_modifiers */ 0, $src0,
/* src1_modifiers */ 0, $src1,
/* src2_modifiers */ 0,
$src2, /* clamp */ 0, /* op_sel */ 0)
>;

} // end True16Predicate = UseFakeTrue16Insts

/********** ====================== **********/
Expand Down Expand Up @@ -3852,6 +3855,14 @@ class PackB32Pat<Instruction inst> : GCNPat <
>;
}
let SubtargetPredicate = isGFX9Plus in {
// Uniform 32-bit fshr selected to scalar instructions: build a 64-bit
// register pair with $src1 in sub0 and $src0 in sub1, shift it right by
// ($src2 & 31) with S_LSHR_B64, and take sub0 of the result.
def : GCNPat<
  (UniformTernaryFrag<fshr> i32:$src0, i32:$src1, i32:$src2),
  (i32 (EXTRACT_SUBREG
         (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1),
                     (S_AND_B32 $src2, (i32 31))),
         sub0))
>;

// Uniform 32-bit fshr whose shift amount is matched by ShiftAmt32Imm:
// build the 64-bit pair ($src1 in sub0, $src0 in sub1), pass the matched
// amount straight to S_LSHR_B64 (no S_AND_B32 mask), and take sub0.
def : GCNPat<
  (UniformTernaryFrag<fshr> i32:$src0, i32:$src1,
                            (i32 ShiftAmt32Imm:$src2)),
  (i32 (EXTRACT_SUBREG
         (S_LSHR_B64 (REG_SEQUENCE SReg_64, $src1, sub0, $src0, sub1),
                     $src2),
         sub0))
>;

let True16Predicate = NotHasTrue16BitInsts in
def : PackB32Pat<V_PACK_B32_F16_e64>;

Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,17 @@ class DivergentBinFrag<SDPatternOperator Op> : PatFrag <
let GISelPredicateCode = [{return true;}];
}

class DivergentTernaryFrag<SDPatternOperator Op> : PatFrag <
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you check whether this is really required? I.e., do any tests fail if you remove this from the V_ALIGNBIT patterns?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No tests fail if I remove DivergentTernaryFrag from the V_ALIGNBIT patterns.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then I suggest removing this from the patch.

(ops node:$src0, node:$src1, node:$src2),
(Op $src0, $src1, $src2),
[{ return N->isDivergent(); }]> {
// This check is unnecessary as it's captured by the result register
// bank constraint.
//
// FIXME: Should add a way for the emitter to recognize this is a
// trivially true predicate to eliminate the check.
let GISelPredicateCode = [{return true;}];
}

let isMoveImm = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
Expand Down
Loading
Loading