Skip to content

Commit 26b0bef

Browse files
authored
AMDGPU: Use pattern to select instruction for intrinsic llvm.fptrunc.round (#105761)
Use a GCNPat instead of custom lowering to select instructions for the intrinsic llvm.fptrunc.round. "SupportedRoundMode : TImmLeaf" is used as a predicate so that selection happens only when the rounding mode is supported. "as_hw_round_mode : SDNodeXForm" is added to translate each rounding mode to the corresponding value that the hardware recognizes.
1 parent 22ba351 commit 26b0bef

16 files changed

+128
-161
lines changed

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ def : GINodeEquiv<G_FFLOOR, ffloor>;
161161
def : GINodeEquiv<G_FRINT, frint>;
162162
def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
163163
def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>;
164+
def : GINodeEquiv<G_INTRINSIC_FPTRUNC_ROUND, fptrunc_round>;
164165
def : GINodeEquiv<G_INTRINSIC_ROUND, fround>;
165166
def : GINodeEquiv<G_INTRINSIC_ROUNDEVEN, froundeven>;
166167
def : GINodeEquiv<G_INTRINSIC_LRINT, lrint>;

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ def SDTFPUnaryOp : SDTypeProfile<1, 1, [ // fneg, fsqrt, etc
158158
def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fpround
159159
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
160160
]>;
161+
def SDTFPTruncRoundOp : SDTypeProfile<1, 2, [
162+
SDTCisFP<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
163+
]>;
161164
def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend
162165
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
163166
]>;
@@ -552,6 +555,8 @@ def llround : SDNode<"ISD::LLROUND" , SDTFPToIntOp>;
552555
def lrint : SDNode<"ISD::LRINT" , SDTFPToIntOp>;
553556
def llrint : SDNode<"ISD::LLRINT" , SDTFPToIntOp>;
554557

558+
def fptrunc_round : SDNode<"ISD::FPTRUNC_ROUND", SDTFPTruncRoundOp>;
559+
555560
def fpround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>;
556561
def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>;
557562
def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>;

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,6 @@ def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_UBYTE, SIsbuffer_load_ubyte>;
297297
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SSHORT, SIsbuffer_load_short>;
298298
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_USHORT, SIsbuffer_load_ushort>;
299299

300-
def : GINodeEquiv<G_FPTRUNC_ROUND, SIfptrunc_round>;
301-
302300
class GISelSop2Pat <
303301
SDPatternOperator node,
304302
Instruction inst,
@@ -419,3 +417,6 @@ def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameInde
419417

420418
def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
421419
GISDNodeXFormEquiv<FPPow2ToExponentXForm>;
420+
421+
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
422+
GISDNodeXFormEquiv<as_hw_round_mode>;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5511,7 +5511,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
55115511
NODE_NAME_CASE(CONST_DATA_PTR)
55125512
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
55135513
NODE_NAME_CASE(LDS)
5514-
NODE_NAME_CASE(FPTRUNC_ROUND)
55155514
NODE_NAME_CASE(DUMMY_CHAIN)
55165515
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
55175516
NODE_NAME_CASE(LOAD_D16_HI)

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,6 @@ enum NodeType : unsigned {
553553
CONST_DATA_PTR,
554554
PC_ADD_REL_OFFSET,
555555
LDS,
556-
FPTRUNC_ROUND,
557556

558557
DUMMY_CHAIN,
559558
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5594,6 +5594,16 @@ void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
55945594
MIB.addImm(ExpVal);
55955595
}
55965596

5597+
void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
5598+
const MachineInstr &MI,
5599+
int OpIdx) const {
5600+
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
5601+
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
5602+
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
5603+
// "round.downward" -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
5604+
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
5605+
}
5606+
55975607
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
55985608
return TII.isInlineConstant(Imm);
55995609
}

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
359359
void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI,
360360
int OpIdx) const;
361361

362+
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
363+
int OpIdx) const;
364+
362365
bool isInlineImmediate(const APInt &Imm) const;
363366
bool isInlineImmediate(const APFloat &Imm) const;
364367

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,7 +1137,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
11371137
.lower();
11381138

11391139
getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND)
1140-
.customFor({S16, S32})
1140+
.legalFor({S16, S32})
11411141
.scalarize(0)
11421142
.lower();
11431143

@@ -2179,8 +2179,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21792179
return legalizeCTLZ_CTTZ(MI, MRI, B);
21802180
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
21812181
return legalizeCTLZ_ZERO_UNDEF(MI, MRI, B);
2182-
case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
2183-
return legalizeFPTruncRound(MI, B);
21842182
case TargetOpcode::G_STACKSAVE:
21852183
return legalizeStackSave(MI, B);
21862184
case TargetOpcode::G_GET_FPENV:
@@ -7093,35 +7091,6 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
70937091
return true;
70947092
}
70957093

7096-
bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
7097-
MachineIRBuilder &B) const {
7098-
MachineRegisterInfo &MRI = *B.getMRI();
7099-
Register Src = MI.getOperand(1).getReg();
7100-
if (MRI.getType(Src) != LLT::scalar(32))
7101-
return false;
7102-
7103-
// Only support towardzero, tonearest, upward and downward.
7104-
int RoundMode = MI.getOperand(2).getImm();
7105-
if (RoundMode != (int)RoundingMode::TowardZero &&
7106-
RoundMode != (int)RoundingMode::NearestTiesToEven &&
7107-
RoundMode != (int)RoundingMode::TowardPositive &&
7108-
RoundMode != (int)RoundingMode::TowardNegative)
7109-
return false;
7110-
7111-
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
7112-
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
7113-
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
7114-
// "round.downward" -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
7115-
unsigned HW_Mode = (RoundMode + 3) % 4;
7116-
B.buildInstr(AMDGPU::G_FPTRUNC_ROUND)
7117-
.addDef(MI.getOperand(0).getReg())
7118-
.addUse(Src)
7119-
.addImm(HW_Mode);
7120-
7121-
MI.eraseFromParent();
7122-
return true;
7123-
}
7124-
71257094
bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
71267095
MachineIRBuilder &B) const {
71277096
const SITargetLowering *TLI = ST.getTargetLowering();

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,6 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
212212

213213
bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
214214

215-
bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
216215
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
217216
bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;
218217

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5255,7 +5255,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
52555255
OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
52565256
break;
52575257
}
5258-
case AMDGPU::G_FPTRUNC_ROUND:
5258+
case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
52595259
return getDefaultMappingVOP(MI);
52605260
case AMDGPU::G_PREFETCH:
52615261
OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

0 commit comments

Comments
 (0)