Skip to content

Commit 7aee700

Browse files
petar-avramovic authored and memfrob committed
AMDGPU/GlobalISel: Lower G_FREM
Add custom lower for G_FREM. Differential Revision: https://reviews.llvm.org/D84324
1 parent eeeb2dc commit 7aee700

File tree

4 files changed

+1226
-0
lines changed

4 files changed

+1226
-0
lines changed

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1539,6 +1539,13 @@ class MachineIRBuilder {
15391539
return buildInstr(TargetOpcode::G_FSUB, {Dst}, {Src0, Src1}, Flags);
15401540
}
15411541

1542+
/// Build and insert \p Dst = G_FDIV \p Src0, \p Src1
///
/// \p Flags is optional (defaults to None) and is forwarded unchanged to
/// buildInstr together with the destination and the two source operands.
///
/// \return the MachineInstrBuilder produced by buildInstr.
1543+
MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0,
1544+
const SrcOp &Src1,
1545+
Optional<unsigned> Flags = None) {
1546+
return buildInstr(TargetOpcode::G_FDIV, {Dst}, {Src0, Src1}, Flags);
1547+
}
1548+
15421549
/// Build and insert \p Res = G_FMA \p Op0, \p Op1, \p Op2
15431550
MachineInstrBuilder buildFMA(const DstOp &Dst, const SrcOp &Src0,
15441551
const SrcOp &Src1, const SrcOp &Src2,

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
708708
FMad.scalarize(0)
709709
.lower();
710710

711+
auto &FRem = getActionDefinitionsBuilder(G_FREM);
712+
if (ST.has16BitInsts()) {
713+
FRem.customFor({S16, S32, S64});
714+
} else {
715+
FRem.minScalar(0, S32)
716+
.customFor({S32, S64});
717+
}
718+
FRem.scalarize(0);
719+
711720
// TODO: Do we need to clamp maximum bitwidth?
712721
getActionDefinitionsBuilder(G_TRUNC)
713722
.legalIf(isScalar(0))
@@ -1601,6 +1610,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
16011610
return legalizeFrint(MI, MRI, B);
16021611
case TargetOpcode::G_FCEIL:
16031612
return legalizeFceil(MI, MRI, B);
1613+
case TargetOpcode::G_FREM:
1614+
return legalizeFrem(MI, MRI, B);
16041615
case TargetOpcode::G_INTRINSIC_TRUNC:
16051616
return legalizeIntrinsicTrunc(MI, MRI, B);
16061617
case TargetOpcode::G_SITOFP:
@@ -1869,6 +1880,23 @@ bool AMDGPULegalizerInfo::legalizeFceil(
18691880
return true;
18701881
}
18711882

1883+
// Custom legalization for G_FREM: replaces \p MI with the sequence
//   Div   = G_FDIV Src0, Src1
//   Trunc = G_INTRINSIC_TRUNC Div
//   Neg   = G_FNEG Trunc
//   Dst   = G_FMA Neg, Src1, Src0
// i.e. Dst = Src0 - trunc(Src0 / Src1) * Src1. The flags of \p MI are
// passed to every instruction that is built. \p MI is erased afterwards and
// the function always returns true.
bool AMDGPULegalizerInfo::legalizeFrem(
1884+
MachineInstr &MI, MachineRegisterInfo &MRI,
1885+
MachineIRBuilder &B) const {
1886+
// Operand 0 is the result; operands 1 and 2 are the two sources.
Register DstReg = MI.getOperand(0).getReg();
1887+
Register Src0Reg = MI.getOperand(1).getReg();
1888+
Register Src1Reg = MI.getOperand(2).getReg();
1889+
auto Flags = MI.getFlags();
1890+
// All intermediate values are created with the type of the destination.
LLT Ty = MRI.getType(DstReg);
1891+
1892+
// Quotient truncated toward zero, then negated so the final step can be a
// single fused multiply-add: (-Trunc) * Src1 + Src0.
auto Div = B.buildFDiv(Ty, Src0Reg, Src1Reg, Flags);
1893+
auto Trunc = B.buildIntrinsicTrunc(Ty, Div, Flags);
1894+
auto Neg = B.buildFNeg(Ty, Trunc, Flags);
1895+
B.buildFMA(DstReg, Neg, Src1Reg, Src0Reg, Flags);
1896+
// The original G_FREM is fully replaced by the sequence above.
MI.eraseFromParent();
1897+
return true;
1898+
}
1899+
18721900
static MachineInstrBuilder extractF64Exponent(Register Hi,
18731901
MachineIRBuilder &B) {
18741902
const unsigned FractBits = 52;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
4444
MachineIRBuilder &B) const;
4545
bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI,
4646
MachineIRBuilder &B) const;
47+
bool legalizeFrem(MachineInstr &MI, MachineRegisterInfo &MRI,
48+
MachineIRBuilder &B) const;
4749
bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
4850
MachineIRBuilder &B) const;
4951
bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,

0 commit comments

Comments
 (0)