
Commit d08abf2

[AMDGPU] Implement IR variant of isFMAFasterThanFMulAndFAdd
Change-Id: I2484db303227da9aa53cc8842283c4ba6a332b3a
1 parent 0965515 commit d08abf2

3 files changed: 246 additions & 0 deletions

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 58 additions & 0 deletions
@@ -5728,6 +5728,33 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
   return false;
 }
 
+// Refer to comments added to the MIR variant of isFMAFasterThanFMulAndFAdd for
+// specific details.
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+                                                  Type *Ty) const {
+  SIModeRegisterDefaults Mode = SIModeRegisterDefaults(F, *Subtarget);
+  switch (Ty->getScalarSizeInBits()) {
+  case 32: {
+    if (!Subtarget->hasMadMacF32Insts())
+      return Subtarget->hasFastFMAF32();
+
+    if (Mode.FP32Denormals != DenormalMode::getPreserveSign())
+      return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
+
+    return Subtarget->hasFastFMAF32() && Subtarget->hasDLInsts();
+  }
+  case 64:
+    return true;
+  case 16:
+    return Subtarget->has16BitInsts() &&
+           Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
+  default:
+    break;
+  }
+
+  return false;
+}
+
 bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
   if (!Ty.isScalar())
     return false;
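
A minimal usage sketch of the new IR-level overload, assuming the base-class hook declared in llvm/CodeGen/TargetLowering.h; the helper name is illustrative and not part of this commit. The point of the overload is that code holding only a Function and a Type, with no MachineFunction available yet, can still ask the target whether a fused multiply-add is expected to beat a separate fmul/fadd pair.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"

// Illustrative helper (not from the commit): ask the target, at the IR level,
// whether fusing an fmul/fadd pair into an FMA is at least as fast.
static bool preferFMAAtIRLevel(const llvm::TargetLowering &TLI,
                               const llvm::Function &F, llvm::Type *Ty) {
  // On AMDGPU this dispatches to the SITargetLowering override added above,
  // which derives the FP mode from SIModeRegisterDefaults(F, *Subtarget).
  return TLI.isFMAFasterThanFMulAndFAdd(F, Ty);
}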
@@ -16942,6 +16969,37 @@ bool SITargetLowering::checkForPhysRegDependency(
   return false;
 }
 
+/// Check if it is profitable to hoist an instruction in then/else to if.
+/// Not profitable if I and its user can form an FMA instruction
+/// because we prefer FMSUB/FMADD.
+bool SITargetLowering::isProfitableToHoist(Instruction *I) const {
+  if (!I->hasOneUse())
+    return true;
+
+  Instruction *User = I->user_back();
+  // TODO: Add more patterns that are not profitable to hoist.
+  switch (I->getOpcode()) {
+  case Instruction::FMul: {
+    if (User->getOpcode() != Instruction::FSub &&
+        User->getOpcode() != Instruction::FAdd)
+      return true;
+
+    const TargetOptions &Options = getTargetMachine().Options;
+    const Function *F = I->getFunction();
+    const DataLayout &DL = F->getDataLayout();
+    Type *Ty = User->getOperand(0)->getType();
+
+    return !isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) ||
+           (Options.AllowFPOpFusion != FPOpFusion::Fast &&
+            !Options.UnsafeFPMath) ||
+           !isFMAFasterThanFMulAndFAdd(*F, Ty);
+  }
+  default:
+    return true;
+  }
+  return true;
+}
+
 void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
     Instruction *AI) const {
   // Given: atomicrmw fadd ptr %addr, float %val ordering
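
For context, a sketch of the caller side of isProfitableToHoist, assuming the existing TargetTransformInfo::isProfitableToHoist hook that forwards to TargetLowering through BasicTTIImpl; the helper below is illustrative and not part of this commit. Returning false keeps a single-use fmul next to its fadd/fsub user so the backend can still form FMSUB/FMADD.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

// Illustrative helper (not from the commit): the question a hoisting transform
// such as SimplifyCFG asks before moving an instruction from a then/else block
// into the common predecessor.
static bool mayHoistIntoPredecessor(const llvm::TargetTransformInfo &TTI,
                                    llvm::Instruction &I) {
  // With this patch, AMDGPU answers false for a one-use fmul feeding an
  // fadd/fsub when keeping the pair together for FMA formation is preferred.
  return TTI.isProfitableToHoist(&I);
}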

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 3 additions & 0 deletions
@@ -457,6 +457,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                                   EVT VT) const override;
   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                   const LLT Ty) const override;
+  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
   bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
   bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;
 
@@ -536,6 +537,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                         const TargetInstrInfo *TII, unsigned &PhysReg,
                         int &Cost) const override;
 
+  bool isProfitableToHoist(Instruction *I) const override;
+
   bool isKnownNeverNaNForTargetNode(SDValue Op,
                                     const SelectionDAG &DAG,
                                     bool SNaN = false,

Lines changed: 185 additions & 0 deletions
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -fp-contract=fast < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-FP-CONTRACT %s
+; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=ieee < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-IEEE %s
+; RUN: opt -S -passes=simplifycfg -verify-machineinstrs -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-PRESERVE %s
+
+define double @_branch(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-LABEL: define double @_branch(
+; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX-NEXT: [[ENTRY:.*:]]
+; GFX-NEXT: [[TMP0:%.*]] = load double, ptr [[Y]], align 8
+; GFX-NEXT: [[CMP:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00
+; GFX-NEXT: [[TMP1:%.*]] = load double, ptr [[X]], align 8
+; GFX-NEXT: [[TMP2:%.*]] = load double, ptr [[A]], align 8
+; GFX-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX: [[COMMON_RET:.*]]:
+; GFX-NEXT: [[COMMON_RET_OP:%.*]] = phi double [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-NEXT: ret double [[COMMON_RET_OP]]
+; GFX: [[IF_THEN]]:
+; GFX-NEXT: [[MUL]] = fmul fast double [[TMP1]], [[TMP2]]
+; GFX-NEXT: [[ADD:%.*]] = fadd fast double 1.000000e+00, [[MUL]]
+; GFX-NEXT: br label %[[COMMON_RET]]
+; GFX: [[IF_ELSE]]:
+; GFX-NEXT: [[MUL1:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
+; GFX-NEXT: [[SUB]] = fsub fast double [[MUL1]], [[TMP0]]
+; GFX-NEXT: br label %[[COMMON_RET]]
+;
+entry:
+  %0 = load double, ptr %y, align 8
+  %cmp = fcmp oeq double %0, 0.000000e+00
+  %1 = load double, ptr %x, align 8
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+  %2 = load double, ptr %a, align 8
+  %mul = fmul fast double %1, %2
+  %add = fadd fast double 1.000000e+00, %mul
+  ret double %mul
+
+if.else: ; preds = %entry
+  %3 = load double, ptr %a, align 8
+  %mul1 = fmul fast double %1, %3
+  %sub = fsub fast double %mul1, %0
+  ret double %sub
+}
+
+define float @_branch2(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-LABEL: define float @_branch2(
+; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-NEXT: [[ENTRY:.*:]]
+; GFX-NEXT: [[TMP0:%.*]] = load float, ptr [[Y]], align 8
+; GFX-NEXT: [[CMP:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
+; GFX-NEXT: [[TMP1:%.*]] = load float, ptr [[X]], align 8
+; GFX-NEXT: [[TMP2:%.*]] = load float, ptr [[A]], align 8
+; GFX-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX: [[COMMON_RET:.*]]:
+; GFX-NEXT: [[COMMON_RET_OP:%.*]] = phi float [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-NEXT: ret float [[COMMON_RET_OP]]
+; GFX: [[IF_THEN]]:
+; GFX-NEXT: [[MUL]] = fmul fast float [[TMP1]], [[TMP2]]
+; GFX-NEXT: [[ADD:%.*]] = fadd fast float 1.000000e+00, [[MUL]]
+; GFX-NEXT: br label %[[COMMON_RET]]
+; GFX: [[IF_ELSE]]:
+; GFX-NEXT: [[MUL1:%.*]] = fmul fast float [[TMP1]], [[TMP2]]
+; GFX-NEXT: [[SUB]] = fsub fast float [[MUL1]], [[TMP0]]
+; GFX-NEXT: br label %[[COMMON_RET]]
+;
+entry:
+  %0 = load float, ptr %y, align 8
+  %cmp = fcmp oeq float %0, 0.000000e+00
+  %1 = load float, ptr %x, align 8
+  br i1 %cmp, label %if.then, label %if.else
+
+
+if.then: ; preds = %entry
+  %2 = load float, ptr %a, align 8
+  %mul = fmul fast float %1, %2
+  %add = fadd fast float 1.000000e+00, %mul
+  ret float %mul
+
+if.else: ; preds = %entry
+  %3 = load float, ptr %a, align 8
+  %mul1 = fmul fast float %1, %3
+  %sub = fsub fast float %mul1, %0
+  ret float %sub
+}
+
+define half @_branch3(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-CONTRACT-LABEL: define half @_branchr32(
+; GFX-CONTRACT-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-CONTRACT-NEXT: [[ENTRY:.*:]]
+; GFX-CONTRACT-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-CONTRACT-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-CONTRACT-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-CONTRACT-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-CONTRACT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-CONTRACT: [[COMMON_RET:.*]]:
+; GFX-CONTRACT-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-CONTRACT-NEXT: ret half [[COMMON_RET_OP]]
+; GFX-CONTRACT: [[IF_THEN]]:
+; GFX-CONTRACT-NEXT: [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-CONTRACT-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-CONTRACT-NEXT: br label %[[COMMON_RET]]
+; GFX-CONTRACT: [[IF_ELSE]]:
+; GFX-CONTRACT-NEXT: [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-CONTRACT-NEXT: [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-CONTRACT-NEXT: br label %[[COMMON_RET]]
+;
+; GFX-FP-CONTRACT-LABEL: define half @_branch3(
+; GFX-FP-CONTRACT-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-FP-CONTRACT-NEXT: [[ENTRY:.*:]]
+; GFX-FP-CONTRACT-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-FP-CONTRACT-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-FP-CONTRACT-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-FP-CONTRACT-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-FP-CONTRACT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-FP-CONTRACT: [[COMMON_RET:.*]]:
+; GFX-FP-CONTRACT-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-FP-CONTRACT-NEXT: ret half [[COMMON_RET_OP]]
+; GFX-FP-CONTRACT: [[IF_THEN]]:
+; GFX-FP-CONTRACT-NEXT: [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-FP-CONTRACT-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-FP-CONTRACT-NEXT: br label %[[COMMON_RET]]
+; GFX-FP-CONTRACT: [[IF_ELSE]]:
+; GFX-FP-CONTRACT-NEXT: [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-FP-CONTRACT-NEXT: [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-FP-CONTRACT-NEXT: br label %[[COMMON_RET]]
+;
+; GFX-UNSAFE-FP-IEEE-LABEL: define half @_branch3(
+; GFX-UNSAFE-FP-IEEE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-UNSAFE-FP-IEEE-NEXT: [[ENTRY:.*:]]
+; GFX-UNSAFE-FP-IEEE-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-UNSAFE-FP-IEEE-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-UNSAFE-FP-IEEE: [[COMMON_RET:.*]]:
+; GFX-UNSAFE-FP-IEEE-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-UNSAFE-FP-IEEE-NEXT: ret half [[COMMON_RET_OP]]
+; GFX-UNSAFE-FP-IEEE: [[IF_THEN]]:
+; GFX-UNSAFE-FP-IEEE-NEXT: [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-IEEE-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-UNSAFE-FP-IEEE-NEXT: br label %[[COMMON_RET]]
+; GFX-UNSAFE-FP-IEEE: [[IF_ELSE]]:
+; GFX-UNSAFE-FP-IEEE-NEXT: [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-IEEE-NEXT: [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-UNSAFE-FP-IEEE-NEXT: br label %[[COMMON_RET]]
+;
+; GFX-UNSAFE-FP-PRESERVE-LABEL: define half @_branch3(
+; GFX-UNSAFE-FP-PRESERVE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[ENTRY:.*:]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[MUL:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-UNSAFE-FP-PRESERVE: [[COMMON_RET:.*]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: ret half [[COMMON_RET_OP]]
+; GFX-UNSAFE-FP-PRESERVE: [[IF_THEN]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: br label %[[COMMON_RET]]
+; GFX-UNSAFE-FP-PRESERVE: [[IF_ELSE]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[SUB]] = fsub fast half [[MUL]], [[TMP0]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: br label %[[COMMON_RET]]
+;
+entry:
+  %0 = load half, ptr %y, align 8
+  %cmp = fcmp oeq half %0, 0.000000e+00
+  %1 = load half, ptr %x, align 8
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+  %2 = load half, ptr %a, align 8
+  %mul = fmul fast half %1, %2
+  %add = fadd fast half 1.000000e+00, %mul
+  ret half %mul
+
+if.else: ; preds = %entry
+  %3 = load half, ptr %a, align 8
+  %mul1 = fmul fast half %1, %3
+  %sub = fsub fast half %mul1, %0
+  ret half %sub
+}
+
