Skip to content

Commit 9102ec6

Browse files
committed
Review feedback 2: Don't run AMDGPUAttributor for R600.
1 parent 4becdd9 commit 9102ec6

File tree

2 files changed

+21
-25
lines changed

2 files changed

+21
-25
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 10 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -162,16 +162,12 @@ class AMDGPUInformationCache : public InformationCache {
162162

163163
/// Check if the subtarget has aperture regs.
164164
bool hasApertureRegs(Function &F) {
165-
if (!TM.getTargetTriple().isAMDGCN())
166-
return false;
167165
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
168166
return ST.hasApertureRegs();
169167
}
170168

171169
/// Check if the subtarget supports GetDoorbellID.
172170
bool supportsGetDoorbellID(Function &F) {
173-
if (!TM.getTargetTriple().isAMDGCN())
174-
return false;
175171
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
176172
return ST.supportsGetDoorbellID();
177173
}
@@ -186,18 +182,18 @@ class AMDGPUInformationCache : public InformationCache {
186182

187183
std::pair<unsigned, unsigned>
188184
getDefaultFlatWorkGroupSize(const Function &F) const {
189-
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
185+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
190186
return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
191187
}
192188

193189
std::pair<unsigned, unsigned>
194190
getMaximumFlatWorkGroupRange(const Function &F) {
195-
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
191+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
196192
return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
197193
}
198194

199195
SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
200-
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
196+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
201197
return ST.getMaxNumWorkGroups(F);
202198
}
203199

@@ -210,7 +206,7 @@ class AMDGPUInformationCache : public InformationCache {
210206
std::pair<unsigned, unsigned>
211207
getWavesPerEU(const Function &F,
212208
std::pair<unsigned, unsigned> FlatWorkGroupSize) {
213-
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
209+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
214210
return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
215211
}
216212

@@ -221,7 +217,7 @@ class AMDGPUInformationCache : public InformationCache {
221217
if (!Val)
222218
return std::nullopt;
223219
if (!Val->second) {
224-
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
220+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
225221
Val->second = ST.getMaxWavesPerEU();
226222
}
227223
return std::make_pair(Val->first, *(Val->second));
@@ -231,13 +227,13 @@ class AMDGPUInformationCache : public InformationCache {
231227
getEffectiveWavesPerEU(const Function &F,
232228
std::pair<unsigned, unsigned> WavesPerEU,
233229
std::pair<unsigned, unsigned> FlatWorkGroupSize) {
234-
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
230+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
235231
return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
236232
getLDSSize(F));
237233
}
238234

239235
unsigned getMaxWavesPerEU(const Function &F) {
240-
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
236+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
241237
return ST.getMaxWavesPerEU();
242238
}
243239

@@ -1515,11 +1511,9 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15151511
A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
15161512
}
15171513

1518-
if (TM.getTargetTriple().isAMDGCN()) {
1519-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1520-
if (!F->isDeclaration() && ST.hasClusters())
1521-
A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1522-
}
1514+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1515+
if (!F->isDeclaration() && ST.hasClusters())
1516+
A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
15231517

15241518
for (auto &I : instructions(F)) {
15251519
Value *Ptr = nullptr;

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -924,16 +924,18 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
924924
});
925925

926926
// FIXME: Why is AMDGPUAttributor not in CGSCC?
927-
PB.registerOptimizerLastEPCallback([this](ModulePassManager &MPM,
928-
OptimizationLevel Level,
929-
ThinOrFullLTOPhase Phase) {
930-
if (Level != OptimizationLevel::O0) {
931-
if (!isLTOPreLink(Phase)) {
932-
AMDGPUAttributorOptions Opts;
933-
MPM.addPass(AMDGPUAttributorPass(*this, Opts, Phase));
927+
if (getTargetTriple().isAMDGCN()) {
928+
PB.registerOptimizerLastEPCallback([this](ModulePassManager &MPM,
929+
OptimizationLevel Level,
930+
ThinOrFullLTOPhase Phase) {
931+
if (Level != OptimizationLevel::O0) {
932+
if (!isLTOPreLink(Phase)) {
933+
AMDGPUAttributorOptions Opts;
934+
MPM.addPass(AMDGPUAttributorPass(*this, Opts, Phase));
935+
}
934936
}
935-
}
936-
});
937+
});
938+
}
937939

938940
PB.registerFullLinkTimeOptimizationLastEPCallback(
939941
[this](ModulePassManager &PM, OptimizationLevel Level) {

0 commit comments

Comments
 (0)