Skip to content

Commit d6f7297

Browse files
committed
AMDGPU: don't call getSubtarget<GCNSubtarget> on R600 targets.
Unfortunately, `TargetMachine::getSubtarget<ST>` does an unchecked static_cast to `ST&`, which makes it easy to get wrong. The modifications here were created by running check-llvm with an assert added to getSubtarget. However, that assert requires RTTI to be enabled, which LLVM doesn't use, so I've reverted the assert before sending this fix upstream.
1 parent 8252b49 commit d6f7297

File tree

2 files changed

+17
-11
lines changed

2 files changed

+17
-11
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -162,12 +162,16 @@ class AMDGPUInformationCache : public InformationCache {
162162

163163
/// Check if the subtarget has aperture regs.
164164
bool hasApertureRegs(Function &F) {
165+
if (!TM.getTargetTriple().isAMDGCN())
166+
return false;
165167
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
166168
return ST.hasApertureRegs();
167169
}
168170

169171
/// Check if the subtarget supports GetDoorbellID.
170172
bool supportsGetDoorbellID(Function &F) {
173+
if (!TM.getTargetTriple().isAMDGCN())
174+
return false;
171175
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
172176
return ST.supportsGetDoorbellID();
173177
}
@@ -182,18 +186,18 @@ class AMDGPUInformationCache : public InformationCache {
182186

183187
std::pair<unsigned, unsigned>
184188
getDefaultFlatWorkGroupSize(const Function &F) const {
185-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
189+
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
186190
return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
187191
}
188192

189193
std::pair<unsigned, unsigned>
190194
getMaximumFlatWorkGroupRange(const Function &F) {
191-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
195+
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
192196
return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
193197
}
194198

195199
SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
196-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
200+
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
197201
return ST.getMaxNumWorkGroups(F);
198202
}
199203

@@ -206,7 +210,7 @@ class AMDGPUInformationCache : public InformationCache {
206210
std::pair<unsigned, unsigned>
207211
getWavesPerEU(const Function &F,
208212
std::pair<unsigned, unsigned> FlatWorkGroupSize) {
209-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
213+
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
210214
return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
211215
}
212216

@@ -217,7 +221,7 @@ class AMDGPUInformationCache : public InformationCache {
217221
if (!Val)
218222
return std::nullopt;
219223
if (!Val->second) {
220-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
224+
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
221225
Val->second = ST.getMaxWavesPerEU();
222226
}
223227
return std::make_pair(Val->first, *(Val->second));
@@ -227,13 +231,13 @@ class AMDGPUInformationCache : public InformationCache {
227231
getEffectiveWavesPerEU(const Function &F,
228232
std::pair<unsigned, unsigned> WavesPerEU,
229233
std::pair<unsigned, unsigned> FlatWorkGroupSize) {
230-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
234+
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
231235
return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
232236
getLDSSize(F));
233237
}
234238

235239
unsigned getMaxWavesPerEU(const Function &F) {
236-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
240+
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
237241
return ST.getMaxWavesPerEU();
238242
}
239243

@@ -1511,9 +1515,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15111515
A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
15121516
}
15131517

1514-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1515-
if (!F->isDeclaration() && ST.hasClusters())
1516-
A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1518+
if (TM.getTargetTriple().isAMDGCN()) {
1519+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1520+
if (!F->isDeclaration() && ST.hasClusters())
1521+
A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1522+
}
15171523

15181524
for (auto &I : instructions(F)) {
15191525
Value *Ptr = nullptr;

llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {
281281
}
282282

283283
static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM) {
284-
if (!TM)
284+
if (!TM || !TM->getTargetTriple().isAMDGCN())
285285
return false;
286286

287287
// This optimization only applies to GFX11 and beyond.

0 commit comments

Comments
 (0)