AMDGPU: don't call getSubtarget<GCNSubtarget> on R600 targets.

jyknight · jyknight · commit d6f729761c77 · 2025-10-06T20:48:54.000-04:00
Unfortunately, `TargetMachine::getSubtarget&lt;ST&gt;` does an unchecked
static_cast to `ST&amp;`, which makes it easy to get wrong.

The modifications here were created by running check-llvm with an
assert added to getSubtarget. However, that asssert requires that RTTI
is enabled, which LLVM doesn't use, so I've reverted the assert before
sending this fix upstream.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -162,12 +162,16 @@ class AMDGPUInformationCache : public InformationCache {
 
   /// Check if the subtarget has aperture regs.
   bool hasApertureRegs(Function &F) {
+    if (!TM.getTargetTriple().isAMDGCN())
+      return false;
     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
     return ST.hasApertureRegs();
   }
 
   /// Check if the subtarget supports GetDoorbellID.
   bool supportsGetDoorbellID(Function &F) {
+    if (!TM.getTargetTriple().isAMDGCN())
+      return false;
     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
     return ST.supportsGetDoorbellID();
   }
@@ -182,18 +186,18 @@ class AMDGPUInformationCache : public InformationCache {
 
   std::pair<unsigned, unsigned>
   getDefaultFlatWorkGroupSize(const Function &F) const {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+    const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
     return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
   }
 
   std::pair<unsigned, unsigned>
   getMaximumFlatWorkGroupRange(const Function &F) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+    const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
     return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
   }
 
   SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+    const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
     return ST.getMaxNumWorkGroups(F);
   }
 
@@ -206,7 +210,7 @@ class AMDGPUInformationCache : public InformationCache {
   std::pair<unsigned, unsigned>
   getWavesPerEU(const Function &F,
                 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+    const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
     return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
   }
 
@@ -217,7 +221,7 @@ class AMDGPUInformationCache : public InformationCache {
     if (!Val)
       return std::nullopt;
     if (!Val->second) {
-      const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+      const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
       Val->second = ST.getMaxWavesPerEU();
     }
     return std::make_pair(Val->first, *(Val->second));
@@ -227,13 +231,13 @@ class AMDGPUInformationCache : public InformationCache {
   getEffectiveWavesPerEU(const Function &F,
                          std::pair<unsigned, unsigned> WavesPerEU,
                          std::pair<unsigned, unsigned> FlatWorkGroupSize) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+    const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
     return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
                                      getLDSSize(F));
   }
 
   unsigned getMaxWavesPerEU(const Function &F) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+    const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
     return ST.getMaxWavesPerEU();
   }
 
@@ -1511,9 +1515,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
     }
 
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
-    if (!F->isDeclaration() && ST.hasClusters())
-      A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
+    if (TM.getTargetTriple().isAMDGCN()) {
+      const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
+      if (!F->isDeclaration() && ST.hasClusters())
+        A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
+    }
 
     for (auto &I : instructions(F)) {
       Value *Ptr = nullptr;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
@@ -281,7 +281,7 @@ bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {
 }
 
 static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM) {
-  if (!TM)
+  if (!TM || !TM->getTargetTriple().isAMDGCN())
     return false;
 
   // This optimization only applies to GFX11 and beyond.

Original file line number	Diff line number	Diff line change
`@@ -281,7 +281,7 @@ bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {`
`281`	`281`	`}`
`282`	`282`
`283`	`283`	`static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM) {`
`284`		`- if (!TM)`
	`284`	`+ if (!TM \|\| !TM->getTargetTriple().isAMDGCN())`
`285`	`285`	`return false;`
`286`	`286`
`287`	`287`	`// This optimization only applies to GFX11 and beyond.`