212212#define DEBUG_TYPE " amdgpu-lower-module-lds"
213213
214214using namespace llvm ;
215+ using namespace AMDGPU ;
215216
216217namespace {
217218
@@ -234,17 +235,6 @@ cl::opt<LoweringKind> LoweringKindLoc(
234235 clEnumValN(LoweringKind::hybrid, " hybrid" ,
235236 " Lower via mixture of above strategies" )));
236237
237- bool isKernelLDS (const Function *F) {
238- // Some weirdness here. AMDGPU::isKernelCC does not call into
239- // AMDGPU::isKernel with the calling conv, it instead calls into
240- // isModuleEntryFunction which returns true for more calling conventions
241- // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
242- // There's also a test that checks that the LDS lowering does not hit on
243- // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
244- // Putting LDS in the name of the function to draw attention to this.
245- return AMDGPU::isKernel (F->getCallingConv ());
246- }
247-
248238template <typename T> std::vector<T> sortByName (std::vector<T> &&V) {
249239 llvm::sort (V.begin (), V.end (), [](const auto *L, const auto *R) {
250240 return L->getName () < R->getName ();
@@ -305,183 +295,9 @@ class AMDGPULowerModuleLDS {
305295 Decl, {}, {OperandBundleDefT<Value *>(" ExplicitUse" , UseInstance)});
306296 }
307297
308- static bool eliminateConstantExprUsesOfLDSFromAllInstructions (Module &M) {
309- // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
310- // global may have uses from multiple different functions as a result.
311- // This pass specialises LDS variables with respect to the kernel that
312- // allocates them.
313-
314- // This is semantically equivalent to (the unimplemented as slow):
315- // for (auto &F : M.functions())
316- // for (auto &BB : F)
317- // for (auto &I : BB)
318- // for (Use &Op : I.operands())
319- // if (constantExprUsesLDS(Op))
320- // replaceConstantExprInFunction(I, Op);
321-
322- SmallVector<Constant *> LDSGlobals;
323- for (auto &GV : M.globals ())
324- if (AMDGPU::isLDSVariableToLower (GV))
325- LDSGlobals.push_back (&GV);
326-
327- return convertUsersOfConstantsToInstructions (LDSGlobals);
328- }
329-
330298public:
// Constructor: initialises the TM member from the supplied target machine.
331299 AMDGPULowerModuleLDS (const AMDGPUTargetMachine &TM_) : TM(TM_) {}
332300
333- using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
334-
335- using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
336-
337- static void getUsesOfLDSByFunction (CallGraph const &CG, Module &M,
338- FunctionVariableMap &kernels,
339- FunctionVariableMap &functions) {
340-
341- // Get uses from the current function, excluding uses by called functions
342- // Two output variables to avoid walking the globals list twice
343- for (auto &GV : M.globals ()) {
344- if (!AMDGPU::isLDSVariableToLower (GV)) {
345- continue ;
346- }
347-
348- for (User *V : GV.users ()) {
349- if (auto *I = dyn_cast<Instruction>(V)) {
350- Function *F = I->getFunction ();
351- if (isKernelLDS (F)) {
352- kernels[F].insert (&GV);
353- } else {
354- functions[F].insert (&GV);
355- }
356- }
357- }
358- }
359- }
360-
361- struct LDSUsesInfoTy {
362- FunctionVariableMap direct_access;
363- FunctionVariableMap indirect_access;
364- };
365-
366- static LDSUsesInfoTy getTransitiveUsesOfLDS (CallGraph const &CG, Module &M) {
367-
368- FunctionVariableMap direct_map_kernel;
369- FunctionVariableMap direct_map_function;
370- getUsesOfLDSByFunction (CG, M, direct_map_kernel, direct_map_function);
371-
372- // Collect variables that are used by functions whose address has escaped
373- DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
374- for (Function &F : M.functions ()) {
375- if (!isKernelLDS (&F))
376- if (F.hasAddressTaken (nullptr ,
377- /* IgnoreCallbackUses */ false ,
378- /* IgnoreAssumeLikeCalls */ false ,
379- /* IgnoreLLVMUsed */ true ,
380- /* IgnoreArcAttachedCall */ false )) {
381- set_union (VariablesReachableThroughFunctionPointer,
382- direct_map_function[&F]);
383- }
384- }
385-
386- auto functionMakesUnknownCall = [&](const Function *F) -> bool {
387- assert (!F->isDeclaration ());
388- for (const CallGraphNode::CallRecord &R : *CG[F]) {
389- if (!R.second ->getFunction ()) {
390- return true ;
391- }
392- }
393- return false ;
394- };
395-
396- // Work out which variables are reachable through function calls
397- FunctionVariableMap transitive_map_function = direct_map_function;
398-
399- // If the function makes any unknown call, assume the worst case that it can
400- // access all variables accessed by functions whose address escaped
401- for (Function &F : M.functions ()) {
402- if (!F.isDeclaration () && functionMakesUnknownCall (&F)) {
403- if (!isKernelLDS (&F)) {
404- set_union (transitive_map_function[&F],
405- VariablesReachableThroughFunctionPointer);
406- }
407- }
408- }
409-
410- // Direct implementation of collecting all variables reachable from each
411- // function
412- for (Function &Func : M.functions ()) {
413- if (Func.isDeclaration () || isKernelLDS (&Func))
414- continue ;
415-
416- DenseSet<Function *> seen; // catches cycles
417- SmallVector<Function *, 4 > wip{&Func};
418-
419- while (!wip.empty ()) {
420- Function *F = wip.pop_back_val ();
421-
422- // Can accelerate this by referring to transitive map for functions that
423- // have already been computed, with more care than this
424- set_union (transitive_map_function[&Func], direct_map_function[F]);
425-
426- for (const CallGraphNode::CallRecord &R : *CG[F]) {
427- Function *ith = R.second ->getFunction ();
428- if (ith) {
429- if (!seen.contains (ith)) {
430- seen.insert (ith);
431- wip.push_back (ith);
432- }
433- }
434- }
435- }
436- }
437-
438- // direct_map_kernel lists which variables are used by the kernel
439- // find the variables which are used through a function call
440- FunctionVariableMap indirect_map_kernel;
441-
442- for (Function &Func : M.functions ()) {
443- if (Func.isDeclaration () || !isKernelLDS (&Func))
444- continue ;
445-
446- for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
447- Function *ith = R.second ->getFunction ();
448- if (ith) {
449- set_union (indirect_map_kernel[&Func], transitive_map_function[ith]);
450- } else {
451- set_union (indirect_map_kernel[&Func],
452- VariablesReachableThroughFunctionPointer);
453- }
454- }
455- }
456-
457- // Verify that we fall into one of 2 cases:
458- // - All variables are absolute: this is a re-run of the pass
459- // so we don't have anything to do.
460- // - No variables are absolute.
461- std::optional<bool > HasAbsoluteGVs;
462- for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
463- for (auto &[Fn, GVs] : Map) {
464- for (auto *GV : GVs) {
465- bool IsAbsolute = GV->isAbsoluteSymbolRef ();
466- if (HasAbsoluteGVs.has_value ()) {
467- if (*HasAbsoluteGVs != IsAbsolute) {
468- report_fatal_error (
469- " Module cannot mix absolute and non-absolute LDS GVs" );
470- }
471- } else
472- HasAbsoluteGVs = IsAbsolute;
473- }
474- }
475- }
476-
477- // If we only had absolute GVs, we have nothing to do, return an empty
478- // result.
479- if (HasAbsoluteGVs && *HasAbsoluteGVs)
480- return {FunctionVariableMap (), FunctionVariableMap ()};
481-
482- return {std::move (direct_map_kernel), std::move (indirect_map_kernel)};
483- }
484-
485301 struct LDSVariableReplacement {
486302 GlobalVariable *SGV = nullptr ;
487303 DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
0 commit comments