@@ -511,19 +511,6 @@ class AMDGPULowerModuleLDS {
511511 return MostUsed.GV ;
512512 }
513513
514- static void recordLDSAbsoluteAddress (Module *M, GlobalVariable *GV,
515- uint32_t Address) {
516- // Write the specified address into metadata where it can be retrieved by
517- // the assembler. Format is a half open range, [Address Address+1)
518- LLVMContext &Ctx = M->getContext ();
519- auto *IntTy =
520- M->getDataLayout ().getIntPtrType (Ctx, AMDGPUAS::LOCAL_ADDRESS);
521- auto *MinC = ConstantAsMetadata::get (ConstantInt::get (IntTy, Address));
522- auto *MaxC = ConstantAsMetadata::get (ConstantInt::get (IntTy, Address + 1 ));
523- GV->setMetadata (LLVMContext::MD_absolute_symbol,
524- MDNode::get (Ctx, {MinC, MaxC}));
525- }
526-
527514 DenseMap<Function *, Value *> tableKernelIndexCache;
528515 Value *getTableLookupKernelIndex (Module &M, Function *F) {
529516 // Accesses from a function use the amdgcn_lds_kernel_id intrinsic which
@@ -922,126 +909,6 @@ class AMDGPULowerModuleLDS {
922909 return KernelToCreatedDynamicLDS;
923910 }
924911
925- static GlobalVariable *uniquifyGVPerKernel (Module &M, GlobalVariable *GV,
926- Function *KF) {
927- bool NeedsReplacement = false ;
928- for (Use &U : GV->uses ()) {
929- if (auto *I = dyn_cast<Instruction>(U.getUser ())) {
930- Function *F = I->getFunction ();
931- if (isKernelLDS (F) && F != KF) {
932- NeedsReplacement = true ;
933- break ;
934- }
935- }
936- }
937- if (!NeedsReplacement)
938- return GV;
939- // Create a new GV used only by this kernel and its function
940- GlobalVariable *NewGV = new GlobalVariable (
941- M, GV->getValueType (), GV->isConstant (), GV->getLinkage (),
942- GV->getInitializer (), GV->getName () + " ." + KF->getName (), nullptr ,
943- GV->getThreadLocalMode (), GV->getType ()->getAddressSpace ());
944- NewGV->copyAttributesFrom (GV);
945- for (Use &U : make_early_inc_range (GV->uses ())) {
946- if (auto *I = dyn_cast<Instruction>(U.getUser ())) {
947- Function *F = I->getFunction ();
948- if (!isKernelLDS (F) || F == KF) {
949- U.getUser ()->replaceUsesOfWith (GV, NewGV);
950- }
951- }
952- }
953- return NewGV;
954- }
955-
956- bool lowerSpecialLDSVariables (
957- Module &M, LDSUsesInfoTy &LDSUsesInfo,
958- VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly) {
959- bool Changed = false ;
960- const DataLayout &DL = M.getDataLayout ();
961- // The 1st round: give module-absolute assignments
962- int NumAbsolutes = 0 ;
963- std::vector<GlobalVariable *> OrderedGVs;
964- for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
965- GlobalVariable *GV = K.first ;
966- if (!isNamedBarrier (*GV))
967- continue ;
968- // give a module-absolute assignment if it is indirectly accessed by
969- // multiple kernels. This is not precise, but we don't want to duplicate
970- // a function when it is called by multiple kernels.
971- if (LDSToKernelsThatNeedToAccessItIndirectly[GV].size () > 1 ) {
972- OrderedGVs.push_back (GV);
973- } else {
974- // leave it to the 2nd round, which will give a kernel-relative
975- // assignment if it is only indirectly accessed by one kernel
976- LDSUsesInfo.direct_access [*K.second .begin ()].insert (GV);
977- }
978- LDSToKernelsThatNeedToAccessItIndirectly.erase (GV);
979- }
980- OrderedGVs = sortByName (std::move (OrderedGVs));
981- for (GlobalVariable *GV : OrderedGVs) {
982- unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
983- unsigned BarId = NumAbsolutes + 1 ;
984- unsigned BarCnt = DL.getTypeAllocSize (GV->getValueType ()) / 16 ;
985- NumAbsolutes += BarCnt;
986-
987- // 4 bits for alignment, 5 bits for the barrier num,
988- // 3 bits for the barrier scope
989- unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4 ;
990- recordLDSAbsoluteAddress (&M, GV, Offset);
991- }
992- OrderedGVs.clear ();
993-
994- // The 2nd round: give a kernel-relative assignment for GV that
995- // either only indirectly accessed by single kernel or only directly
996- // accessed by multiple kernels.
997- std::vector<Function *> OrderedKernels;
998- for (auto &K : LDSUsesInfo.direct_access ) {
999- Function *F = K.first ;
1000- assert (isKernelLDS (F));
1001- OrderedKernels.push_back (F);
1002- }
1003- OrderedKernels = sortByName (std::move (OrderedKernels));
1004-
1005- llvm::DenseMap<Function *, uint32_t > Kernel2BarId;
1006- for (Function *F : OrderedKernels) {
1007- for (GlobalVariable *GV : LDSUsesInfo.direct_access [F]) {
1008- if (!isNamedBarrier (*GV))
1009- continue ;
1010-
1011- LDSUsesInfo.direct_access [F].erase (GV);
1012- if (GV->isAbsoluteSymbolRef ()) {
1013- // already assigned
1014- continue ;
1015- }
1016- OrderedGVs.push_back (GV);
1017- }
1018- OrderedGVs = sortByName (std::move (OrderedGVs));
1019- for (GlobalVariable *GV : OrderedGVs) {
1020- // GV could also be used directly by other kernels. If so, we need to
1021- // create a new GV used only by this kernel and its function.
1022- auto NewGV = uniquifyGVPerKernel (M, GV, F);
1023- Changed |= (NewGV != GV);
1024- unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
1025- unsigned BarId = Kernel2BarId[F];
1026- BarId += NumAbsolutes + 1 ;
1027- unsigned BarCnt = DL.getTypeAllocSize (GV->getValueType ()) / 16 ;
1028- Kernel2BarId[F] += BarCnt;
1029- unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4 ;
1030- recordLDSAbsoluteAddress (&M, NewGV, Offset);
1031- }
1032- OrderedGVs.clear ();
1033- }
1034- // Also erase those special LDS variables from indirect_access.
1035- for (auto &K : LDSUsesInfo.indirect_access ) {
1036- assert (isKernelLDS (K.first ));
1037- for (GlobalVariable *GV : K.second ) {
1038- if (isNamedBarrier (*GV))
1039- K.second .erase (GV);
1040- }
1041- }
1042- return Changed;
1043- }
1044-
1045912 bool runOnModule (Module &M) {
1046913 CallGraph CG = CallGraph (M);
1047914 bool Changed = superAlignLDSGlobals (M);
0 commit comments