@@ -391,6 +391,13 @@ class InstrLowerer final {
391391 // / Create INSTR_PROF_DATA variable for counters and bitmaps.
392392 void createDataVariable (InstrProfCntrInstBase *Inc);
393393
394+ // / Creates delayed initialization function for data relative offsets.
395+ // / This is only relevant on NVPTX targets where circular constant structures
396+ // / are not allowed.
397+ bool
398+ emitDataDelayedInit (SmallVector<Function *> &Kernels,
399+ SmallVector<const InstrProfCntrInstBase *> &ValueSites);
400+
394401 // / Get the counters for virtual table values, creating them if necessary.
395402 void getOrCreateVTableProfData (GlobalVariable *GV);
396403
@@ -947,11 +954,18 @@ bool InstrLowerer::lower() {
947954 if (!ContainsProfiling && !CoverageNamesVar)
948955 return MadeChange;
949956
957+ // Cached info for generating delayed offset calculations
958+ // This is only relevant on NVPTX targets
959+ SmallVector<Function *> Kernels;
960+ SmallVector<const InstrProfCntrInstBase *> ValueSites;
961+
950962 // We did not know how many value sites there would be inside
951963 // the instrumented function. This is counting the number of instrumented
952964 // target value sites to enter it as field in the profile data variable.
953965 for (Function &F : M) {
954966 InstrProfCntrInstBase *FirstProfInst = nullptr ;
967+ if (F.getCallingConv () == CallingConv::PTX_Kernel)
968+ Kernels.push_back (&F);
955969 for (BasicBlock &BB : F) {
956970 for (auto I = BB.begin (), E = BB.end (); I != E; I++) {
957971 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
@@ -971,9 +985,12 @@ bool InstrLowerer::lower() {
971985 // Also create the data variable based on the MCDCParams.
972986 if (FirstProfInst != nullptr ) {
973987 static_cast <void >(getOrCreateRegionCounters (FirstProfInst));
988+ ValueSites.push_back (FirstProfInst);
974989 }
975990 }
976991
992+ MadeChange |= emitDataDelayedInit (Kernels, ValueSites);
993+
977994 if (EnableVTableValueProfiling)
978995 for (GlobalVariable &GV : M.globals ())
979996 // Global variables with type metadata are virtual table variables.
@@ -1734,6 +1751,13 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
17341751 return PD.RegionCounters ;
17351752}
17361753
1754+ // Calculates difference between two global variable addresses as an integer
1755+ Constant *globalVarDiff (Module &M, GlobalVariable *A, GlobalVariable *B) {
1756+ auto *IntPtrTy = M.getDataLayout ().getIntPtrType (M.getContext ());
1757+ return ConstantExpr::getSub (ConstantExpr::getPtrToInt (A, IntPtrTy),
1758+ ConstantExpr::getPtrToInt (B, IntPtrTy));
1759+ }
1760+
17371761void InstrLowerer::createDataVariable (InstrProfCntrInstBase *Inc) {
17381762 // When debug information is correlated to profile data, a data variable
17391763 // is not needed.
@@ -1854,13 +1878,12 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
18541878 // Reference the counter variable with a label difference (link-time
18551879 // constant).
18561880 DataSectionKind = IPSK_data;
1857- RelativeCounterPtr =
1858- ConstantExpr::getSub ( ConstantExpr::getPtrToInt (CounterPtr, IntPtrTy),
1859- ConstantExpr::getPtrToInt (Data, IntPtrTy) );
1881+ const Triple T (M. getTargetTriple ());
1882+ RelativeCounterPtr = T. isNVPTX () ? ConstantInt::get (IntPtrTy, 0 )
1883+ : globalVarDiff (M, CounterPtr, Data );
18601884 if (BitmapPtr != nullptr )
1861- RelativeBitmapPtr =
1862- ConstantExpr::getSub (ConstantExpr::getPtrToInt (BitmapPtr, IntPtrTy),
1863- ConstantExpr::getPtrToInt (Data, IntPtrTy));
1885+ RelativeBitmapPtr = T.isNVPTX () ? ConstantInt::get (IntPtrTy, 0 )
1886+ : globalVarDiff (M, BitmapPtr, Data);
18641887 }
18651888
18661889 Constant *DataVals[] = {
@@ -1887,6 +1910,51 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
18871910 ReferencedNames.push_back (NamePtr);
18881911}
18891912
1913+ bool InstrLowerer::emitDataDelayedInit (
1914+ SmallVector<Function *> &Kernels,
1915+ SmallVector<const InstrProfCntrInstBase *> &ValueSites) {
1916+ const Triple T (M.getTargetTriple ());
1917+ if (!T.isNVPTX () || ProfileCorrelate == InstrProfCorrelator::BINARY ||
1918+ Kernels.empty () || ValueSites.empty ()) {
1919+ return false ;
1920+ }
1921+
1922+ auto *VoidTy = Type::getVoidTy (M.getContext ());
1923+ auto *Int32Ty = Type::getInt32Ty (M.getContext ());
1924+ auto *IntPtrTy = M.getDataLayout ().getIntPtrType (M.getContext ());
1925+ auto *DelayedInitFTy = FunctionType::get (VoidTy, false );
1926+ auto *DelayedInitF =
1927+ Function::Create (DelayedInitFTy, GlobalValue::InternalLinkage,
1928+ getInstrProfDelayedInitFuncName (), M);
1929+
1930+ IRBuilder<> IRB (BasicBlock::Create (M.getContext (), " " , DelayedInitF));
1931+
1932+ for (const auto *ValueSite : ValueSites) {
1933+ GlobalVariable *NamePtr = ValueSite->getName ();
1934+ auto &PD = ProfileDataMap[NamePtr];
1935+ auto *RelativeCounter = globalVarDiff (M, PD.RegionCounters , PD.DataVar );
1936+ auto *RelativeCounterPtr =
1937+ IRB.CreateGEP (IntPtrTy, PD.DataVar , {ConstantInt::get (Int32Ty, 2 )});
1938+ IRB.CreateStore (RelativeCounter, RelativeCounterPtr);
1939+ if (PD.RegionBitmaps != nullptr ) {
1940+ auto *RelativeBitmap = globalVarDiff (M, PD.RegionBitmaps , PD.DataVar );
1941+ auto *RelativeBitmapPtr =
1942+ IRB.CreateGEP (IntPtrTy, PD.DataVar , {ConstantInt::get (Int32Ty, 3 )});
1943+ IRB.CreateStore (RelativeBitmap, RelativeBitmapPtr);
1944+ }
1945+ }
1946+
1947+ IRB.CreateRetVoid ();
1948+
1949+ for (auto *Kernel : Kernels) {
1950+ auto &KernelEntry = Kernel->getEntryBlock ();
1951+ IRB.SetInsertPoint (KernelEntry.getFirstNonPHI ());
1952+ IRB.CreateCall (DelayedInitF);
1953+ }
1954+
1955+ return true ;
1956+ }
1957+
18901958void InstrLowerer::emitVNodes () {
18911959 if (!ValueProfileStaticAlloc)
18921960 return ;
0 commit comments