Skip to content

Commit a1e71e8

Browse files
committed
[SYCL] Implement coverage instrumentation for device code
Signed-off-by: Michael Aziz <[email protected]>
1 parent 611e245 commit a1e71e8

File tree

13 files changed

+227
-10
lines changed

13 files changed

+227
-10
lines changed

buildbot/configure.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def do_configure(args, passthrough_args):
2121
if not os.path.isdir(abs_obj_dir):
2222
os.makedirs(abs_obj_dir)
2323

24-
llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw"
24+
llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw;compiler-rt"
2525

2626
# libdevice build requires a working SYCL toolchain, which is not the case
2727
# with macOS target right now.

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,7 @@ getInstrProfOptions(const CodeGenOptions &CodeGenOpts,
601601
Options.InstrProfileOutput = CodeGenOpts.ContinuousProfileSync
602602
? ("%c" + CodeGenOpts.InstrProfileOutput)
603603
: CodeGenOpts.InstrProfileOutput;
604-
Options.Atomic = CodeGenOpts.AtomicProfileUpdate;
604+
Options.Atomic = LangOpts.SYCLIsDevice || CodeGenOpts.AtomicProfileUpdate;
605605
return Options;
606606
}
607607

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1574,11 +1574,6 @@ static ArrayRef<options::ID> getUnsupportedOpts() {
15741574
options::OPT_fno_profile_generate, // -f[no-]profile-generate
15751575
options::OPT_ftest_coverage,
15761576
options::OPT_fno_test_coverage, // -f[no-]test-coverage
1577-
options::OPT_fcoverage_mapping,
1578-
options::OPT_coverage, // --coverage
1579-
options::OPT_fno_coverage_mapping, // -f[no-]coverage-mapping
1580-
options::OPT_fprofile_instr_generate,
1581-
options::OPT_fprofile_instr_generate_EQ,
15821577
options::OPT_fprofile_arcs,
15831578
options::OPT_fno_profile_arcs, // -f[no-]profile-arcs
15841579
options::OPT_fno_profile_instr_generate, // -f[no-]profile-instr-generate

compiler-rt/lib/profile/InstrProfilingRuntime.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,22 @@ extern "C" {
1010

1111
#include "InstrProfiling.h"
1212

13+
void __sycl_increment_profile_counters(uint64_t FnHash, size_t NumCounters,
14+
const uint64_t *Increments) {
15+
for (const __llvm_profile_data *DataVar = __llvm_profile_begin_data();
16+
DataVar < __llvm_profile_end_data(); DataVar++) {
17+
if (DataVar->NameRef != FnHash || DataVar->NumCounters != NumCounters)
18+
continue;
19+
20+
uint64_t *const Counters = reinterpret_cast<uint64_t *>(
21+
reinterpret_cast<uintptr_t>(DataVar) +
22+
reinterpret_cast<uintptr_t>(DataVar->CounterPtr));
23+
for (size_t i = 0; i < NumCounters; i++)
24+
Counters[i] += Increments[i];
25+
break;
26+
}
27+
}
28+
1329
static int RegisterRuntime() {
1430
__llvm_profile_initialize();
1531
#ifdef _AIX

llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,9 @@ bool InstrLowerer::lower() {
10021002
if (!NeedsRuntimeHook && ContainsProfiling)
10031003
emitRuntimeHook();
10041004

1005+
if (M.getTargetTriple().isSPIR())
1006+
return true;
1007+
10051008
emitRegistration();
10061009
emitUses();
10071010
emitInitialization();
@@ -1116,6 +1119,21 @@ GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
11161119
}
11171120

11181121
Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
1122+
if (M.getTargetTriple().isSPIR()) {
1123+
auto *Counters = getOrCreateRegionCounters(I);
1124+
IRBuilder<> Builder(I);
1125+
auto *Addr = Builder.CreateLoad(PointerType::get(M.getContext(), 1),
1126+
Counters, "pgocount.addr");
1127+
const std::uint64_t Index = I->getIndex()->getZExtValue();
1128+
if (Index > 0) {
1129+
auto *Offset = Builder.getInt64(I->getIndex()->getZExtValue());
1130+
auto *AddrWithOffset = Builder.CreateGEP(Type::getInt64Ty(M.getContext()),
1131+
Addr, Offset, "pgocount.addr");
1132+
return AddrWithOffset;
1133+
}
1134+
return Addr;
1135+
}
1136+
11191137
auto *Counters = getOrCreateRegionCounters(I);
11201138
IRBuilder<> Builder(I);
11211139

@@ -1657,6 +1675,28 @@ InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
16571675
GlobalVariable *
16581676
InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
16591677
GlobalValue::LinkageTypes Linkage) {
1678+
if (M.getTargetTriple().isSPIR()) {
1679+
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1680+
auto &Ctx = M.getContext();
1681+
GlobalVariable *GV;
1682+
auto *PtrTy = PointerType::get(Ctx, 1);
1683+
auto *IntTy = Type::getInt64Ty(Ctx);
1684+
auto *StructTy = StructType::get(Ctx, {PtrTy, IntTy});
1685+
GV = new GlobalVariable(M, StructTy, false, Linkage,
1686+
Constant::getNullValue(StructTy), Name);
1687+
const std::uint64_t FnHash = IndexedInstrProf::ComputeHash(
1688+
getPGOFuncNameVarInitializer(Inc->getName()));
1689+
const std::string FnName = [&] {
1690+
auto *Arr = cast<ConstantDataArray>(Inc->getName()->getInitializer());
1691+
StringRef NameStr =
1692+
Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
1693+
return std::string{"__profc_"} + std::to_string(FnHash);
1694+
}();
1695+
GV->addAttribute("sycl-unique-id", FnName);
1696+
GV->addAttribute("sycl-device-global-size", Twine(NumCounters * 8).str());
1697+
return GV;
1698+
}
1699+
16601700
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
16611701
auto &Ctx = M.getContext();
16621702
GlobalVariable *GV;

sycl/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,13 +389,16 @@ add_custom_target(sycl-compiler
389389
clang-offload-extract
390390
clang-offload-packager
391391
clang-linker-wrapper
392+
compiler-rt
392393
file-table-tform
393394
llc
394395
llvm-ar
395396
llvm-foreach
396397
llvm-spirv
397398
llvm-link
398399
llvm-objcopy
400+
llvm-profdata
401+
llvm-cov
399402
spirv-to-ir-wrapper
400403
sycl-post-link
401404
opencl-aot

sycl/source/detail/context_impl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ context_impl::~context_impl() {
128128
if (DGEntry != nullptr)
129129
DGEntry->removeAssociatedResources(this);
130130
}
131+
// Free all profile counter USM allocations associated with this context.
132+
for (DeviceGlobalMapEntry *DGEntry :
133+
detail::ProgramManager::getInstance()
134+
.getProfileCounterDeviceGlobalEntries(this))
135+
DGEntry->cleanupProfileCounter(this);
131136
MCachedLibPrograms.clear();
132137
// TODO catch an exception and put it to list of asynchronous exceptions
133138
getAdapter().call_nocheck<UrApiKind::urContextRelease>(MContext);

sycl/source/detail/device_global_map.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,9 @@ class DeviceGlobalMap {
7575
// cannot be set until registration happens.
7676
auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
7777
DeviceGlobal->Name, Img, TypeSize, DeviceImageScopeDecorated);
78-
MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
78+
auto NewEntry = MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
79+
if (NewEntry.first->second->isProfileCounter())
80+
MProfileCounterDeviceGlobals.push_back(NewEntry.first->second.get());
7981
}
8082
}
8183
}
@@ -114,6 +116,8 @@ class DeviceGlobalMap {
114116
auto EntryUPtr =
115117
std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
116118
auto NewEntry = MDeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
119+
if (NewEntry.first->second->isProfileCounter())
120+
MProfileCounterDeviceGlobals.push_back(NewEntry.first->second.get());
117121
MPtr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
118122
}
119123

@@ -154,6 +158,11 @@ class DeviceGlobalMap {
154158
}
155159
}
156160

161+
std::vector<DeviceGlobalMapEntry *> getProfileCounterEntries() {
162+
std::lock_guard<std::mutex> DeviceGlobalsGuard(MDeviceGlobalsMutex);
163+
return MProfileCounterDeviceGlobals;
164+
}
165+
157166
const std::unordered_map<const void *, DeviceGlobalMapEntry *>
158167
getPointerMap() const {
159168
return MPtr2DeviceGlobal;
@@ -177,6 +186,9 @@ class DeviceGlobalMap {
177186
MDeviceGlobals;
178187
std::unordered_map<const void *, DeviceGlobalMapEntry *> MPtr2DeviceGlobal;
179188

189+
// List of profile counter device globals.
190+
std::vector<DeviceGlobalMapEntry *> MProfileCounterDeviceGlobals;
191+
180192
/// Protects MDeviceGlobals and MPtr2DeviceGlobal.
181193
std::mutex MDeviceGlobalsMutex;
182194
};

sycl/source/detail/device_global_map_entry.cpp

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,63 @@ OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) {
5353
}
5454
}
5555

56+
bool DeviceGlobalMapEntry::isAvailableInContext(const context_impl *CtxImpl) {
57+
std::lock_guard<std::mutex> Lock{MDeviceToUSMPtrMapMutex};
58+
for (const auto &It : MDeviceToUSMPtrMap)
59+
if (It.first.second == CtxImpl)
60+
return true;
61+
return false;
62+
}
63+
64+
bool DeviceGlobalMapEntry::isProfileCounter() {
65+
const std::string CounterPrefix = "__profc_";
66+
return MUniqueId.substr(0, CounterPrefix.size()) == CounterPrefix;
67+
}
68+
69+
extern "C" void __attribute__((weak))
70+
__sycl_increment_profile_counters(std::uint64_t FnHash, std::size_t NumCounters,
71+
const std::uint64_t *Increments);
72+
73+
void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) {
74+
std::lock_guard<std::mutex> Lock{MDeviceToUSMPtrMapMutex};
75+
const std::size_t NumCounters = MDeviceGlobalTSize / sizeof(std::uint64_t);
76+
const std::uint64_t FnHash = [&] {
77+
const auto PrefixSize = std::string{"__profc_"}.size();
78+
constexpr int DecimalBase = 10;
79+
return std::strtoull(MUniqueId.substr(PrefixSize).c_str(), nullptr,
80+
DecimalBase);
81+
}();
82+
for (device_impl &Device : CtxImpl->getDevices()) {
83+
auto USMPtrIt = MDeviceToUSMPtrMap.find({&Device, CtxImpl});
84+
if (USMPtrIt != MDeviceToUSMPtrMap.end()) {
85+
DeviceGlobalUSMMem &USMMem = USMPtrIt->second;
86+
87+
// Get the increments from the USM pointer
88+
std::vector<std::uint64_t> Increments(NumCounters);
89+
const std::uint64_t *Counters = static_cast<std::uint64_t *>(USMMem.MPtr);
90+
for (std::size_t I = 0; I < NumCounters; ++I)
91+
Increments[I] += Counters[I];
92+
93+
// Call the weak symbol to update the profile counters
94+
if (__sycl_increment_profile_counters) {
95+
__sycl_increment_profile_counters(FnHash, Increments.size(),
96+
Increments.data());
97+
}
98+
99+
// Free the USM memory and release the event if it exists.
100+
detail::usm::freeInternal(USMMem.MPtr, CtxImpl);
101+
if (USMMem.MInitEvent != nullptr)
102+
CtxImpl->getAdapter().call<UrApiKind::urEventRelease>(
103+
USMMem.MInitEvent);
104+
105+
// Set to nullptr to avoid double free.
106+
USMMem.MPtr = nullptr;
107+
USMMem.MInitEvent = nullptr;
108+
MDeviceToUSMPtrMap.erase(USMPtrIt);
109+
}
110+
}
111+
}
112+
56113
DeviceGlobalUSMMem &
57114
DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
58115
assert(!MIsDeviceImageScopeDecorated &&
@@ -67,7 +124,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
67124
return DGUSMPtr->second;
68125

69126
void *NewDGUSMPtr = detail::usm::alignedAllocInternal(
70-
0, MDeviceGlobalTSize, &CtxImpl, &DevImpl, sycl::usm::alloc::device);
127+
0, MDeviceGlobalTSize, &CtxImpl, &DevImpl,
128+
isProfileCounter() ? sycl::usm::alloc::shared : sycl::usm::alloc::device);
71129

72130
auto NewAllocIt = MDeviceToUSMPtrMap.emplace(
73131
std::piecewise_construct, std::forward_as_tuple(&DevImpl, &CtxImpl),
@@ -125,7 +183,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) {
125183
return DGUSMPtr->second;
126184

127185
void *NewDGUSMPtr = detail::usm::alignedAllocInternal(
128-
0, MDeviceGlobalTSize, &CtxImpl, &DevImpl, sycl::usm::alloc::device);
186+
0, MDeviceGlobalTSize, &CtxImpl, &DevImpl,
187+
isProfileCounter() ? sycl::usm::alloc::shared : sycl::usm::alloc::device);
129188

130189
auto NewAllocIt = MDeviceToUSMPtrMap.emplace(
131190
std::piecewise_construct, std::forward_as_tuple(&DevImpl, &CtxImpl),

sycl/source/detail/device_global_map_entry.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,15 @@ struct DeviceGlobalMapEntry {
110110
MIsDeviceImageScopeDecorated = IsDeviceImageScopeDecorated;
111111
}
112112

113+
// Checks if the device_global is available in the given context.
114+
bool isAvailableInContext(const context_impl *CtxImpl);
115+
116+
// Returns true if the device_global is a profile counter.
117+
bool isProfileCounter();
118+
119+
// Cleans up a profile counter device global
120+
void cleanupProfileCounter(context_impl *CtxImpl);
121+
113122
// Gets or allocates USM memory for a device_global.
114123
DeviceGlobalUSMMem &getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl);
115124

0 commit comments

Comments
 (0)