Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion buildbot/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def do_configure(args, passthrough_args):
if not os.path.isdir(abs_obj_dir):
os.makedirs(abs_obj_dir)

llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw"
llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw;compiler-rt"

# libdevice build requires a working SYCL toolchain, which is not the case
# with macOS target right now.
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ getInstrProfOptions(const CodeGenOptions &CodeGenOpts,
Options.InstrProfileOutput = CodeGenOpts.ContinuousProfileSync
? ("%c" + CodeGenOpts.InstrProfileOutput)
: CodeGenOpts.InstrProfileOutput;
Options.Atomic = CodeGenOpts.AtomicProfileUpdate;
Options.Atomic = LangOpts.SYCLIsDevice || CodeGenOpts.AtomicProfileUpdate;
return Options;
}

Expand Down
5 changes: 0 additions & 5 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1574,11 +1574,6 @@ static ArrayRef<options::ID> getUnsupportedOpts() {
options::OPT_fno_profile_generate, // -f[no-]profile-generate
options::OPT_ftest_coverage,
options::OPT_fno_test_coverage, // -f[no-]test-coverage
options::OPT_fcoverage_mapping,
options::OPT_coverage, // --coverage
options::OPT_fno_coverage_mapping, // -f[no-]coverage-mapping
options::OPT_fprofile_instr_generate,
options::OPT_fprofile_instr_generate_EQ,
options::OPT_fprofile_arcs,
options::OPT_fno_profile_arcs, // -f[no-]profile-arcs
options::OPT_fno_profile_instr_generate, // -f[no-]profile-instr-generate
Expand Down
16 changes: 16 additions & 0 deletions compiler-rt/lib/profile/InstrProfilingRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@ extern "C" {

#include "InstrProfiling.h"

void __sycl_increment_profile_counters(uint64_t FnHash, size_t NumCounters,
const uint64_t *Increments) {
for (const __llvm_profile_data *DataVar = __llvm_profile_begin_data();
DataVar < __llvm_profile_end_data(); DataVar++) {
if (DataVar->NameRef != FnHash || DataVar->NumCounters != NumCounters)
continue;

uint64_t *const Counters = reinterpret_cast<uint64_t *>(
reinterpret_cast<uintptr_t>(DataVar) +
reinterpret_cast<uintptr_t>(DataVar->CounterPtr));
for (size_t i = 0; i < NumCounters; i++)
Counters[i] += Increments[i];
break;
}
}

static int RegisterRuntime() {
__llvm_profile_initialize();
#ifdef _AIX
Expand Down
40 changes: 40 additions & 0 deletions llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,9 @@ bool InstrLowerer::lower() {
if (!NeedsRuntimeHook && ContainsProfiling)
emitRuntimeHook();

if (M.getTargetTriple().isSPIR())
return true;

emitRegistration();
emitUses();
emitInitialization();
Expand Down Expand Up @@ -1116,6 +1119,21 @@ GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
}

Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
if (M.getTargetTriple().isSPIR()) {
auto *Counters = getOrCreateRegionCounters(I);
IRBuilder<> Builder(I);
auto *Addr = Builder.CreateLoad(PointerType::get(M.getContext(), 1),
Counters, "pgocount.addr");
const std::uint64_t Index = I->getIndex()->getZExtValue();
if (Index > 0) {
auto *Offset = Builder.getInt64(I->getIndex()->getZExtValue());
auto *AddrWithOffset = Builder.CreateGEP(Type::getInt64Ty(M.getContext()),
Addr, Offset, "pgocount.addr");
return AddrWithOffset;
}
return Addr;
}

auto *Counters = getOrCreateRegionCounters(I);
IRBuilder<> Builder(I);

Expand Down Expand Up @@ -1657,6 +1675,28 @@ InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
GlobalVariable *
InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
GlobalValue::LinkageTypes Linkage) {
if (M.getTargetTriple().isSPIR()) {
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
auto &Ctx = M.getContext();
GlobalVariable *GV;
auto *PtrTy = PointerType::get(Ctx, 1);
auto *IntTy = Type::getInt64Ty(Ctx);
auto *StructTy = StructType::get(Ctx, {PtrTy, IntTy});
GV = new GlobalVariable(M, StructTy, false, Linkage,
Constant::getNullValue(StructTy), Name);
const std::uint64_t FnHash = IndexedInstrProf::ComputeHash(
getPGOFuncNameVarInitializer(Inc->getName()));
const std::string FnName = [&] {
auto *Arr = cast<ConstantDataArray>(Inc->getName()->getInitializer());
StringRef NameStr =
Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
return std::string{"__profc_"} + std::to_string(FnHash);
}();
GV->addAttribute("sycl-unique-id", FnName);
GV->addAttribute("sycl-device-global-size", Twine(NumCounters * 8).str());
return GV;
}

uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
auto &Ctx = M.getContext();
GlobalVariable *GV;
Expand Down
3 changes: 3 additions & 0 deletions sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -389,13 +389,16 @@ add_custom_target(sycl-compiler
clang-offload-extract
clang-offload-packager
clang-linker-wrapper
compiler-rt
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do other projects enable profiling unconditionally in the build too?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only remaining question from me, the rest is 🔥 👍

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know of any projects that enable profiling unconditionally. This change enables support for coverage instrumentation in our build but does not enable the instrumentation itself. I think it makes sense to do this unconditionally so that the related functionality in the SYCL runtime can be tested as part of our regular end-to-end testing process.

I'd like to add a build flag for building the SYCL runtime with instrumentation enabled. This flag would allow us to produce coverage reports and identify areas in the runtime that are not covered by tests. I plan to do this in a separate PR and make the instrumentation disabled by default.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it makes sense to do this unconditionally so that the related functionality in the SYCL runtime can be tested as part of our regular end-to-end testing process.

I'd argue that that would be a reason to enable it in configure.py under --ci-defaults, but I'd still think that this unconditional dependency here is too much.

That said, if others are ok, I'm not going to block PR on this.

file-table-tform
llc
llvm-ar
llvm-foreach
llvm-spirv
llvm-link
llvm-objcopy
llvm-profdata
llvm-cov
spirv-to-ir-wrapper
sycl-post-link
opencl-aot
Expand Down
5 changes: 5 additions & 0 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ context_impl::~context_impl() {
if (DGEntry != nullptr)
DGEntry->removeAssociatedResources(this);
}
// Free all profile counter USM allocations associated with this context.
for (DeviceGlobalMapEntry *DGEntry :
detail::ProgramManager::getInstance()
.getProfileCounterDeviceGlobalEntries(this))
DGEntry->cleanupProfileCounter(this);
MCachedLibPrograms.clear();
// TODO catch an exception and put it to list of asynchronous exceptions
getAdapter().call_nocheck<UrApiKind::urContextRelease>(MContext);
Expand Down
15 changes: 14 additions & 1 deletion sycl/source/detail/device_global_map.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,10 @@ class DeviceGlobalMap {
// cannot be set until registration happens.
auto EntryUPtr = std::make_unique<DeviceGlobalMapEntry>(
DeviceGlobal->Name, Img, TypeSize, DeviceImageScopeDecorated);
MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
auto NewEntry =
MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr));
if (NewEntry.first->second->isProfileCounter())
MProfileCounterDeviceGlobals.push_back(NewEntry.first->second.get());
}
}
}
Expand Down Expand Up @@ -114,6 +117,8 @@ class DeviceGlobalMap {
auto EntryUPtr =
std::make_unique<DeviceGlobalMapEntry>(UniqueId, DeviceGlobalPtr);
auto NewEntry = MDeviceGlobals.emplace(UniqueId, std::move(EntryUPtr));
if (NewEntry.first->second->isProfileCounter())
MProfileCounterDeviceGlobals.push_back(NewEntry.first->second.get());
MPtr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
}

Expand Down Expand Up @@ -154,6 +159,11 @@ class DeviceGlobalMap {
}
}

std::vector<DeviceGlobalMapEntry *> getProfileCounterEntries() {
std::lock_guard<std::mutex> DeviceGlobalsGuard(MDeviceGlobalsMutex);
return MProfileCounterDeviceGlobals;
}

const std::unordered_map<const void *, DeviceGlobalMapEntry *>
getPointerMap() const {
return MPtr2DeviceGlobal;
Expand All @@ -177,6 +187,9 @@ class DeviceGlobalMap {
MDeviceGlobals;
std::unordered_map<const void *, DeviceGlobalMapEntry *> MPtr2DeviceGlobal;

// List of profile counter device globals.
std::vector<DeviceGlobalMapEntry *> MProfileCounterDeviceGlobals;

/// Protects MDeviceGlobals and MPtr2DeviceGlobal.
std::mutex MDeviceGlobalsMutex;
};
Expand Down
63 changes: 61 additions & 2 deletions sycl/source/detail/device_global_map_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,63 @@ OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) {
}
}

bool DeviceGlobalMapEntry::isAvailableInContext(const context_impl *CtxImpl) {
std::lock_guard<std::mutex> Lock{MDeviceToUSMPtrMapMutex};
for (const auto &It : MDeviceToUSMPtrMap)
if (It.first.second == CtxImpl)
return true;
return false;
}

bool DeviceGlobalMapEntry::isProfileCounter() {
const std::string CounterPrefix = "__profc_";
return MUniqueId.substr(0, CounterPrefix.size()) == CounterPrefix;
}

extern "C" void __attribute__((weak))
__sycl_increment_profile_counters(std::uint64_t FnHash, std::size_t NumCounters,
const std::uint64_t *Increments);

void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) {
std::lock_guard<std::mutex> Lock{MDeviceToUSMPtrMapMutex};
const std::size_t NumCounters = MDeviceGlobalTSize / sizeof(std::uint64_t);
const std::uint64_t FnHash = [&] {
const auto PrefixSize = std::string{"__profc_"}.size();
constexpr int DecimalBase = 10;
return std::strtoull(MUniqueId.substr(PrefixSize).c_str(), nullptr,
DecimalBase);
}();
for (device_impl &Device : CtxImpl->getDevices()) {
auto USMPtrIt = MDeviceToUSMPtrMap.find({&Device, CtxImpl});
if (USMPtrIt != MDeviceToUSMPtrMap.end()) {
DeviceGlobalUSMMem &USMMem = USMPtrIt->second;

// Get the increments from the USM pointer
std::vector<std::uint64_t> Increments(NumCounters);
const std::uint64_t *Counters = static_cast<std::uint64_t *>(USMMem.MPtr);
for (std::size_t I = 0; I < NumCounters; ++I)
Increments[I] += Counters[I];

// Call the weak symbol to update the profile counters
if (__sycl_increment_profile_counters) {
__sycl_increment_profile_counters(FnHash, Increments.size(),
Increments.data());
}

// Free the USM memory and release the event if it exists.
detail::usm::freeInternal(USMMem.MPtr, CtxImpl);
if (USMMem.MInitEvent != nullptr)
CtxImpl->getAdapter().call<UrApiKind::urEventRelease>(
USMMem.MInitEvent);

// Set to nullptr to avoid double free.
USMMem.MPtr = nullptr;
USMMem.MInitEvent = nullptr;
MDeviceToUSMPtrMap.erase(USMPtrIt);
}
}
}

DeviceGlobalUSMMem &
DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
assert(!MIsDeviceImageScopeDecorated &&
Expand All @@ -67,7 +124,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
return DGUSMPtr->second;

void *NewDGUSMPtr = detail::usm::alignedAllocInternal(
0, MDeviceGlobalTSize, &CtxImpl, &DevImpl, sycl::usm::alloc::device);
0, MDeviceGlobalTSize, &CtxImpl, &DevImpl,
isProfileCounter() ? sycl::usm::alloc::shared : sycl::usm::alloc::device);

auto NewAllocIt = MDeviceToUSMPtrMap.emplace(
std::piecewise_construct, std::forward_as_tuple(&DevImpl, &CtxImpl),
Expand Down Expand Up @@ -125,7 +183,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) {
return DGUSMPtr->second;

void *NewDGUSMPtr = detail::usm::alignedAllocInternal(
0, MDeviceGlobalTSize, &CtxImpl, &DevImpl, sycl::usm::alloc::device);
0, MDeviceGlobalTSize, &CtxImpl, &DevImpl,
isProfileCounter() ? sycl::usm::alloc::shared : sycl::usm::alloc::device);

auto NewAllocIt = MDeviceToUSMPtrMap.emplace(
std::piecewise_construct, std::forward_as_tuple(&DevImpl, &CtxImpl),
Expand Down
9 changes: 9 additions & 0 deletions sycl/source/detail/device_global_map_entry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,15 @@ struct DeviceGlobalMapEntry {
MIsDeviceImageScopeDecorated = IsDeviceImageScopeDecorated;
}

// Checks if the device_global is available in the given context.
bool isAvailableInContext(const context_impl *CtxImpl);

// Returns true if the device_global is a profile counter.
bool isProfileCounter();

// Cleans up a profile counter device global
void cleanupProfileCounter(context_impl *CtxImpl);

// Gets or allocates USM memory for a device_global.
DeviceGlobalUSMMem &getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl);

Expand Down
13 changes: 13 additions & 0 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2466,6 +2466,19 @@ std::vector<DeviceGlobalMapEntry *> ProgramManager::getDeviceGlobalEntries(
return FoundEntries;
}

std::vector<DeviceGlobalMapEntry *>
ProgramManager::getProfileCounterDeviceGlobalEntries(
const context_impl *CtxImpl) {
std::vector<DeviceGlobalMapEntry *> ProfileCounters =
ProgramManager::getInstance().m_DeviceGlobals.getProfileCounterEntries();
std::vector<DeviceGlobalMapEntry *> FoundEntries;
for (const auto &DGEntry : ProfileCounters) {
if (DGEntry->isAvailableInContext(CtxImpl))
FoundEntries.push_back(DGEntry);
}
return FoundEntries;
}

void ProgramManager::addOrInitHostPipeEntry(const void *HostPipePtr,
const char *UniqueId) {
std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
Expand Down
5 changes: 5 additions & 0 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,11 @@ class ProgramManager {
std::vector<DeviceGlobalMapEntry *>
getDeviceGlobalEntries(const std::vector<std::string> &UniqueIds,
bool ExcludeDeviceImageScopeDecorated = false);

// The function gets all device_global entries that are profile counters.
std::vector<DeviceGlobalMapEntry *>
getProfileCounterDeviceGlobalEntries(const context_impl *CtxImpl);

// The function inserts or initializes a host_pipe entry into the
// host_pipe map.
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId);
Expand Down
60 changes: 60 additions & 0 deletions sycl/test-e2e/Basic/device_code_coverage.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// RUN: %{build} -fprofile-instr-generate -fcoverage-mapping -o %t.out
// RUN: %{run} LLVM_PROFILE_FILE=%t.profraw %t.out
// RUN: llvm-profdata merge %t.profraw -o %t.profdata
// RUN: llvm-cov show -instr-profile=%t.profdata %t.out -name="main" | FileCheck %s

#include <sycl/sycl.hpp>

int main() {
sycl::queue q;
int *values = sycl::malloc_shared<int>(10, q);
q.submit([&](sycl::handler &h) {
h.parallel_for(sycl::range<1>(10), [=](sycl::id<1> idx) {
if (idx[0] < 8)
values[idx] = 42;
else
values[idx] = 7;
});
}).wait();
for (int i = 0; i < 10; i++)
assert(values[i] == (i < 8 ? 42 : 7));
sycl::free(values, q);
return 0;
}

// UNSUPPORTED: opencl && gpu
// UNSUPPORTED-TRACKER: GSD-4287

// CHECK: main:
// CHECK: 8| 1|int main() {
// CHECK: 9| 1| sycl::queue q;
// CHECK: 10| 1| int *values = sycl::malloc_shared<int>(10, q);
// CHECK: 11| 1| q.submit([&](sycl::handler &h) {
// CHECK: 12| 1| h.parallel_for(sycl::range<1>(10), [=](sycl::id<1> idx) {
// CHECK: 13| 1| if (idx[0] < 8)
// CHECK: 14| 1| values[idx] = 42;
// CHECK: 15| 1| else
// CHECK: 16| 1| values[idx] = 7;
// CHECK: 17| 1| });
// CHECK: 18| 1| }).wait();
// CHECK: 19| 11| for (int i = 0; i < 10; i++)
// CHECK: 20| 10| assert(values[i] == (i < 8 ? 42 : 7));
// CHECK: 21| 1| sycl::free(values, q);
// CHECK: 22| 1| return 0;
// CHECK: 23| 1|}
// CHECK: device_code_coverage.cpp:_ZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_:
// CHECK: 11| 1| q.submit([&](sycl::handler &h) {
// CHECK: 12| 1| h.parallel_for(sycl::range<1>(10), [=](sycl::id<1> idx) {
// CHECK: 13| 1| if (idx[0] < 8)
// CHECK: 14| 1| values[idx] = 42;
// CHECK: 15| 1| else
// CHECK: 16| 1| values[idx] = 7;
// CHECK: 17| 1| });
// CHECK: 18| 1| }).wait();
// CHECK: device_code_coverage.cpp:_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_2idILi1EEEE_clES5_:
// CHECK: 12| 10| h.parallel_for(sycl::range<1>(10), [=](sycl::id<1> idx) {
// CHECK: 13| 10| if (idx[0] < 8)
// CHECK: 14| 8| values[idx] = 42;
// CHECK: 15| 2| else
// CHECK: 16| 2| values[idx] = 7;
// CHECK: 17| 10| });
Loading