Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 59 additions & 1 deletion sycl/source/detail/kernel_program_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#pragma once

#include "sycl/exception.hpp"
#include <detail/config.hpp>
#include <detail/kernel_arg_mask.hpp>
#include <detail/platform_impl.hpp>
#include <sycl/detail/common.hpp>
Expand All @@ -19,8 +20,10 @@

#include <atomic>
#include <condition_variable>
#include <iomanip>
#include <mutex>
#include <set>
#include <thread>
#include <type_traits>

#include <boost/unordered/unordered_flat_map.hpp>
Expand Down Expand Up @@ -176,6 +179,49 @@ class KernelProgramCache {

void setContextPtr(const ContextPtr &AContext) { MParentContext = AContext; }

/* Writes one program-cache trace line to std::cerr.
 *
 * Nothing is printed unless SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache()
 * returns true; that query is evaluated once and the result is reused on
 * later calls.
 *
 * Msg      - text appended after the key description.
 * CacheKey - key of the program the message refers to. The image id
 *            (CacheKey.first.second) and every device handle in
 *            CacheKey.second are included in the printed key. */
static inline void traceProgram(const std::string &Msg,
                                const ProgramCacheKeyT &CacheKey) {
  static const bool IsTracingOn =
      SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache();
  if (!IsTracingOn)
    return;

  // Device handles are printed as hexadecimal addresses, each one followed
  // by a comma. The base setting is sticky, so it is applied once up front.
  std::stringstream Devices;
  Devices << std::setbase(16);
  for (const auto &Dev : CacheKey.second)
    Devices << "0x" << reinterpret_cast<uintptr_t>(Dev) << ",";

  const int ImageId = CacheKey.first.second;
  std::string KeyText = "[Key:{imageId = ";
  KeyText += std::to_string(ImageId);
  KeyText += ",urDevice = ";
  KeyText += Devices.str();
  KeyText += "}]: ";

  // Id of the calling thread, looked up once per thread.
  thread_local std::thread::id CurrentTid = std::this_thread::get_id();
  std::cerr << "[In-Memory Cache][Thread Id:" << CurrentTid
            << "][Program Cache]" << KeyText << Msg << std::endl;
}

/* Writes one kernel-cache trace line to std::cerr.
 *
 * Nothing is printed unless SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache()
 * returns true; that query is evaluated once and the result is reused on
 * later calls.
 *
 * Msg               - text appended after the key description.
 * KernelName        - kernel name shown in the "Name = ..." part of the key.
 * IsKernelFastCache - printed as 0 or 1 in the "IsFastCache" field;
 *                     defaults to false. */
static inline void traceKernel(const std::string &Msg,
                               const std::string &KernelName,
                               bool IsKernelFastCache = false) {
  static const bool IsTracingOn =
      SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache();
  if (!IsTracingOn)
    return;

  std::string KeyText = "[IsFastCache: ";
  KeyText += std::to_string(IsKernelFastCache);
  KeyText += "][Key:{Name = ";
  KeyText += KernelName;
  KeyText += "}]: ";

  // Id of the calling thread, looked up once per thread.
  thread_local std::thread::id CurrentTid = std::this_thread::get_id();
  std::cerr << "[In-Memory Cache][Thread Id:" << CurrentTid
            << "][Kernel Cache]" << KeyText << Msg << std::endl;
}

// Returns a Locked<ProgramCache> built from MCachedPrograms and
// MProgramCacheMutex.
Locked<ProgramCache> acquireCachedPrograms() {
  return Locked<ProgramCache>{MCachedPrograms, MProgramCacheMutex};
}
Expand All @@ -195,7 +241,10 @@ class KernelProgramCache {
CommonProgramKeyT CommonKey =
std::make_pair(CacheKey.first.second, CacheKey.second);
ProgCache.KeyMap.emplace(CommonKey, CacheKey);
traceProgram("Program inserted.", CacheKey);
}
else
traceProgram("Program fetched.", CacheKey);
return std::make_pair(It->second, DidInsert);
}

Expand All @@ -217,7 +266,10 @@ class KernelProgramCache {
CommonProgramKeyT CommonKey =
std::make_pair(CacheKey.first.second, CacheKey.second);
ProgCache.KeyMap.emplace(CommonKey, CacheKey);
traceProgram("Program inserted.", CacheKey);
}
else
traceProgram("Program fetched.", CacheKey);
return DidInsert;
}

Expand All @@ -227,8 +279,12 @@ class KernelProgramCache {
auto LockedCache = acquireKernelsPerProgramCache();
auto &Cache = LockedCache.get()[Program];
auto [It, DidInsert] = Cache.try_emplace(KernelName, nullptr);
if (DidInsert)
if (DidInsert) {
It->second = std::make_shared<KernelBuildResult>(getAdapter());
traceKernel("Kernel inserted.", KernelName);
}
else
traceKernel("Kernel fetched.", KernelName);
return std::make_pair(It->second, DidInsert);
}

Expand All @@ -237,6 +293,7 @@ class KernelProgramCache {
std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
auto It = MKernelFastCache.find(CacheKey);
if (It != MKernelFastCache.end()) {
traceKernel("Kernel fetched.", std::get<3>(CacheKey), true);
return It->second;
}
return std::make_tuple(nullptr, nullptr, nullptr, nullptr);
Expand All @@ -247,6 +304,7 @@ class KernelProgramCache {
std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
// If no insertion took place, some other thread has already inserted
// something into the cache.
traceKernel("Kernel inserted.", std::get<3>(CacheKey), true);
MKernelFastCache.emplace(CacheKey, CacheVal);
}

Expand Down
61 changes: 61 additions & 0 deletions sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Tests tracing of in-memory kernel and program cache.

// RUN: %{build} -o %t.out

// There should be no tracing output when either SYCL_CACHE_IN_MEM or
// SYCL_CACHE_TRACE is set to 0.

// RUN: env SYCL_CACHE_IN_MEM=0 %{run} %t.out \
// RUN: | FileCheck --allow-empty --check-prefix=CHECK-NO-TRACE %s
// RUN: env SYCL_CACHE_TRACE=0 %{run} %t.out \
// RUN: | FileCheck --allow-empty --check-prefix=CHECK-NO-TRACE %s

// CHECK-NO-TRACE-NOT: [In-Memory Cache]{{.*}}

// RUN: env SYCL_CACHE_TRACE=2 %{run} %t.out 2> %t.trace
// RUN: FileCheck %s --input-file=%t.trace --check-prefix=CHECK-CACHE-TRACE

#include <sycl/detail/core.hpp>

#include <sycl/specialization_id.hpp>
#include <sycl/usm.hpp>

using namespace sycl;

constexpr specialization_id<int> spec_id;

int main() {
queue q;

// Check program insertion into cache and kernel insertion into fast and
// regular kernel cache.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Program Cache][Key:{{.*}}]: Program inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 0][Key:{Name = [[KERNELNAME1:.*]]]: Kernel inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 1][Key:{Name = [[KERNELNAME1]]]: Kernel inserted.

// In the 2nd and 3rd invocation of this loop, the kernel should be fetched
// from fast kernel cache.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 1][Key:{Name = [[KERNELNAME1]]]: Kernel fetched.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 1][Key:{Name = [[KERNELNAME1]]]: Kernel fetched.
for (int i = 0; i < 3; i++)
q.single_task([] {}).wait();

auto *p = malloc_device<int>(1, q);

// Check program and kernel insertion into cache. There should be different
// programs for different iterations of this loop, because of the different
// specialization constants.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Program Cache][Key:{{.*}}]: Program inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 0][Key:{Name = [[KERNELNAME2:.*]]]: Kernel inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Program Cache][Key:{{.*}}]: Program inserted.
// CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 0][Key:{Name = [[KERNELNAME2]]]: Kernel inserted.
for (int i = 0; i < 2; ++i)
q.submit([&](handler &cgh) {
cgh.set_specialization_constant<spec_id>(i);
cgh.parallel_for(1, [=](auto, kernel_handler kh) {
*p = kh.get_specialization_constant<spec_id>();
});
}).wait();

free(p, q);
}
Loading