Skip to content

Commit 3717903

Browse files
[SYCL] Refactor kernel name based data approach (#19117)
- Rename `KernelNameBasedCacheT` to `DeviceKernelInfo` as that reflects its usage better and avoids the confusion with kernel caches. - Make the data presence unconditional by looking it up at runtime as a fallback. This consolidates the if branches into one and saves us a couple of map lookups with old applications. - Add compile time kernel information to the struct (unused for now). - Switch to eager initialization of the runtime data members. - Add cleanup of data instances when unloading a library. --------- Co-authored-by: Andrei Elovikov <[email protected]>
1 parent 1d3074d commit 3717903

34 files changed

+562
-280
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
//==------------------- compile_time_kernel_info.hpp -----------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#pragma once
9+
10+
#include <sycl/detail/kernel_desc.hpp>
11+
#include <sycl/detail/string_view.hpp>
12+
13+
namespace sycl {
14+
inline namespace _V1 {
15+
namespace detail {
16+
inline namespace compile_time_kernel_info_v1 {
17+
18+
// This is being passed across ABI boundary, so we don't use std::string_view,
19+
// at least for as long as we support user apps built with GNU libstdc++'s
20+
// pre-C++11 ABI.
21+
struct CompileTimeKernelInfoTy {
22+
detail::string_view Name;
23+
unsigned NumParams = 0;
24+
bool IsESIMD = false;
25+
detail::string_view FileName{};
26+
detail::string_view FunctionName{};
27+
unsigned LineNumber = 0;
28+
unsigned ColumnNumber = 0;
29+
int64_t KernelSize = 0;
30+
using ParamDescGetterT = kernel_param_desc_t (*)(int);
31+
ParamDescGetterT ParamDescGetter = nullptr;
32+
bool HasSpecialCaptures = true;
33+
};
34+
35+
template <class Kernel>
36+
inline constexpr CompileTimeKernelInfoTy CompileTimeKernelInfo{
37+
std::string_view(getKernelName<Kernel>()),
38+
getKernelNumParams<Kernel>(),
39+
isKernelESIMD<Kernel>(),
40+
std::string_view(getKernelFileName<Kernel>()),
41+
std::string_view(getKernelFunctionName<Kernel>()),
42+
getKernelLineNumber<Kernel>(),
43+
getKernelColumnNumber<Kernel>(),
44+
getKernelSize<Kernel>(),
45+
&getKernelParamDesc<Kernel>,
46+
hasSpecialCaptures<Kernel>()};
47+
48+
} // namespace compile_time_kernel_info_v1
49+
} // namespace detail
50+
} // namespace _V1
51+
} // namespace sycl
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//==--------------------- get_device_kernel_info.hpp -----------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#pragma once
9+
10+
#include <sycl/detail/compile_time_kernel_info.hpp>
11+
#include <sycl/detail/kernel_desc.hpp>
12+
13+
namespace sycl {
14+
inline namespace _V1 {
15+
namespace detail {
16+
17+
class DeviceKernelInfo;
18+
// Lifetime of the underlying `DeviceKernelInfo` is tied to the availability of
19+
// the `sycl_device_binaries` corresponding to this kernel. In other words, once
20+
// user library is unloaded (see __sycl_unregister_lib), program manager destoys
21+
// this `DeviceKernelInfo` object and the reference returned from here becomes
22+
// stale.
23+
__SYCL_EXPORT DeviceKernelInfo &
24+
getDeviceKernelInfo(const CompileTimeKernelInfoTy &);
25+
26+
template <class Kernel> DeviceKernelInfo &getDeviceKernelInfo() {
27+
static DeviceKernelInfo &Info =
28+
getDeviceKernelInfo(CompileTimeKernelInfo<Kernel>);
29+
return Info;
30+
}
31+
32+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
33+
struct KernelNameBasedCacheT;
34+
__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache();
35+
#endif
36+
37+
} // namespace detail
38+
} // namespace _V1
39+
} // namespace sycl

sycl/include/sycl/detail/kernel_desc.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,6 @@ template <typename KernelNameType> constexpr bool hasSpecialCaptures() {
277277
}
278278
return FoundSpecialCapture;
279279
}
280-
281280
} // namespace detail
282281
} // namespace _V1
283282
} // namespace sycl

sycl/include/sycl/detail/kernel_name_based_cache.hpp

Lines changed: 0 additions & 29 deletions
This file was deleted.

sycl/include/sycl/handler.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@
1414
#include <sycl/detail/common.hpp>
1515
#include <sycl/detail/defines_elementary.hpp>
1616
#include <sycl/detail/export.hpp>
17+
#include <sycl/detail/get_device_kernel_info.hpp>
1718
#include <sycl/detail/id_queries_fit_in_int.hpp>
1819
#include <sycl/detail/impl_utils.hpp>
1920
#include <sycl/detail/kernel_desc.hpp>
2021
#include <sycl/detail/kernel_launch_helper.hpp>
21-
#include <sycl/detail/kernel_name_based_cache.hpp>
2222
#include <sycl/detail/kernel_name_str_t.hpp>
2323
#include <sycl/detail/reduction_forward.hpp>
2424
#include <sycl/detail/string.hpp>
@@ -863,14 +863,14 @@ class __SYCL_EXPORT handler {
863863
constexpr std::string_view KernelNameStr =
864864
detail::getKernelName<KernelName>();
865865
MKernelName = KernelNameStr;
866+
setDeviceKernelInfoPtr(&detail::getDeviceKernelInfo<KernelName>());
866867
} else {
867868
// In case w/o the integration header it is necessary to process
868869
// accessors from the list(which are associated with this handler) as
869870
// arguments. We must copy the associated accessors as they are checked
870871
// later during finalize.
871872
setArgsToAssociatedAccessors();
872873
}
873-
setKernelNameBasedCachePtr(detail::getKernelNameBasedCache<KernelName>());
874874

875875
// If the kernel lambda is callable with a kernel_handler argument, manifest
876876
// the associated kernel handler.
@@ -3685,8 +3685,11 @@ class __SYCL_EXPORT handler {
36853685
sycl::handler &h, size_t size,
36863686
const ext::oneapi::experimental::memory_pool &pool);
36873687

3688+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
36883689
void setKernelNameBasedCachePtr(
36893690
detail::KernelNameBasedCacheT *KernelNameBasedCachePtr);
3691+
#endif
3692+
void setDeviceKernelInfoPtr(detail::DeviceKernelInfo *DeviceKernelInfoPtr);
36903693

36913694
queue getQueue();
36923695

sycl/source/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,8 @@ set(SYCL_COMMON_SOURCES
288288
"detail/kernel_compiler/kernel_compiler_opencl.cpp"
289289
"detail/kernel_compiler/kernel_compiler_sycl.cpp"
290290
"detail/kernel_impl.cpp"
291-
"detail/kernel_name_based_cache.cpp"
291+
"detail/get_device_kernel_info.cpp"
292+
"detail/device_kernel_info.cpp"
292293
"detail/kernel_program_cache.cpp"
293294
"detail/memory_export.cpp"
294295
"detail/memory_manager.cpp"

sycl/source/detail/cg.hpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ class CGExecKernel : public CG {
254254
std::shared_ptr<detail::kernel_bundle_impl> MKernelBundle;
255255
std::vector<ArgDesc> MArgs;
256256
KernelNameStrT MKernelName;
257-
KernelNameBasedCacheT *MKernelNameBasedCachePtr;
257+
DeviceKernelInfo &MDeviceKernelInfo;
258258
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
259259
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
260260
/// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list
@@ -269,8 +269,7 @@ class CGExecKernel : public CG {
269269
std::shared_ptr<detail::kernel_impl> SyclKernel,
270270
std::shared_ptr<detail::kernel_bundle_impl> KernelBundle,
271271
CG::StorageInitHelper CGData, std::vector<ArgDesc> Args,
272-
KernelNameStrT KernelName,
273-
KernelNameBasedCacheT *KernelNameBasedCachePtr,
272+
KernelNameStrT KernelName, DeviceKernelInfo &DeviceKernelInfo,
274273
std::vector<std::shared_ptr<detail::stream_impl>> Streams,
275274
std::vector<std::shared_ptr<const void>> AuxiliaryResources,
276275
CGType Type, ur_kernel_cache_config_t KernelCacheConfig,
@@ -279,8 +278,7 @@ class CGExecKernel : public CG {
279278
: CG(Type, std::move(CGData), std::move(loc)), MNDRDesc(NDRDesc),
280279
MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)),
281280
MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)),
282-
MKernelName(std::move(KernelName)),
283-
MKernelNameBasedCachePtr(KernelNameBasedCachePtr),
281+
MKernelName(std::move(KernelName)), MDeviceKernelInfo(DeviceKernelInfo),
284282
MStreams(std::move(Streams)),
285283
MAuxiliaryResources(std::move(AuxiliaryResources)),
286284
MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)),
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
//==---------------------- device_kernel_info.cpp ----------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#include <detail/device_kernel_info.hpp>
9+
#include <detail/program_manager/program_manager.hpp>
10+
11+
namespace sycl {
12+
inline namespace _V1 {
13+
namespace detail {
14+
15+
DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info)
16+
: CompileTimeKernelInfoTy(Info)
17+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
18+
,
19+
Name(Info.Name.data())
20+
#endif
21+
{
22+
init(Name.data());
23+
}
24+
25+
void DeviceKernelInfo::init(KernelNameStrRefT KernelName) {
26+
auto &PM = detail::ProgramManager::getInstance();
27+
MUsesAssert = PM.kernelUsesAssert(KernelName);
28+
MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName);
29+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
30+
MInitialized.store(true);
31+
#endif
32+
}
33+
34+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
35+
void DeviceKernelInfo::initIfNeeded(KernelNameStrRefT KernelName) {
36+
if (!MInitialized.load())
37+
init(KernelName);
38+
}
39+
#endif
40+
41+
template <typename OtherTy>
42+
inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS,
43+
const OtherTy &RHS) {
44+
// TODO replace with std::tie(...) == std::tie(...) once there is
45+
// implicit conversion from detail to std string_view.
46+
return std::string_view{LHS.Name} == std::string_view{RHS.Name} &&
47+
LHS.NumParams == RHS.NumParams && LHS.IsESIMD == RHS.IsESIMD &&
48+
std::string_view{LHS.FileName} == std::string_view{RHS.FileName} &&
49+
std::string_view{LHS.FunctionName} ==
50+
std::string_view{RHS.FunctionName} &&
51+
LHS.LineNumber == RHS.LineNumber &&
52+
LHS.ColumnNumber == RHS.ColumnNumber &&
53+
LHS.KernelSize == RHS.KernelSize &&
54+
LHS.ParamDescGetter == RHS.ParamDescGetter &&
55+
LHS.HasSpecialCaptures == RHS.HasSpecialCaptures;
56+
}
57+
58+
void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
59+
const CompileTimeKernelInfoTy &Info) {
60+
if (isCompileTimeInfoSet())
61+
CompileTimeKernelInfoTy::operator=(Info);
62+
assert(isCompileTimeInfoSet());
63+
assert(Info == *this);
64+
}
65+
66+
FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
67+
assertInitialized();
68+
return MFastKernelSubcache;
69+
}
70+
bool DeviceKernelInfo::usesAssert() {
71+
assertInitialized();
72+
return MUsesAssert;
73+
}
74+
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
75+
assertInitialized();
76+
return MImplicitLocalArgPos;
77+
}
78+
79+
bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }
80+
81+
void DeviceKernelInfo::assertInitialized() {
82+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
83+
assert(MInitialized.load() && "Data needs to be initialized before use");
84+
#endif
85+
}
86+
87+
} // namespace detail
88+
} // namespace _V1
89+
} // namespace sycl

sycl/source/detail/kernel_name_based_cache_t.hpp renamed to sycl/source/detail/device_kernel_info.hpp

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//==-------------------- kernel_name_based_cache_t.hpp ---------------------==//
1+
//==---------------------- device_kernel_info.hpp ----------------------==//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,6 +10,8 @@
1010
#include <detail/hashers.hpp>
1111
#include <detail/kernel_arg_mask.hpp>
1212
#include <emhash/hash_table8.hpp>
13+
#include <sycl/detail/compile_time_kernel_info.hpp>
14+
#include <sycl/detail/kernel_name_str_t.hpp>
1315
#include <sycl/detail/spinlock.hpp>
1416
#include <sycl/detail/ur.hpp>
1517

@@ -23,9 +25,9 @@ using FastKernelCacheKeyT = std::pair<ur_device_handle_t, ur_context_handle_t>;
2325

2426
struct FastKernelCacheVal {
2527
Managed<ur_kernel_handle_t> MKernelHandle; /* UR kernel. */
26-
std::mutex *MMutex; /* Mutex guarding this kernel. When
27-
caching is disabled, the pointer is
28-
nullptr. */
28+
std::mutex *MMutex; /* Mutex guarding this kernel. When
29+
caching is disabled, the pointer is
30+
nullptr. */
2931
const KernelArgMask *MKernelArgMask; /* Eliminated kernel argument mask. */
3032
Managed<ur_program_handle_t> MProgramHandle; /* UR program handle
3133
corresponding to this kernel. */
@@ -71,18 +73,53 @@ struct FastKernelEntryT {
7173

7274
using FastKernelSubcacheEntriesT = std::vector<FastKernelEntryT>;
7375

76+
// Structure for caching built kernels with a specific name.
77+
// Used by instances of the kernel program cache class (potentially multiple).
7478
struct FastKernelSubcacheT {
7579
FastKernelSubcacheEntriesT Entries;
7680
FastKernelSubcacheMutexT Mutex;
7781
};
7882

79-
struct KernelNameBasedCacheT {
80-
FastKernelSubcacheT FastKernelSubcache;
81-
std::optional<bool> UsesAssert;
82-
// Implicit local argument position is represented by an optional int, this
83-
// uses another optional on top of that to represent lazy initialization of
84-
// the cached value.
85-
std::optional<std::optional<int>> ImplicitLocalArgPos;
83+
// This class aggregates information specific to device kernels (i.e.
84+
// information that is uniform between different submissions of the same
85+
// kernel). Pointers to instances of this class are stored in header function
86+
// templates as a static variable to avoid repeated runtime lookup overhead.
87+
// TODO Currently this class duplicates information fetched from the program
88+
// manager. Instead, we should merge all of this information
89+
// into this structure and get rid of the other KernelName -> * maps.
90+
class DeviceKernelInfo : public CompileTimeKernelInfoTy {
91+
public:
92+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
93+
// Needs to own the kernel name string in non-preview builds since we pass it
94+
// using a temporary string instead of a string view there.
95+
std::string Name;
96+
#endif
97+
98+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
99+
DeviceKernelInfo() = default;
100+
#endif
101+
DeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
102+
103+
void init(KernelNameStrRefT KernelName);
104+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
105+
void initIfNeeded(KernelNameStrRefT KernelName);
106+
#endif
107+
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);
108+
109+
FastKernelSubcacheT &getKernelSubcache();
110+
bool usesAssert();
111+
const std::optional<int> &getImplicitLocalArgPos();
112+
113+
private:
114+
void assertInitialized();
115+
bool isCompileTimeInfoSet() const;
116+
117+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
118+
std::atomic<bool> MInitialized = false;
119+
#endif
120+
FastKernelSubcacheT MFastKernelSubcache;
121+
bool MUsesAssert;
122+
std::optional<int> MImplicitLocalArgPos;
86123
};
87124

88125
} // namespace detail

0 commit comments

Comments
 (0)