Skip to content

Commit b85a646

Browse files
[SYCL] Get rid of device kernel info duplication
With the introduction of DeviceKernelInfo, assert usage and implicit local argument information is now duplicated in program manager. This patch removes the duplicate maps and makes it so that device kernel info map is filled out during image registration, with the compile time information added when it's available (during the first submission of the kernel).
1 parent d782af3 commit b85a646

File tree

8 files changed

+64
-93
lines changed

8 files changed

+64
-93
lines changed

sycl/source/detail/device_kernel_info.cpp

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,10 @@ DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info)
1919
Name(Info.Name.data())
2020
#endif
2121
{
22-
init(Name.data());
23-
}
24-
25-
void DeviceKernelInfo::init(KernelNameStrRefT KernelName) {
26-
auto &PM = detail::ProgramManager::getInstance();
27-
MUsesAssert = PM.kernelUsesAssert(KernelName);
28-
MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName);
2922
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
23+
// Non-legacy implementation either fills out the data during image
24+
// registration after this constructor is called, or uses default values
25+
// if this instance of DeviceKernelInfo corresponds to an interop kernel.
3026
MInitialized.store(true);
3127
#endif
3228
}
@@ -36,9 +32,18 @@ void DeviceKernelInfo::initIfEmpty(const CompileTimeKernelInfoTy &Info) {
3632
if (MInitialized.load())
3733
return;
3834

39-
CompileTimeKernelInfoTy::operator=(Info);
40-
Name = Info.Name.data();
41-
init(Name.data());
35+
// If this function is called, then this is a default initialized
36+
// device kernel info created from older headers and stored in global handler.
37+
// In that case, fetch the proper instance from program manager and copy its
38+
// values.
39+
auto &PM = detail::ProgramManager::getInstance();
40+
DeviceKernelInfo &PMDeviceKernelInfo = PM.getDeviceKernelInfo(KernelNameStrRefT(Info.Name));
41+
42+
PMDeviceKernelInfo.CompileTimeKernelInfoTy::operator=(Info);
43+
PMDeviceKernelInfo.Name = Info.Name.data();
44+
45+
MUsesAssert = PMDeviceKernelInfo.MUsesAssert;
46+
MImplicitLocalArgPos = PMDeviceKernelInfo.MImplicitLocalArgPos;
4247
}
4348
#endif
4449

@@ -78,18 +83,25 @@ FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
7883
assertInitialized();
7984
return MFastKernelSubcache;
8085
}
81-
bool DeviceKernelInfo::usesAssert() {
86+
bool DeviceKernelInfo::usesAssert() const {
8287
assertInitialized();
8388
return MUsesAssert;
8489
}
85-
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
90+
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() const {
8691
assertInitialized();
8792
return MImplicitLocalArgPos;
8893
}
8994

95+
void DeviceKernelInfo::setUsesAssert() { MUsesAssert = true; }
96+
97+
void DeviceKernelInfo::setImplicitLocalArgPos(int Pos) {
98+
assert(!MImplicitLocalArgPos.has_value() || MImplicitLocalArgPos == Pos);
99+
MImplicitLocalArgPos = Pos;
100+
}
101+
90102
bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }
91103

92-
void DeviceKernelInfo::assertInitialized() {
104+
void DeviceKernelInfo::assertInitialized() const {
93105
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
94106
assert(MInitialized.load() && "Data needs to be initialized before use");
95107
#endif

sycl/source/detail/device_kernel_info.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,14 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
108108
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);
109109

110110
FastKernelSubcacheT &getKernelSubcache();
111-
bool usesAssert();
112-
const std::optional<int> &getImplicitLocalArgPos();
111+
bool usesAssert() const;
112+
const std::optional<int> &getImplicitLocalArgPos() const;
113+
114+
void setUsesAssert();
115+
void setImplicitLocalArgPos(int Pos);
113116

114117
private:
115-
void assertInitialized();
118+
void assertInitialized() const;
116119
bool isCompileTimeInfoSet() const;
117120

118121
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES

sycl/source/detail/get_device_kernel_info.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ KernelNameBasedCacheT *createKernelNameBasedCache() {
2222
#endif
2323

2424
DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
25-
return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info);
25+
return ProgramManager::getInstance().getDeviceKernelInfo(Info);
2626
}
2727

2828
} // namespace detail

sycl/source/detail/kernel_impl.hpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,10 +240,9 @@ class kernel_impl {
240240
std::string_view getName() const;
241241

242242
DeviceKernelInfo &getDeviceKernelInfo() {
243-
return MIsInterop
244-
? MInteropDeviceKernelInfo
245-
: ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
246-
KernelNameStrT(getName()));
243+
return MIsInterop ? MInteropDeviceKernelInfo
244+
: ProgramManager::getInstance().getDeviceKernelInfo(
245+
KernelNameStrT(getName()));
247246
}
248247

249248
private:

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,8 +1797,11 @@ void ProgramManager::cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img) {
17971797
const RTDeviceBinaryImage::PropertyRange &AssertUsedRange =
17981798
Img.getAssertUsed();
17991799
if (AssertUsedRange.isAvailable())
1800-
for (const auto &Prop : AssertUsedRange)
1801-
m_KernelUsesAssert.insert(Prop->Name);
1800+
for (const auto &Prop : AssertUsedRange) {
1801+
auto It = m_DeviceKernelInfoMap.find(Prop->Name);
1802+
assert(It != m_DeviceKernelInfoMap.end());
1803+
It->second.setUsesAssert();
1804+
}
18021805
}
18031806

18041807
void ProgramManager::cacheKernelImplicitLocalArg(
@@ -1807,36 +1810,27 @@ void ProgramManager::cacheKernelImplicitLocalArg(
18071810
Img.getImplicitLocalArg();
18081811
if (ImplicitLocalArgRange.isAvailable())
18091812
for (auto Prop : ImplicitLocalArgRange) {
1810-
m_KernelImplicitLocalArgPos[Prop->Name] =
1811-
DeviceBinaryProperty(Prop).asUint32();
1813+
auto It = m_DeviceKernelInfoMap.find(Prop->Name);
1814+
assert(It != m_DeviceKernelInfoMap.end());
1815+
It->second.setImplicitLocalArgPos(DeviceBinaryProperty(Prop).asUint32());
18121816
}
18131817
}
18141818

1815-
std::optional<int>
1816-
ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const {
1817-
auto it = m_KernelImplicitLocalArgPos.find(KernelName);
1818-
if (it != m_KernelImplicitLocalArgPos.end())
1819-
return it->second;
1820-
return {};
1821-
}
1822-
1823-
DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo(
1824-
const CompileTimeKernelInfoTy &Info) {
1819+
DeviceKernelInfo &
1820+
ProgramManager::getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) {
18251821
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
1826-
auto [Iter, Inserted] =
1827-
m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name.data()}, Info);
1828-
if (!Inserted)
1829-
Iter->second.setCompileTimeInfoIfNeeded(Info);
1830-
return Iter->second;
1822+
auto It = m_DeviceKernelInfoMap.find(KernelNameStrT{Info.Name.data()});
1823+
assert(It != m_DeviceKernelInfoMap.end());
1824+
It->second.setCompileTimeInfoIfNeeded(Info);
1825+
return It->second;
18311826
}
18321827

18331828
DeviceKernelInfo &
1834-
ProgramManager::getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) {
1829+
ProgramManager::getDeviceKernelInfo(KernelNameStrRefT KernelName) {
18351830
std::lock_guard<std::mutex> Guard(m_DeviceKernelInfoMapMutex);
1836-
CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(KernelName)};
1837-
auto Result =
1838-
m_DeviceKernelInfoMap.try_emplace(KernelName, DefaultCompileTimeInfo);
1839-
return Result.first->second;
1831+
auto It = m_DeviceKernelInfoMap.find(KernelName);
1832+
assert(It != m_DeviceKernelInfoMap.end());
1833+
return It->second;
18401834
}
18411835

18421836
static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg,
@@ -2039,6 +2033,10 @@ void ProgramManager::addImage(sycl_device_binary RawImg,
20392033
m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get()));
20402034
KernelIDs->push_back(It->second);
20412035

2036+
CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)};
2037+
m_DeviceKernelInfoMap.try_emplace(KernelNameStrT(name),
2038+
DefaultCompileTimeInfo);
2039+
20422040
// Keep track of image to kernel name reference count for cleanup.
20432041
m_KernelNameRefCount[name]++;
20442042
}
@@ -2232,8 +2230,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) {
22322230
if (--RefCount == 0) {
22332231
// TODO aggregate all these maps into a single one since their entries
22342232
// share lifetime.
2235-
m_KernelUsesAssert.erase(Name);
2236-
m_KernelImplicitLocalArgPos.erase(Name);
22372233
m_DeviceKernelInfoMap.erase(Name);
22382234
m_KernelNameRefCount.erase(RefCountIt);
22392235
if (Name2IDIt != m_KernelName2KernelIDs.end())

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -365,19 +365,13 @@ class ProgramManager {
365365
ProgramManager();
366366
~ProgramManager() = default;
367367

368-
template <typename NameT>
369-
bool kernelUsesAssert(const NameT &KernelName) const {
370-
return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
371-
}
372-
373368
SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; }
374369

375-
std::optional<int>
376-
kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const;
370+
void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img);
371+
void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);
377372

378-
DeviceKernelInfo &
379-
getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
380-
DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName);
373+
DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info);
374+
DeviceKernelInfo &getDeviceKernelInfo(KernelNameStrRefT KernelName);
381375

382376
std::set<const RTDeviceBinaryImage *>
383377
getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
@@ -406,12 +400,6 @@ class ProgramManager {
406400
/// Dumps image to current directory
407401
void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const;
408402

409-
/// Add info on kernels using assert into cache
410-
void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img);
411-
412-
/// Add info on kernels using local arg into cache
413-
void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);
414-
415403
std::set<const RTDeviceBinaryImage *>
416404
collectDependentDeviceImagesForVirtualFunctions(
417405
const RTDeviceBinaryImage &Img, const device_impl &Dev);
@@ -518,14 +506,6 @@ class ProgramManager {
518506
bool m_UseSpvFile = false;
519507
RTDeviceBinaryImageUPtr m_SpvFileImage;
520508

521-
// std::less<> is a transparent comparator that enabled comparison between
522-
// different types without temporary key_type object creation. This includes
523-
// standard overloads, such as comparison between std::string and
524-
// std::string_view or just char*.
525-
using KernelUsesAssertSet = std::set<KernelNameStrT, std::less<>>;
526-
KernelUsesAssertSet m_KernelUsesAssert;
527-
std::unordered_map<KernelNameStrT, int> m_KernelImplicitLocalArgPos;
528-
529509
// Map for storing device kernel information. Runtime lookup should be avoided
530510
// by caching the pointers when possible.
531511
std::unordered_map<KernelNameStrT, DeviceKernelInfo> m_DeviceKernelInfoMap;

sycl/source/handler.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ event handler::finalize() {
567567
// Fetch the device kernel info pointer if it hasn't been set (e.g.
568568
// in kernel bundle or free function cases).
569569
impl->MKernelData.setDeviceKernelInfoPtr(
570-
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
570+
&detail::ProgramManager::getInstance().getDeviceKernelInfo(
571571
toKernelNameStrT(MKernelName)));
572572
}
573573
assert(impl->MKernelData.getKernelName() == MKernelName);
@@ -974,7 +974,7 @@ void handler::extractArgsAndReqs() {
974974
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
975975
if (impl->MKernelData.getDeviceKernelInfoPtr() == nullptr) {
976976
impl->MKernelData.setDeviceKernelInfoPtr(
977-
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
977+
&detail::ProgramManager::getInstance().getDeviceKernelInfo(
978978
detail::toKernelNameStrT(MKernel->getName())));
979979
}
980980
#endif
@@ -2249,8 +2249,7 @@ void handler::setKernelNameBasedCachePtr(
22492249
HandlerInfo.IsESIMD = impl->MKernelIsESIMD;
22502250
HandlerInfo.HasSpecialCaptures = impl->MKernelHasSpecialCaptures;
22512251
impl->MKernelData.setDeviceKernelInfoPtr(
2252-
&detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo(
2253-
HandlerInfo));
2252+
&detail::ProgramManager::getInstance().getDeviceKernelInfo(HandlerInfo));
22542253
}
22552254

22562255
void handler::setKernelInfo(

sycl/unittests/program_manager/Cleanup.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,6 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager {
7373
return m_EliminatedKernelArgMasks;
7474
}
7575

76-
KernelUsesAssertSet &getKernelUsesAssert() { return m_KernelUsesAssert; }
77-
78-
std::unordered_map<sycl::detail::KernelNameStrT, int> &
79-
getKernelImplicitLocalArgPos() {
80-
return m_KernelImplicitLocalArgPos;
81-
}
82-
8376
std::unordered_map<std::string,
8477
std::unique_ptr<sycl::detail::HostPipeMapEntry>> &
8578
getHostPipes() {
@@ -311,11 +304,6 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM,
311304
"Kernel name reference count " + CommentPostfix);
312305
EXPECT_EQ(PM.getEliminatedKernelArgMask().size(), ExpectedImgCount)
313306
<< "Eliminated kernel arg mask " + CommentPostfix;
314-
checkContainer(PM.getKernelUsesAssert(), ExpectedEntryCount,
315-
generateRefNames(ImgIds, "Kernel"),
316-
"KernelUsesAssert " + CommentPostfix);
317-
EXPECT_EQ(PM.getKernelImplicitLocalArgPos().size(), ExpectedEntryCount)
318-
<< "Kernel implicit local arg pos " + CommentPostfix;
319307

320308
if (!MultipleImgsPerEntryTestCase) {
321309
// FIXME expected to fail for now, device globals cleanup seems to be
@@ -365,10 +353,6 @@ TEST(ImageRemoval, BaseContainers) {
365353
generateRefName("B", "HostPipe").c_str());
366354
PM.addOrInitHostPipeEntry(PipeC::get_host_ptr(),
367355
generateRefName("C", "HostPipe").c_str());
368-
std::vector<std::string> KernelNames =
369-
generateRefNames({"A", "B", "C"}, "Kernel");
370-
for (const std::string &Name : KernelNames)
371-
PM.getOrCreateDeviceKernelInfo(Name);
372356

373357
checkAllInvolvedContainers(PM, ImagesToRemove.size() + ImagesToKeep.size(),
374358
{"A", "B", "C"}, "check failed before removal");
@@ -392,8 +376,6 @@ TEST(ImageRemoval, MultipleImagesPerEntry) {
392376
convertAndAddImages(PM, ImagesToRemoveSameEntries, NativeImagesForRemoval,
393377
TestBinaries);
394378

395-
std::string KernelName = generateRefName("A", "Kernel");
396-
PM.getOrCreateDeviceKernelInfo(KernelName);
397379
checkAllInvolvedContainers(
398380
PM, ImagesToRemoveSameEntries.size() + ImagesToKeepSameEntries.size(),
399381
/*ExpectedEntryCount*/ 1, {"A"}, "check failed before removal",

0 commit comments

Comments
 (0)