From 862c1878617d8bd34f7417243d59a0124d4579e9 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Thu, 9 Jan 2025 04:12:29 +0000 Subject: [PATCH 01/24] [SYCL][RTC] Initial support for device globals Signed-off-by: Julian Oppermann --- .../lib/rtc/DeviceCompilation.cpp | 7 +- sycl/source/detail/jit_compiler.cpp | 7 +- sycl/source/detail/kernel_bundle_impl.hpp | 40 +++++++- .../kernel_compiler_sycl_jit.cpp | 94 ++++++++++++++++++- 4 files changed, 141 insertions(+), 7 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index daa82d0932787..f7abb62762528 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -503,8 +503,9 @@ jit_compiler::performPostLink(std::unique_ptr Module, /*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints); assert(Splitter->hasMoreSplits()); - // TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall - // be processed. + if (auto Err = Splitter->verifyNoCrossModuleDeviceGlobalUsage()) { + return std::move(Err); + } // TODO: This allocation assumes that there are no further splits required, // i.e. there are no mixed SYCL/ESIMD modules. @@ -547,7 +548,7 @@ jit_compiler::performPostLink(std::unique_ptr Module, GlobalBinImageProps PropReq{ /*EmitKernelParamInfo=*/true, /*EmitProgramMetadata=*/true, /*EmitExportedSymbols=*/true, /*EmitImportedSymbols=*/true, - /*DeviceGlobals=*/false}; + /*DeviceGlobals=*/true}; PropertySetRegistry Properties = computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq); // TODO: Manually add `compile_target` property as in diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index e95b3ab2e58b8..b6f013032fd32 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -1150,14 +1150,19 @@ sycl_device_binaries jit_compiler::createDeviceBinaryImage( } for (const auto &FPS : DevImgInfo.Properties) { + bool IsDeviceGlobalsPropSet = + FPS.Name == __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS; PropertySetContainer PropSet{FPS.Name.c_str()}; for (const auto &FPV : FPS.Values) { if (FPV.IsUIntValue) { PropSet.addProperty( PropertyContainer{FPV.Name.c_str(), FPV.UIntValue}); } else { + std::string PrefixedName = + (IsDeviceGlobalsPropSet ? OffloadEntryPrefix : "") + + FPV.Name.c_str(); PropSet.addProperty(PropertyContainer{ - FPV.Name.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(), + PrefixedName.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(), sycl_property_type::SYCL_PROPERTY_TYPE_BYTE_ARRAY}); } } diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index a8d0bf13f287d..764940d1e8352 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -518,8 +518,45 @@ class kernel_bundle_impl { } } - return std::make_shared( + // Create the executable bundle. + auto ExecBundle = std::make_shared( MContext, MDevices, KernelIDs, KernelNames, Prefix, Language); + + // Determine IDs of all device globals referenced by this bundle's + // kernels. These IDs are also prefixed. + std::set UniqueDeviceGlobalIDs; + std::vector DeviceGlobalIDs; + for (const auto &RawImg : PM.getRawDeviceImages(KernelIDs)) { + for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals()) { + auto [It, Ins] = UniqueDeviceGlobalIDs.insert(DeviceGlobalProp->Name); + if (Ins) { + DeviceGlobalIDs.push_back(*It); + } + } + } + + for (auto *DeviceGlobalEntry : + PM.getDeviceGlobalEntries(DeviceGlobalIDs)) { + // Device globals without `device_image_scope` are usually statically + // allocated and registered in the integration footer, which we don't + // have in the RTC context. Instead, we dynamically allocate storage + // tied to the executable kernel bundle. + if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) { + auto Alloc = std::make_unique( + DeviceGlobalEntry->MDeviceGlobalTSize); + PM.addOrInitDeviceGlobalEntry(Alloc.get(), + DeviceGlobalEntry->MUniqueId.c_str()); + ExecBundle->DeviceGlobals.push_back(std::move(Alloc)); + } + + // Drop the RTC prefix from the entry's symbol name. Note that the PM + // still manages this device global under its prefixed name. + assert(DeviceGlobalEntry->MUniqueId.find(Prefix) == 0); + DeviceGlobalEntry->MUniqueId = + DeviceGlobalEntry->MUniqueId.substr(Prefix.length()); + } + + return ExecBundle; } ur_program_handle_t UrProgram = nullptr; @@ -960,6 +997,7 @@ class kernel_bundle_impl { std::vector KernelNames; std::string Prefix; include_pairs_t IncludePairs; + std::vector> DeviceGlobals; }; } // namespace detail diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index d0240bc9b8964..a5d8b1a1a41e4 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -58,6 +58,29 @@ void ff_templated(T *ptr, T *unused) { } )==="; +auto constexpr DGSource = R"===( +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +syclex::device_global DG; + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_dg_setter(int val) { + DG = val; +} + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_dg_adder(int val) { + DG = DG + val; +} + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_dg_getter(int *val) { + *val = DG; +} +)==="; + auto constexpr ESIMDSource = R"===( #include #include @@ -219,6 +242,73 @@ int test_build_and_run() { return 0; } +int test_device_global() { + namespace syclex = sycl::ext::oneapi::experimental; + using source_kb = sycl::kernel_bundle; + using exe_kb = sycl::kernel_bundle; + + sycl::queue q; + sycl::context ctx = q.get_context(); + + bool ok = + q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl_jit); + if (!ok) { + std::cout << "Apparently this device does not support `sycl_jit` source " + "kernel bundle extension: " + << q.get_device().get_info() + << std::endl; + return -1; + } + + auto modifyDG = [&q](sycl::kernel &k, int val) { + q.submit([&](sycl::handler &CGH) { + CGH.set_arg(0, val); + CGH.single_task(k); + }); + q.wait(); + }; + + auto getDG = [&q](sycl::kernel &k) -> int { + int *buf = sycl::malloc_shared(1, q); + q.submit([&](sycl::handler &CGH) { + CGH.set_arg(0, buf); + CGH.single_task(k); + }); + q.wait(); + int val = *buf; + sycl::free(buf, q); + return val; + }; + + source_kb kbSrc = syclex::create_kernel_bundle_from_source( + ctx, syclex::source_language::sycl_jit, DGSource); + + exe_kb kbExe1 = syclex::build(kbSrc); + + auto setK = kbExe1.ext_oneapi_get_kernel("ff_dg_setter"); + auto addK = kbExe1.ext_oneapi_get_kernel("ff_dg_adder"); + auto getK = kbExe1.ext_oneapi_get_kernel("ff_dg_getter"); + + assert(getDG(getK) == 0); + modifyDG(setK, 42); + assert(getDG(getK) == 42); + modifyDG(addK, 1); + assert(getDG(getK) == 43); + + exe_kb kbExe2 = syclex::build(kbSrc); + + auto setK2 = kbExe2.ext_oneapi_get_kernel("ff_dg_setter"); + auto getK2 = kbExe2.ext_oneapi_get_kernel("ff_dg_getter"); + + // `DG` is private per RTC bundle + assert(getDG(getK2) == 0); + modifyDG(setK2, -17); + assert(getDG(getK2) == -17); + assert(getDG(getK) == 43); + + return 0; +} + int test_esimd() { namespace syclex = sycl::ext::oneapi::experimental; using source_kb = sycl::kernel_bundle; @@ -390,8 +480,8 @@ int test_warning() { int main(int argc, char **) { #ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER int optional_tests = (argc > 1) ? test_warning() : 0; - return test_build_and_run() || test_esimd() || test_unsupported_options() || - test_error() || optional_tests; + return test_build_and_run() || test_device_global() || test_esimd() || + test_unsupported_options() || test_error() || optional_tests; #else static_assert(false, "Kernel Compiler feature test macro undefined"); #endif From e4ef41cff2d694c1157621755ed8e13751dd6571 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 10 Jan 2025 09:20:38 +0000 Subject: [PATCH 02/24] Add `ext_oneapi_has_device_global`. Signed-off-by: Julian Oppermann --- sycl/include/sycl/kernel_bundle.hpp | 19 +++++ sycl/source/detail/kernel_bundle_impl.hpp | 77 ++++++++++++------- sycl/source/kernel_bundle.cpp | 5 ++ .../kernel_compiler_sycl_jit.cpp | 5 ++ 4 files changed, 80 insertions(+), 26 deletions(-) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index a61019efdbf5d..8898e48e4a47b 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -201,6 +201,11 @@ class __SYCL_EXPORT kernel_bundle_plain { return ext_oneapi_get_kernel(detail::string_view{name}); } + bool ext_oneapi_has_device_global(const std::string &name, + const device &dev) { + return ext_oneapi_has_device_global(detail::string_view{name}, dev); + } + protected: // \returns a kernel object which represents the kernel identified by // kernel_id passed @@ -229,6 +234,9 @@ class __SYCL_EXPORT kernel_bundle_plain { private: bool ext_oneapi_has_kernel(detail::string_view name); kernel ext_oneapi_get_kernel(detail::string_view name); + + bool ext_oneapi_has_device_global(detail::string_view name, + const device &dev); }; } // namespace detail @@ -449,6 +457,17 @@ class kernel_bundle : public detail::kernel_bundle_plain, return detail::kernel_bundle_plain::ext_oneapi_get_kernel(name); } + ///////////////////////// + // ext_oneapi_has_device_global + // only true if created from source and has this global for the given device + ///////////////////////// + template > + bool ext_oneapi_has_device_global(const std::string &name, + const device &dev) { + return detail::kernel_bundle_plain::ext_oneapi_has_device_global(name, dev); + } + private: kernel_bundle(detail::KernelBundleImplPtr Impl) : kernel_bundle_plain(std::move(Impl)) {} diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 764940d1e8352..af169f7048da8 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -380,8 +380,9 @@ class kernel_bundle_impl { // program manager integration, only for sycl_jit language kernel_bundle_impl(context Ctx, std::vector Devs, const std::vector &KernelIDs, - std::vector KNames, std::string Pfx, - syclex::source_language Lang) + const std::vector &KNames, + const std::vector &DGNames, + const std::string &Pfx, syclex::source_language Lang) : kernel_bundle_impl(Ctx, Devs, KernelIDs, bundle_state::executable) { assert(Lang == syclex::source_language::sycl_jit); // Mark this bundle explicitly as "interop" to ensure that its kernels are @@ -391,6 +392,7 @@ class kernel_bundle_impl { // from the (unprefixed) kernel name. MIsInterop = true; KernelNames = KNames; + DeviceGlobalNames = DGNames; Prefix = Pfx; Language = Lang; } @@ -509,51 +511,60 @@ class kernel_bundle_impl { // `jit_compiler::compileSYCL(..)` uses `CompilationID + '$'` as prefix // for offload entry names. std::string Prefix = CompilationID + '$'; + auto PrefixLen = Prefix.length(); for (const auto &KernelID : PM.getAllSYCLKernelIDs()) { std::string_view KernelName{KernelID.get_name()}; if (KernelName.find(Prefix) == 0) { KernelIDs.push_back(KernelID); - KernelName.remove_prefix(Prefix.length()); + KernelName.remove_prefix(PrefixLen); KernelNames.emplace_back(KernelName); } } - // Create the executable bundle. - auto ExecBundle = std::make_shared( - MContext, MDevices, KernelIDs, KernelNames, Prefix, Language); - // Determine IDs of all device globals referenced by this bundle's // kernels. These IDs are also prefixed. - std::set UniqueDeviceGlobalIDs; - std::vector DeviceGlobalIDs; + std::set DeviceGlobalIDSet; + std::vector DeviceGlobalIDVec; + std::vector DeviceGlobalNames; for (const auto &RawImg : PM.getRawDeviceImages(KernelIDs)) { for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals()) { - auto [It, Ins] = UniqueDeviceGlobalIDs.insert(DeviceGlobalProp->Name); + std::string_view DeviceGlobalName{DeviceGlobalProp->Name}; + assert(DeviceGlobalName.find(Prefix) == 0); + auto [It, Ins] = DeviceGlobalIDSet.emplace(DeviceGlobalName); if (Ins) { - DeviceGlobalIDs.push_back(*It); + DeviceGlobalIDVec.emplace_back(DeviceGlobalName); + DeviceGlobalName.remove_prefix(PrefixLen); + DeviceGlobalNames.emplace_back(DeviceGlobalName); } } } + // Create the executable bundle. + auto ExecBundle = std::make_shared( + MContext, MDevices, KernelIDs, KernelNames, DeviceGlobalNames, Prefix, + Language); + + // Device globals are usually statically allocated and registered in the + // integration footer, which we don't have in the RTC context. Instead, we + // dynamically allocate storage tied to the executable kernel bundle. for (auto *DeviceGlobalEntry : - PM.getDeviceGlobalEntries(DeviceGlobalIDs)) { - // Device globals without `device_image_scope` are usually statically - // allocated and registered in the integration footer, which we don't - // have in the RTC context. Instead, we dynamically allocate storage - // tied to the executable kernel bundle. + PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) { + + size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize; // init value if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) { - auto Alloc = std::make_unique( - DeviceGlobalEntry->MDeviceGlobalTSize); - PM.addOrInitDeviceGlobalEntry(Alloc.get(), - DeviceGlobalEntry->MUniqueId.c_str()); - ExecBundle->DeviceGlobals.push_back(std::move(Alloc)); + // USM pointer. TODO: it's actually a decorated multi_ptr. + AllocSize += sizeof(void *); } + auto Alloc = std::make_unique(AllocSize); + std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId}; + PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data()); + ExecBundle->DeviceGlobalAllocations.push_back(std::move(Alloc)); // Drop the RTC prefix from the entry's symbol name. Note that the PM // still manages this device global under its prefixed name. - assert(DeviceGlobalEntry->MUniqueId.find(Prefix) == 0); - DeviceGlobalEntry->MUniqueId = - DeviceGlobalEntry->MUniqueId.substr(Prefix.length()); + assert(DeviceGlobalName.find(Prefix) == 0); + DeviceGlobalName.remove_prefix(PrefixLen); + DeviceGlobalEntry->MUniqueId = DeviceGlobalName; } return ExecBundle; @@ -735,6 +746,18 @@ class kernel_bundle_impl { return detail::createSyclObjFromImpl(KernelImpl); } + std::string mangle_device_global_name(const std::string &Name) { + // TODO: Support device globals declared in namespaces. + return "_Z" + std::to_string(Name.length()) + Name; + } + + bool ext_oneapi_has_device_global(const std::string &Name, + [[maybe_unused]] const device &Dev) { + std::string MangledName = mangle_device_global_name(Name); + return std::find(DeviceGlobalNames.begin(), DeviceGlobalNames.end(), + MangledName) != DeviceGlobalNames.end(); + } + bool empty() const noexcept { return MDeviceImages.empty(); } backend get_backend() const noexcept { @@ -993,11 +1016,13 @@ class kernel_bundle_impl { // Language is for both state::source and state::executable. syclex::source_language Language = syclex::source_language::opencl; const std::variant> Source; - // only kernel_bundles created from source have KernelNames member. + // only kernel_bundles created from source have the following members. std::vector KernelNames; + std::vector DeviceGlobalNames; std::string Prefix; include_pairs_t IncludePairs; - std::vector> DeviceGlobals; + + std::vector> DeviceGlobalAllocations; }; } // namespace detail diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index e19c2b9df2a75..84150152cddee 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -124,6 +124,11 @@ kernel kernel_bundle_plain::ext_oneapi_get_kernel(detail::string_view name) { return impl->ext_oneapi_get_kernel(name.data(), impl); } +bool kernel_bundle_plain::ext_oneapi_has_device_global(detail::string_view name, + const device &dev) { + return impl->ext_oneapi_has_device_global(name.data(), dev); +} + ////////////////////////////////// ///// sycl::detail free functions ////////////////////////////////// diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index a5d8b1a1a41e4..e19afc005db06 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -285,6 +285,11 @@ int test_device_global() { exe_kb kbExe1 = syclex::build(kbSrc); + // Check presence of device global. + assert(kbExe1.ext_oneapi_has_device_global("DG", q.get_device())); + // Querying a non-existing device global shall not crash. + assert(!kbExe1.ext_oneapi_has_device_global("bogus_DG", q.get_device())); + auto setK = kbExe1.ext_oneapi_get_kernel("ff_dg_setter"); auto addK = kbExe1.ext_oneapi_get_kernel("ff_dg_adder"); auto getK = kbExe1.ext_oneapi_get_kernel("ff_dg_getter"); From d45415dce958d40ce0b65d3b4d03edb136a09ac7 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 13 Jan 2025 08:11:12 +0000 Subject: [PATCH 03/24] Add get_address, get_size, and attempt to design a copy-API. Signed-off-by: Julian Oppermann --- sycl/include/sycl/kernel_bundle.hpp | 34 ++++++++ sycl/source/detail/kernel_bundle_impl.hpp | 83 +++++++++++++++++-- .../kernel_compiler/kernel_compiler_sycl.cpp | 37 +++++++++ .../kernel_compiler/kernel_compiler_sycl.hpp | 17 ++++ sycl/source/kernel_bundle.cpp | 24 ++++++ .../kernel_compiler_sycl_jit.cpp | 82 +++++++++--------- 6 files changed, 230 insertions(+), 47 deletions(-) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 8898e48e4a47b..472d577f20d2b 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -206,6 +206,30 @@ class __SYCL_EXPORT kernel_bundle_plain { return ext_oneapi_has_device_global(detail::string_view{name}, dev); } + void *ext_oneapi_get_device_global_address(const std::string &name, + const device &dev) { + return ext_oneapi_get_device_global_address(detail::string_view{name}, dev); + } + + size_t ext_oneapi_get_device_global_size(const std::string &name, + const device &dev) { + return ext_oneapi_get_device_global_size(detail::string_view{name}, dev); + } + + template + event ext_oneapi_copy_to_device_global(const std::string &dest, const T &src, + const queue &queue) { + return ext_oneapi_copy_to_device_global(detail::string_view{dest}, &src, + sizeof(T), queue); + } + + template + event ext_oneapi_copy_from_device_global(T &dest, const std::string &src, + const queue &queue) { + return ext_oneapi_copy_from_device_global(&dest, detail::string_view{src}, + sizeof(T), queue); + } + protected: // \returns a kernel object which represents the kernel identified by // kernel_id passed @@ -237,6 +261,16 @@ class __SYCL_EXPORT kernel_bundle_plain { bool ext_oneapi_has_device_global(detail::string_view name, const device &dev); + void *ext_oneapi_get_device_global_address(detail::string_view name, + const device &dev); + size_t ext_oneapi_get_device_global_size(detail::string_view name, + const device &dev); + event ext_oneapi_copy_to_device_global(detail::string_view dest, + const void *src, size_t num_bytes, + const queue &queue); + event ext_oneapi_copy_from_device_global(void *dest, detail::string_view src, + size_t num_bytes, + const queue &queue); }; } // namespace detail diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index af169f7048da8..555f5075d4d89 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -674,6 +674,8 @@ class kernel_bundle_impl { KernelNames, Language); } + // Utility methods for kernel_compiler functionality +private: std::string adjust_kernel_name(const std::string &Name, syclex::source_language Lang) { // Once name demangling support is in, we won't need this. @@ -685,6 +687,35 @@ class kernel_bundle_impl { return isMangled ? Name : "__sycl_kernel_" + Name; } + std::string mangle_device_global_name(const std::string &Name) { + // TODO: Support device globals declared in namespaces. + return "_Z" + std::to_string(Name.length()) + Name; + } + + const DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name, + const device &Dev) { + if (Language != syclex::source_language::sycl_jit || Prefix.empty()) { + throw sycl::exception(make_error_code(errc::invalid), + "Querying device globals by name is only available " + "in kernel_bundles successfully built from " + "kernel_bundle " + "with 'sycl_jit' source language."); + } + + if (!ext_oneapi_has_device_global(Name, Dev)) { + throw sycl::exception(make_error_code(errc::invalid), + "device global '" + Name + + "' not found in kernel_bundle"); + } + + std::vector Entries = + ProgramManager::getInstance().getDeviceGlobalEntries( + {Prefix + mangle_device_global_name(Name)}); + assert(Entries.size() == 1); + return Entries.front(); + } + +public: bool ext_oneapi_has_kernel(const std::string &Name) { auto it = std::find(KernelNames.begin(), KernelNames.end(), adjust_kernel_name(Name, Language)); @@ -746,18 +777,58 @@ class kernel_bundle_impl { return detail::createSyclObjFromImpl(KernelImpl); } - std::string mangle_device_global_name(const std::string &Name) { - // TODO: Support device globals declared in namespaces. - return "_Z" + std::to_string(Name.length()) + Name; - } - bool ext_oneapi_has_device_global(const std::string &Name, - [[maybe_unused]] const device &Dev) { + const device &Dev) { + if (!std::any_of( + MDevices.begin(), MDevices.end(), + [&Dev](const device &DevCand) { return Dev == DevCand; })) { + // TODO: device_image::has_kernel(id, device) checks the device if the + // given device is a sub-device. + return false; + } + std::string MangledName = mangle_device_global_name(Name); return std::find(DeviceGlobalNames.begin(), DeviceGlobalNames.end(), MangledName) != DeviceGlobalNames.end(); } + void *ext_oneapi_get_device_global_address(const std::string &Name, + const device &Dev) { + return const_cast( + get_device_global_entry(Name, Dev)->MDeviceGlobalPtr); + } + + size_t ext_oneapi_get_device_global_size(const std::string &Name, + const device &Dev) { + return get_device_global_entry(Name, Dev)->MDeviceGlobalTSize; + } + + event ext_oneapi_copy_to_device_global(const std::string &Dest, + const void *Src, size_t NumBytes, + const queue &Queue) { + const auto *Entry = get_device_global_entry(Dest, Queue.get_device()); + if (NumBytes != Entry->MDeviceGlobalTSize) { + throw sycl::exception(make_error_code(errc::invalid), + "Incompatible type size for device global '" + + Dest + "'"); + } + return syclex::detail::SYCL_JIT_memcpy_to_device_global( + Entry, Src, NumBytes, /*Offset=*/0, Queue, /*DepEvents=*/{}); + } + + event ext_oneapi_copy_from_device_global(void *Dest, const std::string &Src, + size_t NumBytes, + const queue &Queue) { + const auto *Entry = get_device_global_entry(Src, Queue.get_device()); + if (NumBytes != Entry->MDeviceGlobalTSize) { + throw sycl::exception(make_error_code(errc::invalid), + "Incompatible type size for device global '" + Src + + "'"); + } + return syclex::detail::SYCL_JIT_memcpy_from_device_global( + Dest, Entry, NumBytes, /*Offset=*/0, Queue, /*DepEvents=*/{}); + } + bool empty() const noexcept { return MDeviceImages.empty(); } backend get_backend() const noexcept { diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 9108572bb5b1d..130f49f55a3d3 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -288,6 +288,8 @@ bool SYCL_Compilation_Available() { #if SYCL_EXT_JIT_ENABLE #include "../jit_compiler.hpp" #include +#include +#include #endif namespace sycl { @@ -323,6 +325,41 @@ std::pair SYCL_JIT_to_SPIRV( #endif } +event SYCL_JIT_memcpy_to_device_global( + [[maybe_unused]] const DeviceGlobalMapEntry *Dest, + [[maybe_unused]] const void *Src, [[maybe_unused]] size_t NumBytes, + [[maybe_unused]] size_t Offset, [[maybe_unused]] const queue &Queue, + [[maybe_unused]] const std::vector &DepEvents) { +#if SYCL_EXT_JIT_ENABLE + const std::shared_ptr &QueueImplPtr = getSyclObjImpl(Queue); + return QueueImplPtr->memcpyToDeviceGlobal( + QueueImplPtr, const_cast(Dest->MDeviceGlobalPtr), Src, + Dest->MIsDeviceImageScopeDecorated, NumBytes, Offset, DepEvents, + /*CallerNeedsEvent=*/true); +#else + throw sycl::exception(sycl::errc::invalid, + "runtime-compiled device global support not available"); +#endif +} + +event SYCL_JIT_memcpy_from_device_global( + [[maybe_unused]] void *Dest, + [[maybe_unused]] const DeviceGlobalMapEntry *Src, + [[maybe_unused]] size_t NumBytes, [[maybe_unused]] size_t Offset, + [[maybe_unused]] const queue &Queue, + [[maybe_unused]] const std::vector &DepEvents) { +#if SYCL_EXT_JIT_ENABLE + const std::shared_ptr &QueueImplPtr = getSyclObjImpl(Queue); + return QueueImplPtr->memcpyFromDeviceGlobal( + QueueImplPtr, Dest, const_cast(Src->MDeviceGlobalPtr), + Src->MIsDeviceImageScopeDecorated, NumBytes, Offset, DepEvents, + /*CallerNeedsEvent=*/true); +#else + throw sycl::exception(sycl::errc::invalid, + "runtime-compiled device global support not available"); +#endif +} + } // namespace detail } // namespace ext::oneapi::experimental } // namespace _V1 diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index 8187c5373150a..c46bbd98c4b77 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -11,6 +11,7 @@ #include #include // __SYCL_EXPORT #include +#include #include // sycl_device_binaries @@ -20,6 +21,11 @@ namespace sycl { inline namespace _V1 { + +namespace detail { +struct DeviceGlobalMapEntry; +} // namespace detail + namespace ext::oneapi::experimental { namespace detail { @@ -42,6 +48,17 @@ SYCL_JIT_to_SPIRV(const std::string &Source, include_pairs_t IncludePairs, bool SYCL_JIT_Compilation_Available(); +event SYCL_JIT_memcpy_to_device_global(const DeviceGlobalMapEntry *Dest, + const void *Src, size_t NumBytes, + size_t Offset, const queue &Queue, + const std::vector &DepEvents); + +event SYCL_JIT_memcpy_from_device_global(void *Dest, + const DeviceGlobalMapEntry *Src, + size_t NumBytes, size_t Offset, + const queue &Queue, + const std::vector &DepEvents); + } // namespace detail } // namespace ext::oneapi::experimental diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 84150152cddee..9e1782dcc69fe 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -129,6 +129,30 @@ bool kernel_bundle_plain::ext_oneapi_has_device_global(detail::string_view name, return impl->ext_oneapi_has_device_global(name.data(), dev); } +void *kernel_bundle_plain::ext_oneapi_get_device_global_address( + detail::string_view name, const device &dev) { + return impl->ext_oneapi_get_device_global_address(name.data(), dev); +} + +size_t +kernel_bundle_plain::ext_oneapi_get_device_global_size(detail::string_view name, + const device &dev) { + return impl->ext_oneapi_get_device_global_size(name.data(), dev); +} + +event kernel_bundle_plain::ext_oneapi_copy_to_device_global( + detail::string_view dest, const void *src, size_t num_bytes, + const queue &queue) { + return impl->ext_oneapi_copy_to_device_global(dest.data(), src, num_bytes, + queue); +} + +event kernel_bundle_plain::ext_oneapi_copy_from_device_global( + void *dest, detail::string_view src, size_t num_bytes, const queue &queue) { + return impl->ext_oneapi_copy_from_device_global(dest, src.data(), num_bytes, + queue); +} + ////////////////////////////////// ///// sycl::detail free functions ////////////////////////////////// diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index e19afc005db06..60606c551e8cd 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -63,12 +63,8 @@ auto constexpr DGSource = R"===( namespace syclex = sycl::ext::oneapi::experimental; -syclex::device_global DG; - -extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( - (syclex::single_task_kernel)) void ff_dg_setter(int val) { - DG = val; -} +syclex::device_global DG; +syclex::device_global DG_DIS; extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( (syclex::single_task_kernel)) void ff_dg_adder(int val) { @@ -76,8 +72,8 @@ extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( } extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( - (syclex::single_task_kernel)) void ff_dg_getter(int *val) { - *val = DG; + (syclex::single_task_kernel)) void ff_dg_dis_adder(int val) { + DG_DIS += val; } )==="; @@ -249,14 +245,13 @@ int test_device_global() { sycl::queue q; sycl::context ctx = q.get_context(); + sycl::device d = q.get_device(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl_jit); + bool ok = d.ext_oneapi_can_compile(syclex::source_language::sycl_jit); if (!ok) { std::cout << "Apparently this device does not support `sycl_jit` source " "kernel bundle extension: " - << q.get_device().get_info() - << std::endl; + << d.get_info() << std::endl; return -1; } @@ -268,48 +263,53 @@ int test_device_global() { q.wait(); }; - auto getDG = [&q](sycl::kernel &k) -> int { - int *buf = sycl::malloc_shared(1, q); - q.submit([&](sycl::handler &CGH) { - CGH.set_arg(0, buf); - CGH.single_task(k); - }); - q.wait(); - int val = *buf; - sycl::free(buf, q); - return val; + int32_t i32_val; + int64_t i64_val; + auto checkDGs = [&](int32_t expected32, int64_t expected64, exe_kb &bundle) { + bundle.ext_oneapi_copy_from_device_global(i32_val, "DG", q).wait(); + bundle.ext_oneapi_copy_from_device_global(i64_val, "DG_DIS", q).wait(); + std::cout << "DG = " << i32_val << ", DG_DIS = " << i64_val << '\n'; + assert(i32_val == expected32); + assert(i64_val == expected64); }; source_kb kbSrc = syclex::create_kernel_bundle_from_source( ctx, syclex::source_language::sycl_jit, DGSource); exe_kb kbExe1 = syclex::build(kbSrc); + auto addK = kbExe1.ext_oneapi_get_kernel("ff_dg_adder"); + auto addDisK = kbExe1.ext_oneapi_get_kernel("ff_dg_dis_adder"); - // Check presence of device global. - assert(kbExe1.ext_oneapi_has_device_global("DG", q.get_device())); + // Check presence of device globals. + assert(kbExe1.ext_oneapi_has_device_global("DG", d)); + assert(kbExe1.ext_oneapi_has_device_global("DG_DIS", d)); // Querying a non-existing device global shall not crash. - assert(!kbExe1.ext_oneapi_has_device_global("bogus_DG", q.get_device())); + assert(!kbExe1.ext_oneapi_has_device_global("bogus_DG", d)); - auto setK = kbExe1.ext_oneapi_get_kernel("ff_dg_setter"); - auto addK = kbExe1.ext_oneapi_get_kernel("ff_dg_adder"); - auto getK = kbExe1.ext_oneapi_get_kernel("ff_dg_getter"); + // Check sizes only, as addresses are not meaningful to the app). + assert(kbExe1.ext_oneapi_get_device_global_size("DG", d) == 4); + assert(kbExe1.ext_oneapi_get_device_global_size("DG_DIS", d) == 8); - assert(getDG(getK) == 0); - modifyDG(setK, 42); - assert(getDG(getK) == 42); - modifyDG(addK, 1); - assert(getDG(getK) == 43); + // Both variables should be zero-initialized. + checkDGs(0, 0, kbExe1); - exe_kb kbExe2 = syclex::build(kbSrc); + // Set. + kbExe1.ext_oneapi_copy_to_device_global("DG", -10, q).wait(); + kbExe1.ext_oneapi_copy_to_device_global("DG_DIS", -20L, q).wait(); - auto setK2 = kbExe2.ext_oneapi_get_kernel("ff_dg_setter"); - auto getK2 = kbExe2.ext_oneapi_get_kernel("ff_dg_getter"); + checkDGs(-10, -20, kbExe1); + + // Increment. + modifyDG(addK, 5); + modifyDG(addDisK, -5); + + checkDGs(-5, -25, kbExe1); + + // Rebuilding to test isololation per bundle. + exe_kb kbExe2 = syclex::build(kbSrc); - // `DG` is private per RTC bundle - assert(getDG(getK2) == 0); - modifyDG(setK2, -17); - assert(getDG(getK2) == -17); - assert(getDG(getK) == 43); + checkDGs(0, 0, kbExe2); + checkDGs(-5, -25, kbExe1); return 0; } From e6d205b3a93387790c82d46c0c90ce664acccc5b Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 13 Jan 2025 08:23:28 +0000 Subject: [PATCH 04/24] Add symbols to dump (linux only for now) Signed-off-by: Julian Oppermann --- sycl/test/abi/sycl_symbols_linux.dump | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 26a129e33ef85..29ab78e59add5 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3280,7 +3280,12 @@ _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextEbNS0_12bundle_stateENS0_ _ZN4sycl3_V16detail18stringifyErrorCodeEi _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelENS1_11string_viewE +_ZN4sycl3_V16detail19kernel_bundle_plain28ext_oneapi_has_device_globalENS1_11string_viewERKNS0_6deviceE +_ZN4sycl3_V16detail19kernel_bundle_plain32ext_oneapi_copy_to_device_globalENS1_11string_viewEPKvmRKNS0_5queueE _ZN4sycl3_V16detail19kernel_bundle_plain32set_specialization_constant_implEPKcPvm +_ZN4sycl3_V16detail19kernel_bundle_plain33ext_oneapi_get_device_global_sizeENS1_11string_viewERKNS0_6deviceE +_ZN4sycl3_V16detail19kernel_bundle_plain34ext_oneapi_copy_from_device_globalEPvNS1_11string_viewEmRKNS0_5queueE +_ZN4sycl3_V16detail19kernel_bundle_plain36ext_oneapi_get_device_global_addressENS1_11string_viewERKNS0_6deviceE _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_16AccessorBaseHostENS0_6access6targetE _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_28SampledImageAccessorBaseHostENS0_12image_targetE _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_30UnsampledImageAccessorBaseHostENS0_12image_targetE From 4d0d08d110f0c768ffdac2653ae2559288557ee9 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 13 Jan 2025 21:27:48 +0000 Subject: [PATCH 05/24] Drop proposed copy methods on kernel_bundle Signed-off-by: Julian Oppermann --- sycl/include/sycl/kernel_bundle.hpp | 20 ---------- sycl/source/detail/kernel_bundle_impl.hpp | 26 ------------- .../kernel_compiler/kernel_compiler_sycl.cpp | 37 ------------------ .../kernel_compiler/kernel_compiler_sycl.hpp | 17 -------- sycl/source/kernel_bundle.cpp | 13 ------- .../kernel_compiler_sycl_jit.cpp | 39 ------------------- sycl/test/abi/sycl_symbols_linux.dump | 2 - 7 files changed, 154 deletions(-) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 472d577f20d2b..6203330d2148a 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -216,20 +216,6 @@ class __SYCL_EXPORT kernel_bundle_plain { return ext_oneapi_get_device_global_size(detail::string_view{name}, dev); } - template - event ext_oneapi_copy_to_device_global(const std::string &dest, const T &src, - const queue &queue) { - return ext_oneapi_copy_to_device_global(detail::string_view{dest}, &src, - sizeof(T), queue); - } - - template - event ext_oneapi_copy_from_device_global(T &dest, const std::string &src, - const queue &queue) { - return ext_oneapi_copy_from_device_global(&dest, detail::string_view{src}, - sizeof(T), queue); - } - protected: // \returns a kernel object which represents the kernel identified by // kernel_id passed @@ -265,12 +251,6 @@ class __SYCL_EXPORT kernel_bundle_plain { const device &dev); size_t ext_oneapi_get_device_global_size(detail::string_view name, const device &dev); - event ext_oneapi_copy_to_device_global(detail::string_view dest, - const void *src, size_t num_bytes, - const queue &queue); - event ext_oneapi_copy_from_device_global(void *dest, detail::string_view src, - size_t num_bytes, - const queue &queue); }; } // namespace detail diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 555f5075d4d89..e94b5eaa118a5 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -803,32 +803,6 @@ class kernel_bundle_impl { return get_device_global_entry(Name, Dev)->MDeviceGlobalTSize; } - event ext_oneapi_copy_to_device_global(const std::string &Dest, - const void *Src, size_t NumBytes, - const queue &Queue) { - const auto *Entry = get_device_global_entry(Dest, Queue.get_device()); - if (NumBytes != Entry->MDeviceGlobalTSize) { - throw sycl::exception(make_error_code(errc::invalid), - "Incompatible type size for device global '" + - Dest + "'"); - } - return syclex::detail::SYCL_JIT_memcpy_to_device_global( - Entry, Src, NumBytes, /*Offset=*/0, Queue, /*DepEvents=*/{}); - } - - event ext_oneapi_copy_from_device_global(void *Dest, const std::string &Src, - size_t NumBytes, - const queue &Queue) { - const auto *Entry = get_device_global_entry(Src, Queue.get_device()); - if (NumBytes != Entry->MDeviceGlobalTSize) { - throw sycl::exception(make_error_code(errc::invalid), - "Incompatible type size for device global '" + Src + - "'"); - } - return syclex::detail::SYCL_JIT_memcpy_from_device_global( - Dest, Entry, NumBytes, /*Offset=*/0, Queue, /*DepEvents=*/{}); - } - bool empty() const noexcept { return MDeviceImages.empty(); } backend get_backend() const noexcept { diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 130f49f55a3d3..9108572bb5b1d 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -288,8 +288,6 @@ bool SYCL_Compilation_Available() { #if SYCL_EXT_JIT_ENABLE #include "../jit_compiler.hpp" #include -#include -#include #endif namespace sycl { @@ -325,41 +323,6 @@ std::pair SYCL_JIT_to_SPIRV( #endif } -event SYCL_JIT_memcpy_to_device_global( - [[maybe_unused]] const DeviceGlobalMapEntry *Dest, - [[maybe_unused]] const void *Src, [[maybe_unused]] size_t NumBytes, - [[maybe_unused]] size_t Offset, [[maybe_unused]] const queue &Queue, - [[maybe_unused]] const std::vector &DepEvents) { -#if SYCL_EXT_JIT_ENABLE - const std::shared_ptr &QueueImplPtr = getSyclObjImpl(Queue); - return QueueImplPtr->memcpyToDeviceGlobal( - QueueImplPtr, const_cast(Dest->MDeviceGlobalPtr), Src, - Dest->MIsDeviceImageScopeDecorated, NumBytes, Offset, DepEvents, - /*CallerNeedsEvent=*/true); -#else - throw sycl::exception(sycl::errc::invalid, - "runtime-compiled device global support not available"); -#endif -} - -event SYCL_JIT_memcpy_from_device_global( - [[maybe_unused]] void *Dest, - [[maybe_unused]] const DeviceGlobalMapEntry *Src, - [[maybe_unused]] size_t NumBytes, [[maybe_unused]] size_t Offset, - [[maybe_unused]] const queue &Queue, - [[maybe_unused]] const std::vector &DepEvents) { -#if SYCL_EXT_JIT_ENABLE - const std::shared_ptr &QueueImplPtr = getSyclObjImpl(Queue); - return QueueImplPtr->memcpyFromDeviceGlobal( - QueueImplPtr, Dest, const_cast(Src->MDeviceGlobalPtr), - Src->MIsDeviceImageScopeDecorated, NumBytes, Offset, DepEvents, - /*CallerNeedsEvent=*/true); -#else - throw sycl::exception(sycl::errc::invalid, - "runtime-compiled device global support not available"); -#endif -} - } // namespace detail } // namespace ext::oneapi::experimental } // namespace _V1 diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index c46bbd98c4b77..8187c5373150a 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -11,7 +11,6 @@ #include #include // __SYCL_EXPORT #include -#include #include // sycl_device_binaries @@ -21,11 +20,6 @@ namespace sycl { inline namespace _V1 { - -namespace detail { -struct DeviceGlobalMapEntry; -} // namespace detail - namespace ext::oneapi::experimental { namespace detail { @@ -48,17 +42,6 @@ SYCL_JIT_to_SPIRV(const std::string &Source, include_pairs_t IncludePairs, bool SYCL_JIT_Compilation_Available(); -event SYCL_JIT_memcpy_to_device_global(const DeviceGlobalMapEntry *Dest, - const void *Src, size_t NumBytes, - size_t Offset, const queue &Queue, - const std::vector &DepEvents); - -event SYCL_JIT_memcpy_from_device_global(void *Dest, - const DeviceGlobalMapEntry *Src, - size_t NumBytes, size_t Offset, - const queue &Queue, - const std::vector &DepEvents); - } // namespace detail } // namespace ext::oneapi::experimental diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 9e1782dcc69fe..3c6fc6580a04e 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -140,19 +140,6 @@ kernel_bundle_plain::ext_oneapi_get_device_global_size(detail::string_view name, return impl->ext_oneapi_get_device_global_size(name.data(), dev); } -event kernel_bundle_plain::ext_oneapi_copy_to_device_global( - detail::string_view dest, const void *src, size_t num_bytes, - const queue &queue) { - return impl->ext_oneapi_copy_to_device_global(dest.data(), src, num_bytes, - queue); -} - -event kernel_bundle_plain::ext_oneapi_copy_from_device_global( - void *dest, detail::string_view src, size_t num_bytes, const queue &queue) { - return impl->ext_oneapi_copy_from_device_global(dest, src.data(), num_bytes, - queue); -} - ////////////////////////////////// ///// sycl::detail free functions ////////////////////////////////// diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index 60606c551e8cd..5bc5fb9969ab8 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -64,17 +64,12 @@ auto constexpr DGSource = R"===( namespace syclex = sycl::ext::oneapi::experimental; syclex::device_global DG; -syclex::device_global DG_DIS; extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( (syclex::single_task_kernel)) void ff_dg_adder(int val) { DG = DG + val; } -extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( - (syclex::single_task_kernel)) void ff_dg_dis_adder(int val) { - DG_DIS += val; -} )==="; auto constexpr ESIMDSource = R"===( @@ -263,53 +258,19 @@ int test_device_global() { q.wait(); }; - int32_t i32_val; - int64_t i64_val; - auto checkDGs = [&](int32_t expected32, int64_t expected64, exe_kb &bundle) { - bundle.ext_oneapi_copy_from_device_global(i32_val, "DG", q).wait(); - bundle.ext_oneapi_copy_from_device_global(i64_val, "DG_DIS", q).wait(); - std::cout << "DG = " << i32_val << ", DG_DIS = " << i64_val << '\n'; - assert(i32_val == expected32); - assert(i64_val == expected64); - }; - source_kb kbSrc = syclex::create_kernel_bundle_from_source( ctx, syclex::source_language::sycl_jit, DGSource); exe_kb kbExe1 = syclex::build(kbSrc); auto addK = kbExe1.ext_oneapi_get_kernel("ff_dg_adder"); - auto addDisK = kbExe1.ext_oneapi_get_kernel("ff_dg_dis_adder"); // Check presence of device globals. assert(kbExe1.ext_oneapi_has_device_global("DG", d)); - assert(kbExe1.ext_oneapi_has_device_global("DG_DIS", d)); // Querying a non-existing device global shall not crash. assert(!kbExe1.ext_oneapi_has_device_global("bogus_DG", d)); // Check sizes only, as addresses are not meaningful to the app). assert(kbExe1.ext_oneapi_get_device_global_size("DG", d) == 4); - assert(kbExe1.ext_oneapi_get_device_global_size("DG_DIS", d) == 8); - - // Both variables should be zero-initialized. - checkDGs(0, 0, kbExe1); - - // Set. - kbExe1.ext_oneapi_copy_to_device_global("DG", -10, q).wait(); - kbExe1.ext_oneapi_copy_to_device_global("DG_DIS", -20L, q).wait(); - - checkDGs(-10, -20, kbExe1); - - // Increment. - modifyDG(addK, 5); - modifyDG(addDisK, -5); - - checkDGs(-5, -25, kbExe1); - - // Rebuilding to test isololation per bundle. - exe_kb kbExe2 = syclex::build(kbSrc); - - checkDGs(0, 0, kbExe2); - checkDGs(-5, -25, kbExe1); return 0; } diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 29ab78e59add5..26c595c5dfca7 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3281,10 +3281,8 @@ _ZN4sycl3_V16detail18stringifyErrorCodeEi _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain28ext_oneapi_has_device_globalENS1_11string_viewERKNS0_6deviceE -_ZN4sycl3_V16detail19kernel_bundle_plain32ext_oneapi_copy_to_device_globalENS1_11string_viewEPKvmRKNS0_5queueE _ZN4sycl3_V16detail19kernel_bundle_plain32set_specialization_constant_implEPKcPvm _ZN4sycl3_V16detail19kernel_bundle_plain33ext_oneapi_get_device_global_sizeENS1_11string_viewERKNS0_6deviceE -_ZN4sycl3_V16detail19kernel_bundle_plain34ext_oneapi_copy_from_device_globalEPvNS1_11string_viewEmRKNS0_5queueE _ZN4sycl3_V16detail19kernel_bundle_plain36ext_oneapi_get_device_global_addressENS1_11string_viewERKNS0_6deviceE _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_16AccessorBaseHostENS0_6access6targetE _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_28SampledImageAccessorBaseHostENS0_12image_targetE From 03c24ca37052372e1e3a7f18c9b563ac1d41f555 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 13 Jan 2025 21:52:20 +0000 Subject: [PATCH 06/24] Add proper device check Signed-off-by: Julian Oppermann --- sycl/source/detail/kernel_bundle_impl.hpp | 25 ++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index e94b5eaa118a5..901cf0e26f50b 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -692,6 +692,25 @@ class kernel_bundle_impl { return "_Z" + std::to_string(Name.length()) + Name; } + bool is_valid_device(const device &DeviceCand) { + // Check if the device is in this bundle's list of devices. + if (std::count(MDevices.begin(), MDevices.end(), DeviceCand)) { + return true; + } + + // Otherwise, if the device candidate is a sub-device it is also valid if + // its parent is valid. + if (!getSyclObjImpl(DeviceCand)->isRootDevice()) { + try { + return is_valid_device( + DeviceCand.get_info()); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in is_valid_device", e); + } + } + return false; + } + const DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name, const device &Dev) { if (Language != syclex::source_language::sycl_jit || Prefix.empty()) { @@ -779,11 +798,7 @@ class kernel_bundle_impl { bool ext_oneapi_has_device_global(const std::string &Name, const device &Dev) { - if (!std::any_of( - MDevices.begin(), MDevices.end(), - [&Dev](const device &DevCand) { return Dev == DevCand; })) { - // TODO: device_image::has_kernel(id, device) checks the device if the - // given device is a sub-device. + if (!is_valid_device(Dev)) { return false; } From 00dd1b2cf888544cf4def77440075b41e9e80209 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 13 Jan 2025 23:14:39 +0000 Subject: [PATCH 07/24] Return USM pointer from ext_oneapi_get_device_global_address Signed-off-by: Julian Oppermann --- sycl/source/detail/kernel_bundle_impl.hpp | 20 ++++++++--- .../kernel_compiler_sycl_jit.cpp | 33 +++++++++++++++++-- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 901cf0e26f50b..06eefa71c889b 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -711,8 +711,8 @@ class kernel_bundle_impl { return false; } - const DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name, - const device &Dev) { + DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name, + const device &Dev) { if (Language != syclex::source_language::sycl_jit || Prefix.empty()) { throw sycl::exception(make_error_code(errc::invalid), "Querying device globals by name is only available " @@ -809,8 +809,20 @@ class kernel_bundle_impl { void *ext_oneapi_get_device_global_address(const std::string &Name, const device &Dev) { - return const_cast( - get_device_global_entry(Name, Dev)->MDeviceGlobalPtr); + DeviceGlobalMapEntry *Entry = get_device_global_entry(Name, Dev); + if (Entry->MIsDeviceImageScopeDecorated) { + throw sycl::exception(make_error_code(errc::invalid), + "Cannot query USM pointer for device global with " + "'device_image_scope' property"); + } + + // TODO: Is this the right approach? Should we just pass the queue as an + // argument? + queue InitQueue{Dev}; + auto &USMMem = + Entry->getOrAllocateDeviceGlobalUSM(getSyclObjImpl(InitQueue)); + InitQueue.wait_and_throw(); + return USMMem.getPtr(); } size_t ext_oneapi_get_device_global_size(const std::string &Name, diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index 5bc5fb9969ab8..5dfd2829ed5e9 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -269,8 +269,37 @@ int test_device_global() { // Querying a non-existing device global shall not crash. assert(!kbExe1.ext_oneapi_has_device_global("bogus_DG", d)); - // Check sizes only, as addresses are not meaningful to the app). - assert(kbExe1.ext_oneapi_get_device_global_size("DG", d) == 4); + void *dgAddr = kbExe1.ext_oneapi_get_device_global_address("DG", d); + size_t dgSize = kbExe1.ext_oneapi_get_device_global_size("DG", d); + assert(dgSize == 4); + + int32_t val; + auto checkVal = [&](int32_t expected) { + val = -1; + q.memcpy(&val, dgAddr, dgSize).wait(); + std::cout << "val: " << val << " == " << expected << '\n'; + assert(val == expected); + }; + + // Device globals are zero-initialized. + checkVal(0); + + // Set the DG. + val = 123; + q.memcpy(dgAddr, &val, dgSize).wait(); + checkVal(123); + + // Run a kernel using it. + modifyDG(addK, -17); + checkVal(123 - 17); + + // Test that each bundle has its distinct set of globals. + exe_kb kbExe2 = syclex::build(kbSrc); + dgAddr = kbExe2.ext_oneapi_get_device_global_address("DG", d); + checkVal(0); + + dgAddr = kbExe1.ext_oneapi_get_device_global_address("DG", d); + checkVal(123 - 17); return 0; } From 1aaa3b8a912a48d1a546cacf40d9f78b707792a5 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 13 Jan 2025 23:27:02 +0000 Subject: [PATCH 08/24] Add missing methods on kernel_bundle Signed-off-by: Julian Oppermann --- sycl/include/sycl/kernel_bundle.hpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 6203330d2148a..56fb43ee6cca8 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -482,6 +482,33 @@ class kernel_bundle : public detail::kernel_bundle_plain, return detail::kernel_bundle_plain::ext_oneapi_has_device_global(name, dev); } + ///////////////////////// + // ext_oneapi_get_device_global_address + // kernel_bundle must be created from source, throws if device global is not + // present for the given device, or has `device_image_scope` property. + // Returns a USM pointer to the variable's allocation on the device. + ///////////////////////// + template > + void *ext_oneapi_get_device_global_address(const std::string &name, + const device &dev) { + return detail::kernel_bundle_plain::ext_oneapi_get_device_global_address( + name, dev); + } + + ///////////////////////// + // ext_oneapi_get_device_global_size + // kernel_bundle must be created from source, throws if device global is not + // present for the given device. Returns size in bytes. + ///////////////////////// + template > + size_t ext_oneapi_get_device_global_size(const std::string &name, + const device &dev) { + return detail::kernel_bundle_plain::ext_oneapi_get_device_global_size(name, + dev); + } + private: kernel_bundle(detail::KernelBundleImplPtr Impl) : kernel_bundle_plain(std::move(Impl)) {} From 4b7dd5c9080c8486be91b1ac556853a66d47f967 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 13 Jan 2025 23:46:41 +0000 Subject: [PATCH 09/24] Add test for device_image_scope globals. Signed-off-by: Julian Oppermann --- .../kernel_compiler_sycl_jit.cpp | 46 +++++++++++++++---- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index 5dfd2829ed5e9..47a72b0ce46d4 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -67,7 +67,16 @@ syclex::device_global DG; extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( (syclex::single_task_kernel)) void ff_dg_adder(int val) { - DG = DG + val; + DG += val; +} + +syclex::device_global DG_DIS; + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_swap(int64_t *val) { + int64_t tmp = DG_DIS; + DG_DIS = *val; + *val = tmp; } )==="; @@ -250,14 +259,6 @@ int test_device_global() { return -1; } - auto modifyDG = [&q](sycl::kernel &k, int val) { - q.submit([&](sycl::handler &CGH) { - CGH.set_arg(0, val); - CGH.single_task(k); - }); - q.wait(); - }; - source_kb kbSrc = syclex::create_kernel_bundle_from_source( ctx, syclex::source_language::sycl_jit, DGSource); @@ -290,7 +291,12 @@ int test_device_global() { checkVal(123); // Run a kernel using it. - modifyDG(addK, -17); + val = -17; + q.submit([&](sycl::handler &CGH) { + CGH.set_arg(0, val); + CGH.single_task(addK); + }); + q.wait(); checkVal(123 - 17); // Test that each bundle has its distinct set of globals. @@ -301,6 +307,26 @@ int test_device_global() { dgAddr = kbExe1.ext_oneapi_get_device_global_address("DG", d); checkVal(123 - 17); + // Test global with `device_image_scope`. We currently cannot read/write these + // from the host, but they should work device-only. + auto swapK = kbExe2.ext_oneapi_get_kernel("ff_swap"); + int64_t *valBuf = sycl::malloc_shared(1, q); + *valBuf = -1; + auto doSwap = [&]() { + q.submit([&](sycl::handler &CGH) { + CGH.set_arg(0, valBuf); + CGH.single_task(swapK); + }); + q.wait(); + }; + + doSwap(); + assert(*valBuf == 0); + doSwap(); + assert(*valBuf == -1); + + sycl::free(valBuf, q); + return 0; } From 1f57684306036dbe110c4fd6419e84a7cff8abee Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Tue, 14 Jan 2025 23:15:18 +0000 Subject: [PATCH 10/24] Windows symbols Signed-off-by: Julian Oppermann --- sycl/test/abi/sycl_symbols_windows.dump | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index a439081b1f382..6ae969a39a3b5 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3844,6 +3844,10 @@ ?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z ?ext_oneapi_get_composite_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ +?ext_oneapi_get_device_global_address@kernel_bundle_plain@detail@_V1@sycl@@AEAAPEAXVstring_view@234@AEBVdevice@34@@Z +?ext_oneapi_get_device_global_address@kernel_bundle_plain@detail@_V1@sycl@@QEAAPEAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBVdevice@34@@Z +?ext_oneapi_get_device_global_size@kernel_bundle_plain@detail@_V1@sycl@@AEAA_KVstring_view@234@AEBVdevice@34@@Z +?ext_oneapi_get_device_global_size@kernel_bundle_plain@detail@_V1@sycl@@QEAA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBVdevice@34@@Z ?ext_oneapi_get_graph@queue@_V1@sycl@@QEBA?AV?$command_graph@$0A@@experimental@oneapi@ext@23@XZ ?ext_oneapi_get_kernel@kernel_bundle_plain@detail@_V1@sycl@@AEAA?AVkernel@34@Vstring_view@234@@Z ?ext_oneapi_get_kernel@kernel_bundle_plain@detail@_V1@sycl@@QEAA?AVkernel@34@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z @@ -3853,6 +3857,8 @@ ?ext_oneapi_graph@queue@_V1@sycl@@QEAA?AVevent@23@V?$command_graph@$00@experimental@oneapi@ext@23@AEBUcode_location@detail@23@@Z ?ext_oneapi_graph@queue@_V1@sycl@@QEAA?AVevent@23@V?$command_graph@$00@experimental@oneapi@ext@23@AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_graph@queue@_V1@sycl@@QEAA?AVevent@23@V?$command_graph@$00@experimental@oneapi@ext@23@V423@AEBUcode_location@detail@23@@Z +?ext_oneapi_has_device_global@kernel_bundle_plain@detail@_V1@sycl@@AEAA_NVstring_view@234@AEBVdevice@34@@Z +?ext_oneapi_has_device_global@kernel_bundle_plain@detail@_V1@sycl@@QEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBVdevice@34@@Z ?ext_oneapi_has_kernel@kernel_bundle_plain@detail@_V1@sycl@@AEAA_NVstring_view@234@@Z ?ext_oneapi_has_kernel@kernel_bundle_plain@detail@_V1@sycl@@QEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?ext_oneapi_memcpy2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z From afba729fec1d9a519a52633ed4539e7c54a43c74 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 15 Jan 2025 01:17:00 +0000 Subject: [PATCH 11/24] Move device global tests to separate file and mark unsupported on opencl:gpu. Signed-off-by: Julian Oppermann --- .../kernel_compiler_sycl_jit.cpp | 115 +----------- ...ernel_compiler_sycl_jit_device_globals.cpp | 169 ++++++++++++++++++ 2 files changed, 171 insertions(+), 113 deletions(-) create mode 100644 sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp index 47a72b0ce46d4..d0240bc9b8964 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit.cpp @@ -58,29 +58,6 @@ void ff_templated(T *ptr, T *unused) { } )==="; -auto constexpr DGSource = R"===( -#include - -namespace syclex = sycl::ext::oneapi::experimental; - -syclex::device_global DG; - -extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( - (syclex::single_task_kernel)) void ff_dg_adder(int val) { - DG += val; -} - -syclex::device_global DG_DIS; - -extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( - (syclex::single_task_kernel)) void ff_swap(int64_t *val) { - int64_t tmp = DG_DIS; - DG_DIS = *val; - *val = tmp; -} - -)==="; - auto constexpr ESIMDSource = R"===( #include #include @@ -242,94 +219,6 @@ int test_build_and_run() { return 0; } -int test_device_global() { - namespace syclex = sycl::ext::oneapi::experimental; - using source_kb = sycl::kernel_bundle; - using exe_kb = sycl::kernel_bundle; - - sycl::queue q; - sycl::context ctx = q.get_context(); - sycl::device d = q.get_device(); - - bool ok = d.ext_oneapi_can_compile(syclex::source_language::sycl_jit); - if (!ok) { - std::cout << "Apparently this device does not support `sycl_jit` source " - "kernel bundle extension: " - << d.get_info() << std::endl; - return -1; - } - - source_kb kbSrc = syclex::create_kernel_bundle_from_source( - ctx, syclex::source_language::sycl_jit, DGSource); - - exe_kb kbExe1 = syclex::build(kbSrc); - auto addK = kbExe1.ext_oneapi_get_kernel("ff_dg_adder"); - - // Check presence of device globals. - assert(kbExe1.ext_oneapi_has_device_global("DG", d)); - // Querying a non-existing device global shall not crash. - assert(!kbExe1.ext_oneapi_has_device_global("bogus_DG", d)); - - void *dgAddr = kbExe1.ext_oneapi_get_device_global_address("DG", d); - size_t dgSize = kbExe1.ext_oneapi_get_device_global_size("DG", d); - assert(dgSize == 4); - - int32_t val; - auto checkVal = [&](int32_t expected) { - val = -1; - q.memcpy(&val, dgAddr, dgSize).wait(); - std::cout << "val: " << val << " == " << expected << '\n'; - assert(val == expected); - }; - - // Device globals are zero-initialized. - checkVal(0); - - // Set the DG. - val = 123; - q.memcpy(dgAddr, &val, dgSize).wait(); - checkVal(123); - - // Run a kernel using it. - val = -17; - q.submit([&](sycl::handler &CGH) { - CGH.set_arg(0, val); - CGH.single_task(addK); - }); - q.wait(); - checkVal(123 - 17); - - // Test that each bundle has its distinct set of globals. - exe_kb kbExe2 = syclex::build(kbSrc); - dgAddr = kbExe2.ext_oneapi_get_device_global_address("DG", d); - checkVal(0); - - dgAddr = kbExe1.ext_oneapi_get_device_global_address("DG", d); - checkVal(123 - 17); - - // Test global with `device_image_scope`. We currently cannot read/write these - // from the host, but they should work device-only. - auto swapK = kbExe2.ext_oneapi_get_kernel("ff_swap"); - int64_t *valBuf = sycl::malloc_shared(1, q); - *valBuf = -1; - auto doSwap = [&]() { - q.submit([&](sycl::handler &CGH) { - CGH.set_arg(0, valBuf); - CGH.single_task(swapK); - }); - q.wait(); - }; - - doSwap(); - assert(*valBuf == 0); - doSwap(); - assert(*valBuf == -1); - - sycl::free(valBuf, q); - - return 0; -} - int test_esimd() { namespace syclex = sycl::ext::oneapi::experimental; using source_kb = sycl::kernel_bundle; @@ -501,8 +390,8 @@ int test_warning() { int main(int argc, char **) { #ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER int optional_tests = (argc > 1) ? test_warning() : 0; - return test_build_and_run() || test_device_global() || test_esimd() || - test_unsupported_options() || test_error() || optional_tests; + return test_build_and_run() || test_esimd() || test_unsupported_options() || + test_error() || optional_tests; #else static_assert(false, "Kernel Compiler feature test macro undefined"); #endif diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp new file mode 100644 index 0000000000000..7e8d568f360f2 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp @@ -0,0 +1,169 @@ +//=- kernel_compiler_sycl_jit_device_globals.cpp - RTC device globals tests -=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// UNSUPPORTED: accelerator, opencl && gpu +// UNSUPPORTED-TRACKER: GSD-4287 + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +auto constexpr DGSource = R"===( +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +syclex::device_global DG; + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_dg_adder(int val) { + DG += val; +} + +syclex::device_global DG_DIS; + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_swap(int64_t *val) { + int64_t tmp = DG_DIS; + DG_DIS = *val; + *val = tmp; +} + +)==="; + +int test_device_global() { + namespace syclex = sycl::ext::oneapi::experimental; + using source_kb = sycl::kernel_bundle; + using exe_kb = sycl::kernel_bundle; + + sycl::queue q; + sycl::context ctx = q.get_context(); + sycl::device d = q.get_device(); + + bool ok = d.ext_oneapi_can_compile(syclex::source_language::sycl_jit); + if (!ok) { + std::cout << "Apparently this device does not support `sycl_jit` source " + "kernel bundle extension: " + << d.get_info() << std::endl; + return -1; + } + + source_kb kbSrc = syclex::create_kernel_bundle_from_source( + ctx, syclex::source_language::sycl_jit, DGSource); + + exe_kb kbExe1 = syclex::build(kbSrc); + auto addK = kbExe1.ext_oneapi_get_kernel("ff_dg_adder"); + + // Check presence of device globals. + assert(kbExe1.ext_oneapi_has_device_global("DG", d)); + // Querying a non-existing device global shall not crash. + assert(!kbExe1.ext_oneapi_has_device_global("bogus_DG", d)); + + void *dgAddr = kbExe1.ext_oneapi_get_device_global_address("DG", d); + size_t dgSize = kbExe1.ext_oneapi_get_device_global_size("DG", d); + assert(dgSize == 4); + + int32_t val; + auto checkVal = [&](int32_t expected) { + val = -1; + q.memcpy(&val, dgAddr, dgSize).wait(); + std::cout << "val: " << val << " == " << expected << '\n'; + assert(val == expected); + }; + + // Device globals are zero-initialized. + checkVal(0); + + // Set the DG. + val = 123; + q.memcpy(dgAddr, &val, dgSize).wait(); + checkVal(123); + + // Run a kernel using it. + val = -17; + q.submit([&](sycl::handler &CGH) { + CGH.set_arg(0, val); + CGH.single_task(addK); + }); + q.wait(); + checkVal(123 - 17); + + // Test that each bundle has its distinct set of globals. + exe_kb kbExe2 = syclex::build(kbSrc); + dgAddr = kbExe2.ext_oneapi_get_device_global_address("DG", d); + checkVal(0); + + dgAddr = kbExe1.ext_oneapi_get_device_global_address("DG", d); + checkVal(123 - 17); + + // Test global with `device_image_scope`. We currently cannot read/write these + // from the host, but they should work device-only. + auto swapK = kbExe2.ext_oneapi_get_kernel("ff_swap"); + int64_t *valBuf = sycl::malloc_shared(1, q); + *valBuf = -1; + auto doSwap = [&]() { + q.submit([&](sycl::handler &CGH) { + CGH.set_arg(0, valBuf); + CGH.single_task(swapK); + }); + q.wait(); + }; + + doSwap(); + assert(*valBuf == 0); + doSwap(); + assert(*valBuf == -1); + + sycl::free(valBuf, q); + + return 0; +} + +int test_error() { + namespace syclex = sycl::ext::oneapi::experimental; + using source_kb = sycl::kernel_bundle; + using exe_kb = sycl::kernel_bundle; + + sycl::queue q; + sycl::context ctx = q.get_context(); + sycl::device d = q.get_device(); + + bool ok = d.ext_oneapi_can_compile(syclex::source_language::sycl_jit); + if (!ok) { + return 0; + } + + source_kb kbSrc = syclex::create_kernel_bundle_from_source( + ctx, syclex::source_language::sycl_jit, DGSource); + exe_kb kbExe = syclex::build(kbSrc); + + try { + kbExe.ext_oneapi_get_device_global_address("DG_DIS", d); + assert(false && "we should not be here"); + } catch (sycl::exception &e) { + assert(e.code() == sycl::errc::invalid); + assert(std::string(e.what()).find( + "Cannot query USM pointer for device global with " + "'device_image_scope' property") != std::string::npos); + } + return 0; +} + +int main() { +#ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER + return test_device_global() || test_error(); +#else + static_assert(false, "Kernel Compiler feature test macro undefined"); +#endif + return 0; +} From d1177c8fd4031cc761903f39a90ef2c4c1905239 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 15 Jan 2025 02:05:07 +0000 Subject: [PATCH 12/24] Bump sycl.hpp counter for added RTC test. Signed-off-by: Julian Oppermann --- sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 3aaa3e6cb8bf4..427681e88662b 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 5 +// CHECK-NUM-MATCHES: 6 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From e96e7e05b7661b9c4b7b373c32f911c88997b1fb Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 15 Jan 2025 22:41:04 +0000 Subject: [PATCH 13/24] Use bundle's context for adhoc queue. Signed-off-by: Julian Oppermann --- sycl/source/detail/kernel_bundle_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 06eefa71c889b..15fd5dca3b885 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -818,7 +818,7 @@ class kernel_bundle_impl { // TODO: Is this the right approach? Should we just pass the queue as an // argument? - queue InitQueue{Dev}; + queue InitQueue{MContext, Dev}; auto &USMMem = Entry->getOrAllocateDeviceGlobalUSM(getSyclObjImpl(InitQueue)); InitQueue.wait_and_throw(); From dd081f66992d319691a4644c7386b5e636b8c68a Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Thu, 16 Jan 2025 08:17:51 +0000 Subject: [PATCH 14/24] Fix unused variable in structured bindings for old GCC Signed-off-by: Julian Oppermann --- sycl/source/detail/kernel_bundle_impl.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 15fd5dca3b885..82a14b1ad7a01 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -530,8 +530,10 @@ class kernel_bundle_impl { for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals()) { std::string_view DeviceGlobalName{DeviceGlobalProp->Name}; assert(DeviceGlobalName.find(Prefix) == 0); - auto [It, Ins] = DeviceGlobalIDSet.emplace(DeviceGlobalName); - if (Ins) { + bool Inserted = false; + std::tie(std::ignore, Inserted) = + DeviceGlobalIDSet.emplace(DeviceGlobalName); + if (Inserted) { DeviceGlobalIDVec.emplace_back(DeviceGlobalName); DeviceGlobalName.remove_prefix(PrefixLen); DeviceGlobalNames.emplace_back(DeviceGlobalName); From e5df76dbc94cb65367a5c69f4dbdd24e720b874d Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 24 Jan 2025 04:01:23 +0000 Subject: [PATCH 15/24] Use unordered set Signed-off-by: Julian Oppermann --- sycl/source/detail/kernel_bundle_impl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 82a14b1ad7a01..7c1afc1634237 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "split_string.hpp" @@ -523,7 +524,7 @@ class kernel_bundle_impl { // Determine IDs of all device globals referenced by this bundle's // kernels. These IDs are also prefixed. - std::set DeviceGlobalIDSet; + std::unordered_set DeviceGlobalIDSet; std::vector DeviceGlobalIDVec; std::vector DeviceGlobalNames; for (const auto &RawImg : PM.getRawDeviceImages(KernelIDs)) { From 880b5ad09770ccb2958bcd08eb778c0296d80246 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 24 Jan 2025 04:28:10 +0000 Subject: [PATCH 16/24] Bump sycl.hpp counter again Signed-off-by: Julian Oppermann --- sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 692ca4b8a16d2..dd068fb40752a 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 6 +// CHECK-NUM-MATCHES: 7 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From a8868fbaacafb65af390ff63ed0f955f919d65e9 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Tue, 28 Jan 2025 01:05:37 +0000 Subject: [PATCH 17/24] Address feedback. Signed-off-by: Julian Oppermann --- sycl/source/detail/kernel_bundle_impl.hpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 7c1afc1634237..2cf18e4ee98ef 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -550,7 +550,7 @@ class kernel_bundle_impl { // Device globals are usually statically allocated and registered in the // integration footer, which we don't have in the RTC context. Instead, we // dynamically allocate storage tied to the executable kernel bundle. - for (auto *DeviceGlobalEntry : + for (DeviceGlobalMapEntry *DeviceGlobalEntry : PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) { size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize; // init value @@ -703,15 +703,8 @@ class kernel_bundle_impl { // Otherwise, if the device candidate is a sub-device it is also valid if // its parent is valid. - if (!getSyclObjImpl(DeviceCand)->isRootDevice()) { - try { - return is_valid_device( - DeviceCand.get_info()); - } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in is_valid_device", e); - } - } - return false; + return !getSyclObjImpl(DeviceCand)->isRootDevice() && + is_valid_device(DeviceCand.get_info()); } DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name, From e2e0cc7563c3cf9026281c99c84f1354f25ba68f Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Tue, 28 Jan 2025 01:15:51 +0000 Subject: [PATCH 18/24] Update REQUIRES and UNSUPPORTED tags in test Signed-off-by: Julian Oppermann --- .../kernel_compiler_sycl_jit_device_globals.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp index 7e8d568f360f2..fbbb20001177a 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp @@ -7,7 +7,11 @@ //===----------------------------------------------------------------------===// // REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_device_allocations + // UNSUPPORTED: accelerator, opencl && gpu +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 // RUN: %{build} -o %t.out From 6409766945604e70f8b4e19d0c6001aaddbe7b1d Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 12 Mar 2025 03:18:07 +0000 Subject: [PATCH 19/24] Untangle destruction of device global map entries Signed-off-by: Julian Oppermann --- sycl/source/detail/context_impl.cpp | 8 +++ sycl/source/detail/context_impl.hpp | 3 ++ sycl/source/detail/kernel_bundle_impl.hpp | 60 ++++++++++++++++------- 3 files changed, 52 insertions(+), 19 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index e527d0a0c46a8..05aace25dface 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -329,6 +329,11 @@ void context_impl::addAssociatedDeviceGlobal(const void *DeviceGlobalPtr) { MAssociatedDeviceGlobals.insert(DeviceGlobalPtr); } +void context_impl::removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr) { + std::lock_guard Lock{MAssociatedDeviceGlobalsMutex}; + MAssociatedDeviceGlobals.erase(DeviceGlobalPtr); +} + void context_impl::addDeviceGlobalInitializer( ur_program_handle_t Program, const std::vector &Devs, const RTDeviceBinaryImage *BinImage) { @@ -407,6 +412,9 @@ std::vector context_impl::initializeDeviceGlobals( // Device global map entry pointers will not die before the end of the // program and the pointers will stay the same, so we do not need // m_DeviceGlobalsMutex here. + // The lifetimes of device global map entries representing globals in + // runtime-compiled code will be tied to the kernel bundle, so the + // assumption holds in that setting as well. for (DeviceGlobalMapEntry *DeviceGlobalEntry : DeviceGlobalEntries) { // Get or allocate the USM memory associated with the device global. DeviceGlobalUSMMem &DeviceGlobalUSM = diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 65e1fda0a5a7a..d573d19977ccd 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -203,6 +203,9 @@ class context_impl { /// Adds an associated device global to the tracked associates. void addAssociatedDeviceGlobal(const void *DeviceGlobalPtr); + /// Removes an associated device global from the tracked associates. + void removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr); + /// Adds a device global initializer. void addDeviceGlobalInitializer(ur_program_handle_t Program, const std::vector &Devs, diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index eeff7ff4a995a..129acc8219ba4 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -385,6 +385,7 @@ class kernel_bundle_impl { std::vector &&KernelNames, std::unordered_map &&MangledKernelNames, std::vector &&DeviceGlobalNames, + std::vector> &&DeviceGlobalAllocations, sycl_device_binaries Binaries, std::string &&Prefix, syclex::source_language Lang) : kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs, @@ -399,6 +400,7 @@ class kernel_bundle_impl { MKernelNames = std::move(KernelNames); MMangledKernelNames = std::move(MangledKernelNames); MDeviceGlobalNames = std::move(DeviceGlobalNames); + MDeviceGlobalAllocations = std::move(DeviceGlobalAllocations); MDeviceBinaries = Binaries; MPrefix = std::move(Prefix); MLanguage = Lang; @@ -535,6 +537,12 @@ class kernel_bundle_impl { std::vector KernelIDs; std::vector KernelNames; std::unordered_map MangledKernelNames; + + std::unordered_set DeviceGlobalIDSet; + std::vector DeviceGlobalIDVec; + std::vector DeviceGlobalNames; + std::vector> DeviceGlobalAllocations; + for (const auto &KernelID : PM.getAllSYCLKernelIDs()) { std::string_view KernelName{KernelID.get_name()}; if (KernelName.find(Prefix) == 0) { @@ -552,8 +560,8 @@ class kernel_bundle_impl { } } - // Apply frontend information. for (const auto *RawImg : PM.getRawDeviceImages(KernelIDs)) { + // Mangled names. for (const sycl_device_binary_property &RKProp : RawImg->getRegisteredKernels()) { @@ -563,14 +571,8 @@ class kernel_bundle_impl { reinterpret_cast(BA.begin()), MangledNameLen}; MangledKernelNames.emplace(RKProp->Name, MangledName); } - } - // Determine IDs of all device globals referenced by this bundle's - // kernels. These IDs are also prefixed. - std::unordered_set DeviceGlobalIDSet; - std::vector DeviceGlobalIDVec; - std::vector DeviceGlobalNames; - for (const auto &RawImg : PM.getRawDeviceImages(KernelIDs)) { + // Device globals. for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals()) { std::string_view DeviceGlobalName{DeviceGlobalProp->Name}; assert(DeviceGlobalName.find(Prefix) == 0); @@ -585,12 +587,6 @@ class kernel_bundle_impl { } } - // Create the executable bundle. - auto ExecBundle = std::make_shared( - MContext, MDevices, KernelIDs, std::move(KernelNames), - std::move(MangledKernelNames), std::move(DeviceGlobalNames), Binaries, - std::move(Prefix), MLanguage); - // Device globals are usually statically allocated and registered in the // integration footer, which we don't have in the RTC context. Instead, we // dynamically allocate storage tied to the executable kernel bundle. @@ -599,13 +595,13 @@ class kernel_bundle_impl { size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize; // init value if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) { - // USM pointer. TODO: it's actually a decorated multi_ptr. + // Consider storage for device USM pointer. AllocSize += sizeof(void *); } auto Alloc = std::make_unique(AllocSize); std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId}; PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data()); - ExecBundle->MDeviceGlobalAllocations.push_back(std::move(Alloc)); + DeviceGlobalAllocations.push_back(std::move(Alloc)); // Drop the RTC prefix from the entry's symbol name. Note that the PM // still manages this device global under its prefixed name. @@ -614,7 +610,11 @@ class kernel_bundle_impl { DeviceGlobalEntry->MUniqueId = DeviceGlobalName; } - return ExecBundle; + return std::make_shared( + MContext, MDevices, KernelIDs, std::move(KernelNames), + std::move(MangledKernelNames), std::move(DeviceGlobalNames), + std::move(DeviceGlobalAllocations), Binaries, std::move(Prefix), + MLanguage); } ur_program_handle_t UrProgram = nullptr; @@ -781,6 +781,28 @@ class kernel_bundle_impl { return Entries.front(); } + void unregister_device_globals_from_context() { + if (MDeviceGlobalNames.empty()) + return; + + // Manually trigger the release of resources for all device global map + // entries associated with this runtime-compiled bundle. Normally, this + // would happen in `~context_impl()`, however in the RTC setting, the + // context outlives the DG map entries owned by the program manager. + + std::vector DeviceGlobalIDs; + std::transform(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), + std::back_inserter(DeviceGlobalIDs), + [&](const std::string &DGName) { return MPrefix + DGName; }); + auto ContextImpl = getSyclObjImpl(MContext); + for (DeviceGlobalMapEntry *Entry : + ProgramManager::getInstance().getDeviceGlobalEntries( + DeviceGlobalIDs)) { + Entry->removeAssociatedResources(ContextImpl.get()); + ContextImpl->removeAssociatedDeviceGlobal(Entry->MDeviceGlobalPtr); + } + } + public: bool ext_oneapi_has_kernel(const std::string &Name) { return is_kernel_name(adjust_kernel_name(Name)); @@ -1121,6 +1143,7 @@ class kernel_bundle_impl { ~kernel_bundle_impl() { try { if (MDeviceBinaries) { + unregister_device_globals_from_context(); ProgramManager::getInstance().removeImages(MDeviceBinaries); syclex::detail::SYCL_JIT_destroy(MDeviceBinaries); } @@ -1162,11 +1185,10 @@ class kernel_bundle_impl { std::vector MKernelNames; std::unordered_map MMangledKernelNames; std::vector MDeviceGlobalNames; + std::vector> MDeviceGlobalAllocations; sycl_device_binaries MDeviceBinaries = nullptr; std::string MPrefix; include_pairs_t MIncludePairs; - - std::vector> MDeviceGlobalAllocations; }; } // namespace detail From 92dce3df5b19b5ac8c150dbe87e5334deef680ba Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 12 Mar 2025 04:12:58 +0000 Subject: [PATCH 20/24] Drop device from queries Signed-off-by: Julian Oppermann --- sycl/include/sycl/kernel_bundle.hpp | 39 ++++++++--------- sycl/source/detail/kernel_bundle_impl.hpp | 42 +++++++------------ sycl/source/kernel_bundle.cpp | 13 +++--- ...ernel_compiler_sycl_jit_device_globals.cpp | 6 +-- sycl/test/abi/sycl_symbols_linux.dump | 4 +- .../no_sycl_hpp_in_e2e_tests.cpp | 2 +- 6 files changed, 43 insertions(+), 63 deletions(-) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 6416eb46f8cba..5deec1833677a 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -242,9 +242,8 @@ class __SYCL_EXPORT kernel_bundle_plain { ext_oneapi_get_raw_kernel_name(detail::string_view{name}).c_str()}; } - bool ext_oneapi_has_device_global(const std::string &name, - const device &dev) { - return ext_oneapi_has_device_global(detail::string_view{name}, dev); + bool ext_oneapi_has_device_global(const std::string &name) { + return ext_oneapi_has_device_global(detail::string_view{name}); } void *ext_oneapi_get_device_global_address(const std::string &name, @@ -252,9 +251,8 @@ class __SYCL_EXPORT kernel_bundle_plain { return ext_oneapi_get_device_global_address(detail::string_view{name}, dev); } - size_t ext_oneapi_get_device_global_size(const std::string &name, - const device &dev) { - return ext_oneapi_get_device_global_size(detail::string_view{name}, dev); + size_t ext_oneapi_get_device_global_size(const std::string &name) { + return ext_oneapi_get_device_global_size(detail::string_view{name}); } protected: @@ -287,12 +285,10 @@ class __SYCL_EXPORT kernel_bundle_plain { kernel ext_oneapi_get_kernel(detail::string_view name); detail::string ext_oneapi_get_raw_kernel_name(detail::string_view name); - bool ext_oneapi_has_device_global(detail::string_view name, - const device &dev); + bool ext_oneapi_has_device_global(detail::string_view name); void *ext_oneapi_get_device_global_address(detail::string_view name, const device &dev); - size_t ext_oneapi_get_device_global_size(detail::string_view name, - const device &dev); + size_t ext_oneapi_get_device_global_size(detail::string_view name); }; } // namespace detail @@ -524,20 +520,21 @@ class kernel_bundle : public detail::kernel_bundle_plain, ///////////////////////// // ext_oneapi_has_device_global - // only true if created from source and has this global for the given device + // only true if kernel_bundle was created from source and has this device + // global ///////////////////////// template > - bool ext_oneapi_has_device_global(const std::string &name, - const device &dev) { - return detail::kernel_bundle_plain::ext_oneapi_has_device_global(name, dev); + bool ext_oneapi_has_device_global(const std::string &name) { + return detail::kernel_bundle_plain::ext_oneapi_has_device_global(name); } ///////////////////////// // ext_oneapi_get_device_global_address - // kernel_bundle must be created from source, throws if device global is not - // present for the given device, or has `device_image_scope` property. - // Returns a USM pointer to the variable's allocation on the device. + // kernel_bundle must be created from source, throws if bundle was not built + // for this device, or device global is either not present or has + // `device_image_scope` property. + // Returns a USM pointer to the variable's initialized storage on the device. ///////////////////////// template > @@ -550,14 +547,12 @@ class kernel_bundle : public detail::kernel_bundle_plain, ///////////////////////// // ext_oneapi_get_device_global_size // kernel_bundle must be created from source, throws if device global is not - // present for the given device. Returns size in bytes. + // present. Returns the variable's size in bytes. ///////////////////////// template > - size_t ext_oneapi_get_device_global_size(const std::string &name, - const device &dev) { - return detail::kernel_bundle_plain::ext_oneapi_get_device_global_size(name, - dev); + size_t ext_oneapi_get_device_global_size(const std::string &name) { + return detail::kernel_bundle_plain::ext_oneapi_get_device_global_size(name); } private: diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 18706f8ccec35..3803fc9809e9e 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -760,29 +760,16 @@ class kernel_bundle_impl { return "_Z" + std::to_string(Name.length()) + Name; } - bool is_valid_device(const device &DeviceCand) { - // Check if the device is in this bundle's list of devices. - if (std::count(MDevices.begin(), MDevices.end(), DeviceCand)) { - return true; - } - - // Otherwise, if the device candidate is a sub-device it is also valid if - // its parent is valid. - return !getSyclObjImpl(DeviceCand)->isRootDevice() && - is_valid_device(DeviceCand.get_info()); - } - - DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name, - const device &Dev) { + DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name) { if (MLanguage != syclex::source_language::sycl_jit || MPrefix.empty()) { throw sycl::exception(make_error_code(errc::invalid), "Querying device globals by name is only available " "in kernel_bundles successfully built from " - "kernel_bundle " + "kernel_bundle::ext_oneapi_source> " "with 'sycl_jit' source language."); } - if (!ext_oneapi_has_device_global(Name, Dev)) { + if (!ext_oneapi_has_device_global(Name)) { throw sycl::exception(make_error_code(errc::invalid), "device global '" + Name + "' not found in kernel_bundle"); @@ -892,12 +879,7 @@ class kernel_bundle_impl { return AdjustedName; } - bool ext_oneapi_has_device_global(const std::string &Name, - const device &Dev) { - if (!is_valid_device(Dev)) { - return false; - } - + bool ext_oneapi_has_device_global(const std::string &Name) { std::string MangledName = mangle_device_global_name(Name); return std::find(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), MangledName) != MDeviceGlobalNames.end(); @@ -905,15 +887,20 @@ class kernel_bundle_impl { void *ext_oneapi_get_device_global_address(const std::string &Name, const device &Dev) { - DeviceGlobalMapEntry *Entry = get_device_global_entry(Name, Dev); + if (std::find(MDevices.begin(), MDevices.end(), Dev) == MDevices.end()) { + throw sycl::exception(make_error_code(errc::invalid), + "kernel_bundle not built for device"); + } + + DeviceGlobalMapEntry *Entry = get_device_global_entry(Name); if (Entry->MIsDeviceImageScopeDecorated) { throw sycl::exception(make_error_code(errc::invalid), "Cannot query USM pointer for device global with " "'device_image_scope' property"); } - // TODO: Is this the right approach? Should we just pass the queue as an - // argument? + // TODO: Add context-only initialization via `urUSMContextMemcpyExp` instead + // of using a throw-away queue. queue InitQueue{MContext, Dev}; auto &USMMem = Entry->getOrAllocateDeviceGlobalUSM(getSyclObjImpl(InitQueue)); @@ -921,9 +908,8 @@ class kernel_bundle_impl { return USMMem.getPtr(); } - size_t ext_oneapi_get_device_global_size(const std::string &Name, - const device &Dev) { - return get_device_global_entry(Name, Dev)->MDeviceGlobalTSize; + size_t ext_oneapi_get_device_global_size(const std::string &Name) { + return get_device_global_entry(Name)->MDeviceGlobalTSize; } bool empty() const noexcept { return MDeviceImages.empty(); } diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 9a4d79108c106..a8c7ee8cd0232 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -142,9 +142,9 @@ kernel_bundle_plain::ext_oneapi_get_raw_kernel_name(detail::string_view name) { return detail::string{impl->ext_oneapi_get_raw_kernel_name(name.data())}; } -bool kernel_bundle_plain::ext_oneapi_has_device_global(detail::string_view name, - const device &dev) { - return impl->ext_oneapi_has_device_global(name.data(), dev); +bool kernel_bundle_plain::ext_oneapi_has_device_global( + detail::string_view name) { + return impl->ext_oneapi_has_device_global(name.data()); } void *kernel_bundle_plain::ext_oneapi_get_device_global_address( @@ -152,10 +152,9 @@ void *kernel_bundle_plain::ext_oneapi_get_device_global_address( return impl->ext_oneapi_get_device_global_address(name.data(), dev); } -size_t -kernel_bundle_plain::ext_oneapi_get_device_global_size(detail::string_view name, - const device &dev) { - return impl->ext_oneapi_get_device_global_size(name.data(), dev); +size_t kernel_bundle_plain::ext_oneapi_get_device_global_size( + detail::string_view name) { + return impl->ext_oneapi_get_device_global_size(name.data()); } ////////////////////////////////// diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp index fbbb20001177a..5819237b26453 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_device_globals.cpp @@ -69,12 +69,12 @@ int test_device_global() { auto addK = kbExe1.ext_oneapi_get_kernel("ff_dg_adder"); // Check presence of device globals. - assert(kbExe1.ext_oneapi_has_device_global("DG", d)); + assert(kbExe1.ext_oneapi_has_device_global("DG")); // Querying a non-existing device global shall not crash. - assert(!kbExe1.ext_oneapi_has_device_global("bogus_DG", d)); + assert(!kbExe1.ext_oneapi_has_device_global("bogus_DG")); void *dgAddr = kbExe1.ext_oneapi_get_device_global_address("DG", d); - size_t dgSize = kbExe1.ext_oneapi_get_device_global_size("DG", d); + size_t dgSize = kbExe1.ext_oneapi_get_device_global_size("DG"); assert(dgSize == 4); int32_t val; diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2c16fef2210f9..564d64026ecc1 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3278,10 +3278,10 @@ _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextEbNS0_12bundle_stateENS0_ _ZN4sycl3_V16detail18stringifyErrorCodeEi _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelENS1_11string_viewE -_ZN4sycl3_V16detail19kernel_bundle_plain28ext_oneapi_has_device_globalENS1_11string_viewERKNS0_6deviceE +_ZN4sycl3_V16detail19kernel_bundle_plain28ext_oneapi_has_device_globalENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain30ext_oneapi_get_raw_kernel_nameENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain32set_specialization_constant_implEPKcPvm -_ZN4sycl3_V16detail19kernel_bundle_plain33ext_oneapi_get_device_global_sizeENS1_11string_viewERKNS0_6deviceE +_ZN4sycl3_V16detail19kernel_bundle_plain33ext_oneapi_get_device_global_sizeENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain36ext_oneapi_get_device_global_addressENS1_11string_viewERKNS0_6deviceE _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_16AccessorBaseHostENS0_6access6targetE _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_28SampledImageAccessorBaseHostENS0_12image_targetE diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index e51d5f86e1777..1f8ce4897a5f2 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 12 +// CHECK-NUM-MATCHES: 14 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From 265eee17a187b46b07369b572ae981c1bb987861 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 12 Mar 2025 04:17:43 +0000 Subject: [PATCH 21/24] Revert format change Signed-off-by: Julian Oppermann --- .../lib/rtc/DeviceCompilation.cpp | 19 +++++++++---------- sycl/include/sycl/kernel_bundle.hpp | 1 + 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 4034061ae3eb3..df8812686643c 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -491,18 +491,17 @@ static bool getDeviceLibraries(const ArgList &Args, using SYCLDeviceLibsList = SmallVector; const SYCLDeviceLibsList SYCLDeviceWrapperLibs = { - {"libsycl-crt", "libc"}, - {"libsycl-complex", "libm-fp32"}, - {"libsycl-complex-fp64", "libm-fp64"}, - {"libsycl-cmath", "libm-fp32"}, - {"libsycl-cmath-fp64", "libm-fp64"}, + {"libsycl-crt", "libc"}, + {"libsycl-complex", "libm-fp32"}, + {"libsycl-complex-fp64", "libm-fp64"}, + {"libsycl-cmath", "libm-fp32"}, + {"libsycl-cmath-fp64", "libm-fp64"}, #if defined(_WIN32) - {"libsycl-msvc-math", "libm-fp32"}, + {"libsycl-msvc-math", "libm-fp32"}, #endif - {"libsycl-imf", "libimf-fp32"}, - {"libsycl-imf-fp64", "libimf-fp64"}, - {"libsycl-imf-bf16", "libimf-bf16"} - }; + {"libsycl-imf", "libimf-fp32"}, + {"libsycl-imf-fp64", "libimf-fp64"}, + {"libsycl-imf-bf16", "libimf-bf16"}}; // ITT annotation libraries are linked in separately whenever the device // code instrumentation is enabled. const SYCLDeviceLibsList SYCLDeviceAnnotationLibs = { diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 5deec1833677a..dc3e683d1dabd 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -509,6 +509,7 @@ class kernel_bundle : public detail::kernel_bundle_plain, return detail::kernel_bundle_plain::ext_oneapi_get_kernel(name); } + ///////////////////////// // ext_oneapi_get_raw_kernel_name // kernel_bundle must be created from source, throws if not present ///////////////////////// From a0ab17451f3f417d0706e7672b76d51630c4652f Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 12 Mar 2025 04:41:31 +0000 Subject: [PATCH 22/24] Windows symbols Signed-off-by: Julian Oppermann --- sycl/test/abi/sycl_symbols_windows.dump | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 6fcb39cf82d8b..c55a840b78ada 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3886,8 +3886,8 @@ ?khr_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_device_global_address@kernel_bundle_plain@detail@_V1@sycl@@AEAAPEAXVstring_view@234@AEBVdevice@34@@Z ?ext_oneapi_get_device_global_address@kernel_bundle_plain@detail@_V1@sycl@@QEAAPEAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBVdevice@34@@Z -?ext_oneapi_get_device_global_size@kernel_bundle_plain@detail@_V1@sycl@@AEAA_KVstring_view@234@AEBVdevice@34@@Z -?ext_oneapi_get_device_global_size@kernel_bundle_plain@detail@_V1@sycl@@QEAA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBVdevice@34@@Z +?ext_oneapi_get_device_global_size@kernel_bundle_plain@detail@_V1@sycl@@AEAA_KVstring_view@234@@Z +?ext_oneapi_get_device_global_size@kernel_bundle_plain@detail@_V1@sycl@@QEAA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?ext_oneapi_get_graph@queue@_V1@sycl@@QEBA?AV?$command_graph@$0A@@experimental@oneapi@ext@23@XZ ?ext_oneapi_get_kernel@kernel_bundle_plain@detail@_V1@sycl@@AEAA?AVkernel@34@Vstring_view@234@@Z ?ext_oneapi_get_kernel@kernel_bundle_plain@detail@_V1@sycl@@QEAA?AVkernel@34@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z @@ -3900,8 +3900,8 @@ ?ext_oneapi_graph@queue@_V1@sycl@@QEAA?AVevent@23@V?$command_graph@$00@experimental@oneapi@ext@23@AEBUcode_location@detail@23@@Z ?ext_oneapi_graph@queue@_V1@sycl@@QEAA?AVevent@23@V?$command_graph@$00@experimental@oneapi@ext@23@AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_graph@queue@_V1@sycl@@QEAA?AVevent@23@V?$command_graph@$00@experimental@oneapi@ext@23@V423@AEBUcode_location@detail@23@@Z -?ext_oneapi_has_device_global@kernel_bundle_plain@detail@_V1@sycl@@AEAA_NVstring_view@234@AEBVdevice@34@@Z -?ext_oneapi_has_device_global@kernel_bundle_plain@detail@_V1@sycl@@QEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBVdevice@34@@Z +?ext_oneapi_has_device_global@kernel_bundle_plain@detail@_V1@sycl@@AEAA_NVstring_view@234@@Z +?ext_oneapi_has_device_global@kernel_bundle_plain@detail@_V1@sycl@@QEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?ext_oneapi_has_kernel@kernel_bundle_plain@detail@_V1@sycl@@AEAA_NVstring_view@234@@Z ?ext_oneapi_has_kernel@kernel_bundle_plain@detail@_V1@sycl@@QEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?ext_oneapi_memcpy2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z From 62d223b3f2f7e8f89227420b736eadd785513151 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 12 Mar 2025 18:33:42 +1300 Subject: [PATCH 23/24] Update sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp --- sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 1f8ce4897a5f2..a27efa2dcfc8f 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 14 +// CHECK-NUM-MATCHES: 13 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From 1be7568c457d30b50bb302e771bebe1f2ca591ed Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 12 Mar 2025 10:28:03 +0000 Subject: [PATCH 24/24] Fail a bit earlier and more consistently Signed-off-by: Julian Oppermann --- sycl/source/detail/kernel_bundle_impl.hpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 3803fc9809e9e..f7cc756142661 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -761,7 +761,8 @@ class kernel_bundle_impl { } DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name) { - if (MLanguage != syclex::source_language::sycl_jit || MPrefix.empty()) { + if (MKernelNames.empty() || + MLanguage != syclex::source_language::sycl_jit) { throw sycl::exception(make_error_code(errc::invalid), "Querying device globals by name is only available " "in kernel_bundles successfully built from " @@ -806,7 +807,7 @@ class kernel_bundle_impl { public: bool ext_oneapi_has_kernel(const std::string &Name) { - return is_kernel_name(adjust_kernel_name(Name)); + return !MKernelNames.empty() && is_kernel_name(adjust_kernel_name(Name)); } kernel @@ -880,19 +881,21 @@ class kernel_bundle_impl { } bool ext_oneapi_has_device_global(const std::string &Name) { - std::string MangledName = mangle_device_global_name(Name); - return std::find(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), - MangledName) != MDeviceGlobalNames.end(); + return !MDeviceGlobalNames.empty() && + std::find(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), + mangle_device_global_name(Name)) != + MDeviceGlobalNames.end(); } void *ext_oneapi_get_device_global_address(const std::string &Name, const device &Dev) { + DeviceGlobalMapEntry *Entry = get_device_global_entry(Name); + if (std::find(MDevices.begin(), MDevices.end(), Dev) == MDevices.end()) { throw sycl::exception(make_error_code(errc::invalid), "kernel_bundle not built for device"); } - DeviceGlobalMapEntry *Entry = get_device_global_entry(Name); if (Entry->MIsDeviceImageScopeDecorated) { throw sycl::exception(make_error_code(errc::invalid), "Cannot query USM pointer for device global with "