@@ -380,8 +380,9 @@ class kernel_bundle_impl {
380380 // program manager integration, only for sycl_jit language
381381 kernel_bundle_impl (context Ctx, std::vector<device> Devs,
382382 const std::vector<kernel_id> &KernelIDs,
383- std::vector<std::string> KNames, std::string Pfx,
384- syclex::source_language Lang)
383+ const std::vector<std::string> &KNames,
384+ const std::vector<std::string> &DGNames,
385+ const std::string &Pfx, syclex::source_language Lang)
385386 : kernel_bundle_impl(Ctx, Devs, KernelIDs, bundle_state::executable) {
386387 assert (Lang == syclex::source_language::sycl_jit);
387388 // Mark this bundle explicitly as "interop" to ensure that its kernels are
@@ -391,6 +392,7 @@ class kernel_bundle_impl {
391392 // from the (unprefixed) kernel name.
392393 MIsInterop = true ;
393394 KernelNames = KNames;
395+ DeviceGlobalNames = DGNames;
394396 Prefix = Pfx;
395397 Language = Lang;
396398 }
@@ -509,51 +511,60 @@ class kernel_bundle_impl {
509511 // `jit_compiler::compileSYCL(..)` uses `CompilationID + '$'` as prefix
510512 // for offload entry names.
511513 std::string Prefix = CompilationID + ' $' ;
514+ auto PrefixLen = Prefix.length ();
512515 for (const auto &KernelID : PM.getAllSYCLKernelIDs ()) {
513516 std::string_view KernelName{KernelID.get_name ()};
514517 if (KernelName.find (Prefix) == 0 ) {
515518 KernelIDs.push_back (KernelID);
516- KernelName.remove_prefix (Prefix. length () );
519+ KernelName.remove_prefix (PrefixLen );
517520 KernelNames.emplace_back (KernelName);
518521 }
519522 }
520523
521- // Create the executable bundle.
522- auto ExecBundle = std::make_shared<kernel_bundle_impl>(
523- MContext, MDevices, KernelIDs, KernelNames, Prefix, Language);
524-
525524 // Determine IDs of all device globals referenced by this bundle's
526525 // kernels. These IDs are also prefixed.
527- std::set<std::string> UniqueDeviceGlobalIDs;
528- std::vector<std::string> DeviceGlobalIDs;
526+ std::set<std::string> DeviceGlobalIDSet;
527+ std::vector<std::string> DeviceGlobalIDVec;
528+ std::vector<std::string> DeviceGlobalNames;
529529 for (const auto &RawImg : PM.getRawDeviceImages (KernelIDs)) {
530530 for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals ()) {
531- auto [It, Ins] = UniqueDeviceGlobalIDs.insert (DeviceGlobalProp->Name );
531+ std::string_view DeviceGlobalName{DeviceGlobalProp->Name };
532+ assert (DeviceGlobalName.find (Prefix) == 0 );
533+ auto [It, Ins] = DeviceGlobalIDSet.emplace (DeviceGlobalName);
532534 if (Ins) {
533- DeviceGlobalIDs.push_back (*It);
535+ DeviceGlobalIDVec.emplace_back (DeviceGlobalName);
536+ DeviceGlobalName.remove_prefix (PrefixLen);
537+ DeviceGlobalNames.emplace_back (DeviceGlobalName);
534538 }
535539 }
536540 }
537541
542+ // Create the executable bundle.
543+ auto ExecBundle = std::make_shared<kernel_bundle_impl>(
544+ MContext, MDevices, KernelIDs, KernelNames, DeviceGlobalNames, Prefix,
545+ Language);
546+
547+ // Device globals are usually statically allocated and registered in the
548+ // integration footer, which we don't have in the RTC context. Instead, we
549+ // dynamically allocate storage tied to the executable kernel bundle.
538550 for (auto *DeviceGlobalEntry :
539- PM.getDeviceGlobalEntries (DeviceGlobalIDs)) {
540- // Device globals without `device_image_scope` are usually statically
541- // allocated and registered in the integration footer, which we don't
542- // have in the RTC context. Instead, we dynamically allocate storage
543- // tied to the executable kernel bundle.
551+ PM.getDeviceGlobalEntries (DeviceGlobalIDVec)) {
552+
553+ size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize ; // init value
544554 if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated ) {
545- auto Alloc = std::make_unique<std::byte[]>(
546- DeviceGlobalEntry->MDeviceGlobalTSize );
547- PM.addOrInitDeviceGlobalEntry (Alloc.get (),
548- DeviceGlobalEntry->MUniqueId .c_str ());
549- ExecBundle->DeviceGlobals .push_back (std::move (Alloc));
555+ // USM pointer. TODO: it's actually a decorated multi_ptr.
556+ AllocSize += sizeof (void *);
550557 }
558+ auto Alloc = std::make_unique<std::byte[]>(AllocSize);
559+ std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId };
560+ PM.addOrInitDeviceGlobalEntry (Alloc.get (), DeviceGlobalName.data ());
561+ ExecBundle->DeviceGlobalAllocations .push_back (std::move (Alloc));
551562
552563 // Drop the RTC prefix from the entry's symbol name. Note that the PM
553564 // still manages this device global under its prefixed name.
554- assert (DeviceGlobalEntry-> MUniqueId .find (Prefix) == 0 );
555- DeviceGlobalEntry-> MUniqueId =
556- DeviceGlobalEntry->MUniqueId . substr (Prefix. length ()) ;
565+ assert (DeviceGlobalName .find (Prefix) == 0 );
566+ DeviceGlobalName. remove_prefix (PrefixLen);
567+ DeviceGlobalEntry->MUniqueId = DeviceGlobalName ;
557568 }
558569
559570 return ExecBundle;
@@ -735,6 +746,18 @@ class kernel_bundle_impl {
735746 return detail::createSyclObjFromImpl<kernel>(KernelImpl);
736747 }
737748
749+ std::string mangle_device_global_name (const std::string &Name) {
750+ // TODO: Support device globals declared in namespaces.
751+ return " _Z" + std::to_string (Name.length ()) + Name;
752+ }
753+
754+ bool ext_oneapi_has_device_global (const std::string &Name,
755+ [[maybe_unused]] const device &Dev) {
756+ std::string MangledName = mangle_device_global_name (Name);
757+ return std::find (DeviceGlobalNames.begin (), DeviceGlobalNames.end (),
758+ MangledName) != DeviceGlobalNames.end ();
759+ }
760+
738761 bool empty () const noexcept { return MDeviceImages.empty (); }
739762
740763 backend get_backend () const noexcept {
@@ -993,11 +1016,13 @@ class kernel_bundle_impl {
9931016 // Language is for both state::source and state::executable.
9941017 syclex::source_language Language = syclex::source_language::opencl;
9951018 const std::variant<std::string, std::vector<std::byte>> Source;
996- // only kernel_bundles created from source have KernelNames member .
1019+ // only kernel_bundles created from source have the following members .
9971020 std::vector<std::string> KernelNames;
1021+ std::vector<std::string> DeviceGlobalNames;
9981022 std::string Prefix;
9991023 include_pairs_t IncludePairs;
1000- std::vector<std::unique_ptr<std::byte[]>> DeviceGlobals;
1024+
1025+ std::vector<std::unique_ptr<std::byte[]>> DeviceGlobalAllocations;
10011026};
10021027
10031028} // namespace detail
0 commit comments