2525#include < cstdint>
2626#include < cstring>
2727#include < memory>
28+ #include < unordered_set>
2829#include < vector>
2930
3031#include " split_string.hpp"
@@ -383,6 +384,8 @@ class kernel_bundle_impl {
383384 const std::vector<kernel_id> &KernelIDs,
384385 std::vector<std::string> &&KernelNames,
385386 std::unordered_map<std::string, std::string> &&MangledKernelNames,
387+ std::vector<std::string> &&DeviceGlobalNames,
388+ std::vector<std::unique_ptr<std::byte[]>> &&DeviceGlobalAllocations,
386389 sycl_device_binaries Binaries, std::string &&Prefix,
387390 syclex::source_language Lang)
388391 : kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs,
@@ -396,6 +399,8 @@ class kernel_bundle_impl {
396399 MIsInterop = true ;
397400 MKernelNames = std::move (KernelNames);
398401 MMangledKernelNames = std::move (MangledKernelNames);
402+ MDeviceGlobalNames = std::move (DeviceGlobalNames);
403+ MDeviceGlobalAllocations = std::move (DeviceGlobalAllocations);
399404 MDeviceBinaries = Binaries;
400405 MPrefix = std::move (Prefix);
401406 MLanguage = Lang;
@@ -546,6 +551,12 @@ class kernel_bundle_impl {
546551 std::vector<kernel_id> KernelIDs;
547552 std::vector<std::string> KernelNames;
548553 std::unordered_map<std::string, std::string> MangledKernelNames;
554+
555+ std::unordered_set<std::string> DeviceGlobalIDSet;
556+ std::vector<std::string> DeviceGlobalIDVec;
557+ std::vector<std::string> DeviceGlobalNames;
558+ std::vector<std::unique_ptr<std::byte[]>> DeviceGlobalAllocations;
559+
549560 for (const auto &KernelID : PM.getAllSYCLKernelIDs ()) {
550561 std::string_view KernelName{KernelID.get_name ()};
551562 if (KernelName.find (Prefix) == 0 ) {
@@ -563,8 +574,8 @@ class kernel_bundle_impl {
563574 }
564575 }
565576
566- // Apply frontend information.
567577 for (const auto *RawImg : PM.getRawDeviceImages (KernelIDs)) {
578+ // Mangled names.
568579 for (const sycl_device_binary_property &RKProp :
569580 RawImg->getRegisteredKernels ()) {
570581
@@ -574,11 +585,49 @@ class kernel_bundle_impl {
574585 reinterpret_cast <const char *>(BA.begin ()), MangledNameLen};
575586 MangledKernelNames.emplace (RKProp->Name , MangledName);
576587 }
588+
589+ // Device globals.
590+ for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals ()) {
591+ std::string_view DeviceGlobalName{DeviceGlobalProp->Name };
592+ assert (DeviceGlobalName.find (Prefix) == 0 );
593+ bool Inserted = false ;
594+ std::tie (std::ignore, Inserted) =
595+ DeviceGlobalIDSet.emplace (DeviceGlobalName);
596+ if (Inserted) {
597+ DeviceGlobalIDVec.emplace_back (DeviceGlobalName);
598+ DeviceGlobalName.remove_prefix (Prefix.length ());
599+ DeviceGlobalNames.emplace_back (DeviceGlobalName);
600+ }
601+ }
602+ }
603+
604+ // Device globals are usually statically allocated and registered in the
605+ // integration footer, which we don't have in the RTC context. Instead, we
606+ // dynamically allocate storage tied to the executable kernel bundle.
607+ for (DeviceGlobalMapEntry *DeviceGlobalEntry :
608+ PM.getDeviceGlobalEntries (DeviceGlobalIDVec)) {
609+
610+ size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize ; // init value
611+ if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated ) {
612+ // Consider storage for device USM pointer.
613+ AllocSize += sizeof (void *);
614+ }
615+ auto Alloc = std::make_unique<std::byte[]>(AllocSize);
616+ std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId };
617+ PM.addOrInitDeviceGlobalEntry (Alloc.get (), DeviceGlobalName.data ());
618+ DeviceGlobalAllocations.push_back (std::move (Alloc));
619+
620+ // Drop the RTC prefix from the entry's symbol name. Note that the PM
621+ // still manages this device global under its prefixed name.
622+ assert (DeviceGlobalName.find (Prefix) == 0 );
623+ DeviceGlobalName.remove_prefix (Prefix.length ());
624+ DeviceGlobalEntry->MUniqueId = DeviceGlobalName;
577625 }
578626
579627 return std::make_shared<kernel_bundle_impl>(
580628 MContext, MDevices, KernelIDs, std::move (KernelNames),
581- std::move (MangledKernelNames), Binaries, std::move (Prefix),
629+ std::move (MangledKernelNames), std::move (DeviceGlobalNames),
630+ std::move (DeviceGlobalAllocations), Binaries, std::move (Prefix),
582631 MLanguage);
583632 }
584633
@@ -680,6 +729,8 @@ class kernel_bundle_impl {
680729 KernelNames, MLanguage);
681730 }
682731
732+ // Utility methods for kernel_compiler functionality
733+ private:
683734 std::string adjust_kernel_name (const std::string &Name) {
684735 if (MLanguage == syclex::source_language::sycl) {
685736 auto It = MMangledKernelNames.find (Name);
@@ -694,8 +745,58 @@ class kernel_bundle_impl {
694745 MKernelNames.end ();
695746 }
696747
/// Builds the mangled form of a device global's name as `_Z`, followed by the
/// decimal length of \p Name, followed by \p Name itself
/// (e.g. `foo` -> `_Z3foo`).
///
/// \param Name Unqualified source-level name of the device global.
/// \return The mangled name, without the bundle's RTC prefix.
std::string mangle_device_global_name(const std::string &Name) {
  // TODO: Support device globals declared in namespaces.
  // The prefix must be exactly "_Z": the result is compared verbatim against
  // the names stored in MDeviceGlobalNames, so any extra character (such as a
  // leading space) would make every lookup fail.
  return "_Z" + std::to_string(Name.length()) + Name;
}
752+
753+ DeviceGlobalMapEntry *get_device_global_entry (const std::string &Name) {
754+ if (MKernelNames.empty () || MLanguage != syclex::source_language::sycl) {
755+ throw sycl::exception (make_error_code (errc::invalid),
756+ " Querying device globals by name is only available "
757+ " in kernel_bundles successfully built from "
758+ " kernel_bundle<bundle_state>::ext_oneapi_source> "
759+ " with 'sycl' source language." );
760+ }
761+
762+ if (!ext_oneapi_has_device_global (Name)) {
763+ throw sycl::exception (make_error_code (errc::invalid),
764+ " device global '" + Name +
765+ " ' not found in kernel_bundle" );
766+ }
767+
768+ std::vector<DeviceGlobalMapEntry *> Entries =
769+ ProgramManager::getInstance ().getDeviceGlobalEntries (
770+ {MPrefix + mangle_device_global_name (Name)});
771+ assert (Entries.size () == 1 );
772+ return Entries.front ();
773+ }
774+
775+ void unregister_device_globals_from_context () {
776+ if (MDeviceGlobalNames.empty ())
777+ return ;
778+
779+ // Manually trigger the release of resources for all device global map
780+ // entries associated with this runtime-compiled bundle. Normally, this
781+ // would happen in `~context_impl()`, however in the RTC setting, the
782+ // context outlives the DG map entries owned by the program manager.
783+
784+ std::vector<std::string> DeviceGlobalIDs;
785+ std::transform (MDeviceGlobalNames.begin (), MDeviceGlobalNames.end (),
786+ std::back_inserter (DeviceGlobalIDs),
787+ [&](const std::string &DGName) { return MPrefix + DGName; });
788+ auto ContextImpl = getSyclObjImpl (MContext);
789+ for (DeviceGlobalMapEntry *Entry :
790+ ProgramManager::getInstance ().getDeviceGlobalEntries (
791+ DeviceGlobalIDs)) {
792+ Entry->removeAssociatedResources (ContextImpl.get ());
793+ ContextImpl->removeAssociatedDeviceGlobal (Entry->MDeviceGlobalPtr );
794+ }
795+ }
796+
797+ public:
697798 bool ext_oneapi_has_kernel (const std::string &Name) {
698- return is_kernel_name (adjust_kernel_name (Name));
799+ return !MKernelNames. empty () && is_kernel_name (adjust_kernel_name (Name));
699800 }
700801
701802 kernel
@@ -768,6 +869,41 @@ class kernel_bundle_impl {
768869 return AdjustedName;
769870 }
770871
872+ bool ext_oneapi_has_device_global (const std::string &Name) {
873+ return !MDeviceGlobalNames.empty () &&
874+ std::find (MDeviceGlobalNames.begin (), MDeviceGlobalNames.end (),
875+ mangle_device_global_name (Name)) !=
876+ MDeviceGlobalNames.end ();
877+ }
878+
879+ void *ext_oneapi_get_device_global_address (const std::string &Name,
880+ const device &Dev) {
881+ DeviceGlobalMapEntry *Entry = get_device_global_entry (Name);
882+
883+ if (std::find (MDevices.begin (), MDevices.end (), Dev) == MDevices.end ()) {
884+ throw sycl::exception (make_error_code (errc::invalid),
885+ " kernel_bundle not built for device" );
886+ }
887+
888+ if (Entry->MIsDeviceImageScopeDecorated ) {
889+ throw sycl::exception (make_error_code (errc::invalid),
890+ " Cannot query USM pointer for device global with "
891+ " 'device_image_scope' property" );
892+ }
893+
894+ // TODO: Add context-only initialization via `urUSMContextMemcpyExp` instead
895+ // of using a throw-away queue.
896+ queue InitQueue{MContext, Dev};
897+ auto &USMMem =
898+ Entry->getOrAllocateDeviceGlobalUSM (getSyclObjImpl (InitQueue));
899+ InitQueue.wait_and_throw ();
900+ return USMMem.getPtr ();
901+ }
902+
903+ size_t ext_oneapi_get_device_global_size (const std::string &Name) {
904+ return get_device_global_entry (Name)->MDeviceGlobalTSize ;
905+ }
906+
771907 bool empty () const noexcept { return MDeviceImages.empty (); }
772908
773909 backend get_backend () const noexcept {
@@ -999,6 +1135,7 @@ class kernel_bundle_impl {
9991135 ~kernel_bundle_impl () {
10001136 try {
10011137 if (MDeviceBinaries) {
1138+ unregister_device_globals_from_context ();
10021139 ProgramManager::getInstance ().removeImages (MDeviceBinaries);
10031140 syclex::detail::SYCL_JIT_Destroy (MDeviceBinaries);
10041141 }
@@ -1039,6 +1176,8 @@ class kernel_bundle_impl {
10391176 // only kernel_bundles created from source have KernelNames member.
10401177 std::vector<std::string> MKernelNames;
10411178 std::unordered_map<std::string, std::string> MMangledKernelNames;
1179+ std::vector<std::string> MDeviceGlobalNames;
1180+ std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
10421181 sycl_device_binaries MDeviceBinaries = nullptr ;
10431182 std::string MPrefix;
10441183 include_pairs_t MIncludePairs;
0 commit comments