@@ -385,6 +385,7 @@ class kernel_bundle_impl {
385385 std::vector<std::string> &&KernelNames,
386386 std::unordered_map<std::string, std::string> &&MangledKernelNames,
387387 std::vector<std::string> &&DeviceGlobalNames,
388+ std::vector<std::unique_ptr<std::byte[]>> &&DeviceGlobalAllocations,
388389 sycl_device_binaries Binaries, std::string &&Prefix,
389390 syclex::source_language Lang)
390391 : kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs,
@@ -399,6 +400,7 @@ class kernel_bundle_impl {
399400 MKernelNames = std::move (KernelNames);
400401 MMangledKernelNames = std::move (MangledKernelNames);
401402 MDeviceGlobalNames = std::move (DeviceGlobalNames);
403+ MDeviceGlobalAllocations = std::move (DeviceGlobalAllocations);
402404 MDeviceBinaries = Binaries;
403405 MPrefix = std::move (Prefix);
404406 MLanguage = Lang;
@@ -535,6 +537,12 @@ class kernel_bundle_impl {
535537 std::vector<kernel_id> KernelIDs;
536538 std::vector<std::string> KernelNames;
537539 std::unordered_map<std::string, std::string> MangledKernelNames;
540+
541+ std::unordered_set<std::string> DeviceGlobalIDSet;
542+ std::vector<std::string> DeviceGlobalIDVec;
543+ std::vector<std::string> DeviceGlobalNames;
544+ std::vector<std::unique_ptr<std::byte[]>> DeviceGlobalAllocations;
545+
538546 for (const auto &KernelID : PM.getAllSYCLKernelIDs ()) {
539547 std::string_view KernelName{KernelID.get_name ()};
540548 if (KernelName.find (Prefix) == 0 ) {
@@ -552,8 +560,8 @@ class kernel_bundle_impl {
552560 }
553561 }
554562
555- // Apply frontend information.
556563 for (const auto *RawImg : PM.getRawDeviceImages (KernelIDs)) {
564+ // Mangled names.
557565 for (const sycl_device_binary_property &RKProp :
558566 RawImg->getRegisteredKernels ()) {
559567
@@ -563,14 +571,8 @@ class kernel_bundle_impl {
563571 reinterpret_cast <const char *>(BA.begin ()), MangledNameLen};
564572 MangledKernelNames.emplace (RKProp->Name , MangledName);
565573 }
566- }
567574
568- // Determine IDs of all device globals referenced by this bundle's
569- // kernels. These IDs are also prefixed.
570- std::unordered_set<std::string> DeviceGlobalIDSet;
571- std::vector<std::string> DeviceGlobalIDVec;
572- std::vector<std::string> DeviceGlobalNames;
573- for (const auto &RawImg : PM.getRawDeviceImages (KernelIDs)) {
575+ // Device globals.
574576 for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals ()) {
575577 std::string_view DeviceGlobalName{DeviceGlobalProp->Name };
576578 assert (DeviceGlobalName.find (Prefix) == 0 );
@@ -585,12 +587,6 @@ class kernel_bundle_impl {
585587 }
586588 }
587589
588- // Create the executable bundle.
589- auto ExecBundle = std::make_shared<kernel_bundle_impl>(
590- MContext, MDevices, KernelIDs, std::move (KernelNames),
591- std::move (MangledKernelNames), std::move (DeviceGlobalNames), Binaries,
592- std::move (Prefix), MLanguage);
593-
594590 // Device globals are usually statically allocated and registered in the
595591 // integration footer, which we don't have in the RTC context. Instead, we
596592 // dynamically allocate storage tied to the executable kernel bundle.
@@ -599,13 +595,13 @@ class kernel_bundle_impl {
599595
600596 size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize ; // init value
601597 if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated ) {
602- // USM pointer. TODO: it's actually a decorated multi_ptr .
598+ // Consider storage for device USM pointer .
603599 AllocSize += sizeof (void *);
604600 }
605601 auto Alloc = std::make_unique<std::byte[]>(AllocSize);
606602 std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId };
607603 PM.addOrInitDeviceGlobalEntry (Alloc.get (), DeviceGlobalName.data ());
608- ExecBundle-> MDeviceGlobalAllocations .push_back (std::move (Alloc));
604+ DeviceGlobalAllocations .push_back (std::move (Alloc));
609605
610606 // Drop the RTC prefix from the entry's symbol name. Note that the PM
611607 // still manages this device global under its prefixed name.
@@ -614,7 +610,11 @@ class kernel_bundle_impl {
614610 DeviceGlobalEntry->MUniqueId = DeviceGlobalName;
615611 }
616612
617- return ExecBundle;
613+ return std::make_shared<kernel_bundle_impl>(
614+ MContext, MDevices, KernelIDs, std::move (KernelNames),
615+ std::move (MangledKernelNames), std::move (DeviceGlobalNames),
616+ std::move (DeviceGlobalAllocations), Binaries, std::move (Prefix),
617+ MLanguage);
618618 }
619619
620620 ur_program_handle_t UrProgram = nullptr ;
@@ -781,6 +781,28 @@ class kernel_bundle_impl {
781781 return Entries.front ();
782782 }
783783
784+ void unregister_device_globals_from_context () {
785+ if (MDeviceGlobalNames.empty ())
786+ return ;
787+
788+ // Manually trigger the release of resources for all device global map
789+ // entries associated with this runtime-compiled bundle. Normally, this
790+ // would happen in `~context_impl()`, however in the RTC setting, the
791+ // context outlives the DG map entries owned by the program manager.
792+
793+ std::vector<std::string> DeviceGlobalIDs;
794+ std::transform (MDeviceGlobalNames.begin (), MDeviceGlobalNames.end (),
795+ std::back_inserter (DeviceGlobalIDs),
796+ [&](const std::string &DGName) { return MPrefix + DGName; });
797+ auto ContextImpl = getSyclObjImpl (MContext);
798+ for (DeviceGlobalMapEntry *Entry :
799+ ProgramManager::getInstance ().getDeviceGlobalEntries (
800+ DeviceGlobalIDs)) {
801+ Entry->removeAssociatedResources (ContextImpl.get ());
802+ ContextImpl->removeAssociatedDeviceGlobal (Entry->MDeviceGlobalPtr );
803+ }
804+ }
805+
784806public:
785807 bool ext_oneapi_has_kernel (const std::string &Name) {
786808 return is_kernel_name (adjust_kernel_name (Name));
@@ -1121,6 +1143,7 @@ class kernel_bundle_impl {
11211143 ~kernel_bundle_impl () {
11221144 try {
11231145 if (MDeviceBinaries) {
1146+ unregister_device_globals_from_context ();
11241147 ProgramManager::getInstance ().removeImages (MDeviceBinaries);
11251148 syclex::detail::SYCL_JIT_destroy (MDeviceBinaries);
11261149 }
@@ -1162,11 +1185,10 @@ class kernel_bundle_impl {
11621185 std::vector<std::string> MKernelNames;
11631186 std::unordered_map<std::string, std::string> MMangledKernelNames;
11641187 std::vector<std::string> MDeviceGlobalNames;
1188+ std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
11651189 sycl_device_binaries MDeviceBinaries = nullptr ;
11661190 std::string MPrefix;
11671191 include_pairs_t MIncludePairs;
1168-
1169- std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
11701192};
11711193
11721194} // namespace detail
0 commit comments