@@ -53,6 +53,63 @@ OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) {
5353 }
5454}
5555
56+ bool DeviceGlobalMapEntry::isAvailableInContext (const context_impl *CtxImpl) {
57+ std::lock_guard<std::mutex> Lock{MDeviceToUSMPtrMapMutex};
58+ for (const auto &It : MDeviceToUSMPtrMap)
59+ if (It.first .second == CtxImpl)
60+ return true ;
61+ return false ;
62+ }
63+
64+ bool DeviceGlobalMapEntry::isProfileCounter () {
65+ const std::string CounterPrefix = " __profc_" ;
66+ return MUniqueId.substr (0 , CounterPrefix.size ()) == CounterPrefix;
67+ }
68+
// Optional hook that receives profile counter increments collected from a
// device global's USM allocation.
//
// The symbol is declared weak, so it may be absent at link time. Callers must
// test the function's address before calling it, as cleanupProfileCounter
// does.
//
//   FnHash      - hash parsed from the counter's unique id.
//   NumCounters - number of elements in Increments.
//   Increments  - array of NumCounters counter values to add.
extern "C" void __attribute__((weak))
__sycl_increment_profile_counters(std::uint64_t FnHash, std::size_t NumCounters,
                                  const std::uint64_t *Increments);
72+
73+ void DeviceGlobalMapEntry::cleanupProfileCounter (context_impl *CtxImpl) {
74+ std::lock_guard<std::mutex> Lock{MDeviceToUSMPtrMapMutex};
75+ const std::size_t NumCounters = MDeviceGlobalTSize / sizeof (std::uint64_t );
76+ const std::uint64_t FnHash = [&] {
77+ const auto PrefixSize = std::string{" __profc_" }.size ();
78+ constexpr int DecimalBase = 10 ;
79+ return std::strtoull (MUniqueId.substr (PrefixSize).c_str (), nullptr ,
80+ DecimalBase);
81+ }();
82+ for (device_impl &Device : CtxImpl->getDevices ()) {
83+ auto USMPtrIt = MDeviceToUSMPtrMap.find ({&Device, CtxImpl});
84+ if (USMPtrIt != MDeviceToUSMPtrMap.end ()) {
85+ DeviceGlobalUSMMem &USMMem = USMPtrIt->second ;
86+
87+ // Get the increments from the USM pointer
88+ std::vector<std::uint64_t > Increments (NumCounters);
89+ const std::uint64_t *Counters = static_cast <std::uint64_t *>(USMMem.MPtr );
90+ for (std::size_t I = 0 ; I < NumCounters; ++I)
91+ Increments[I] += Counters[I];
92+
93+ // Call the weak symbol to update the profile counters
94+ if (__sycl_increment_profile_counters) {
95+ __sycl_increment_profile_counters (FnHash, Increments.size (),
96+ Increments.data ());
97+ }
98+
99+ // Free the USM memory and release the event if it exists.
100+ detail::usm::freeInternal (USMMem.MPtr , CtxImpl);
101+ if (USMMem.MInitEvent != nullptr )
102+ CtxImpl->getAdapter ().call <UrApiKind::urEventRelease>(
103+ USMMem.MInitEvent );
104+
105+ // Set to nullptr to avoid double free.
106+ USMMem.MPtr = nullptr ;
107+ USMMem.MInitEvent = nullptr ;
108+ MDeviceToUSMPtrMap.erase (USMPtrIt);
109+ }
110+ }
111+ }
112+
56113DeviceGlobalUSMMem &
57114DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM (queue_impl &QueueImpl) {
58115 assert (!MIsDeviceImageScopeDecorated &&
@@ -67,7 +124,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
67124 return DGUSMPtr->second ;
68125
69126 void *NewDGUSMPtr = detail::usm::alignedAllocInternal (
70- 0 , MDeviceGlobalTSize, &CtxImpl, &DevImpl, sycl::usm::alloc::device);
127+ 0 , MDeviceGlobalTSize, &CtxImpl, &DevImpl,
128+ isProfileCounter () ? sycl::usm::alloc::shared : sycl::usm::alloc::device);
71129
72130 auto NewAllocIt = MDeviceToUSMPtrMap.emplace (
73131 std::piecewise_construct, std::forward_as_tuple (&DevImpl, &CtxImpl),
@@ -125,7 +183,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) {
125183 return DGUSMPtr->second ;
126184
127185 void *NewDGUSMPtr = detail::usm::alignedAllocInternal (
128- 0 , MDeviceGlobalTSize, &CtxImpl, &DevImpl, sycl::usm::alloc::device);
186+ 0 , MDeviceGlobalTSize, &CtxImpl, &DevImpl,
187+ isProfileCounter () ? sycl::usm::alloc::shared : sycl::usm::alloc::device);
129188
130189 auto NewAllocIt = MDeviceToUSMPtrMap.emplace (
131190 std::piecewise_construct, std::forward_as_tuple (&DevImpl, &CtxImpl),
0 commit comments