@@ -68,22 +68,33 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
6868 {
6969 std::lock_guard<std::mutex> Lock (NewAlloc.MInitEventMutex );
7070 ur_event_handle_t InitEvent;
71- // C++ guarantees members appear in memory in the order they are declared,
72- // so since the member variable that contains the initial contents of the
73- // device_global is right after the usm_ptr member variable we can do
74- // some pointer arithmetic to memcopy over this value to the usm_ptr. This
75- // value inside of the device_global will be zero-initialized if it was not
76- // given a value on construction.
77-
78- MemoryManager::copy_usm (reinterpret_cast <const void *>(
79- reinterpret_cast <uintptr_t >(MDeviceGlobalPtr) +
80- sizeof (MDeviceGlobalPtr)),
81- QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr ,
82- std::vector<ur_event_handle_t >{}, &InitEvent);
71+ if (MDeviceGlobalPtr) {
72+ // C++ guarantees members appear in memory in the order they are declared,
73+ // so since the member variable that contains the initial contents of the
74+ // device_global is right after the usm_ptr member variable we can do
75+ // some pointer arithmetic to memcopy over this value to the usm_ptr. This
76+ // value inside of the device_global will be zero-initialized if it was
77+ // not given a value on construction.
78+ MemoryManager::copy_usm (
79+ reinterpret_cast <const void *>(
80+ reinterpret_cast <uintptr_t >(MDeviceGlobalPtr) +
81+ sizeof (MDeviceGlobalPtr)),
82+ QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr ,
83+ std::vector<ur_event_handle_t >{}, &InitEvent);
84+ } else {
85+ // For SYCLBIN device globals we do not have a host pointer to copy from,
86+ // so instead we fill the USM memory with 0's.
87+ MemoryManager::fill_usm (NewAlloc.MPtr , QueueImpl, MDeviceGlobalTSize,
88+ {static_cast <unsigned char >(0 )}, {}, &InitEvent);
89+ }
8390 NewAlloc.MInitEvent = InitEvent;
8491 }
8592
86- CtxImpl.addAssociatedDeviceGlobal (MDeviceGlobalPtr);
93+ // Only device globals with host variables need to be registered with the
94+ // context. The rest will be managed by their kernel bundles and cleaned up
95+ // accordingly.
96+ if (MDeviceGlobalPtr)
97+ CtxImpl.addAssociatedDeviceGlobal (MDeviceGlobalPtr);
8798 return NewAlloc;
8899}
89100
@@ -111,19 +122,32 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) {
111122 " USM allocation for device and context already happened." );
112123 DeviceGlobalUSMMem &NewAlloc = NewAllocIt.first ->second ;
113124
114- // C++ guarantees members appear in memory in the order they are declared,
115- // so since the member variable that contains the initial contents of the
116- // device_global is right after the usm_ptr member variable we can do
117- // some pointer arithmetic to memcopy over this value to the usm_ptr. This
118- // value inside of the device_global will be zero-initialized if it was not
119- // given a value on construction.
120- MemoryManager::context_copy_usm (
121- reinterpret_cast <const void *>(
122- reinterpret_cast <uintptr_t >(MDeviceGlobalPtr) +
123- sizeof (MDeviceGlobalPtr)),
124- &CtxImpl, MDeviceGlobalTSize, NewAlloc.MPtr );
125-
126- CtxImpl.addAssociatedDeviceGlobal (MDeviceGlobalPtr);
125+ if (MDeviceGlobalPtr) {
126+ // C++ guarantees members appear in memory in the order they are declared,
127+ // so since the member variable that contains the initial contents of the
128+ // device_global is right after the usm_ptr member variable we can do
129+ // some pointer arithmetic to memcopy over this value to the usm_ptr. This
130+ // value inside of the device_global will be zero-initialized if it was not
131+ // given a value on construction.
132+ MemoryManager::context_copy_usm (
133+ reinterpret_cast <const void *>(
134+ reinterpret_cast <uintptr_t >(MDeviceGlobalPtr) +
135+ sizeof (MDeviceGlobalPtr)),
136+ &CtxImpl, MDeviceGlobalTSize, NewAlloc.MPtr );
137+ } else {
138+ // For SYCLBIN device globals we do not have a host pointer to copy from,
139+ // so instead we fill the USM memory with 0's.
140+ std::vector<unsigned char > ImmBuff (MDeviceGlobalTSize,
141+ static_cast <unsigned char >(0 ));
142+ MemoryManager::context_copy_usm (ImmBuff.data (), &CtxImpl,
143+ MDeviceGlobalTSize, NewAlloc.MPtr );
144+ }
145+
146+ // Only device globals with host variables need to be registered with the
147+ // context. The rest will be managed by their kernel bundles and cleaned up
148+ // accordingly.
149+ if (MDeviceGlobalPtr)
150+ CtxImpl.addAssociatedDeviceGlobal (MDeviceGlobalPtr);
127151 return NewAlloc;
128152}
129153
@@ -150,6 +174,30 @@ void DeviceGlobalMapEntry::removeAssociatedResources(
150174 }
151175}
152176
// Releases every USM allocation tracked by this entry and then empties
// MDeviceToUSMPtrMap. MDeviceToUSMPtrMapMutex is held for the whole call.
//
// Only valid for entries without a host variable (MDeviceGlobalPtr == nullptr);
// this is asserted below. Per the assertion message, entries that do have a
// host variable are associated with a context and are expected to be cleaned
// up by that context's destructor rather than through this function.
177+ void DeviceGlobalMapEntry::cleanup () {
178+ std::lock_guard<std::mutex> Lock{MDeviceToUSMPtrMapMutex};
179+ assert (MDeviceGlobalPtr == nullptr &&
180+ " Entry has host variable, so it should be associated with a context "
181+ " and should be cleaned up by its dtor." );
182+ for (auto &USMPtrIt : MDeviceToUSMPtrMap) {
183+ // The second half of the map key is the context_impl the allocation was
184+ // made for. It is used here as a raw pointer, so it must still be alive;
// the expectation is that the kernel_bundle owning these device_global
// entries keeps the context alive until cleanup has run.
185+ const context_impl *CtxImpl = USMPtrIt.first .second ;
186+ DeviceGlobalUSMMem &USMMem = USMPtrIt.second ;
// Free the USM pointer first, then release the initialization event if one
// was recorded for this allocation.
// NOTE(review): MInitEvent is read here without taking the allocation's
// MInitEventMutex (which the allocation path locks when setting the event).
// Presumably no allocation can race with cleanup -- confirm.
187+ detail::usm::freeInternal (USMMem.MPtr , CtxImpl);
188+ if (USMMem.MInitEvent .has_value ())
189+ CtxImpl->getAdapter ()->call <UrApiKind::urEventRelease>(
190+ *USMMem.MInitEvent );
191+ #ifndef NDEBUG
192+ // Assertion-enabled builds only: reset the pointer and the event to
193+ // recognizable "released" values so that it can be checked that this cleanup ran before the entry is erased.
194+ USMMem.MPtr = nullptr ;
195+ USMMem.MInitEvent = {};
196+ #endif
197+ }
// All resources have been released above; drop the now-stale map entries.
198+ MDeviceToUSMPtrMap.clear ();
199+ }
200+
153201} // namespace detail
154202} // namespace _V1
155203} // namespace sycl
0 commit comments