@@ -122,8 +122,10 @@ namespace cms::alpakatools {
122122 explicit CachingAllocator (
123123 Device const & device,
124124 AllocatorConfig const & config,
125- bool reuseSameQueueAllocations, // reuse non-ready allocations if they are in the same queue as the new one;
126- // this is safe only if all memory operations are scheduled in the same queue
125+ bool reuseSameQueueAllocations, // Reuse non-ready allocations if they are in the same queue as the new one;
126+ // this is safe only if all memory operations are scheduled in the same queue.
127+ // In particular, this is not safe if the memory will be accessed without using
128+ // any queue, like host memory accessed directly or with immediate operations.
127129 bool debug = false )
128130 : device_(device),
129131 binGrowth_(config.binGrowth),
@@ -175,6 +177,22 @@ namespace cms::alpakatools {
175177 return cachedBytes_;
176178 }
177179
180+ // Fill a memory buffer with the specified bye value.
181+ // If the underlying device is the host and the allocator is configured to support immediate
182+ // (non queue-ordered) operations, fill the memory synchronously using std::memset.
183+ // Otherwise, let the alpaka queue schedule the operation.
184+ //
185+ // This is not used for deallocation/caching, because the memory may still be in use until the
186+ // corresponding event is reached.
187+ void immediateOrAsyncMemset (Queue queue, Buffer buffer, uint8_t value) {
188+ // host-only
189+ if (std::is_same_v<Device, alpaka::DevCpu> and not reuseSameQueueAllocations_) {
190+ std::memset (buffer.data (), value, alpaka::getExtentProduct (buffer) * sizeof (alpaka::Elem<Buffer>));
191+ } else {
192+ alpaka::memset (queue, buffer, value);
193+ }
194+ }
195+
178196 // Allocate given number of bytes on the current device associated to given queue
179197 void * allocate (size_t bytes, Queue queue) {
180198 // create a block descriptor for the requested allocation
@@ -187,15 +205,15 @@ namespace cms::alpakatools {
187205 if (tryReuseCachedBlock (block)) {
188206 // fill the re-used memory block with a pattern
189207 if (fillReallocations_) {
190- alpaka::memset (*block.queue , *block.buffer , fillReallocationValue_);
208+ immediateOrAsyncMemset (*block.queue , *block.buffer , fillReallocationValue_);
191209 } else if (fillAllocations_) {
192- alpaka::memset (*block.queue , *block.buffer , fillAllocationValue_);
210+ immediateOrAsyncMemset (*block.queue , *block.buffer , fillAllocationValue_);
193211 }
194212 } else {
195213 allocateNewBlock (block);
196214 // fill the newly allocated memory block with a pattern
197215 if (fillAllocations_) {
198- alpaka::memset (*block.queue , *block.buffer , fillAllocationValue_);
216+ immediateOrAsyncMemset (*block.queue , *block.buffer , fillAllocationValue_);
199217 }
200218 }
201219
0 commit comments