Skip to content

Commit 05c95f8

Browse files
committed
Fix CachingAllocator debug for non-async operations
1 parent 388ec50 commit 05c95f8

File tree

1 file changed

+23
-5
lines changed

1 file changed

+23
-5
lines changed

HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,10 @@ namespace cms::alpakatools {
122122
explicit CachingAllocator(
123123
Device const& device,
124124
AllocatorConfig const& config,
125-
bool reuseSameQueueAllocations, // reuse non-ready allocations if they are in the same queue as the new one;
126-
// this is safe only if all memory operations are scheduled in the same queue
125+
bool reuseSameQueueAllocations, // Reuse non-ready allocations if they are in the same queue as the new one;
126+
// this is safe only if all memory operations are scheduled in the same queue.
127+
// In particular, this is not safe if the memory will be accessed without using
128+
// any queue, like host memory accessed directly or with immediate operations.
127129
bool debug = false)
128130
: device_(device),
129131
binGrowth_(config.binGrowth),
@@ -175,6 +177,22 @@ namespace cms::alpakatools {
175177
return cachedBytes_;
176178
}
177179

180+
// Fill a memory buffer with the specified byte value.
181+
// If the underlying device is the host and the allocator is configured to support immediate
182+
// (non queue-ordered) operations, fill the memory synchronously using std::memset.
183+
// Otherwise, let the alpaka queue schedule the operation.
184+
//
185+
// This is not used for deallocation/caching, because the memory may still be in use until the
186+
// corresponding event is reached.
187+
// Parameters:
//   queue  - used only on the asynchronous path, where the fill is passed to alpaka::memset
//   buffer - the memory to fill
//   value  - the byte value passed to std::memset / alpaka::memset
// NOTE(review): the Device type test below is a compile-time constant, but it is evaluated in a
// plain `if` (not `if constexpr`), so both branches are compiled for every Device type.
void immediateOrAsyncMemset(Queue queue, Buffer buffer, uint8_t value) {
188+
// host-only: synchronous fill, taken when Device is alpaka::DevCpu and
// reuseSameQueueAllocations_ is false
189+
if (std::is_same_v<Device, alpaka::DevCpu> and not reuseSameQueueAllocations_) {
190+
// size in bytes: extent product of the buffer times the size of one element
std::memset(buffer.data(), value, alpaka::getExtentProduct(buffer) * sizeof(alpaka::Elem<Buffer>));
191+
} else {
192+
// every other case: hand the fill to alpaka together with the caller's queue
alpaka::memset(queue, buffer, value);
193+
}
194+
}
195+
178196
// Allocate given number of bytes on the current device associated to given queue
179197
void* allocate(size_t bytes, Queue queue) {
180198
// create a block descriptor for the requested allocation
@@ -187,15 +205,15 @@ namespace cms::alpakatools {
187205
if (tryReuseCachedBlock(block)) {
188206
// fill the re-used memory block with a pattern
189207
if (fillReallocations_) {
190-
alpaka::memset(*block.queue, *block.buffer, fillReallocationValue_);
208+
immediateOrAsyncMemset(*block.queue, *block.buffer, fillReallocationValue_);
191209
} else if (fillAllocations_) {
192-
alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_);
210+
immediateOrAsyncMemset(*block.queue, *block.buffer, fillAllocationValue_);
193211
}
194212
} else {
195213
allocateNewBlock(block);
196214
// fill the newly allocated memory block with a pattern
197215
if (fillAllocations_) {
198-
alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_);
216+
immediateOrAsyncMemset(*block.queue, *block.buffer, fillAllocationValue_);
199217
}
200218
}
201219

0 commit comments

Comments
 (0)