Skip to content

Commit 4d44642

Browse files
committed
Made CPU/GPU buffer initialization significantly faster with std::fill and enqueueFillBuffer
1 parent 20db9fd commit 4d44642

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

src/opencl.hpp

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -243,11 +243,10 @@ template<typename T> class Memory {
243243
allocate_device_buffer(device, allocate_device);
244244
if(allocate_host) {
245245
host_buffer = new T[N*(ulong)d];
246-
for(ulong i=0ull; i<N*(ulong)d; i++) host_buffer[i] = value;
247246
initialize_auxiliary_pointers();
248247
host_buffer_exists = true;
249248
}
250-
write_to_device();
249+
reset(value);
251250
}
252251
inline Memory(Device& device, const ulong N, const uint dimensions, T* const host_buffer, const bool allocate_device=true) {
253252
if(!device.is_initialized()) print_error("No Device selected. Call Device constructor.");
@@ -328,8 +327,13 @@ template<typename T> class Memory {
328327
delete_host_buffer();
329328
}
330329
inline void reset(const T value=(T)0) {
331-
if(host_buffer_exists) for(ulong i=0ull; i<N*(ulong)d; i++) host_buffer[i] = value;
332-
write_to_device();
330+
if(host_buffer_exists) {
331+
std::fill(host_buffer, host_buffer+range(), value); // faster than "for(ulong i=0ull; i<range(); i++) host_buffer[i] = value;"
332+
}
333+
if(device_buffer_exists) {
334+
cl_queue.enqueueFillBuffer(device_buffer, value, 0ull, capacity()); // faster than "write_to_device();"
335+
cl_queue.finish();
336+
}
333337
}
334338
inline const ulong length() const { return N; }
335339
inline const uint dimensions() const { return d; }

0 commit comments

Comments
 (0)