Skip to content

Commit d2d52aa

Browse files
committed
Fixed memory leak enqueuing marker
1 parent 3554909 commit d2d52aa

File tree

4 files changed: 11 additions (+11) and 3 deletions (-3)

src/Queue.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ void Queue::queueMarkerEvent() {
9292
}
9393
// Enqueue a marker for nVidia GPUs
9494
else {
95-
clEnqueueMarkerWithWaitList(get(), 0, NULL, &markerEvent);
95+
markerEvent = enqueueMarker(get());
9696
markerQueued = true;
9797
queueCount = 0;
9898
}
@@ -103,7 +103,7 @@ void Queue::waitForMarkerEvent() {
103103
if (!markerQueued) return;
104104
// By default, nVidia finish causes a CPU busy wait. Instead, sleep for a while. Since we know how many items are enqueued after the marker we can make an
105105
// educated guess of how long to sleep to keep CPU overhead low.
106-
while (getEventInfo(markerEvent) != CL_COMPLETE) {
106+
while (getEventInfo(markerEvent.get()) != CL_COMPLETE) {
107107
// There are 4, 7, or 10 kernels per squaring. Don't overestimate sleep time. Divide by much more than the number of kernels.
108108
std::this_thread::sleep_for(std::chrono::microseconds(1 + queueCount * squareTime / squareKernels / 2));
109109
}

src/Queue.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class Queue : public QueueHolder {
5555

5656
private: // This replaces the "call queue->finish every 400 squarings" code in Gpu.cpp. Solves the busy wait on nVidia GPUs.
5757
int MAX_QUEUE_COUNT; // Queue size before a marker will be enqueued. Typically, 100 to 1000 squarings.
58-
cl_event markerEvent; // Event associated with an enqueued marker placed in the queue every MAX_QUEUE_COUNT entries and before r/w operations.
58+
EventHolder markerEvent; // Event associated with an enqueued marker placed in the queue every MAX_QUEUE_COUNT entries and before r/w operations.
5959
bool markerQueued; // TRUE if a marker and event have been queued
6060
int queueCount; // Count of items added to the queue since last marker
6161
int squareTime; // Time to do one squaring (in microseconds)

src/clwrap.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,12 @@ EventHolder fillBuf(cl_queue q, vector<cl_event>&& waits,
363363
return genEvent ? EventHolder{event} : EventHolder{};
364364
}
365365

366+
EventHolder enqueueMarker(cl_queue q) {
367+
cl_event event{};
368+
CHECK1(clEnqueueMarkerWithWaitList(q, 0, 0, &event));
369+
return EventHolder{event};
370+
}
371+
366372
void waitForEvents(vector<cl_event>&& waits) {
367373
if (!waits.empty()) {
368374
CHECK1(clWaitForEvents(waits.size(), waits.data()));

src/clwrap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ EventHolder copyBuf(cl_queue queue, vector<cl_event>&& waits, const cl_mem src,
106106

107107
EventHolder fillBuf(cl_queue q, vector<cl_event>&& waits, cl_mem buf, const void *pat, size_t patSize, size_t size, bool genEvent);
108108

109+
EventHolder enqueueMarker(cl_queue q);
110+
109111
void waitForEvents(vector<cl_event>&& waits);
110112

111113

0 commit comments

Comments
 (0)