Skip to content

Commit 60d5546

Browse files
committed
Fix race condition in SerialTaskQueue
There was a race where if the queue was empty right when the running task finished and the task sets m_taskChosen and see that the queue is empty and right then a new task is added and that thread sees m_taskChosen was set, then neither threads will start the next task.
1 parent 5d50a9e commit 60d5546

File tree

2 files changed

+18
-16
lines changed

2 files changed

+18
-16
lines changed

FWCore/Concurrency/interface/SerialTaskQueue.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,14 @@
6666
namespace edm {
6767
class SerialTaskQueue {
6868
public:
69-
SerialTaskQueue() : m_taskChosen(false), m_pauseCount{0} {}
69+
SerialTaskQueue() : m_pauseCount{0}, m_taskChosen{false}, m_pickingNextTask{false} {}
7070

7171
SerialTaskQueue(SerialTaskQueue&& iOther)
7272
: m_tasks(std::move(iOther.m_tasks)),
73+
m_pauseCount(iOther.m_pauseCount.exchange(0)),
7374
m_taskChosen(iOther.m_taskChosen.exchange(false)),
74-
m_pauseCount(iOther.m_pauseCount.exchange(0)) {
75-
assert(m_tasks.empty() and m_taskChosen == false);
75+
m_pickingNextTask(false) {
76+
assert(m_tasks.empty() and m_taskChosen == false and iOther.m_pickingNextTask == false);
7677
}
7778
SerialTaskQueue(const SerialTaskQueue&) = delete;
7879
const SerialTaskQueue& operator=(const SerialTaskQueue&) = delete;
@@ -159,8 +160,9 @@ namespace edm {
159160

160161
// ---------- member data --------------------------------
161162
oneapi::tbb::concurrent_queue<TaskBase*> m_tasks;
162-
std::atomic<bool> m_taskChosen;
163163
std::atomic<unsigned long> m_pauseCount;
164+
std::atomic<bool> m_taskChosen;
165+
std::atomic<bool> m_pickingNextTask;
164166
};
165167

166168
template <typename T>

FWCore/Concurrency/src/SerialTaskQueue.cc

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "FWCore/Concurrency/interface/SerialTaskQueue.h"
1919

2020
#include "FWCore/Utilities/interface/Likely.h"
21+
#include "FWCore/Utilities/interface/make_sentry.h"
2122

2223
using namespace edm;
2324

@@ -86,25 +87,24 @@ SerialTaskQueue::TaskBase* SerialTaskQueue::finishedTask() {
8687
}
8788

8889
SerialTaskQueue::TaskBase* SerialTaskQueue::pickNextTask() {
90+
if UNLIKELY (0 != m_pauseCount)
91+
return nullptr;
8992
bool expect = false;
90-
if LIKELY (0 == m_pauseCount and m_taskChosen.compare_exchange_strong(expect, true)) {
93+
//need pop task and setting m_taskChosen to be atomic to avoid
94+
// case where thread pauses just after try_pop failed but then
95+
// a task is added and that call fails the check on m_taskChosen
96+
while (not m_pickingNextTask.compare_exchange_strong(expect, true)) {
97+
expect = false;
98+
}
99+
auto sentry = edm::make_sentry(&m_pickingNextTask, [](auto* v) { v->store(false); });
100+
101+
if LIKELY (m_taskChosen.compare_exchange_strong(expect, true)) {
91102
TaskBase* t = nullptr;
92103
if LIKELY (m_tasks.try_pop(t)) {
93104
return t;
94105
}
95106
//no task was actually pulled
96107
m_taskChosen.store(false);
97-
98-
//was a new entry added after we called 'try_pop' but before we did the clear?
99-
expect = false;
100-
if (not m_tasks.empty() and m_taskChosen.compare_exchange_strong(expect, true)) {
101-
t = nullptr;
102-
if (m_tasks.try_pop(t)) {
103-
return t;
104-
}
105-
//no task was still pulled since a different thread beat us to it
106-
m_taskChosen.store(false);
107-
}
108108
}
109109
return nullptr;
110110
}

0 commit comments

Comments
 (0)