From 7dabb34394413e7db4c8313bcd9803be4780ae63 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 27 Sep 2024 07:15:20 -0700 Subject: [PATCH 1/2] [SYCL] Fix a thread pool data race during shutdown The flag that kills the threads waiting for work was an atomic bool, but that is not enough: any modification of a condition must be made under the mutex associated with that condition variable. Otherwise, the bool value flip and the call to notify might happen after the condition check in the other thread, but before it starts waiting for notifications. --- sycl/source/detail/thread_pool.hpp | 14 ++++++++------ .../Regression/unwaited_for_host_task.cpp | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 sycl/test-e2e/Regression/unwaited_for_host_task.cpp diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 304045389b53b..50240e0a98b06 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -29,17 +29,17 @@ class ThreadPool { std::queue> MJobQueue; std::mutex MJobQueueMutex; std::condition_variable MDoSmthOrStop; - std::atomic_bool MStop; + bool MStop = false; std::atomic_uint MJobsInPool; void worker() { GlobalHandler::instance().registerSchedulerUsage(/*ModifyCounter*/ false); std::unique_lock Lock(MJobQueueMutex); while (true) { - MDoSmthOrStop.wait( - Lock, [this]() { return !MJobQueue.empty() || MStop.load(); }); + MDoSmthOrStop.wait(Lock, + [this]() { return !MJobQueue.empty() || MStop; }); - if (MStop.load()) + if (MStop) break; std::function Job = std::move(MJobQueue.front()); @@ -57,7 +57,6 @@ class ThreadPool { void start() { MLaunchedThreads.reserve(MThreadCount); - MStop.store(false); MJobsInPool.store(0); for (size_t Idx = 0; Idx < MThreadCount; ++Idx) @@ -83,7 +82,10 @@ class ThreadPool { } void finishAndWait() { - MStop.store(true); + { + std::lock_guard Lock(MJobQueueMutex); + MStop = true; + } MDoSmthOrStop.notify_all(); diff --git a/sycl/test-e2e/Regression/unwaited_for_host_task.cpp b/sycl/test-e2e/Regression/unwaited_for_host_task.cpp new file mode 100644 index 0000000000000..dcc306df4b8d2 --- /dev/null +++ b/sycl/test-e2e/Regression/unwaited_for_host_task.cpp @@ -0,0 +1,16 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +using namespace sycl; + +// This test checks that host tasks that haven't been waited for do not cause +// issues during shutdown. +int main(int argc, char *argv[]) { + queue q; + + q.submit([&](handler &h) { h.host_task([=]() {}); }); + + return 0; +} From a312a734908585ea143679e235251739e2a7f7de Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 27 Sep 2024 08:27:24 -0700 Subject: [PATCH 2/2] Fix test header --- sycl/test-e2e/Regression/unwaited_for_host_task.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Regression/unwaited_for_host_task.cpp b/sycl/test-e2e/Regression/unwaited_for_host_task.cpp index dcc306df4b8d2..6f6ae3090c1eb 100644 --- a/sycl/test-e2e/Regression/unwaited_for_host_task.cpp +++ b/sycl/test-e2e/Regression/unwaited_for_host_task.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include using namespace sycl;