@@ -20,14 +20,6 @@ void ThreadPoolCallbackRunnerFast::initThreadPool(ThreadPool & pool_, size_t max
2020 max_threads = max_threads_;
2121 thread_name = thread_name_;
2222 thread_group = thread_group_;
23-
24- // / We could dynamically add and remove threads based on load, but it's not clear whether it's
25- // / worth the added complexity.
26- for (size_t i = 0 ; i < max_threads; ++i)
27- {
28- pool->scheduleOrThrowOnError ([this ] { threadFunction (); });
29- ++threads; // only if scheduleOrThrowOnError didn't throw
30- }
3123}
3224
3325ThreadPoolCallbackRunnerFast::ThreadPoolCallbackRunnerFast (Mode mode_) : mode(mode_)
@@ -58,19 +50,30 @@ void ThreadPoolCallbackRunnerFast::shutdown()
5850 chassert (active_tasks.load () == queue.size ());
5951}
6052
/// Schedules additional `threadFunction` workers on `pool`, one per loop iteration, for as long as
/// all of these hold: `threads < max_threads`, `threads < active_tasks_`, and `shutdown_requested`
/// is not set.
///
/// `active_tasks_` is the task count the caller computed right after bumping `active_tasks`.
/// The unnamed lock parameter is not used in the body; presumably it exists so that callers have
/// to hold `mutex` while calling (both visible call sites pass their `std::unique_lock` on `mutex`).
///
/// An exception from `scheduleOrThrow` propagates to the caller; `threads` is not incremented for
/// the failed attempt.
/// NOTE(review): the visible callers have already incremented `active_tasks` and pushed to `queue`
/// before calling this, and no rollback is visible in this diff - confirm that is intended.
53+ void ThreadPoolCallbackRunnerFast::startMoreThreadsIfNeeded (size_t active_tasks_, std::unique_lock<std::mutex> &)
54+ {
55+ while (threads < max_threads && threads < active_tasks_ && !shutdown_requested)
56+ {
57+ pool->scheduleOrThrow ([this ] { threadFunction (); });
58+ ++threads; // only if scheduleOrThrow didn't throw
59+ }
60+ }
61+
6162void ThreadPoolCallbackRunnerFast::operator ()(std::function<void ()> f)
6263{
6364 if (mode == Mode::Disabled)
6465 throw Exception (ErrorCodes::LOGICAL_ERROR, " Thread pool runner is not initialized" );
6566
67+ size_t active_tasks_ = 1 + active_tasks.fetch_add (1 , std::memory_order_relaxed);
68+
6669 {
6770 std::unique_lock lock (mutex);
6871 queue.push_back (std::move (f));
72+ startMoreThreadsIfNeeded (active_tasks_, lock);
6973 }
7074
7175 if (mode == Mode::ThreadPool)
7276 {
73- active_tasks.fetch_add (1 , std::memory_order_relaxed);
7477#ifdef OS_LINUX
7578 UInt32 prev_size = queue_size.fetch_add (1 , std::memory_order_release);
7679 if (prev_size < max_threads)
@@ -89,14 +92,16 @@ void ThreadPoolCallbackRunnerFast::bulkSchedule(std::vector<std::function<void()
8992 if (mode == Mode::Disabled)
9093 throw Exception (ErrorCodes::LOGICAL_ERROR, " Thread pool runner is not initialized" );
9194
95+ size_t active_tasks_ = fs.size () + active_tasks.fetch_add (fs.size (), std::memory_order_relaxed);
96+
9297 {
9398 std::unique_lock lock (mutex);
9499 queue.insert (queue.end (), std::move_iterator (fs.begin ()), std::move_iterator (fs.end ()));
100+ startMoreThreadsIfNeeded (active_tasks_, lock);
95101 }
96102
97103 if (mode == Mode::ThreadPool)
98104 {
99- active_tasks.fetch_add (fs.size (), std::memory_order_relaxed);
100105#ifdef OS_LINUX
101106 UInt32 prev_size = queue_size.fetch_add (fs.size (), std::memory_order_release);
102107 if (prev_size < max_threads)
@@ -127,68 +132,94 @@ bool ThreadPoolCallbackRunnerFast::runTaskInline()
127132
/// Worker body that `startMoreThreadsIfNeeded` schedules on `pool`.
///
/// This text is a diff: a line number followed by `-` marks the removed (old) implementation,
/// `+` marks the added (new) one, and two fused numbers mark unchanged context. The notes below
/// describe the new side.
///
/// Each iteration of the outer loop:
///  1. Linux: waits on `queue_size` with an idle timeout, or decrements it by one via CAS.
///  2. Under `mutex`: either retries (timed out but the queue is not empty), exits (shutdown
///     requested or idle timeout), or pops the front task from `queue`.
///     Non-Linux: the wait happens here instead, on `queue_cv` with the same timeout.
///  3. Runs the task outside the lock; an exception is logged with tryLogCurrentException and
///     hits chassert(false). Then `active_tasks` is decremented.
///
/// On exit the ThreadGroupSwitcher is destroyed first, then `threads` is decremented and
/// `shutdown_cv` is notified if it reached zero - all while `mutex` is still held.
128133void ThreadPoolCallbackRunnerFast::threadFunction ()
129134{
/// Held in an optional so it can be destroyed explicitly (switcher.reset()) on the exit path.
135+ std::optional<ThreadGroupSwitcher> switcher;
136+ switcher.emplace (thread_group, thread_name.c_str ());
137+
138+ while (true )
130139 {
131- ThreadGroupSwitcher switcher (thread_group, thread_name. c_str ()) ;
140+ bool timed_out = false ;
132141
142+ #ifdef OS_LINUX
143+ UInt32 x = queue_size.load (std::memory_order_relaxed);
133144 while (true )
134145 {
135- #ifdef OS_LINUX
136- UInt32 x = queue_size.load (std::memory_order_relaxed);
137- while (true )
146+ if (x == 0 )
138147 {
139- if (x == 0 )
/// Presumably blocks while `queue_size` is 0, for at most THREAD_IDLE_TIMEOUT_NS - confirm
/// against the futexTimedWait helper. Only treated as an idle timeout if the wait reported
/// ETIMEDOUT and the re-read counter is still 0.
148+ Int64 waited = futexTimedWait (&queue_size, 0 , THREAD_IDLE_TIMEOUT_NS);
149+ x = queue_size.load (std::memory_order_relaxed);
150+
151+ if (waited < 0 && errno == ETIMEDOUT && x == 0 )
140152 {
141- futexWait (&queue_size, 0 );
142- x = queue_size.load (std::memory_order_relaxed);
143- }
144- else if (queue_size.compare_exchange_weak (
145- x, x - 1 , std::memory_order_acquire, std::memory_order_relaxed))
153+ timed_out = true ;
146154 break ;
155+ }
147156 }
148- #endif
/// `x` was non-zero: try to decrement the counter by one. A failed CAS refreshes `x` and the
/// inner loop retries.
157+ else if (queue_size.compare_exchange_weak (
158+ x, x - 1 , std::memory_order_acquire, std::memory_order_relaxed))
159+ break ;
160+ }
161+ #endif
149162
150- std::function<void ()> f;
151- {
152- std::unique_lock lock (mutex);
163+ std::function<void ()> f;
164+ {
165+ std::unique_lock lock (mutex);
153166
154- #ifndef OS_LINUX
155- queue_cv.wait (lock, [&] { return shutdown_requested || !queue.empty (); });
156- #endif
167+ #ifdef OS_LINUX
168+ // / Important to never stop the last thread if queue is not empty (checked under the
169+ // / same `lock` as decrementing `threads`). Otherwise we'll deadlock like this:
170+ // / 0. `threads` == 1, queue is empty.
171+ // / 1. The worker thread times out; it didn't lock mutex or decrement `threads` yet.
172+ // / 2. A manager thread enqueues a task. It sees active_tasks == 1 and `threads` == 1,
173+ // / so it doesn't start another thread.
174+ // / 3. The worker thread exits.
175+ // / 4. There are no threads, but the queue is not empty, oops.
176+ if (timed_out && !queue.empty () && !shutdown_requested)
177+ // / We can't just proceed to `queue.pop_front()` here because we haven't
178+ // / decremented queue_size.
179+ continue ;
180+ #else
/// wait_for returns the predicate's value, so `timed_out` is true only when the timeout
/// elapsed with no shutdown request and an empty queue.
181+ timed_out = !queue_cv.wait_for (
182+ lock, std::chrono::nanoseconds (THREAD_IDLE_TIMEOUT_NS),
183+ [&] { return shutdown_requested || !queue.empty (); });
184+ #endif
157185
158- if (shutdown_requested)
159- break ;
186+ if (shutdown_requested || timed_out)
187+ {
188+ // / Important that we destroy the `ThreadGroupSwitcher` before decrementing `threads`.
189+ // / Otherwise ~ThreadGroupSwitcher may access global Context after the query is
190+ // / finished, which may race with mutating Context (specifically, Settings) at the
191+ // / start of next query.
192+ switcher.reset ();
160193
161- chassert (!queue.empty ());
194+ threads -= 1 ;
195+ if (threads == 0 )
196+ shutdown_cv.notify_all ();
162197
163- f = std::move (queue.front ());
164- queue.pop_front ();
198+ return ;
165199 }
166200
167- try
168- {
169- f ();
201+ chassert (!queue.empty ());
170202
171- CurrentThread::updatePerformanceCountersIfNeeded ( );
172- }
173- catch (...)
174- {
175- tryLogCurrentException ( " FastThreadPool " );
176- chassert ( false );
177- }
203+ f = std::move (queue. front () );
204+ queue. pop_front ();
205+ }
206+
/// The task runs outside the lock. An exception is logged and trips chassert(false); it is
/// not rethrown.
207+ try
208+ {
209+ f ();
178210
179- active_tasks.fetch_sub (1 , std::memory_order_relaxed);
211+ CurrentThread::updatePerformanceCountersIfNeeded ();
212+ }
213+ catch (...)
214+ {
215+ tryLogCurrentException (" FastThreadPool" );
216+ chassert (false );
180217 }
181- }
182218
183- // / Important that we destroy the `ThreadGroupSwitcher` before decrementing `threads`.
184- // / Otherwise ~ThreadGroupSwitcher may access global Context after the query is finished, which
185- // / may race with mutating Context (specifically, Settings) at the start of next query.
186- {
187- std::unique_lock lock (mutex);
188- threads -= 1 ;
189- if (threads == 0 )
190- shutdown_cv.notify_all ();
219+ active_tasks.fetch_sub (1 , std::memory_order_relaxed);
191220 }
221+
/// Not expected to be reached: in the new version the outer loop is only left via `return`.
222+ chassert (false );
192223}
193224
194225bool ShutdownHelper::try_lock_shared ()
0 commit comments