@@ -154,11 +154,14 @@ template <typename ScalingPolicy = LatencyScaling<10>> class thread_pool_scaling
154154 }
155155 m_action_queue.stop ();
156156
157- // 2. Steal the active workers list
157+ // 2. Steal the active workers list. Explicitly clear the source so that any
158+ // worker that acquires the lock afterwards sees size()==0 and cannot
159+ // voluntarily retire into the zombies deque after step 4's final reap.
158160 std::list<std::thread> workers_to_join;
159161 {
160162 auto reg_h = m_reg.handle ();
161163 workers_to_join = std::move (reg_h->workers );
164+ reg_h->workers .clear ();
162165 }
163166
164167 // 3. Join everything in our stolen list
@@ -168,9 +171,18 @@ template <typename ScalingPolicy = LatencyScaling<10>> class thread_pool_scaling
168171 }
169172 }
170173
171- // 4. Join and clear any zombies that retired before the move
172- auto reg_h = m_reg.handle ();
173- reap_zombies_internal (reg_h);
174+ // 4. Join any zombies that retired before the steal. Steal the deque first
175+ // so the join happens outside the lock (same pattern as the worker loop).
176+ std::deque<std::thread> zombies_to_join;
177+ {
178+ auto reg_h = m_reg.handle ();
179+ zombies_to_join = std::move (reg_h->zombies );
180+ }
181+ for (auto & t : zombies_to_join) {
182+ if (t.joinable ()) {
183+ t.join ();
184+ }
185+ }
174186 }
175187
176188 /* *
@@ -250,10 +262,29 @@ template <typename ScalingPolicy = LatencyScaling<10>> class thread_pool_scaling
250262 auto task_opt = m_action_queue.try_pop (m_idle_timeout);
251263
252264 if (task_opt) {
253- task_opt->func ();
254- auto reg_h = m_reg.handle ();
255- if (!reg_h->zombies .empty ()) {
256- reap_zombies_internal (reg_h);
265+ try {
266+ task_opt->func ();
267+ } catch (...) {
268+ // Suppress exception to prevent thread termination.
269+ // Fire-and-forget tasks are responsible for their own error handling.
270+ }
271+ // Steal the zombie deque under the lock, then join outside it.
272+ // Joining while holding the lock is safe in practice (the zombie has already
273+ // released the lock before it can appear in the deque), but it blocks the
274+ // registry mutex for the duration of the join — delaying scaling decisions
275+ // and the destructor. Stealing first bounds the critical section to a cheap
276+ // list move.
277+ std::deque<std::thread> zombies_to_join;
278+ {
279+ auto reg_h = m_reg.handle ();
280+ if (!reg_h->zombies .empty ()) {
281+ zombies_to_join = std::move (reg_h->zombies );
282+ }
283+ }
284+ for (auto & t : zombies_to_join) {
285+ if (t.joinable ()) {
286+ t.join ();
287+ }
257288 }
258289 } else {
259290 auto reg_h = m_reg.handle ();
@@ -279,19 +310,6 @@ template <typename ScalingPolicy = LatencyScaling<10>> class thread_pool_scaling
279310 });
280311 }
281312
282- /* *
283- * @brief Joins and clears retired threads.
284- * @param[in] reg_h Handle to the monitor-protected registry data.
285- */
286- void reap_zombies_internal (handle& reg_h) {
287- while (!reg_h->zombies .empty ()) {
288- if (reg_h->zombies .front ().joinable ()) {
289- reg_h->zombies .front ().join ();
290- }
291- reg_h->zombies .pop_front ();
292- }
293- }
294-
295313 const std::size_t m_min_threads; // /< Minimum persistent thread count.
296314 const std::size_t m_max_threads; // /< Maximum allowed thread count.
297315 const std::chrono::milliseconds m_idle_timeout; // /< Surplus thread idle timeout.
0 commit comments