2 changes: 1 addition & 1 deletion ci/configs/llvm.bash
@@ -2,7 +2,7 @@ CI_DESC="CI job using LLVM-based libraries and tools (clang, libc++, clang-tidy,
CI_DIR=build-llvm
NIX_ARGS=(--arg enableLibcxx true)
export CXX=clang++
export CXXFLAGS="-Werror -Wall -Wextra -Wpedantic -Wthread-safety-analysis -Wno-unused-parameter"
export CXXFLAGS="-Werror -Wall -Wextra -Wpedantic -Wthread-safety -Wno-unused-parameter"
CMAKE_ARGS=(
-G Ninja
-DMP_ENABLE_CLANG_TIDY=ON
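Context on the flag change above (the same change appears in sanitize.bash below): -Wthread-safety is Clang's umbrella flag for its thread safety analysis, implying -Wthread-safety-analysis plus the attribute and precision checks, and it is what validates annotations like the MP_GUARDED_BY ones added later in this PR. A minimal sketch of what it catches, using raw Clang attributes rather than this repo's MP_* macros:

// clang++ -Wthread-safety -c example.cpp
#define CAPABILITY(x) __attribute__((capability(x)))
#define GUARDED_BY(x) __attribute__((guarded_by(x)))
#define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...) __attribute__((release_capability(__VA_ARGS__)))

class CAPABILITY("mutex") Mutex
{
public:
    void lock() ACQUIRE();
    void unlock() RELEASE();
};

class Counter
{
    Mutex m_mutex;
    int m_value GUARDED_BY(m_mutex) = 0;

public:
    void increment()
    {
        m_mutex.lock();
        ++m_value; // OK: the analysis sees the lock is held
        m_mutex.unlock();
    }
    int unsafeRead() { return m_value; } // warning: requires holding 'm_mutex'
};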
2 changes: 1 addition & 1 deletion ci/configs/sanitize.bash
@@ -1,7 +1,7 @@
CI_DESC="CI job running ThreadSanitizer"
CI_DIR=build-sanitize
export CXX=clang++
export CXXFLAGS="-ggdb -Werror -Wall -Wextra -Wpedantic -Wthread-safety-analysis -Wno-unused-parameter -fsanitize=thread"
export CXXFLAGS="-ggdb -Werror -Wall -Wextra -Wpedantic -Wthread-safety -Wno-unused-parameter -fsanitize=thread"
Contributor
4e365b0:
This is unrelated to the PR, but why generate debugging information here? I'm referring to the use of the -ggdb flag.

Collaborator Author
re: #201 (comment)

I haven't tried without -ggdb, but sanitizers like ThreadSanitizer print stack traces that I think depend on having this debug information. Cap'n Proto can also use addr2line to print stack traces internally when there are errors (though I don't think we have this option enabled).

CMAKE_ARGS=()
BUILD_ARGS=(-k -j4)
BUILD_TARGETS=(mptest)
72 changes: 58 additions & 14 deletions include/mp/proxy-io.h
Contributor
Tangent: Why does EventLoop use Unix sockets within the same process to signal when there's a new m_post_fn to execute? Seems like there should be thread synchronization primitives with marginally less overhead?

Collaborator Author
re: https://github.com/bitcoin-core/libmultiprocess/pull/201/files#r2359060452

Tangent: Why does EventLoop use Unix sockets within the same process to signal when there's a new m_post_fn to execute? Seems like there should be thread synchronization primitives with marginally less overhead?

The EventLoop documentation is meant to provide some background on this, and the link at the bottom, https://groups.google.com/d/msg/capnproto/TuQFF1eH2-M/g81sHaTAAQAJ, specifically addresses this question.

Cap'n Proto doesn't use threads and assumes all code calling it runs on a single thread. There are benefits and costs that come from this, but one of the costs is that external threads need to use I/O to communicate with Cap'n Proto's event loop. There isn't another way for threads to send signals. (Or at least there wasn't at the time I wrote this; maybe newer versions of Cap'n Proto provide something.)
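To sketch the pattern being described (hypothetical code, not the actual EventLoop; names like g_post_fn only mirror the m_post_fn discussion): an external thread stashes the callable under a mutex and writes one byte to a socket, and the event loop, which can only block on I/O, wakes from its pending read and runs it.

#include <kj/async-io.h>
#include <sys/socket.h>
#include <unistd.h>
#include <functional>
#include <mutex>
#include <utility>

std::mutex g_mutex;
std::function<void()>* g_post_fn = nullptr;

// Called from any external thread.
void post(int post_fd, std::function<void()>& fn)
{
    const std::lock_guard<std::mutex> lock(g_mutex);
    g_post_fn = &fn;
    const char wakeup = 0;
    (void)write(post_fd, &wakeup, 1); // I/O is the only way to wake the kj loop
}

// Runs on the kj event loop thread: wait for a byte, run the posted function.
kj::Promise<void> waitForPosts(kj::AsyncIoStream& wait_stream, char& buf)
{
    return wait_stream.read(&buf, 1).then([&]() {
        std::function<void()>* fn = nullptr;
        {
            const std::lock_guard<std::mutex> lock(g_mutex);
            std::swap(fn, g_post_fn);
        }
        if (fn) (*fn)();
        return waitForPosts(wait_stream, buf); // keep listening
    });
}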

Contributor
I think the main thing I was missing is the need for pumping the async I/O. I assume waiting on an async I/O-wrapped socket serves that purpose?

I was curious why you didn't use m_io_context->provider->newOneWayPipe(), but when writing to the AsyncOutputStream a promise is returned with KJ_WARN_UNUSED_RESULT (https://github.com/capnproto/capnproto/blob/7db701e94ad00b8db06ede2bea5f527e2a6fa3de/c%2B%2B/src/kj/async-io.h#L121), which doesn't mirror the example in the Google Groups thread.

Q: I can see there's a ::close() call for [m_]post_fd, but none for m_wait_fd. Is it just cleaned up once the process dies, or does wrapSocketFd() assume responsibility for closing?

Collaborator Author
re: #201 (comment)

Yes, IIRC wrapSocketFd does take ownership. A lot of this code is also updated and made more generic in the Windows PR bitcoin/bitcoin#32387, in case you are curious.
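A sketch of the ownership behavior being described, assuming kj's LowLevelAsyncIoProvider API (the TAKE_OWNERSHIP flag is from kj/async-io.h; the fd names are illustrative, not the actual EventLoop members):

#include <kj/async-io.h>
#include <sys/socket.h>
#include <unistd.h>

int main()
{
    int fds[2];
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0) return 1;
    const int wait_fd = fds[0]; // event loop reads wakeups from this end
    const int post_fd = fds[1]; // external threads write wakeups to this end

    kj::AsyncIoContext io = kj::setupAsyncIo();

    // With TAKE_OWNERSHIP, the returned stream closes wait_fd when it is
    // destroyed, which would explain the absence of an explicit ::close().
    kj::Own<kj::AsyncIoStream> wait_stream = io.lowLevelProvider->wrapSocketFd(
        wait_fd, kj::LowLevelAsyncIoProvider::TAKE_OWNERSHIP);

    // post_fd is never wrapped, so it still needs a manual ::close().
    ::close(post_fd);
}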

Contributor
Tangent in an unrelated file, regarding how easy it is to understand libmultiprocess:
TestSetup (in test/mp/test/test.cpp) is hard to read; maybe it would benefit from comments giving the argument names of anonymous lambdas, and from explicit captures of variables. I also feel some resistance at having to learn that kj::heap returns something std::unique_ptr-like that implicitly casts to a raw pointer. Are the kj types only used when necessary to interface with Cap'n Proto, and std everywhere else?

Collaborator Author
re: #201 (comment)

Tangent in an unrelated file, regarding how easy it is to understand libmultiprocess: TestSetup (in test/mp/test/test.cpp) is hard to read; maybe it would benefit from comments giving the argument names of anonymous lambdas, and from explicit captures of variables.

This is probably true. I'm currently working on test improvements where I'm adding more members to the TestSetup class, and I could add more comments there.

I'm not sure I would always prefer explicit variable captures, though. I think [&] is not just a way of capturing state and cutting down noise, but also an important way of indicating that the lambda will only be called in the current scope and is not some event handler that can be called at a later time. So I tend to like [&] for synchronous inline callbacks, and [x, y, z] for asynchronous event handlers.
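A small illustration of that convention (hypothetical code, not from the test suite):

#include <algorithm>
#include <functional>
#include <vector>

void example(std::vector<int>& values, std::vector<std::function<void()>>& handlers)
{
    int total = 0;
    // Synchronous inline callback: [&] signals it only runs inside this call.
    std::for_each(values.begin(), values.end(), [&](int v) { total += v; });

    // Asynchronous event handler: explicit captures signal it may run after
    // this function returns, so it must own copies of what it needs.
    handlers.push_back([total] { /* use the snapshot of total */ });
}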

I also feel some resistance at having to learn that kj::heap returns something std::unique_ptr-like that implicitly casts to a raw pointer. Are the kj types only used when necessary to interface with Cap'n Proto, and std everywhere else?

I think basically yes, but the word "only" is too strong. The code does tend to use the kj library when Cap'n Proto types are involved and the std library otherwise. But there are probably exceptions, and I haven't found kj types and macros to cause problems in practice. In general, kj types tend to have nice debugging and safety features and a real rationale behind them; they are not just NIH. I think kj::Own in particular has capabilities std::unique_ptr doesn't, because it doesn't bake the deleter into the type definition, making it easier to support things like memory pools and to implement the zero-copy techniques Cap'n Proto uses internally.
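A sketch of the type-level difference being described (the pool is hypothetical; the kj calls assume kj/memory.h):

#include <kj/memory.h>
#include <memory>

struct PoolDeleter { void operator()(int*) const { /* return to pool */ } };

int main()
{
    // With std::unique_ptr the deleter is baked into the type, so these two
    // are different, incompatible types:
    std::unique_ptr<int> a(new int(1));
    std::unique_ptr<int, PoolDeleter> b; // cannot be assigned to `a`

    // kj::Own<int> carries its disposer at runtime instead, so the same type
    // can hold heap objects, pool objects, etc.:
    kj::Own<int> c = kj::heap<int>(2);
    // kj::Own<int> d = pool.allocate(); // hypothetical pool, same type as c
}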

Contributor
Your policy of only using [&] to indicate that the lambda will be called in the current scope is nice when it comes to function scope, as when creating server_connection, but less so in the case of TestSetup::server_disconnect, where this is captured; that can perhaps be somewhat forgiven for a relatively simple lambda.


I agree that kj has some clever quirks that are missing from std. But it is not a one-way street (the implicit cast from Own to a raw pointer, for example). I would still advise using std when possible to make review more approachable. This passed mptest, for example:

diff --git a/include/mp/proxy-io.h b/include/mp/proxy-io.h
index a0bdf13..6358c86 100644
--- a/include/mp/proxy-io.h
+++ b/include/mp/proxy-io.h
@@ -179,7 +179,7 @@ public:
 
     //! Run function on event loop thread. Does not return until function completes.
     //! Must be called while the loop() function is active.
-    void post(kj::Function<void()> fn);
+    void post(std::function<void()> fn);
 
     //! Wrapper around EventLoop::post that takes advantage of the
     //! fact that callable will not go out of scope to avoid requirement that it
@@ -231,7 +231,7 @@ public:
     std::thread m_async_thread;
 
     //! Callback function to run on event loop thread during post() or sync() call.
-    kj::Function<void()>* m_post_fn MP_GUARDED_BY(m_mutex) = nullptr;
+    std::function<void()>* m_post_fn MP_GUARDED_BY(m_mutex) = nullptr;
 
     //! Callback functions to run on async thread.
     std::optional<CleanupList> m_async_fns MP_GUARDED_BY(m_mutex);
diff --git a/src/mp/proxy.cpp b/src/mp/proxy.cpp
index 06825c9..6611cfb 100644
--- a/src/mp/proxy.cpp
+++ b/src/mp/proxy.cpp
@@ -263,7 +263,7 @@ void EventLoop::loop()
     m_cv.notify_all();
 }
 
-void EventLoop::post(kj::Function<void()> fn)
+void EventLoop::post(std::function<void()> fn)
 {
     if (std::this_thread::get_id() == m_thread_id) {
         fn();

Collaborator Author
re: #201 (comment)

Nice find on the lambda inconsistencies; that would make sense to clean up. I would be happy to review a PR with any cleanups and improvements like this.

On kj::Function vs std::function, this was actually changed recently in 52256e7, because std::function, unlike kj::Function, requires function objects to be copyable, which prevents writing lambdas that capture move-only objects. So I'm actually surprised that diff compiles. Maybe it compiles because it only changes EventLoop::post, not Waiter::post? Happy to change to whatever types work, though.
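For reference, the copyability difference in a nutshell (minimal sketch; compile against Cap'n Proto's headers):

#include <kj/function.h>
#include <functional>
#include <memory>

int main()
{
    auto p = std::make_unique<int>(42);

    // kj::Function only requires the callable to be movable, so a lambda
    // capturing a move-only object is fine:
    kj::Function<int()> kf = [p = std::move(p)]() { return *p; };

    // std::function requires copyable callables, so the equivalent does not
    // compile (uncommenting this line is a build error):
    // std::function<int()> sf = [q = std::make_unique<int>(1)]() { return *q; };

    return kf();
}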

@@ -66,8 +66,6 @@ struct ProxyClient<Thread> : public ProxyClientBase<Thread, ::capnp::Void>
ProxyClient(const ProxyClient&) = delete;
~ProxyClient();

-void setDisconnectCallback(const std::function<void()>& fn);
-
//! Reference to callback function that is run if there is a sudden
//! disconnect and the Connection object is destroyed before this
//! ProxyClient<Thread> object. The callback will destroy this object and
@@ -275,16 +273,16 @@ struct Waiter
template <typename Fn>
void post(Fn&& fn)
{
-const std::unique_lock<std::mutex> lock(m_mutex);
+const Lock lock(m_mutex);
assert(!m_fn);
m_fn = std::forward<Fn>(fn);
m_cv.notify_all();
}

template <class Predicate>
-void wait(std::unique_lock<std::mutex>& lock, Predicate pred)
+void wait(Lock& lock, Predicate pred)
{
-m_cv.wait(lock, [&] {
+m_cv.wait(lock.m_lock, [&]() MP_REQUIRES(m_mutex) {
// Important for this to be "while (m_fn)", not "if (m_fn)" to avoid
// a lost-wakeup bug. A new m_fn and m_cv notification might be sent
// after the fn() call and before the lock.lock() call in this loop
@@ -307,9 +305,9 @@ struct Waiter
//! mutexes than necessary. This mutex can be held at the same time as
//! EventLoop::m_mutex as long as Waiter::mutex is locked first and
//! EventLoop::m_mutex is locked second.
-std::mutex m_mutex;
+Mutex m_mutex;
std::condition_variable m_cv;
-std::optional<kj::Function<void()>> m_fn;
+std::optional<kj::Function<void()>> m_fn MP_GUARDED_BY(m_mutex);
};

//! Object holding network & rpc state associated with either an incoming server
@@ -534,29 +532,73 @@ void ProxyServerBase<Interface, Impl>::invokeDestroy()
CleanupRun(m_context.cleanup_fns);
}

-using ConnThreads = std::map<Connection*, ProxyClient<Thread>>;
+//! Map from Connection to local or remote thread handle which will be used over
+//! that connection. This map will typically only contain one entry, but can
+//! contain multiple if a single thread makes IPC calls over multiple
+//! connections. A std::optional value type is used to avoid the map needing to
+//! be locked while ProxyClient<Thread> objects are constructed, see
+//! ThreadContext "Synchronization note" below.
+using ConnThreads = std::map<Connection*, std::optional<ProxyClient<Thread>>>;
using ConnThread = ConnThreads::iterator;

// Retrieve ProxyClient<Thread> object associated with this connection from a
// map, or create a new one and insert it into the map. Return map iterator and
// inserted bool.
-std::tuple<ConnThread, bool> SetThread(ConnThreads& threads, std::mutex& mutex, Connection* connection, const std::function<Thread::Client()>& make_thread);
+std::tuple<ConnThread, bool> SetThread(GuardedRef<ConnThreads> threads, Connection* connection, const std::function<Thread::Client()>& make_thread);

+//! The thread_local ThreadContext g_thread_context struct provides information
+//! about individual threads and a way of communicating between them. Because
+//! it's a thread local struct, each ThreadContext instance is initialized by
+//! the thread that owns it.
+//!
+//! ThreadContext is used for any client threads created externally which make
+//! IPC calls, and for server threads created by
+//! ProxyServer<ThreadMap>::makeThread() which execute IPC calls for clients.
+//!
+//! In both cases, the struct holds information like the thread name, and a
+//! Waiter object where the EventLoop can post incoming IPC requests to execute
+//! on the thread. The struct also holds ConnThread maps associating the thread
+//! with local and remote ProxyClient<Thread> objects.
struct ThreadContext
{
//! Identifying string for debug.
std::string thread_name;

-//! Waiter object used to allow client threads blocked waiting for a server
-//! response to execute callbacks made from the client's corresponding
-//! server thread.
+//! Waiter object used to allow remote clients to execute code on this
+//! thread. For server threads created by
+//! ProxyServer<ThreadMap>::makeThread(), this is initialized in that
+//! function. Otherwise, for client threads created externally, this is
+//! initialized the first time the thread tries to make an IPC call. Having
+//! a waiter is necessary for threads making IPC calls in case a server they
+//! are calling expects them to execute a callback during the call, before
+//! it sends a response.
+//!
+//! For IPC client threads, the Waiter pointer is never cleared and the Waiter
+//! just gets destroyed when the thread does. For server threads created by
+//! makeThread(), this pointer is set to null in the ~ProxyServer<Thread> as
+//! a signal for the thread to exit and destroy itself. In both cases, the
+//! same Waiter object is used across different calls and only created and
+//! destroyed once for the lifetime of the thread.
std::unique_ptr<Waiter> waiter = nullptr;

//! When client is making a request to a server, this is the
//! `callbackThread` argument it passes in the request, used by the server
//! in case it needs to make callbacks into the client that need to execute
//! while the client is waiting. This will be set to a local thread object.
-ConnThreads callback_threads;
+//!
+//! Synchronization note: The callback_thread and request_thread maps are
+//! only ever accessed internally by this thread's destructor and externally
+//! by Cap'n Proto event loop threads. Since it's possible for IPC client
+//! threads to make calls over different connections that could have
+//! different event loops, these maps are guarded by Waiter::m_mutex in case
+//! different event loop threads add or remove map entries simultaneously.
+//! However, individual ProxyClient<Thread> objects in the maps will only be
+//! associated with one event loop and guarded by EventLoop::m_mutex. So
+//! Waiter::m_mutex does not need to be held while accessing individual
+//! ProxyClient<Thread> instances, and may even need to be released to
+//! respect lock order and avoid locking Waiter::m_mutex before
+//! EventLoop::m_mutex.
+ConnThreads callback_threads MP_GUARDED_BY(waiter->m_mutex);

//! When client is making a request to a server, this is the `thread`
//! argument it passes in the request, used to control which thread on
@@ -565,7 +607,9 @@ struct ThreadContext
//! by makeThread. If a client call is being made from a thread currently
//! handling a server request, this will be set to the `callbackThread`
//! request thread argument passed in that request.
-ConnThreads request_threads;
+//!
+//! Synchronization note: \ref callback_threads note applies here as well.
+ConnThreads request_threads MP_GUARDED_BY(waiter->m_mutex);

//! Whether this thread is a capnp event loop thread. Not really used except
//! to assert false if there's an attempt to execute a blocking operation
8 changes: 4 additions & 4 deletions include/mp/proxy-types.h
@@ -617,7 +617,7 @@ void clientInvoke(ProxyClient& proxy_client, const GetRequest& get_request, Fiel
const char* disconnected = nullptr;
proxy_client.m_context.loop->sync([&]() {
if (!proxy_client.m_context.connection) {
-const std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
+const Lock lock(thread_context.waiter->m_mutex);
done = true;
disconnected = "IPC client method called after disconnect.";
thread_context.waiter->m_cv.notify_all();
@@ -644,7 +644,7 @@ void clientInvoke(ProxyClient& proxy_client, const GetRequest& get_request, Fiel
} catch (...) {
exception = std::current_exception();
}
-const std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
+const Lock lock(thread_context.waiter->m_mutex);
done = true;
thread_context.waiter->m_cv.notify_all();
},
@@ -656,13 +656,13 @@ void clientInvoke(ProxyClient& proxy_client, const GetRequest& get_request, Fiel
proxy_client.m_context.loop->logPlain()
<< "{" << thread_context.thread_name << "} IPC client exception " << kj_exception;
}
-const std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
+const Lock lock(thread_context.waiter->m_mutex);
done = true;
thread_context.waiter->m_cv.notify_all();
}));
});

-std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
+Lock lock(thread_context.waiter->m_mutex);
thread_context.waiter->wait(lock, [&done]() { return done; });
if (exception) std::rethrow_exception(exception);
if (!kj_exception.empty()) proxy_client.m_context.loop->raise() << kj_exception;
50 changes: 30 additions & 20 deletions include/mp/type-context.h
@@ -25,7 +25,7 @@ void CustomBuildField(TypeList<>,
// Also store the Thread::Client reference in the callback_threads map so
// future calls over this connection can reuse it.
auto [callback_thread, _]{SetThread(
-thread_context.callback_threads, thread_context.waiter->m_mutex, &connection,
+GuardedRef{thread_context.waiter->m_mutex, thread_context.callback_threads}, &connection,
[&] { return connection.m_threads.add(kj::heap<ProxyServer<Thread>>(thread_context, std::thread{})); })};

// Call remote ThreadMap.makeThread function so server will create a
@@ -43,12 +43,12 @@ void CustomBuildField(TypeList<>,
return request.send().getResult(); // Nonblocking due to capnp request pipelining.
}};
auto [request_thread, _1]{SetThread(
-thread_context.request_threads, thread_context.waiter->m_mutex,
+GuardedRef{thread_context.waiter->m_mutex, thread_context.request_threads},
&connection, make_request_thread)};

auto context = output.init();
-context.setThread(request_thread->second.m_client);
-context.setCallbackThread(callback_thread->second.m_client);
+context.setThread(request_thread->second->m_client);
+context.setCallbackThread(callback_thread->second->m_client);
}

//! PassField override for mp.Context arguments. Return asynchronously and call
@@ -89,29 +89,39 @@ auto PassField(Priority<1>, TypeList<>, ServerContext& server_context, const Fn&
// need to update the map.
auto& thread_context = g_thread_context;
auto& request_threads = thread_context.request_threads;
-auto [request_thread, inserted]{SetThread(
-    request_threads, thread_context.waiter->m_mutex,
-    server.m_context.connection,
-    [&] { return context_arg.getCallbackThread(); })};
+ConnThread request_thread;
+bool inserted;
+server.m_context.loop->sync([&] {
+    std::tie(request_thread, inserted) = SetThread(
+        GuardedRef{thread_context.waiter->m_mutex, request_threads}, server.m_context.connection,
+        [&] { return context_arg.getCallbackThread(); });
+});

-// If an entry was inserted into the requests_threads map,
+// If an entry was inserted into the request_threads map,
// remove it after calling fn.invoke. If an entry was not
// inserted, one already existed, meaning this must be a
// recursive call (IPC call calling back to the caller which
// makes another IPC call), so avoid modifying the map.
const bool erase_thread{inserted};
KJ_DEFER(if (erase_thread) {
-std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
-// Call erase here with a Connection* argument instead
-// of an iterator argument, because the `request_thread`
-// iterator may be invalid if the connection is closed
-// during this function call. More specifically, the
-// iterator may be invalid because SetThread adds a
-// cleanup callback to the Connection destructor that
-// erases the thread from the map, and also because the
-// ProxyServer<Thread> destructor calls
-// request_threads.clear().
-request_threads.erase(server.m_context.connection);
+// Erase the request_threads entry on the event loop
+// thread with loop->sync(), so if the connection is
+// broken there is not a race between this thread and
+// the disconnect handler trying to destroy the thread
+// client object.
+server.m_context.loop->sync([&] {
+    // Look up the thread again without using existing
+    // iterator since entry may no longer be there after
+    // a disconnect. Destroy node after releasing
+    // Waiter::m_mutex, so the ProxyClient<Thread>
+    // destructor is able to use EventLoop::mutex
+    // without violating lock order.
+    ConnThreads::node_type removed;
+    {
+        Lock lock(thread_context.waiter->m_mutex);
+        removed = request_threads.extract(server.m_context.connection);
+    }
+});
});
fn.invoke(server_context, args...);
}
11 changes: 11 additions & 0 deletions include/mp/util.h
@@ -182,6 +182,17 @@ class MP_SCOPED_CAPABILITY Lock {
std::unique_lock<std::mutex> m_lock;
};

+template<typename T>
Contributor
meganit: Pretty sure clang-format in the main project prefers:

Suggested change:
-template<typename T>
+template <typename T>

+struct GuardedRef
+{
+    Mutex& mutex;
+    T& ref MP_GUARDED_BY(mutex);
+};
+
+// CTAD for Clang 16: GuardedRef{mutex, x} -> GuardedRef<decltype(x)>
+template <class U>
+GuardedRef(Mutex&, U&) -> GuardedRef<U>;

//! Analog to std::lock_guard that unlocks instead of locks.
template <typename Lock>
struct UnlockGuard
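A hypothetical usage sketch of the new GuardedRef helper, showing how it bundles a mutex with the data it guards (names and the mp:: qualifications below are illustrative, assuming the Mutex/Lock/MP_GUARDED_BY definitions from this PR's mp/util.h):

#include <mp/util.h>
#include <map>

mp::Mutex g_mutex;
std::map<int, int> g_state MP_GUARDED_BY(g_mutex);

// The callee receives the mutex and the reference it guards as one argument,
// and the MP_GUARDED_BY annotation on GuardedRef::ref lets -Wthread-safety
// check that the mutex is held before ref is touched.
void update(mp::GuardedRef<std::map<int, int>> state)
{
    mp::Lock lock(state.mutex);
    state.ref[0] = 1;
}

int main()
{
    update(mp::GuardedRef{g_mutex, g_state}); // CTAD deduces GuardedRef<std::map<int, int>>
}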