Skip to content

Commit 8612926

Browse files
authored
[lldb] Fix race condition in Process::WaitForProcessToStop() (#144919)
This PR addresses a race condition encountered when using LLDB through the Python scripting interface. I'm relatively new to LLDB, so feedback is very welcome, especially if there's a more appropriate way to address this issue. ### Bug Description When running a script that repeatedly calls `debugger.GetListener().WaitForEvent()` in a loop, and at some point invokes `process.Kill()` from within that loop to terminate the session, a race condition can occur if `process.Kill()` is called around the same time a breakpoint is hit. ### Race Condition Details The issue arises when the following sequence of events happens: 1. The process's **private state** transitions to `stopped` when the breakpoint is hit. 2. `process.Kill()` calls `Process::Destroy()`, which invokes `Process::WaitForProcessToStop()`. At this point: - `private_state = stopped` - `public_state = running` (the public state has not yet been updated) 3. The **public stop event** is broadcast **before** the hijack listener is installed. 4. As a result, the stop event is delivered to the **non-hijack listener**. 5. The interrupt request sent by `Process::StopForDestroyOrDetach()` is ignored because the process is already stopped (`private_state = stopped`). 6. No public stop event reaches the hijack listener. 7. `Process::WaitForProcessToStop()` hangs waiting for a public stop event, but the event is never received. 8. `process.Kill()` times out after 20 seconds ### Fix Summary This patch modifies `Process::WaitForProcessToStop()` to ensure that any pending events in the non-hijack listener queue are processed before checking the hijack listener. This guarantees that any missed public state change events are handled, preventing the hang. ### Additional Context A discussion of this issue, including a script to reproduce the bug, can be found here: [LLDB hangs when killing process at the same time a breakpoint is hit](https://discourse.llvm.org/t/lldb-hangs-when-killing-process-at-the-same-time-a-breakpoint-is-hit)
1 parent a35b290 commit 8612926

File tree

4 files changed

+140
-0
lines changed

4 files changed

+140
-0
lines changed

lldb/include/lldb/Utility/Listener.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ class Listener : public std::enable_shared_from_this<Listener> {
5353

5454
void AddEvent(lldb::EventSP &event);
5555

56+
/// Transfers all events matching the specified broadcaster and type to the
57+
/// destination listener. This can be useful when setting up a hijack listener,
58+
/// to ensure that no relevant events are missed.
59+
void MoveEvents(lldb::ListenerSP destination,
60+
Broadcaster *broadcaster, // nullptr for any broadcaster
61+
uint32_t event_type_mask);
62+
5663
void Clear();
5764

5865
const char *GetName() { return m_name.c_str(); }

lldb/source/Utility/Broadcaster.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,18 @@ bool Broadcaster::BroadcasterImpl::HijackBroadcaster(
335335
"{0} Broadcaster(\"{1}\")::HijackBroadcaster (listener(\"{2}\")={3})",
336336
static_cast<void *>(this), GetBroadcasterName(),
337337
listener_sp->m_name.c_str(), static_cast<void *>(listener_sp.get()));
338+
339+
// Move pending events from the previous listener to the hijack listener.
340+
// This ensures that no relevant event queued before the transition is missed
341+
// by the hijack listener.
342+
ListenerSP prev_listener;
343+
if (!m_hijacking_listeners.empty())
344+
prev_listener = m_hijacking_listeners.back();
345+
else if (m_primary_listener_sp)
346+
prev_listener = m_primary_listener_sp;
347+
if (prev_listener && listener_sp)
348+
prev_listener->MoveEvents(listener_sp, &m_broadcaster, event_mask);
349+
338350
m_hijacking_listeners.push_back(listener_sp);
339351
m_hijacking_masks.push_back(event_mask);
340352
return true;
@@ -367,6 +379,19 @@ void Broadcaster::BroadcasterImpl::RestoreBroadcaster() {
367379
static_cast<void *>(this), GetBroadcasterName(),
368380
listener_sp->m_name.c_str(),
369381
static_cast<void *>(listener_sp.get()));
382+
383+
// Move any remaining events from the hijack listener back to
384+
// the previous listener. This ensures that no events are dropped when
385+
// restoring the original listener.
386+
ListenerSP prev_listener;
387+
if (m_hijacking_listeners.size() > 1)
388+
prev_listener = m_hijacking_listeners[m_hijacking_listeners.size() - 2];
389+
else if (m_primary_listener_sp)
390+
prev_listener = m_primary_listener_sp;
391+
if (listener_sp && prev_listener && !m_hijacking_masks.empty())
392+
listener_sp->MoveEvents(prev_listener, &m_broadcaster,
393+
m_hijacking_masks.back());
394+
370395
m_hijacking_listeners.pop_back();
371396
}
372397
if (!m_hijacking_masks.empty())

lldb/source/Utility/Listener.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,33 @@ void Listener::AddEvent(EventSP &event_sp) {
176176
m_events_condition.notify_all();
177177
}
178178

179+
void Listener::MoveEvents(
180+
ListenerSP destination,
181+
Broadcaster *broadcaster, // nullptr for any broadcaster
182+
uint32_t event_type_mask) {
183+
Log *log = GetLog(LLDBLog::Events);
184+
185+
std::lock_guard<std::mutex> guard(m_events_mutex);
186+
auto pos = m_events.begin();
187+
while (pos != m_events.end()) {
188+
EventSP &event_sp = *pos;
189+
if (event_sp &&
190+
((broadcaster == nullptr) || event_sp->BroadcasterIs(broadcaster)) &&
191+
(event_type_mask == 0 || event_type_mask & event_sp->GetType())) {
192+
LLDB_LOGF(
193+
log, "%p Listener('%s')::MoveEvents moving event %p to %p('%s')",
194+
static_cast<void *>(this), m_name.c_str(),
195+
static_cast<void *>(event_sp.get()),
196+
static_cast<void *>(destination.get()), destination->GetName());
197+
198+
destination->AddEvent(event_sp);
199+
pos = m_events.erase(pos);
200+
} else {
201+
++pos;
202+
}
203+
}
204+
}
205+
179206
bool Listener::FindNextEventInternal(
180207
std::unique_lock<std::mutex> &lock,
181208
Broadcaster *broadcaster, // nullptr for any broadcaster

lldb/unittests/Utility/ListenerTest.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,3 +150,84 @@ TEST(ListenerTest, StartStopListeningForEventSpec) {
150150
ASSERT_EQ(event_sp->GetBroadcaster(), &broadcaster1);
151151
ASSERT_FALSE(listener_sp->GetEvent(event_sp, std::chrono::seconds(0)));
152152
}
153+
154+
TEST(ListenerTest, MoveEventsOnHijackAndRestore) {
155+
Broadcaster broadcaster(nullptr, "test-broadcaster");
156+
const uint32_t event_mask = 1;
157+
EventSP event_sp;
158+
159+
// Create the original listener and start listening.
160+
ListenerSP original_listener = Listener::MakeListener("original-listener");
161+
ASSERT_EQ(event_mask, original_listener->StartListeningForEvents(&broadcaster,
162+
event_mask));
163+
broadcaster.SetPrimaryListener(original_listener);
164+
165+
// Queue two events to original listener, but do not consume them yet.
166+
broadcaster.BroadcastEvent(event_mask, nullptr);
167+
broadcaster.BroadcastEvent(event_mask, nullptr);
168+
169+
// Hijack.
170+
ListenerSP hijack_listener = Listener::MakeListener("hijack-listener");
171+
broadcaster.HijackBroadcaster(hijack_listener, event_mask);
172+
173+
// The events should have been moved to the hijack listener.
174+
EXPECT_FALSE(original_listener->GetEvent(event_sp, std::chrono::seconds(0)));
175+
EXPECT_TRUE(hijack_listener->GetEvent(event_sp, std::chrono::seconds(0)));
176+
EXPECT_TRUE(hijack_listener->GetEvent(event_sp, std::chrono::seconds(0)));
177+
178+
// Queue two events while hijacked, but do not consume them yet.
179+
broadcaster.BroadcastEvent(event_mask, nullptr);
180+
broadcaster.BroadcastEvent(event_mask, nullptr);
181+
182+
// Restore the original listener.
183+
broadcaster.RestoreBroadcaster();
184+
185+
// The events queued while hijacked should have been moved back to the
186+
// original listener.
187+
EXPECT_FALSE(hijack_listener->GetEvent(event_sp, std::chrono::seconds(0)));
188+
EXPECT_TRUE(original_listener->GetEvent(event_sp, std::chrono::seconds(0)));
189+
EXPECT_TRUE(original_listener->GetEvent(event_sp, std::chrono::seconds(0)));
190+
}
191+
192+
TEST(ListenerTest, MoveEventsBetweenHijackers) {
193+
Broadcaster broadcaster(nullptr, "test-broadcaster");
194+
const uint32_t event_mask = 1;
195+
EventSP event_sp;
196+
197+
// Create the original listener and start listening.
198+
ListenerSP original_listener = Listener::MakeListener("original-listener");
199+
ASSERT_EQ(event_mask, original_listener->StartListeningForEvents(&broadcaster,
200+
event_mask));
201+
broadcaster.SetPrimaryListener(original_listener);
202+
203+
// First hijack.
204+
ListenerSP hijack_listener1 = Listener::MakeListener("hijack-listener1");
205+
broadcaster.HijackBroadcaster(hijack_listener1, event_mask);
206+
207+
// Queue two events while hijacked, but do not consume
208+
// them yet.
209+
broadcaster.BroadcastEvent(event_mask, nullptr);
210+
broadcaster.BroadcastEvent(event_mask, nullptr);
211+
212+
// Second hijack.
213+
ListenerSP hijack_listener2 = Listener::MakeListener("hijack-listener2");
214+
broadcaster.HijackBroadcaster(hijack_listener2, event_mask);
215+
216+
// The second hijacker should have the events now.
217+
EXPECT_FALSE(hijack_listener1->GetEvent(event_sp, std::chrono::seconds(0)));
218+
EXPECT_TRUE(hijack_listener2->GetEvent(event_sp, std::chrono::seconds(0)));
219+
EXPECT_TRUE(hijack_listener2->GetEvent(event_sp, std::chrono::seconds(0)));
220+
221+
// Queue two events while hijacked with second hijacker, but do not consume
222+
// them yet.
223+
broadcaster.BroadcastEvent(event_mask, nullptr);
224+
broadcaster.BroadcastEvent(event_mask, nullptr);
225+
226+
// Restore the previous hijacker.
227+
broadcaster.RestoreBroadcaster();
228+
229+
// The first hijacker should now have the events.
230+
EXPECT_FALSE(hijack_listener2->GetEvent(event_sp, std::chrono::seconds(0)));
231+
EXPECT_TRUE(hijack_listener1->GetEvent(event_sp, std::chrono::seconds(0)));
232+
EXPECT_TRUE(hijack_listener1->GetEvent(event_sp, std::chrono::seconds(0)));
233+
}

0 commit comments

Comments
 (0)