Skip to content

Commit 83b6283

Browse files
Jlalondkusmour
authored andcommitted
[REVERT WHEN 137041 LANDS][GDBServer/PTRACE SEIZE] Implement internal prototype for ptrace seize
Summary: This is an internal version of my upstream PR [llvm#137041](llvm#137041) where I'm hacking in PTRACE_SEIZE support for coredumping processes, and then preventing them from being resumed. We only allow exiting; I've explained this in further detail in my test plan. This is landing internally first, to give the upstream PR and RFC more time to mature and to identify problems or new capabilities we could build on top of this workflow. Test Plan: The setup to test this is quite convoluted, and I explain it in greater detail in my upstream PR, but to summarize here, I compile this program to hold the kernel-provided pipe ``` // Set up the corepipe to our program echo "|/data/users/jalalonde/sand_test_code/ptrace.out %p" | sudo tee /proc/sys/kernel/core_pattern > /dev/null Invoke the following: int main() { return 42 / 0; } ``` This will create a program that is coredumping; I've named it sigabrt. We then use pgrep to find our pid. ``` [[email protected] /data/users/jalalonde/sand_test_code]$ pgrep "sigabrt" 3009565 ``` And walk through attaching to the coredumping proc, and then trying to continue ``` [[email protected] /data/users/jalalonde/llvm-sand/dbg]$ ./bin/lldb attach 3009565 Process 3009565 stopped * thread #1, name = 'sigabrt.out', stop reason = signal SIGSTOP frame #0: 0x00005618d35b114d sigabrt.out`main at sigabrt.cpp:6:18 3 } Executable binary set to "/data/users/jalalonde/sand_test_code/sigabrt.out". Architecture set to: x86_64-unknown-linux-gnu. 
warning: sigabrt.cpp: source file checksum mismatch between line table (69220de0d0b840cdf9c5b92a82466d5e) and file on disk (f76c8c2b73dc057688650908e7e17f34) (lldb) bt * thread #1, name = 'sigabrt.out', stop reason = signal SIGSTOP * frame #0: 0x00005618d35b114d sigabrt.out`main at sigabrt.cpp:6:18 frame #1: 0x00007f0233a295d0 libc.so.6`__libc_start_call_main + 128 frame #2: 0x00007f0233a29680 libc.so.6`__libc_start_main@@GLIBC_2.34 + 128 frame #3: 0x00005618d35b1065 sigabrt.out`_start + 37 (lldb) continue error: Failed to resume process: Process is in a non-resumable stop. Only detach or exit are supported.. (lldb) detach Process 3009565 detached ``` So we succeeded in attaching, moving from 'S' to trace stop 't', and then preventing a resumption that would kill the proc. This is crucial so that data formatters or other code don't accidentally resume and kill the process. Test Matrix | Case | LLDB behavior before patch | LLDB behavior with patch | |-------|--------------------------------------------------------- | Attach to Root process | Waitpid hang until ctrl-c | Waitpid hang until ctrl-c | | Attach to Root process coredumping | Waitpid hanging until ctrl-c | Waitpid hanging until ctrl-c | | Run Expression | N/A Can't attach | Runs without jitting, no error | Call function | N/A | Fails gracefully with error Reviewers: gclayton, wanyi, jeffreytan, peix Reviewed By: gclayton Subscribers: davidayoung, peix, #lldb_team Differential Revision: https://phabricator.intern.facebook.com/D73806555
1 parent 9c89ac3 commit 83b6283

File tree

10 files changed

+183
-13
lines changed

10 files changed

+183
-13
lines changed

lldb/include/lldb/Host/common/NativeProcessProtocol.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,11 @@ class NativeProcessProtocol {
187187

188188
bool IsStepping() const { return m_state == lldb::eStateStepping; }
189189

190-
bool CanResume() const { return m_state == lldb::eStateStopped; }
190+
bool CanResume() const {
191+
return m_state == lldb::eStateStopped && !InNonResumableStop();
192+
}
193+
194+
bool IsStopped() const { return m_state == lldb::eStateStopped; }
191195

192196
lldb::ByteOrder GetByteOrder() const {
193197
return GetArchitecture().GetByteOrder();
@@ -409,6 +413,16 @@ class NativeProcessProtocol {
409413
"Not implemented");
410414
}
411415

416+
/// Check if the process is in a stop that cannot be safely resumed,
417+
/// instead only allowing exit of the program.
418+
///
419+
/// Some examples on Linux are a PTRACE_O_TRACEEXIT stop, or calling
420+
/// PTRACE_SEIZE on a coredumping process.
421+
///
422+
/// \return
423+
/// True if the process is in a stop from which it can never be resumed.
424+
virtual bool InNonResumableStop() const { return false; }
425+
412426
protected:
413427
struct SoftwareBreakpoint {
414428
uint32_t ref_count;

lldb/include/lldb/Utility/ProcessInfo.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,13 @@ class ProcessInstanceInfo : public ProcessInfo {
250250
// proc/../status specifies CoreDumping as the field
251251
// so we match the case here.
252252
void SetIsCoreDumping(bool is_coredumping) { m_coredumping = is_coredumping; }
253-
std::optional<bool> IsCoreDumping() const { return m_coredumping; }
253+
// Returns true only when the coredumping state is known and set; an
// unknown (unset) state is reported as false.
bool IsCoreDumping() const { return m_coredumping.value_or(false); }
254+
255+
void SetNonResumable(bool is_nonresumable) {
256+
m_non_resumable = is_nonresumable;
257+
}
258+
259+
std::optional<bool> IsNonResumable() const { return m_non_resumable; }
254260

255261
void Dump(Stream &s, UserIDResolver &resolver) const;
256262

@@ -272,6 +278,7 @@ class ProcessInstanceInfo : public ProcessInfo {
272278
std::optional<int8_t> m_priority_value = std::nullopt;
273279
std::optional<bool> m_zombie = std::nullopt;
274280
std::optional<bool> m_coredumping = std::nullopt;
281+
std::optional<bool> m_non_resumable = std::nullopt;
275282
};
276283

277284
typedef std::vector<ProcessInstanceInfo> ProcessInstanceInfoList;

lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp

Lines changed: 123 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -312,17 +312,41 @@ NativeProcessLinux::Manager::Attach(
312312
Log *log = GetLog(POSIXLog::Process);
313313
LLDB_LOG(log, "pid = {0:x}", pid);
314314

315-
auto tids_or = NativeProcessLinux::Attach(pid);
316-
if (!tids_or)
317-
return tids_or.takeError();
318-
ArrayRef<::pid_t> tids = *tids_or;
315+
// This safety check lets us decide if we should
316+
// seize or attach.
317+
ProcessInstanceInfo process_info;
318+
if (!Host::GetProcessInfo(pid, process_info))
319+
return llvm::make_error<StringError>("Unable to read process info",
320+
llvm::inconvertibleErrorCode());
321+
322+
std::vector<::pid_t> tids;
323+
if (process_info.IsCoreDumping()) {
324+
auto attached_or = NativeProcessLinux::Seize(pid);
325+
if (!attached_or)
326+
return attached_or.takeError();
327+
tids = std::move(*attached_or);
328+
} else {
329+
auto attached_or = NativeProcessLinux::Attach(pid);
330+
if (!attached_or)
331+
return attached_or.takeError();
332+
tids = std::move(*attached_or);
333+
}
334+
319335
llvm::Expected<ArchSpec> arch_or =
320336
NativeRegisterContextLinux::DetermineArchitecture(tids[0]);
321337
if (!arch_or)
322338
return arch_or.takeError();
323339

324-
return std::unique_ptr<NativeProcessLinux>(
340+
auto native_up = std::unique_ptr<NativeProcessLinux>(
325341
new NativeProcessLinux(pid, -1, native_delegate, *arch_or, *this, tids));
342+
343+
// We currently only seize a process if it's coredumping and thus unresumable.
344+
// This is a setter instead of being in the constructor because this could
345+
// also be extended in the future to stop the process from being resumed if it
346+
// stops for PTRACE_O_TRACEEXIT.
347+
native_up->SetInNonResumableStop(process_info.IsCoreDumping());
348+
349+
return std::move(native_up);
326350
}
327351

328352
NativeProcessLinux::Extension
@@ -444,6 +468,93 @@ NativeProcessLinux::NativeProcessLinux(::pid_t pid, int terminal_fd,
444468
SetState(StateType::eStateStopped, false);
445469
}
446470

471+
// Seize every thread of the process `pid` with PTRACE_SEIZE and then stop it
// with PTRACE_INTERRUPT.
//
// The thread list is re-read until Host::FindProcessThreads reports nothing
// new. Threads that disappear while we are seizing them (ESRCH) are dropped
// from the set instead of failing the whole operation.
//
// Returns the ids of all threads that were seized and interrupted, or an
// error if a ptrace/waitpid call fails for a reason other than the thread
// having gone away, or if no thread could be seized at all.
llvm::Expected<std::vector<::pid_t>> NativeProcessLinux::Seize(::pid_t pid) {
  // TODO: Because the Seize during coredumping change introduces the
  // concept of a non resumable stop, we should also check for
  // PTRACE_O_TRACEEXIT, which per the man page the status will equal
  // status >> 8 == (SIGTRAP | (PTRACE_EVENT_EXIT<<8))
  // and if this is true, we should say we can't resume.
  Log *log = GetLog(POSIXLog::Process);

  uint64_t options = GetDefaultPtraceOpts();
  Status status;
  // Use a map to keep track of the threads which we have attached/need to
  // attach.
  Host::TidMap tids_to_attach;
  while (Host::FindProcessThreads(pid, tids_to_attach)) {
    for (Host::TidMap::iterator it = tids_to_attach.begin();
         it != tids_to_attach.end();) {
      if (it->second == true) {
        // Already seized on an earlier pass. The iterator must still be
        // advanced here, otherwise the loop never gets past this entry.
        ++it;
        continue;
      }
      lldb::tid_t tid = it->first;
      // PTRACE_SEIZE takes the ptrace options in its data argument, so no
      // separate PTRACE_SETOPTIONS call is made here.
      if ((status = PtraceWrapper(PTRACE_SEIZE, tid, nullptr, (void *)options))
              .Fail()) {
        // No such thread. The thread may have exited. More error handling
        // may be needed.
        if (status.GetError() == ESRCH) {
          it = tids_to_attach.erase(it);
          continue;
        }
        if (status.GetError() == EPERM) {
          // Depending on the value of ptrace_scope, we can return a
          // different error that suggests how to fix it.
          return AddPtraceScopeNote(status.ToError());
        }
        return status.ToError();
      }

      // Seizing does not stop the thread; request the stop explicitly.
      if ((status = PtraceWrapper(PTRACE_INTERRUPT, tid)).Fail()) {
        // No such thread. The thread may have exited. More error handling
        // may be needed.
        if (status.GetError() == ESRCH) {
          it = tids_to_attach.erase(it);
          continue;
        }
        if (status.GetError() == EPERM) {
          // Depending on the value of ptrace_scope, we can return a
          // different error that suggests how to fix it.
          return AddPtraceScopeNote(status.ToError());
        }
        return status.ToError();
      }

      int wpid =
          llvm::sys::RetryAfterSignal(-1, ::waitpid, tid, nullptr, __WALL);
      // Need to use __WALL otherwise we receive an error with errno=ECHILD.
      // At this point we should have a thread stopped if waitpid succeeds.
      if (wpid < 0) {
        // No such thread. The thread may have exited. More error handling
        // may be needed.
        if (errno == ESRCH) {
          it = tids_to_attach.erase(it);
          continue;
        }
        return llvm::errorCodeToError(
            std::error_code(errno, std::generic_category()));
      }

      LLDB_LOG(log, "adding tid = {0}", tid);
      it->second = true;

      // move the loop forward
      ++it;
    }
  }

  size_t tid_count = tids_to_attach.size();
  if (tid_count == 0)
    return llvm::make_error<StringError>("No such process",
                                         llvm::inconvertibleErrorCode());

  std::vector<::pid_t> tids;
  tids.reserve(tid_count);
  for (const auto &p : tids_to_attach)
    tids.push_back(p.first);

  return std::move(tids);
}
557+
447558
llvm::Expected<std::vector<::pid_t>> NativeProcessLinux::Attach(::pid_t pid) {
448559
Log *log = GetLog(POSIXLog::Process);
449560

@@ -513,8 +624,8 @@ llvm::Expected<std::vector<::pid_t>> NativeProcessLinux::Attach(::pid_t pid) {
513624
return std::move(tids);
514625
}
515626

516-
Status NativeProcessLinux::SetDefaultPtraceOpts(lldb::pid_t pid) {
517-
long ptrace_opts = 0;
627+
uint64_t NativeProcessLinux::GetDefaultPtraceOpts() {
628+
uint64_t ptrace_opts = 0;
518629

519630
// Have the child raise an event on exit. This is used to keep the child in
520631
// limbo until it is destroyed.
@@ -537,6 +648,11 @@ Status NativeProcessLinux::SetDefaultPtraceOpts(lldb::pid_t pid) {
537648
// the child finishes sharing memory.
538649
ptrace_opts |= PTRACE_O_TRACEVFORKDONE;
539650

651+
return ptrace_opts;
652+
}
653+
654+
Status NativeProcessLinux::SetDefaultPtraceOpts(lldb::pid_t pid) {
655+
uint64_t ptrace_opts = GetDefaultPtraceOpts();
540656
return PtraceWrapper(PTRACE_SETOPTIONS, pid, nullptr, (void *)ptrace_opts);
541657
}
542658

lldb/source/Plugins/Process/Linux/NativeProcessLinux.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ class NativeProcessLinux : public NativeProcessELF,
165165
/// Writes a siginfo_t structure corresponding to the given thread ID to the
166166
/// memory region pointed to by \p siginfo.
167167
Status GetSignalInfo(lldb::tid_t tid, void *siginfo) const;
168+
void SetInNonResumableStop(bool value) { m_nonresumable_stop = value; }
169+
bool InNonResumableStop() const override { return m_nonresumable_stop; }
168170

169171
protected:
170172
llvm::Expected<llvm::ArrayRef<uint8_t>>
@@ -175,7 +177,7 @@ class NativeProcessLinux : public NativeProcessELF,
175177
private:
176178
Manager &m_manager;
177179
ArchSpec m_arch;
178-
180+
bool m_nonresumable_stop = false;
179181
LazyBool m_supports_mem_region = eLazyBoolCalculate;
180182
std::vector<std::pair<MemoryRegionInfo, FileSpec>> m_mem_region_cache;
181183

@@ -191,9 +193,13 @@ class NativeProcessLinux : public NativeProcessELF,
191193

192194
// Returns a list of process threads that we have attached to.
193195
static llvm::Expected<std::vector<::pid_t>> Attach(::pid_t pid);
196+
// Returns a list of process threads that we have seized and interrupted.
197+
static llvm::Expected<std::vector<::pid_t>> Seize(::pid_t pid);
194198

195199
static Status SetDefaultPtraceOpts(const lldb::pid_t);
196200

201+
static uint64_t GetDefaultPtraceOpts();
202+
197203
bool TryHandleWaitStatus(lldb::pid_t pid, WaitStatus status);
198204

199205
void MonitorCallback(NativeThreadLinux &thread, WaitStatus status);

lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2235,6 +2235,9 @@ bool GDBRemoteCommunicationClient::GetCurrentProcessInfo(bool allow_lazy) {
22352235
if (llvm::to_integer(x, vmaddr, 16))
22362236
m_binary_addresses.push_back(vmaddr);
22372237
}
2238+
} else if (name == "non_resumable") {
2239+
if (!value.getAsInteger(0, m_in_nonresumable_stop))
2240+
++num_keys_decoded;
22382241
}
22392242
}
22402243
if (num_keys_decoded > 0)

lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,8 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase {
530530

531531
llvm::Expected<int> KillProcess(lldb::pid_t pid);
532532

533+
bool SafeToResume() const { return !m_in_nonresumable_stop; }
534+
533535
protected:
534536
LazyBool m_supports_not_sending_acks = eLazyBoolCalculate;
535537
LazyBool m_supports_thread_suffix = eLazyBoolCalculate;
@@ -622,6 +624,8 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase {
622624
int m_target_vm_page_size = 0; // target system VM page size; 0 unspecified
623625
uint64_t m_max_packet_size = 0; // as returned by qSupported
624626
std::string m_qSupported_response; // the complete response to qSupported
627+
bool m_in_nonresumable_stop =
628+
false; // true if we are in a stop that cannot be resumed, only exited.
625629

626630
bool m_supported_async_json_packets_is_valid = false;
627631
lldb_private::StructuredData::ObjectSP m_supported_async_json_packets_sp;

lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,9 @@ void GDBRemoteCommunicationServerCommon::
13041304
if (!abi.empty())
13051305
response.Printf("elf_abi:%s;", abi.c_str());
13061306
response.Printf("ptrsize:%d;", proc_arch.GetAddressByteSize());
1307+
std::optional<bool> non_resumable = proc_info.IsNonResumable();
1308+
if (non_resumable)
1309+
response.Printf("non_resumable:%d;", *non_resumable);
13071310
}
13081311
}
13091312

lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ enum GDBRemoteServerError {
6262
eErrorFirst = 29,
6363
eErrorNoProcess = eErrorFirst,
6464
eErrorResume,
65-
eErrorExitStatus
65+
eErrorExitStatus,
6666
};
6767
}
6868

@@ -1396,6 +1396,10 @@ GDBRemoteCommunicationServerLLGS::Handle_qProcessInfo(
13961396
if (!Host::GetProcessInfo(pid, proc_info))
13971397
return SendErrorResponse(1);
13981398

1399+
// We check for the bool so we don't emit the false and waste bytes.
1400+
if (m_current_process->InNonResumableStop())
1401+
proc_info.SetNonResumable(true);
1402+
13991403
StreamString response;
14001404
CreateProcessInfoResponse_DebugServerStyle(proc_info, response);
14011405
return SendPacketNoLock(response.GetString());
@@ -1825,6 +1829,12 @@ GDBRemoteCommunicationServerLLGS::Handle_vCont(
18251829
return SendErrorResponse(GDBRemoteServerError::eErrorResume);
18261830
}
18271831

1832+
if (process_it->second.process_up->InNonResumableStop()) {
1833+
LLDB_LOG(log, "vCont failed for process {0}: process not resumable",
1834+
x.first);
1835+
return SendErrorResponse(GDBRemoteServerError::eErrorResume);
1836+
}
1837+
18281838
// There are four possible scenarios here. These are:
18291839
// 1. vCont on a stopped process that resumes at least one thread.
18301840
// In this case, we call Resume().

lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,11 @@ void ProcessGDBRemote::DidLaunchOrAttach(ArchSpec &process_arch) {
966966
// architecture we got from the remote GDB server
967967
GetTarget().SetArchitecture(process_arch);
968968
}
969+
970+
// If the process is in a non resumable stop, we'll keep erroring out
971+
// trying to jit and continue the process. So we set can JIT to false
972+
// so any expression is evaluated in LLDB.
973+
SetCanJIT(m_gdb_comm.SafeToResume());
969974
}
970975

971976
// Target and Process are reasonably initailized;
@@ -1181,6 +1186,9 @@ void ProcessGDBRemote::DidAttach(ArchSpec &process_arch) {
11811186
}
11821187
11831188
Status ProcessGDBRemote::WillResume() {
1189+
if (!m_gdb_comm.SafeToResume())
1190+
return Status::FromErrorString("Process is in a non-resumable stop. Only "
1191+
"detach or exit are supported");
11841192
m_continue_c_tids.clear();
11851193
m_continue_C_tids.clear();
11861194
m_continue_s_tids.clear();

lldb/unittests/Host/posix/HostTest.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ TEST_F(HostTest, GetProcessInfoSetsPriority) {
116116
ASSERT_TRUE(Info.IsZombie().has_value());
117117
ASSERT_FALSE(Info.IsZombie().value());
118118

119-
ASSERT_TRUE(Info.IsCoreDumping().has_value());
120-
ASSERT_FALSE(Info.IsCoreDumping().value());
119+
ASSERT_FALSE(Info.IsCoreDumping());
121120
}
122121
#endif

0 commit comments

Comments
 (0)