Skip to content

WaitManager.cc:126:wait() errno: EINVAL because of using waitid() with __WALL #3605

@GitMensch

Description

@GitMensch

Debugging shows that on each call of rr::WaitState::wait() I'm getting to an error return of -1 with errno set to EINVAL.

$ bin/rr true
rr: Saving execution to trace directory `/home/so/.local/share/rr/true-16'.
[FATAL /tmp/rr/src/WaitManager.cc:126:wait() errno: EINVAL] Unexpected error waiting for 14876
=== Start rr backtrace:
bin/rr(_ZN2rr13dump_rr_stackEv+0x33)[0x964de5]
bin/rr(_ZN2rr15notifying_abortEv+0x12)[0x964d70]
bin/rr(_ZN2rr12FatalOstreamD1Ev+0x28)[0x7bbf10]
bin/rr(_ZN2rr9WaitState4waitERKNS_11WaitOptionsEi+0x1ab)[0x96bb2b]
bin/rr(_ZN2rr11WaitManager9wait_stopERKNS_11WaitOptionsE+0x28)[0x96beba]
bin/rr(_ZN2rr4Task4waitEd+0x1ec)[0x928232]
bin/rr(_ZN2rr4Task5spawnERNS_7SessionERNS_8ScopedFdEPS3_S5_PiRKSsRKSt6vectorISsSaISsEESD_i+0x6b3)[0x92f9a7]
bin/rr(_ZN2rr13RecordSessionC1ERKSsRKSt6vectorISsSaISsEES7_RKNS_20DisableCPUIDFeaturesENS0_16SyscallBufferingEiNS_7BindCPUES2_PKNS_9TraceUuidEbb+0x33d)[0x8108c3]
bin/rr(_ZN2rr13RecordSession6createERKSt6vectorISsSaISsEES5_RKNS_20DisableCPUIDFeaturesENS0_16SyscallBufferingEhNS_7BindCPUERKSsPKNS_9TraceUuidEbbbb+0xc2a)[0x810152]
bin/rr[0x8043f1]
bin/rr(_ZN2rr13RecordCommand3runERSt6vectorISsSaISsEE+0x3b0)[0x805170]
bin/rr(main+0x24e)[0x7cd113]
/usr/lib64/libc.so.6(__libc_start_main+0xf5)[0x7f6d1a60f555]
bin/rr[0x6e4ea5]
=== End rr backtrace
Aborted

GDB backtrace:

Reading symbols from bin/rr...
Breakpoint 1 at 0x96bae1: file /tmp/rr/src/WaitManager.cc, line 126.
Starting program: /tmp/rr/build/bin/rr true
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/usr/lib64/libthread_db.so.1".
[New Thread 0x7ffff64fd700 (LWP 14950)]
[New Thread 0x7ffff59fb700 (LWP 14951)]
[New Thread 0x7fffeffff700 (LWP 14952)]
[New Thread 0x7fffef7fe700 (LWP 14953)]
[New Thread 0x7fffeeffd700 (LWP 14954)]
[New Thread 0x7fffee7fc700 (LWP 14955)]
[New Thread 0x7fffedffb700 (LWP 14956)]
[New Thread 0x7fffed7fa700 (LWP 14957)]
[New Thread 0x7fffecff9700 (LWP 14958)]
[New Thread 0x7fffec7f8700 (LWP 14959)]
[New Thread 0x7fffebff7700 (LWP 14960)]
[New Thread 0x7fffeb7f6700 (LWP 14961)]
rr: Saving execution to trace directory `/home/so/.local/share/rr/true-18'.
[Detaching after fork from child process 14962]

Thread 1 "rr" hit Breakpoint 1, rr::WaitState::wait (this=0xd96a00 <rr::wait_state()::static_state>, options=..., type=2) at /tmp/rr/src/WaitManager.cc:126
126         FATAL() << "Unexpected error waiting for " << options.tid;
(gdb) bt
#0  rr::WaitState::wait (this=0xd96a00 <rr::wait_state()::static_state>, options=..., type=2) at /tmp/rr/src/WaitManager.cc:126
#1  0x000000000096beba in rr::WaitManager::wait_stop (options=...) at /tmp/rr/src/WaitManager.cc:191
#2  0x0000000000928232 in rr::Task::wait (this=0xd9f6b0, interrupt_after_elapsed=-1) at /tmp/rr/src/Task.cc:2015
#3  0x000000000092f9a7 in rr::Task::spawn (session=..., error_fd=..., sock_fd_out=0xd97890, sock_fd_receiver_out=0xd97b40,
    tracee_socket_fd_number_out=0xd9c530, exe_path=..., argv=..., envp=..., rec_tid=-1) at /tmp/rr/src/Task.cc:3653
#4  0x00000000008108c3 in rr::RecordSession::RecordSession (this=0xd9c450, exe_path=..., argv=..., envp=...,
    disable_cpuid_features=..., syscallbuf=rr::RecordSession::ENABLE_SYSCALL_BUF, syscallbuf_desched_sig=30,
    bind_cpu=rr::BIND_CPU, output_trace_dir=..., trace_id=0x0, use_audit=false, unmap_vdso=false)
    at /tmp/rr/src/RecordSession.cc:2471
#5  0x0000000000810152 in rr::RecordSession::create (argv=..., extra_env=..., disable_cpuid_features=...,
    syscallbuf=rr::RecordSession::ENABLE_SYSCALL_BUF, syscallbuf_desched_sig=30 '\036', bind_cpu=rr::BIND_CPU,
    output_trace_dir=..., trace_id=0x0, use_audit=false, unmap_vdso=false, force_asan_active=false, force_tsan_active=false)
    at /tmp/rr/src/RecordSession.cc:2422
#6  0x00000000008043f1 in rr::record (args=..., flags=...) at /tmp/rr/src/RecordCommand.cc:670
#7  0x0000000000805170 in rr::RecordCommand::run (this=0xd94bc0 <rr::RecordCommand::singleton>, args=...)
    at /tmp/rr/src/RecordCommand.cc:860
#8  0x00000000007cd113 in main (argc=2, argv=0x7fffffffd558) at /tmp/rr/src/main.cc:278

This fatal error is raised in wait()

rr/src/WaitManager.cc

Lines 110 to 127 in 5817b78

pid_t ret = do_wait(options.unblock_on_other_tasks ? -1 : options.tid,
options.consume, type, options.block_seconds,
result.status);
if (ret == 0) {
result.code = WAIT_NO_STATUS;
return result;
}
if (ret < 0) {
if (errno == EINTR) {
result.code = WAIT_NO_STATUS;
return result;
}
if (errno == ECHILD) {
result.code = WAIT_NO_CHILD;
return result;
}
FATAL() << "Unexpected error waiting for " << options.tid;
}
because of the return code and errno set by do_wait()

rr/src/WaitManager.cc

Lines 41 to 58 in 5817b78

pid_t WaitState::do_wait(pid_t tid, bool consume, int type, double block_seconds, WaitStatus& status) {
int options = type | __WALL;
if (!consume) {
options |= WNOWAIT;
}
siginfo_t siginfo;
memset(&siginfo, 0, sizeof(siginfo));
if (block_seconds <= 0.0) {
options |= WNOHANG;
} else if (block_seconds < WAIT_BLOCK_MAX) {
struct itimerval timer = { { 0, 0 }, to_timeval(block_seconds) };
if (setitimer(ITIMER_REAL, &timer, nullptr) < 0) {
FATAL() << "Failed to set itimer";
}
LOG(debug) << " Arming timer for polling";
// XXX what if the timer fires before we get into waitid???
}
int ret = waitid(tid >= 0 ? P_PID : P_ALL, tid, &siginfo, options);
because of the call to waitid().

According to the mainpage for waitid():

EINVAL
The options argument was invalid.

The following Linux-specific options are for use with children created using clone(2); they cannot be used with waitid():
__WALL (since Linux 2.4)
Wait for all children, regardless of type ("clone" or "non-clone").

The code that sets this option is already in since the implementation of WaitManager, so I do wonder: is the tid created by clone() (If yes: where does this happen?) and:
Do I misread the docs that we actually should not use __WALL (done in line 42) with waitid() (done in line 54)?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions