-
Notifications
You must be signed in to change notification settings - Fork 640
Description
Debugging shows that on each call of rr::WaitState::wait() I'm getting to an error return of -1 with errno set to EINVAL.
$ bin/rr true
rr: Saving execution to trace directory `/home/so/.local/share/rr/true-16'.
[FATAL /tmp/rr/src/WaitManager.cc:126:wait() errno: EINVAL] Unexpected error waiting for 14876
=== Start rr backtrace:
bin/rr(_ZN2rr13dump_rr_stackEv+0x33)[0x964de5]
bin/rr(_ZN2rr15notifying_abortEv+0x12)[0x964d70]
bin/rr(_ZN2rr12FatalOstreamD1Ev+0x28)[0x7bbf10]
bin/rr(_ZN2rr9WaitState4waitERKNS_11WaitOptionsEi+0x1ab)[0x96bb2b]
bin/rr(_ZN2rr11WaitManager9wait_stopERKNS_11WaitOptionsE+0x28)[0x96beba]
bin/rr(_ZN2rr4Task4waitEd+0x1ec)[0x928232]
bin/rr(_ZN2rr4Task5spawnERNS_7SessionERNS_8ScopedFdEPS3_S5_PiRKSsRKSt6vectorISsSaISsEESD_i+0x6b3)[0x92f9a7]
bin/rr(_ZN2rr13RecordSessionC1ERKSsRKSt6vectorISsSaISsEES7_RKNS_20DisableCPUIDFeaturesENS0_16SyscallBufferingEiNS_7BindCPUES2_PKNS_9TraceUuidEbb+0x33d)[0x8108c3]
bin/rr(_ZN2rr13RecordSession6createERKSt6vectorISsSaISsEES5_RKNS_20DisableCPUIDFeaturesENS0_16SyscallBufferingEhNS_7BindCPUERKSsPKNS_9TraceUuidEbbbb+0xc2a)[0x810152]
bin/rr[0x8043f1]
bin/rr(_ZN2rr13RecordCommand3runERSt6vectorISsSaISsEE+0x3b0)[0x805170]
bin/rr(main+0x24e)[0x7cd113]
/usr/lib64/libc.so.6(__libc_start_main+0xf5)[0x7f6d1a60f555]
bin/rr[0x6e4ea5]
=== End rr backtrace
AbortedGDB backtrace:
Reading symbols from bin/rr...
Breakpoint 1 at 0x96bae1: file /tmp/rr/src/WaitManager.cc, line 126.
Starting program: /tmp/rr/build/bin/rr true
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/usr/lib64/libthread_db.so.1".
[New Thread 0x7ffff64fd700 (LWP 14950)]
[New Thread 0x7ffff59fb700 (LWP 14951)]
[New Thread 0x7fffeffff700 (LWP 14952)]
[New Thread 0x7fffef7fe700 (LWP 14953)]
[New Thread 0x7fffeeffd700 (LWP 14954)]
[New Thread 0x7fffee7fc700 (LWP 14955)]
[New Thread 0x7fffedffb700 (LWP 14956)]
[New Thread 0x7fffed7fa700 (LWP 14957)]
[New Thread 0x7fffecff9700 (LWP 14958)]
[New Thread 0x7fffec7f8700 (LWP 14959)]
[New Thread 0x7fffebff7700 (LWP 14960)]
[New Thread 0x7fffeb7f6700 (LWP 14961)]
rr: Saving execution to trace directory `/home/so/.local/share/rr/true-18'.
[Detaching after fork from child process 14962]
Thread 1 "rr" hit Breakpoint 1, rr::WaitState::wait (this=0xd96a00 <rr::wait_state()::static_state>, options=..., type=2) at /tmp/rr/src/WaitManager.cc:126
126 FATAL() << "Unexpected error waiting for " << options.tid;
(gdb) bt
#0 rr::WaitState::wait (this=0xd96a00 <rr::wait_state()::static_state>, options=..., type=2) at /tmp/rr/src/WaitManager.cc:126
#1 0x000000000096beba in rr::WaitManager::wait_stop (options=...) at /tmp/rr/src/WaitManager.cc:191
#2 0x0000000000928232 in rr::Task::wait (this=0xd9f6b0, interrupt_after_elapsed=-1) at /tmp/rr/src/Task.cc:2015
#3 0x000000000092f9a7 in rr::Task::spawn (session=..., error_fd=..., sock_fd_out=0xd97890, sock_fd_receiver_out=0xd97b40,
tracee_socket_fd_number_out=0xd9c530, exe_path=..., argv=..., envp=..., rec_tid=-1) at /tmp/rr/src/Task.cc:3653
#4 0x00000000008108c3 in rr::RecordSession::RecordSession (this=0xd9c450, exe_path=..., argv=..., envp=...,
disable_cpuid_features=..., syscallbuf=rr::RecordSession::ENABLE_SYSCALL_BUF, syscallbuf_desched_sig=30,
bind_cpu=rr::BIND_CPU, output_trace_dir=..., trace_id=0x0, use_audit=false, unmap_vdso=false)
at /tmp/rr/src/RecordSession.cc:2471
#5 0x0000000000810152 in rr::RecordSession::create (argv=..., extra_env=..., disable_cpuid_features=...,
syscallbuf=rr::RecordSession::ENABLE_SYSCALL_BUF, syscallbuf_desched_sig=30 '\036', bind_cpu=rr::BIND_CPU,
output_trace_dir=..., trace_id=0x0, use_audit=false, unmap_vdso=false, force_asan_active=false, force_tsan_active=false)
at /tmp/rr/src/RecordSession.cc:2422
#6 0x00000000008043f1 in rr::record (args=..., flags=...) at /tmp/rr/src/RecordCommand.cc:670
#7 0x0000000000805170 in rr::RecordCommand::run (this=0xd94bc0 <rr::RecordCommand::singleton>, args=...)
at /tmp/rr/src/RecordCommand.cc:860
#8 0x00000000007cd113 in main (argc=2, argv=0x7fffffffd558) at /tmp/rr/src/main.cc:278
This fatal error is raised in wait()
Lines 110 to 127 in 5817b78
| pid_t ret = do_wait(options.unblock_on_other_tasks ? -1 : options.tid, | |
| options.consume, type, options.block_seconds, | |
| result.status); | |
| if (ret == 0) { | |
| result.code = WAIT_NO_STATUS; | |
| return result; | |
| } | |
| if (ret < 0) { | |
| if (errno == EINTR) { | |
| result.code = WAIT_NO_STATUS; | |
| return result; | |
| } | |
| if (errno == ECHILD) { | |
| result.code = WAIT_NO_CHILD; | |
| return result; | |
| } | |
| FATAL() << "Unexpected error waiting for " << options.tid; | |
| } |
do_wait() Lines 41 to 58 in 5817b78
| pid_t WaitState::do_wait(pid_t tid, bool consume, int type, double block_seconds, WaitStatus& status) { | |
| int options = type | __WALL; | |
| if (!consume) { | |
| options |= WNOWAIT; | |
| } | |
| siginfo_t siginfo; | |
| memset(&siginfo, 0, sizeof(siginfo)); | |
| if (block_seconds <= 0.0) { | |
| options |= WNOHANG; | |
| } else if (block_seconds < WAIT_BLOCK_MAX) { | |
| struct itimerval timer = { { 0, 0 }, to_timeval(block_seconds) }; | |
| if (setitimer(ITIMER_REAL, &timer, nullptr) < 0) { | |
| FATAL() << "Failed to set itimer"; | |
| } | |
| LOG(debug) << " Arming timer for polling"; | |
| // XXX what if the timer fires before we get into waitid??? | |
| } | |
| int ret = waitid(tid >= 0 ? P_PID : P_ALL, tid, &siginfo, options); |
waitid().
According to the mainpage for waitid():
EINVAL
The options argument was invalid.
The following Linux-specific options are for use with children created using
clone(2); they cannot be used withwaitid():
__WALL(since Linux 2.4)
Wait for all children, regardless of type ("clone" or "non-clone").
The code that sets this option is already in since the implementation of WaitManager, so I do wonder: is the tid created by clone() (If yes: where does this happen?) and:
Do I misread the docs that we actually should not use __WALL (done in line 42) with waitid() (done in line 54)?