1515#include <sys/types.h>
1616#include <wait.h>
1717#include <unistd.h>
18+ #include <stdarg.h>
1819#include <errno.h>
1920#include <assert.h>
2021
2122#include <flux/core.h>
2223
2324#include "ccan/str/str.h"
2425#include "src/common/libczmqcontainers/czmq_containers.h"
26+ #include "src/common/libutil/errprintf.h"
2527#include "src/common/libutil/log.h"
2628#include "src/common/libutil/fdwalk.h"
2729#include "src/common/libutil/macros.h"
3739
3840static void remote_kill_nowait (flux_subprocess_t * p , int signum );
3941
42+ static void set_failed (flux_subprocess_t * p , const char * fmt , ...)
43+ {
44+ va_list ap ;
45+ va_start (ap , fmt );
46+ verrprintf (& p -> failed_error , fmt , ap );
47+ p -> failed_errno = errno ;
48+ va_end (ap );
49+ }
50+
4051static void start_channel_watchers (flux_subprocess_t * p )
4152{
4253 struct subprocess_channel * c ;
@@ -169,6 +180,7 @@ static int remote_write (struct subprocess_channel *c)
169180
170181 if (!(ptr = flux_buffer_read (c -> write_buffer , -1 , & lenp ))) {
171182 llog_debug (c -> p , "flux_buffer_read: %s" , strerror (errno ));
183+ set_failed (c -> p , "internal buffer read error" );
172184 goto error ;
173185 }
174186
@@ -192,6 +204,7 @@ static int remote_write (struct subprocess_channel *c)
192204 llog_debug (c -> p ,
193205 "error sending rexec.write request: %s" ,
194206 strerror (errno ));
207+ set_failed (c -> p , "internal write error" );
195208 goto error ;
196209 }
197210
@@ -215,6 +228,7 @@ static int remote_close (struct subprocess_channel *c)
215228 llog_debug (c -> p ,
216229 "error sending rexec.write request: %s" ,
217230 strerror (errno ));
231+ set_failed (c -> p , "internal close error" );
218232 return -1 ;
219233 }
220234 /* No need to do a "channel_flush", normal io reactor will handle
@@ -254,7 +268,9 @@ static void remote_in_check_cb (flux_reactor_t *r,
254268 return ;
255269
256270error :
257- c -> p -> failed_errno = errno ;
271+ /* c->p->failed_errno and c->p->failed_error expected to be
272+ * set before this point (typically via set_failed())
273+ */
258274 process_new_state (c -> p , FLUX_SUBPROCESS_FAILED );
259275 remote_kill_nowait (c -> p , SIGKILL );
260276 flux_future_destroy (c -> p -> f );
@@ -520,6 +536,7 @@ static int remote_output (flux_subprocess_t *p,
520536 (int )flux_subprocess_pid (p ),
521537 stream );
522538 errno = EPROTO ;
539+ set_failed (p , "error buffering unknown channel %s" , stream );
523540 return -1 ;
524541 }
525542
@@ -539,6 +556,7 @@ static int remote_output (flux_subprocess_t *p,
539556 (int )flux_subprocess_pid (p ),
540557 stream ,
541558 strerror (errno ));
559+ set_failed (p , "error buffering %d bytes of data" , len );
542560 return -1 ;
543561 }
544562 }
@@ -572,6 +590,7 @@ static void rexec_continuation (flux_future_t *f, void *arg)
572590 remote_completion (p );
573591 return ;
574592 }
593+ set_failed (p , "%s" , future_strerror (f , errno ));
575594 goto error ;
576595 }
577596 if (subprocess_rexec_is_started (f , & p -> pid )) {
@@ -592,7 +611,9 @@ static void rexec_continuation (flux_future_t *f, void *arg)
592611 return ;
593612
594613error :
595- p -> failed_errno = errno ;
614+ /* p->failed_errno and p->failed_error are expected to be
615+ * set before this point (typically via set_failed())
616+ */
596617 process_new_state (p , FLUX_SUBPROCESS_FAILED );
597618 remote_kill_nowait (p , SIGKILL );
598619}
0 commit comments