2929#include "src/common/libutil/fsd.h"
3030#include "src/common/libutil/errno_safe.h"
3131#include "src/common/libutil/monotime.h"
32+ #include "src/common/libutil/errprintf.h"
3233#include "src/common/librouter/rpc_track.h"
3334#include "src/common/libccan/ccan/ptrint/ptrint.h"
3435
@@ -897,8 +898,13 @@ static void child_cb (flux_reactor_t *r, flux_watcher_t *w,
897898 * Send CONTROL_DISCONNECT to force subtree panic.
898899 */
899900 else {
900- if (overlay_control_child (ov , uuid , CONTROL_DISCONNECT , 0 ) < 0 )
901- flux_log_error (ov -> h , "failed to send CONTROL_DISCONNECT" );
901+ if (overlay_control_child (ov , uuid , CONTROL_DISCONNECT , 0 ) < 0 ) {
902+ // not LOG_ERR per flux-framework/flux-core#4464
903+ flux_log (ov -> h ,
904+ LOG_DEBUG ,
905+ "failed to send CONTROL_DISCONNECT: %s" ,
906+ strerror (errno ));
907+ }
902908 }
903909 goto done ;
904910 }
@@ -1023,13 +1029,13 @@ static void parent_cb (flux_reactor_t *r, flux_watcher_t *w,
10231029/* Check child flux-core version 'v1' against this broker's version 'v2'.
10241030 * For now we require an exact match of (major,minor,patch) and
10251031 * ignore any commit id appended to the version string.
1026- * Return 0 on error, or -1 on failure with message for child in 'errubuf '.
1032+ * Return 0 on error, or -1 on failure with message for child in 'error '.
10271033 */
1028- static bool version_check (int v1 , int v2 , char * errbuf , int errbufsz )
1034+ static bool version_check (int v1 , int v2 , flux_error_t * error )
10291035{
10301036 if (v1 != v2 ) {
1031- snprintf ( errbuf , errbufsz ,
1032- "flux-core v %u.%u.%u mismatched with parent v %u.%u.%u" ,
1037+ errprintf ( error ,
1038+ "client ( %u.%u.%u) version mismatched with server ( %u.%u.%u) " ,
10331039 (v1 >> 16 ) & 0xff ,
10341040 (v1 >> 8 ) & 0xff ,
10351041 v1 & 0xff ,
@@ -1054,12 +1060,11 @@ static void hello_request_handler (struct overlay *ov, const flux_msg_t *msg)
10541060 json_int_t rank ;
10551061 int version ;
10561062 const char * errmsg = NULL ;
1057- char errbuf [128 ];
1058- const char * reason = "" ;
1063+ flux_error_t error ;
10591064 flux_msg_t * response ;
10601065 const char * uuid ;
10611066 int status ;
1062- int hello_log_level = LOG_INFO ;
1067+ int hello_log_level = LOG_DEBUG ;
10631068
10641069 if (flux_request_unpack (msg ,
10651070 NULL ,
@@ -1070,47 +1075,52 @@ static void hello_request_handler (struct overlay *ov, const flux_msg_t *msg)
10701075 "status" , & status ) < 0
10711076 || flux_msg_authorize (msg , FLUX_USERID_UNKNOWN ) < 0 )
10721077 goto error ; // EPROTO or EPERM (unlikely)
1073-
10741078 if (!(child = child_lookup_byrank (ov , rank ))) {
1075- snprintf ( errbuf , sizeof ( errbuf ) ,
1079+ errprintf ( & error ,
10761080 "rank %lu is not a peer of parent %lu: mismatched config?" ,
10771081 (unsigned long )rank ,
10781082 (unsigned long )ov -> parent .rank );
1079- errmsg = errbuf ;
1083+ flux_log (ov -> h , LOG_ERR ,
1084+ "rejecting connection from %s (rank %lu): %s" ,
1085+ flux_get_hostbyrank (ov -> h , rank ),
1086+ (unsigned long )rank ,
1087+ error .text );
1088+ errmsg = error .text ;
10801089 errno = EINVAL ;
1081- reason = "not a peer of this broker - mismatched config?" ;
1082- goto error_log ;
1090+ goto error ;
10831091 }
10841092 /* Oops, child was previously online, but is now saying hello.
10851093 * Update the (old) child's subtree status to LOST. If the hello
10861094 * request is successful, another update will immediately follow.
10871095 */
10881096 if (subtree_is_online (child -> status )) { // crash
10891097 flux_log (ov -> h , LOG_ERR ,
1090- "%s (rank %lu) reconnected after crash, dropping old uuid %s " ,
1098+ "%s (rank %lu) reconnected after crash, dropping old connection state " ,
10911099 flux_get_hostbyrank (ov -> h , child -> rank ),
1092- (unsigned long )child -> rank ,
1093- child -> uuid );
1100+ (unsigned long )child -> rank );
10941101 overlay_child_status_update (ov , child , SUBTREE_STATUS_LOST );
10951102 hello_log_level = LOG_ERR ; // want hello log to stand out in this case
10961103 }
10971104
1098- if (!version_check (version , ov -> version , errbuf , sizeof (errbuf ))) {
1099- errmsg = errbuf ;
1105+ if (!version_check (version , ov -> version , & error )) {
1106+ flux_log (ov -> h , LOG_ERR ,
1107+ "rejecting connection from %s (rank %lu): %s" ,
1108+ flux_get_hostbyrank (ov -> h , rank ),
1109+ (unsigned long )rank ,
1110+ error .text );
1111+ errmsg = error .text ;
11001112 errno = EINVAL ;
1101- reason = "flux-core version mismatch - needs software update?" ;
1102- goto error_log ;
1113+ goto error ;
11031114 }
11041115
11051116 snprintf (child -> uuid , sizeof (child -> uuid ), "%s" , uuid );
11061117 overlay_child_status_update (ov , child , status );
11071118
11081119 flux_log (ov -> h ,
11091120 hello_log_level ,
1110- "accepting connection from %s (rank %lu) uuid %s status %s" ,
1121+ "accepting connection from %s (rank %lu) status %s" ,
11111122 flux_get_hostbyrank (ov -> h , child -> rank ),
11121123 (unsigned long )child -> rank ,
1113- child -> uuid ,
11141124 subtree_status_str (child -> status ));
11151125
11161126 if (!(response = flux_response_derive (msg , 0 ))
@@ -1119,13 +1129,6 @@ static void hello_request_handler (struct overlay *ov, const flux_msg_t *msg)
11191129 flux_log_error (ov -> h , "error responding to overlay.hello request" );
11201130 flux_msg_destroy (response );
11211131 return ;
1122- error_log :
1123- flux_log (ov -> h , LOG_ERR ,
1124- "rejecting connection from %s (rank %lu) uuid %s: %s" ,
1125- flux_get_hostbyrank (ov -> h , rank ),
1126- (unsigned long )rank ,
1127- uuid ,
1128- reason );
11291132error :
11301133 if (!(response = flux_response_derive (msg , errno ))
11311134 || (errmsg && flux_msg_set_string (response , errmsg ) < 0 )
0 commit comments