Skip to content

Commit 6ec1d3a

Browse files
committed
broker: avoid spurious overlay peer warning
Problem: the broker complains of being unable to resolve its upstream peer when bootstrapped via PMI, but the peer uri is valid. Warning: unable to resolve upstream peer [: Name or service not known The URI is generated by 0mq after concretizing a wildcard bind address. When ipv6 is enabled, 0mq puts the address in a form that cannot be parsed by yuarel, nor can it be looked up by getaddrinfo(3), e.g. tcp://[::ffff:134.9.50.69]:49155 Move the check into boot_config.c so that it is only performed when the URIs come from a config file. Fixes #5273
1 parent 7a64c5b commit 6ec1d3a

File tree

2 files changed

+37
-36
lines changed

2 files changed

+37
-36
lines changed

src/broker/boot_config.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
#if HAVE_CONFIG_H
1414
#include "config.h"
1515
#endif
16+
#include <sys/types.h>
17+
#include <sys/socket.h>
18+
#include <netdb.h>
1619
#include <unistd.h>
1720
#include <stdbool.h>
1821
#include <assert.h>
@@ -22,6 +25,7 @@
2225
#include <flux/hostlist.h>
2326
#include <flux/taskmap.h>
2427

28+
#include "src/common/libyuarel/yuarel.h"
2529
#include "src/common/libutil/log.h"
2630
#include "src/common/libutil/errno_safe.h"
2731
#include "ccan/str/str.h"
@@ -495,6 +499,38 @@ static int set_broker_boot_method_attr (attr_t *attrs, const char *value)
495499
return 0;
496500
}
497501

502+
/* Zeromq treats failed hostname resolution as a transient error, and silently
503+
* retries in the background, which can make config problems hard to diagnose.
504+
* Parse the URI in advance, and if the host portion is invalid, log it.
505+
* Ref: flux-framework/flux-core#5009
506+
*/
507+
static void warn_of_invalid_host (flux_t *h, const char *uri)
508+
{
509+
char *cpy;
510+
struct yuarel u = { 0 };
511+
struct addrinfo *result;
512+
int e;
513+
514+
if (!(cpy = strdup (uri))
515+
|| yuarel_parse (&u, cpy) < 0
516+
|| !u.scheme
517+
|| !u.host
518+
|| !streq (u.scheme, "tcp"))
519+
goto done;
520+
/* N.B. this URI will be used for zmq_connect(), therefore it must
521+
* be a valid peer address, not an interface name or wildcard.
522+
*/
523+
if ((e = getaddrinfo (u.host, NULL, NULL, &result)) == 0) {
524+
freeaddrinfo (result);
525+
goto done;
526+
}
527+
log_msg ("Warning: unable to resolve upstream peer %s: %s",
528+
u.host,
529+
gai_strerror (e));
530+
done:
531+
free (cpy);
532+
}
533+
498534
int boot_config (flux_t *h, struct overlay *overlay, attr_t *attrs)
499535
{
500536
struct boot_conf conf;
@@ -623,6 +659,7 @@ int boot_config (flux_t *h, struct overlay *overlay, attr_t *attrs)
623659
parent_uri,
624660
sizeof (parent_uri)) < 0)
625661
goto error;
662+
warn_of_invalid_host (h, parent_uri);
626663
if (overlay_set_parent_uri (overlay, parent_uri) < 0) {
627664
log_err ("overlay_set_parent_uri %s", parent_uri);
628665
goto error;

src/broker/overlay.c

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
#include "src/common/libutil/errprintf.h"
3333
#include "src/common/librouter/rpc_track.h"
3434
#include "src/common/libccan/ccan/ptrint/ptrint.h"
35-
#include "src/common/libyuarel/yuarel.h"
3635

3736
#include "overlay.h"
3837
#include "attr.h"
@@ -1264,48 +1263,13 @@ static void parent_monitor_cb (struct zmqutil_monitor *mon, void *arg)
12641263
}
12651264
}
12661265

1267-
/* Zeromq treats failed hostname resolution as a transient error, and silently
1268-
* retries in the background, which can make config problems hard to diagnose.
1269-
* Parse the URI in advance, and if the host portion is invalid, log it.
1270-
* Ref: flux-framework/flux-core#5009
1271-
*/
1272-
static void warn_of_invalid_host (flux_t *h, const char *uri)
1273-
{
1274-
char *cpy;
1275-
struct yuarel u = { 0 };
1276-
struct addrinfo *result;
1277-
int e;
1278-
1279-
if (!(cpy = strdup (uri))
1280-
|| yuarel_parse (&u, cpy) < 0
1281-
|| !u.scheme
1282-
|| !u.host
1283-
|| !streq (u.scheme, "tcp"))
1284-
goto done;
1285-
/* N.B. this URI will be used for zmq_connect(), therefore it must
1286-
* be a valid peer address, not an interface name or wildcard.
1287-
*/
1288-
if ((e = getaddrinfo (u.host, NULL, NULL, &result)) == 0) {
1289-
freeaddrinfo (result);
1290-
goto done;
1291-
}
1292-
flux_log (h,
1293-
LOG_ERR,
1294-
"Warning: unable to resolve upstream peer %s: %s",
1295-
u.host,
1296-
gai_strerror (e));
1297-
done:
1298-
free (cpy);
1299-
}
1300-
13011266
int overlay_connect (struct overlay *ov)
13021267
{
13031268
if (ov->rank > 0) {
13041269
if (!ov->h || ov->rank == FLUX_NODEID_ANY || !ov->parent.uri) {
13051270
errno = EINVAL;
13061271
return -1;
13071272
}
1308-
warn_of_invalid_host (ov->h, ov->parent.uri);
13091273
if (!(ov->parent.zsock = zsock_new_dealer (NULL)))
13101274
goto nomem;
13111275
/* The socket monitor is only used for logging.

0 commit comments

Comments
 (0)