22 * Copyright (c) 2016 UChicago Argonne, LLC
33 * (C) Copyright 2018-2024 Intel Corporation.
44 * (C) Copyright 2025 Google LLC
5+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP
56 *
67 * SPDX-License-Identifier: BSD-2-Clause-Patent
78 */
@@ -20,6 +21,7 @@ static const char *SWIM_STATUS_STR[] = {
2021static uint64_t swim_prot_period_len ;
2122static uint64_t swim_suspect_timeout ;
2223static uint64_t swim_ping_timeout ;
24+ static int swim_subgroup_size ;
2325
2426static inline uint64_t
2527swim_prot_period_len_default (void )
@@ -48,6 +50,15 @@ swim_ping_timeout_default(void)
4850 return val ;
4951}
5052
53+ static inline int
54+ swim_subgroup_size_default (void )
55+ {
56+ unsigned int val = SWIM_SUBGROUP_SIZE ;
57+
58+ d_getenv_uint ("SWIM_SUBGROUP_SIZE" , & val );
59+ return val ;
60+ }
61+
5162void
5263swim_period_set (uint64_t val )
5364{
@@ -112,9 +123,8 @@ swim_dump_updates(swim_id_t self_id, swim_id_t from_id, swim_id_t to_id,
112123 fclose (fp );
113124 /* msg and msg_size will be set after fclose(fp) only */
114125 if (msg_size > 0 )
115- SWIM_INFO ("%lu %s %lu:%s\n" , self_id ,
116- self_id == from_id ? "=>" : "<=" ,
117- self_id == from_id ? to_id : from_id , msg );
126+ SWIM_DEBUG ("%lu %s %lu:%s\n" , self_id , self_id == from_id ? "=>" : "<=" ,
127+ self_id == from_id ? to_id : from_id , msg );
118128 free (msg ); /* allocated by open_memstream() */
119129 }
120130}
@@ -149,7 +159,7 @@ swim_updates_prepare(struct swim_context *ctx, swim_id_t id, swim_id_t to,
149159 rc = ctx -> sc_ops -> get_member_state (ctx , id , & upds [n ].smu_state );
150160 if (rc ) {
151161 if (rc == - DER_NONEXIST )
152- SWIM_INFO ("%lu: not bootstrapped yet with %lu\n" , self_id , id );
162+ SWIM_DEBUG ("%lu: not bootstrapped yet with %lu\n" , self_id , id );
153163 else
154164 SWIM_ERROR ("get_member_state(%lu): " DF_RC "\n" , id , DP_RC (rc ));
155165 D_GOTO (out_unlock , rc );
@@ -171,7 +181,7 @@ swim_updates_prepare(struct swim_context *ctx, swim_id_t id, swim_id_t to,
171181 rc = ctx -> sc_ops -> get_member_state (ctx , to , & upds [n ].smu_state );
172182 if (rc ) {
173183 if (rc == - DER_NONEXIST )
174- SWIM_INFO ("%lu: not bootstrapped yet with %lu\n" , self_id , to );
184+ SWIM_DEBUG ("%lu: not bootstrapped yet with %lu\n" , self_id , to );
175185 else
176186 SWIM_ERROR ("get_member_state(%lu): " DF_RC "\n" , to , DP_RC (rc ));
177187 D_GOTO (out_unlock , rc );
@@ -295,7 +305,7 @@ swim_member_alive(struct swim_context *ctx, swim_id_t from, swim_id_t id, uint64
295305 swim_id_t self_id = swim_self_get (ctx );
296306
297307 if (rc == - DER_NONEXIST )
298- SWIM_INFO ("%lu: not bootstrapped yet with %lu\n" , self_id , id );
308+ SWIM_DEBUG ("%lu: not bootstrapped yet with %lu\n" , self_id , id );
299309 else
300310 SWIM_ERROR ("get_member_state(%lu): " DF_RC "\n" , id , DP_RC (rc ));
301311 D_GOTO (out , rc );
@@ -327,7 +337,7 @@ swim_member_alive(struct swim_context *ctx, swim_id_t from, swim_id_t id, uint64
327337 }
328338 }
329339
330- SWIM_INFO ("member %lu %lu is ALIVE\n" , id , nr );
340+ SWIM_INFO ("%lu: member %lu %lu is ALIVE from %lu \n" , ctx -> sc_self , id , nr , from );
331341 id_state .sms_incarnation = nr ;
332342 id_state .sms_status = SWIM_MEMBER_ALIVE ;
333343 rc = swim_updates_notify (ctx , from , id , & id_state , count );
@@ -374,7 +384,8 @@ swim_member_dead(struct swim_context *ctx, swim_id_t from, swim_id_t id, uint64_
374384 }
375385 }
376386
377- SWIM_ERROR ("member %lu %lu is DEAD\n" , id , nr );
387+ SWIM_ERROR ("%lu: member %lu %lu is DEAD from %lu%s\n" , ctx -> sc_self , id , nr , from ,
388+ from == ctx -> sc_self ? " (self)" : "" );
378389 id_state .sms_incarnation = nr ;
379390 id_state .sms_status = SWIM_MEMBER_DEAD ;
380391 rc = swim_updates_notify (ctx , from , id , & id_state , 0 );
@@ -437,7 +448,8 @@ swim_member_suspect(struct swim_context *ctx, swim_id_t from, swim_id_t id, uint
437448 TAILQ_INSERT_TAIL (& ctx -> sc_suspects , item , si_link );
438449
439450update :
440- SWIM_INFO ("member %lu %lu is SUSPECT\n" , id , nr );
451+ SWIM_INFO ("%lu: member %lu %lu is SUSPECT from %lu%s\n" , ctx -> sc_self , id , nr , from ,
452+ from == ctx -> sc_self ? " (self)" : "" );
441453 id_state .sms_incarnation = nr ;
442454 id_state .sms_status = SWIM_MEMBER_SUSPECT ;
443455 rc = swim_updates_notify (ctx , from , id , & id_state , 0 );
@@ -472,8 +484,7 @@ swim_member_update_suspected(struct swim_context *ctx, uint64_t now, uint64_t ne
472484 D_GOTO (next_item , rc = 0 );
473485 }
474486
475- SWIM_INFO ("%lu: suspect timeout %lu\n" ,
476- self_id , item -> si_id );
487+ SWIM_DEBUG ("%lu: suspect timeout %lu\n" , self_id , item -> si_id );
477488 if (item -> si_from != self_id ) {
478489 /* let's try to confirm from gossip origin */
479490 id = item -> si_id ;
@@ -512,8 +523,8 @@ swim_member_update_suspected(struct swim_context *ctx, uint64_t now, uint64_t ne
512523 while (item != NULL ) {
513524 next = TAILQ_NEXT (item , si_link );
514525
515- SWIM_INFO ("try to confirm from source. %lu: %lu <= %lu\n" ,
516- self_id , item -> si_id , item -> si_from );
526+ SWIM_DEBUG ("try to confirm from source. %lu: %lu <= %lu\n" , self_id , item -> si_id ,
527+ item -> si_from );
517528
518529 rc = swim_updates_send (ctx , item -> si_id , item -> si_from );
519530 if (rc )
@@ -556,8 +567,8 @@ swim_ipings_update(struct swim_context *ctx, uint64_t now, uint64_t net_glitch_d
556567 item = TAILQ_FIRST (& targets );
557568 while (item != NULL ) {
558569 next = TAILQ_NEXT (item , si_link );
559- SWIM_INFO ("reply IREQ expired. %lu: %lu => %lu\n" ,
560- self_id , item -> si_from , item -> si_id );
570+ SWIM_DEBUG ("reply IREQ expired. %lu: %lu => %lu\n" , self_id , item -> si_from ,
571+ item -> si_id );
561572
562573 rc = ctx -> sc_ops -> send_reply (ctx , item -> si_id , item -> si_from ,
563574 - DER_TIMEDOUT , item -> si_args );
@@ -596,8 +607,7 @@ swim_ipings_reply(struct swim_context *ctx, swim_id_t to_id, int ret_rc)
596607 item = TAILQ_FIRST (& targets );
597608 while (item != NULL ) {
598609 next = TAILQ_NEXT (item , si_link );
599- SWIM_INFO ("reply IREQ. %lu: %lu <= %lu\n" ,
600- self_id , item -> si_id , item -> si_from );
610+ SWIM_DEBUG ("reply IREQ. %lu: %lu <= %lu\n" , self_id , item -> si_id , item -> si_from );
601611
602612 rc = ctx -> sc_ops -> send_reply (ctx , item -> si_id , item -> si_from ,
603613 ret_rc , item -> si_args );
@@ -649,7 +659,7 @@ swim_subgroup_init(struct swim_context *ctx)
649659 swim_id_t id ;
650660 int i , rc = 0 ;
651661
652- for (i = 0 ; i < SWIM_SUBGROUP_SIZE ; i ++ ) {
662+ for (i = 0 ; i < swim_subgroup_size ; i ++ ) {
653663 id = ctx -> sc_ops -> get_iping_target (ctx );
654664 if (id == SWIM_ID_INVALID )
655665 D_GOTO (out , rc = 0 );
@@ -736,6 +746,7 @@ swim_init(swim_id_t self_id, struct swim_ops *swim_ops, void *data)
736746 swim_prot_period_len = swim_prot_period_len_default ();
737747 swim_suspect_timeout = swim_suspect_timeout_default ();
738748 swim_ping_timeout = swim_ping_timeout_default ();
749+ swim_subgroup_size = swim_subgroup_size_default ();
739750
740751 ctx -> sc_default_ping_timeout = swim_ping_timeout ;
741752
@@ -911,12 +922,12 @@ swim_progress(struct swim_context *ctx, int64_t timeout_us)
911922 target_id = ctx -> sc_target ;
912923 sendto_id = ctx -> sc_target ;
913924 send_updates = true;
914- SWIM_INFO ("%lu: dping %lu => {%lu %c %lu} "
915- "delay: %u ms, timeout: %lu ms\n" ,
916- ctx -> sc_self , ctx -> sc_self , sendto_id ,
917- SWIM_STATUS_CHARS [target_state .sms_status ],
918- target_state .sms_incarnation ,
919- target_state . sms_delay , delay );
925+ SWIM_DEBUG ("%lu: dping %lu => {%lu %c %lu} "
926+ "delay: %u ms, timeout: %lu ms\n" ,
927+ ctx -> sc_self , ctx -> sc_self , sendto_id ,
928+ SWIM_STATUS_CHARS [target_state .sms_status ],
929+ target_state .sms_incarnation , target_state . sms_delay ,
930+ delay );
920931
921932 ctx -> sc_next_tick_time = now + swim_period_get ();
922933 ctx -> sc_deadline = now + delay ;
@@ -992,23 +1003,22 @@ swim_progress(struct swim_context *ctx, int64_t timeout_us)
9921003 goto done_item ;
9931004
9941005 delay *= 2 ;
995- SWIM_INFO ("%lu: ireq %lu => {%lu %c %lu} "
996- "delay: %u ms, timeout: %lu ms\n" ,
997- ctx -> sc_self , sendto_id , target_id ,
998- SWIM_STATUS_CHARS [target_state .sms_status ],
999- target_state .sms_incarnation ,
1000- target_state .sms_delay , delay );
1006+ SWIM_DEBUG ("%lu: ireq %lu => {%lu %c %lu} "
1007+ "delay: %u ms, timeout: %lu ms\n" ,
1008+ ctx -> sc_self , sendto_id , target_id ,
1009+ SWIM_STATUS_CHARS [target_state .sms_status ],
1010+ target_state .sms_incarnation ,
1011+ target_state .sms_delay , delay );
10011012 } else {
10021013 /* Send ping only if this member is not respond yet */
10031014 if (state .sms_status != SWIM_MEMBER_INACTIVE )
10041015 goto done_item ;
10051016
1006- SWIM_INFO ("%lu: dping %lu => {%lu %c %lu} "
1007- "delay: %u ms, timeout: %lu ms\n" ,
1008- ctx -> sc_self , ctx -> sc_self , sendto_id ,
1009- SWIM_STATUS_CHARS [state .sms_status ],
1010- state .sms_incarnation ,
1011- state .sms_delay , delay );
1017+ SWIM_DEBUG ("%lu: dping %lu => {%lu %c %lu} "
1018+ "delay: %u ms, timeout: %lu ms\n" ,
1019+ ctx -> sc_self , ctx -> sc_self , sendto_id ,
1020+ SWIM_STATUS_CHARS [state .sms_status ],
1021+ state .sms_incarnation , state .sms_delay , delay );
10121022 }
10131023
10141024 send_updates = true;
@@ -1097,7 +1107,8 @@ swim_updates_parse(struct swim_context *ctx, swim_id_t from_id, swim_id_t id,
10971107 rc = ctx -> sc_ops -> get_member_state (ctx , from_id , & id_state );
10981108 if (rc == - DER_NONEXIST || id_state .sms_status == SWIM_MEMBER_DEAD ) {
10991109 swim_ctx_unlock (ctx );
1100- SWIM_INFO ("%lu: skip untrustable update from %lu, rc = %d\n" , self_id , from_id , rc );
1110+ SWIM_DEBUG ("%lu: skip untrustable update from %lu, rc = %d\n" , self_id , from_id ,
1111+ rc );
11011112 D_GOTO (out , rc = - DER_NONEXIST );
11021113 } else if (rc != 0 ) {
11031114 swim_ctx_unlock (ctx );
@@ -1108,8 +1119,8 @@ swim_updates_parse(struct swim_context *ctx, swim_id_t from_id, swim_id_t id,
11081119 if ((from_id == ctx -> sc_target || id == ctx -> sc_target ) &&
11091120 (ctx_state == SCS_BEGIN || ctx_state == SCS_PINGED || ctx_state == SCS_IPINGED )) {
11101121 ctx_state = SCS_SELECT ;
1111- SWIM_INFO ("target %lu %s okay\n" , ctx -> sc_target ,
1112- from_id == id ? "dping" : "iping" );
1122+ SWIM_DEBUG ("target %lu %s okay\n" , ctx -> sc_target ,
1123+ from_id == id ? "dping" : "iping" );
11131124 }
11141125
11151126 for (i = 0 ; i < nupds ; i ++ ) {
0 commit comments