1717// You should have received a copy of the GNU Affero General Public License
1818// along with this program. If not, see <http://www.gnu.org/licenses/>.
1919
20+ use std:: collections:: btree_map:: Entry ;
2021use std:: collections:: BTreeMap ;
2122
2223use chitchat:: { ChitchatId , NodeState } ;
@@ -25,7 +26,7 @@ use quickwit_common::tower::{make_channel, warmup_channel};
2526use tracing:: { info, warn} ;
2627
2728use crate :: member:: NodeStateExt ;
28- use crate :: ClusterNode ;
29+ use crate :: { ClusterNode , NodeId } ;
2930
3031#[ derive( Debug , Clone ) ]
3132pub enum ClusterChange {
@@ -39,7 +40,7 @@ pub enum ClusterChange {
3940pub ( crate ) async fn compute_cluster_change_events (
4041 cluster_id : & str ,
4142 self_chitchat_id : & ChitchatId ,
42- previous_nodes : & mut BTreeMap < ChitchatId , ClusterNode > ,
43+ previous_nodes : & mut BTreeMap < NodeId , ClusterNode > ,
4344 previous_node_states : & BTreeMap < ChitchatId , NodeState > ,
4445 new_node_states : & BTreeMap < ChitchatId , NodeState > ,
4546) -> Vec < ClusterChange > {
@@ -96,17 +97,8 @@ async fn compute_cluster_change_events_on_added(
9697 self_chitchat_id : & ChitchatId ,
9798 new_chitchat_id : & ChitchatId ,
9899 new_node_state : & NodeState ,
99- previous_nodes : & mut BTreeMap < ChitchatId , ClusterNode > ,
100+ previous_nodes : & mut BTreeMap < NodeId , ClusterNode > ,
100101) -> Option < ClusterChange > {
101- let is_self_node = self_chitchat_id == new_chitchat_id;
102- if !is_self_node {
103- info ! (
104- cluster_id=%cluster_id,
105- node_id=%new_chitchat_id. node_id,
106- "Node `{}` has joined the cluster." ,
107- new_chitchat_id. node_id
108- ) ;
109- }
110102 let grpc_advertise_addr = match new_node_state. grpc_advertise_addr ( ) {
111103 Ok ( addr) => addr,
112104 Err ( error) => {
@@ -120,6 +112,7 @@ async fn compute_cluster_change_events_on_added(
120112 }
121113 } ;
122114 let channel = make_channel ( grpc_advertise_addr) . await ;
115+ let is_self_node = self_chitchat_id == new_chitchat_id;
123116 let new_node = match ClusterNode :: try_new (
124117 new_chitchat_id. clone ( ) ,
125118 new_node_state,
@@ -137,8 +130,26 @@ async fn compute_cluster_change_events_on_added(
137130 return None ;
138131 }
139132 } ;
140- previous_nodes. insert ( new_chitchat_id. clone ( ) , new_node. clone ( ) ) ;
133+ let new_node_id = new_node. chitchat_id ( ) . node_id . clone ( ) ;
134+ let previous_node_opt = previous_nodes. insert ( new_node_id, new_node. clone ( ) ) ;
141135
136+ if !is_self_node {
137+ if previous_node_opt. is_some ( ) {
138+ info ! (
139+ cluster_id=%cluster_id,
140+ node_id=%new_chitchat_id. node_id,
141+ "Node `{}` has rejoined the cluster." ,
142+ new_chitchat_id. node_id
143+ ) ;
144+ } else {
145+ info ! (
146+ cluster_id=%cluster_id,
147+ node_id=%new_chitchat_id. node_id,
148+ "Node `{}` has joined the cluster." ,
149+ new_chitchat_id. node_id
150+ ) ;
151+ }
152+ }
142153 if new_node. is_ready ( ) {
143154 warmup_channel ( new_node. channel ( ) ) . await ;
144155
@@ -160,9 +171,9 @@ async fn compute_cluster_change_events_on_updated(
160171 self_chitchat_id : & ChitchatId ,
161172 updated_chitchat_id : & ChitchatId ,
162173 updated_node_state : & NodeState ,
163- previous_nodes : & mut BTreeMap < ChitchatId , ClusterNode > ,
174+ previous_nodes : & mut BTreeMap < NodeId , ClusterNode > ,
164175) -> Option < ClusterChange > {
165- let previous_node = previous_nodes. get ( updated_chitchat_id) ?. clone ( ) ;
176+ let previous_node = previous_nodes. get ( & updated_chitchat_id. node_id ) ?. clone ( ) ;
166177 let previous_channel = previous_node. channel ( ) ;
167178 let is_self_node = self_chitchat_id == updated_chitchat_id;
168179 let updated_node = match ClusterNode :: try_new (
@@ -182,7 +193,7 @@ async fn compute_cluster_change_events_on_updated(
182193 return None ;
183194 }
184195 } ;
185- previous_nodes. insert ( updated_chitchat_id. clone ( ) , updated_node. clone ( ) ) ;
196+ previous_nodes. insert ( updated_chitchat_id. node_id . clone ( ) , updated_node. clone ( ) ) ;
186197
187198 if !previous_node. is_ready ( ) && updated_node. is_ready ( ) {
188199 warmup_channel ( updated_node. channel ( ) ) . await ;
@@ -217,23 +228,30 @@ fn compute_cluster_change_events_on_removed(
217228 cluster_id : & str ,
218229 self_chitchat_id : & ChitchatId ,
219230 removed_chitchat_id : & ChitchatId ,
220- previous_nodes : & mut BTreeMap < ChitchatId , ClusterNode > ,
231+ previous_nodes : & mut BTreeMap < NodeId , ClusterNode > ,
221232) -> Option < ClusterChange > {
222- if self_chitchat_id != removed_chitchat_id {
223- info ! (
224- cluster_id=%cluster_id,
225- node_id=%removed_chitchat_id. node_id,
226- "Node `{}` has left the cluster." ,
227- removed_chitchat_id. node_id
228- ) ;
229- }
230- let previous_node = previous_nodes. remove ( removed_chitchat_id) ?;
233+ let removed_node_id = removed_chitchat_id. node_id . clone ( ) ;
234+
235+ if let Entry :: Occupied ( previous_node_entry) = previous_nodes. entry ( removed_node_id) {
236+ let previous_node_ref = previous_node_entry. get ( ) ;
237+
238+ if previous_node_ref. chitchat_id ( ) . generation_id == removed_chitchat_id. generation_id {
239+ if self_chitchat_id != removed_chitchat_id {
240+ info ! (
241+ cluster_id=%cluster_id,
242+ node_id=%removed_chitchat_id. node_id,
243+ "Node `{}` has left the cluster." ,
244+ removed_chitchat_id. node_id
245+ ) ;
246+ }
247+ let previous_node = previous_node_entry. remove ( ) ;
231248
232- if previous_node. is_ready ( ) {
233- Some ( ClusterChange :: Remove ( previous_node) )
234- } else {
235- None
236- }
249+ if previous_node. is_ready ( ) {
250+ return Some ( ClusterChange :: Remove ( previous_node) ) ;
251+ }
252+ }
253+ } ;
254+ None
237255}
238256
239257#[ cfg( test) ]
@@ -361,7 +379,7 @@ mod tests {
361379 . await ;
362380 assert ! ( event. is_none( ) ) ;
363381
364- let node = previous_nodes. get ( & new_chitchat_id) . unwrap ( ) ;
382+ let node = previous_nodes. get ( & new_chitchat_id. node_id ) . unwrap ( ) ;
365383
366384 assert_eq ! ( node. chitchat_id( ) , & new_chitchat_id) ;
367385 assert_eq ! ( node. grpc_advertise_addr( ) , grpc_advertise_addr) ;
@@ -396,7 +414,7 @@ mod tests {
396414 assert_eq ! ( node. grpc_advertise_addr( ) , grpc_advertise_addr) ;
397415 assert ! ( !node. is_self_node( ) ) ;
398416 assert ! ( node. is_ready( ) ) ;
399- assert_eq ! ( previous_nodes. get( & new_chitchat_id) . unwrap( ) , & node) ;
417+ assert_eq ! ( previous_nodes. get( & new_chitchat_id. node_id ) . unwrap( ) , & node) ;
400418 }
401419 {
402420 // Self node joined the cluster and is ready.
@@ -425,7 +443,7 @@ mod tests {
425443 assert_eq ! ( node. grpc_advertise_addr( ) , grpc_advertise_addr) ;
426444 assert ! ( node. is_self_node( ) ) ;
427445 assert ! ( node. is_ready( ) ) ;
428- assert_eq ! ( previous_nodes. get( & new_chitchat_id) . unwrap( ) , & node) ;
446+ assert_eq ! ( previous_nodes. get( & new_chitchat_id. node_id ) . unwrap( ) , & node) ;
429447 }
430448 }
431449
@@ -453,7 +471,7 @@ mod tests {
453471 )
454472 . unwrap ( ) ;
455473 let mut previous_nodes =
456- BTreeMap :: from_iter ( [ ( updated_chitchat_id. clone ( ) , previous_node) ] ) ;
474+ BTreeMap :: from_iter ( [ ( updated_chitchat_id. node_id . clone ( ) , previous_node) ] ) ;
457475
458476 let updated_node_state = NodeStateBuilder :: default ( )
459477 . with_grpc_advertise_addr ( grpc_advertise_addr)
@@ -476,7 +494,10 @@ mod tests {
476494 assert_eq ! ( node. grpc_advertise_addr( ) , grpc_advertise_addr) ;
477495 assert ! ( node. is_ready( ) ) ;
478496 assert ! ( !node. is_self_node( ) ) ;
479- assert_eq ! ( previous_nodes. get( & updated_chitchat_id) . unwrap( ) , & node) ;
497+ assert_eq ! (
498+ previous_nodes. get( & updated_chitchat_id. node_id) . unwrap( ) ,
499+ & node
500+ ) ;
480501 }
481502 {
482503 // Node changed.
@@ -497,7 +518,7 @@ mod tests {
497518 )
498519 . unwrap ( ) ;
499520 let mut previous_nodes =
500- BTreeMap :: from_iter ( [ ( updated_chitchat_id. clone ( ) , previous_node) ] ) ;
521+ BTreeMap :: from_iter ( [ ( updated_chitchat_id. node_id . clone ( ) , previous_node) ] ) ;
501522
502523 let updated_node_state = NodeStateBuilder :: default ( )
503524 . with_grpc_advertise_addr ( grpc_advertise_addr)
@@ -520,7 +541,10 @@ mod tests {
520541 assert_eq ! ( node. grpc_advertise_addr( ) , grpc_advertise_addr) ;
521542 assert ! ( !node. is_self_node( ) ) ;
522543 assert ! ( node. is_ready( ) ) ;
523- assert_eq ! ( previous_nodes. get( & updated_chitchat_id) . unwrap( ) , & node) ;
544+ assert_eq ! (
545+ previous_nodes. get( & updated_chitchat_id. node_id) . unwrap( ) ,
546+ & node
547+ ) ;
524548 }
525549 {
526550 // Node is no longer ready.
@@ -541,7 +565,7 @@ mod tests {
541565 )
542566 . unwrap ( ) ;
543567 let mut previous_nodes =
544- BTreeMap :: from_iter ( [ ( updated_chitchat_id. clone ( ) , previous_node) ] ) ;
568+ BTreeMap :: from_iter ( [ ( updated_chitchat_id. node_id . clone ( ) , previous_node) ] ) ;
545569
546570 let updated_node_state = NodeStateBuilder :: default ( )
547571 . with_grpc_advertise_addr ( grpc_advertise_addr)
@@ -564,7 +588,10 @@ mod tests {
564588 assert_eq ! ( node. grpc_advertise_addr( ) , grpc_advertise_addr) ;
565589 assert ! ( !node. is_self_node( ) ) ;
566590 assert ! ( !node. is_ready( ) ) ;
567- assert_eq ! ( previous_nodes. get( & updated_chitchat_id) . unwrap( ) , & node) ;
591+ assert_eq ! (
592+ previous_nodes. get( & updated_chitchat_id. node_id) . unwrap( ) ,
593+ & node
594+ ) ;
568595 }
569596 }
570597
@@ -606,7 +633,7 @@ mod tests {
606633 )
607634 . unwrap ( ) ;
608635 let mut previous_nodes =
609- BTreeMap :: from_iter ( [ ( removed_chitchat_id. clone ( ) , previous_node) ] ) ;
636+ BTreeMap :: from_iter ( [ ( removed_chitchat_id. node_id . clone ( ) , previous_node) ] ) ;
610637
611638 let event_opt = compute_cluster_change_events_on_removed (
612639 & cluster_id,
@@ -615,7 +642,7 @@ mod tests {
615642 & mut previous_nodes,
616643 ) ;
617644 assert ! ( event_opt. is_none( ) ) ;
618- assert ! ( !previous_nodes. contains_key( & removed_chitchat_id) ) ;
645+ assert ! ( !previous_nodes. contains_key( & removed_chitchat_id. node_id ) ) ;
619646 }
620647 {
621648 // Node left the cluster in ready state.
@@ -630,7 +657,8 @@ mod tests {
630657 let node =
631658 ClusterNode :: try_new ( removed_chitchat_id. clone ( ) , & new_node_state, channel, false )
632659 . unwrap ( ) ;
633- let mut previous_nodes = BTreeMap :: from_iter ( [ ( removed_chitchat_id. clone ( ) , node) ] ) ;
660+ let mut previous_nodes =
661+ BTreeMap :: from_iter ( [ ( removed_chitchat_id. node_id . clone ( ) , node) ] ) ;
634662
635663 let event = compute_cluster_change_events_on_removed (
636664 & cluster_id,
@@ -647,7 +675,7 @@ mod tests {
647675 assert_eq ! ( node. grpc_advertise_addr( ) , grpc_advertise_addr) ;
648676 assert ! ( !node. is_self_node( ) ) ;
649677 assert ! ( node. is_ready( ) ) ;
650- assert ! ( !previous_nodes. contains_key( & removed_chitchat_id) ) ;
678+ assert ! ( !previous_nodes. contains_key( & removed_chitchat_id. node_id ) ) ;
651679 }
652680 }
653681
@@ -683,7 +711,7 @@ mod tests {
683711 )
684712 . unwrap ( ) ;
685713 let mut previous_nodes =
686- BTreeMap :: from_iter ( [ ( self_chitchat_id. clone ( ) , previous_node) ] ) ;
714+ BTreeMap :: from_iter ( [ ( self_chitchat_id. node_id . clone ( ) , previous_node) ] ) ;
687715 let previous_node_states =
688716 BTreeMap :: from_iter ( [ ( self_chitchat_id. clone ( ) , previous_node_state) ] ) ;
689717
@@ -747,7 +775,7 @@ mod tests {
747775 )
748776 . unwrap ( ) ;
749777 let mut previous_nodes =
750- BTreeMap :: from_iter ( [ ( self_chitchat_id. clone ( ) , previous_node) ] ) ;
778+ BTreeMap :: from_iter ( [ ( self_chitchat_id. node_id . clone ( ) , previous_node) ] ) ;
751779 let previous_node_states =
752780 BTreeMap :: from_iter ( [ ( self_chitchat_id. clone ( ) , previous_node_state) ] ) ;
753781
0 commit comments