@@ -438,6 +438,61 @@ def test_decommissioning_working_node(
438438 if not delete_topic :
439439 self .verify ()
440440
@cluster(num_nodes=6)
def test_decommission_status(self):
    """Check that every node reports an in-progress decommission status.

    A randomly chosen node is decommissioned while the recovery rate is
    set to 0, and each node in the cluster is then polled until all of
    them report an unfinished decommission with replicas left to move
    and positive size / bytes-left figures for the partitions of
    ``self._topic``. Afterwards the recovery rate is raised again and
    the test waits for the node to be removed.
    """
    self.start_redpanda()
    self._create_topics(replication_factors=[3])
    self.start_producer()

    to_decommission = random.choice(self.redpanda.nodes)
    to_decommission_id = self.redpanda.node_id(to_decommission)
    self.logger.info(f"decommissioning node: { to_decommission_id } ")

    # A recovery rate of 0 keeps the decommission from completing right
    # away, so there is a window in which intermediate status is visible.
    self._set_recovery_rate(0)
    self._decommission(to_decommission_id)

    def validate_decommission_status():
        """Return True only when every node reports an in-progress status."""

        def node_reports_in_progress(rpc_node: ClusterNode):
            """Query ``rpc_node`` and judge the status report it returns."""
            report = self.admin.get_decommission_status(
                id=to_decommission_id, node=rpc_node
            )
            is_finished = report["finished"]
            remaining_replicas = report["replicas_left"]

            tracked = 0  # partitions of self._topic present in the report
            invalid = 0  # of those, how many lack positive size/bytes-left
            for p in report.get("partitions", []):
                # Data is only produced to self._topic, so other topics
                # are not expected to carry meaningful figures.
                if p["topic"] != self._topic:
                    continue
                tracked += 1
                if p["partition_size"] > 0 and p["bytes_left_to_move"] > 0:
                    continue
                invalid += 1
                self.logger.debug(
                    f"partition { p } is not reporting valid metadata"
                )

            if is_finished:
                return False
            if not remaining_replicas > 0:
                return False
            # At least one partition of the topic must be listed, and
            # every listed one must carry valid figures.
            return tracked > 0 and invalid == 0

        # Stop at the first node that does not qualify; the remaining
        # nodes are not queried in that polling round.
        for node in self.redpanda.nodes:
            if not node_reports_in_progress(node):
                return False
        return True

    self.redpanda.wait_until(
        validate_decommission_status,
        timeout_sec=30,
        backoff_sec=1,
        err_msg="Decommission status not reported as in_progress on all nodes",
        retry_on_exc=True,
    )

    # Raise the recovery rate again (2 << 30) so that the decommission,
    # held back above, can presumably run to completion.
    self._set_recovery_rate(2 << 30)

    self._wait_for_node_removed(to_decommission_id)
495+
441496 @cluster (num_nodes = 6 , log_allow_list = CHAOS_LOG_ALLOW_LIST )
442497 def test_decommissioning_node_rf_1_replica (self ):
443498 self .start_redpanda ()
0 commit comments