@@ -569,23 +569,45 @@ def revive_osd(self, osd=None, skip_admin_check=False):
569569
570570 def out_host (self , host = None ):
571571 """
572- Make all osds on a host out
572+ Mark all OSDs on a host out, provided at least min_in OSDs would remain in afterwards.
573573 :param host: Host to be marked.
574574 """
575- # check that all osd remotes have a valid console
575+ # Check that all OSD remotes have a valid console
576576 osds = self .ceph_manager .ctx .cluster .only (teuthology .is_type ('osd' , self .ceph_manager .cluster ))
577- if host is None :
578- host = random .choice (list (osds .remotes .keys ()))
579- self .log ("Removing all osds in host %s" % (host ,))
580-
581- for role in osds .remotes [host ]:
582- if not role .startswith ("osd." ):
583- continue
584- osdid = int (role .split ('.' )[1 ])
585- if self .in_osds .count (osdid ) == 0 :
586- continue
587- self .out_osd (osdid )
577+ all_hosts = list (osds .remotes .keys ())
578+ min_in = self .minin
579+
580+ if host is not None :
581+ all_hosts = [host ] if host in all_hosts else []
582+
583+ random .shuffle (all_hosts ) # Shuffle the list to pick hosts randomly
584+
585+ for host in all_hosts :
586+ self .log ("Checking the number of in OSDs in host %s" % (host ,))
587+
588+ # Count the number of in OSDs in the host
589+ in_host_osd_count = 0
590+ for role in osds .remotes [host ]:
591+ if role .startswith ("osd." ):
592+ osdid = int (role .split ('.' )[1 ])
593+ if osdid in self .in_osds :
594+ in_host_osd_count += 1
595+
596+ # Only take this host out if the number of in OSDs
597+ # that would remain afterwards is at least min_in
598+ if len (self .in_osds ) - in_host_osd_count >= min_in :
599+ self .log ("Removing all OSDs in host %s" % (host ,))
600+ # Proceed to take out OSDs
601+ for role in osds .remotes [host ]:
602+ if role .startswith ("osd." ):
603+ osdid = int (role .split ('.' )[1 ])
604+ if osdid in self .in_osds :
605+ self .out_osd (osdid )
606+ return
607+ else :
608+ self .log ("Host %s can't be trashed as it will left %d OSDs in" % (host , len (self .in_osds ) - in_host_osd_count ))
588609
610+ self .log ("No suitable host found to thrash" )
589611
590612 def out_osd (self , osd = None ):
591613 """
@@ -1254,7 +1276,6 @@ def choose_action(self):
12541276 (minin , minout , minlive , mindead , chance_down ))
12551277 actions = []
12561278 if thrash_hosts :
1257- self .log ("check thrash_hosts" )
12581279 if len (self .in_osds ) > minin :
12591280 self .log ("check thrash_hosts: in_osds > minin" )
12601281 actions .append ((self .out_host , 1.0 ,))
0 commit comments