@@ -74,6 +74,7 @@ class KubernetesClusterConnector(ClusterConnector):
7474 _unschedulable_pods : List [KubernetesPod ]
7575 _excluded_pods_by_ip : Mapping [str , List [KubernetesPod ]]
7676 _pods_by_ip : Mapping [str , List [KubernetesPod ]]
77+ _label_selectors : List [str ]
7778
7879 def __init__ (self , cluster : str , pool : Optional [str ], init_crd : bool = False ) -> None :
7980 super ().__init__ (cluster , pool )
@@ -84,6 +85,14 @@ def __init__(self, cluster: str, pool: Optional[str], init_crd: bool = False) ->
8485 )
8586 self ._nodes_by_ip = {}
8687 self ._init_crd_client = init_crd
88+ self ._label_selectors = []
89+ if self .pool :
90+ # TODO(CLUSTERMAN-659): Switch to using just pool_label_key once the new node labels are applied everywhere
91+ node_label_selector = self .pool_config .read_string (
92+ "node_label_key" ,
93+ default = self .pool_config .read_string ("pool_label_key" , default = "clusterman.com/pool" ),
94+ )
95+ self ._label_selectors .append (f"{ node_label_selector } ={ self .pool } " )
8796
8897 def reload_state (self ) -> None :
8998 logger .info ("Reloading nodes" )
@@ -113,6 +122,16 @@ def reload_client(self) -> None:
113122 else None
114123 )
115124
def set_label_selectors(self, label_selectors: List[str], add_to_existing: bool = False) -> None:
    """Replace or extend the label selectors used when listing nodes.

    The stored selectors are de-duplicated and kept in sorted order.

    :param List[str] label_selectors: selectors to apply (combined with logical AND)
    :param bool add_to_existing: when True, merge with the current selectors instead of replacing them
    """
    selectors = set(label_selectors)
    if add_to_existing:
        # Union with whatever is already configured rather than discarding it
        selectors |= set(self._label_selectors)
    self._label_selectors = sorted(selectors)
134+
116135 def get_num_removed_nodes_before_last_reload (self ) -> int :
117136 previous_nodes = self ._prev_nodes_by_ip
118137 current_nodes = self ._nodes_by_ip
@@ -270,6 +289,15 @@ def create_node_migration_resource(
270289 except Exception as e :
271290 logger .error (f"Failed creating migration event resource: { e } " )
272291
def has_enough_capacity_for_pods(self) -> bool:
    """Report whether no pod is unschedulable because of insufficient resources.

    :return: False as soon as one unschedulable pod has InsufficientResources as its reason, True otherwise
    """
    for _, reason in self.get_unschedulable_pods():
        if reason == PodUnschedulableReason.InsufficientResources:
            return False
    return True
300+
273301 def _evict_or_delete_pods (self , node_name : str , pods : List [KubernetesPod ], disable_eviction : bool ) -> bool :
274302 all_done = True
275303 action_name = "deleted" if disable_eviction else "evicted"
@@ -403,12 +431,8 @@ def _is_node_safe_to_kill(self, node_ip: str) -> bool:
403431 return True
404432
def _get_nodes_by_ip(self) -> Mapping[str, KubernetesNode]:
    """List nodes through the core API and index them by IP.

    The configured label selectors, if any, are comma-joined into a single
    ``label_selector`` argument; when none are configured no ``label_selector``
    argument is passed to the listing call.

    :return: mapping from each node's IP (as given by get_node_ip) to the node object
    """
    list_args = {}
    if self._label_selectors:
        list_args["label_selector"] = ",".join(self._label_selectors)
    nodes_by_ip = {}
    for node in self._core_api.list_node(**list_args).items:
        nodes_by_ip[get_node_ip(node)] = node
    return nodes_by_ip
413437
414438 def _get_pods_info (
0 commit comments