 import re
 from typing import Final
 
-import aiodocker
 from models_library.docker import DockerLabelKey
 from models_library.generated_models.docker_rest_api import (
     Node,
 from tenacity.stop import stop_after_delay
 from tenacity.wait import wait_fixed
 
-from .models import Resources
+from ..models import Resources
+from ..modules.docker import AutoscalingDocker
 
 logger = logging.getLogger(__name__)
 _NANO_CPU: Final[float] = 10**9
 _TIMEOUT_WAITING_FOR_NODES_S: Final[int] = 5 * _MINUTE
 
 
-async def get_monitored_nodes(node_labels: list[DockerLabelKey]) -> list[Node]:
-    async with aiodocker.Docker() as docker:
-        nodes = parse_obj_as(
-            list[Node],
-            await docker.nodes.list(
-                filters={"node.label": [f"{label}=true" for label in node_labels]}
-            ),
-        )
+async def get_monitored_nodes(
+    docker_client: AutoscalingDocker, node_labels: list[DockerLabelKey]
+) -> list[Node]:
+    nodes = parse_obj_as(
+        list[Node],
+        await docker_client.nodes.list(
+            filters={"node.label": [f"{label}=true" for label in node_labels]}
+        ),
+    )
     return nodes
 
 
-async def remove_monitored_down_nodes(nodes: list[Node]) -> list[Node]:
+async def remove_monitored_down_nodes(
+    docker_client: AutoscalingDocker, nodes: list[Node]
+) -> list[Node]:
     """removes docker nodes that are in the down state"""
 
     def _check_if_node_is_removable(node: Node) -> bool:
@@ -69,15 +72,15 @@ def _check_if_node_is_removable(node: Node) -> bool:
         return False
 
     nodes_that_need_removal = [n for n in nodes if _check_if_node_is_removable(n)]
-    async with aiodocker.Docker() as docker:
-        for node in nodes_that_need_removal:
-            assert node.ID  # nosec
-            with log_context(logger, logging.INFO, msg=f"remove {node.ID=}"):
-                await docker.nodes.remove(node_id=node.ID)
+    for node in nodes_that_need_removal:
+        assert node.ID  # nosec
+        with log_context(logger, logging.INFO, msg=f"remove {node.ID=}"):
+            await docker_client.nodes.remove(node_id=node.ID)
     return nodes_that_need_removal
 
 
 async def pending_service_tasks_with_insufficient_resources(
+    docker_client: AutoscalingDocker,
     service_labels: list[DockerLabelKey],
 ) -> list[Task]:
     """
@@ -88,16 +91,15 @@ async def pending_service_tasks_with_insufficient_resources(
     - have an error message with "insufficient resources"
     - are not scheduled on any node
     """
-    async with aiodocker.Docker() as docker:
-        tasks = parse_obj_as(
-            list[Task],
-            await docker.tasks.list(
-                filters={
-                    "desired-state": "running",
-                    "label": service_labels,
-                }
-            ),
-        )
+    tasks = parse_obj_as(
+        list[Task],
+        await docker_client.tasks.list(
+            filters={
+                "desired-state": "running",
+                "label": service_labels,
+            }
+        ),
+    )
 
     def _is_task_waiting_for_resources(task: Task) -> bool:
         # NOTE: https://docs.docker.com/engine/swarm/how-swarm-mode-works/swarm-task-states/
@@ -171,36 +173,38 @@ def get_max_resources_from_docker_task(task: Task) -> Resources:
     return Resources(cpus=0, ram=ByteSize(0))
 
 
-async def compute_node_used_resources(node: Node) -> Resources:
+async def compute_node_used_resources(
+    docker_client: AutoscalingDocker,
+    node: Node,
+) -> Resources:
     cluster_resources_counter = collections.Counter({"ram": 0, "cpus": 0})
-    async with aiodocker.Docker() as docker:
-        all_tasks_on_node = parse_obj_as(
-            list[Task], await docker.tasks.list(filters={"node": node.ID})
-        )
-        for task in all_tasks_on_node:
-            assert task.Status  # nosec
-            if (
-                task.Status.State in _TASK_STATUS_WITH_ASSIGNED_RESOURCES
-                and task.Spec
-                and task.Spec.Resources
-                and task.Spec.Resources.Reservations
-            ):
-                task_reservations = task.Spec.Resources.Reservations.dict(
-                    exclude_none=True
-                )
-                cluster_resources_counter.update(
-                    {
-                        "ram": task_reservations.get("MemoryBytes", 0),
-                        "cpus": task_reservations.get("NanoCPUs", 0) / _NANO_CPU,
-                    }
-                )
+    all_tasks_on_node = parse_obj_as(
+        list[Task], await docker_client.tasks.list(filters={"node": node.ID})
+    )
+    for task in all_tasks_on_node:
+        assert task.Status  # nosec
+        if (
+            task.Status.State in _TASK_STATUS_WITH_ASSIGNED_RESOURCES
+            and task.Spec
+            and task.Spec.Resources
+            and task.Spec.Resources.Reservations
+        ):
+            task_reservations = task.Spec.Resources.Reservations.dict(exclude_none=True)
+            cluster_resources_counter.update(
+                {
+                    "ram": task_reservations.get("MemoryBytes", 0),
+                    "cpus": task_reservations.get("NanoCPUs", 0) / _NANO_CPU,
+                }
+            )
     return Resources.parse_obj(dict(cluster_resources_counter))
 
 
-async def compute_cluster_used_resources(nodes: list[Node]) -> Resources:
+async def compute_cluster_used_resources(
+    docker_client: AutoscalingDocker, nodes: list[Node]
+) -> Resources:
     """Returns the total amount of resources (reservations) used on each of the given nodes"""
     list_of_used_resources = await logged_gather(
-        *(compute_node_used_resources(node) for node in nodes)
+        *(compute_node_used_resources(docker_client, node) for node in nodes)
     )
     counter = collections.Counter({k: 0 for k in Resources.__fields__.keys()})
     for result in list_of_used_resources:
@@ -243,32 +247,37 @@ async def get_docker_swarm_join_bash_command() -> str:
     before_sleep=before_sleep_log(logger, logging.WARNING),
     wait=wait_fixed(5),
 )
-async def wait_for_node(node_name: str) -> Node:
-    async with aiodocker.Docker() as docker:
-        list_of_nodes = await docker.nodes.list(filters={"name": node_name})
+async def wait_for_node(
+    docker_client: AutoscalingDocker,
+    node_name: str,
+) -> Node:
+    list_of_nodes = await docker_client.nodes.list(filters={"name": node_name})
     if not list_of_nodes:
         raise TryAgain
     return parse_obj_as(Node, list_of_nodes[0])
 
 
 async def tag_node(
-    node: Node, *, tags: dict[DockerLabelKey, str], available: bool
+    docker_client: AutoscalingDocker,
+    node: Node,
+    *,
+    tags: dict[DockerLabelKey, str],
+    available: bool,
 ) -> None:
     with log_context(
         logger, logging.DEBUG, msg=f"tagging {node.ID=} with {tags=} and {available=}"
     ):
-        async with aiodocker.Docker() as docker:
-            assert node.ID  # nosec
-            assert node.Version  # nosec
-            assert node.Version.Index  # nosec
-            assert node.Spec  # nosec
-            assert node.Spec.Role  # nosec
-            await docker.nodes.update(
-                node_id=node.ID,
-                version=node.Version.Index,
-                spec={
-                    "Availability": "active" if available else "drain",
-                    "Labels": tags,
-                    "Role": node.Spec.Role.value,
-                },
-            )
+        assert node.ID  # nosec
+        assert node.Version  # nosec
+        assert node.Version.Index  # nosec
+        assert node.Spec  # nosec
+        assert node.Spec.Role  # nosec
+        await docker_client.nodes.update(
+            node_id=node.ID,
+            version=node.Version.Index,
+            spec={
+                "Availability": "active" if available else "drain",
+                "Labels": tags,
+                "Role": node.Spec.Role.value,
+            },
+        )
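
Below, outside the diff, is a minimal usage sketch of the refactored API: every helper now receives a shared AutoscalingDocker client injected by the caller instead of opening a new aiodocker.Docker() per call. The import paths, the direct AutoscalingDocker() construction, and the monitoring label are assumptions made for illustration; only the helper signatures come from the diff above.

import asyncio

# assumed module paths, for illustration only
from simcore_service_autoscaling.modules.docker import AutoscalingDocker
from simcore_service_autoscaling.utils.utils_docker import (
    compute_cluster_used_resources,
    get_monitored_nodes,
    remove_monitored_down_nodes,
)


async def check_cluster() -> None:
    # assumption: AutoscalingDocker behaves like an aiodocker.Docker client
    # (async context manager exposing .nodes / .tasks), as the call sites above imply
    async with AutoscalingDocker() as docker_client:
        # nodes carrying the monitored label (hypothetical label value)
        nodes = await get_monitored_nodes(
            docker_client, node_labels=["io.simcore.autoscaling.monitored"]
        )
        # prune nodes reported as "down", then sum reservations on the rest
        removed = await remove_monitored_down_nodes(docker_client, nodes)
        active_nodes = [n for n in nodes if n not in removed]
        used = await compute_cluster_used_resources(docker_client, active_nodes)
        print(f"{used=}")


asyncio.run(check_cluster())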