6666 AutoscalingDocker ,
6767 get_docker_client ,
6868)
69+ from simcore_service_autoscaling .utils .auto_scaling_core import (
70+ node_host_name_from_ec2_private_dns ,
71+ )
6972from simcore_service_autoscaling .utils .utils_docker import (
7073 _OSPARC_NODE_EMPTY_DATETIME_LABEL_KEY ,
7174 _OSPARC_NODE_TERMINATION_PROCESS_LABEL_KEY ,
@@ -2088,6 +2091,7 @@ async def test_warm_buffers_only_replace_hot_buffer_if_service_is_started_issue7
20882091 patch_ec2_client_launch_instances_min_number_of_instances : mock .Mock ,
20892092 minimal_configuration : None ,
20902093 with_instances_machines_hot_buffer : EnvVarsDict ,
2094+ with_drain_nodes_labelled : bool ,
20912095 ec2_client : EC2Client ,
20922096 initialized_app : FastAPI ,
20932097 app_settings : ApplicationSettings ,
@@ -2097,18 +2101,27 @@ async def test_warm_buffers_only_replace_hot_buffer_if_service_is_started_issue7
20972101 [int , InstanceTypeType , InstanceStateNameType , list [DockerGenericTag ] | None ],
20982102 Awaitable [list [str ]],
20992103 ],
2104+ create_services_batch : Callable [[_ScaleUpParams ], Awaitable [list [Service ]]],
2105+ hot_buffer_instance_type : InstanceTypeType ,
21002106 spied_cluster_analysis : MockType ,
21012107 instance_type_filters : Sequence [FilterTypeDef ],
21022108 stopped_instance_type_filters : Sequence [FilterTypeDef ],
21032109 mock_find_node_with_name_returns_fake_node : mock .Mock ,
21042110 mock_compute_node_used_resources : mock .Mock ,
21052111 mock_docker_tag_node : mock .Mock ,
2112+ mocker : MockerFixture ,
2113+ fake_node : Node ,
21062114):
21072115 # NOTE: https://github.com/ITISFoundation/osparc-simcore/issues/7071
21082116
2109- # pre-requisites
2117+ #
2118+ # PRE-requisites
2119+ #
21102120 assert app_settings .AUTOSCALING_EC2_INSTANCES
21112121 assert app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_MACHINES_BUFFER > 0
2122+ num_hot_buffer = (
2123+ app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_MACHINES_BUFFER
2124+ )
21122125
21132126 # we have nothing running now
21142127 all_instances = await ec2_client .describe_instances ()
@@ -2121,78 +2134,95 @@ async def test_warm_buffers_only_replace_hot_buffer_if_service_is_started_issue7
21212134 await assert_autoscaled_dynamic_ec2_instances (
21222135 ec2_client ,
21232136 expected_num_reservations = 1 ,
2124- expected_num_instances = app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_MACHINES_BUFFER ,
2125- expected_instance_type = cast (
2126- InstanceTypeType ,
2127- next (
2128- iter (app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_ALLOWED_TYPES )
2129- ),
2130- ),
2137+ expected_num_instances = num_hot_buffer ,
2138+ expected_instance_type = hot_buffer_instance_type ,
21312139 expected_instance_state = "running" ,
21322140 expected_additional_tag_keys = list (ec2_instance_custom_tags ),
21332141 instance_filters = instance_type_filters ,
21342142 )
2135- # calling again should attach the new nodes to the reserve, but nothing should start
2143+ # this brings a new analysis
21362144 await auto_scale_cluster (
21372145 app = initialized_app , auto_scaling_mode = DynamicAutoscaling ()
21382146 )
2147+ spied_cluster = assert_cluster_state (
2148+ spied_cluster_analysis , expected_calls = 2 , expected_num_machines = 5
2149+ )
2150+ # calling again should attach the new nodes to the reserve, but nothing should start
2151+ fake_attached_node_base = deepcopy (fake_node )
2152+ assert fake_attached_node_base .spec
2153+ fake_attached_node_base .spec .availability = (
2154+ Availability .active if with_drain_nodes_labelled else Availability .drain
2155+ )
2156+ assert fake_attached_node_base .spec .labels
2157+ assert app_settings .AUTOSCALING_NODES_MONITORING
2158+ expected_docker_node_tags = {
2159+ tag_key : "true"
2160+ for tag_key in (
2161+ app_settings .AUTOSCALING_NODES_MONITORING .NODES_MONITORING_NODE_LABELS
2162+ + app_settings .AUTOSCALING_NODES_MONITORING .NODES_MONITORING_NEW_NODES_LABELS
2163+ )
2164+ } | {
2165+ DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY : f"{ hot_buffer_instance_type } "
2166+ }
2167+ fake_attached_node_base .spec .labels |= expected_docker_node_tags | {
2168+ _OSPARC_SERVICE_READY_LABEL_KEY : "true"
2169+ }
2170+ fake_hot_buffer_nodes = []
2171+ for i in range (num_hot_buffer ):
2172+ node = fake_attached_node_base .model_copy (deep = True )
2173+ assert node .description
2174+ node .description .hostname = node_host_name_from_ec2_private_dns (
2175+ spied_cluster .pending_ec2s [i ].ec2_instance
2176+ )
2177+ fake_hot_buffer_nodes .append (node )
2178+ auto_scaling_mode = DynamicAutoscaling ()
2179+ mocker .patch .object (
2180+ auto_scaling_mode ,
2181+ "get_monitored_nodes" ,
2182+ autospec = True ,
2183+ return_value = fake_hot_buffer_nodes ,
2184+ )
2185+ await auto_scale_cluster (app = initialized_app , auto_scaling_mode = auto_scaling_mode )
21392186 await assert_autoscaled_dynamic_ec2_instances (
21402187 ec2_client ,
21412188 expected_num_reservations = 1 ,
2142- expected_num_instances = app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_MACHINES_BUFFER ,
2143- expected_instance_type = cast (
2144- InstanceTypeType ,
2145- next (
2146- iter (app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_ALLOWED_TYPES )
2147- ),
2148- ),
2189+ expected_num_instances = num_hot_buffer ,
2190+ expected_instance_type = hot_buffer_instance_type ,
21492191 expected_instance_state = "running" ,
21502192 expected_additional_tag_keys = list (ec2_instance_custom_tags ),
21512193 instance_filters = instance_type_filters ,
21522194 )
2195+ spied_cluster = assert_cluster_state (
2196+ spied_cluster_analysis , expected_calls = 1 , expected_num_machines = 5
2197+ )
2198+ assert len (spied_cluster .buffer_drained_nodes ) == num_hot_buffer
2199+ assert not spied_cluster .buffer_ec2s
21532200
21542201 # have a few warm buffers ready with the same type as the hot buffer machines
2155- buffer_machines = await create_buffer_machines (
2202+ await create_buffer_machines (
21562203 buffer_count ,
2157- cast (
2158- InstanceTypeType ,
2159- next (
2160- iter (app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_ALLOWED_TYPES )
2161- ),
2162- ),
2204+ hot_buffer_instance_type ,
21632205 "stopped" ,
21642206 None ,
21652207 )
21662208 await assert_autoscaled_dynamic_warm_pools_ec2_instances (
21672209 ec2_client ,
21682210 expected_num_reservations = 1 ,
21692211 expected_num_instances = buffer_count ,
2170- expected_instance_type = cast (
2171- InstanceTypeType ,
2172- next (
2173- iter (app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_ALLOWED_TYPES )
2174- ),
2175- ),
2212+ expected_instance_type = hot_buffer_instance_type ,
21762213 expected_instance_state = "stopped" ,
21772214 expected_additional_tag_keys = list (ec2_instance_custom_tags ),
21782215 expected_pre_pulled_images = None ,
21792216 instance_filters = stopped_instance_type_filters ,
21802217 )
21812218
21822219 # calling again should do nothing
2183- await auto_scale_cluster (
2184- app = initialized_app , auto_scaling_mode = DynamicAutoscaling ()
2185- )
2220+ await auto_scale_cluster (app = initialized_app , auto_scaling_mode = auto_scaling_mode )
21862221 await assert_autoscaled_dynamic_ec2_instances (
21872222 ec2_client ,
21882223 expected_num_reservations = 1 ,
2189- expected_num_instances = app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_MACHINES_BUFFER ,
2190- expected_instance_type = cast (
2191- InstanceTypeType ,
2192- next (
2193- iter (app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_ALLOWED_TYPES )
2194- ),
2195- ),
2224+ expected_num_instances = num_hot_buffer ,
2225+ expected_instance_type = hot_buffer_instance_type ,
21962226 expected_instance_state = "running" ,
21972227 expected_additional_tag_keys = list (ec2_instance_custom_tags ),
21982228 instance_filters = instance_type_filters ,
@@ -2201,12 +2231,73 @@ async def test_warm_buffers_only_replace_hot_buffer_if_service_is_started_issue7
22012231 ec2_client ,
22022232 expected_num_reservations = 1 ,
22032233 expected_num_instances = buffer_count ,
2204- expected_instance_type = cast (
2205- InstanceTypeType ,
2206- next (
2207- iter (app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_ALLOWED_TYPES )
2208- ),
2234+ expected_instance_type = hot_buffer_instance_type ,
2235+ expected_instance_state = "stopped" ,
2236+ expected_additional_tag_keys = list (ec2_instance_custom_tags ),
2237+ expected_pre_pulled_images = None ,
2238+ instance_filters = stopped_instance_type_filters ,
2239+ )
2240+ spied_cluster = assert_cluster_state (
2241+ spied_cluster_analysis , expected_calls = 1 , expected_num_machines = 5
2242+ )
2243+ assert len (spied_cluster .buffer_drained_nodes ) == num_hot_buffer
2244+ assert len (spied_cluster .buffer_ec2s ) == buffer_count
2245+
2246+ #
2247+ # BUG REPRODUCTION
2248+ #
2249+ # start a service that imposes same type as the hot buffer
2250+ assert (
2251+ hot_buffer_instance_type == "t2.xlarge"
2252+ ), "the test is hard-coded for this type and accordingly resource. If this changed then the resource shall be changed too"
2253+ scale_up_params = _ScaleUpParams (
2254+ imposed_instance_type = hot_buffer_instance_type ,
2255+ service_resources = Resources (
2256+ cpus = 2 , ram = TypeAdapter (ByteSize ).validate_python ("1Gib" )
22092257 ),
2258+ num_services = 1 ,
2259+ expected_instance_type = "t2.xlarge" ,
2260+ expected_num_instances = 1 ,
2261+ )
2262+ await create_services_batch (scale_up_params )
2263+ # this should trigger usage of the hot buffer and the warm buffers should remain stopped
2264+ await auto_scale_cluster (app = initialized_app , auto_scaling_mode = auto_scaling_mode )
2265+ await assert_autoscaled_dynamic_ec2_instances (
2266+ ec2_client ,
2267+ expected_num_reservations = 1 ,
2268+ expected_num_instances = num_hot_buffer ,
2269+ expected_instance_type = hot_buffer_instance_type ,
2270+ expected_instance_state = "running" ,
2271+ expected_additional_tag_keys = list (ec2_instance_custom_tags ),
2272+ instance_filters = instance_type_filters ,
2273+ )
2274+ await assert_autoscaled_dynamic_warm_pools_ec2_instances (
2275+ ec2_client ,
2276+ expected_num_reservations = 1 ,
2277+ expected_num_instances = buffer_count ,
2278+ expected_instance_type = hot_buffer_instance_type ,
2279+ expected_instance_state = "stopped" ,
2280+ expected_additional_tag_keys = list (ec2_instance_custom_tags ),
2281+ expected_pre_pulled_images = None ,
2282+ instance_filters = stopped_instance_type_filters ,
2283+ )
2284+
2285+ # this should trigger replacement of the hot buffer by 1 warm buffer
2286+ await auto_scale_cluster (app = initialized_app , auto_scaling_mode = auto_scaling_mode )
2287+ await assert_autoscaled_dynamic_ec2_instances (
2288+ ec2_client ,
2289+ expected_num_reservations = 1 ,
2290+ expected_num_instances = num_hot_buffer + 1 ,
2291+ expected_instance_type = hot_buffer_instance_type ,
2292+ expected_instance_state = "running" ,
2293+ expected_additional_tag_keys = list (ec2_instance_custom_tags ),
2294+ instance_filters = instance_type_filters ,
2295+ )
2296+ await assert_autoscaled_dynamic_warm_pools_ec2_instances (
2297+ ec2_client ,
2298+ expected_num_reservations = 1 ,
2299+ expected_num_instances = buffer_count - 1 ,
2300+ expected_instance_type = hot_buffer_instance_type ,
22102301 expected_instance_state = "stopped" ,
22112302 expected_additional_tag_keys = list (ec2_instance_custom_tags ),
22122303 expected_pre_pulled_images = None ,
0 commit comments