@@ -250,7 +250,7 @@ def test_slurm_from_login_nodes_in_private_network(
250250@pytest .mark .usefixtures ("region" , "os" , "instance" , "scheduler" )
251251@pytest .mark .slurm_scaling
252252def test_slurm_scaling (
253- scheduler , region , instance , pcluster_config_reader , clusters_factory , test_datadir , scheduler_commands_factory
253+ scheduler , region , os , instance , pcluster_config_reader , clusters_factory , test_datadir , scheduler_commands_factory
254254):
255255 """Test that slurm-specific scaling logic is behaving as expected for normal actions and failures."""
256256 cluster_config = pcluster_config_reader (scaledown_idletime = 3 )
@@ -291,6 +291,7 @@ def test_slurm_scaling(
291291 test_datadir ,
292292 cluster .cfn_name ,
293293 region ,
294+ os ,
294295 partition = "ondemand1" ,
295296 num_static_nodes = 2 ,
296297 num_dynamic_nodes = 3 ,
@@ -1171,6 +1172,7 @@ def _test_replace_down_nodes(
11711172 test_datadir ,
11721173 cluster_name ,
11731174 region ,
1175+ os ,
11741176 partition ,
11751177 num_static_nodes ,
11761178 num_dynamic_nodes ,
@@ -1194,7 +1196,10 @@ def _test_replace_down_nodes(
11941196 remote_command_executor .run_remote_script (str (test_datadir / "slurm_kill_slurmd_job.sh" ), args = [node ])
11951197 # set dynamic to down manually
11961198 _set_nodes_to_down_manually (scheduler_commands , dynamic_nodes )
1197- _wait_for_node_reset (scheduler_commands , static_nodes , dynamic_nodes )
1199+ # FIXME: We observe in 3.13.0 an increase in the bootstrap time for Rocky and RHEL.
1200+ # We must address it and restore the default wait time to 300s.
1201+ stop_max_delay_secs = 360 if os.startswith("rocky") else 300
1202+ _wait_for_node_reset (scheduler_commands , static_nodes , dynamic_nodes , stop_max_delay_secs = stop_max_delay_secs )
11981203 assert_num_instances_in_cluster (cluster_name , region , len (static_nodes ))
11991204
12001205
0 commit comments