@@ -178,6 +178,21 @@ def wait_cluster_ready
178178 end
179179end
180180
# Sums the MinCount of every compute resource in every Slurm queue of the
# cluster configuration (the cluster's static node count, per the method name).
#
# Reads the YAML file located at node['cluster']['cluster_config_path'] and
# walks Scheduling -> SlurmQueues -> ComputeResources -> MinCount.
#
# @return [Integer] total of all MinCount values; 0 when the document is empty,
#   has no Scheduling/SlurmQueues section, or declares no MinCount anywhere
def get_static_node_count
  require 'yaml'

  # YAML.safe_load yields nil/false for an empty document; fall back to an
  # empty config so the lookups below do not raise NoMethodError.
  cluster_config = YAML.safe_load(File.read(node['cluster']['cluster_config_path'])) || {}
  slurm_queues = cluster_config.dig("Scheduling", "SlurmQueues") || []

  slurm_queues.sum do |queue_config|
    # A queue entry without ComputeResources simply contributes nothing.
    compute_resources = queue_config['ComputeResources'] || []
    # nil.to_i is 0, so a compute resource without MinCount counts as zero.
    compute_resources.sum { |compute_resource_config| compute_resource_config['MinCount'].to_i }
  end
end
195+
181196def wait_static_fleet_running
182197 ruby_block "wait for static fleet capacity" do
183198 block do
@@ -203,15 +218,21 @@ def check_for_protected_mode(fleet_status_command) # rubocop:disable Lint/Nested
203218 fleet_status_command = Shellwords . escape (
204219 "/usr/local/bin/get-compute-fleet-status.sh"
205220 )
221+
222+ total_static_node_count = get_static_node_count
223+ Chef ::Log . info ( "Count of cluster static nodes is #{ total_static_node_count } " )
224+
206225 # Example output for sinfo
207226 # sinfo -h -o '%N %t'
208227 # queue-0-dy-compute-resource-g4dn-0-[1-10],queue-1-dy-compute-resource-g4dn-1-[1-10] idle~
209228 # queue-2-dy-compute-resource-g4dn-2-[1-10],queue-3-dy-compute-resource-g4dn-3-[1-10] idle
210- until shell_out! ( "/bin/bash -c /usr/local/bin/is_fleet_ready.sh" ) . stdout . strip . empty?
211- check_for_protected_mode ( fleet_status_command )
229+ if total_static_node_count . to_i > 0
230+ until shell_out! ( "/bin/bash -c /usr/local/bin/is_fleet_ready.sh" ) . stdout . strip . empty?
231+ check_for_protected_mode ( fleet_status_command )
212232
213- Chef ::Log . info ( "Waiting for static fleet capacity provisioning" )
214- sleep ( 15 )
233+ Chef ::Log . info ( "Waiting for static fleet capacity provisioning" )
234+ sleep ( 15 )
235+ end
215236 end
216237 Chef ::Log . info ( "Static fleet capacity is ready" )
217238 end
0 commit comments