File tree Expand file tree Collapse file tree 2 files changed +16
-8
lines changed
aws-parallelcluster-computefleet/templates/compute_fleet_status
aws-parallelcluster-slurm/libraries Expand file tree Collapse file tree 2 files changed +16
-8
lines changed Original file line number Diff line number Diff line change 11#! /bin/bash
22
3- sinfo_output=$( < %= node[' cluster' ][' slurm' ][' install_dir' ] %> /bin/sinfo -h -o ' %N %t' | grep -v -E ' (idle|alloc|mix|maint)$' )
4- while IFS= read -r line; do
5- nodelist=$( echo " $line " | awk ' {print $1}' )
6- < %= node[' cluster' ][' slurm' ][' install_dir' ] %> /bin/scontrol show hostnames " $nodelist " | { grep -E ' ^[a-z0-9\-]+\-st\-[a-z0-9\-]+\-[0-9]+.*' || true ; }
7- done <<< " $sinfo_output"
3+
4+
5+ cluster_static_node_count=$1
6+ if [[ -z " $cluster_static_node_count " ]]; then
7+ cluster_static_node_count=1
8+ fi
9+
10+ if [[ " $cluster_static_node_count " -ge " 1" ]]; then
11+ sinfo_output=$( < %= node[' cluster' ][' slurm' ][' install_dir' ] %> /bin/sinfo -h -o ' %N %t' | grep -v -E ' (idle|alloc|mix|maint)$' )
12+ while IFS= read -r line; do
13+ nodelist=$( echo " $line " | awk ' {print $1}' )
14+ < %= node[' cluster' ][' slurm' ][' install_dir' ] %> /bin/scontrol show hostnames " $nodelist " | { grep -E ' ^[a-z0-9\-]+\-st\-[a-z0-9\-]+\-[0-9]+.*' || true ; }
15+ done <<< " $sinfo_output"
16+ fi
Original file line number Diff line number Diff line change @@ -213,15 +213,14 @@ def check_for_protected_mode(fleet_status_command) # rubocop:disable Lint/Nested
213213 # sinfo -h -o '%N %t'
214214 # queue-0-dy-compute-resource-g4dn-0-[1-10],queue-1-dy-compute-resource-g4dn-1-[1-10] idle~
215215 # queue-2-dy-compute-resource-g4dn-2-[1-10],queue-3-dy-compute-resource-g4dn-3-[1-10] idle
216- until shell_out! ( "/bin/bash -c /usr/local/bin/is_fleet_ready.sh" ) . stdout . strip . empty?
217- check_for_protected_mode ( fleet_status_command )
216+ until shell_out! ( "/bin/bash -c /usr/local/bin/is_fleet_ready.sh #{ get_static_node_count . to_i } " ) . stdout . strip . empty?
217+ check_for_protected_mode ( fleet_status_command ) #TODO Separate check for dynamic Nodes during dfsmv2
218218
219219 Chef ::Log . info ( "Waiting for static fleet capacity provisioning" )
220220 sleep ( 15 )
221221 end
222222 Chef ::Log . info ( "Static fleet capacity is ready" )
223223 end
224- only_if { get_static_node_count . to_i > 0 }
225224 end
226225end
227226
You can’t perform that action at this time.
0 commit comments