Skip to content

Commit 90ec5ea

Browse files
author
Himani Anil Deshpande
committed
Add TOTAL_MIN_COUNT of a cluster as comment
* Run static fleet checks if there are any static nodes
1 parent e5e4c1f commit 90ec5ea

File tree

2 files changed

+11
-0
lines changed

2 files changed

+11
-0
lines changed

cookbooks/aws-parallelcluster-slurm/files/default/head_node_slurm/slurm/templates/slurm_parallelcluster.conf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,20 @@ include {{ output_dir }}/pcluster/slurm_parallelcluster_{{ queue.Name }}_partiti
3636
{% endfor %}
3737

3838
{% if ns.has_static %}
39+
{%- set ns.total_min_count = 0 %}
3940
SuspendExcNodes=
4041
{%- set ns.is_first = True %}
4142
{%- for queue in queues %}
4243
{% for compute_resource in queue.ComputeResources %}
4344
{% if compute_resource.MinCount > 0 %}
4445
{{- "," if not ns.is_first else "" -}}
4546
{{ queue.Name }}-st-{{ compute_resource.Name }}-[1-{{ compute_resource.MinCount }}]
47+
{%- set ns.total_min_count = ns.total_min_count + compute_resource.MinCount %}
4648
{%- set ns.is_first = False %}
4749
{%- endif %}
4850
{% endfor %}
4951
{% endfor %}
5052

5153
{% endif %}
54+
55+
#TOTAL_MIN_COUNT={{ ns.total_min_count }}

cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,11 @@ def wait_cluster_ready
178178
end
179179
end
180180

181+
# Returns the cluster-wide static node count recorded as a
# "#TOTAL_MIN_COUNT=<n>" comment line in slurm_parallelcluster.conf.
#
# The previous implementation shelled out to `cat | grep | cut` with the
# pattern written as '\([0-9]*\)' inside a double-quoted Ruby string. Ruby
# drops the backslashes there, so grep received literal parentheses and the
# marker never matched, making this method always return "". Parsing the
# file in Ruby avoids the shell-quoting problem entirely.
#
# @return [String] the digits following "#TOTAL_MIN_COUNT=", or "" when the
#   config file is absent/unreadable or carries no marker (so `.to_i` is 0)
def get_static_node_count
  conf_path = "#{node['cluster']['slurm']['install_dir']}/etc/slurm_parallelcluster.conf"
  # Best effort, like the original pipeline: a missing file means "no count".
  return '' unless File.file?(conf_path) && File.readable?(conf_path)

  # String#[] with a capture index yields nil when nothing matches; to_s maps that to "".
  File.read(conf_path)[/#TOTAL_MIN_COUNT=([0-9]+)/, 1].to_s
end
185+
181186
def wait_static_fleet_running
182187
ruby_block "wait for static fleet capacity" do
183188
block do
@@ -203,6 +208,7 @@ def check_for_protected_mode(fleet_status_command) # rubocop:disable Lint/Nested
203208
fleet_status_command = Shellwords.escape(
204209
"/usr/local/bin/get-compute-fleet-status.sh"
205210
)
211+
206212
# Example output for sinfo
207213
# sinfo -h -o '%N %t'
208214
# queue-0-dy-compute-resource-g4dn-0-[1-10],queue-1-dy-compute-resource-g4dn-1-[1-10] idle~
@@ -215,6 +221,7 @@ def check_for_protected_mode(fleet_status_command) # rubocop:disable Lint/Nested
215221
end
216222
Chef::Log.info("Static fleet capacity is ready")
217223
end
224+
only_if { get_static_node_count.to_i > 0 }
218225
end
219226
end
220227

0 commit comments

Comments
 (0)