Skip to content

Commit c68e181

Browse files
committed
Use checkpoint system
1 parent 15390a0 commit c68e181

File tree

4 files changed

+22
-3
lines changed

4 files changed

+22
-3
lines changed

cookbooks/aws-parallelcluster-computefleet/recipes/config/config_check_update_systemd_service.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@
3838
action :create_if_missing
3939
end
4040

41+
# Create the update checkpoint file as an empty, root-owned file (mode 0644).
# The check-update service template reads this path as LOCAL_CHECKPOINT and
# compares its content with the shared update file.
# :create_if_missing is used so an already existing checkpoint is left as it is.
file node['cluster']['update_checkpoint'] do
42+
content ''
43+
owner 'root'
44+
group 'root'
45+
mode '0644'
46+
action :create_if_missing
47+
end
48+
4149
# Enable the check-update.timer unit and start it.
service 'check-update.timer' do
4250
action [:enable, :start]
4351
end

cookbooks/aws-parallelcluster-computefleet/templates/check_update/check-update.service.erb

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,18 @@ Description=Check for recent file modifications
33

44
[Service]
55
Type=oneshot
6-
ExecStart=/bin/bash -c "echo 'Checking file: <%= node['cluster']['shared_update_path'] %>'; if [ $(( $(date +%%s) - $(stat -c %%Y <%= node['cluster']['shared_update_path'] %>) )) -lt 80 ]; then echo 'File modified recently, running action'; <%= node['cluster']['scripts_dir'] %>/cfn-hup-update-action.sh; else echo 'File not modified recently'; fi"
7-
6+
# Run cfn-hup-update-action.sh when the content of the shared update file
# differs from the content of the local checkpoint file.
#  - If the shared file does not exist, exit 0 without doing anything.
#  - A missing checkpoint file is treated as empty content.
#  - The checkpoint is written first; the action runs only if that write succeeds.
# NOTE(review): because the checkpoint is updated before the action runs, a failed
# action is not retried until the shared file content changes again - confirm
# this is the intended behavior.
ExecStart=/bin/bash -c '\
7+
SHARED_FILE="<%= node['cluster']['shared_update_path'] %>"; \
8+
LOCAL_CHECKPOINT="<%= node['cluster']['update_checkpoint'] %>"; \
9+
\
10+
[ ! -f "$SHARED_FILE" ] && exit 0; \
11+
\
12+
CURRENT_UPDATE=$(cat "$SHARED_FILE"); \
13+
LAST_APPLIED=$([ -f "$LOCAL_CHECKPOINT" ] && cat "$LOCAL_CHECKPOINT" || echo ""); \
14+
\
15+
if [ "$CURRENT_UPDATE" != "$LAST_APPLIED" ]; then \
16+
echo "$CURRENT_UPDATE" > "$LOCAL_CHECKPOINT" && <%= node['cluster']['scripts_dir'] %>/cfn-hup-update-action.sh; \
17+
fi'
818

919
[Install]
1020
WantedBy=multi-user.target

cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/cfn-hook-update.conf.erb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@ triggers=post.update
33
<% case node['cluster']['node_type'] -%>
44
<% when 'HeadNode', 'LoginNode' -%>
55
path=Resources.<%= @launch_template_resource_id %>.Metadata.AWS::CloudFormation::Init
6-
action=<% if node['cluster']['node_type'] == 'HeadNode' -%>echo "Update triggered at $(date) by <%= node['cluster']['node_type'] %>" >> <%= @update_dir %>; <% end -%>PATH=/usr/local/bin:/bin:/usr/bin:/opt/aws/bin;. /etc/parallelcluster/pcluster_cookbook_environment.sh; $CFN_BOOTSTRAP_VIRTUALENV_PATH/cfn-init -v --stack <%= @stack_id %> --resource <%= @launch_template_resource_id %> --configsets update --region <%= @region %> --url <%= @cloudformation_url %> --role <%= @cfn_init_role %>
6+
<%# On the HeadNode only, append an "Update at <date>" line to @update_dir, then (for both node types in this branch) run cfn-init with the "update" configset. NOTE(review): @update_dir is presumably the shared update file polled by the check-update service; confirm against the recipe that renders this template. -%>
action=<% if node['cluster']['node_type'] == 'HeadNode' -%>echo "Update at $(date)" >> <%= @update_dir %>; <% end -%>PATH=/usr/local/bin:/bin:/usr/bin:/opt/aws/bin;. /etc/parallelcluster/pcluster_cookbook_environment.sh; $CFN_BOOTSTRAP_VIRTUALENV_PATH/cfn-init -v --stack <%= @stack_id %> --resource <%= @launch_template_resource_id %> --configsets update --region <%= @region %> --url <%= @cloudformation_url %> --role <%= @cfn_init_role %>
77
<% end %>
88
runas=root

cookbooks/aws-parallelcluster-shared/attributes/cluster.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
# Shared file used to manage inplace updates
1313
default['cluster']['shared_update_path'] = "#{node['cluster']['shared_dir']}/check_update"
14+
# Checkpoint file (under scripts_dir) that the check-update service compares with the shared update file
default['cluster']['update_checkpoint'] = "#{node['cluster']['scripts_dir']}/update_checkpoint"
1415

1516
# Slurm_plugin_dir is used by slurm cookbook and custom_actions recipe
1617
default['cluster']['slurm_plugin_dir'] = "#{node['cluster']['etc_dir']}/slurm_plugin"

0 commit comments

Comments
 (0)