Skip to content

Commit 0eaec4c

Browse files
authored
Merge branch 'develop' into develop
2 parents 2b67150 + 01181cb commit 0eaec4c

File tree

4 files changed

+10
-4
lines changed

4 files changed

+10
-4
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ This file is used to list changes made in each version of the AWS ParallelCluste
1616
- Upgrade DCV to version 2024.0-19030.
1717
- Remove `berkshelf`. All cookbooks are local and do not need `berkshelf` dependency management.
1818

19+
**BUG FIXES**
20+
- Fix a race condition in CloudWatch Agent startup that could cause node bootstrap failures.
21+
1922
3.13.2
2023
------
2124

cookbooks/aws-parallelcluster-environment/files/cloudwatch/write_cloudwatch_agent_json.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from cloudwatch_agent_common_utils import render_jinja_template
1515

16-
AWS_CLOUDWATCH_CFG_PATH = "/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json"
16+
AWS_CLOUDWATCH_CFG_PATH = "/etc/parallelcluster/amazon-cloudwatch-agent/amazon-cloudwatch-agent.json"
1717
DEFAULT_METRICS_COLLECTION_INTERVAL = 60
1818

1919

@@ -45,6 +45,7 @@ def gethostname():
4545

4646
def write_config(config):
4747
"""Write config to AWS_CLOUDWATCH_CFG_PATH."""
48+
os.makedirs(os.path.dirname(AWS_CLOUDWATCH_CFG_PATH), exist_ok=True)
4849
with open(AWS_CLOUDWATCH_CFG_PATH, "w+", encoding="utf-8") as output_config_file:
4950
json.dump(config, output_config_file, indent=4)
5051

cookbooks/aws-parallelcluster-environment/resources/cloudwatch/partial/_cloudwatch_common.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ def package_path
163163
command "#{cookbook_virtualenv_path}/bin/python #{validator_script_path}"
164164
end unless redhat_on_docker?
165165

166+
CW_AGENT_CONFIG_JSON = '/etc/parallelcluster/amazon-cloudwatch-agent/amazon-cloudwatch-agent.json'
167+
166168
execute "cloudwatch-config-creation" do
167169
user 'root'
168170
timeout 300
@@ -182,6 +184,6 @@ def package_path
182184
execute "cloudwatch-agent-start" do
183185
user 'root'
184186
timeout 300
185-
command "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s || /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s"
187+
command "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -c file:#{CW_AGENT_CONFIG_JSON} -s || /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:#{CW_AGENT_CONFIG_JSON} -s"
186188
end unless node['cluster']['cw_logging_enabled'] != 'true' || on_docker?
187189
end

cookbooks/aws-parallelcluster-environment/spec/unit/resources/cloudwatch_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ def self.configure(chef_run)
255255
is_expected.to run_execute("cloudwatch-agent-start").with(
256256
user: 'root',
257257
timeout: 300,
258-
command: "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s || /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s"
258+
command: "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -c file:/etc/parallelcluster/amazon-cloudwatch-agent/amazon-cloudwatch-agent.json -s || /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/etc/parallelcluster/amazon-cloudwatch-agent/amazon-cloudwatch-agent.json -s"
259259
)
260260
end
261261
end
@@ -313,7 +313,7 @@ def self.configure(chef_run)
313313
is_expected.to run_execute("cloudwatch-agent-start").with(
314314
user: 'root',
315315
timeout: 300,
316-
command: "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s || /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s"
316+
command: "/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -c file:/etc/parallelcluster/amazon-cloudwatch-agent/amazon-cloudwatch-agent.json -s || /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/etc/parallelcluster/amazon-cloudwatch-agent/amazon-cloudwatch-agent.json -s"
317317
)
318318
end
319319
end

0 commit comments

Comments
 (0)