Skip to content

Commit dacd055

Browse files
author
Himani Anil Deshpande
committed
Change permissions of cfn-hup files and directories so that only the root user can access them
* Use the proxy configured in /etc/boto.cfg, if any, when creating the EC2 client.
1 parent 2ae4854 commit dacd055

File tree

9 files changed

+122
-57
lines changed

9 files changed

+122
-57
lines changed
Lines changed: 77 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
1+
# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License").
44
# You may not use this file except in compliance with the
@@ -11,49 +11,88 @@
1111
# limitations under the License.
1212

1313

14-
14+
import configparser
1515
import argparse
1616
from email import message_from_string
1717
import json
1818
import os
1919
import boto3
20+
from botocore.config import Config
2021
import yaml
2122
import base64
2223
import logging
2324
from retrying import retry
2425

25-
SHARED_LOCATION = "/opt/parallelcluster/"
26-
27-
COMPUTE_FLEET_SHARED_LOCATION = SHARED_LOCATION + 'shared/'
26+
COMPUTE_FLEET_SHARED_LOCATION = "/opt/parallelcluster/shared/"
2827

29-
COMPUTE_FLEET_DNA_LOC = COMPUTE_FLEET_SHARED_LOCATION + 'dna/'
28+
COMPUTE_FLEET_SHARED_DNA_LOCATION = COMPUTE_FLEET_SHARED_LOCATION + 'dna/'
3029

31-
COMPUTE_FLEET_LAUNCH_TEMPLATE_ID = COMPUTE_FLEET_SHARED_LOCATION + 'launch-templates-config.json'
30+
COMPUTE_FLEET_LAUNCH_TEMPLATE_CONFIG = COMPUTE_FLEET_SHARED_LOCATION + 'launch-templates-config.json'
3231

3332
logger = logging.getLogger(__name__)
3433
logging.basicConfig(level=logging.INFO)
3534

36-
def get_compute_launch_template_ids(shared_storage):
37-
"""Load launch-templates-config.json which contains ID, Version number and Logical ID of all queues in Compute Fleet's Launch Template."""
35+
def get_compute_launch_template_ids(lt_config_file_name):
36+
"""Load launch-templates-config.json, which contains the ID, version number and logical ID of the launch template for every compute resource of each queue in the compute fleet.
37+
The format of launch-templates-config.json is
38+
{
39+
"Queues": {
40+
"queue1": {
41+
"ComputeResources": {
42+
"queue1-i1": {
43+
"LaunchTemplate": {
44+
"Version": "1",
45+
"LogicalId": "LaunchTemplate123456789012345",
46+
"Id": "lt-12345678901234567"
47+
}
48+
}
49+
}
50+
},
51+
"queue2": {
52+
"ComputeResources": {
53+
"queue2-i1": {
54+
"LaunchTemplate": {
55+
"Version": "1",
56+
"LogicalId": "LaunchTemplate012345678901234",
57+
"Id": "lt-01234567890123456"
58+
}
59+
}
60+
}
61+
}
62+
}
63+
}
64+
"""
65+
lt_config = None
3866
try:
39-
with open(shared_storage, 'r') as file:
67+
with open(lt_config_file_name, 'r') as file:
4068
lt_config = json.loads(file.read())
4169
except Exception as err:
42-
logger.warn("Unable to read %s due to %s", shared_storage, err)
70+
logger.warn("Unable to read %s due to %s", lt_config_file_name, err)
4371

4472
return lt_config
4573

4674

47-
48-
def create_dna_files(args):
75+
def share_compute_fleet_dna(args):
4976
"""Creates all dna.json for each queue in cluster."""
50-
lt_config = get_compute_launch_template_ids(COMPUTE_FLEET_LAUNCH_TEMPLATE_ID)
77+
lt_config = get_compute_launch_template_ids(COMPUTE_FLEET_LAUNCH_TEMPLATE_CONFIG)
5178
if lt_config:
5279
all_queues = lt_config.get('Queues')
5380
for _, queues in all_queues.items():
5481
compute_resources = queues.get('ComputeResources')
5582
for _, compute_res in compute_resources.items():
56-
get_latest_dns_data(compute_res, COMPUTE_FLEET_DNA_LOC, args)
83+
get_latest_dna_data(compute_res, COMPUTE_FLEET_SHARED_DNA_LOCATION, args)
84+
85+
86+
# FIXME: Fix Code Duplication
87+
def parse_proxy_config():
88+
config = configparser.RawConfigParser()
89+
config.read("/etc/boto.cfg")
90+
proxy_config = Config()
91+
if config.has_option("Boto", "proxy") and config.has_option("Boto", "proxy_port"):
92+
proxy = config.get("Boto", "proxy")
93+
proxy_port = config.get("Boto", "proxy_port")
94+
proxy_config = Config(proxies={"https": f"{proxy}:{proxy_port}"})
95+
return proxy_config
5796

5897

5998
@retry(stop_max_attempt_number=5, wait_fixed=3000)
@@ -65,8 +104,11 @@ def get_user_data(lt_id, lt_version, region_name):
65104
:param region_name: AWS region name (eg: us-east-1)
66105
:return: User_data in MIME format
67106
"""
107+
decoded_data = None
68108
try:
69-
ec2_client = boto3.client("ec2", region_name=region_name)
109+
proxy_config = parse_proxy_config()
110+
111+
ec2_client = boto3.client("ec2", region_name=region_name, config=proxy_config)
70112
response = ec2_client.describe_launch_template_versions(
71113
LaunchTemplateId= lt_id,
72114
Versions=[
@@ -78,23 +120,25 @@ def get_user_data(lt_id, lt_version, region_name):
78120
if hasattr(err, "message"):
79121
err = err.message
80122
logger.error(
81-
"Unable to get UserData for launch template%s with version %s.\nException: %s",
123+
"Unable to get UserData for launch template %s with version %s.\nException: %s",
82124
lt_id, lt_version, err
83125
)
84126

85127
return decoded_data
86128

87129

88-
89-
def parse_mime_user_data(user_data):
130+
def get_write_directives_section(user_data):
90131
"""
91132
Parses MIME formatted UserData that we get from EC2 to extract write_files section from cloud-config section.
92133
"""
93-
data = message_from_string(user_data)
94-
for cloud_config_section in data.walk():
95-
if cloud_config_section.get_content_type() == 'text/cloud-config':
96-
write_directives_section = yaml.safe_load(cloud_config_section._payload).get('write_files')
97-
134+
write_directives_section = None
135+
try:
136+
data = message_from_string(user_data)
137+
for cloud_config_section in data.walk():
138+
if cloud_config_section.get_content_type() == 'text/cloud-config':
139+
write_directives_section = yaml.safe_load(cloud_config_section._payload).get('write_files')
140+
except Exception as err:
141+
logger.error("Error occurred while parsing write_files section.\nException: %s", err)
98142
return write_directives_section
99143

100144

@@ -116,7 +160,8 @@ def write_dna_files(write_files_section, shared_storage_loc):
116160
err = err.message
117161
logger.error("Unable to write %s due to %s", file_path, err)
118162

119-
def get_latest_dns_data(resource, output_location, args):
163+
164+
def get_latest_dna_data(resource, output_location, args):
120165
"""
121166
Function to get latest User Data, extract relevant details and write dna.json.
122167
:param resource: Resource containing LT ID, Version and Logical id
@@ -125,8 +170,10 @@ def get_latest_dns_data(resource, output_location, args):
125170
:rtype: None
126171
"""
127172
user_data = get_user_data(resource.get('LaunchTemplate').get('Id'), resource.get('LaunchTemplate').get('Version'), args.region)
128-
write_directives = parse_mime_user_data(user_data)
129-
write_dna_files(write_directives, output_location+resource.get('LaunchTemplate').get("LogicalId"))
173+
if user_data:
174+
write_directives = get_write_directives_section(user_data)
175+
write_dna_files(write_directives, output_location+resource.get('LaunchTemplate').get("LogicalId"))
176+
130177

131178
def cleanup(directory_loc):
132179
"""Cleanup dna.json and extra.json files."""
@@ -138,6 +185,7 @@ def cleanup(directory_loc):
138185
except Exception as err:
139186
logger.warn(f"Unable to delete %s due to %s", f_path, err)
140187

188+
141189
def _parse_cli_args():
142190
"""Parse command line args."""
143191
parser = argparse.ArgumentParser(
@@ -157,7 +205,6 @@ def _parse_cli_args():
157205
"-c",
158206
"--cleanup",
159207
action="store_true",
160-
default=False,
161208
required=False,
162209
help="Cleanup DNA files created",
163210
)
@@ -171,9 +218,9 @@ def main():
171218
try:
172219
args = _parse_cli_args()
173220
if args.cleanup:
174-
cleanup(COMPUTE_FLEET_DNA_LOC)
221+
cleanup(COMPUTE_FLEET_SHARED_DNA_LOCATION)
175222
else:
176-
create_dna_files(args)
223+
share_compute_fleet_dna(args)
177224
except Exception as err:
178225
if hasattr(err, "message"):
179226
err = err.message

cookbooks/aws-parallelcluster-environment/recipes/config.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@
3838
# spack 'Configure Spack Packages' do
3939
# action :configure
4040
# end
41-
cfn_hup_configuration "Configure Cfn-hup"
41+
cfn_hup_configuration "Configure cfn-hup"

cookbooks/aws-parallelcluster-environment/resources/cfn_hup_configuration.rb

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# frozen_string_literal: true
22

33
#
4-
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4+
# Copyright:: 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
55
#
66
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
77
# License. A copy of the License is located at
@@ -27,14 +27,14 @@
2727
directory '/etc/cfn' do
2828
owner 'root'
2929
group 'root'
30-
mode '0770'
30+
mode '0700'
3131
recursive true
3232
end
3333

3434
directory '/etc/cfn/hooks.d' do
3535
owner 'root'
3636
group 'root'
37-
mode '0770'
37+
mode '0700'
3838
recursive true
3939
end
4040

@@ -64,19 +64,20 @@
6464
cloudformation_url: cloudformation_url,
6565
cfn_init_role: instance_role_name,
6666
launch_template_resource_id: node['cluster']['launch_template_id'],
67-
update_hook_script_dir: node['cluster']['scripts_dir']
67+
update_hook_script_dir: node['cluster']['scripts_dir'],
68+
node_bootstrap_timeout: node['cluster']['compute_node_bootstrap_timeout'] || node['cluster']['Timeout']
6869
)
6970
end
7071
end
7172

7273
action :extra_configuration do
7374
case node['cluster']['node_type']
7475
when 'HeadNode'
75-
cookbook_file "#{node['cluster']['scripts_dir']}/get_compute_user_data.py" do
76-
source 'cfn_hup_configuration/get_compute_user_data.py'
76+
cookbook_file "#{node['cluster']['scripts_dir']}/share_compute_fleet_dna.py" do
77+
source 'cfn_hup_configuration/share_compute_fleet_dna.py'
7778
owner 'root'
7879
group 'root'
79-
mode '0755'
80+
mode '0700'
8081
action :create_if_missing
8182
end
8283

cookbooks/aws-parallelcluster-environment/spec/unit/resources/cfn_hup_configuration_spec.rb

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def self.configure(chef_run)
1818
LAUNCH_TEMPLATE_ID = "LAUNCH_TEMPLATE_ID".freeze
1919
SCRIPT_DIR = "SCRIPT_DIR".freeze
2020
MONITOR_SHARED_DIR = "MONITOR_SHARED_DIR".freeze
21+
NODE_BOOTSTRAP_TIMEOUT = "1800"
2122

2223
describe 'cfn_hup_configuration:configure' do
2324
for_all_oses do |platform, version|
@@ -39,6 +40,7 @@ def self.configure(chef_run)
3940
node.override["cluster"]["launch_template_id"] = LAUNCH_TEMPLATE_ID
4041
node.override['cluster']['scripts_dir'] = SCRIPT_DIR
4142
node.override['cluster']['shared_dir'] = MONITOR_SHARED_DIR
43+
node.override['cluster']['compute_node_bootstrap_timeout'] = NODE_BOOTSTRAP_TIMEOUT
4244
end
4345
ConvergeCfnHupConfiguration.configure(runner)
4446
end
@@ -49,7 +51,7 @@ def self.configure(chef_run)
4951
is_expected.to create_directory(dir)
5052
.with(owner: 'root')
5153
.with(group: 'root')
52-
.with(mode: "0770")
54+
.with(mode: "0700")
5355
.with(recursive: true)
5456
end
5557
end
@@ -81,6 +83,7 @@ def self.configure(chef_run)
8183
cfn_init_role: INSTANCE_ROLE_NAME,
8284
launch_template_resource_id: LAUNCH_TEMPLATE_ID,
8385
update_hook_script_dir: SCRIPT_DIR,
86+
node_bootstrap_timeout: NODE_BOOTSTRAP_TIMEOUT,
8487
})
8588
end
8689

@@ -97,12 +100,12 @@ def self.configure(chef_run)
97100
})
98101
end
99102
elsif node_type == 'HeadNode'
100-
it "creates #{SCRIPT_DIR}/get_compute_user_data.py" do
101-
is_expected.to create_if_missing_cookbook_file("#{SCRIPT_DIR}/get_compute_user_data.py")
102-
.with(source: 'cfn_hup_configuration/get_compute_user_data.py')
103+
it "creates #{SCRIPT_DIR}/share_compute_fleet_dna.py" do
104+
is_expected.to create_if_missing_cookbook_file("#{SCRIPT_DIR}/share_compute_fleet_dna.py")
105+
.with(source: 'cfn_hup_configuration/share_compute_fleet_dna.py')
103106
.with(user: 'root')
104107
.with(group: 'root')
105-
.with(mode: '0755')
108+
.with(mode: '0700')
106109
end
107110

108111
it "creates the directory #{MONITOR_SHARED_DIR}/dna" do

cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/ComputeFleet/cfn-hup-update-action.sh.erb

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#!/bin/bash
22
set -ex
33

4-
5-
4+
# This script is invoked by cfn-hup as part of its update hook action.
5+
# This script runs on each node of ComputeFleet to monitor the shared location for latest dna.json and extra.json files and run ParallelCluster Cookbook recipes.
6+
#
7+
# Usage: ./cfn-hup-update-action.sh
68

79
function run_cookbook_recipes() {
810
LATEST_DNA_LOC=<%= @monitor_shared_dir %>
@@ -35,7 +37,7 @@ function run_cookbook_recipes() {
3537
main() {
3638
PATH=/usr/local/bin:/bin:/usr/bin:/opt/aws/bin;
3739
. /etc/parallelcluster/pcluster_cookbook_environment.sh;
38-
echo "We monitor <%= @monitor_shared_dir %> to check for <%= @launch_template_resource_id %>-dna.json is being added"
40+
echo "We monitor <%= @monitor_shared_dir %> to check for <%= @launch_template_resource_id %>-dna.json has been added and run ParallelCluster cookbook recipes."
3941
run_cookbook_recipes
4042
}
4143

cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/cfn-hook-update.conf.erb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ path=Resources.<%= @launch_template_resource_id %>.Metadata.AWS::CloudFormation:
66
action=PATH=/usr/local/bin:/bin:/usr/bin:/opt/aws/bin; . /etc/parallelcluster/pcluster_cookbook_environment.sh; $CFN_BOOTSTRAP_VIRTUALENV_PATH/cfn-init -v --stack <%= @stack_id %> --resource <%= @launch_template_resource_id %> --configsets update --region <%= @region %> --url <%= @cloudformation_url %> --role <%= @cfn_init_role %>
77
<% when 'ComputeFleet' -%>
88
path=Resources.<%= @launch_template_resource_id %>
9-
action=timeout 900 <%= @update_hook_script_dir %>/cfn-hup-update-action.sh
9+
action=timeout <%= @node_bootstrap_timeout %> <%= @update_hook_script_dir %>/cfn-hup-update-action.sh
1010
<% end %>
1111
runas=root

cookbooks/aws-parallelcluster-environment/test/controls/cfn_hup_configuration_spec.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright:: 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
1+
# Copyright:: 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License").
44
# You may not use this file except in compliance with the License. A copy of the License is located at
@@ -15,7 +15,7 @@
1515
%w(/etc/cfn /etc/cfn/hooks.d).each do |dir|
1616
describe directory(dir) do
1717
it { should exist }
18-
its('mode') { should cmp '0770' }
18+
its('mode') { should cmp '0700' }
1919
its('owner') { should eq 'root' }
2020
its('group') { should eq 'root' }
2121
end
@@ -35,9 +35,9 @@
3535
title "cfn_hup configuration files and directories for HeadNode should be created"
3636
only_if { instance.head_node? }
3737

38-
describe file("#{node['cluster']['scripts_dir']}/get_compute_user_data.py") do
38+
describe file("#{node['cluster']['scripts_dir']}/share_compute_fleet_dna.py") do
3939
it { should exist }
40-
its('mode') { should cmp '0400' }
40+
its('mode') { should cmp '0700' }
4141
its('owner') { should eq 'root' }
4242
its('group') { should eq 'root' }
4343
end

0 commit comments

Comments
 (0)