Skip to content

Commit 879de41

Browse files
author
Himani Anil Deshpande
committed
Remove usage of Login Node from get_compute_user_data.py
1 parent a644f17 commit 879de41

File tree

1 file changed

+72
-37
lines changed

1 file changed

+72
-37
lines changed

cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration/get_compute_user_data.py

Lines changed: 72 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -15,34 +15,37 @@
1515
import argparse
1616
from email import message_from_string
1717
import json
18-
import mimetypes
1918
import os
2019
import boto3
2120
import yaml
2221
import base64
22+
import logging
23+
from retrying import retry
2324

2425
SHARED_LOCATION = "/opt/parallelcluster/"
2526

2627
COMPUTE_FLEET_SHARED_LOCATION = SHARED_LOCATION + 'shared/'
27-
LOGIN_POOL_SHARED_LOCATION = SHARED_LOCATION + 'shared_login_nodes/'
2828

2929
COMPUTE_FLEET_DNA_LOC = COMPUTE_FLEET_SHARED_LOCATION + 'dna/'
30-
LOGIN_POOL_DNA_LOC = LOGIN_POOL_SHARED_LOCATION + 'dna/'
3130

3231
COMPUTE_FLEET_LAUNCH_TEMPLATE_ID = COMPUTE_FLEET_SHARED_LOCATION + 'launch-templates-config.json'
3332

34-
LOGIN_POOL_LAUNCH_TEMPLATE_ID = LOGIN_POOL_SHARED_LOCATION + 'launch-templates-config.json'
33+
logger = logging.getLogger(__name__)
34+
logging.basicConfig(level=logging.INFO)
3535

36+
def get_compute_launch_template_ids(shared_storage):
    """
    Load the launch template configuration of the Compute Fleet.

    The file read is launch-templates-config.json, which contains the ID, Version number and Logical ID
    of the Launch Template of all queues in the Compute Fleet.

    :param shared_storage: Path of the JSON file to read
    :return: The parsed JSON content, or None when the file cannot be read or parsed
    """
    try:
        with open(shared_storage, 'r') as file:
            return json.load(file)
    except Exception as err:
        # Best effort: the failure is only logged and reported to the caller as None.
        logger.warning("Unable to read %s due to %s", shared_storage, err)
        return None
3644

3745

38-
def get_launch_template_details(shared_storage):
39-
with open(shared_storage, 'r') as file:
40-
lt_config = json.loads(file.read())
41-
return lt_config
42-
43-
44-
def get_compute_launch_template_ids(args):
45-
lt_config = get_launch_template_details(COMPUTE_FLEET_LAUNCH_TEMPLATE_ID)
46+
def create_dna_files(args):
47+
"""Creates all dna.json for each queue in cluster."""
48+
lt_config = get_compute_launch_template_ids(COMPUTE_FLEET_LAUNCH_TEMPLATE_ID)
4649
if lt_config:
4750
all_queues = lt_config.get('Queues')
4851
for _, queues in all_queues.items():
@@ -51,15 +54,15 @@ def get_compute_launch_template_ids(args):
5154
get_latest_dns_data(compute_res, COMPUTE_FLEET_DNA_LOC, args)
5255

5356

54-
def get_login_pool_launch_template_ids(args):
55-
lt_config = get_launch_template_details(LOGIN_POOL_LAUNCH_TEMPLATE_ID)
56-
if lt_config:
57-
login_pools = lt_config.get('LoginPools')
58-
for _, pool in login_pools.items():
59-
get_latest_dns_data(pool, LOGIN_POOL_DNA_LOC, args)
60-
61-
57+
@retry(stop_max_attempt_number=5, wait_fixed=3000)
6258
def get_user_data(lt_id, lt_version, region_name):
59+
"""
60+
Calls EC2 DescribeLaunchTemplateVersions API to get UserData from Launch Template specified.
61+
:param lt_id: Launch Template ID (eg: lt-12345678901234567)
62+
:param lt_version: Launch Template latest Version Number (eg: 2)
63+
:param region_name: AWS region name (eg: us-east-1)
64+
:return: User_data in MIME format
65+
"""
6366
try:
6467
ec2_client = boto3.client("ec2", region_name=region_name)
6568
response = ec2_client.describe_launch_template_versions(
@@ -70,11 +73,19 @@ def get_user_data(lt_id, lt_version, region_name):
7073
).get('LaunchTemplateVersions')
7174
decoded_data = base64.b64decode(response[0]['LaunchTemplateData']['UserData'], validate=True).decode('utf-8')
7275
return decoded_data
73-
except Exception as e: # binascii.Error:
74-
print("Exception raised", e)
76+
except Exception as err:
77+
if hasattr(err, "message"):
78+
err = err.message
79+
logger.error(
80+
"Unable to get UserData for launch template%s with version %s.\nException: %s",
81+
lt_id, lt_version, err
82+
)
7583

7684

7785
def parse_mime_user_data(user_data):
86+
"""
87+
Parses MIME formatted UserData that we get from EC2 to extract write_files section from cloud-config section.
88+
"""
7889
data = message_from_string(user_data)
7990
for cloud_config_section in data.walk():
8091
if cloud_config_section.get_content_type() == 'text/cloud-config':
@@ -84,29 +95,49 @@ def parse_mime_user_data(user_data):
8495

8596

8697
def write_dna_files(write_files_section, shared_storage_loc):
    """
    Write the dna.json found in the write_files section of UserData to the shared location.

    Only entries whose 'path' is /tmp/dna.json are considered. Their 'content' is parsed as JSON and
    written, indented, to "<shared_storage_loc>-dna.json". Any failure is logged and not raised.

    :param write_files_section: Entire write_files section from UserData
    :param shared_storage_loc: Shared Storage Location (path prefix) of where to write dna.json
    :return: None
    """
    # Computed outside the try block so the error log below can always reference it.
    file_path = f"{shared_storage_loc}-dna.json"
    try:
        for data in write_files_section:
            if data['path'] == '/tmp/dna.json':
                # Validate and serialize first, so invalid content never truncates an existing file.
                content = json.dumps(json.loads(data['content']), indent=4)
                with open(file_path, "w") as file:
                    file.write(content)
    except Exception as err:
        logger.error("Unable to write %s due to %s", file_path, getattr(err, "message", err))
92114

93115
def get_latest_dns_data(resource, output_location, args):
    """
    Get the latest User Data of a resource, extract the relevant details and write its dna.json.

    :param resource: Resource containing the LaunchTemplate Id, Version and LogicalId
    :param output_location: Shared Storage Location where we want to write dna.json
    :param args: Command Line arguments; only args.region is read here
    :rtype: None
    """
    launch_template = resource.get('LaunchTemplate')
    user_data = get_user_data(launch_template.get('Id'), launch_template.get('Version'), args.region)
    if user_data is None:
        # Nothing was retrieved, so there is nothing to parse. Skip this resource rather than
        # failing inside the MIME parser, which would abort the processing of the remaining ones.
        logger.warning("No UserData retrieved for launch template %s, skipping", launch_template.get('Id'))
        return
    write_directives = parse_mime_user_data(user_data)
    write_dna_files(write_directives, output_location + launch_template.get("LogicalId"))
97126

98127
def cleanup(directory_loc):
    """
    Cleanup dna.json and extra.json files by deleting every regular file directly inside a directory.

    Sub-directories are left untouched. A failure to delete one file is logged and does not stop the
    cleanup of the remaining files.

    :param directory_loc: Directory whose files are deleted
    :return: None
    """
    for f in os.listdir(directory_loc):
        f_path = os.path.join(directory_loc, f)
        try:
            if os.path.isfile(f_path):
                os.remove(f_path)
        except Exception as err:
            # Lazy %-style arguments: the message must be a plain string, not an f-string.
            logger.warning("Unable to delete %s due to %s", f_path, err)
106136

107137
def _parse_cli_args():
138+
"""Parse command line args."""
108139
parser = argparse.ArgumentParser(
109-
description="Get latest User Data from Compute and Login Node Launch Templates.", exit_on_error=False
140+
description="Get latest User Data from ComputeFleet Launch Templates.", exit_on_error=False
110141
)
111142

112143
parser.add_argument(
@@ -133,13 +164,17 @@ def _parse_cli_args():
133164

134165

135166
def main():
    """
    Script entry point.

    Parses the command line, then either cleans up the ComputeFleet dna location (when the cleanup
    argument is set) or creates the dna files. Any exception is logged with its traceback and the
    process exits with status 0.
    """
    try:
        cli_args = _parse_cli_args()
        if not cli_args.cleanup:
            create_dna_files(cli_args)
        else:
            cleanup(COMPUTE_FLEET_DNA_LOC)
    except Exception as failure:
        # Prefer the exception's own "message" attribute when it has one.
        detail = failure.message if hasattr(failure, "message") else failure
        logger.exception(
            "Encountered exception when fetching latest dna.json for ComputeFleet, exiting gracefully: %s",
            detail,
        )
        raise SystemExit(0)
143178

144179

145180
if __name__ == "__main__":

0 commit comments

Comments
 (0)