1515import argparse
1616from email import message_from_string
1717import json
18- import mimetypes
1918import os
2019import boto3
2120import yaml
2221import base64
22+ import logging
23+ from retrying import retry
2324
# Root directory of the ParallelCluster files on the node.
SHARED_LOCATION = "/opt/parallelcluster/"

# Compute fleet shared directory. The trailing slash is intentional: paths below
# (and in get_latest_dns_data) are built by plain string concatenation.
COMPUTE_FLEET_SHARED_LOCATION = SHARED_LOCATION + 'shared/'

# Directory that receives the generated "<LogicalId>-dna.json" files.
COMPUTE_FLEET_DNA_LOC = COMPUTE_FLEET_SHARED_LOCATION + 'dna/'

# JSON file describing the compute fleet launch templates (read by create_dna_files).
COMPUTE_FLEET_LAUNCH_TEMPLATE_ID = COMPUTE_FLEET_SHARED_LOCATION + 'launch-templates-config.json'

# Module-level logger; INFO and above are emitted through the root handler.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
3535
def get_compute_launch_template_ids(shared_storage):
    """
    Load the launch template configuration JSON for the compute fleet.

    :param shared_storage: Path of the JSON file to read
    :return: The parsed JSON content, or None when the file cannot be read or parsed
    """
    try:
        with open(shared_storage, 'r', encoding='utf-8') as file:
            return json.load(file)
    except Exception as err:
        # Best effort: the failure is logged and reported to the caller as None.
        # logger.warning replaces the deprecated logger.warn alias.
        logger.warning("Unable to read %s due to %s", shared_storage, err)
        return None
3644
3745
38- def get_launch_template_details (shared_storage ):
39- with open (shared_storage , 'r' ) as file :
40- lt_config = json .loads (file .read ())
41- return lt_config
42-
43-
44- def get_compute_launch_template_ids (args ):
45- lt_config = get_launch_template_details (COMPUTE_FLEET_LAUNCH_TEMPLATE_ID )
46+ def create_dna_files (args ):
47+ """Creates all dna.json for each queue in cluster."""
48+ lt_config = get_compute_launch_template_ids (COMPUTE_FLEET_LAUNCH_TEMPLATE_ID )
4649 if lt_config :
4750 all_queues = lt_config .get ('Queues' )
4851 for _ , queues in all_queues .items ():
@@ -51,15 +54,15 @@ def get_compute_launch_template_ids(args):
5154 get_latest_dns_data (compute_res , COMPUTE_FLEET_DNA_LOC , args )
5255
5356
54- def get_login_pool_launch_template_ids (args ):
55- lt_config = get_launch_template_details (LOGIN_POOL_LAUNCH_TEMPLATE_ID )
56- if lt_config :
57- login_pools = lt_config .get ('LoginPools' )
58- for _ , pool in login_pools .items ():
59- get_latest_dns_data (pool , LOGIN_POOL_DNA_LOC , args )
60-
61-
57+ @retry (stop_max_attempt_number = 5 , wait_fixed = 3000 )
6258def get_user_data (lt_id , lt_version , region_name ):
59+ """
60+ Calls EC2 DescribeLaunchTemplateVersions API to get UserData from Launch Template specified.
61+ :param lt_id: Launch Template ID (eg: lt-12345678901234567)
62+ :param lt_version: Launch Template latest Version Number (eg: 2)
63+ :param region_name: AWS region name (eg: us-east-1)
64+ :return: User_data in MIME format
65+ """
6366 try :
6467 ec2_client = boto3 .client ("ec2" , region_name = region_name )
6568 response = ec2_client .describe_launch_template_versions (
@@ -70,11 +73,19 @@ def get_user_data(lt_id, lt_version, region_name):
7073 ).get ('LaunchTemplateVersions' )
7174 decoded_data = base64 .b64decode (response [0 ]['LaunchTemplateData' ]['UserData' ], validate = True ).decode ('utf-8' )
7275 return decoded_data
73- except Exception as e : # binascii.Error:
74- print ("Exception raised" , e )
76+ except Exception as err :
77+ if hasattr (err , "message" ):
78+ err = err .message
79+ logger .error (
80+ "Unable to get UserData for launch template%s with version %s.\n Exception: %s" ,
81+ lt_id , lt_version , err
82+ )
7583
7684
7785def parse_mime_user_data (user_data ):
86+ """
87+ Parses MIME formatted UserData that we get from EC2 to extract write_files section from cloud-config section.
88+ """
7889 data = message_from_string (user_data )
7990 for cloud_config_section in data .walk ():
8091 if cloud_config_section .get_content_type () == 'text/cloud-config' :
@@ -84,29 +95,49 @@ def parse_mime_user_data(user_data):
8495
8596
def write_dna_files(write_files_section, shared_storage_loc):
    """
    Write the dna.json found in the write_files section of UserData to the shared location.

    Only entries whose 'path' is '/tmp/dna.json' are written; their 'content' is
    re-serialized as JSON with a 4-space indent. Failures are logged, not raised.

    :param write_files_section: Entire write_files section from UserData (iterable of dicts)
    :param shared_storage_loc: Path prefix of the output file; "-dna.json" is appended to it
    :return: None
    """
    # Bound before the try block so the error handler can always reference it.
    file_path = f"{shared_storage_loc}-dna.json"
    try:
        for entry in write_files_section:
            if entry['path'] != '/tmp/dna.json':
                continue
            # Parse before opening the target: opening with "w" truncates the file,
            # so invalid content must not wipe out an existing dna.json.
            dna_content = json.loads(entry['content'])
            with open(file_path, "w") as file:
                json.dump(dna_content, file, indent=4)
    except Exception as err:
        logger.error("Unable to write %s due to %s", file_path, getattr(err, "message", err))
92114
def get_latest_dns_data(resource, output_location, args):
    """
    Get the latest UserData for a resource, extract its write_files section and write dna.json.

    :param resource: Resource containing a 'LaunchTemplate' mapping with 'Id', 'Version' and 'LogicalId'
    :param output_location: Shared storage location where we want to write dna.json
    :param args: Command line arguments (the 'region' attribute is used)
    :rtype: None
    """
    launch_template = resource.get('LaunchTemplate')
    if not launch_template:
        # Skip this resource instead of failing with AttributeError on None.
        logger.warning("No LaunchTemplate details found in %s, skipping", resource)
        return

    user_data = get_user_data(launch_template.get('Id'), launch_template.get('Version'), args.region)
    if not user_data:
        # get_user_data logs its own failure and yields no data; nothing to parse or write.
        return

    write_directives = parse_mime_user_data(user_data)
    write_dna_files(write_directives, output_location + launch_template.get("LogicalId"))
97126
def cleanup(directory_loc):
    """
    Delete every regular file directly inside directory_loc.

    Subdirectories are left untouched. A file that cannot be deleted is logged
    and skipped so the remaining files are still processed.

    :param directory_loc: Directory whose files are removed
    :return: None
    """
    for f in os.listdir(directory_loc):
        f_path = os.path.join(directory_loc, f)
        try:
            if os.path.isfile(f_path):
                os.remove(f_path)
        except OSError as err:
            # Lazy %-style arguments: no f-string prefix needed; warn() is a deprecated alias.
            logger.warning("Unable to delete %s due to %s", f_path, err)
106136
107137def _parse_cli_args ():
138+ """Parse command line args."""
108139 parser = argparse .ArgumentParser (
109- description = "Get latest User Data from Compute and Login Node Launch Templates." , exit_on_error = False
140+ description = "Get latest User Data from ComputeFleet Launch Templates." , exit_on_error = False
110141 )
111142
112143 parser .add_argument (
@@ -133,13 +164,17 @@ def _parse_cli_args():
133164
134165
def main():
    """
    Script entry point.

    Parses the command line, then either removes the generated files (cleanup mode)
    or creates the dna.json files. Any exception is logged with its traceback and
    the process exits with status 0.
    """
    try:
        cli_args = _parse_cli_args()
        if not cli_args.cleanup:
            create_dna_files(cli_args)
        else:
            cleanup(COMPUTE_FLEET_DNA_LOC)
    except Exception as err:
        # Prefer the exception's own 'message' attribute when it has one.
        detail = getattr(err, "message", err)
        logger.exception(
            "Encountered exception when fetching latest dna.json for ComputeFleet, exiting gracefully: %s",
            detail,
        )
        raise SystemExit(0)
143178
144179
145180if __name__ == "__main__" :
0 commit comments