1- # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
1+ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22#
33# Licensed under the Apache License, Version 2.0 (the "License").
44# You may not use this file except in compliance with the
1111# limitations under the License.
1212
1313
14-
14+ import configparser
1515import argparse
1616from email import message_from_string
1717import json
1818import os
1919import boto3
20+ from botocore .config import Config
2021import yaml
2122import base64
2223import logging
2324from retrying import retry
2425
25- SHARED_LOCATION = "/opt/parallelcluster/"
26-
27- COMPUTE_FLEET_SHARED_LOCATION = SHARED_LOCATION + 'shared/'
# Base directory under which the compute fleet launch template config and the dna files live.
26+ COMPUTE_FLEET_SHARED_LOCATION = "/opt/parallelcluster/shared/"
2827
29- COMPUTE_FLEET_DNA_LOC = COMPUTE_FLEET_SHARED_LOCATION + 'dna/'
# Directory prefix handed to get_latest_dna_data as output location and to cleanup as the directory to clean.
28+ COMPUTE_FLEET_SHARED_DNA_LOCATION = COMPUTE_FLEET_SHARED_LOCATION + 'dna/'
3029
31- COMPUTE_FLEET_LAUNCH_TEMPLATE_ID = COMPUTE_FLEET_SHARED_LOCATION + 'launch-templates-config.json'
# JSON file read by get_compute_launch_template_ids (launch template Id, Version and LogicalId per compute resource).
30+ COMPUTE_FLEET_LAUNCH_TEMPLATE_CONFIG = COMPUTE_FLEET_SHARED_LOCATION + 'launch-templates-config.json'
3231
# Module-level logger; the root logger is configured at INFO level when this module is loaded.
3332logger = logging .getLogger (__name__ )
3433logging .basicConfig (level = logging .INFO )
3534
def get_compute_launch_template_ids(lt_config_file_name):
    """Load launch-templates-config.json which contains ID, Version number and Logical ID of all queues in Compute Fleet's Launch Template.

    The format of launch-templates-config.json is
    {
        "Queues": {
            "queue1": {
                "ComputeResources": {
                    "queue1-i1": {
                        "LaunchTemplate": {
                            "Version": "1",
                            "LogicalId": "LaunchTemplate123456789012345",
                            "Id": "lt-12345678901234567"
                        }
                    }
                }
            },
            "queue2": {
                "ComputeResources": {
                    "queue2-i1": {
                        "LaunchTemplate": {
                            "Version": "1",
                            "LogicalId": "LaunchTemplate012345678901234",
                            "Id": "lt-01234567890123456"
                        }
                    }
                }
            }
        }
    }

    :param lt_config_file_name: Path of the JSON file to load
    :return: The parsed JSON content, or None when the file cannot be read or parsed
    """
    lt_config = None
    try:
        with open(lt_config_file_name, 'r', encoding='utf-8') as file:
            lt_config = json.load(file)
    except Exception as err:
        # Best effort by design: the failure is reported and the caller receives None.
        # logger.warning is used because logger.warn is a deprecated alias.
        logger.warning("Unable to read %s due to %s", lt_config_file_name, err)

    return lt_config
4573
4674
47-
def share_compute_fleet_dna(args):
    """Create the dna files for every compute resource of every queue in the cluster.

    Reads the launch template configuration from COMPUTE_FLEET_LAUNCH_TEMPLATE_CONFIG and calls
    get_latest_dna_data once per compute resource, with COMPUTE_FLEET_SHARED_DNA_LOCATION as the
    output location. Nothing is done when the configuration could not be loaded or is empty.

    :param args: Parsed command line arguments, forwarded unchanged to get_latest_dna_data
    :rtype: None
    """
    lt_config = get_compute_launch_template_ids(COMPUTE_FLEET_LAUNCH_TEMPLATE_CONFIG)
    if not lt_config:
        return

    # `or {}` tolerates a missing or null section instead of failing on None.items().
    for queue in (lt_config.get('Queues') or {}).values():
        for compute_res in (queue.get('ComputeResources') or {}).values():
            get_latest_dna_data(compute_res, COMPUTE_FLEET_SHARED_DNA_LOCATION, args)
84+
85+
86+ # FIXME: Fix Code Duplication
def parse_proxy_config():
    """Build the botocore Config to use for AWS clients, based on /etc/boto.cfg.

    :return: A Config whose "https" proxy is "<proxy>:<proxy_port>" when both the "proxy" and
             "proxy_port" options are present in the "Boto" section; a default Config otherwise.
    """
    config = configparser.RawConfigParser()
    # read() ignores files it cannot open, so a missing /etc/boto.cfg yields the default Config.
    config.read("/etc/boto.cfg")
    if config.has_option("Boto", "proxy") and config.has_option("Boto", "proxy_port"):
        proxy = config.get("Boto", "proxy")
        proxy_port = config.get("Boto", "proxy_port")
        # No whitespace around the separator: the value is used verbatim as the proxy endpoint.
        return Config(proxies={"https": f"{proxy}:{proxy_port}"})
    return Config()
5796
5897
5998@retry (stop_max_attempt_number = 5 , wait_fixed = 3000 )
@@ -65,8 +104,11 @@ def get_user_data(lt_id, lt_version, region_name):
65104 :param region_name: AWS region name (eg: us-east-1)
66105 :return: User_data in MIME format
67106 """
107+ decoded_data = None
68108 try :
69- ec2_client = boto3 .client ("ec2" , region_name = region_name )
109+ proxy_config = parse_proxy_config ()
110+
111+ ec2_client = boto3 .client ("ec2" , region_name = region_name , config = proxy_config )
70112 response = ec2_client .describe_launch_template_versions (
71113 LaunchTemplateId = lt_id ,
72114 Versions = [
@@ -78,23 +120,25 @@ def get_user_data(lt_id, lt_version, region_name):
78120 if hasattr (err , "message" ):
79121 err = err .message
80122 logger .error (
81- "Unable to get UserData for launch template%s with version %s.\n Exception: %s" ,
123+ "Unable to get UserData for launch template %s with version %s.\n Exception: %s" ,
82124 lt_id , lt_version , err
83125 )
84126
85127 return decoded_data
86128
87129
88-
def get_write_directives_section(user_data):
    """
    Parses MIME formatted UserData that we get from EC2 to extract write_files section from cloud-config section.

    Every part of the message is visited; for each part of type text/cloud-config the payload is
    loaded as YAML and its write_files entry is kept, so the last such part wins.
    Any exception is logged and whatever was extracted up to that point is returned.

    :param user_data: UserData in MIME format, as a string
    :return: Value of the write_files key, or None when nothing could be extracted
    """
    write_directives_section = None
    try:
        data = message_from_string(user_data)
        for cloud_config_section in data.walk():
            if cloud_config_section.get_content_type() == 'text/cloud-config':
                # get_payload() is the public accessor for what the private _payload attribute holds.
                cloud_config = yaml.safe_load(cloud_config_section.get_payload())
                write_directives_section = cloud_config.get('write_files')
    except Exception as err:
        logger.error("Error occurred while parsing write_files section.\n Exception: %s", err)
    return write_directives_section
99143
100144
@@ -116,7 +160,8 @@ def write_dna_files(write_files_section, shared_storage_loc):
116160 err = err .message
117161 logger .error ("Unable to write %s due to %s" , file_path , err )
118162
def get_latest_dna_data(resource, output_location, args):
    """
    Fetch the latest User Data of a launch template, extract its write directives and write the dna files.
    :param resource: Resource containing LT ID, Version and Logical id
    :param output_location: Path prefix to which the launch template's LogicalId is appended
    :param args: Command line args; only args.region is read here
    :rtype: None
    """
    launch_template = resource.get('LaunchTemplate')
    lt_id = launch_template.get('Id')
    lt_version = launch_template.get('Version')
    user_data = get_user_data(lt_id, lt_version, args.region)
    if not user_data:
        # Nothing was retrieved for this launch template, so there is nothing to write.
        return

    write_directives = get_write_directives_section(user_data)
    write_dna_files(write_directives, output_location + launch_template.get("LogicalId"))
176+
130177
131178def cleanup (directory_loc ):
132179 """Cleanup dna.json and extra.json files."""
@@ -138,6 +185,7 @@ def cleanup(directory_loc):
138185 except Exception as err :
139186 logger .warn (f"Unable to delete %s due to %s" , f_path , err )
140187
188+
141189def _parse_cli_args ():
142190 """Parse command line args."""
143191 parser = argparse .ArgumentParser (
@@ -157,7 +205,6 @@ def _parse_cli_args():
157205 "-c" ,
158206 "--cleanup" ,
159207 action = "store_true" ,
160- default = False ,
161208 required = False ,
162209 help = "Cleanup DNA files created" ,
163210 )
@@ -171,9 +218,9 @@ def main():
171218 try :
172219 args = _parse_cli_args ()
173220 if args .cleanup :
174- cleanup (COMPUTE_FLEET_DNA_LOC )
221+ cleanup (COMPUTE_FLEET_SHARED_DNA_LOCATION )
175222 else :
176- create_dna_files (args )
223+ share_compute_fleet_dna (args )
177224 except Exception as err :
178225 if hasattr (err , "message" ):
179226 err = err .message
0 commit comments