diff --git a/cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration/share_compute_fleet_dna.py b/cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration/share_compute_fleet_dna.py
new file mode 100644
index 0000000000..729a610fb7
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration/share_compute_fleet_dna.py
@@ -0,0 +1,270 @@
+# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License").
+# You may not use this file except in compliance with the
+# License. A copy of the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import argparse
+import base64
+import configparser
+import json
+import logging
+import os
+from email import message_from_string
+
+import boto3
+import yaml
+from botocore.config import Config
+from retrying import retry
+
+COMPUTE_FLEET_SHARED_LOCATION = "/opt/parallelcluster/shared/"
+
+COMPUTE_FLEET_SHARED_DNA_LOCATION = COMPUTE_FLEET_SHARED_LOCATION + "dna/"
+
+COMPUTE_FLEET_LAUNCH_TEMPLATE_CONFIG = COMPUTE_FLEET_SHARED_LOCATION + "launch-templates-config.json"
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+def get_compute_launch_template_ids(lt_config_file_name):
+    """
+    Load launch-templates-config.json.
+
+    It contains ID, Version number and Logical ID of all queues in Compute Fleet's Launch Template.
+
+    The format of launch-templates-config.json:
+    {
+        "Queues": {
+            "queue1": {
+                "ComputeResources": {
+                    "queue1-i1": {
+                        "LaunchTemplate": {
+                            "Version": "1",
+                            "LogicalId": "LaunchTemplate123456789012345",
+                            "Id": "lt-12345678901234567"
+                        }
+                    }
+                }
+            },
+            "queue2": {
+                "ComputeResources": {
+                    "queue2-i1": {
+                        "LaunchTemplate": {
+                            "Version": "1",
+                            "LogicalId": "LaunchTemplate012345678901234",
+                            "Id": "lt-01234567890123456"
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    :param lt_config_file_name: path of the JSON file to load
+    :return: the parsed configuration, or None when the file cannot be read or parsed
+    """
+    try:
+        with open(lt_config_file_name, "r", encoding="utf-8") as file:
+            return json.load(file)
+    except (OSError, ValueError) as err:
+        # Best effort: a missing or malformed file is logged and reported as "no configuration".
+        logger.warning("Unable to read %s due to %s", lt_config_file_name, err)
+        return None
+
+
+def share_compute_fleet_dna(args):
+    """
+    Create dna.json for each compute resource of every queue in the cluster.
+
+    :param args: Command Line arguments (only ``region`` is used)
+    """
+    lt_config = get_compute_launch_template_ids(COMPUTE_FLEET_LAUNCH_TEMPLATE_CONFIG)
+    if not lt_config:
+        return
+    # ``or {}`` keeps a config without "Queues"/"ComputeResources" from aborting the whole run.
+    for queue in (lt_config.get("Queues") or {}).values():
+        for compute_res in (queue.get("ComputeResources") or {}).values():
+            get_latest_dna_data(compute_res, COMPUTE_FLEET_SHARED_DNA_LOCATION, args)
+
+
+# FIXME: Fix Code Duplication
+def parse_proxy_config():
+    """Build a botocore Config, adding the HTTPS proxy from /etc/boto.cfg when both proxy options are set."""
+    config = configparser.RawConfigParser()
+    config.read("/etc/boto.cfg")
+    proxy_config = Config()
+    if config.has_option("Boto", "proxy") and config.has_option("Boto", "proxy_port"):
+        proxy = config.get("Boto", "proxy")
+        proxy_port = config.get("Boto", "proxy_port")
+        proxy_config = Config(proxies={"https": f"{proxy}:{proxy_port}"})
+    return proxy_config
+
+
+@retry(stop_max_attempt_number=5, wait_fixed=3000)
+def _fetch_user_data(lt_id, lt_version, region_name):
+    """
+    Call EC2 DescribeLaunchTemplateVersions and return the base64-decoded UserData.
+
+    Failures are deliberately left to propagate so that the retry decorator can re-attempt the call.
+    """
+    ec2_client = boto3.client("ec2", region_name=region_name, config=parse_proxy_config())
+    versions = ec2_client.describe_launch_template_versions(
+        LaunchTemplateId=lt_id,
+        Versions=[
+            lt_version,
+        ],
+    ).get("LaunchTemplateVersions")
+    return base64.b64decode(versions[0]["LaunchTemplateData"]["UserData"], validate=True).decode("utf-8")
+
+
+def get_user_data(lt_id, lt_version, region_name):
+    """
+    Get UserData from specified Launch Template using EC2 DescribeLaunchTemplateVersions API.
+
+    :param lt_id: Launch Template ID (eg: lt-12345678901234567)
+    :param lt_version: Launch Template latest Version Number (eg: 2)
+    :param region_name: AWS region name (eg: us-east-1)
+    :return: string of user_data in MIME format, or None when it could not be retrieved
+    """
+    try:
+        # The retry lives on the helper: retrying a function that swallows its own errors never re-attempts.
+        return _fetch_user_data(lt_id, lt_version, region_name)
+    except Exception as err:
+        logger.error(
+            "Unable to get UserData for launch template %s with version %s.\nException: %s", lt_id, lt_version, err
+        )
+        return None
+
+
+def get_write_directives_section(user_data):
+    """
+    Get write_files section from cloud-config section of MIME formatted UserData.
+
+    :param user_data: UserData in MIME format
+    :return: the write_files entries, or None when they are absent or cannot be parsed
+    """
+    write_directives_section = None
+    try:
+        for part in message_from_string(user_data).walk():
+            if part.get_content_type() == "text/cloud-config":
+                # get_payload() is the public accessor for what the private _payload attribute holds.
+                write_directives_section = yaml.safe_load(part.get_payload()).get("write_files")
+    except Exception as err:
+        logger.error("Error occurred while parsing write_files section.\nException: %s", err)
+    return write_directives_section
+
+
+def write_dna_files(write_files_section, shared_storage_loc):
+    """
+    After extracting dna.json from write_files section of UserData, write it in shared location.
+
+    :param write_files_section: Entire write_files section from UserData
+    :param shared_storage_loc: Shared Storage Location of where to write dna.json
+    :return: None
+    """
+    # Computed outside the try block so that the error message below can always reference it.
+    file_path = f"{shared_storage_loc}-dna.json"
+    if not write_files_section:
+        logger.warning("No write_files section available, %s is not written", file_path)
+        return
+    try:
+        for data in write_files_section:
+            if data["path"] == "/tmp/dna.json":  # nosec B108
+                # Parse before opening: an invalid payload must not leave a truncated dna.json behind.
+                dna = json.loads(data["content"])
+                with open(file_path, "w", encoding="utf-8") as file:
+                    json.dump(dna, file, indent=4)
+    except Exception as err:
+        logger.error("Unable to write %s due to %s", file_path, err)
+
+
+def get_latest_dna_data(resource, output_location, args):
+    """
+    Get latest User Data, extract relevant details and write dna.json.
+
+    :param resource: Resource containing LT ID, Version and Logical id
+    :param output_location: Shared Storage Location where we want to write dna.json
+    :param args: Command Line arguments
+    :rtype: None
+    """
+    launch_template = resource.get("LaunchTemplate") or {}
+    lt_id = launch_template.get("Id")
+    lt_version = launch_template.get("Version")
+    logical_id = launch_template.get("LogicalId")
+    if not (lt_id and lt_version and logical_id):
+        # Skip only this resource instead of failing the remaining ones.
+        logger.warning("Skipping compute resource with incomplete LaunchTemplate details: %s", resource)
+        return
+    user_data = get_user_data(lt_id, lt_version, args.region)
+    if user_data:
+        write_dna_files(get_write_directives_section(user_data), output_location + logical_id)
+
+
+def cleanup(directory_loc):
+    """
+    Cleanup dna.json and extra.json files.
+
+    :param directory_loc: directory whose regular files are deleted (sub-directories are left untouched)
+    """
+    for f in os.listdir(directory_loc):
+        f_path = os.path.join(directory_loc, f)
+        try:
+            if os.path.isfile(f_path):
+                os.remove(f_path)
+        except OSError as err:
+            logger.warning("Unable to delete %s due to %s", f_path, err)
+
+
+def _parse_cli_args():
+    """Parse command line args."""
+    parser = argparse.ArgumentParser(
+        description="Get latest User Data from ComputeFleet Launch Templates.", exit_on_error=False
+    )
+
+    parser.add_argument(
+        "-r",
+        "--region",
+        required=False,
+        type=str,
+        default=os.getenv("AWS_REGION", None),
+        help="the cluster AWS region, defaults to AWS_REGION env variable",
+    )
+
+    parser.add_argument(
+        "-c",
+        "--cleanup",
+        action="store_true",
+        required=False,
+        help="Cleanup DNA files created",
+    )
+
+    args = parser.parse_args()
+
+    return args
+
+
+def main():
+    """Share or clean up the ComputeFleet dna files; any failure is logged and the script still exits with 0."""
+    try:
+        args = _parse_cli_args()
+        if args.cleanup:
+            cleanup(COMPUTE_FLEET_SHARED_DNA_LOCATION)
+        else:
+            share_compute_fleet_dna(args)
+    except Exception as err:
+        logger.exception(
+            "Encountered exception when fetching latest dna.json for ComputeFleet, exiting gracefully: %s", err
+        )
+        raise SystemExit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cookbooks/aws-parallelcluster-environment/kitchen.environment-config.yml b/cookbooks/aws-parallelcluster-environment/kitchen.environment-config.yml index 9e6f5eb351..ac4053c36b 100644 --- a/cookbooks/aws-parallelcluster-environment/kitchen.environment-config.yml +++ b/cookbooks/aws-parallelcluster-environment/kitchen.environment-config.yml @@ -314,6 +314,20 @@ suites: directory_service: enabled: "true" node_type: HeadNode + - name: cfn_hup_configuration + run_list: + - recipe[aws-parallelcluster-tests::setup] + - recipe[aws-parallelcluster-tests::test_resource] + verifier: + controls: + - /tag:config_cfn_hup/ + attributes: + resource: cfn_hup_configuration:configure + dependencies: + - recipe:aws-parallelcluster-platform::directories + cluster: + node_type: HeadNode + stack_arn: 'test' # Recipes - name: cfnconfig_mixed diff --git a/cookbooks/aws-parallelcluster-environment/recipes/config.rb b/cookbooks/aws-parallelcluster-environment/recipes/config.rb index b0c0057e40..6d58292e20 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/config.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/config.rb @@ -38,4 +38,4 @@ # spack 'Configure Spack Packages' do # action :configure # end -include_recipe 'aws-parallelcluster-environment::config_cfn_hup' +cfn_hup_configuration "Configure cfn-hup" diff --git a/cookbooks/aws-parallelcluster-environment/recipes/config/config_cfn_hup.rb
b/cookbooks/aws-parallelcluster-environment/recipes/config/config_cfn_hup.rb deleted file mode 100644 index 7a82d57773..0000000000 --- a/cookbooks/aws-parallelcluster-environment/recipes/config/config_cfn_hup.rb +++ /dev/null @@ -1,61 +0,0 @@ -# frozen_string_literal: true - -# -# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the -# License. A copy of the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES -# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and -# limitations under the License. - -cloudformation_url = "https://cloudformation.#{node['cluster']['region']}.#{node['cluster']['aws_domain']}" -instance_role_name = lambda { - # IMDS is not available on Docker - return "FAKE_INSTANCE_ROLE_NAME" if on_docker? 
- get_metadata_with_token(get_metadata_token, URI("http://169.254.169.254/latest/meta-data/iam/security-credentials")) -}.call - -directory '/etc/cfn' do - owner 'root' - group 'root' - mode '0770' - recursive true -end - -directory '/etc/cfn/hooks.d' do - owner 'root' - group 'root' - mode '0770' - recursive true -end - -template '/etc/cfn/cfn-hup.conf' do - source 'cfn_bootstrap/cfn-hup.conf.erb' - owner 'root' - group 'root' - mode '0400' - variables( - stack_id: node['cluster']['stack_arn'], - region: node['cluster']['region'], - cloudformation_url: cloudformation_url, - cfn_init_role: instance_role_name - ) -end - -template '/etc/cfn/hooks.d/pcluster-update.conf' do - source 'cfn_bootstrap/cfn-hook-update.conf.erb' - owner 'root' - group 'root' - mode '0400' - variables( - stack_id: node['cluster']['stack_arn'], - region: node['cluster']['region'], - cloudformation_url: cloudformation_url, - cfn_init_role: instance_role_name, - launch_template_resource_id: node['cluster']['launch_template_id'] - ) -end diff --git a/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb b/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb index 7c86e45b7e..41fb845c4f 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/install/cfn_bootstrap.rb @@ -94,7 +94,7 @@ # Add cfn-hup runner template "#{node['cluster']['scripts_dir']}/cfn-hup-runner.sh" do - source "cfn_bootstrap/cfn-hup-runner.sh.erb" + source "cfn_hup_configuration/cfn-hup-runner.sh.erb" owner 'root' group 'root' mode '0744' diff --git a/cookbooks/aws-parallelcluster-environment/resources/cfn_hup_configuration.rb b/cookbooks/aws-parallelcluster-environment/resources/cfn_hup_configuration.rb new file mode 100644 index 0000000000..60ecc760af --- /dev/null +++ b/cookbooks/aws-parallelcluster-environment/resources/cfn_hup_configuration.rb @@ -0,0 +1,104 @@ +# frozen_string_literal: true + 
+# +# Copyright:: 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the +# License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and +# limitations under the License. + +provides :cfn_hup_configuration +unified_mode true +default_action :configure + +action :configure do + cloudformation_url = "https://cloudformation.#{node['cluster']['region']}.#{node['cluster']['aws_domain']}" + instance_role_name = lambda { + # IMDS is not available on Docker + return "FAKE_INSTANCE_ROLE_NAME" if on_docker? + get_metadata_with_token(get_metadata_token, URI("http://169.254.169.254/latest/meta-data/iam/security-credentials")) + }.call + + directory '/etc/cfn' do + owner 'root' + group 'root' + mode '0700' + recursive true + end + + directory '/etc/cfn/hooks.d' do + owner 'root' + group 'root' + mode '0700' + recursive true + end + + template '/etc/cfn/cfn-hup.conf' do + source 'cfn_hup_configuration/cfn-hup.conf.erb' + owner 'root' + group 'root' + mode '0400' + variables( + stack_id: node['cluster']['stack_arn'], + region: node['cluster']['region'], + cloudformation_url: cloudformation_url, + cfn_init_role: instance_role_name + ) + end + + action_extra_configuration + + template '/etc/cfn/hooks.d/pcluster-update.conf' do + source "cfn_hup_configuration/cfn-hook-update.conf.erb" + owner 'root' + group 'root' + mode '0400' + variables( + stack_id: node['cluster']['stack_arn'], + region: node['cluster']['region'], + cloudformation_url: cloudformation_url, + cfn_init_role: instance_role_name, + launch_template_resource_id: node['cluster']['launch_template_id'], + update_hook_script_dir: 
node['cluster']['scripts_dir'], + node_bootstrap_timeout: node['cluster']['compute_node_bootstrap_timeout'] || node['cluster']['Timeout'] + ) + end +end + +action :extra_configuration do + case node['cluster']['node_type'] + when 'HeadNode' + cookbook_file "#{node['cluster']['scripts_dir']}/share_compute_fleet_dna.py" do + source 'cfn_hup_configuration/share_compute_fleet_dna.py' + owner 'root' + group 'root' + mode '0700' + action :create_if_missing + end + + directory "#{node['cluster']['shared_dir']}/dna" + + when 'ComputeFleet' + template "#{node['cluster']['scripts_dir']}/cfn-hup-update-action.sh" do + source "cfn_hup_configuration/#{node['cluster']['node_type']}/cfn-hup-update-action.sh.erb" + owner 'root' + group 'root' + mode '0700' + variables( + monitor_shared_dir: monitor_shared_dir, + launch_template_resource_id: node['cluster']['launch_template_id'] + ) + end + end +end + +action_class do + def monitor_shared_dir + "#{node['cluster']['shared_dir']}/dna" + end +end diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/cfn_bootstrap_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/cfn_bootstrap_spec.rb index 4e12931a1f..73bd8cd4e5 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/cfn_bootstrap_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/cfn_bootstrap_spec.rb @@ -77,7 +77,7 @@ it 'adds cfn-hup runner' do is_expected.to create_template("#{node['cluster']['scripts_dir']}/cfn-hup-runner.sh").with( - source: "cfn_bootstrap/cfn-hup-runner.sh.erb", + source: "cfn_hup_configuration/cfn-hup-runner.sh.erb", owner: 'root', group: 'root', mode: '0744', diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_cfn_hup_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_cfn_hup_spec.rb deleted file mode 100644 index 0e3c372a58..0000000000 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_cfn_hup_spec.rb +++ 
/dev/null @@ -1,90 +0,0 @@ -# frozen_string_literal: true - -# Copyright:: 2024 Amazon.com, Inc. and its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the -# License. A copy of the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES -# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and -# limitations under the License. - -require "spec_helper" - -describe "aws-parallelcluster-environment::config_cfn_hup" do - AWS_REGION = "AWS_REGION" - AWS_DOMAIN = "AWS_DOMAIN" - STACK_ID = "STACK_ID" - CLOUDFORMATION_URL = "https://cloudformation.#{AWS_REGION}.#{AWS_DOMAIN}" - INSTANCE_ROLE_NAME = "INSTANCE_ROLE_NAME" - LAUNCH_TEMPLATE_ID = "LAUNCH_TEMPLATE_ID" - - for_all_oses do |platform, version| - context "on #{platform}#{version}" do - for_all_node_types do |node_type| - context "when #{node_type}" do - cached(:chef_run) do - runner = runner(platform: platform, version: version) do |node| - allow_any_instance_of(Object).to receive(:get_metadata_token).and_return("IMDS_TOKEN") - allow_any_instance_of(Object).to receive(:get_metadata_with_token) - .with("IMDS_TOKEN", URI("http://169.254.169.254/latest/meta-data/iam/security-credentials")) - .and_return(INSTANCE_ROLE_NAME) - - node.override["cluster"]["node_type"] = node_type - node.override["cluster"]["region"] = AWS_REGION - node.override["cluster"]["aws_domain"] = AWS_DOMAIN - # TODO: We inject the stack id into the attribute stack_arn when generating the dna.json in the CLI. - # This should be fixed at the CLI level first and adapt the cookbook accordingly. 
- node.override["cluster"]["stack_arn"] = STACK_ID - node.override["cluster"]["launch_template_id"] = LAUNCH_TEMPLATE_ID - end - runner.converge(described_recipe) - end - cached(:node) { chef_run.node } - - %w(/etc/cfn /etc/cfn/hooks.d).each do |dir| - it "creates the directory #{dir}" do - is_expected.to create_directory(dir).with( - owner: "root", - group: "root", - mode: "0770", - recursive: true - ) - end - end - - it "creates the file /etc/cfn/cfn-hup.conf" do - is_expected.to create_template("/etc/cfn/cfn-hup.conf") - .with(source: 'cfn_bootstrap/cfn-hup.conf.erb') - .with(user: "root") - .with(group: "root") - .with(mode: "0400") - .with(variables: { - stack_id: STACK_ID, - region: AWS_REGION, - cloudformation_url: CLOUDFORMATION_URL, - cfn_init_role: INSTANCE_ROLE_NAME, - }) - end - - it "creates the file /etc/cfn/hooks.d/pcluster-update.conf" do - is_expected.to create_template("/etc/cfn/hooks.d/pcluster-update.conf") - .with(source: 'cfn_bootstrap/cfn-hook-update.conf.erb') - .with(user: "root") - .with(group: "root") - .with(mode: "0400") - .with(variables: { - stack_id: STACK_ID, - region: AWS_REGION, - cloudformation_url: CLOUDFORMATION_URL, - cfn_init_role: INSTANCE_ROLE_NAME, - launch_template_resource_id: LAUNCH_TEMPLATE_ID, - }) - end - end - end - end - end -end diff --git a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_spec.rb index e4a80b2679..a0d1402048 100644 --- a/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_spec.rb +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/recipes/config_spec.rb @@ -27,7 +27,6 @@ aws-parallelcluster-environment::raid aws-parallelcluster-environment::efs aws-parallelcluster-environment::fsx - aws-parallelcluster-environment::config_cfn_hup ) @expected_recipes.each do |recipe_name| allow_any_instance_of(Chef::Recipe).to receive(:include_recipe).with(recipe_name) do diff --git 
a/cookbooks/aws-parallelcluster-environment/spec/unit/resources/cfn_hup_configuration_spec.rb b/cookbooks/aws-parallelcluster-environment/spec/unit/resources/cfn_hup_configuration_spec.rb new file mode 100644 index 0000000000..c522c426bc --- /dev/null +++ b/cookbooks/aws-parallelcluster-environment/spec/unit/resources/cfn_hup_configuration_spec.rb @@ -0,0 +1,119 @@ +require 'spec_helper' + +class ConvergeCfnHupConfiguration + def self.configure(chef_run) + chef_run.converge_dsl('aws-parallelcluster-environment') do + cfn_hup_configuration 'configure' do + action :configure + end + end + end +end + +AWS_REGION = "AWS_REGION".freeze +AWS_DOMAIN = "AWS_DOMAIN".freeze +STACK_ID = "STACK_ID".freeze +CLOUDFORMATION_URL = "https://cloudformation.#{AWS_REGION}.#{AWS_DOMAIN}".freeze +INSTANCE_ROLE_NAME = "INSTANCE_ROLE_NAME".freeze +LAUNCH_TEMPLATE_ID = "LAUNCH_TEMPLATE_ID".freeze +SCRIPT_DIR = "SCRIPT_DIR".freeze +MONITOR_SHARED_DIR = "MONITOR_SHARED_DIR".freeze +NODE_BOOTSTRAP_TIMEOUT = "1800".freeze + +describe 'cfn_hup_configuration:configure' do + for_all_oses do |platform, version| + context "on #{platform}#{version}" do + for_all_node_types do |node_type| + context "when #{node_type}" do + cached(:chef_run) do + runner = runner(platform: platform, version: version, step_into: ['cfn_hup_configuration']) do |node| + allow_any_instance_of(Object).to receive(:get_metadata_token).and_return("IMDS_TOKEN") + allow_any_instance_of(Object).to receive(:get_metadata_with_token) + .with("IMDS_TOKEN", URI("http://169.254.169.254/latest/meta-data/iam/security-credentials")) + .and_return(INSTANCE_ROLE_NAME) + node.override["cluster"]["node_type"] = node_type + node.override["cluster"]["region"] = AWS_REGION + node.override["cluster"]["aws_domain"] = AWS_DOMAIN + # TODO: We inject the stack id into the attribute stack_arn when generating the dna.json in the CLI. + # This should be fixed at the CLI level first and adapt the cookbook accordingly. 
+ node.override["cluster"]["stack_arn"] = STACK_ID + node.override["cluster"]["launch_template_id"] = LAUNCH_TEMPLATE_ID + node.override['cluster']['scripts_dir'] = SCRIPT_DIR + node.override['cluster']['shared_dir'] = MONITOR_SHARED_DIR + node.override['cluster']['compute_node_bootstrap_timeout'] = NODE_BOOTSTRAP_TIMEOUT + end + ConvergeCfnHupConfiguration.configure(runner) + end + cached(:node) { chef_run.node } + + %w(/etc/cfn /etc/cfn/hooks.d).each do |dir| + it "creates the directory #{dir}" do + is_expected.to create_directory(dir) + .with(owner: 'root') + .with(group: 'root') + .with(mode: "0700") + .with(recursive: true) + end + end + + it "creates the file /etc/cfn/cfn-hup.conf" do + is_expected.to create_template("/etc/cfn/cfn-hup.conf") + .with(source: 'cfn_hup_configuration/cfn-hup.conf.erb') + .with(user: "root") + .with(group: "root") + .with(mode: "0400") + .with(variables: { + stack_id: STACK_ID, + region: AWS_REGION, + cloudformation_url: CLOUDFORMATION_URL, + cfn_init_role: INSTANCE_ROLE_NAME, + }) + end + + it "creates the file /etc/cfn/hooks.d/pcluster-update.conf" do + is_expected.to create_template("/etc/cfn/hooks.d/pcluster-update.conf") + .with(source: 'cfn_hup_configuration/cfn-hook-update.conf.erb') + .with(user: "root") + .with(group: "root") + .with(mode: "0400") + .with(variables: { + stack_id: STACK_ID, + region: AWS_REGION, + cloudformation_url: CLOUDFORMATION_URL, + cfn_init_role: INSTANCE_ROLE_NAME, + launch_template_resource_id: LAUNCH_TEMPLATE_ID, + update_hook_script_dir: SCRIPT_DIR, + node_bootstrap_timeout: NODE_BOOTSTRAP_TIMEOUT, + }) + end + + if %(ComputeFleet).include?(node_type) + it "creates the file #{SCRIPT_DIR}/cfn-hup-update-action.sh" do + is_expected.to create_template("#{SCRIPT_DIR}/cfn-hup-update-action.sh") + .with(source: "cfn_hup_configuration/#{node_type}/cfn-hup-update-action.sh.erb") + .with(user: "root") + .with(group: "root") + .with(mode: "0700") + .with(variables: { + monitor_shared_dir: 
"#{MONITOR_SHARED_DIR}/dna", + launch_template_resource_id: LAUNCH_TEMPLATE_ID, + }) + end + elsif node_type == 'HeadNode' + it "creates #{SCRIPT_DIR}/share_compute_fleet_dna.py" do + is_expected.to create_if_missing_cookbook_file("#{SCRIPT_DIR}/share_compute_fleet_dna.py") + .with(source: 'cfn_hup_configuration/share_compute_fleet_dna.py') + .with(user: 'root') + .with(group: 'root') + .with(mode: '0700') + end + + it "creates the directory #{MONITOR_SHARED_DIR}/dna" do + is_expected.to create_directory("#{MONITOR_SHARED_DIR}/dna") + end + end + end + end + end + end +end diff --git a/cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/ComputeFleet/cfn-hup-update-action.sh.erb b/cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/ComputeFleet/cfn-hup-update-action.sh.erb new file mode 100644 index 0000000000..648603d6aa --- /dev/null +++ b/cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/ComputeFleet/cfn-hup-update-action.sh.erb @@ -0,0 +1,45 @@ +#!/bin/bash +set -ex + +# This script is invoked by cfn-hup as part of its update hook action. +# This script runs on each node of ComputeFleet to monitor the shared location for latest dna.json and extra.json files and run ParallelCluster Cookbook recipes. 
+# +# Usage: ./cfn-hup-update-action.sh + +function run_cookbook_recipes() { + LATEST_DNA_LOC=<%= @monitor_shared_dir %> + LATEST_DNA_FILE=$LATEST_DNA_LOC/<%= @launch_template_resource_id %>-dna.json + LATEST_EXTRA_FILE=$LATEST_DNA_LOC/extra.json + + GET_DNA_FILE=true + while $GET_DNA_FILE; do + if [[ -f $LATEST_DNA_FILE ]]; then + GET_DNA_FILE=false + cp $LATEST_DNA_FILE /tmp/dna.json + chown root:root /tmp/dna.json + chmod 000644 /tmp/dna.json + cp $LATEST_EXTRA_FILE /tmp/extra.json + chown root:root /tmp/extra.json + chmod 000644 /tmp/extra.json + mkdir -p /etc/chef/ohai/hints + touch /etc/chef/ohai/hints/ec2.json + jq -s ".[0] * .[1]" /tmp/dna.json /tmp/extra.json > /etc/chef/dna.json || ( echo "jq not installed"; cp /tmp/dna.json /etc/chef/dna.json ) + cd /etc/chef + cinc-client --local-mode --config /etc/chef/client.rb --log_level info --logfile /var/log/chef-client.log --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist aws-parallelcluster-entrypoints::update && /opt/parallelcluster/scripts/fetch_and_run -postupdate + + fi + + sleep 60 + done +} + + +main() { + PATH=/usr/local/bin:/bin:/usr/bin:/opt/aws/bin; + . /etc/parallelcluster/pcluster_cookbook_environment.sh; + echo "We monitor <%= @monitor_shared_dir %> to check for <%= @launch_template_resource_id %>-dna.json has been added and run ParallelCluster cookbook recipes." 
+ run_cookbook_recipes +} + + +main "$@" diff --git a/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hook-update.conf.erb b/cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/cfn-hook-update.conf.erb similarity index 64% rename from cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hook-update.conf.erb rename to cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/cfn-hook-update.conf.erb index 075895042f..d918ab4334 100644 --- a/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hook-update.conf.erb +++ b/cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/cfn-hook-update.conf.erb @@ -1,5 +1,11 @@ [parallelcluster-update] triggers=post.update +<% case node['cluster']['node_type'] -%> +<% when 'HeadNode', 'LoginNode' -%> path=Resources.<%= @launch_template_resource_id %>.Metadata.AWS::CloudFormation::Init action=PATH=/usr/local/bin:/bin:/usr/bin:/opt/aws/bin; . 
/etc/parallelcluster/pcluster_cookbook_environment.sh; $CFN_BOOTSTRAP_VIRTUALENV_PATH/cfn-init -v --stack <%= @stack_id %> --resource <%= @launch_template_resource_id %> --configsets update --region <%= @region %> --url <%= @cloudformation_url %> --role <%= @cfn_init_role %> +<% when 'ComputeFleet' -%> +path=Resources.<%= @launch_template_resource_id %> +action=timeout <%= @node_bootstrap_timeout %> <%= @update_hook_script_dir %>/cfn-hup-update-action.sh +<% end %> runas=root diff --git a/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hup-runner.sh.erb b/cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/cfn-hup-runner.sh.erb similarity index 100% rename from cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hup-runner.sh.erb rename to cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/cfn-hup-runner.sh.erb diff --git a/cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hup.conf.erb b/cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/cfn-hup.conf.erb similarity index 100% rename from cookbooks/aws-parallelcluster-environment/templates/cfn_bootstrap/cfn-hup.conf.erb rename to cookbooks/aws-parallelcluster-environment/templates/cfn_hup_configuration/cfn-hup.conf.erb diff --git a/cookbooks/aws-parallelcluster-environment/test/controls/cfn_hup_configuration_spec.rb b/cookbooks/aws-parallelcluster-environment/test/controls/cfn_hup_configuration_spec.rb new file mode 100644 index 0000000000..49a3fb7d43 --- /dev/null +++ b/cookbooks/aws-parallelcluster-environment/test/controls/cfn_hup_configuration_spec.rb @@ -0,0 +1,60 @@ +# Copyright:: 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. 
A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. +# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +control 'tag:config_cfn_hup_conf_files_created' do + title "cfn_hup configuration files and directories should be created" + + %w(/etc/cfn /etc/cfn/hooks.d).each do |dir| + describe directory(dir) do + it { should exist } + its('mode') { should cmp '0700' } + its('owner') { should eq 'root' } + its('group') { should eq 'root' } + end + end + + %w(/etc/cfn/cfn-hup.conf /etc/cfn/hooks.d/pcluster-update.conf).each do |conf_file| + describe file(conf_file) do + it { should exist } + its('mode') { should cmp '0400' } + its('owner') { should eq 'root' } + its('group') { should eq 'root' } + end + end +end + +control 'tag:config_cfn_hup_head_node_configuration' do + title "cfn_hup configuration files and directories for HeadNode should be created" + only_if { instance.head_node? } + + describe file("#{node['cluster']['scripts_dir']}/share_compute_fleet_dna.py") do + it { should exist } + its('mode') { should cmp '0700' } + its('owner') { should eq 'root' } + its('group') { should eq 'root' } + end + + describe directory("#{node['cluster']['shared_dir']}/dna") do + it { should exist } + end +end + +control 'tag:config_cfn_hup_compute_configuration' do + title "cfn_hup configuration files and directories for ComputeFleet should be created" + only_if { instance.compute_node? 
} + + describe file("#{node['cluster']['scripts_dir']}/cfn-hup-update-action.sh") do + it { should exist } + its('mode') { should cmp '0700' } + its('owner') { should eq 'root' } + its('group') { should eq 'root' } + end +end diff --git a/cookbooks/aws-parallelcluster-platform/recipes/update.rb b/cookbooks/aws-parallelcluster-platform/recipes/update.rb index c8822f66cf..f568b1067c 100644 --- a/cookbooks/aws-parallelcluster-platform/recipes/update.rb +++ b/cookbooks/aws-parallelcluster-platform/recipes/update.rb @@ -15,6 +15,7 @@ # OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and # limitations under the License. +fetch_dna_files "Fetch ComputeFleet's Dna files" fetch_config 'Fetch and load cluster configs' do update true end diff --git a/cookbooks/aws-parallelcluster-platform/resources/fetch_dna_files.rb b/cookbooks/aws-parallelcluster-platform/resources/fetch_dna_files.rb new file mode 100644 index 0000000000..c3c4c5e15b --- /dev/null +++ b/cookbooks/aws-parallelcluster-platform/resources/fetch_dna_files.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +# +# Copyright:: 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the +# License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and +# limitations under the License. + +resource_name :fetch_dna_files +provides :fetch_dna_files +unified_mode true + +property :extra_chef_attribute_location, String, default: '/tmp/extra.json' + +default_action :share + +action :share do + return if on_docker? 
+ return unless node['cluster']['node_type'] == 'HeadNode' + + Chef::Log.info("Share extra.json with ComputeFleet") + ::FileUtils.cp_r(new_resource.extra_chef_attribute_location, "#{node['cluster']['shared_dir']}/dna/extra.json", remove_destination: true) if ::File.exist?(new_resource.extra_chef_attribute_location) + + execute "Run share_compute_fleet_dna.py to get user_data.sh and share dna.json with ComputeFleet" do + command "#{cookbook_virtualenv_path}/bin/python #{node['cluster']['scripts_dir']}/share_compute_fleet_dna.py" \ + " --region #{node['cluster']['region']}" + timeout 30 + retries 10 + retry_delay 90 + end +end + +action :cleanup do + return if on_docker? + return unless node['cluster']['node_type'] == 'HeadNode' + + execute "Cleanup dna.json and extra.json from #{node['cluster']['shared_dir']}/dna" do + command "#{cookbook_virtualenv_path}/bin/python #{node['cluster']['scripts_dir']}/share_compute_fleet_dna.py" \ + " --region #{node['cluster']['region']} --cleanup" + timeout 30 + retries 10 + retry_delay 90 + end +end diff --git a/cookbooks/aws-parallelcluster-platform/spec/unit/resources/fetch_dna_files_spec.rb b/cookbooks/aws-parallelcluster-platform/spec/unit/resources/fetch_dna_files_spec.rb new file mode 100644 index 0000000000..8a6d937814 --- /dev/null +++ b/cookbooks/aws-parallelcluster-platform/spec/unit/resources/fetch_dna_files_spec.rb @@ -0,0 +1,78 @@ +require 'spec_helper' + +class ConvergeFetchDnaFiles + def self.share(chef_run, extra_chef_attribute_location: nil) + chef_run.converge_dsl('aws-parallelcluster-platform') do + fetch_dna_files 'share' do + extra_chef_attribute_location extra_chef_attribute_location + action :share + end + end + end + + def self.cleanup(chef_run) + chef_run.converge_dsl('aws-parallelcluster-platform') do + fetch_dna_files 'cleanup' do + action :cleanup + end + end + end +end + +describe 'fetch_dna_files resource' do + for_all_oses do |platform, version| + context "on #{platform}#{version}" do + 
cached(:script_dir) { 'SCRIPT_DIR' } + cached(:shared_dir) { 'SHARED_DIR' } + cached(:region) { 'REGION' } + + context "when we share dna files" do + cached(:chef_run) do + runner = runner(platform: platform, version: version, step_into: ['fetch_dna_files']) do |node| + node.override['cluster']['scripts_dir'] = script_dir + node.override['cluster']['shared_dir'] = shared_dir + node.override['cluster']['node_type'] = 'HeadNode' + node.override['cluster']['region'] = region + node.override['kitchen'] = true + end + ConvergeFetchDnaFiles.share(runner, extra_chef_attribute_location: "#{kitchen_instance_types_data_path}") + end + cached(:node) { chef_run.node } + + # it "it copies data from /tmp/extra.json" do + # is_expected.to create_remote_file("copy extra.json") + # .with(path: "#{shared_dir}/dna/extra.json") + # .with(source: "file://#{kitchen_instance_types_data_path}") + # end + + it 'runs share_compute_fleet_dna.py to get dna files' do + is_expected.to run_execute('Run share_compute_fleet_dna.py to get user_data.sh and share dna.json with ComputeFleet').with( + command: "#{cookbook_virtualenv_path}/bin/python #{node['cluster']['scripts_dir']}/share_compute_fleet_dna.py" \ + " --region #{node['cluster']['region']}" + ) + end + end + + context "when we cleanup dna files" do + cached(:chef_run) do + runner = runner(platform: platform, version: version, step_into: ['fetch_dna_files']) do |node| + node.override['cluster']['scripts_dir'] = script_dir + node.override['cluster']['shared_dir'] = shared_dir + node.override['cluster']['node_type'] = 'HeadNode' + node.override['cluster']['region'] = region + end + allow_any_instance_of(Object).to receive(:aws_domain).and_return(aws_domain) + ConvergeFetchDnaFiles.cleanup(runner) + end + cached(:node) { chef_run.node } + + it 'cleanups dna files' do + is_expected.to run_execute("Cleanup dna.json and extra.json from #{node['cluster']['shared_dir']}/dna").with( + command: "#{cookbook_virtualenv_path}/bin/python 
#{node['cluster']['scripts_dir']}/share_compute_fleet_dna.py" \ + " --region #{node['cluster']['region']} --cleanup" + ) + end + end + end + end +end diff --git a/cookbooks/aws-parallelcluster-slurm/Berksfile b/cookbooks/aws-parallelcluster-slurm/Berksfile index 8c9bf7fb1e..dcbf3cf120 100644 --- a/cookbooks/aws-parallelcluster-slurm/Berksfile +++ b/cookbooks/aws-parallelcluster-slurm/Berksfile @@ -5,6 +5,7 @@ metadata cookbook "aws-parallelcluster-computefleet", path: "../aws-parallelcluster-computefleet" cookbook "aws-parallelcluster-environment", path: "../aws-parallelcluster-environment" cookbook "aws-parallelcluster-shared", path: "../aws-parallelcluster-shared" +cookbook "aws-parallelcluster-platform", path: "../aws-parallelcluster-platform" cookbook "iptables", path: "../third-party/iptables-8.0.0" cookbook "line", path: "../third-party/line-4.5.21" diff --git a/cookbooks/aws-parallelcluster-slurm/metadata.rb b/cookbooks/aws-parallelcluster-slurm/metadata.rb index 31311ddf2f..4f3f227988 100644 --- a/cookbooks/aws-parallelcluster-slurm/metadata.rb +++ b/cookbooks/aws-parallelcluster-slurm/metadata.rb @@ -18,3 +18,4 @@ depends 'aws-parallelcluster-computefleet', '~> 3.13.0' depends 'aws-parallelcluster-environment', '~> 3.13.0' depends 'aws-parallelcluster-shared', '~> 3.13.0' +depends 'aws-parallelcluster-platform', '~> 3.13.0' diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb b/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb index 9358dd0e78..7a131f706a 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/update/update_head_node.rb @@ -281,3 +281,7 @@ def update_nodes_in_queue(strategy, queues) cookbook 'aws-parallelcluster-environment' mode '0644' end + +fetch_dna_files 'Cleanup' do + action :cleanup +end diff --git a/pytest.ini b/pytest.ini index 6629cd67a8..e60f70bc63 100644 --- a/pytest.ini +++ b/pytest.ini @@ -13,6 
+13,7 @@ pythonpath = cookbooks/aws-parallelcluster-platform/files/dcv cookbooks/aws-parallelcluster-environment/files/cloudwatch + cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration cookbooks/aws-parallelcluster-computefleet/files/compute_fleet_status cookbooks/aws-parallelcluster-computefleet/files/clusterstatusmgtd cookbooks/aws-parallelcluster-environment/files/custom_action_executor diff --git a/test/unit/cfn_hup_configuration/test_share_compute_fleet_dna.py b/test/unit/cfn_hup_configuration/test_share_compute_fleet_dna.py new file mode 100644 index 0000000000..cc85c2c098 --- /dev/null +++ b/test/unit/cfn_hup_configuration/test_share_compute_fleet_dna.py @@ -0,0 +1,175 @@ +# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and +# limitations under the License. 
+import json +import os +from base64 import b64encode +from unittest.mock import MagicMock, patch + +import boto3 +import pytest +from assertpy import assert_that +from botocore.stub import Stubber +from share_compute_fleet_dna import ( + get_compute_launch_template_ids, + get_user_data, + get_write_directives_section, + parse_proxy_config, +) + + +@pytest.mark.parametrize( + ("launch_template_config_content", "errors"), + [ + ( + """ + { + "Queues": { + "queue-0": { + "ComputeResources": { + "compute-resource-1": { + "LaunchTemplate": { + "Version": "1", + "Id": "lt-037123456747c3bc5" + } + }, + "compute-resource-2": { + "LaunchTemplate": { + "Version": "1", + "Id": "lt-0fcecb59a3721c0b3" + } + }, + "compute-resource-0": { + "LaunchTemplate": { + "Version": "1", + "Id": "lt-12345678901234567" + } + } + } + } + } + } + """, + False, + ), + ('{"Queues":{"queue-0":}}}', True), + ], +) +def test_get_compute_launch_template_ids(mocker, launch_template_config_content, errors): + mocker.patch("builtins.open", mocker.mock_open(read_data=launch_template_config_content)) + actual_op = get_compute_launch_template_ids(launch_template_config_content) + if errors: + assert_that(actual_op).is_none() + else: + assert_that(actual_op).is_equal_to(json.loads(launch_template_config_content)) + + +@pytest.mark.parametrize( + ("mime_user_data_file", "write_section"), + [ + ( + "user_data_1.txt", + [ + { + "path": "/tmp/dna.json", # nosec B108 + "permissions": "0644", + "owner": "root:root", + "content": '{"cluster":{"base_os":"alinux2","cluster_name":"clustername",' + '"directory_service":{"domain_read_only_user":"","enabled":"false",' + '"generate_ssh_keys_for_users":"false"},' + '"launch_template_id":"LoginNodeLaunchTemplate2736fab291f04e69"}}\n', + }, + { + "path": "/tmp/extra.json", # nosec B108 + "permissions": "0644", + "owner": "root:root", + "content": "{}\n", + }, + { + "path": "/tmp/bootstrap.sh", # nosec B108 + "permissions": "0744", + "owner": "root:root", + "content": 
'#!/bin/bash -x\n\nfunction error_exit\n{\n echo "Bootstrap failed"\n}\n', + }, + ], + ), + ( + "user_data_2.txt", + [ + { + "content": '{"cluster":{"base_os":"alinux2"}}\n', + "owner": "root:root", + "path": "/tmp/dna.json", # nosec B108 + "permissions": "0644", + }, + { + "content": '{"cluster": {"nvidia": {"enabled": "yes" }, "is_official_ami_build": "true"}}\n', + "owner": "root:root", + "path": "/tmp/extra.json", # nosec B108 + "permissions": "0644", + }, + { + "content": '#!/bin/bash -x\n\necho "Bootstrap failed with error: $1"\n', + "owner": "root:root", + "path": "/tmp/bootstrap.sh", # nosec B108 + "permissions": "0744", + }, + ], + ), + ("", None), + ], +) +def test_get_write_directives_section(mime_user_data_file, write_section, test_datadir): + input_mime_user_data = None + if mime_user_data_file: + with open(os.path.join(test_datadir, mime_user_data_file), "r", encoding="utf-8") as file: + input_mime_user_data = file.read().strip() + + assert_that(get_write_directives_section(input_mime_user_data)).is_equal_to(write_section) + + +@pytest.mark.parametrize(("error", "proxy", "port"), [(True, "myproxy.com", "8080"), (False, "", "")]) +def test_parse_proxy_config(error, proxy, port): + mock_config = MagicMock(return_value=error) + mock_config.get.side_effect = [proxy, port] + expected_op = {"https": proxy + ":" + port} + with patch("configparser.RawConfigParser", return_value=mock_config): + assert_that(parse_proxy_config().proxies).is_equal_to(expected_op) + + +def ec2_describe_launch_template_versions_mock(response, lt_id, lt_version): + e2_client = boto3.client("ec2", region_name="us-east-1") + stubber = Stubber(e2_client) + stubber.add_response( + "describe_launch_template_versions", response, {"LaunchTemplateId": lt_id, "Versions": [lt_version]} + ) + stubber.activate() + return e2_client, stubber + + +@pytest.mark.parametrize( + ("expected_user_data"), + [("#!/bin/bash\necho 'Test'"), ("")], +) +def test_get_user_data(expected_user_data): + lt_id, 
lt_version = "lt-12345678901234567", "1" + ec2_response = { + "LaunchTemplateVersions": [ + {"LaunchTemplateData": {"UserData": b64encode(expected_user_data.encode()).decode("utf-8")}} + ] + } + + ec2_client, stubber = ec2_describe_launch_template_versions_mock(ec2_response, lt_id, lt_version) + + with patch("boto3.client") as mock_client: + mock_client.return_value = ec2_client + with stubber: + assert_that(get_user_data(lt_id, lt_version, "us-east-1")).is_equal_to(expected_user_data) + stubber.deactivate() diff --git a/test/unit/cfn_hup_configuration/test_share_compute_fleet_dna/test_get_write_directives_section/user_data_1.txt b/test/unit/cfn_hup_configuration/test_share_compute_fleet_dna/test_get_write_directives_section/user_data_1.txt new file mode 100644 index 0000000000..e8e35affad --- /dev/null +++ b/test/unit/cfn_hup_configuration/test_share_compute_fleet_dna/test_get_write_directives_section/user_data_1.txt @@ -0,0 +1,53 @@ +Content-Type: multipart/mixed; boundary="==BOUNDARY==" +MIME-Version: 1.0 + +--==BOUNDARY== +Content-Type: text/cloud-boothook; charset="us-ascii" +MIME-Version: 1.0 + +#!/bin/bash -x + + which dnf 2>/dev/null; dnf=$? + which yum 2>/dev/null; yum=$? 
+ +--==BOUNDARY== +Content-Type: text/cloud-config; charset=us-ascii +MIME-Version: 1.0 + +bootcmd: + +output: + all: "| tee -a /var/log/cloud-init-output.log | logger -t user-data -s 2>/dev/ttyS0" +write_files: + - path: /tmp/dna.json + permissions: '0644' + owner: root:root + content: | + {"cluster":{"base_os":"alinux2","cluster_name":"clustername","directory_service":{"domain_read_only_user":"","enabled":"false","generate_ssh_keys_for_users":"false"},"launch_template_id":"LoginNodeLaunchTemplate2736fab291f04e69"}} + - path: /tmp/extra.json + permissions: '0644' + owner: root:root + content: | + {} + - path: /tmp/bootstrap.sh + permissions: '0744' + owner: root:root + content: | + #!/bin/bash -x + + function error_exit + { + echo "Bootstrap failed" + } + +--==BOUNDARY== +Content-Type: text/x-shellscript; charset="us-ascii" +MIME-Version: 1.0 + +#!/bin/bash -x + +function error_exit +{ + exit 1 +} +--==BOUNDARY== diff --git a/test/unit/cfn_hup_configuration/test_share_compute_fleet_dna/test_get_write_directives_section/user_data_2.txt b/test/unit/cfn_hup_configuration/test_share_compute_fleet_dna/test_get_write_directives_section/user_data_2.txt new file mode 100644 index 0000000000..b4481a3c70 --- /dev/null +++ b/test/unit/cfn_hup_configuration/test_share_compute_fleet_dna/test_get_write_directives_section/user_data_2.txt @@ -0,0 +1,46 @@ +Content-Type: multipart/mixed; boundary="==BOUNDARY==" +MIME-Version: 1.0 + +--==BOUNDARY== +Content-Type: text/cloud-boothook; charset="us-ascii" +MIME-Version: 1.0 + +#!/bin/bash -x + + which dnf 2>/dev/null; dnf=$? + which yum 2>/dev/null; yum=$? 
+ +--==BOUNDARY== +Content-Type: text/cloud-config; charset=us-ascii +MIME-Version: 1.0 + +write_files: + - path: /tmp/dna.json + permissions: '0644' + owner: root:root + content: | + {"cluster":{"base_os":"alinux2"}} + - path: /tmp/extra.json + permissions: '0644' + owner: root:root + content: | + {"cluster": {"nvidia": {"enabled": "yes" }, "is_official_ami_build": "true"}} + - path: /tmp/bootstrap.sh + permissions: '0744' + owner: root:root + content: | + #!/bin/bash -x + + echo "Bootstrap failed with error: $1" + +--==BOUNDARY== +Content-Type: text/x-shellscript; charset="us-ascii" +MIME-Version: 1.0 + +#!/bin/bash -x +function error_exit +{ + exit 1 +} + +--==BOUNDARY== \ No newline at end of file diff --git a/tox.ini b/tox.ini index 39606116c1..c5b7cb6c57 100644 --- a/tox.ini +++ b/tox.ini @@ -38,6 +38,7 @@ src_dirs = {toxinidir}/cookbooks/aws-parallelcluster-computefleet/files/clusterstatusmgtd \ {toxinidir}/cookbooks/aws-parallelcluster-environment/files/custom_action_executor \ {toxinidir}/cookbooks/aws-parallelcluster-environment/files/default/ec2_udev_rules \ + {toxinidir}/cookbooks/aws-parallelcluster-environment/files/cfn_hup_configuration \ {toxinidir}/cookbooks/aws-parallelcluster-slurm/files/default/head_node_slurm \ {toxinidir}/cookbooks/aws-parallelcluster-slurm/files/default/head_node_checks \ {toxinidir}/cookbooks/aws-parallelcluster-slurm/files/default/config_slurm/scripts