7 changes: 7 additions & 0 deletions cli/src/pcluster/models/s3_bucket.py
@@ -42,6 +42,9 @@ class S3FileType(Enum):
"""Define S3 file types."""

ASSETS = "assets"
HEAD_NODE_DNA_ASSETS = f"{ASSETS}/HeadNode"
COMPUTE_DNA_ASSETS = f"{ASSETS}/ComputeNode"
LOGIN_NODE_DNA_ASSETS = f"{ASSETS}/LoginNode"
CONFIGS = "configs"
TEMPLATES = "templates"
CUSTOM_RESOURCES = "custom_resources"
@@ -214,6 +217,10 @@ def upload_cfn_asset(self, asset_file_content, asset_name: str, format=S3FileFor
file_type=S3FileType.ASSETS, content=asset_file_content, file_name=asset_name, format=format
)

def upload_dna_cfn_asset(self, asset_file_content, asset_name: str, file_type: S3FileType = S3FileType.ASSETS, format=S3FileFormat.YAML):
"""Upload a CloudFormation DNA asset to the S3 bucket under the given file-type prefix."""
return self.upload_file(file_type=file_type, content=asset_file_content, file_name=asset_name, format=format)

def upload_resources(self, resource_dir, custom_artifacts_name):
"""
Upload custom resources to S3 bucket.
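The new enum members nest the per-node DNA assets under the existing `assets/` prefix, and `upload_dna_cfn_asset` forwards the chosen `file_type` straight to `upload_file`. A minimal sketch of how the resulting object keys compose, assuming the bucket lays files out as `<artifact_directory>/<file_type>/<file_name>`; the artifact directory, config version, and head-node file name below are hypothetical placeholders, not values taken from this diff:

```python
# A minimal sketch (assumption): how the new S3FileType values and
# upload_dna_cfn_asset compose into S3 object keys. The artifact directory,
# config version and file names are hypothetical placeholders.
ARTIFACT_DIR = "parallelcluster/clusters/demo-abcd1234"  # hypothetical
CONFIG_VERSION = "ABC123"                                # hypothetical


def asset_key(file_type_value: str, file_name: str) -> str:
    """Compose the key the same way upload_file is expected to: <dir>/<type>/<name>."""
    return f"{ARTIFACT_DIR}/{file_type_value}/{file_name}"


# assets/              -> shared DNA fragments
print(asset_key("assets", f"common-dna-{CONFIG_VERSION}.json"))
# assets/HeadNode/     -> head-node-specific DNA fragments (hypothetical name)
print(asset_key("assets/HeadNode", f"head-node-dna-{CONFIG_VERSION}.json"))
# assets/ComputeNode/  -> per-launch-template compute DNA fragments
print(asset_key("assets/ComputeNode", f"compute-dna-MyQueueLT-{CONFIG_VERSION}.json"))
```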
27 changes: 25 additions & 2 deletions cli/src/pcluster/resources/compute_node/user_data.sh
@@ -51,6 +51,15 @@ datasource_list: [ Ec2, None ]
output:
all: "| tee -a /var/log/cloud-init-output.log | logger -t user-data -s 2>/dev/ttyS0"
write_files:
- path: /tmp/stack-arn.json
permissions: '0644'
owner: root:root
content: |
{
"cluster":{
"stack_arn": "${AWS::StackId}"
}
}
- path: /tmp/bootstrap.sh
permissions: '0744'
owner: root:root
@@ -93,11 +102,23 @@ write_files:
fi
}

function get_s3_dna_json_files
{
AWS_RETRY_MODE=standard
JSON_FILES="common-dna-${ClusterConfigVersion}.json;common-dna.json ComputeNode/compute-dna-${LaunchTemplateResourceId}-${ClusterConfigVersion}.json;compute-dna.json extra-${ClusterConfigVersion}.json;extra.json"
for file_name_tuple in ${!JSON_FILES[@]}; do
file_names=(${!file_name_tuple//;/ })
s3_prefix_to_download=${!file_names[0]}
tmp_file_name=${!file_names[1]}
S3_RESULT=$(aws s3api get-object --bucket ${S3_BUCKET} --key "${S3_ARTIFACT_DIR}/assets/${!s3_prefix_to_download}" --region ${AWS::Region} /tmp/${!tmp_file_name} 2>&1 ) || error_exit "${!S3_RESULT}"
done
}

export PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/opt/aws/bin

[ -f /etc/parallelcluster/pcluster_cookbook_environment.sh ] && . /etc/parallelcluster/pcluster_cookbook_environment.sh

$CFN_BOOTSTRAP_VIRTUALENV_PATH/cfn-init -s ${AWS::StackName} -v -c deployFiles -r ${LaunchTemplateResourceId} --region ${AWS::Region} --url ${CloudFormationUrl} --role ${CfnInitRole} || error_exit 'Failed to bootstrap the compute node. Please check /var/log/cfn-init.log in the compute node or in CloudWatch logs. Please refer to https://docs.aws.amazon.com/parallelcluster/latest/ug/troubleshooting-v3.html#troubleshooting-v3-get-logs for more details on ParallelCluster logs.'
get_s3_dna_json_files

[ -f /etc/profile.d/proxy.sh ] && . /etc/profile.d/proxy.sh

@@ -139,9 +160,11 @@ write_files:
vendor_cookbook
fi
cd /tmp
mkdir -p /etc/chef/ohai/hints
touch /etc/chef/ohai/hints/ec2.json

start=$(date +%s)

jq -s ".[0] * .[1] * .[2] * .[3]" /tmp/common-dna.json /tmp/compute-dna.json /tmp/stack-arn.json /tmp/extra.json > /etc/chef/dna.json || ( echo "jq not installed"; cp /tmp/common-dna.json /tmp/compute-dna.json /etc/chef/dna.json )
{
CINC_CMD="cinc-client --local-mode --config /etc/chef/client.rb --log_level info --logfile /var/log/chef-client.log --force-formatter --no-color --chef-zero-port 8889 --json-attributes /etc/chef/dna.json --override-runlist"
FR_CMD="/opt/parallelcluster/scripts/fetch_and_run"
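In the script above, the `${!VarName}` forms are CloudFormation `Fn::Sub` escapes for literal shell variables, so at runtime `get_s3_dna_json_files` iterates over `s3-name;local-name` pairs and downloads each versioned DNA fragment to a fixed path under `/tmp` before `jq` deep-merges them (later files win) into `/etc/chef/dna.json`. A rough Python equivalent of the download loop, shown only to illustrate the key/file mapping; the bucket, artifact prefix, launch template id, and config version are made-up values:

```python
# Rough, hedged equivalent of get_s3_dna_json_files; every concrete value here
# (bucket, prefix, launch template id, config version) is a made-up placeholder.
import boto3

S3_BUCKET = "cluster-bucket"                          # hypothetical
S3_ARTIFACT_DIR = "parallelcluster/clusters/demo"     # hypothetical
CONFIG_VERSION = "ABC123"                             # hypothetical
LAUNCH_TEMPLATE_ID = "ComputeNodeLaunchTemplateXYZ"   # hypothetical

JSON_FILES = [
    (f"common-dna-{CONFIG_VERSION}.json", "common-dna.json"),
    (f"ComputeNode/compute-dna-{LAUNCH_TEMPLATE_ID}-{CONFIG_VERSION}.json", "compute-dna.json"),
    (f"extra-{CONFIG_VERSION}.json", "extra.json"),
]

s3 = boto3.client("s3")
for s3_suffix, local_name in JSON_FILES:
    # Mirrors: aws s3api get-object --bucket <bucket> --key <dir>/assets/<suffix> /tmp/<local>
    s3.download_file(S3_BUCKET, f"{S3_ARTIFACT_DIR}/assets/{s3_suffix}", f"/tmp/{local_name}")
```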
24 changes: 23 additions & 1 deletion cli/src/pcluster/templates/cdk_builder_utils.py
@@ -648,7 +648,13 @@ def _build_policy(self) -> List[iam.PolicyStatement]:
iam.PolicyStatement(
sid="ResourcesS3Bucket",
effect=iam.Effect.ALLOW,
actions=["s3:GetObject", "s3:GetObjectVersion", "s3:GetBucketLocation", "s3:ListBucket"],
actions=[
"s3:GetObject",
"s3:PutObject",
"s3:GetObjectVersion",
"s3:GetBucketLocation",
"s3:ListBucket",
],
resources=[
self._format_arn(service="s3", resource=self._cluster_bucket.name, region="", account=""),
self._format_arn(
@@ -997,7 +1003,9 @@ def __init__(
node: Union[HeadNode, BaseQueue, LoginNodesPool],
shared_storage_infos: dict,
name: str,
cluster_bucket: S3Bucket,
):
self._cluster_bucket = cluster_bucket
super().__init__(scope, id, config, node, shared_storage_infos, name)

def _build_policy(self) -> List[iam.PolicyStatement]:
@@ -1023,6 +1031,20 @@ def _build_policy(self) -> List[iam.PolicyStatement]:
)
],
),
iam.PolicyStatement(
sid="S3GetLaunchTemplate",
actions=["s3:GetObject", "s3:ListBucket"],
effect=iam.Effect.ALLOW,
resources=[
self._format_arn(service="s3", resource=self._cluster_bucket.name, region="", account=""),
self._format_arn(
service="s3",
resource=f"{self._cluster_bucket.name}/{self._cluster_bucket.artifact_directory}/*",
region="",
account="",
),
],
),
iam.PolicyStatement(
sid="CloudFormation",
actions=[
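Once the two `_format_arn` calls are resolved, the new `S3GetLaunchTemplate` statement should render to roughly the policy JSON below: the bucket-level ARN covers `s3:ListBucket`, while the `<artifact_directory>/*` ARN scopes `s3:GetObject` to the cluster's artifacts. The bucket name and artifact directory are hypothetical placeholders:

```python
# Hedged sketch of the rendered statement; "cluster-bucket" and the artifact
# directory are hypothetical, not taken from this diff.
s3_get_launch_template_statement = {
    "Sid": "S3GetLaunchTemplate",
    "Effect": "Allow",
    "Action": ["s3:GetObject", "s3:ListBucket"],
    "Resource": [
        "arn:aws:s3:::cluster-bucket",
        "arn:aws:s3:::cluster-bucket/parallelcluster/clusters/demo/*",
    ],
}
```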
84 changes: 53 additions & 31 deletions cli/src/pcluster/templates/cluster_stack.py
@@ -76,7 +76,7 @@
PCLUSTER_S3_ARTIFACTS_DICT,
SLURM_PORTS_RANGE,
)
from pcluster.models.s3_bucket import S3Bucket
from pcluster.models.s3_bucket import S3Bucket, S3FileFormat, S3FileType
from pcluster.templates.awsbatch_builder import AwsBatchConstruct
from pcluster.templates.cdk_builder_utils import (
CdkLaunchTemplateBuilder,
@@ -1256,12 +1256,10 @@ def _add_head_node(self):
head_node_launch_template.add_metadata("Comment", "AWS ParallelCluster Head Node")
# CloudFormation::Init metadata

dna_json = json.dumps(
common_dna_json = json.dumps(
{
"cluster": {
"stack_name": self._stack_name,
"stack_arn": self.stack.stack_id,
"raid_vol_ids": get_shared_storage_ids_by_type(self.shared_storage_infos, SharedStorageType.RAID),
"raid_shared_dir": to_comma_separated_string(
self.shared_storage_mount_dirs[SharedStorageType.RAID]
),
@@ -1306,49 +1304,28 @@ def _add_head_node(self):
self.shared_storage_attributes[SharedStorageType.FSX]["FileSystemTypes"]
),
"fsx_shared_dirs": to_comma_separated_string(self.shared_storage_mount_dirs[SharedStorageType.FSX]),
"volume": get_shared_storage_ids_by_type(self.shared_storage_infos, SharedStorageType.EBS),
"scheduler": self.config.scheduling.scheduler,
"ephemeral_dir": (
head_node.local_storage.ephemeral_volume.mount_dir
if head_node.local_storage.ephemeral_volume
else DEFAULT_EPHEMERAL_DIR
),
"ebs_shared_dirs": to_comma_separated_string(self.shared_storage_mount_dirs[SharedStorageType.EBS]),
"proxy": head_node.networking.proxy.http_proxy_address if head_node.networking.proxy else "NONE",
"node_type": "HeadNode",
"cluster_user": OS_MAPPING[self.config.image.os]["user"],
"ddb_table": self.dynamodb_table_status.ref if not self._condition_is_batch() else "NONE",
"log_group_name": (
self.log_group.log_group_name if self.config.monitoring.logs.cloud_watch.enabled else "NONE"
),
"dcv_enabled": "head_node" if self.config.is_dcv_enabled else "false",
"dcv_port": head_node.dcv.port if head_node.dcv else "NONE",
"enable_intel_hpc_platform": "true" if self.config.is_intel_hpc_platform_enabled else "false",
"cw_logging_enabled": "true" if self.config.is_cw_logging_enabled else "false",
"log_rotation_enabled": "true" if self.config.is_log_rotation_enabled else "false",
"cluster_s3_bucket": self.bucket.name,
"cluster_config_s3_key": "{0}/configs/{1}".format(
self.bucket.artifact_directory, PCLUSTER_S3_ARTIFACTS_DICT.get("config_name")
),
"cluster_config_version": self.config.config_version,
"instance_types_data_version": self.config.instance_types_data_version,
"change_set_s3_key": f"{self.bucket.artifact_directory}/configs/"
f"{PCLUSTER_S3_ARTIFACTS_DICT.get('change_set_name')}",
"instance_types_data_s3_key": f"{self.bucket.artifact_directory}/configs/"
f"{PCLUSTER_S3_ARTIFACTS_DICT.get('instance_types_data_name')}",
"custom_node_package": self.config.custom_node_package or "",
"custom_awsbatchcli_package": self.config.custom_aws_batch_cli_package or "",
"head_node_imds_secured": str(self.config.head_node.imds.secured).lower(),
"compute_node_bootstrap_timeout": get_attr(
self.config, "dev_settings.timeouts.compute_node_bootstrap_timeout", NODE_BOOTSTRAP_TIMEOUT
),
"head_node_private_ip": "HEAD_NODE_PRIVATE_IP",
"disable_sudo_access_for_default_user": (
"true"
if self.config.deployment_settings
and self.config.deployment_settings.disable_sudo_access_default_user
else "false"
),
"launch_template_id": launch_template_id,
**(
get_slurm_specific_dna_json_for_head_node(self.config, self.scheduler_resources)
if self._condition_is_slurm()
@@ -1359,7 +1336,44 @@ def _add_head_node(self):
},
indent=4,
)

head_node_specific_dna_json = json.dumps(
{
"cluster": {
"stack_arn": self.stack.stack_id,
"raid_vol_ids": get_shared_storage_ids_by_type(self.shared_storage_infos, SharedStorageType.RAID),
"volume": get_shared_storage_ids_by_type(self.shared_storage_infos, SharedStorageType.EBS),
"ephemeral_dir": (
head_node.local_storage.ephemeral_volume.mount_dir
if head_node.local_storage.ephemeral_volume
else DEFAULT_EPHEMERAL_DIR
),
"proxy": head_node.networking.proxy.http_proxy_address if head_node.networking.proxy else "NONE",
"node_type": "HeadNode",
"ddb_table": self.dynamodb_table_status.ref if not self._condition_is_batch() else "NONE",
"dcv_enabled": "head_node" if self.config.is_dcv_enabled else "false",
"dcv_port": head_node.dcv.port if head_node.dcv else "NONE",
"common_dna_s3_key": f"{self.bucket.artifact_directory}/assets/"
f"common-dna-{self.config.config_version}.json",
"instance_types_data_version": self.config.instance_types_data_version,
"change_set_s3_key": f"{self.bucket.artifact_directory}/configs/"
f"{PCLUSTER_S3_ARTIFACTS_DICT.get('change_set_name')}",
"instance_types_data_s3_key": f"{self.bucket.artifact_directory}/configs/"
f"{PCLUSTER_S3_ARTIFACTS_DICT.get('instance_types_data_name')}",
"head_node_imds_secured": str(self.config.head_node.imds.secured).lower(),
"compute_node_bootstrap_timeout": get_attr(
self.config, "dev_settings.timeouts.compute_node_bootstrap_timeout", NODE_BOOTSTRAP_TIMEOUT
),
"launch_template_id": launch_template_id,
},
},
indent=4,
)
self.bucket.upload_dna_cfn_asset(
file_type=S3FileType.ASSETS,
asset_file_content=json.loads(self.config.extra_chef_attributes),
asset_name=f"extra-{self.config.config_version}.json",
format=S3FileFormat.JSON,
)
cfn_init = {
"configSets": {
"deployFiles": ["deployConfigFiles"],
@@ -1377,8 +1391,15 @@ def _add_head_node(self):
# A nosec comment is appended to the following line in order to disable the B108 check.
# The file is needed by the product
# [B108:hardcoded_tmp_directory] Probable insecure usage of temp file/directory.
"/tmp/dna.json": { # nosec B108
"content": dna_json,
"/tmp/head-node-dna.json": { # nosec B108
"content": head_node_specific_dna_json,
"mode": "000644",
"owner": "root",
"group": "root",
"encoding": "plain",
},
"/tmp/common-dna.json": { # nosec B108
"content": common_dna_json,
"mode": "000644",
"owner": "root",
"group": "root",
@@ -1408,8 +1429,9 @@ def _add_head_node(self):
"touch": {"command": "touch /etc/chef/ohai/hints/ec2.json"},
"jq": {
"command": (
'jq -s ".[0] * .[1]" /tmp/dna.json /tmp/extra.json > /etc/chef/dna.json '
'|| ( echo "jq not installed"; cp /tmp/dna.json /etc/chef/dna.json )'
'jq -s ".[0] * .[1] * .[2]" /tmp/common-dna.json /tmp/head-node-dna.json /tmp/extra.json '
'> /etc/chef/dna.json || ( echo "jq not installed"; cp /tmp/common-dna.json '
"/tmp/head-node-dna.json /etc/chef/dna.json )"
)
},
},
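On the head node the merge order mirrors the compute node: `common-dna.json` supplies the shared attributes, `head-node-dna.json` layers the node-specific ones on top, and `extra.json` (the user's extra Chef attributes, also uploaded above as `extra-<config_version>.json`) wins last. A self-contained sketch of that precedence, approximating jq's `*` operator; the attribute fragments are made up and only the override order matters:

```python
from copy import deepcopy


def deep_merge(left: dict, right: dict) -> dict:
    """Right-biased recursive merge, approximating jq's '*' operator."""
    merged = deepcopy(left)
    for key, value in right.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged


# Made-up fragments; only the precedence is the point.
common = {"cluster": {"scheduler": "slurm", "cluster_user": "ec2-user"}}
head_node = {"cluster": {"node_type": "HeadNode", "dcv_port": "NONE"}}
extra = {"cluster": {"dcv_port": 8443}}

dna = deep_merge(deep_merge(common, head_node), extra)
assert dna["cluster"] == {
    "scheduler": "slurm",
    "cluster_user": "ec2-user",
    "node_type": "HeadNode",
    "dcv_port": 8443,
}
```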