|
33 | 33 | to_comma_separated_string, |
34 | 34 | ) |
35 | 35 | from pcluster.templates.slurm_builder import SlurmConstruct |
36 | | -from pcluster.utils import get_attr, get_http_tokens_setting, get_resource_name_from_resource_arn, get_service_endpoint |
| 36 | +from pcluster.utils import get_attr, get_http_tokens_setting |
37 | 37 |
|
38 | 38 |
|
39 | 39 | class QueuesStack(NestedStack): |
@@ -184,115 +184,7 @@ def _add_compute_resource_launch_template( |
184 | 184 | if isinstance(compute_resource, SlurmComputeResource): |
185 | 185 | conditional_template_properties.update({"instance_type": compute_resource.instance_types[0]}) |
186 | 186 |
|
187 | | - if queue.instance_profile: |
188 | | - instance_profile_name = get_resource_name_from_resource_arn(queue.instance_profile) |
189 | | - instance_role_name = ( |
190 | | - AWSApi.instance() |
191 | | - .iam.get_instance_profile(instance_profile_name) |
192 | | - .get("InstanceProfile") |
193 | | - .get("Roles")[0] |
194 | | - .get("RoleName") |
195 | | - ) |
196 | | - elif queue.instance_role: |
197 | | - instance_role_name = get_resource_name_from_resource_arn(queue.instance_role) |
198 | | - else: |
199 | | - instance_role_name = self.managed_compute_instance_roles[queue.name].ref |
200 | | - |
201 | 187 | launch_template_id = f"LaunchTemplate{create_hash_suffix(queue.name + compute_resource.name)}" |
202 | | - launch_template = ec2.CfnLaunchTemplate( |
203 | | - self, |
204 | | - launch_template_id, |
205 | | - launch_template_name=f"{self.stack_name}-{queue.name}-{compute_resource.name}", |
206 | | - launch_template_data=ec2.CfnLaunchTemplate.LaunchTemplateDataProperty( |
207 | | - block_device_mappings=self._launch_template_builder.get_block_device_mappings( |
208 | | - queue.compute_settings.local_storage.root_volume, |
209 | | - AWSApi.instance().ec2.describe_image(self._config.image_dict[queue.name]).device_name, |
210 | | - ), |
211 | | - # key_name=, |
212 | | - network_interfaces=compute_lt_nw_interfaces, |
213 | | - placement=ec2.CfnLaunchTemplate.PlacementProperty(group_name=placement_group), |
214 | | - image_id=self._config.image_dict[queue.name], |
215 | | - iam_instance_profile=ec2.CfnLaunchTemplate.IamInstanceProfileProperty( |
216 | | - name=instance_profiles[queue.name] |
217 | | - ), |
218 | | - instance_market_options=self._launch_template_builder.get_instance_market_options( |
219 | | - queue, compute_resource |
220 | | - ), |
221 | | - instance_initiated_shutdown_behavior="terminate", |
222 | | - capacity_reservation_specification=self._launch_template_builder.get_capacity_reservation( |
223 | | - queue, |
224 | | - compute_resource, |
225 | | - ), |
226 | | - metadata_options=ec2.CfnLaunchTemplate.MetadataOptionsProperty( |
227 | | - http_tokens=get_http_tokens_setting(self._config.imds.imds_support) |
228 | | - ), |
229 | | - user_data=Fn.base64( |
230 | | - Fn.sub( |
231 | | - get_user_data_content("../resources/compute_node/user_data.sh"), |
232 | | - { |
233 | | - **{ |
234 | | - # Disable multithreading using logic from |
235 | | - # https://aws.amazon.com/blogs/compute/disabling-intel-hyper-threading-technology-on-amazon-linux/ |
236 | | - # thread_siblings_list contains a comma (,) or dash (-) separated list of CPU hardware |
237 | | - # threads within the same core as cpu |
238 | | - # e.g. 0-1 or 0,1 |
239 | | - # cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list |
240 | | - # | tr '-' ',' # convert hyphen (-) to comma (,), to account that |
241 | | - # # some kernels and CPU architectures use a hyphen |
242 | | - # # instead of a comma |
243 | | - # | cut -s -d, -f2- # split over comma (,) and take the right part |
244 | | - # | tr ',' '\n' # convert remaining comma (,) into new lines |
245 | | - # | sort -un # sort and unique |
246 | | - "DisableMultiThreadingManually": ( |
247 | | - "true" if compute_resource.disable_simultaneous_multithreading_manually else "false" |
248 | | - ), |
249 | | - "BaseOS": self._config.image.os, |
250 | | - "OSUser": OS_MAPPING[self._config.image.os]["user"], |
251 | | - "ClusterName": self.stack_name, |
252 | | - "Timeout": str( |
253 | | - get_attr( |
254 | | - self._config, |
255 | | - "dev_settings.timeouts.compute_node_bootstrap_timeout", |
256 | | - NODE_BOOTSTRAP_TIMEOUT, |
257 | | - ) |
258 | | - ), |
259 | | - "ComputeStartupTimeMetricEnabled": str( |
260 | | - get_attr( |
261 | | - self._config, |
262 | | - "dev_settings.compute_startup_time_metric_enabled", |
263 | | - default=False, |
264 | | - ) |
265 | | - ), |
266 | | - "LaunchTemplateResourceId": launch_template_id, |
267 | | - "CloudFormationUrl": get_service_endpoint("cloudformation", self._config.region), |
268 | | - "CfnInitRole": instance_role_name, |
269 | | - }, |
270 | | - **get_common_user_data_env(queue, self._config), |
271 | | - }, |
272 | | - ) |
273 | | - ), |
274 | | - monitoring=ec2.CfnLaunchTemplate.MonitoringProperty(enabled=is_detailed_monitoring_enabled), |
275 | | - tag_specifications=[ |
276 | | - ec2.CfnLaunchTemplate.TagSpecificationProperty( |
277 | | - resource_type="instance", |
278 | | - tags=get_default_instance_tags( |
279 | | - self.stack_name, self._config, compute_resource, "Compute", self._shared_storage_infos |
280 | | - ) |
281 | | - + [CfnTag(key=PCLUSTER_QUEUE_NAME_TAG, value=queue.name)] |
282 | | - + [CfnTag(key=PCLUSTER_COMPUTE_RESOURCE_NAME_TAG, value=compute_resource.name)] |
283 | | - + self._get_custom_compute_resource_tags(queue, compute_resource), |
284 | | - ), |
285 | | - ec2.CfnLaunchTemplate.TagSpecificationProperty( |
286 | | - resource_type="volume", |
287 | | - tags=get_default_volume_tags(self.stack_name, "Compute") |
288 | | - + [CfnTag(key=PCLUSTER_QUEUE_NAME_TAG, value=queue.name)] |
289 | | - + [CfnTag(key=PCLUSTER_COMPUTE_RESOURCE_NAME_TAG, value=compute_resource.name)] |
290 | | - + self._get_custom_compute_resource_tags(queue, compute_resource), |
291 | | - ), |
292 | | - ], |
293 | | - **conditional_template_properties, |
294 | | - ), |
295 | | - ) |
296 | 188 |
|
297 | 189 | dna_json = json.dumps( |
298 | 190 | { |
@@ -397,64 +289,99 @@ def _add_compute_resource_launch_template( |
397 | 289 | "launch_template_id": launch_template_id, |
398 | 290 | } |
399 | 291 | }, |
400 | | - indent=4, |
| 292 | + indent=None, # Keep indent as None for compact sizing and proper parsing in user_data.sh |
401 | 293 | ) |
402 | 294 |
|
403 | | - cfn_init = { |
404 | | - "configSets": { |
405 | | - "deployFiles": ["deployConfigFiles"], |
406 | | - "update": ["deployConfigFiles", "chefUpdate"], |
407 | | - }, |
408 | | - "deployConfigFiles": { |
409 | | - "files": { |
410 | | - # A nosec comment is appended to the following line in order to disable the B108 check. |
411 | | - # The file is needed by the product |
412 | | - # [B108:hardcoded_tmp_directory] Probable insecure usage of temp file/directory. |
413 | | - "/tmp/dna.json": { # nosec B108 |
414 | | - "content": dna_json, |
415 | | - "mode": "000644", |
416 | | - "owner": "root", |
417 | | - "group": "root", |
418 | | - "encoding": "plain", |
419 | | - }, |
420 | | - # A nosec comment is appended to the following line in order to disable the B108 check. |
421 | | - # The file is needed by the product |
422 | | - # [B108:hardcoded_tmp_directory] Probable insecure usage of temp file/directory. |
423 | | - "/tmp/extra.json": { # nosec B108 |
424 | | - "mode": "000644", |
425 | | - "owner": "root", |
426 | | - "group": "root", |
427 | | - "content": self._config.extra_chef_attributes, |
428 | | - }, |
429 | | - }, |
430 | | - "commands": { |
431 | | - "mkdir": {"command": "mkdir -p /etc/chef/ohai/hints"}, |
432 | | - "touch": {"command": "touch /etc/chef/ohai/hints/ec2.json"}, |
433 | | - "jq": { |
434 | | - "command": ( |
435 | | - 'jq -s ".[0] * .[1]" /tmp/dna.json /tmp/extra.json > /etc/chef/dna.json ' |
436 | | - '|| ( echo "jq not installed"; cp /tmp/dna.json /etc/chef/dna.json )' |
| 295 | + launch_template = ec2.CfnLaunchTemplate( |
| 296 | + self, |
| 297 | + launch_template_id, |
| 298 | + launch_template_name=f"{self.stack_name}-{queue.name}-{compute_resource.name}", |
| 299 | + launch_template_data=ec2.CfnLaunchTemplate.LaunchTemplateDataProperty( |
| 300 | + block_device_mappings=self._launch_template_builder.get_block_device_mappings( |
| 301 | + queue.compute_settings.local_storage.root_volume, |
| 302 | + AWSApi.instance().ec2.describe_image(self._config.image_dict[queue.name]).device_name, |
| 303 | + ), |
| 304 | + network_interfaces=compute_lt_nw_interfaces, |
| 305 | + placement=ec2.CfnLaunchTemplate.PlacementProperty(group_name=placement_group), |
| 306 | + image_id=self._config.image_dict[queue.name], |
| 307 | + iam_instance_profile=ec2.CfnLaunchTemplate.IamInstanceProfileProperty( |
| 308 | + name=instance_profiles[queue.name] |
| 309 | + ), |
| 310 | + instance_market_options=self._launch_template_builder.get_instance_market_options( |
| 311 | + queue, compute_resource |
| 312 | + ), |
| 313 | + instance_initiated_shutdown_behavior="terminate", |
| 314 | + capacity_reservation_specification=self._launch_template_builder.get_capacity_reservation( |
| 315 | + queue, |
| 316 | + compute_resource, |
| 317 | + ), |
| 318 | + metadata_options=ec2.CfnLaunchTemplate.MetadataOptionsProperty( |
| 319 | + http_tokens=get_http_tokens_setting(self._config.imds.imds_support) |
| 320 | + ), |
| 321 | + user_data=Fn.base64( |
| 322 | + Fn.sub( |
| 323 | + get_user_data_content("../resources/compute_node/user_data.sh"), |
| 324 | + { |
| 325 | + **{ |
| 326 | + # Disable multithreading using logic from |
| 327 | + # https://aws.amazon.com/blogs/compute/disabling-intel-hyper-threading-technology-on-amazon-linux/ |
| 328 | + # thread_siblings_list contains a comma (,) or dash (-) separated list of CPU hardware |
| 329 | + # threads within the same core as cpu |
| 330 | + # e.g. 0-1 or 0,1 |
| 331 | + # cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list |
| 332 | + # | tr '-' ',' # convert hyphen (-) to comma (,), to account that |
| 333 | + # # some kernels and CPU architectures use a hyphen |
| 334 | + # # instead of a comma |
| 335 | + # | cut -s -d, -f2- # split over comma (,) and take the right part |
| 336 | + # | tr ',' '\n' # convert remaining comma (,) into new lines |
| 337 | + # | sort -un # sort and unique |
| 338 | + "DisableMultiThreadingManually": ( |
| 339 | + "true" if compute_resource.disable_simultaneous_multithreading_manually else "false" |
| 340 | + ), |
| 341 | + "BaseOS": self._config.image.os, |
| 342 | + "ClusterName": self.stack_name, |
| 343 | + "Timeout": str( |
| 344 | + get_attr( |
| 345 | + self._config, |
| 346 | + "dev_settings.timeouts.compute_node_bootstrap_timeout", |
| 347 | + NODE_BOOTSTRAP_TIMEOUT, |
| 348 | + ) |
| 349 | + ), |
| 350 | + "ComputeStartupTimeMetricEnabled": str( |
| 351 | + get_attr( |
| 352 | + self._config, |
| 353 | + "dev_settings.compute_startup_time_metric_enabled", |
| 354 | + default=False, |
| 355 | + ) |
| 356 | + ), |
| 357 | + "DnaJson": dna_json, |
| 358 | + "ExtraJson": self._config.extra_chef_attributes, |
| 359 | + }, |
| 360 | + **get_common_user_data_env(queue, self._config), |
| 361 | + }, |
| 362 | + ) |
| 363 | + ), |
| 364 | + monitoring=ec2.CfnLaunchTemplate.MonitoringProperty(enabled=is_detailed_monitoring_enabled), |
| 365 | + tag_specifications=[ |
| 366 | + ec2.CfnLaunchTemplate.TagSpecificationProperty( |
| 367 | + resource_type="instance", |
| 368 | + tags=get_default_instance_tags( |
| 369 | + self.stack_name, self._config, compute_resource, "Compute", self._shared_storage_infos |
437 | 370 | ) |
438 | | - }, |
439 | | - }, |
440 | | - }, |
441 | | - "chefUpdate": { |
442 | | - "commands": { |
443 | | - "chef": { |
444 | | - "command": ( |
445 | | - ". /etc/parallelcluster/pcluster_cookbook_environment.sh; " |
446 | | - "cinc-client --local-mode --config /etc/chef/client.rb --log_level info" |
447 | | - " --logfile /var/log/chef-client.log --force-formatter --no-color" |
448 | | - " --chef-zero-port 8889 --json-attributes /etc/chef/dna.json" |
449 | | - " --override-runlist aws-parallelcluster-entrypoints::update &&" |
450 | | - " /opt/parallelcluster/scripts/fetch_and_run -postupdate" |
451 | | - ), |
452 | | - "cwd": "/etc/chef", |
453 | | - } |
454 | | - } |
455 | | - }, |
456 | | - } |
457 | | - |
458 | | - launch_template.add_metadata("AWS::CloudFormation::Init", cfn_init) |
| 371 | + + [CfnTag(key=PCLUSTER_QUEUE_NAME_TAG, value=queue.name)] |
| 372 | + + [CfnTag(key=PCLUSTER_COMPUTE_RESOURCE_NAME_TAG, value=compute_resource.name)] |
| 373 | + + self._get_custom_compute_resource_tags(queue, compute_resource), |
| 374 | + ), |
| 375 | + ec2.CfnLaunchTemplate.TagSpecificationProperty( |
| 376 | + resource_type="volume", |
| 377 | + tags=get_default_volume_tags(self.stack_name, "Compute") |
| 378 | + + [CfnTag(key=PCLUSTER_QUEUE_NAME_TAG, value=queue.name)] |
| 379 | + + [CfnTag(key=PCLUSTER_COMPUTE_RESOURCE_NAME_TAG, value=compute_resource.name)] |
| 380 | + + self._get_custom_compute_resource_tags(queue, compute_resource), |
| 381 | + ), |
| 382 | + ], |
| 383 | + **conditional_template_properties, |
| 384 | + ), |
| 385 | + ) |
459 | 386 |
|
460 | 387 | return launch_template |
0 commit comments