Skip to content

Commit 91a8f26

Browse files
authored
fix: switch to amazon linux 2 amis & fix final snapshot identifier (#4)
* fix: switch to amazon linux 2 which has single volume
* add separate gpu launch template
1 parent 3d615a0 commit 91a8f26

File tree

4 files changed

+54
-12
lines changed

4 files changed

+54
-12
lines changed

aws/terraform/modules/metaflow/modules/computation/batch.tf

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ resource "aws_batch_compute_environment" "cpu" {
3333
and this compute environment will not have to be destroyed and then created to point to a new Launch Template.
3434
*/
3535
launch_template {
36-
launch_template_id = aws_launch_template.this.id
37-
version = aws_launch_template.this.latest_version
36+
launch_template_id = aws_launch_template.cpu.id
37+
version = aws_launch_template.cpu.latest_version
3838
}
3939

4040
# Security group to apply to the instances launched.
@@ -100,8 +100,8 @@ resource "aws_batch_compute_environment" "large-cpu" {
100100
and this compute environment will not have to be destroyed and then created to point to a new Launch Template.
101101
*/
102102
launch_template {
103-
launch_template_id = aws_launch_template.this.id
104-
version = aws_launch_template.this.latest_version
103+
launch_template_id = aws_launch_template.cpu.id
104+
version = aws_launch_template.cpu.latest_version
105105
}
106106

107107
# Security group to apply to the instances launched.
@@ -167,8 +167,8 @@ resource "aws_batch_compute_environment" "gpu" {
167167
and this compute environment will not have to be destroyed and then created to point to a new Launch Template.
168168
*/
169169
launch_template {
170-
launch_template_id = aws_launch_template.this.id
171-
version = aws_launch_template.this.latest_version
170+
launch_template_id = aws_launch_template.gpu.id
171+
version = aws_launch_template.gpu.latest_version
172172
}
173173

174174
# Security group to apply to the instances launched.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,9 @@
11
data "aws_region" "current" {}
2+
3+
data "aws_ssm_parameter" "ecs_optimized_cpu_ami" {
4+
name = "/aws/service/ecs/optimized-ami/amazon-linux-2/recommended"
5+
}
6+
7+
data "aws_ssm_parameter" "ecs_optimized_gpu_ami" {
8+
name = "/aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended"
9+
}

aws/terraform/modules/metaflow/modules/computation/ec2.tf

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,50 @@
1-
resource "aws_launch_template" "this" {
1+
resource "aws_launch_template" "cpu" {
22
/* To provide a larger disk space than the default 8GB for AWS Batch.
33
AWS Batch points to this using the latest version, so we can update the disk size here
44
and AWS Batch will use that.
55
6-
This is used for all Metaflow AWS Batch remote jobs.
6+
This is used for all Metaflow AWS Batch CPU remote jobs.
77
*/
8-
name = "${var.resource_prefix}batch-launch-template-100gb${var.resource_suffix}"
8+
name = "${var.resource_prefix}batch-launch-tmpl-cpu-100gb${var.resource_suffix}"
99

1010
# Defines what IAM Role to assume to grant an EC2 instance
1111
# This role must have a policy to access the kms_key_id used to encrypt the EBS volume
1212
iam_instance_profile {
1313
arn = aws_iam_instance_profile.ecs_instance_role.arn
1414
}
1515

16+
image_id = jsondecode(data.aws_ssm_parameter.ecs_optimized_cpu_ami.value)["image_id"]
17+
18+
block_device_mappings {
19+
device_name = "/dev/xvda"
20+
21+
ebs {
22+
volume_size = 100
23+
delete_on_termination = true
24+
encrypted = true
25+
}
26+
}
27+
28+
tags = var.standard_tags
29+
}
30+
31+
resource "aws_launch_template" "gpu" {
32+
/* To provide a larger disk space than the default 8GB for AWS Batch.
33+
AWS Batch points to this using the latest version, so we can update the disk size here
34+
and AWS Batch will use that.
35+
36+
This is used for all Metaflow AWS Batch GPU remote jobs.
37+
*/
38+
name = "${var.resource_prefix}batch-launch-tmpl-gpu-100gb${var.resource_suffix}"
39+
40+
# Defines what IAM Role to assume to grant an EC2 instance
41+
# This role must have a policy to access the kms_key_id used to encrypt the EBS volume
42+
iam_instance_profile {
43+
arn = aws_iam_instance_profile.ecs_instance_role.arn
44+
}
45+
46+
image_id = jsondecode(data.aws_ssm_parameter.ecs_optimized_gpu_ami.value)["image_id"]
47+
1648
block_device_mappings {
1749
device_name = "/dev/xvda"
1850

aws/terraform/modules/metaflow/modules/datastore/rds.tf

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ resource "random_password" "this" {
5151
override_special = "!#$%&*()-_=+[]{}<>:?"
5252
}
5353

54+
resource "random_pet" "final_snapshot_id" {}
55+
5456
/*
5557
Define rds db instance.
5658
*/
@@ -68,9 +70,9 @@ resource "aws_db_instance" "this" {
6870
username = var.db_username
6971
password = random_password.this.result
7072
db_subnet_group_name = aws_db_subnet_group.this.id
71-
max_allocated_storage = 1000 # Upper limit of automatic scaled storage
72-
multi_az = true # Multiple availability zone?
73-
final_snapshot_identifier = "${var.resource_prefix}${var.db_name}-final-snapshot${var.resource_suffix}-${formatdate("YYYYMMMDDhhmm", timestamp())}" # Snapshot upon delete
73+
max_allocated_storage = 1000 # Upper limit of automatic scaled storage
74+
multi_az = true # Multiple availability zone?
75+
final_snapshot_identifier = "${var.resource_prefix}${var.db_name}-final-snapshot${var.resource_suffix}-${random_pet.final_snapshot_id.id}" # Snapshot upon delete
7476
vpc_security_group_ids = [aws_security_group.rds_security_group.id]
7577

7678
tags = merge(

0 commit comments

Comments
 (0)