Skip to content

Commit 04a49fb

Browse files
authored
Harmonize terraform with Cloudformation (#6)
* Harmonize Terraform with CloudFormation: IAM roles; single batch compute; iam_partition variable; API Gateway basic auth; DB migration functionality; Fargate batch type; networking; general cleanup / organization * fix: add this dependency to prevent an issue where the compute env cannot be destroyed
1 parent faf055b commit 04a49fb

37 files changed

+1314
-935
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Looks up the region of the configured AWS provider; its `name` attribute is
# read as local.aws_region in locals.tf.
data "aws_region" "current" {}
2+
3+
# Looks up the identity Terraform is running as; its `account_id` attribute is
# read as local.aws_account_id in locals.tf.
data "aws_caller_identity" "current" {}

aws/terraform/modules/metaflow/iam.tf

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
# Trust policy for the Batch task role: lets the ECS tasks service principal
# (ecs-tasks.amazonaws.com) assume the role through sts:AssumeRole.
data "aws_iam_policy_document" "batch_s3_task_role_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ecs-tasks.amazonaws.com"]
    }
  }
}
17+
18+
# IAM role that the aws_iam_role_policy resources in this file attach their
# inline policies to. Its trust policy comes from
# data.aws_iam_policy_document.batch_s3_task_role_assume_role.
resource "aws_iam_role" "batch_s3_task_role" {
  name               = local.batch_s3_task_role_name
  description        = "Role for AWS Batch to Access Amazon S3 [METAFLOW_ECS_S3_ACCESS_IAM_ROLE]"
  assume_role_policy = data.aws_iam_policy_document.batch_s3_task_role_assume_role.json

  tags = var.tags
}
27+
28+
# Bucket-level permission: allows s3:ListBucket on the bucket ARN exported by
# the metaflow-datastore module.
data "aws_iam_policy_document" "custom_s3_list_batch" {
  statement {
    sid       = "BucketAccessBatch"
    effect    = "Allow"
    actions   = ["s3:ListBucket"]
    resources = [module.metaflow-datastore.s3_bucket_arn]
  }
}
42+
43+
# Object-level permissions: put, get and delete on every key ("/*") under the
# bucket ARN exported by the metaflow-datastore module.
data "aws_iam_policy_document" "custom_s3_batch" {
  statement {
    sid    = "ObjectAccessBatch"
    effect = "Allow"

    actions = [
      "s3:PutObject",
      "s3:GetObject",
      "s3:DeleteObject",
    ]

    resources = ["${module.metaflow-datastore.s3_bucket_arn}/*"]
  }
}
59+
60+
# KMS permissions on the key the metaflow-datastore module exports as
# datastore_s3_bucket_kms_key_arn.
data "aws_iam_policy_document" "s3_kms" {
  statement {
    effect    = "Allow"
    resources = [module.metaflow-datastore.datastore_s3_bucket_kms_key_arn]

    # TODO - reduce to Encrypt, Decrypt?
    # Actions currently left out on purpose: kms:ReEncryptTo, kms:ReEncryptFrom,
    # kms:DescribeKey, kms:GenerateDataKey.
    # NOTE(review): writes to an SSE-KMS encrypted S3 bucket normally require
    # kms:GenerateDataKey - confirm before attaching this document to a role
    # that uploads objects.
    actions = [
      "kms:Decrypt",
      "kms:Encrypt",
    ]
  }
}
79+
80+
# Explicit deny on every S3 action and resource whenever the request's
# s3:authType is anything other than REST-HEADER. Per the sid, the intent is to
# block presigned-URL access.
data "aws_iam_policy_document" "deny_presigned_batch" {
  statement {
    sid       = "DenyPresignedBatch"
    effect    = "Deny"
    actions   = ["s3:*"]
    resources = ["*"]

    condition {
      variable = "s3:authType"
      test     = "StringNotEquals"
      values   = ["REST-HEADER"]
    }
  }
}
102+
103+
# SageMaker permissions, all scoped to the current partition, region and
# account: create/describe training jobs, and create/describe/invoke models,
# endpoint configs and endpoints.
data "aws_iam_policy_document" "allow_sagemaker" {
  # Start training jobs.
  statement {
    sid       = "AllowSagemakerCreate"
    effect    = "Allow"
    actions   = ["sagemaker:CreateTrainingJob"]
    resources = ["arn:${var.iam_partition}:sagemaker:${local.aws_region}:${local.aws_account_id}:training-job/*"]
  }

  # Inspect training jobs.
  statement {
    sid       = "AllowSagemakerDescribe"
    effect    = "Allow"
    actions   = ["sagemaker:DescribeTrainingJob"]
    resources = ["arn:${var.iam_partition}:sagemaker:${local.aws_region}:${local.aws_account_id}:training-job/*"]
  }

  # Deploy models behind endpoints and call them.
  statement {
    sid    = "AllowSagemakerDeploy"
    effect = "Allow"

    actions = [
      "sagemaker:CreateModel",
      "sagemaker:CreateEndpointConfig",
      "sagemaker:CreateEndpoint",
      "sagemaker:DescribeModel",
      "sagemaker:DescribeEndpoint",
      "sagemaker:InvokeEndpoint",
    ]

    resources = [
      "arn:${var.iam_partition}:sagemaker:${local.aws_region}:${local.aws_account_id}:endpoint/*",
      "arn:${var.iam_partition}:sagemaker:${local.aws_region}:${local.aws_account_id}:model/*",
      "arn:${var.iam_partition}:sagemaker:${local.aws_region}:${local.aws_account_id}:endpoint-config/*",
    ]
  }
}
150+
151+
# Allows iam:PassRole on any role, but only when the role is being passed to
# the SageMaker service (iam:PassedToService == sagemaker.amazonaws.com).
data "aws_iam_policy_document" "iam_pass_role" {
  statement {
    sid       = "AllowPassRole"
    effect    = "Allow"
    actions   = ["iam:PassRole"]
    resources = ["*"]

    condition {
      variable = "iam:PassedToService"
      test     = "StringEquals"
      values   = ["sagemaker.amazonaws.com"]
    }
  }
}
173+
174+
data "aws_iam_policy_document" "dynamodb" {
175+
statement {
176+
sid = "Items"
177+
actions = [
178+
"dynamodb:PutItem",
179+
"dynamodb:GetItem",
180+
"dynamodb:UpdateItem",
181+
]
182+
183+
effect = "Allow"
184+
185+
resources = [
186+
module.metaflow-step-functions.metaflow_step_functions_dynamodb_table_arn
187+
]
188+
189+
condition {
190+
test = "StringEquals"
191+
values = [
192+
"sagemaker.amazonaws.com"
193+
]
194+
variable = "iam:PassedToService"
195+
}
196+
}
197+
}
198+
199+
data "aws_iam_policy_document" "cloudwatch" {
200+
statement {
201+
sid = "AllowPutLogs"
202+
actions = [
203+
"logs:CreateLogStream",
204+
"logs:PutLogEvents",
205+
]
206+
207+
effect = "Allow"
208+
209+
resources = [
210+
"*"
211+
]
212+
}
213+
}
214+
215+
# Inline policy "s3_list" on the Batch task role: the custom_s3_list_batch document.
resource "aws_iam_role_policy" "grant_custom_s3_list_batch" {
  role   = aws_iam_role.batch_s3_task_role.name
  name   = "s3_list"
  policy = data.aws_iam_policy_document.custom_s3_list_batch.json
}
220+
221+
# Inline policy "custom_s3" on the Batch task role: the custom_s3_batch document.
resource "aws_iam_role_policy" "grant_custom_s3_batch" {
  role   = aws_iam_role.batch_s3_task_role.name
  name   = "custom_s3"
  policy = data.aws_iam_policy_document.custom_s3_batch.json
}
226+
227+
# Inline policy "deny_presigned" on the Batch task role: the deny_presigned_batch document.
resource "aws_iam_role_policy" "grant_deny_presigned_batch" {
  role   = aws_iam_role.batch_s3_task_role.name
  name   = "deny_presigned"
  policy = data.aws_iam_policy_document.deny_presigned_batch.json
}
232+
233+
# Inline policy "sagemaker" on the Batch task role: the allow_sagemaker document.
resource "aws_iam_role_policy" "grant_allow_sagemaker" {
  role   = aws_iam_role.batch_s3_task_role.name
  name   = "sagemaker"
  policy = data.aws_iam_policy_document.allow_sagemaker.json
}
238+
239+
# Inline policy "iam_pass_role" on the Batch task role: the iam_pass_role document.
resource "aws_iam_role_policy" "grant_iam_pass_role" {
  role   = aws_iam_role.batch_s3_task_role.name
  name   = "iam_pass_role"
  policy = data.aws_iam_policy_document.iam_pass_role.json
}
244+
245+
# Inline policy "dynamodb" on the Batch task role: the dynamodb document.
resource "aws_iam_role_policy" "grant_dynamodb" {
  role   = aws_iam_role.batch_s3_task_role.name
  name   = "dynamodb"
  policy = data.aws_iam_policy_document.dynamodb.json
}
250+
251+
# Inline policy "cloudwatch" on the Batch task role: the cloudwatch document.
resource "aws_iam_role_policy" "grant_cloudwatch" {
  role   = aws_iam_role.batch_s3_task_role.name
  name   = "cloudwatch"
  policy = data.aws_iam_policy_document.cloudwatch.json
}

aws/terraform/modules/metaflow/locals.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,9 @@ locals {
22
resource_prefix = length(var.resource_prefix) > 0 ? "${var.resource_prefix}-" : ""
33
resource_suffix = length(var.resource_suffix) > 0 ? "-${var.resource_suffix}" : ""
44

5+
aws_region = data.aws_region.current.name
6+
aws_account_id = data.aws_caller_identity.current.account_id
7+
8+
batch_s3_task_role_name = "${local.resource_prefix}batch_s3_task_role${local.resource_suffix}"
59
metaflow_batch_image_name = "${local.resource_prefix}batch${local.resource_suffix}"
610
}

aws/terraform/modules/metaflow/main.tf

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,72 @@
11
module "metaflow-datastore" {
22
source = "./modules/datastore"
33

4-
resource_prefix = local.resource_prefix
5-
resource_suffix = local.resource_suffix
6-
metaflow_vpc_id = var.vpc_id
7-
ecs_instance_role_arn = module.metaflow-computation.ecs_instance_role_arn
4+
resource_prefix = local.resource_prefix
5+
resource_suffix = local.resource_suffix
6+
87
ecs_execution_role_arn = module.metaflow-computation.ecs_execution_role_arn
9-
aws_batch_service_role_arn = module.metaflow-computation.batch_service_role_arn
10-
subnet_private_1_id = var.subnet_private_1_id
11-
subnet_private_2_id = var.subnet_private_2_id
8+
ecs_instance_role_arn = module.metaflow-computation.ecs_instance_role_arn
129
metadata_service_security_group_id = module.metaflow-metadata-service.metadata_service_security_group_id
10+
metaflow_vpc_id = var.vpc_id
11+
subnet1_id = var.subnet1_id
12+
subnet2_id = var.subnet2_id
1313

1414
standard_tags = var.tags
1515
}
1616

1717
module "metaflow-metadata-service" {
1818
source = "./modules/metadata-service"
1919

20-
resource_prefix = local.resource_prefix
21-
resource_suffix = local.resource_suffix
22-
metaflow_vpc_id = var.vpc_id
23-
vpc_cidr_block = var.vpc_cidr_block
24-
subnet_private_1_id = var.subnet_private_1_id
25-
subnet_private_2_id = var.subnet_private_2_id
26-
rds_master_instance_endpoint = module.metaflow-datastore.rds_master_instance_endpoint
27-
database_username = module.metaflow-datastore.database_username
28-
database_password = module.metaflow-datastore.database_password
29-
fargate_task_role_arn = module.metaflow-datastore.iam_s3_access_role_arn
30-
fargate_execution_role_arn = module.metaflow-computation.ecs_execution_role_arn
31-
access_list_cidr_blocks = var.access_list_cidr_blocks
20+
resource_prefix = local.resource_prefix
21+
resource_suffix = local.resource_suffix
22+
23+
access_list_cidr_blocks = var.access_list_cidr_blocks
24+
api_basic_auth = var.api_basic_auth
25+
database_password = module.metaflow-datastore.database_password
26+
database_username = module.metaflow-datastore.database_username
27+
datastore_s3_bucket_kms_key_arn = module.metaflow-datastore.datastore_s3_bucket_kms_key_arn
28+
fargate_execution_role_arn = module.metaflow-computation.ecs_execution_role_arn
29+
iam_partition = var.iam_partition
30+
metaflow_vpc_id = var.vpc_id
31+
rds_master_instance_endpoint = module.metaflow-datastore.rds_master_instance_endpoint
32+
s3_bucket_arn = module.metaflow-datastore.s3_bucket_arn
33+
subnet1_id = var.subnet1_id
34+
subnet2_id = var.subnet2_id
35+
vpc_cidr_block = var.vpc_cidr_block
3236

3337
standard_tags = var.tags
3438
}
3539

3640
module "metaflow-computation" {
3741
source = "./modules/computation"
3842

39-
resource_prefix = local.resource_prefix
40-
resource_suffix = local.resource_suffix
41-
metaflow_vpc_id = var.vpc_id
42-
subnet_private_1_id = var.subnet_private_1_id
43-
subnet_private_2_id = var.subnet_private_2_id
44-
s3_kms_policy_arn = module.metaflow-datastore.metaflow_kms_s3_policy_arn
45-
metaflow_policy_arn = var.metaflow_policy_arn
46-
metaflow_step_functions_dynamodb_policy = module.metaflow-step-functions.metaflow_step_functions_dynamodb_policy
47-
batch_compute_environment_cpu_max_vcpus = var.cpu_max_compute_vcpus
48-
batch_compute_environment_cpu_desired_vcpus = var.cpu_desired_compute_vcpus
49-
batch_compute_environment_cpu_min_vcpus = var.cpu_min_compute_vcpus
50-
batch_compute_environment_large_cpu_max_vcpus = var.large_cpu_max_compute_vcpus
51-
batch_compute_environment_large_cpu_desired_vcpus = var.large_cpu_desired_compute_vcpus
52-
batch_compute_environment_large_cpu_min_vcpus = var.large_cpu_min_compute_vcpus
53-
batch_compute_environment_gpu_max_vcpus = var.gpu_max_compute_vcpus
54-
batch_compute_environment_gpu_desired_vcpus = var.gpu_desired_compute_vcpus
55-
batch_compute_environment_gpu_min_vcpus = var.gpu_min_compute_vcpus
56-
enable_step_functions = var.enable_step_functions
43+
resource_prefix = local.resource_prefix
44+
resource_suffix = local.resource_suffix
45+
46+
batch_type = var.batch_type
47+
compute_environment_desired_vcpus = var.compute_environment_desired_vcpus
48+
compute_environment_instance_types = var.compute_environment_instance_types
49+
compute_environment_max_vcpus = var.compute_environment_max_vcpus
50+
compute_environment_min_vcpus = var.compute_environment_min_vcpus
51+
enable_step_functions = var.enable_step_functions
52+
iam_partition = var.iam_partition
53+
metaflow_step_functions_dynamodb_policy = module.metaflow-step-functions.metaflow_step_functions_dynamodb_policy
54+
metaflow_vpc_id = var.vpc_id
55+
subnet1_id = var.subnet1_id
56+
subnet2_id = var.subnet2_id
5757

5858
standard_tags = var.tags
5959
}
6060

6161
module "metaflow-step-functions" {
6262
source = "./modules/step-functions"
6363

64-
active = var.enable_step_functions
6564
resource_prefix = local.resource_prefix
6665
resource_suffix = local.resource_suffix
6766

67+
active = var.enable_step_functions
6868
batch_job_queue_arn = module.metaflow-computation.METAFLOW_BATCH_JOB_QUEUE
69+
iam_partition = var.iam_partition
6970
s3_bucket_arn = module.metaflow-datastore.s3_bucket_arn
7071
s3_bucket_kms_arn = module.metaflow-datastore.datastore_s3_bucket_kms_key_arn
7172

0 commit comments

Comments
 (0)