
Commit faf055b

Address most recent pr feedback (#5)
* update readme based on feedback - generate metaflow config
* pr feedback: make custom batch ecr registry optional
* pr feedback: use full AWS brand names in docs
* pr feedback: remove datasets s3 bucket
* pr feedback: drop flow log on vpc
* pr feedback: comment clean-up
* pr feedback: configurable vpc and subnet cidr blocks
* pr feedback: reword comment
* pr feedback: output command for importing configuration
* pr feedback: use same min terraform version
* specify value in doc
* pr feedback: typos / grammar
* pr feedback: naming
1 parent 91a8f26 commit faf055b

8 files changed: +61 -49 lines changed

aws/terraform/modules/metaflow/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ Provides the core functionality for Metaflow which includes:
1010
- an API to record and query past executions (`metadata-service`)
1111
- orchestrated processing (`step-functions`)
1212

13-
Depends on a VPC that has been previously set up. The output of the project `infra` is an example
14-
configuration of a VPC that can be passed to this module.
13+
Depends on an Amazon VPC that has been previously set up. The output of the project `infra` is an example
14+
configuration of an Amazon VPC that can be passed to this module.
1515

16-
## ECR
16+
## Amazon ECR
1717

18-
Sets up an AWS ECR to hold the Docker image we wish to use with Metaflow.
18+
Sets up an Amazon Elastic Container Registry (ECR) to hold the Docker image we wish to use with Metaflow.

aws/terraform/modules/metaflow/ecr.tf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
resource "aws_ecr_repository" "metaflow_batch_image" {
2+
count = var.enable_custom_batch_container_registry ? 1 : 0
3+
24
name = local.metaflow_batch_image_name
35

46
tags = var.tags
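Since the repository is now gated behind `count`, any reference to it elsewhere in the module must index into the resource and guard against the zero-instance case. A minimal sketch of that pattern (the output name here is hypothetical, not part of this commit):

```hcl
# Sketch only: when enable_custom_batch_container_registry is false,
# aws_ecr_repository.metaflow_batch_image is an empty list, so references
# must index [0] and fall back to a default value.
output "batch_image_repository_arn" {
  value = var.enable_custom_batch_container_registry ? aws_ecr_repository.metaflow_batch_image[0].arn : ""
}
```

This is the same guard the commit applies to `metaflow_profile_json` and `metaflow_batch_container_image` in `outputs.tf` below.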

aws/terraform/modules/metaflow/modules/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ Our Metaflow Terraform code has been separated into separate modules based on th
44

55
## Computation
66

7-
Sets up remote computation resources so flows can be run on EC2 instances. These resources do not perform
7+
Sets up remote computation resources so flows can be run on Amazon EC2 instances. These resources do not perform
88
orchestration and rely on the data scientist's computer to perform this coordination.
99

1010
## Datastore
@@ -23,4 +23,4 @@ Datastore to explore historic runs.
2323

2424
Sets up remote computation resources that come with orchestration. This allows data scientists to schedule flows
2525
using crons as well as being able to kick off flows and shut down their machine, as the remote resources will handle
26-
all coordination.
26+
all coordination.

aws/terraform/modules/metaflow/modules/computation/batch.tf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ resource "aws_batch_compute_environment" "cpu" {
4848
var.subnet_private_2_id
4949
]
5050

51-
# Type of instance EC2 for on-demand. Can use "SPOT" to use unused instances at discount if available
51+
# Type of instance Amazon EC2 for on-demand. Can use "SPOT" to use unused instances at discount if available
5252
type = "EC2"
5353

5454
tags = var.standard_tags
@@ -115,7 +115,7 @@ resource "aws_batch_compute_environment" "large-cpu" {
115115
var.subnet_private_2_id
116116
]
117117

118-
# Type of instance EC2 for on-demand. Can use "SPOT" to use unused instances at discount if available
118+
# Type of instance Amazon EC2 for on-demand. Can use "SPOT" to use unused instances at discount if available
119119
type = "EC2"
120120

121121
tags = var.standard_tags
@@ -182,7 +182,7 @@ resource "aws_batch_compute_environment" "gpu" {
182182
var.subnet_private_2_id
183183
]
184184

185-
# Type of instance EC2 for on-demand. Can use "SPOT" to use unused instances at discount if available
185+
# Type of instance Amazon EC2 for on-demand. Can use "SPOT" to use unused instances at discount if available
186186
type = "EC2"
187187

188188
tags = var.standard_tags
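The recurring comment notes that `"SPOT"` can replace `"EC2"` to use spare capacity at a discount. A hypothetical Spot variant is sketched below; it is not part of this commit, and the variables for the roles and security group are assumptions, not inputs defined in this module:

```hcl
# Sketch only: a Spot-based compute environment, per the comment above.
# var.batch_service_role_arn, var.spot_fleet_role_arn,
# var.ecs_instance_profile_arn, and var.batch_security_group_id are
# assumed inputs for illustration.
resource "aws_batch_compute_environment" "cpu_spot" {
  compute_environment_name = "${var.resource_prefix}cpu-spot${var.resource_suffix}"
  type                     = "MANAGED"
  service_role             = var.batch_service_role_arn

  compute_resources {
    type                = "SPOT"
    bid_percentage      = 100 # never pay more than the on-demand price
    spot_iam_fleet_role = var.spot_fleet_role_arn # required when type is "SPOT"
    instance_role       = var.ecs_instance_profile_arn
    instance_type       = ["r5.large"]
    max_vcpus           = 32
    min_vcpus           = 0
    security_group_ids  = [var.batch_security_group_id]
    subnets             = [var.subnet_private_1_id, var.subnet_private_2_id]
    tags                = var.standard_tags
  }
}
```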

aws/terraform/modules/metaflow/modules/computation/ec2.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ resource "aws_launch_template" "cpu" {
77
*/
88
name = "${var.resource_prefix}batch-launch-tmpl-cpu-100gb${var.resource_suffix}"
99

10-
# Defines what IAM Role to assume to grant an EC2 instance
10+
# Defines what IAM Role to assume to grant an Amazon EC2 instance
1111
# This role must have a policy to access the kms_key_id used to encrypt the EBS volume
1212
iam_instance_profile {
1313
arn = aws_iam_instance_profile.ecs_instance_role.arn
@@ -37,7 +37,7 @@ resource "aws_launch_template" "gpu" {
3737
*/
3838
name = "${var.resource_prefix}batch-launch-tmpl-gpu-100gb${var.resource_suffix}"
3939

40-
# Defines what IAM Role to assume to grant an EC2 instance
40+
# Defines what IAM Role to assume to grant an Amazon EC2 instance
4141
# This role must have a policy to access the kms_key_id used to encrypt the EBS volume
4242
iam_instance_profile {
4343
arn = aws_iam_instance_profile.ecs_instance_role.arn

aws/terraform/modules/metaflow/modules/computation/variables.tf

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,61 +49,61 @@ variable "standard_tags" {
4949

5050
variable "batch_compute_environment_cpu_max_vcpus" {
5151
type = string
52-
description = "Maximum number of EC2 vCPUs that our CPU Batch Compute Environment can reach."
52+
description = "Maximum number of Amazon EC2 vCPUs that our CPU Batch Compute Environment can reach."
5353
default = 32
5454
}
5555

5656
variable "batch_compute_environment_cpu_min_vcpus" {
5757
type = string
58-
description = "Minimum number of EC2 vCPUs that our CPU Batch Compute Environment should maintain."
58+
description = "Minimum number of Amazon EC2 vCPUs that our CPU Batch Compute Environment should maintain."
5959
default = 0
6060
}
6161

6262
variable "batch_compute_environment_cpu_desired_vcpus" {
6363
type = string
64-
description = "Desired number of EC2 vCPUS in our CPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
64+
description = "Desired number of Amazon EC2 vCPUS in our CPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
6565
default = 0
6666
}
6767

6868
variable "batch_compute_environment_large_cpu_max_vcpus" {
6969
type = string
70-
description = "Maximum number of EC2 vCPUs that our large CPU Batch Compute Environment can reach."
70+
description = "Maximum number of Amazon EC2 vCPUs that our large CPU Batch Compute Environment can reach."
7171
default = 128
7272
}
7373

7474
variable "batch_compute_environment_large_cpu_min_vcpus" {
7575
type = string
76-
description = "Minimum number of EC2 vCPUs that our large CPU Batch Compute Environment should maintain."
76+
description = "Minimum number of Amazon EC2 vCPUs that our large CPU Batch Compute Environment should maintain."
7777
default = 0
7878
}
7979

8080
variable "batch_compute_environment_large_cpu_desired_vcpus" {
8181
type = string
82-
description = "Desired number of EC2 vCPUS in our large CPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
82+
description = "Desired number of Amazon EC2 vCPUS in our large CPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
8383
default = 0
8484
}
8585

8686
variable "batch_compute_environment_gpu_max_vcpus" {
8787
type = string
88-
description = "Maximum number of EC2 vCPUs that our GPU Batch Compute Environment can reach."
88+
description = "Maximum number of Amazon EC2 vCPUs that our GPU Batch Compute Environment can reach."
8989
default = 32
9090
}
9191

9292
variable "batch_compute_environment_gpu_min_vcpus" {
9393
type = string
94-
description = "Minimum number of EC2 vCPUs that our GPU Batch Compute Environment should maintain."
94+
description = "Minimum number of Amazon EC2 vCPUs that our GPU Batch Compute Environment should maintain."
9595
default = 0
9696
}
9797

9898
variable "batch_compute_environment_gpu_desired_vcpus" {
9999
type = string
100-
description = "Desired number of EC2 vCPUS in our GPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
100+
description = "Desired number of Amazon EC2 vCPUS in our GPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
101101
default = 0
102102
}
103103

104104
variable "batch_cpu_instance_types" {
105105
type = list(string)
106-
description = "EC2 instance types allowed for CPU Batch jobs. Types can be explicitly or implicitly requested by data scientists."
106+
description = "Amazon EC2 instance types allowed for CPU Batch jobs. Types can be explicitly or implicitly requested by data scientists."
107107
default = [
108108
"r5.large",
109109
"r5.xlarge",
@@ -117,7 +117,7 @@ variable "batch_cpu_instance_types" {
117117

118118
variable "batch_large_cpu_instance_types" {
119119
type = list(string)
120-
description = "EC2 instance types allowed for larger CPU Batch jobs. Types can be explicitly or implicitly requested by data scientists."
120+
description = "Amazon EC2 instance types allowed for larger CPU Batch jobs. Types can be explicitly or implicitly requested by data scientists."
121121
default = [
122122
"c4.8xlarge",
123123
"r5.4xlarge",
@@ -128,7 +128,7 @@ variable "batch_large_cpu_instance_types" {
128128

129129
variable "batch_gpu_instance_types" {
130130
type = list(string)
131-
description = "EC2 instance types allowed for GPU Batch jobs. Types can be explicitly or implicitly requested by data scientists."
131+
description = "Amazon EC2 instance types allowed for GPU Batch jobs. Types can be explicitly or implicitly requested by data scientists."
132132
default = [
133133
"p3.2xlarge"
134134
]

aws/terraform/modules/metaflow/outputs.tf

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -45,22 +45,26 @@ output "METAFLOW_SFN_DYNAMO_DB_TABLE" {
4545

4646
output "metaflow_profile_json" {
4747
value = jsonencode(
48-
{
49-
"METAFLOW_BATCH_CONTAINER_REGISTRY" = element(split("/", aws_ecr_repository.metaflow_batch_image.repository_url), 0),
50-
"METAFLOW_BATCH_CONTAINER_IMAGE" = element(split("/", aws_ecr_repository.metaflow_batch_image.repository_url), 1),
51-
"METAFLOW_DATASTORE_SYSROOT_S3" = module.metaflow-datastore.METAFLOW_DATASTORE_SYSROOT_S3,
52-
"METAFLOW_DATATOOLS_S3ROOT" = module.metaflow-datastore.METAFLOW_DATATOOLS_S3ROOT,
53-
"METAFLOW_BATCH_JOB_QUEUE" = module.metaflow-computation.METAFLOW_BATCH_JOB_QUEUE,
54-
"METAFLOW_ECS_S3_ACCESS_IAM_ROLE" = module.metaflow-computation.METAFLOW_ECS_S3_ACCESS_IAM_ROLE,
55-
"METAFLOW_SERVICE_URL" = module.metaflow-metadata-service.METAFLOW_SERVICE_URL,
56-
"METAFLOW_SERVICE_INTERNAL_URL" = module.metaflow-metadata-service.METAFLOW_SERVICE_INTERNAL_URL,
57-
"METAFLOW_SFN_IAM_ROLE" = module.metaflow-step-functions.metaflow_step_functions_role_arn,
58-
"METAFLOW_SFN_STATE_MACHINE_PREFIX" = replace("${local.resource_prefix}${local.resource_suffix}", "--", "-"),
59-
"METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE" = module.metaflow-step-functions.metaflow_eventbridge_role_arn,
60-
"METAFLOW_SFN_DYNAMO_DB_TABLE" = module.metaflow-step-functions.metaflow_step_functions_dynamodb_table_name,
61-
"METAFLOW_DEFAULT_DATASTORE" = "s3",
62-
"METAFLOW_DEFAULT_METADATA" = "service"
63-
}
48+
merge(
49+
var.enable_custom_batch_container_registry ? {
50+
"METAFLOW_BATCH_CONTAINER_REGISTRY" = element(split("/", aws_ecr_repository.metaflow_batch_image[0].repository_url), 0),
51+
"METAFLOW_BATCH_CONTAINER_IMAGE" = element(split("/", aws_ecr_repository.metaflow_batch_image[0].repository_url), 1)
52+
} : {},
53+
{
54+
"METAFLOW_DATASTORE_SYSROOT_S3" = module.metaflow-datastore.METAFLOW_DATASTORE_SYSROOT_S3,
55+
"METAFLOW_DATATOOLS_S3ROOT" = module.metaflow-datastore.METAFLOW_DATATOOLS_S3ROOT,
56+
"METAFLOW_BATCH_JOB_QUEUE" = module.metaflow-computation.METAFLOW_BATCH_JOB_QUEUE,
57+
"METAFLOW_ECS_S3_ACCESS_IAM_ROLE" = module.metaflow-computation.METAFLOW_ECS_S3_ACCESS_IAM_ROLE,
58+
"METAFLOW_SERVICE_URL" = module.metaflow-metadata-service.METAFLOW_SERVICE_URL,
59+
"METAFLOW_SERVICE_INTERNAL_URL" = module.metaflow-metadata-service.METAFLOW_SERVICE_INTERNAL_URL,
60+
"METAFLOW_SFN_IAM_ROLE" = module.metaflow-step-functions.metaflow_step_functions_role_arn,
61+
"METAFLOW_SFN_STATE_MACHINE_PREFIX" = replace("${local.resource_prefix}${local.resource_suffix}", "--", "-"),
62+
"METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE" = module.metaflow-step-functions.metaflow_eventbridge_role_arn,
63+
"METAFLOW_SFN_DYNAMO_DB_TABLE" = module.metaflow-step-functions.metaflow_step_functions_dynamodb_table_name,
64+
"METAFLOW_DEFAULT_DATASTORE" = "s3",
65+
"METAFLOW_DEFAULT_METADATA" = "service"
66+
}
67+
)
6468
)
6569
description = "Metaflow profile JSON object that can be used to communicate with this Metaflow Stack. Store this in `~/.metaflow/config_[stack-name]` and select with `$ export METAFLOW_PROFILE=[stack-name]`."
6670
}
@@ -91,7 +95,7 @@ output "metaflow_api_gateway_rest_api_id" {
9195
}
9296

9397
output "metaflow_batch_container_image" {
94-
value = aws_ecr_repository.metaflow_batch_image.repository_url
98+
value = var.enable_custom_batch_container_registry ? aws_ecr_repository.metaflow_batch_image[0].repository_url : ""
9599
description = "The ECR repo containing the metaflow batch image"
96100
}
97101
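The output's description tells users to store the JSON at `~/.metaflow/config_[stack-name]`. A root configuration could automate that step; the sketch below is a hypothetical consumer, not part of this commit, and assumes the `hashicorp/local` provider plus a module block named `metaflow` and a stack name of `metaflow`:

```hcl
# Sketch only: write the generated profile where the Metaflow CLI looks
# for it. The module name and stack name are assumptions.
resource "local_file" "metaflow_profile" {
  content  = module.metaflow.metaflow_profile_json
  filename = pathexpand("~/.metaflow/config_metaflow")
}
```

After applying, `export METAFLOW_PROFILE=metaflow` selects the profile, per the output's description.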

aws/terraform/modules/metaflow/variables.tf

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,57 +13,63 @@ variable "enable_step_functions" {
1313
description = "Provisions infrastructure for step functions if enabled"
1414
}
1515

16+
variable "enable_custom_batch_container_registry" {
17+
type = bool
18+
default = false
19+
description = "Provisions infrastructure for custom Amazon ECR container registry if enabled"
20+
}
21+
1622
variable "cpu_max_compute_vcpus" {
1723
type = string
18-
description = "Maximum number of EC2 vCPUs that our CPU Batch Compute Environment can reach."
24+
description = "Maximum number of Amazon EC2 vCPUs that our CPU Batch Compute Environment can reach."
1925
default = 64
2026
}
2127

2228
variable "cpu_min_compute_vcpus" {
2329
type = string
24-
description = "Minimum number of EC2 vCPUs that our CPU Batch Compute Environment should maintain."
30+
description = "Minimum number of Amazon EC2 vCPUs that our CPU Batch Compute Environment should maintain."
2531
default = 16
2632
}
2733

2834
variable "cpu_desired_compute_vcpus" {
2935
type = string
30-
description = "Desired number of EC2 vCPUS in our CPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
36+
description = "Desired number of Amazon EC2 vCPUS in our CPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
3137
default = 16
3238
}
3339

3440
variable "large_cpu_max_compute_vcpus" {
3541
type = string
36-
description = "Maximum number of EC2 vCPUs that our large CPU Batch Compute Environment can reach."
42+
description = "Maximum number of Amazon EC2 vCPUs that our large CPU Batch Compute Environment can reach."
3743
default = 128
3844
}
3945

4046
variable "large_cpu_min_compute_vcpus" {
4147
type = string
42-
description = "Minimum number of EC2 vCPUs that our large CPU Batch Compute Environment should maintain."
48+
description = "Minimum number of Amazon EC2 vCPUs that our large CPU Batch Compute Environment should maintain."
4349
default = 0
4450
}
4551

4652
variable "large_cpu_desired_compute_vcpus" {
4753
type = string
48-
description = "Desired number of EC2 vCPUS in our large CPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
54+
description = "Desired number of Amazon EC2 vCPUS in our large CPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
4955
default = 0
5056
}
5157

5258
variable "gpu_max_compute_vcpus" {
5359
type = string
54-
description = "Maximum number of EC2 vCPUs that our GPU Batch Compute Environment can reach."
60+
description = "Maximum number of Amazon EC2 vCPUs that our GPU Batch Compute Environment can reach."
5561
default = 64
5662
}
5763

5864
variable "gpu_min_compute_vcpus" {
5965
type = string
60-
description = "Minimum number of EC2 vCPUs that our GPU Batch Compute Environment should maintain."
66+
description = "Minimum number of Amazon EC2 vCPUs that our GPU Batch Compute Environment should maintain."
6167
default = 0
6268
}
6369

6470
variable "gpu_desired_compute_vcpus" {
6571
type = string
66-
description = "Desired number of EC2 vCPUS in our GPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
72+
description = "Desired number of Amazon EC2 vCPUS in our GPU Batch Compute Environment. A non-zero number will ensure instances are always on and avoid some cold-start problems."
6773
default = 0
6874
}
6975
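Because `enable_custom_batch_container_registry` defaults to `false`, existing callers keep their current behavior unless they opt in. A hypothetical invocation (module source path assumed; the module's other required inputs are omitted):

```hcl
# Sketch only: opting in to the custom Amazon ECR registry. Left at its
# default of false, no aws_ecr_repository is created and the
# METAFLOW_BATCH_CONTAINER_* keys are omitted from metaflow_profile_json.
module "metaflow" {
  source = "./modules/metaflow"

  enable_custom_batch_container_registry = true

  # ... VPC, subnet, and other required inputs omitted ...
}
```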
