Skip to content

Commit 815b440

Browse files
committed
terraform ui module
1 parent 5bba717 commit 815b440

File tree

18 files changed

+669
-3
lines changed

18 files changed

+669
-3
lines changed

aws/terraform/modules/metaflow/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ This module requires an Amazon VPC to be set up by the module user beforehand. T
2222
| <a name="module_metaflow-datastore"></a> [metaflow-datastore](#module\_metaflow-datastore) | ./modules/datastore | n/a |
2323
| <a name="module_metaflow-metadata-service"></a> [metaflow-metadata-service](#module\_metaflow-metadata-service) | ./modules/metadata-service | n/a |
2424
| <a name="module_metaflow-step-functions"></a> [metaflow-step-functions](#module\_metaflow-step-functions) | ./modules/step-functions | n/a |
25+
| <a name="module_metaflow-ui"></a> [metaflow-ui](#module\_metaflow-ui) | ./modules/ui | n/a |
2526

2627
## Inputs
2728

@@ -36,12 +37,15 @@ This module requires an Amazon VPC to be set up by the module user beforehand. T
3637
| <a name="input_compute_environment_min_vcpus"></a> [compute\_environment\_min\_vcpus](#input\_compute\_environment\_min\_vcpus) | Minimum VCPUs for Batch Compute Environment [0-16] for EC2 Batch Compute Environment (ignored for Fargate) | `number` | `8` | no |
3738
| <a name="input_enable_custom_batch_container_registry"></a> [enable\_custom\_batch\_container\_registry](#input\_enable\_custom\_batch\_container\_registry) | Provisions infrastructure for custom Amazon ECR container registry if enabled | `bool` | `false` | no |
3839
| <a name="input_enable_step_functions"></a> [enable\_step\_functions](#input\_enable\_step\_functions) | Provisions infrastructure for step functions if enabled | `bool` | n/a | yes |
40+
| <a name="input_extra_ui_backend_env_vars"></a> [extra\_ui\_backend\_env\_vars](#input\_extra\_ui\_backend\_env\_vars) | Additional environment variables for UI backend container | `map(string)` | `{}` | no |
41+
| <a name="input_extra_ui_static_env_vars"></a> [extra\_ui\_static\_env\_vars](#input\_extra\_ui\_static\_env\_vars) | Additional environment variables for UI static app | `map(string)` | `{}` | no |
3942
| <a name="input_iam_partition"></a> [iam\_partition](#input\_iam\_partition) | IAM Partition (Select aws-us-gov for AWS GovCloud, otherwise leave as is) | `string` | `"aws"` | no |
4043
| <a name="input_resource_prefix"></a> [resource\_prefix](#input\_resource\_prefix) | string prefix for all resources | `string` | `"metaflow"` | no |
4144
| <a name="input_resource_suffix"></a> [resource\_suffix](#input\_resource\_suffix) | string suffix for all resources | `string` | `""` | no |
4245
| <a name="input_subnet1_id"></a> [subnet1\_id](#input\_subnet1\_id) | First subnet used for availability zone redundancy | `string` | n/a | yes |
4346
| <a name="input_subnet2_id"></a> [subnet2\_id](#input\_subnet2\_id) | Second subnet used for availability zone redundancy | `string` | n/a | yes |
4447
| <a name="input_tags"></a> [tags](#input\_tags) | aws tags | `map(string)` | n/a | yes |
48+
| <a name="input_ui_certificate_arn"></a> [ui\_certificate\_arn](#input\_ui\_certificate\_arn) | SSL certificate for UI | `string` | n/a | yes |
4549
| <a name="input_vpc_cidr_block"></a> [vpc\_cidr\_block](#input\_vpc\_cidr\_block) | The VPC CIDR block that we'll access list on our Metadata Service API to allow all internal communications | `string` | n/a | yes |
4650
| <a name="input_vpc_id"></a> [vpc\_id](#input\_vpc\_id) | The id of the single VPC we stood up for all Metaflow resources to exist in. | `string` | n/a | yes |
4751

@@ -67,4 +71,5 @@ This module requires an Amazon VPC to be set up by the module user beforehand. T
6771
| <a name="output_metaflow_s3_bucket_arn"></a> [metaflow\_s3\_bucket\_arn](#output\_metaflow\_s3\_bucket\_arn) | The ARN of the bucket we'll be using as blob storage |
6872
| <a name="output_metaflow_s3_bucket_name"></a> [metaflow\_s3\_bucket\_name](#output\_metaflow\_s3\_bucket\_name) | The name of the bucket we'll be using as blob storage |
6973
| <a name="output_migration_function_arn"></a> [migration\_function\_arn](#output\_migration\_function\_arn) | ARN of DB Migration Function |
74+
| <a name="output_ui_alb_dns_name"></a> [ui\_alb\_dns\_name](#output\_ui\_alb\_dns\_name) | UI ALB DNS name |
7075
<!-- END_TF_DOCS -->

aws/terraform/modules/metaflow/main.tf

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,33 @@ module "metaflow-metadata-service" {
3737
standard_tags = var.tags
3838
}
3939

40+
module "metaflow-ui" { # Instantiates the UI submodule (./modules/ui) and wires it to the sibling modules' outputs.
41+
source = "./modules/ui"
42+
43+
resource_prefix = local.resource_prefix
44+
resource_suffix = local.resource_suffix
45+
46+
database_password = module.metaflow-datastore.database_password # database settings come from the datastore module
47+
database_username = module.metaflow-datastore.database_username
48+
datastore_s3_bucket_kms_key_arn = module.metaflow-datastore.datastore_s3_bucket_kms_key_arn
49+
fargate_execution_role_arn = module.metaflow-computation.ecs_execution_role_arn # execution role is taken from the computation module
50+
iam_partition = var.iam_partition
51+
metaflow_vpc_id = var.vpc_id
52+
rds_master_instance_endpoint = module.metaflow-datastore.rds_master_instance_endpoint
53+
s3_bucket_arn = module.metaflow-datastore.s3_bucket_arn
54+
subnet1_id = var.subnet1_id
55+
subnet2_id = var.subnet2_id
56+
vpc_cidr_block = var.vpc_cidr_block
57+
58+
METAFLOW_DATASTORE_SYSROOT_S3 = module.metaflow-datastore.METAFLOW_DATASTORE_SYSROOT_S3
59+
certificate_arn = var.ui_certificate_arn # certificate for the UI's HTTPS listener; a required input of this module
60+
metadata_service_security_group_id = module.metaflow-metadata-service.metadata_service_security_group_id
61+
62+
extra_ui_static_env_vars = var.extra_ui_static_env_vars # optional pass-through maps, default {}
63+
extra_ui_backend_env_vars = var.extra_ui_backend_env_vars
64+
standard_tags = var.tags
65+
}
66+
4067
module "metaflow-computation" {
4168
source = "./modules/computation"
4269

aws/terraform/modules/metaflow/modules/metadata-service/iam.tf

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,13 @@ data "aws_iam_policy_document" "custom_s3_batch" {
4747
effect = "Allow"
4848

4949
actions = [
50-
"s3:GetObject"
50+
"s3:GetObject",
51+
"s3:ListBucket"
5152
]
5253

5354
resources = [
54-
"${var.s3_bucket_arn}/*"
55+
"${var.s3_bucket_arn}/*",
56+
"${var.s3_bucket_arn}"
5557
]
5658
}
5759
}

aws/terraform/modules/metaflow/modules/step-functions/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Step Functions configuration for Metaflow
22

3-
This module sets up the infrastructure to use AWS Step Functions with Metaflow.
3+
This module sets up the infrastructure to use AWS Step Functions with Metaflow.
44

55
This builds on top of the functionality provided by the `computation` module, which allows you to execute Metaflow step code on AWS Batch. If you use the `computation` module alone, the orchestration is done by the Metaflow task scheduler, which itself needs to run somewhere (often your laptop or a dedicated server). Step Functions support in Metaflow allows you to replace that scheduler by compiling your Flows to an AWS Step Functions State Machine and deploying it to AWS.
66

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# UI
2+
3+
Metaflow operational UI
4+
5+
<!-- BEGIN_TF_DOCS -->
6+
## Inputs
7+
8+
| Name | Description | Type | Default | Required |
9+
|------|-------------|------|---------|:--------:|
10+
| <a name="input_METAFLOW_DATASTORE_SYSROOT_S3"></a> [METAFLOW\_DATASTORE\_SYSROOT\_S3](#input\_METAFLOW\_DATASTORE\_SYSROOT\_S3) | METAFLOW\_DATASTORE\_SYSROOT\_S3 value | `string` | n/a | yes |
11+
| <a name="input_certificate_arn"></a> [certificate\_arn](#input\_certificate\_arn) | SSL certificate ARN | `string` | n/a | yes |
12+
| <a name="input_database_password"></a> [database\_password](#input\_database\_password) | The database password | `string` | n/a | yes |
13+
| <a name="input_database_username"></a> [database\_username](#input\_database\_username) | The database username | `string` | n/a | yes |
14+
| <a name="input_datastore_s3_bucket_kms_key_arn"></a> [datastore\_s3\_bucket\_kms\_key\_arn](#input\_datastore\_s3\_bucket\_kms\_key\_arn) | The ARN of the KMS key used to encrypt the Metaflow datastore S3 bucket | `string` | n/a | yes |
15+
| <a name="input_extra_ui_backend_env_vars"></a> [extra\_ui\_backend\_env\_vars](#input\_extra\_ui\_backend\_env\_vars) | Additional environment variables for UI backend container | `map(string)` | `{}` | no |
16+
| <a name="input_extra_ui_static_env_vars"></a> [extra\_ui\_static\_env\_vars](#input\_extra\_ui\_static\_env\_vars) | Additional environment variables for UI static app | `map(string)` | `{}` | no |
17+
| <a name="input_fargate_execution_role_arn"></a> [fargate\_execution\_role\_arn](#input\_fargate\_execution\_role\_arn) | The IAM role that grants access to ECS and Batch services which we'll use as the execution\_role for the UI backend Fargate task | `string` | n/a | yes |
18+
| <a name="input_iam_partition"></a> [iam\_partition](#input\_iam\_partition) | IAM Partition (Select aws-us-gov for AWS GovCloud, otherwise leave as is) | `string` | `"aws"` | no |
19+
| <a name="input_is_gov"></a> [is\_gov](#input\_is\_gov) | Set to true if IAM partition is 'aws-us-gov' | `bool` | `false` | no |
20+
| <a name="input_metadata_service_security_group_id"></a> [metadata\_service\_security\_group\_id](#input\_metadata\_service\_security\_group\_id) | The security group ID used by the MetaData service. We'll grant this access to our DB. | `string` | n/a | yes |
21+
| <a name="input_metaflow_vpc_id"></a> [metaflow\_vpc\_id](#input\_metaflow\_vpc\_id) | ID of the Metaflow VPC the UI resources (security groups, ALB target groups) are to be deployed in | `string` | n/a | yes |
22+
| <a name="input_rds_master_instance_endpoint"></a> [rds\_master\_instance\_endpoint](#input\_rds\_master\_instance\_endpoint) | The database connection endpoint in address:port format | `string` | n/a | yes |
23+
| <a name="input_resource_prefix"></a> [resource\_prefix](#input\_resource\_prefix) | Prefix given to all AWS resources to differentiate between applications | `string` | n/a | yes |
24+
| <a name="input_resource_suffix"></a> [resource\_suffix](#input\_resource\_suffix) | Suffix given to all AWS resources to differentiate between environment and workspace | `string` | n/a | yes |
25+
| <a name="input_s3_bucket_arn"></a> [s3\_bucket\_arn](#input\_s3\_bucket\_arn) | The ARN of the bucket we'll be using as blob storage | `string` | n/a | yes |
26+
| <a name="input_standard_tags"></a> [standard\_tags](#input\_standard\_tags) | The standard tags to apply to every AWS resource. | `map(string)` | n/a | yes |
27+
| <a name="input_subnet1_id"></a> [subnet1\_id](#input\_subnet1\_id) | First private subnet used for availability zone redundancy | `string` | n/a | yes |
28+
| <a name="input_subnet2_id"></a> [subnet2\_id](#input\_subnet2\_id) | Second private subnet used for availability zone redundancy | `string` | n/a | yes |
29+
| <a name="input_ui_backend_container_image"></a> [ui\_backend\_container\_image](#input\_ui\_backend\_container\_image) | Container image for UI backend | `string` | `"netflixoss/metaflow_metadata_service:2.1.0"` | no |
30+
| <a name="input_ui_static_container_image"></a> [ui\_static\_container\_image](#input\_ui\_static\_container\_image) | Container image for UI static app | `string` | `"public.ecr.aws/outerbounds/metaflow_ui:v1.0.1"` | no |
31+
| <a name="input_vpc_cidr_block"></a> [vpc\_cidr\_block](#input\_vpc\_cidr\_block) | The VPC CIDR block that we'll access list on our Metadata Service API to allow all internal communications | `string` | n/a | yes |
32+
33+
## Outputs
34+
35+
| Name | Description |
36+
|------|-------------|
37+
| <a name="output_alb_dns_name"></a> [alb\_dns\_name](#output\_alb\_dns\_name) | UI ALB DNS name |
38+
<!-- END_TF_DOCS -->
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
resource "aws_cloudwatch_log_group" "this" { # CloudWatch log group for the UI, named <prefix>ui<suffix>.
2+
name = "${var.resource_prefix}ui${var.resource_suffix}"
3+
4+
tags = var.standard_tags # NOTE(review): no retention_in_days is set here; confirm the default retention is intended.
5+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
data "aws_caller_identity" "current" {} # Account/identity the AWS provider is authenticated as.
2+
3+
data "aws_region" "current" {} # Region the AWS provider is configured for.
4+
5+
data "aws_security_group" "vpc_default" { # Looks up the security group named "default" in the Metaflow VPC.
6+
name = "default"
7+
vpc_id = var.metaflow_vpc_id
8+
}
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
resource "aws_security_group" "fargate_security_group" { # Security group attached to the UI backend Fargate service.
2+
name = local.ui_backend_security_group_name
3+
description = "Security Group for Fargate which runs the UI Backend."
4+
vpc_id = var.metaflow_vpc_id
5+
6+
ingress { # Traffic originating from the UI load balancer's security group.
7+
from_port = 0
8+
to_port = 0
9+
protocol = "-1" # all; NOTE(review): every port/protocol is open to the ALB, consider narrowing to the container ports.
10+
security_groups = [aws_security_group.ui_lb_security_group.id]
11+
}
12+
13+
ingress { # Traffic between members of this security group.
14+
from_port = 0
15+
to_port = 0
16+
protocol = "-1" # all; written as a string to match the egress rule below
17+
self = true
18+
description = "Internal communication"
19+
}
20+
21+
# egress to anywhere
22+
egress {
23+
from_port = 0
24+
to_port = 0
25+
protocol = "-1" # all
26+
cidr_blocks = ["0.0.0.0/0"]
27+
description = "Allow all external communication"
28+
}
29+
30+
tags = merge( # standard tags plus a Metaflow marker tag
31+
var.standard_tags,
32+
{
33+
Metaflow = "true"
34+
}
35+
)
36+
}
37+
38+
resource "aws_security_group" "ui_lb_security_group" { # Security group attached to the UI application load balancer.
39+
name = local.alb_security_group_name
40+
description = "Security Group for ALB"
41+
vpc_id = var.metaflow_vpc_id
42+
43+
ingress { # HTTPS from any IPv4 address.
44+
from_port = 443
45+
to_port = 443
46+
protocol = "tcp"
47+
cidr_blocks = ["0.0.0.0/0"]
48+
description = "Allow public HTTPS"
49+
}
50+
51+
ingress { # Traffic between members of this security group.
52+
from_port = 0
53+
to_port = 0
54+
protocol = "-1" # all; written as a string to match the egress rule below
55+
self = true
56+
description = "Internal communication"
57+
}
58+
59+
# egress to anywhere
60+
egress {
61+
from_port = 0
62+
to_port = 0
63+
protocol = "-1" # all
64+
cidr_blocks = ["0.0.0.0/0"]
65+
description = "Allow all external communication"
66+
}
67+
68+
tags = merge( # standard tags plus a Metaflow marker tag
69+
var.standard_tags,
70+
{
71+
Metaflow = "true"
72+
}
73+
)
74+
}
75+
76+
resource "aws_lb" "this" { # Application load balancer fronting the UI, named <prefix>alb<suffix>.
77+
name = "${var.resource_prefix}alb${var.resource_suffix}"
78+
internal = false # not an internal load balancer
79+
load_balancer_type = "application"
80+
subnets = [var.subnet1_id, var.subnet2_id] # NOTE(review): the module README describes these as private subnets; confirm that suits a non-internal ALB.
81+
security_groups = [
82+
aws_security_group.ui_lb_security_group.id
83+
]
84+
85+
tags = var.standard_tags
86+
}
87+
88+
resource "aws_lb_target_group" "ui_backend" { # Target group for the UI backend; targets are registered by IP on port 8083.
89+
name = "${var.resource_prefix}ui-backend${var.resource_suffix}"
90+
port = 8083
91+
protocol = "HTTP"
92+
target_type = "ip"
93+
vpc_id = var.metaflow_vpc_id
94+
95+
health_check { # HTTP GET /api/ping on port 8083
96+
protocol = "HTTP"
97+
port = 8083
98+
path = "/api/ping"
99+
interval = 10 # seconds between checks
100+
healthy_threshold = 2
101+
unhealthy_threshold = 2
102+
}
103+
104+
tags = var.standard_tags
105+
}
106+
107+
resource "aws_lb_target_group" "ui_static" { # Target group for the UI static app; targets are registered by IP on port 3000.
108+
name = "${var.resource_prefix}ui-static${var.resource_suffix}"
109+
port = 3000
110+
protocol = "HTTP"
111+
target_type = "ip"
112+
vpc_id = var.metaflow_vpc_id # no health_check block is declared, so the provider defaults apply
113+
tags = var.standard_tags
114+
}
115+
116+
resource "aws_lb_listener" "this" { # HTTPS listener on the UI load balancer.
117+
load_balancer_arn = aws_lb.this.arn
118+
port = "443"
119+
protocol = "HTTPS"
120+
121+
certificate_arn = var.certificate_arn
122+
123+
default_action { # Requests not matched by a listener rule are forwarded to the static UI target group.
124+
type = "forward"
125+
target_group_arn = aws_lb_target_group.ui_static.arn # use .arn explicitly, consistent with the ui_backend listener rule
126+
order = 100
127+
}
128+
}
129+
130+
resource "aws_lb_listener_rule" "ui_backend" { # Routes /api/* requests on the HTTPS listener to the UI backend target group.
131+
listener_arn = aws_lb_listener.this.arn
132+
priority = 1
133+
134+
action {
135+
type = "forward"
136+
target_group_arn = aws_lb_target_group.ui_backend.arn
137+
}
138+
139+
condition {
140+
path_pattern {
141+
values = ["/api/*"] # only paths under /api/ match; everything else falls through to the listener's default action
142+
}
143+
}
144+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
resource "aws_ecs_cluster" "this" { # ECS cluster that the UI backend service is placed in.
2+
name = local.ecs_cluster_name
3+
4+
tags = merge( # standard tags plus a Name tag and a Metaflow marker tag
5+
var.standard_tags,
6+
{
7+
Name = local.ecs_cluster_name
8+
Metaflow = "true"
9+
}
10+
)
11+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
2+
resource "aws_ecs_task_definition" "ui_backend" { # Fargate task definition with a single UI backend container.
3+
family = "${var.resource_prefix}ui_backend${var.resource_suffix}" # Unique name for task definition
4+
5+
container_definitions = jsonencode([
6+
{
7+
name = "${var.resource_prefix}ui_backend${var.resource_suffix}" # must match container_name in the ECS service's load_balancer block
8+
image = var.ui_backend_container_image
9+
essential = true
10+
cpu = 2048
11+
memory = 16384
12+
portMappings = [
13+
{
14+
containerPort = 8083
15+
hostPort = 8083
16+
}
17+
]
18+
environment = [for k, v in merge(local.default_ui_backend_env_vars, var.extra_ui_backend_env_vars) : { name = k, value = v }] # extra vars override defaults on key collision
19+
logConfiguration = {
20+
logDriver = "awslogs"
21+
options = {
22+
"awslogs-group" = aws_cloudwatch_log_group.this.name # direct reference instead of an interpolation-only string
23+
"awslogs-region" = data.aws_region.current.name
24+
"awslogs-stream-prefix" = "ui_backend"
25+
}
26+
}
27+
}
28+
])
29+
30+
network_mode = "awsvpc"
31+
requires_compatibilities = ["FARGATE"]
32+
task_role_arn = aws_iam_role.metadata_ui_ecs_task_role.arn
33+
execution_role_arn = var.fargate_execution_role_arn
34+
cpu = 2048 # task-level size; same values as the container-level cpu/memory above
35+
memory = 16384
36+
37+
ephemeral_storage {
38+
size_in_gib = 100
39+
}
40+
41+
tags = merge( # standard tags plus a Metaflow marker tag
42+
var.standard_tags,
43+
{
44+
Metaflow = "true"
45+
}
46+
)
47+
}
48+
49+
resource "aws_ecs_service" "ui_backend" { # Runs the UI backend task on Fargate and registers it with the ui_backend target group.
50+
name = "${var.resource_prefix}ui_backend${var.resource_suffix}"
51+
cluster = aws_ecs_cluster.this.id
52+
task_definition = aws_ecs_task_definition.ui_backend.arn
53+
desired_count = 1
54+
launch_type = "FARGATE"
55+
56+
network_configuration {
57+
security_groups = [aws_security_group.fargate_security_group.id, var.metadata_service_security_group_id]
58+
assign_public_ip = true
59+
subnets = [var.subnet1_id, var.subnet2_id]
60+
}
61+
62+
load_balancer {
63+
target_group_arn = aws_lb_target_group.ui_backend.arn
64+
container_name = "${var.resource_prefix}ui_backend${var.resource_suffix}" # must match the container name in the task definition
65+
container_port = 8083
66+
}
67+
68+
lifecycle {
69+
ignore_changes = [desired_count] # changes to the task count made outside Terraform are not reverted
70+
}
71+
depends_on = [aws_lb_listener_rule.ui_backend] # the target group must be attached to the ALB before the service can be created
72+
tags = var.standard_tags
73+
}

0 commit comments

Comments
 (0)