diff --git a/.gitignore b/.gitignore index f60797b..61843d8 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,9 @@ node_modules # CDK asset staging directory .cdk.staging cdk.out + +# Terraform files +*.tfstate +*.tfstate.backup +.terraform/ +.terraform.lock.hcl diff --git a/terraform/README.md b/terraform/README.md new file mode 100644 index 0000000..d9e1b08 --- /dev/null +++ b/terraform/README.md @@ -0,0 +1,111 @@ +# Architecture overview + +This Terraform project provisions an AWS VPC in two Availability Zones (AZs) for fault tolerance and high availability. +It creates a **Multi-AZ VPC** with **public/private subnets**, **per-AZ NAT Gateways**, **VPC Flow Logs**, and deploys an **ECS Fargate service** connected to a **private ECR repository** behind an **Application Load Balancer (ALB)**. + +--- + +## Core Components + +### 1. **VPC** +- CIDR: e.g., `10.0.0.0/16` +- DNS hostnames and DNS support enabled +- Isolated, dedicated network for workloads + +--- + +### 2. **Subnets (Multi-AZ)** +- **Public Subnets (x2):** + - One per AZ (e.g., `10.0.0.0/20`, `10.0.16.0/20`) + - Host ALB, NAT Gateways + - Auto-assign public IPs + +- **Private Subnets (x2):** + - One per AZ (e.g., `10.0.32.0/20`, `10.0.48.0/20`) + - Host ECS tasks, EKS nodes, or databases + - No direct internet access + +--- + +### 3. **Internet Gateway (IGW)** +- Attached to the VPC +- Enables outbound access for public subnets +- Used for inbound ALB or bastion connectivity + +--- + +### 4. **NAT Gateways (per AZ)** +- One NAT Gateway per AZ for fault tolerance +- Private subnets route outbound traffic to their local NAT +- Ensures resiliency during single-AZ failure + +--- + +### 5. **Route Tables** +- **Public Route Table:** default route → Internet Gateway +- **Private Route Tables (per AZ):** default route → NAT Gateway + +--- + +### 6. 
**VPC Endpoints (optional)** +- **Gateway Endpoints:** for S3 — keep traffic inside AWS backbone +- **Interface Endpoints:** for SSM, EC2, CloudWatch, ECR — secure private API access + +--- + +### 7. **VPC Flow Logs → CloudWatch** +- Captures ACCEPT / REJECT / ALL traffic metadata +- Sent to CloudWatch Log Group: `/vpc/<project>/flow-logs` +- IAM Role with least privilege for logging +- Enables audit, security, and performance analysis + +--- + +### 8. **ECS Fargate Cluster & Tasks** +- Cluster with container insights enabled +- Task definitions define containers, CPU/memory, and environment variables +- Pulls Docker image from private ECR +- Runs in private subnets (no public IP) +- Logs sent to CloudWatch Logs + +--- + +### 9. **Application Load Balancer (ALB)** +- Deployed in public subnets +- Routes inbound traffic to ECS tasks in private subnets +- Supports HTTP and optional HTTPS via ACM certificate +- Health checks and circuit breakers for resilience + +--- + +# Security Considerations + +- **Network Isolation** Private workloads only reachable via ALB. No public IPs on ECS tasks +- **Per-AZ NAT Gateways** AZ-specific egress preventing cross-AZ dependency +- **Security Groups** ALB SG ingress from trusted CIDRs only. Tasks SG only allows ALB ingress +- **IAM Roles** Separate task & execution roles. Principle of least privilege enforced +- **Logging & Audit** VPC Flow Logs and CloudWatch +- **ECR Hygiene** Private repo +- **Observability** CloudWatch metrics & logs. Supports alerts and dashboards + +--- + +# Deployment Steps + +1. **Container Image** + +- `aws ecr get-login-password --region <region> \ + | docker login --username AWS --password-stdin <account_id>.dkr.ecr.<region>.amazonaws.com` +- `docker build -t <repository> .` +- `docker tag <repository>:latest <account_id>.dkr.ecr.<region>.amazonaws.com/<repository>:v1` +- `docker push <account_id>.dkr.ecr.<region>.amazonaws.com/<repository>:v1` + +- Update the container image name in the terraform.tfvars file with the newly built image and save the file. + +2. 
**Initialize Terraform** + Run the following commands to initialize and deploy VPC and ECS services: + +- terraform init +- terraform plan -out tf.plan +- terraform apply tf.plan +- terraform output alb_dns_name for application dns name diff --git a/terraform/alb.tf b/terraform/alb.tf new file mode 100644 index 0000000..ca73e05 --- /dev/null +++ b/terraform/alb.tf @@ -0,0 +1,90 @@ +# Security group for the public ALB: HTTP/HTTPS in from var.allowed_ingress_cidrs, egress only into the VPC. +resource "aws_security_group" "iac_exercise_alb_sg" { + name = "${var.project}-alb-sg" + description = "ALB ingress" + vpc_id = aws_vpc.iac_exercise_vpc.id + tags = var.tags + + ingress { + description = "HTTP" + from_port = 80 + to_port = 80 + protocol = "tcp" + cidr_blocks = var.allowed_ingress_cidrs + } + + # HTTPS must be reachable: the port-80 listener only redirects clients to 443 + ingress { + description = "HTTPS" + from_port = 443 + to_port = 443 + protocol = "tcp" + cidr_blocks = var.allowed_ingress_cidrs + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = [aws_vpc.iac_exercise_vpc.cidr_block] + } +} + +resource "aws_lb" "iac_exercise_app_alb" { + name = "${var.project}-alb" + internal = false + load_balancer_type = "application" + security_groups = [aws_security_group.iac_exercise_alb_sg.id] + subnets = [for k, s in aws_subnet.iac_exercise_public : s.id] + idle_timeout = var.alb_idle_timeout + enable_deletion_protection = false + tags = var.tags +} + +resource "aws_lb_target_group" "iac_exercise_app_tg" { + name = "${var.project}-tg" + port = var.container_port + protocol = "HTTP" + target_type = "ip" + vpc_id = aws_vpc.iac_exercise_vpc.id + + health_check { + path = var.health_check_path + healthy_threshold = 2 + unhealthy_threshold = 5 + timeout = 5 + interval = 30 + matcher = "200-399" + } + + tags = var.tags +} + +resource "aws_lb_listener" "iac_exercise_http" { + load_balancer_arn = aws_lb.iac_exercise_app_alb.arn + port = 80 + protocol = "HTTP" + + default_action { + type = "redirect" + redirect { + port = "443" + protocol = "HTTPS" + status_code = "HTTP_301" + } + } +} + +# HTTPS listener using the imported self-signed cert +resource "aws_lb_listener" "https" { + load_balancer_arn = aws_lb.iac_exercise_app_alb.arn + port = 443 + protocol = "HTTPS" + ssl_policy = 
"ELBSecurityPolicy-TLS13-1-2-2021-06" + certificate_arn = aws_acm_certificate.self_signed.arn + + default_action { + type = "forward" + target_group_arn = aws_lb_target_group.iac_exercise_app_tg.arn + } +} diff --git a/terraform/autoscale.tf b/terraform/autoscale.tf new file mode 100644 index 0000000..e9a3044 --- /dev/null +++ b/terraform/autoscale.tf @@ -0,0 +1,44 @@ +# Target the ECS service desired count +resource "aws_appautoscaling_target" "iac_exercise_svc" { + max_capacity = 10 + min_capacity = 2 + resource_id = "service/${aws_ecs_cluster.iac_exercise_cluster.name}/${aws_ecs_service.iac_exercise_app_service.name}" + scalable_dimension = "ecs:service:DesiredCount" + service_namespace = "ecs" +} + +# Scale out on average CPU > 60% +resource "aws_appautoscaling_policy" "iac_exercise_cpu_scale_out" { + name = "${var.project}-cpu-scale-out" + policy_type = "TargetTrackingScaling" + resource_id = aws_appautoscaling_target.iac_exercise_svc.resource_id + scalable_dimension = aws_appautoscaling_target.iac_exercise_svc.scalable_dimension + service_namespace = aws_appautoscaling_target.iac_exercise_svc.service_namespace + + target_tracking_scaling_policy_configuration { + predefined_metric_specification { + predefined_metric_type = "ECSServiceAverageCPUUtilization" + } + target_value = 60 + scale_in_cooldown = 60 + scale_out_cooldown = 60 + } +} + +# Scale out on average Memory > 70% +resource "aws_appautoscaling_policy" "iac_exercise_mem_scale_out" { + name = "${var.project}-mem-scale-out" + policy_type = "TargetTrackingScaling" + resource_id = aws_appautoscaling_target.iac_exercise_svc.resource_id + scalable_dimension = aws_appautoscaling_target.iac_exercise_svc.scalable_dimension + service_namespace = aws_appautoscaling_target.iac_exercise_svc.service_namespace + + target_tracking_scaling_policy_configuration { + predefined_metric_specification { + predefined_metric_type = "ECSServiceAverageMemoryUtilization" + } + target_value = 70 + scale_in_cooldown = 60 + 
scale_out_cooldown = 60 + } +} diff --git a/terraform/backend.tf b/terraform/backend.tf new file mode 100644 index 0000000..d44a8bc --- /dev/null +++ b/terraform/backend.tf @@ -0,0 +1,8 @@ + # terraform { + # backend "s3" { + # bucket = "iac-exercise-terraform-state-bucket" + # key = "iac-exercise/terraform.tfstate" + # region = "us-west-2" + # encrypt = true + # } + # } \ No newline at end of file diff --git a/terraform/ecs.tf b/terraform/ecs.tf new file mode 100644 index 0000000..2037740 --- /dev/null +++ b/terraform/ecs.tf @@ -0,0 +1,61 @@ +resource "aws_ecs_cluster" "iac_exercise_cluster" { + name = "${var.project}-cluster" + setting { + name = "containerInsights" + value = "enabled" + } + tags = var.tags +} + +# Capacity providers so we can mix On-Demand and Spot +resource "aws_ecs_cluster_capacity_providers" "iac_exercise_cluster_capacity_provider" { + cluster_name = aws_ecs_cluster.iac_exercise_cluster.name + capacity_providers = var.enable_fargate_spot ? ["FARGATE", "FARGATE_SPOT"] : ["FARGATE"] + default_capacity_provider_strategy { + capacity_provider = "FARGATE" + weight = 1 + } +} + +locals { + container_def = { + name = var.project + image = var.container_image + essential = true + portMappings = [{ + containerPort = var.container_port + hostPort = var.container_port + protocol = "tcp" + appProtocol = "http" + }] + environment = [ + for k, v in var.env_vars : { name = k, value = v } + ] + logConfiguration = { + logDriver = "awslogs" + options = { + awslogs-group = aws_cloudwatch_log_group.iac_exercise_app_log.name + awslogs-region = var.region + awslogs-stream-prefix = var.project + } + } + } +} + +resource "aws_ecs_task_definition" "iac_exercise_app" { + family = "${var.project}-task" + network_mode = "awsvpc" + requires_compatibilities = ["FARGATE"] + cpu = tostring(var.cpu) + memory = tostring(var.memory) + execution_role_arn = aws_iam_role.iac_exercise_task_execution.arn + task_role_arn = aws_iam_role.iac_exercise_task_role.arn + runtime_platform { 
+ operating_system_family = "LINUX" + cpu_architecture = "X86_64" + } + + container_definitions = jsonencode([local.container_def]) + tags = var.tags +} + diff --git a/terraform/ecs_imag_pull_alarm.tf b/terraform/ecs_imag_pull_alarm.tf new file mode 100644 index 0000000..3d0c3cc --- /dev/null +++ b/terraform/ecs_imag_pull_alarm.tf @@ -0,0 +1,120 @@ +# ------------------------------------------------------------ +# 1) Log group to receive ECS task state change events +# ------------------------------------------------------------ +resource "aws_cloudwatch_log_group" "ecs_events" { + name = "/aws/events/${var.project}/ecs-task-events" + retention_in_days = 30 + tags = var.tags +} + +# ------------------------------------------------------------ +# 2) EventBridge rule: match ECS task STOPPED with pull errors +# We look at stoppedReason for common image pull failures. +# ------------------------------------------------------------ +resource "aws_cloudwatch_event_rule" "ecs_image_pull_errors" { + name = "${var.project}-ecs-image-pull-errors" + description = "Match ECS task STOPPED events with Docker image pull errors" + + event_pattern = jsonencode({ + "source": ["aws.ecs"], + "detail-type": ["ECS Task State Change"], + "detail": { + "lastStatus": ["STOPPED"], + "stoppedReason": [ + { "prefix": "CannotPullContainerError" }, + { "prefix": "CannotCreateContainerError" }, + { "prefix": "Error response from daemon: pull access denied" } + ] + } + }) + tags = var.tags +} + +# ------------------------------------------------------------ +# 3) IAM role so EventBridge can write into CloudWatch Logs +# ------------------------------------------------------------ +data "aws_iam_policy_document" "events_to_logs_assume" { + statement { + effect = "Allow" + actions = ["sts:AssumeRole"] + principals { + type = "Service" + identifiers = ["events.amazonaws.com"] + } + } +} + +resource "aws_iam_role" "events_to_logs" { + name = "${var.project}-events-to-logs-role" + assume_role_policy = 
data.aws_iam_policy_document.events_to_logs_assume.json + tags = var.tags +} + +resource "aws_iam_role_policy" "events_to_logs" { + name = "${var.project}-events-to-logs-policy" + role = aws_iam_role.events_to_logs.id + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect: "Allow", + Action: [ + "logs:CreateLogStream", + "logs:PutLogEvents", + "logs:DescribeLogGroups", + "logs:DescribeLogStreams" + ], + Resource: aws_cloudwatch_log_group.ecs_events.arn + } + ] + }) +} + +# ------------------------------------------------------------ +# 4) EventBridge target → CloudWatch Logs +# ------------------------------------------------------------ +resource "aws_cloudwatch_event_target" "ecs_image_pull_errors_to_logs" { + rule = aws_cloudwatch_event_rule.ecs_image_pull_errors.name + target_id = "send-to-cwlogs" + arn = aws_cloudwatch_log_group.ecs_events.arn +} + +# ------------------------------------------------------------ +# 5) Metric filter: produce a 1-count metric when pattern matches +# ------------------------------------------------------------ +resource "aws_cloudwatch_log_metric_filter" "image_pull_error" { + name = "${var.project}-ImagePullErrors" + log_group_name = aws_cloudwatch_log_group.ecs_events.name + + # Match common ECS image pull failures recorded by EventBridge + # You can extend this with additional phrases if needed. + pattern = "\"CannotPullContainerError\"" + + metric_transformation { + name = "ImagePullErrors" + namespace = "ECS/ImagePull" + value = "1" + } +} + +# ------------------------------------------------------------ +# 6) Single CloudWatch alarm on that custom metric +# Fires if >=1 error occurs in a 5-minute period. 
+# ------------------------------------------------------------ +resource "aws_cloudwatch_metric_alarm" "image_pull_error_alarm" { + alarm_name = "${var.project}-ECS-ImagePullErrors" + alarm_description = "ECS Docker image pull error detected (CannotPullContainerError / CannotCreateContainerError)" + namespace = "ECS/ImagePull" + metric_name = aws_cloudwatch_log_metric_filter.image_pull_error.metric_transformation[0].name + statistic = "Sum" + period = 300 + evaluation_periods = 1 + threshold = 0 + comparison_operator = "GreaterThanThreshold" + treat_missing_data = "notBreaching" + + alarm_actions = length(var.alarm_sns_topic_arn) > 0 ? [var.alarm_sns_topic_arn] : [] + ok_actions = length(var.alarm_sns_topic_arn) > 0 ? [var.alarm_sns_topic_arn] : [] + + tags = var.tags +} diff --git a/terraform/endpoints.tf b/terraform/endpoints.tf new file mode 100644 index 0000000..2bff44f --- /dev/null +++ b/terraform/endpoints.tf @@ -0,0 +1,66 @@ +// Note: S3 gateway endpoint omitted because route table resources are +// currently not managed in this module. If you want S3 gateway endpoints, +// re-enable the route table resources in `routes.tf` and add a gateway +// endpoint here. + +// Common interface endpoints (landed into private subnets). We include +// ECR interface endpoints by default unless explicitly disabled so tasks in +// private subnets can pull images without NAT. +locals { + base_interface_endpoint_services = [ + "com.amazonaws.${var.region}.ssm", + "com.amazonaws.${var.region}.ssmmessages", + "com.amazonaws.${var.region}.ec2messages", + "com.amazonaws.${var.region}.logs", + "com.amazonaws.${var.region}.sts", + "com.amazonaws.${var.region}.secretsmanager", + "com.amazonaws.${var.region}.kms", + "com.amazonaws.${var.region}.ec2", + ] + + ecr_services = [for svc in ["ecr.api", "ecr.dkr"] : "com.amazonaws.${var.region}.${svc}"] + + interface_endpoint_services = var.disable_ecr_interface_endpoints ? 
local.base_interface_endpoint_services : concat(local.base_interface_endpoint_services, local.ecr_services) +} + +// Security group for interface endpoints. Allow tasks to reach endpoints on HTTPS. +resource "aws_security_group" "iac_exercise_endpoints" { + count = var.create_interface_endpoints ? 1 : 0 + name = "${var.project}-vpce-sg" + vpc_id = aws_vpc.iac_exercise_vpc.id + + // Allow private tasks to reach the endpoints over HTTPS + ingress { + from_port = 443 + to_port = 443 + protocol = "tcp" + security_groups = [aws_security_group.iac_exercise_tasks_sg.id] // the SG the ECS service attaches to its tasks + // Also allow the public and private subnet CIDRs as a fallback + cidr_blocks = concat( + [for s in aws_subnet.iac_exercise_public : s.cidr_block], + [for s in aws_subnet.iac_exercise_private : s.cidr_block] + ) + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = merge(var.tags, { Name = "${var.project}-vpce-sg" }) +} + +// Create interface endpoints for the selected services into private subnets +resource "aws_vpc_endpoint" "iac_exercise_interfaces" { + for_each = var.create_interface_endpoints ? 
toset(local.interface_endpoint_services) : toset([]) + vpc_id = aws_vpc.iac_exercise_vpc.id + service_name = each.value + vpc_endpoint_type = "Interface" + private_dns_enabled = true + subnet_ids = [for k, s in aws_subnet.iac_exercise_private : s.id] + security_group_ids = [aws_security_group.iac_exercise_endpoints[0].id] + + tags = merge(var.tags, { Name = "${var.project}-vpce-${replace(each.value, "com.amazonaws.${var.region}.", "")}" }) +} diff --git a/terraform/iam.tf b/terraform/iam.tf new file mode 100644 index 0000000..46399bd --- /dev/null +++ b/terraform/iam.tf @@ -0,0 +1,52 @@ +resource "aws_iam_role" "iac_exercise_task_execution" { + name = "${var.project}-ecsTaskExecutionRole" + assume_role_policy = data.aws_iam_policy_document.iac_exercise_ecs_tasks_assume.json + tags = var.tags +} + +resource "aws_iam_role" "iac_exercise_task_role" { + name = "${var.project}-ecsTaskRole" + assume_role_policy = data.aws_iam_policy_document.iac_exercise_ecs_tasks_assume.json + tags = var.tags +} + +data "aws_iam_policy_document" "iac_exercise_ecs_tasks_assume" { + statement { + effect = "Allow" + principals { + type = "Service" + identifiers = ["ecs-tasks.amazonaws.com"] + } + actions = ["sts:AssumeRole"] + } +} + +resource "aws_iam_role_policy_attachment" "iac_exercise_exec_ecr" { + role = aws_iam_role.iac_exercise_task_execution.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" +} + +# Ensure the execution role can read from ECR (pull images). The managed +# AmazonECSTaskExecutionRolePolicy should normally cover this, but add the +# explicit ReadOnly policy to be explicit and avoid pull errors. +resource "aws_iam_role_policy_attachment" "iac_exercise_exec_ecr_readonly" { + role = aws_iam_role.iac_exercise_task_execution.name + policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" +} + +# Provide the running task (container) with read-only S3 access by default. 
+# This is attached to the task role (not the execution role) since it's used +# by the application code inside the container. +resource "aws_iam_role_policy_attachment" "iac_exercise_task_s3_readonly" { + role = aws_iam_role.iac_exercise_task_role.name + policy_arn = "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" +} + + +# Add explicit ECR pull-only policy to the execution role to ensure +# it can pull images from ECR. +resource "aws_iam_role_policy_attachment" "exec_ecr_pullonly" { + role = aws_iam_role.iac_exercise_task_execution.name # image pulls use the execution role, not the task role + policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryPullOnly" +} + diff --git a/terraform/logs.tf b/terraform/logs.tf new file mode 100644 index 0000000..01e8904 --- /dev/null +++ b/terraform/logs.tf @@ -0,0 +1,76 @@ +resource "aws_cloudwatch_log_group" "iac_exercise_app_log" { + name = "/ecs/${var.project}" + retention_in_days = 30 + tags = var.tags +} + +locals { + flow_log_group_name = "/vpc/${var.project}/flow-logs" +} + +# CloudWatch log group for VPC Flow Logs +resource "aws_cloudwatch_log_group" "vpc_flow" { + count = var.flow_logs_enabled ? 1 : 0 + name = local.flow_log_group_name + retention_in_days = var.flow_logs_retention_days + tags = var.tags +} + +# IAM role that VPC Flow Logs service assumes to write to CW Logs +resource "aws_iam_role" "flowlogs" { + count = var.flow_logs_enabled ? 1 : 0 + name = "${var.project}-vpc-flowlogs-role" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Principal = { Service = "vpc-flow-logs.amazonaws.com" } + Action = "sts:AssumeRole" + }] + }) + + tags = var.tags +} + +# Least-privilege policy for writing to the specific log group +resource "aws_iam_role_policy" "flowlogs" { + count = var.flow_logs_enabled ? 
1 : 0 + name = "${var.project}-vpc-flowlogs-policy" + role = aws_iam_role.flowlogs[0].id + + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "logs:CreateLogStream", + "logs:PutLogEvents", + "logs:DescribeLogGroups", + "logs:DescribeLogStreams" + ], + Resource = [aws_cloudwatch_log_group.vpc_flow[0].arn, "${aws_cloudwatch_log_group.vpc_flow[0].arn}:*"] # ":*" covers the group's log streams, which PutLogEvents targets + } + ] + }) +} + +# VPC Flow Log resource +resource "aws_flow_log" "this" { + count = var.flow_logs_enabled ? 1 : 0 + + log_destination_type = "cloud-watch-logs" + log_destination = aws_cloudwatch_log_group.vpc_flow[0].arn + iam_role_arn = aws_iam_role.flowlogs[0].arn + traffic_type = var.flow_logs_traffic_type + + vpc_id = aws_vpc.iac_exercise_vpc.id + + tags = merge(var.tags, { Name = "${var.project}-vpc-flowlogs" }) + + depends_on = [ + aws_cloudwatch_log_group.vpc_flow, + aws_iam_role_policy.flowlogs + ] +} diff --git a/terraform/nacl.tf b/terraform/nacl.tf new file mode 100644 index 0000000..2d1a078 --- /dev/null +++ b/terraform/nacl.tf @@ -0,0 +1,52 @@ +# data "aws_prefix_list" "ecr" { +# name = "com.amazonaws.${var.region}.ecr" +# } + +# resource "aws_network_acl" "main" { +# vpc_id = aws_vpc.iac_exercise_vpc.id + +# egress { +# protocol = "tcp" +# rule_no = 200 +# action = "allow" +# cidr_block = var.vpc_cidr +# from_port = 443 +# to_port = 443 +# } + +# ingress { +# protocol = "tcp" +# rule_no = 100 +# action = "allow" +# cidr_block = var.vpc_cidr +# from_port = 80 +# to_port = 80 +# } + +# tags = { +# Name = "main" +# } +# } + +# resource "aws_network_acl_rule" "iac_exercise_ecr_outbound_nacl" { +# network_acl_id = aws_network_acl.main.id +# rule_number = 100 +# egress = true +# protocol = "tcp" +# rule_action = "allow" +# from_port = 443 +# to_port = 443 +# cidr_block = data.aws_prefix_list.ecr.cidr_blocks +# } + +# resource "aws_network_acl_rule" "ecr_inbound_nacl" { +# network_acl_id = aws_network_acl.main.id +# rule_number = 101 +# egress = false +# protocol = "tcp" +# rule_action = "allow" +# 
from_port = 1024 # Ephemeral port range for return traffic +# to_port = 65535 # Ephemeral port range for return traffic +# cidr_block = data.aws_prefix_list.ecr.cidr_blocks +# } + diff --git a/terraform/nat.tf b/terraform/nat.tf new file mode 100644 index 0000000..eb91b70 --- /dev/null +++ b/terraform/nat.tf @@ -0,0 +1,17 @@ +# Elastic IP per NAT (one NAT per AZ for HA) +resource "aws_eip" "iac_exercise_nat" { + for_each = aws_subnet.iac_exercise_public + domain = "vpc" + tags = merge(var.tags, { Name = "${var.project}-eip-nat-${each.key}" }) +} + +resource "aws_nat_gateway" "iac_exercise_ngw" { + for_each = aws_subnet.iac_exercise_public + allocation_id = aws_eip.iac_exercise_nat[each.key].id + subnet_id = aws_subnet.iac_exercise_public[each.key].id + connectivity_type = "public" + + tags = merge(var.tags, { Name = "${var.project}-nat-${each.key}" }) + + depends_on = [aws_internet_gateway.iac_exercise_igw] +} diff --git a/terraform/output.tf b/terraform/output.tf new file mode 100644 index 0000000..72acbb0 --- /dev/null +++ b/terraform/output.tf @@ -0,0 +1,54 @@ +output "vpc_id" { + value = aws_vpc.iac_exercise_vpc.id + description = "VPC ID" +} + +output "public_subnet_ids" { + value = [for k, s in aws_subnet.iac_exercise_public : s.id] + description = "Public subnet IDs (by AZ index)" +} + +output "private_subnet_ids" { + value = [for k, s in aws_subnet.iac_exercise_private : s.id] + description = "Private subnet IDs (by AZ index)" +} + +output "public_subnet_cidrs" { + value = [for k, s in aws_subnet.iac_exercise_public : s.cidr_block] + description = "Public subnet CIDRs" +} + +output "private_subnet_cidrs" { + value = [for k, s in aws_subnet.iac_exercise_private : s.cidr_block] + description = "Private subnet CIDRs" +} + +output "nat_gateway_ids" { + value = [for k, n in aws_nat_gateway.iac_exercise_ngw : n.id] + description = "NAT Gateway IDs (per AZ)" +} + +output "alb_dns_name" { + value = aws_lb.iac_exercise_app_alb.dns_name + description = "Public DNS of 
the Application Load Balancer" +} + +output "service_name" { + value = aws_ecs_service.iac_exercise_app_service.name + description = "ECS Service name" +} + +output "cluster_name" { + value = aws_ecs_cluster.iac_exercise_cluster.name + description = "ECS Cluster name" +} + +# output "alb_dns_name" { +# value = aws_lb.app_alb.dns_name +# description = "Public DNS of the ALB" +# } + +# output "target_group_arn" { +# value = aws_lb_target_group.app_tg.arn +# description = "ARN of the target group" +# } diff --git a/terraform/providers.tf b/terraform/providers.tf new file mode 100644 index 0000000..a992946 --- /dev/null +++ b/terraform/providers.tf @@ -0,0 +1,9 @@ +provider "aws" { + region = var.region +} + +data "aws_availability_zones" "available" { + state = "available" +} + +data "aws_caller_identity" "current" {} \ No newline at end of file diff --git a/terraform/routes.tf b/terraform/routes.tf new file mode 100644 index 0000000..e363bc5 --- /dev/null +++ b/terraform/routes.tf @@ -0,0 +1,38 @@ +# PUBLIC route table (shared) +resource "aws_route_table" "iac_exercise_route_table_public" { + vpc_id = aws_vpc.iac_exercise_vpc.id + tags = merge(var.tags, { Name = "${var.project}-rtb-public" }) +} + +resource "aws_route" "iac_exercise_public_internet" { + route_table_id = aws_route_table.iac_exercise_route_table_public.id + destination_cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.iac_exercise_igw.id +} + +resource "aws_route_table_association" "iac_exercise_public_assoc" { + for_each = aws_subnet.iac_exercise_public + subnet_id = each.value.id + route_table_id = aws_route_table.iac_exercise_route_table_public.id +} + +# PRIVATE route tables (one per AZ so each uses its local NAT) +resource "aws_route_table" "iac_exercise_route_table_private" { + for_each = aws_nat_gateway.iac_exercise_ngw + vpc_id = aws_vpc.iac_exercise_vpc.id + tags = merge(var.tags, { Name = "${var.project}-rtb-private-${each.key}" }) +} + +resource "aws_route" 
"iac_exercise_private_default" { + for_each = aws_nat_gateway.iac_exercise_ngw + route_table_id = aws_route_table.iac_exercise_route_table_private[each.key].id + destination_cidr_block = "0.0.0.0/0" + nat_gateway_id = aws_nat_gateway.iac_exercise_ngw[each.key].id +} + +resource "aws_route_table_association" "iac_exercise_private_assoc" { + for_each = aws_subnet.iac_exercise_private + subnet_id = each.value.id + # use the RTB that matches the same AZ index (key) + route_table_id = aws_route_table.iac_exercise_route_table_private[each.key].id +} diff --git a/terraform/service.tf b/terraform/service.tf new file mode 100644 index 0000000..36d1648 --- /dev/null +++ b/terraform/service.tf @@ -0,0 +1,75 @@ +resource "aws_security_group" "iac_exercise_tasks_sg" { + name = "${var.project}-tasks-sg" + description = "Allows ALB to reach ECS tasks" + vpc_id = aws_vpc.iac_exercise_vpc.id + tags = var.tags + + ingress { + description = "ALB to tasks" + from_port = var.container_port + to_port = var.container_port + protocol = "tcp" + security_groups = [aws_security_group.iac_exercise_alb_sg.id] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" # all protocols/ports; "tcp" with ports 0-0 only matched TCP port 0 and blocked HTTPS to ECR/CloudWatch + cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "aws_ecs_service" "iac_exercise_app_service" { + name = "${var.project}-svc" + cluster = aws_ecs_cluster.iac_exercise_cluster.id + task_definition = aws_ecs_task_definition.iac_exercise_app.arn + desired_count = var.desired_count + launch_type = "FARGATE" + + network_configuration { + subnets = [for k, s in aws_subnet.iac_exercise_private : s.id] + security_groups = [aws_security_group.iac_exercise_tasks_sg.id] + assign_public_ip = var.assign_public_ip # driven by the declared variable (default false) instead of a hard-coded true + } + + load_balancer { + target_group_arn = aws_lb_target_group.iac_exercise_app_tg.arn + container_name = var.project + container_port = var.container_port + } + + deployment_controller { + type = "ECS" + } + + deployment_circuit_breaker { + enable = true + rollback = true + } + + enable_execute_command = true # ECS Exec + + lifecycle { + 
ignore_changes = [task_definition] # so deploys via new revs don't need service changes + } + + tags = var.tags +} + +# For ECR access from tasks + +resource "aws_security_group" "task_sg" { + name = "${var.project}-ecr-sg" + description = "Allows ECS tasks to reach ECR" + vpc_id = aws_vpc.iac_exercise_vpc.id + tags = var.tags + + egress { + from_port = 443 + to_port = 443 + protocol = "tcp" + cidr_blocks = ["10.3.0.0/18"] + description = "Allow outbound HTTPS to ECR" + } +} diff --git a/terraform/subnets.tf b/terraform/subnets.tf new file mode 100644 index 0000000..b4b1fc6 --- /dev/null +++ b/terraform/subnets.tf @@ -0,0 +1,37 @@ +locals { + azs = slice(data.aws_availability_zones.available.names, 0, var.az_count) + + # Derive subnet CIDRs deterministically from VPC CIDR + # first N for public, next N for private + public_subnet_cidrs = [for i in range(var.az_count) : cidrsubnet(var.vpc_cidr, 4, i)] + private_subnet_cidrs = [for i in range(var.az_count) : cidrsubnet(var.vpc_cidr, 4, i + var.az_count)] +} + +resource "aws_subnet" "iac_exercise_public" { + for_each = { for idx, az in local.azs : idx => { az = az, cidr = local.public_subnet_cidrs[idx] } } + + vpc_id = aws_vpc.iac_exercise_vpc.id + cidr_block = each.value.cidr + availability_zone = each.value.az + map_public_ip_on_launch = true + + tags = merge(var.tags, { + Name = "${var.project}-public-${each.key}" + "kubernetes.io/role/elb" = "1" # if you ever use EKS + Project = var.project + }) +} + +resource "aws_subnet" "iac_exercise_private" { + for_each = { for idx, az in local.azs : idx => { az = az, cidr = local.private_subnet_cidrs[idx] } } + + vpc_id = aws_vpc.iac_exercise_vpc.id + cidr_block = each.value.cidr + availability_zone = each.value.az + + tags = merge(var.tags, { + Name = "${var.project}-private-${each.key}" + "kubernetes.io/role/internal-elb" = "1" # if you ever use EKS + Project = var.project + }) +} diff --git a/terraform/terraform.tfvars b/terraform/terraform.tfvars new file mode 100644 
index 0000000..b7e1839
--- /dev/null
+++ b/terraform/terraform.tfvars
@@ -0,0 +1,31 @@
+# Point to your image
+container_image = "914357406929.dkr.ecr.us-west-2.amazonaws.com/iac-exercise:v1"
+#container_port = 80
+
+env_vars = {
+  APP_ENV = "prod"
+}
+
+cpu            = 512
+memory         = 1024
+container_port = 80
+
+
+desired_count       = 2
+enable_fargate_spot = true
+# NOTE(review): 0.0.0.0/0 exposes the ALB to the whole internet; narrow this to trusted CIDRs where possible.
+allowed_ingress_cidrs = ["0.0.0.0/0"]
+
+# Diagnostic toggle: temporarily disable ECR interface endpoints so tasks will egress via NAT
+disable_ecr_interface_endpoints = false
+
+# Enable TF-managed interface endpoints so we can import existing ECR endpoints
+create_interface_endpoints = true
+
+tags = {
+  Owner = "SRE"
+  App   = "IaC Exercise"
+}
+
+# Keep tasks off public IPs (matches the variable default). Set to true only for short-lived testing without NAT.
+assign_public_ip = false
diff --git a/terraform/tls_self_sign.tf b/terraform/tls_self_sign.tf
new file mode 100644
index 0000000..09e8a55
--- /dev/null
+++ b/terraform/tls_self_sign.tf
@@ -0,0 +1,49 @@
+# ---------------------------------------------------------------------
+# Generate self-signed certificate and private key for HTTPS
+# NOTE: tls_private_key keeps the generated key unencrypted in Terraform
+# state; protect the state backend and prefer a CA-issued cert outside testing.
+# ---------------------------------------------------------------------
+
+# 1. Private key (2048-bit RSA)
+resource "tls_private_key" "alb" {
+  algorithm = "RSA"
+  rsa_bits  = 2048
+}
+
+# 2. Self-signed certificate valid for 1 year
+resource "tls_self_signed_cert" "alb" {
+  private_key_pem       = tls_private_key.alb.private_key_pem
+  validity_period_hours = 8760 # 1 year
+
+  subject {
+    common_name  = "${var.project}.local"
+    organization = "Example Org"
+  }
+
+  allowed_uses = [
+    "key_encipherment",
+    "digital_signature",
+    "server_auth"
+  ]
+}
+
+# 3. Import into AWS ACM
+# certificate_chain is omitted: a self-signed certificate is its own root, so
+# there are no intermediates to supply.
+resource "aws_acm_certificate" "self_signed" {
+  private_key      = tls_private_key.alb.private_key_pem
+  certificate_body = tls_self_signed_cert.alb.cert_pem
+
+  tags = merge(var.tags, { Name = "${var.project}-self-signed-cert" })
+
+  # Build the replacement certificate before removing the old one so anything
+  # still referencing it (e.g. a listener) is never left without a certificate.
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+output "self_signed_cert_arn" {
+  description = "ARN of imported self-signed ACM certificate"
+  value       = aws_acm_certificate.self_signed.arn
+}
diff --git a/terraform/variables.tf b/terraform/variables.tf
new file mode 100644
index 0000000..1f7bb62
--- /dev/null
+++ b/terraform/variables.tf
@@ -0,0 +1,178 @@
+variable "region" {
+  type        = string
+  description = "AWS region"
+  default     = "us-west-2"
+}
+
+variable "project" {
+  type        = string
+  description = "Used for naming and tags"
+  default     = "iac-exercise-multi-az-vpc"
+}
+
+variable "vpc_cidr" {
+  type        = string
+  description = "VPC CIDR block"
+  default     = "10.0.0.0/16"
+}
+
+variable "az_count" {
+  type        = number
+  description = "Number of AZs to span (min 2)"
+  default     = 2
+
+  validation {
+    condition     = var.az_count >= 2 && floor(var.az_count) == var.az_count
+    error_message = "The az_count value must be a whole number of at least 2."
+  }
+}
+
+variable "tags" {
+  type        = map(string)
+  description = "Extra tags merged into the tags of created resources"
+  default     = {}
+}
+
+variable "container_image" {
+  type        = string
+  description = "Container image (ECR URI or public image)"
+}
+
+variable "container_port" {
+  type    = number
+  default = 80
+}
+
+variable "desired_count" {
+  type    = number
+  default = 2
+}
+
+variable "enable_fargate_spot" {
+  type    = bool
+  default = true
+}
+
+variable "cpu" {
+  description = "Task CPU units (e.g., 256, 512, 1024)"
+  type        = number
+  default     = 512
+}
+
+variable "memory" {
+  description = "Task memory in MiB (e.g., 1024, 2048)"
+  type        = number
+  default     = 1024
+}
+
+variable "health_check_path" {
+  type    = string
+  default = "/"
+}
+
+variable "env_vars" {
+  description = "Environment variables for the container"
+  type        = map(string)
+  default     = {}
+}
+
+variable "allowed_ingress_cidrs" {
+  description = "CIDR blocks allowed to hit the ALB"
+  type        = list(string)
+  default     = ["0.0.0.0/0"]
+}
+
+variable "alb_idle_timeout" {
+  type    = number
+  default = 60
+}
+
+variable "create_interface_endpoints" {
+  type        = bool
+  description = "Whether to create common interface endpoints (SSM, EC2 Messages, etc.)"
+  default     = false
+}
+
+variable "create_gateway_endpoints" {
+  type    = bool
+  default = true
+}
+
+variable "disable_ecr_interface_endpoints" {
+  description = "If true, do not create ECR interface endpoints (ecr.api and ecr.dkr). Useful for testing NAT egress vs endpoint routing."
+  type        = bool
+  default     = false
+}
+
+variable "s3_bucket_name" {
+  description = "Optional S3 bucket name to grant the ECS task access to. If empty, the task role will receive AmazonS3ReadOnlyAccess (managed)."
+  type        = string
+  default     = ""
+}
+
+variable "s3_bucket_write" {
+  description = "If true and s3_bucket_name is set, allow write actions (PutObject/DeleteObject) on the bucket's objects. (Not implemented in this minimal change.)"
+  type        = bool
+  default     = false
+}
+
+variable "ssm_param_names" {
+  type        = list(string)
+  default     = []
+  description = "SSM parameter paths to mount as secrets"
+}
+
+# Optional HTTPS
+# variable "enable_https" {
+#   type        = bool
+#   description = "Create HTTPS listener on 443"
+#   default     = false
+# }
+
+# variable "acm_certificate_arn" {
+#   type        = string
+#   description = "ACM cert ARN for HTTPS (required if enable_https = true)"
+#   default     = ""
+# }
+
+# Optional: if HTTPS enabled, should HTTP 80 redirect?
+variable "http_redirect_to_https" {
+  type        = bool
+  description = "If HTTPS is enabled, redirect HTTP (port 80) requests to HTTPS"
+  default     = true
+}
+
+variable "assign_public_ip" {
+  type        = bool
+  description = "Whether to assign public IPs to tasks in private subnets (for testing without NAT)"
+  default     = false
+}
+
+variable "flow_logs_enabled" {
+  description = "Enable VPC Flow Logs to CloudWatch"
+  type        = bool
+  default     = true
+}
+
+variable "flow_logs_retention_days" {
+  description = "CloudWatch Logs retention for VPC Flow Logs"
+  type        = number
+  default     = 30
+}
+
+variable "flow_logs_traffic_type" {
+  description = "Traffic captured by flow logs: ACCEPT | REJECT | ALL"
+  type        = string
+  default     = "ALL"
+
+  validation {
+    condition     = contains(["ACCEPT", "REJECT", "ALL"], var.flow_logs_traffic_type)
+    error_message = "The flow_logs_traffic_type value must be one of ACCEPT, REJECT, or ALL."
+  }
+}
+
+variable "alarm_sns_topic_arn" {
+  description = "Optional SNS topic ARN to notify on image pull errors"
+  type        = string
+  default     = ""
+}
diff --git a/terraform/versions.tf b/terraform/versions.tf
new file mode 100644
index 0000000..47d550e
--- /dev/null
+++ b/terraform/versions.tf
@@ -0,0 +1,13 @@
+terraform {
+  required_version = ">= 1.5.0"
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 5.40"
+    }
+    tls = {
+      source  = "hashicorp/tls"
+      version = ">= 4.0.4"
+    }
+  }
+}
diff --git a/terraform/vpc.tf b/terraform/vpc.tf
new file mode 100644
index 0000000..de43bde
--- /dev/null
+++ b/terraform/vpc.tf
@@ -0,0 +1,59 @@
+resource "aws_vpc" "iac_exercise_vpc" {
+  cidr_block           = var.vpc_cidr
+  enable_dns_hostnames = true
+  enable_dns_support   = true
+
+  tags = merge(var.tags, {
+    Name    = "${var.project}-vpc"
+    Project = var.project
+  })
+}
+
+resource "aws_internet_gateway" "iac_exercise_igw" {
+  vpc_id = aws_vpc.iac_exercise_vpc.id
+  tags   = merge(var.tags, { Name = "${var.project}-igw" })
+}
+
+# NOTE(review): the flow-log resources below are disabled and hard-code
+# retention (30) and traffic type ("ALL") rather than reading
+# var.flow_logs_retention_days / var.flow_logs_traffic_type. Confirm flow logs
+# are provisioned elsewhere before relying on var.flow_logs_enabled.
+
+# resource "aws_cloudwatch_log_group" "iac_exercise_vpc_flow" {
+#   name              = "/vpc/${var.project}/flow-logs"
+#   retention_in_days = 30
+#   tags              = var.tags
+# }
+
+# resource "aws_iam_role" "iac_exercise_iam_flowlogs" {
+#   name = "${var.project}-vpc-flowlogs-role"
+#   assume_role_policy = jsonencode({
+#     Version = "2012-10-17"
+#     Statement = [{
+#       Effect = "Allow"
+#       Principal = { Service = "vpc-flow-logs.amazonaws.com" }
+#       Action = "sts:AssumeRole"
+#     }]
+#   })
+#   tags = var.tags
+# }
+
+# resource "aws_iam_role_policy" "iac_exercise_iam_role_policy_flowlogs" {
+#   name = "${var.project}-vpc-flowlogs-policy"
+#   role = aws_iam_role.iac_exercise_iam_flowlogs.id
+#   policy = jsonencode({
+#     Version = "2012-10-17",
+#     Statement = [{
+#       Effect = "Allow",
+#       Action = ["logs:CreateLogStream","logs:PutLogEvents","logs:DescribeLogGroups","logs:DescribeLogStreams"],
+#       Resource = "*"
+#     }]
+#   })
+# }
+
+# resource "aws_flow_log" "iac_exercise_aws_flowlogs" {
+#   iam_role_arn    = aws_iam_role.iac_exercise_iam_flowlogs.arn
+#   log_destination = aws_cloudwatch_log_group.iac_exercise_vpc_flow.arn
+#   traffic_type    = "ALL"
+#   vpc_id          = aws_vpc.iac_exercise_vpc.id
+# }