diff --git a/.terraform.lock.hcl b/.terraform.lock.hcl
new file mode 100644
index 0000000..78f7df9
--- /dev/null
+++ b/.terraform.lock.hcl
@@ -0,0 +1,25 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/aws" {
+  version     = "5.15.0"
+  constraints = "~> 5.0"
+  hashes = [
+    "h1:ANdCdDlCFR2frDWXfIUD2RqtBRzcaCn/3E4Jjx6qbBg=",
+    "zh:069d0037cd1f8791a27ec31a535ce47d02d4f220fe88f9c3caa8661c0a98892a",
+    "zh:08c18e8f5f69736e86919e6c2a68c94f39f879511d51b2a8e58ad1776ee18854",
+    "zh:41c9c95e225f72421fa4a1c3e5105f36b3b149cba1daf9bc88b0a993c1d19e07",
+    "zh:51e6cf850de8a8ae0e3b4e55b45ca2e6632a149c5851158f3c2711af51adb277",
+    "zh:5703eacc47d5a8169d1028f8cfcdf32cd12972ebea8780e870f520020280258a",
+    "zh:6a77e0406126208ae217c416e4b59940cd989df4d7d5ac23dfe8043725ff8f6a",
+    "zh:702cc6db865aeee571a639a81be3ed36326dcbda5c0a2ca91c9280772fce3e49",
+    "zh:8279822c5a267869d4459e429ad7b3b8ffaa36de2f6ca29cf7779214783ddf3a",
+    "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
+    "zh:bcb74854b0742a03b46e526bc2a79f556988c7622d54ebb2ccefc72c9759e9bc",
+    "zh:c7b0f4e94a9351a004a5555e91c8fe5b7da8cd2e03411cbd59d135ea8fceedd8",
+    "zh:cec427b1ef0e0948fd16736c72de57438fafcd8eeb5aab3bb1131579d2d6d031",
+    "zh:d5e4819851e52c15283064f6fa8cb8179a69cc981bee39e9b5ce5f027da8e251",
+    "zh:dade91d49309813b7453b053429678c8e7185e5ac54b2f68edb2ffea20242149",
+    "zh:e05e1395a738317a6761b592a5643ea5e660abd32de36ece68809cfd04a6a8e3",
+  ]
+}
diff --git a/ecs.tf b/ecs.tf
index 92e8ef3..971790f 100644
--- a/ecs.tf
+++ b/ecs.tf
@@ -6,14 +6,21 @@ resource "aws_cloudwatch_log_group" "airflow" {
 }
 
 resource "aws_ecs_cluster" "airflow" {
-  name = "${var.resource_prefix}-airflow-${var.resource_suffix}"
+  name = "${var.resource_prefix}-airflow-${var.resource_suffix}"
+
+  tags = local.common_tags
+}
+
+resource "aws_ecs_cluster_capacity_providers" "aws_ecs_cluster_capacity_providers" {
+  cluster_name       = aws_ecs_cluster.airflow.name
+  capacity_providers = ["FARGATE_SPOT", "FARGATE"]
 
   default_capacity_provider_strategy {
+    base              = 1
+    weight            = 100
     capacity_provider = "FARGATE_SPOT"
   }
-
-  tags = local.common_tags
 }
 
 resource "aws_ecs_task_definition" "airflow" {
@@ -35,7 +42,7 @@
       "image": "mikesir87/aws-cli",
       "name": "${local.airflow_sidecar_container_name}",
       "command": [
-        "/bin/bash -c \"aws s3 cp s3://${local.s3_bucket_name}/${local.s3_key} ${var.airflow_container_home} --recursive && chmod +x ${var.airflow_container_home}/${aws_s3_bucket_object.airflow_scheduler_entrypoint.key} && chmod +x ${var.airflow_container_home}/${aws_s3_bucket_object.airflow_webserver_entrypoint.key} && chmod -R 777 ${var.airflow_container_home}\""
+        "/bin/bash -c \"aws s3 cp s3://${local.s3_bucket_name}/${local.s3_key} ${var.airflow_container_home} --recursive && chmod +x ${var.airflow_container_home}/${aws_s3_object.airflow_scheduler_entrypoint.key} && chmod +x ${var.airflow_container_home}/${aws_s3_object.airflow_webserver_entrypoint.key} && chmod -R 777 ${var.airflow_container_home}\""
       ],
       "entryPoint": [
         "sh",
@@ -67,7 +74,7 @@
         }
       ],
       "command": [
-        "/bin/bash -c \"${var.airflow_container_home}/${aws_s3_bucket_object.airflow_init_entrypoint.key}\""
+        "/bin/bash -c \"${var.airflow_container_home}/${aws_s3_object.airflow_init_entrypoint.key}\""
       ],
       "entryPoint": [
         "sh",
@@ -106,7 +113,7 @@
         }
       ],
      "command": [
-        "/bin/bash -c \"${var.airflow_container_home}/${aws_s3_bucket_object.airflow_scheduler_entrypoint.key}\""
\"${var.airflow_container_home}/${aws_s3_bucket_object.airflow_scheduler_entrypoint.key}\"" + "/bin/bash -c \"${var.airflow_container_home}/${aws_s3_object.airflow_scheduler_entrypoint.key}\"" ], "entryPoint": [ "sh", @@ -145,7 +152,7 @@ resource "aws_ecs_task_definition" "airflow" { } ], "command": [ - "/bin/bash -c \"${var.airflow_container_home}/${aws_s3_bucket_object.airflow_webserver_entrypoint.key}\"" + "/bin/bash -c \"${var.airflow_container_home}/${aws_s3_object.airflow_webserver_entrypoint.key}\"" ], "entryPoint": [ "sh", diff --git a/locals.tf b/locals.tf index df10a98..52f9dc6 100644 --- a/locals.tf +++ b/locals.tf @@ -17,7 +17,7 @@ locals { day = formatdate("D", local.timestamp) rds_name = "${var.resource_prefix}-airflow-${var.resource_suffix}" - postgres_uri = var.postgres_uri != "" ? "postgresql+psycopg2://${var.rds_username}:${var.rds_password}@${var.postgres_uri}" : (var.airflow_executor == "Sequential" ? "" : "postgresql+psycopg2://${var.rds_username}:${var.rds_password}@${aws_db_instance.airflow[0].address}:${aws_db_instance.airflow[0].port}/${aws_db_instance.airflow[0].name}") + postgres_uri = var.postgres_uri != "" ? "postgresql+psycopg2://${var.rds_username}:${var.rds_password}@${var.postgres_uri}" : (var.airflow_executor == "Sequential" ? "" : "postgresql+psycopg2://${var.rds_username}:${var.rds_password}@${aws_db_instance.airflow[0].address}:${aws_db_instance.airflow[0].port}/${aws_db_instance.airflow[0].db_name}") db_uri = var.airflow_executor == "Local" ? local.postgres_uri : "sqlite:////opt/airflow/airflow.db" s3_bucket_name = var.s3_bucket_name != "" ? var.s3_bucket_name : aws_s3_bucket.airflow[0].id diff --git a/main.tf b/main.tf index 715d0f5..cf1b762 100644 --- a/main.tf +++ b/main.tf @@ -1,9 +1,9 @@ terraform { - required_version = "~> 0.15" + required_version = "~> 1.6.2" required_providers { aws = { source = "hashicorp/aws" - version = "~> 3.12.0" + version = "~> 5.0" } } } diff --git a/rds.tf b/rds.tf index 51b53a0..22c2534 100644 --- a/rds.tf +++ b/rds.tf @@ -1,6 +1,6 @@ resource "aws_db_instance" "airflow" { count = var.postgres_uri != "" || var.airflow_executor == "Sequential" ? 0 : 1 - name = replace(title(local.rds_name), "-", "") + db_name = replace(title(local.rds_name), "-", "") allocated_storage = var.rds_allocated_storage storage_type = var.rds_storage_type engine = var.rds_engine diff --git a/s3.tf b/s3.tf index 2f4186a..e711408 100644 --- a/s3.tf +++ b/s3.tf @@ -1,34 +1,61 @@ resource "aws_s3_bucket" "airflow" { count = var.s3_bucket_name == "" ? 1 : 0 bucket = "${var.resource_prefix}-airflow-${var.resource_suffix}" - acl = "private" + tags = local.common_tags +} - versioning { - enabled = true - } +resource "aws_s3_bucket_public_access_block" "airflow" { + count = var.s3_bucket_name == "" ? 
+  bucket = aws_s3_bucket.airflow[0].id
 
-  server_side_encryption_configuration {
-    rule {
-      apply_server_side_encryption_by_default {
-        sse_algorithm = "aws:kms"
-      }
+  block_public_acls       = false
+  block_public_policy     = false
+  ignore_public_acls      = false
+  restrict_public_buckets = false
+}
+
+data "aws_iam_policy_document" "airflow" {
+  statement {
+    effect = "Allow"
+    principals {
+      type        = "*"
+      identifiers = ["*"]
     }
+    actions = [
+      "s3:GetObject"
+    ]
+    resources = [
+      "${aws_s3_bucket.airflow[0].arn}/startup/requirements.txt",
+    ]
   }
+}
+resource "aws_s3_bucket_policy" "airflow" {
+  bucket = aws_s3_bucket.airflow[0].id
+  policy = data.aws_iam_policy_document.airflow.json
+}
 
-  tags = local.common_tags
+resource "aws_s3_bucket_ownership_controls" "airflow" {
+  bucket = aws_s3_bucket.airflow[0].id
+  rule {
+    object_ownership = "ObjectWriter"
+  }
 }
 
-resource "aws_s3_bucket_public_access_block" "airflow" {
-  count  = var.s3_bucket_name == "" ? 1 : 0
+resource "aws_s3_bucket_acl" "airflow" {
+  depends_on = [aws_s3_bucket_ownership_controls.airflow]
+  bucket     = aws_s3_bucket.airflow[0].id
+  acl        = "private"
+}
 
-  block_public_acls       = true
-  block_public_policy     = true
-  ignore_public_acls      = true
-  restrict_public_buckets = true
+resource "aws_s3_bucket_versioning" "airflow" {
+  bucket = aws_s3_bucket.airflow[0].id
+  versioning_configuration {
+    status = "Enabled"
+  }
 }
 
-resource "aws_s3_bucket_object" "airflow_seed_dag" {
+resource "aws_s3_object" "airflow_seed_dag" {
   bucket  = local.s3_bucket_name
   key     = "dags/airflow_seed_dag.py"
   content = templatefile("${path.module}/templates/dags/airflow_seed_dag.py", {
@@ -41,26 +68,32 @@ resource "aws_s3_bucket_object" "airflow_seed_dag" {
   })
 }
 
-resource "aws_s3_bucket_object" "airflow_example_dag" {
+resource "aws_s3_object" "airflow_example_dag" {
   count   = var.airflow_example_dag ? 1 : 0
   bucket  = local.s3_bucket_name
   key     = "dags/example_dag.py"
   content = templatefile("${path.module}/templates/dags/example_dag.py", {})
 }
 
-resource "aws_s3_bucket_object" "airflow_scheduler_entrypoint" {
-  bucket  = local.s3_bucket_name
-  key     = "startup/entrypoint_scheduler.sh"
-  content = templatefile("${path.module}/templates/startup/entrypoint_scheduler.sh", { AIRFLOW_HOME = var.airflow_container_home })
+resource "aws_s3_object" "airflow_scheduler_entrypoint" {
+  bucket  = local.s3_bucket_name
+  key     = "startup/entrypoint_scheduler.sh"
+  content = templatefile("${path.module}/templates/startup/entrypoint_scheduler.sh", {
+    AIRFLOW_HOME             = var.airflow_container_home,
+    S3_URL_REQUIREMENTS_FILE = "https://${local.s3_bucket_name}.s3.${var.region}.amazonaws.com/startup/requirements.txt",
+  })
 }
 
-resource "aws_s3_bucket_object" "airflow_webserver_entrypoint" {
-  bucket  = local.s3_bucket_name
-  key     = "startup/entrypoint_webserver.sh"
-  content = templatefile("${path.module}/templates/startup/entrypoint_webserver.sh", { AIRFLOW_HOME = var.airflow_container_home })
+resource "aws_s3_object" "airflow_webserver_entrypoint" {
+  bucket  = local.s3_bucket_name
+  key     = "startup/entrypoint_webserver.sh"
+  content = templatefile("${path.module}/templates/startup/entrypoint_webserver.sh", {
+    AIRFLOW_HOME             = var.airflow_container_home,
+    S3_URL_REQUIREMENTS_FILE = "https://${local.s3_bucket_name}.s3.${var.region}.amazonaws.com/startup/requirements.txt",
+  })
 }
 
-resource "aws_s3_bucket_object" "airflow_init_entrypoint" {
+resource "aws_s3_object" "airflow_init_entrypoint" {
   bucket  = local.s3_bucket_name
   key     = "startup/entrypoint_init.sh"
   content = templatefile("${path.module}/templates/startup/entrypoint_init.sh", {
@@ -74,7 +107,7 @@
   })
 }
 
-resource "aws_s3_bucket_object" "airflow_requirements" {
+resource "aws_s3_object" "airflow_requirements" {
   count   = var.airflow_py_requirements_path == "" ? 0 : 1
   bucket  = local.s3_bucket_name
   key     = "startup/requirements.txt"
diff --git a/templates/startup/entrypoint_init.sh b/templates/startup/entrypoint_init.sh
index 58187c9..261eb26 100755
--- a/templates/startup/entrypoint_init.sh
+++ b/templates/startup/entrypoint_init.sh
@@ -8,6 +8,7 @@ if [[ "$airflow_major_version" == "1" ]]; then
     airflow initdb
 else
     airflow db init
+    airflow db migrate
 fi
 
 # add admin user if rbac enabled and not exists
@@ -17,7 +18,7 @@ if [[ "${RBAC_AUTH}" == "true" ]]; then
    if [[ "$airflow_major_version" == "1" ]]; then
        amount_of_users=$(python -c 'import sys;print((sys.argv.count("│") // 7) - 1)' $(airflow list_users))
    else
-        amount_of_users=$(python -c 'import sys;cmd_in = " ".join(sys.argv);print((cmd_in.count("|") // 5) - 1 if "No data found" not in cmd_in else 0)' $(airflow users list))
+        amount_of_users=$(python -c 'import sys;cmd_in = " ".join(sys.argv);print((cmd_in.count("|") // 5) - 1 if "No data found" not in cmd_in else 0)' $(airflow users list))
    fi
 
    if [[ "$amount_of_users" == "0" ]]; then
@@ -25,7 +26,7 @@ if [[ "${RBAC_AUTH}" == "true" ]]; then
        if [[ "$airflow_major_version" == "1" ]]; then
            airflow create_user -r Admin -u ${RBAC_USERNAME} -e ${RBAC_EMAIL} -f ${RBAC_FIRSTNAME} -l ${RBAC_LASTNAME} -p ${RBAC_PASSWORD}
        else
-            airflow users create -r Admin -u ${RBAC_USERNAME} -e ${RBAC_EMAIL} -f ${RBAC_FIRSTNAME} -l ${RBAC_LASTNAME} -p ${RBAC_PASSWORD}
+            airflow users create -r Admin -u ${RBAC_USERNAME} -e ${RBAC_EMAIL} -f ${RBAC_FIRSTNAME} -l ${RBAC_LASTNAME} -p ${RBAC_PASSWORD}
        fi
    else
        echo "No admin user added, users already exists!"
diff --git a/templates/startup/entrypoint_scheduler.sh b/templates/startup/entrypoint_scheduler.sh
index b8e2ffa..39439ec 100755
--- a/templates/startup/entrypoint_scheduler.sh
+++ b/templates/startup/entrypoint_scheduler.sh
@@ -5,10 +5,17 @@
 ls /opt/airflow/dags
 # Install boto and awscli for the seed dag
 python -m pip install awscli --user
-# Intall python packages through req.txt and pip (if exists)
-if [[ -f "${AIRFLOW_HOME}/startup/requirements.txt" ]]; then
+# Check if the requirements file exists and fetch it if it does
+if curl --head --fail -s ${S3_URL_REQUIREMENTS_FILE} > /dev/null; then
     echo "requirements.txt provided, installing it with pip"
-    python -m pip install -r ${AIRFLOW_HOME}/startup/requirements.txt --user
+    # Download the requirements.txt file to the AIRFLOW_HOME directory
+    curl -s -o "${AIRFLOW_HOME}/requirements.txt" ${S3_URL_REQUIREMENTS_FILE}
+
+    # Install the requirements using pip
+    pip install -r "${AIRFLOW_HOME}/requirements.txt" --user
+else
+    echo "no requirements.txt found in the S3 bucket, skipping pip install"
 fi
+
 # Run the airflow webserver
 airflow scheduler
\ No newline at end of file
diff --git a/templates/startup/entrypoint_webserver.sh b/templates/startup/entrypoint_webserver.sh
index 120fdf8..93aba97 100755
--- a/templates/startup/entrypoint_webserver.sh
+++ b/templates/startup/entrypoint_webserver.sh
@@ -5,10 +5,16 @@
 ls /opt/airflow/dags
 # Install boto and awscli for the seed dag
 python -m pip install awscli --user
-# Intall python packages through req.txt and pip (if exists)
-if [[ -f "${AIRFLOW_HOME}/startup/requirements.txt" ]]; then
+# Check if the requirements file exists and fetch it if it does
+if curl --head --fail -s ${S3_URL_REQUIREMENTS_FILE} > /dev/null; then
     echo "requirements.txt provided, installing it with pip"
-    python -m pip install -r ${AIRFLOW_HOME}/startup/requirements.txt --user
+    # Download the requirements.txt file to the AIRFLOW_HOME directory
+    curl -s -o "${AIRFLOW_HOME}/requirements.txt" ${S3_URL_REQUIREMENTS_FILE}
"${AIRFLOW_HOME}/requirements.txt" ${S3_URL_REQUIREMENTS_FILE} + + # Install the requirements using pip + pip install -r "${AIRFLOW_HOME}/requirements.txt" --user +else + echo "requirements.txt provided, but not found in S3 bucket" fi export AIRFLOW__WEBSERVER__SECRET_KEY=$(openssl rand -hex 30) diff --git a/templates/startup/requirements.txt b/templates/startup/requirements.txt new file mode 100755 index 0000000..6c33661 --- /dev/null +++ b/templates/startup/requirements.txt @@ -0,0 +1,2 @@ +# example +pysftp==0.2.9 diff --git a/variables.tf b/variables.tf index efbf2ac..d97c1c2 100644 --- a/variables.tf +++ b/variables.tf @@ -30,7 +30,7 @@ variable "airflow_image_name" { variable "airflow_image_tag" { type = string description = "The tag of the airflow image" - default = "2.0.1" + default = "2.7.0" } variable "airflow_executor" {