diff --git a/terraform/all_in_one_azure/README.md b/terraform/all_in_one_azure/README.md new file mode 100644 index 000000000..4ead3f378 --- /dev/null +++ b/terraform/all_in_one_azure/README.md @@ -0,0 +1,22 @@ +This AiO will create Everything: + +The first file (main.tf) will deal with required azure resources: Resource Group + Two Storage accounts (one for the cluster logs, another for the overwatch database), configure diagnostic settings, create databricks workspace, secret scope, keyvault, put proper permission and roles, access policies in the keyvault, create the Event Hub Namespace, the event hub and its authorization rules + +The second file (main_databricks) will upload the overwatch notebook, create the secret scope, add a few entries in the keyvault, generate the required PAT token, create a dummy job and create and schedule the overwatch job with the required libraries from maven put as dependencies. + +USAGE: +* Do a terraform init to download the azure and databricks providers +* Fill the Variables below in a new file called terraform.tfvars with your own values +* run az login (to be able to create the secret scope, as service principal is not allowed) +* run terraform plan / terraform apply +* run the dummy job and then the overwatch job is scheduled to run every day (you can change the schedule in the variables.tf file, using the cron expression) + + +The terraform.tfvars file should contain the following vars defined: + +tenant_id= +subscription_id= +service_principal_id_mount= +user_id= +overwatch_spn= +overwatch_spn_pass= \ No newline at end of file diff --git a/terraform/all_in_one_azure/main.tf b/terraform/all_in_one_azure/main.tf new file mode 100644 index 000000000..bbfca6f9e --- /dev/null +++ b/terraform/all_in_one_azure/main.tf @@ -0,0 +1,216 @@ +// Albert Nogués. 23/05/2022. 
Automate Overwatch Depoloyment +// This file does all the overwatch prerequisites deployment + +data "azurerm_storage_account" "owsa" { + name = azurerm_storage_account.owsa.name + resource_group_name = azurerm_resource_group.rg.name +} + +data "azurerm_storage_account" "logsa" { + name = azurerm_storage_account.logsa.name + resource_group_name = azurerm_resource_group.rg.name +} + +data "azurerm_databricks_workspace" "adb" { + name = azurerm_databricks_workspace.adb.name + resource_group_name = azurerm_resource_group.rg.name +} + +resource "random_string" "strapp" { + length = 5 + lower = true + upper = false + special = false +} + +resource "azurerm_resource_group" "rg" { + name = join("", [var.resource_group_name,random_string.strapp.result]) + location = var.resource_group_location + + tags = { + environment = "Overwatch" + } +} + +resource "azurerm_storage_account" "owsa" { + name = join("", [var.overwatch_storage_account_name,random_string.strapp.result]) + resource_group_name = azurerm_resource_group.rg.name + location = azurerm_resource_group.rg.location + account_tier = "Standard" + account_replication_type = "LRS" + is_hns_enabled = true + + identity { + type = "SystemAssigned" + } + + tags = { + environment = "Overwatch" + purpose="Overwatch Delta Database" + } +} + +resource "azurerm_storage_account" "logsa" { + name = join("", [var.logs_storage_account_name,random_string.strapp.result]) + resource_group_name = azurerm_resource_group.rg.name + location = azurerm_resource_group.rg.location + account_tier = "Standard" + account_replication_type = "LRS" + is_hns_enabled = true + + identity { + type = "SystemAssigned" + } + + tags = { + environment = "Overwatch" + purpose="Overwatch Cluster Logs Storage" + } +} + +resource "azurerm_role_assignment" "data-contributor-role"{ + scope = azurerm_storage_account.owsa.id + role_definition_name = "Storage Blob Data Contributor" + principal_id = var.service_principal_id_mount +} + +resource "azurerm_role_assignment" 
"data-contributor-role-log"{ + scope = azurerm_storage_account.logsa.id + role_definition_name = "Storage Blob Data Contributor" + principal_id = var.service_principal_id_mount +} + +resource "azurerm_storage_data_lake_gen2_filesystem" "overwatch" { + name = "overwatch" + storage_account_id = azurerm_storage_account.owsa.id +} + +resource "azurerm_storage_data_lake_gen2_filesystem" "logs" { + name = "logs" + storage_account_id = azurerm_storage_account.logsa.id +} + +resource "azurerm_key_vault" "kv" { + name = join("", [var.key_vault_name,random_string.strapp.result]) + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + tenant_id = var.tenant_id + sku_name = "standard" + + purge_protection_enabled = false +} + + +resource "azurerm_key_vault_access_policy" "storage" { + key_vault_id = azurerm_key_vault.kv.id + tenant_id = var.tenant_id + object_id = azurerm_storage_account.owsa.identity[0].principal_id + + key_permissions = ["Get", "Create", "List", "Restore", "Recover", "UnwrapKey", "WrapKey", "Purge", "Encrypt", "Decrypt", "Sign", "Verify"] + secret_permissions = ["Get", "List", "Set"] +} + +resource "azurerm_key_vault_access_policy" "tfuser" { + key_vault_id = azurerm_key_vault.kv.id + tenant_id = var.tenant_id + object_id = var.user_id + + key_permissions = ["Get", "Create", "List", "Restore", "Recover", "UnwrapKey", "WrapKey", "Purge", "Encrypt", "Decrypt", "Sign", "Verify"] + secret_permissions = ["Get", "List", "Set", "Delete", "Purge", "Recover"] +} + +resource "azurerm_databricks_workspace" "adb" { + name = var.workspace_name + resource_group_name = azurerm_resource_group.rg.name + location = azurerm_resource_group.rg.location + sku = "premium" + + tags = { + Environment = "Overwatch" + } +} + + +//EvenHub Part +resource "azurerm_eventhub_namespace" "ehn" { + name = join("", [var.evenhub_namespace_name,random_string.strapp.result]) + location = azurerm_resource_group.rg.location + resource_group_name = 
azurerm_resource_group.rg.name + sku = "Basic" + capacity = 1 + + tags = { + environment = "Overwatch" + } +} + +data "azurerm_eventhub_namespace" "ehn" { + name = azurerm_eventhub_namespace.ehn.name + resource_group_name = azurerm_resource_group.rg.name +} + +resource "azurerm_eventhub" "eh" { + name = var.evenhub_name + namespace_name = azurerm_eventhub_namespace.ehn.name + resource_group_name = azurerm_resource_group.rg.name + partition_count = 2 + message_retention = 1 +} + +resource "azurerm_eventhub_authorization_rule" "ehar" { + name = "overwatch" + namespace_name = azurerm_eventhub_namespace.ehn.name + eventhub_name = azurerm_eventhub.eh.name + resource_group_name = azurerm_resource_group.rg.name + listen = true + send = true + manage = true +} + +data "azurerm_eventhub_authorization_rule" "ehar" { + name = azurerm_eventhub_authorization_rule.ehar.name + resource_group_name = azurerm_resource_group.rg.name + namespace_name = azurerm_eventhub_namespace.ehn.name + eventhub_name = azurerm_eventhub.eh.name +} + + +resource "azurerm_eventhub_namespace_authorization_rule" "ehnar" { + name = "overwatch" + namespace_name = azurerm_eventhub_namespace.ehn.name + resource_group_name = azurerm_resource_group.rg.name + listen = true + send = true + manage = true +} + +data "azurerm_monitor_diagnostic_categories" "cat"{ + resource_id = azurerm_databricks_workspace.adb.id +} + +resource "azurerm_monitor_diagnostic_setting" "ovwdgs" { + name = "OverwatchDGS" + target_resource_id = azurerm_databricks_workspace.adb.id + eventhub_name = azurerm_eventhub.eh.name + eventhub_authorization_rule_id = azurerm_eventhub_namespace_authorization_rule.ehnar.id + + dynamic "log" { + iterator = log_category + for_each = data.azurerm_monitor_diagnostic_categories.cat.logs + content { + enabled = true + category = log_category.value + retention_policy { + enabled = false + } + } + } + + log{ + category = "clusters" + enabled = true + retention_policy { + enabled = false + } + } +} \ No 
newline at end of file diff --git a/terraform/all_in_one_azure/main_databricks.tf b/terraform/all_in_one_azure/main_databricks.tf new file mode 100644 index 000000000..c07cfad1d --- /dev/null +++ b/terraform/all_in_one_azure/main_databricks.tf @@ -0,0 +1,192 @@ +// Albert Nogués. 23/05/2022. Automate Overwatch Depoloyment +// This file does all the overwatch deployment automatically + +//Upload Databricks notebook +resource "databricks_notebook" "overwatch_notebook" { + source = "notebooks/OverwatchSource.dbc" + path = var.overwatch_home_dir + format = "DBC" +} + +resource "databricks_secret_scope" "overwatch" { //ONLY WORKS WITH TERRAFORM AZ-CLI LOGIN!!! NO SPN!!!! + name = "overwatch" + initial_manage_principal = "users" + + keyvault_metadata { + resource_id = azurerm_key_vault.kv.id + dns_name = azurerm_key_vault.kv.vault_uri + } +} + +resource "azurerm_key_vault_secret" "clientid"{ + name = "OVERWATCH-CLIENT-ID" + value = var.overwatch_spn + expiration_date = "2030-12-31T23:59:59Z" + key_vault_id = azurerm_key_vault.kv.id + depends_on = [ + azurerm_key_vault_access_policy.tfuser + ] +} + +resource "azurerm_key_vault_secret" "clientsecret"{ + name = "OVERWATCH-CLIENT-SECRET" + value = var.overwatch_spn_pass + expiration_date = "2030-12-31T23:59:59Z" + key_vault_id = azurerm_key_vault.kv.id + depends_on = [ + azurerm_key_vault_access_policy.tfuser + ] +} + +resource "azurerm_key_vault_secret" "tenant"{ + name = "OVERWATCH-TENANT" + value = var.tenant_id + expiration_date = "2030-12-31T23:59:59Z" + key_vault_id = azurerm_key_vault.kv.id + depends_on = [ + azurerm_key_vault_access_policy.tfuser + ] +} + +resource "azurerm_key_vault_secret" "owsa"{ + name = "OVERWATCH-STORAGE-ACCOUNT" + value = azurerm_storage_account.owsa.name + expiration_date = "2030-12-31T23:59:59Z" + key_vault_id = azurerm_key_vault.kv.id + depends_on = [ + azurerm_key_vault_access_policy.tfuser + ] +} + +resource "azurerm_key_vault_secret" "logsa"{ + name = "OVERWATCH-LOG-STORAGE-ACCOUNT" + 
value = azurerm_storage_account.logsa.name + expiration_date = "2030-12-31T23:59:59Z" + key_vault_id = azurerm_key_vault.kv.id + depends_on = [ + azurerm_key_vault_access_policy.tfuser + ] +} + +resource "azurerm_key_vault_secret" "ehconnectionstring"{ + name = "EH-CONNECTION-STRING" + value = azurerm_eventhub_authorization_rule.ehar.primary_connection_string + expiration_date = "2030-12-31T23:59:59Z" + key_vault_id = azurerm_key_vault.kv.id + depends_on = [ + azurerm_key_vault_access_policy.tfuser + ] +} + +resource "databricks_token" "pat" { + comment = "ADB-PAT for Overwatch" +} + +resource "azurerm_key_vault_secret" "adbpat"{ + name = "ADB-PAT" + value = databricks_token.pat.token_value + expiration_date = "2030-12-31T23:59:59Z" + key_vault_id = azurerm_key_vault.kv.id + depends_on = [ + azurerm_key_vault_access_policy.tfuser, + databricks_token.pat + ] +} + +data "databricks_node_type" "smallest" { + local_disk = true //If we uncomment this we will get a Standard_E4s_v4 instance a shade more expensive but with 32 gb of ram instead of the 8 of the Standard_F4s so may be worth it if you need more power + depends_on = [azurerm_databricks_workspace.adb] +} + +// Latest LTS version +data "databricks_spark_version" "latest_lts" { + long_term_support = true + depends_on = [azurerm_databricks_workspace.adb] +} + +//First of all we require a dummy job otherwise first run of overwatch will crash reading info of jobs launched. 
+//WE NEEED TO RUN THIS JOB BEFORE OVERWATCH RUNS +resource "databricks_job" "dummyjob" { + name = "Dummy Job" + new_cluster{ + num_workers = 0 + spark_version = data.databricks_spark_version.latest_lts.id + node_type_id = data.databricks_node_type.smallest.id + cluster_log_conf { + dbfs { + destination = var.logs_dbfs_mount_point + } + } + spark_conf = { + # Single-node + "spark.databricks.cluster.profile" : "singleNode" + "spark.master" : "local[*]" + } + custom_tags = {"ResourceClass" : "SingleNode"} + + } + notebook_task { + notebook_path = join("/", [databricks_notebook.overwatch_notebook.path,"Dummy"]) + } +} + +resource "databricks_job" "overwatch" { + name = var.overwatch_job_name + new_cluster{ + num_workers = 0 + spark_version = data.databricks_spark_version.latest_lts.id + node_type_id = "Standard_DS4_v2" + + cluster_log_conf { + dbfs { + destination = var.logs_dbfs_mount_point + } + } + + spark_conf = { + # Single-node + "spark.databricks.cluster.profile" : "singleNode" + "spark.master" : "local[*]" + } + custom_tags = {"ResourceClass" : "SingleNode"} + } + notebook_task { + notebook_path = join("/", [databricks_notebook.overwatch_notebook.path,"Overwatch - Job"]) + base_parameters = { + "consumerDBName": "overwatch", + "secretsScope": "overwatch", + "scopes": var.overwatch_job_scopes, + "maxDaysToLoad": "1500", + "etlDBName": "overwatch_etl", + "dbPATKey": azurerm_key_vault_secret.adbpat.name, + "ehName": var.evenhub_name, + "primordialDateString": var.overwatch_primordial_date, + "ehKey": azurerm_key_vault_secret.ehconnectionstring.name, + "storagePrefix": var.overwatch_mount_point + } + } + library { + maven { + coordinates = "com.microsoft.azure:azure-eventhubs-spark_2.12:2.3.21" + exclusions = [] + } + } + library{ + maven { + coordinates = "com.databricks.labs:overwatch_2.12:0.6.1.0" + exclusions = [] + } + } + email_notifications { + + on_failure = [var.overwatch_job_notification_email] + no_alert_for_skipped_runs = false + + } + + schedule{ + 
quartz_cron_expression = var.cron_job_schedule + timezone_id = var.cron_timezone_id + pause_status = "UNPAUSED" + } +} diff --git a/terraform/all_in_one_azure/notebooks/OverwatchSource.dbc b/terraform/all_in_one_azure/notebooks/OverwatchSource.dbc new file mode 100644 index 000000000..8650d2100 Binary files /dev/null and b/terraform/all_in_one_azure/notebooks/OverwatchSource.dbc differ diff --git a/terraform/all_in_one_azure/providers.tf b/terraform/all_in_one_azure/providers.tf new file mode 100644 index 000000000..382b89b6d --- /dev/null +++ b/terraform/all_in_one_azure/providers.tf @@ -0,0 +1,28 @@ +terraform { + + required_version = ">=0.12" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = ">=3.5.0" + } + databricks = { + source = "databrickslabs/databricks" + version = "~>0.5.7" + } + } +} + +provider "azurerm" { + features {} + subscription_id = var.subscription_id +} + +provider "databricks" { + host = data.azurerm_databricks_workspace.adb.workspace_url + azure_workspace_resource_id = data.azurerm_databricks_workspace.adb.id + + # ARM_USE_MSI environment variable is recommended + //azure_use_msi = true +} \ No newline at end of file diff --git a/terraform/all_in_one_azure/terraform.tfvars b/terraform/all_in_one_azure/terraform.tfvars new file mode 100644 index 000000000..7ea2e36e4 --- /dev/null +++ b/terraform/all_in_one_azure/terraform.tfvars @@ -0,0 +1,6 @@ +tenant_id= +subscription_id= +service_principal_id_mount= +user_id= +overwatch_spn= +overwatch_spn_pass= \ No newline at end of file diff --git a/terraform/all_in_one_azure/variables.tf b/terraform/all_in_one_azure/variables.tf new file mode 100644 index 000000000..6bdcceae3 --- /dev/null +++ b/terraform/all_in_one_azure/variables.tf @@ -0,0 +1,112 @@ +# Make sure to run this with an user account otherwise the create scope will fail because SPN auth is not supported +# Specific azure account variables. 
You can fill these here with the default keyworkd or fill it in a terraform.tfvars file +variable "tenant_id" { + description = "The tenant ID of our AAD account" +} + +variable "subscription_id" { + description = "The ID of the azure subscription where we will be deploying overwatch" +} + +variable "service_principal_id_mount" { + description = "ObjectID of the service principal that will be granted blob contributor role in the storage account of both logs and overwatch DB. This has to be the objectID of the service principal that will be used to mount the ADLS in the DBFS" +} + +variable "user_id" { + description = "ObjectID of the user that will launch this terraform deployment. TO not have issues adding secrets in the KV we will set a policy for ths user" +} + +variable "overwatch_spn"{ + description = "ApplicationID of the SPN that will be used for Overwatch" +} + +variable "overwatch_spn_pass"{ + description = "Password of the ApplicationID of the SPN that will be used for Overwatch" +} + +# Azure Overwatch prerequisites specific variables +variable "resource_group_name" { + default = "overwatch" + description = "Main Resource Group Name" +} + +variable "resource_group_location" { + default = "westeurope" + description = "Location of the resource group" +} + +variable "overwatch_storage_account_name" { + default = "overwatchdb" + description = "Main DataLake name" +} + +variable "logs_storage_account_name" { + default = "overwatchlogs" + description = "Main DataLake name" +} + +variable "key_vault_name" { + default = "overwatchkv" + description = "Main Keyvault Name" +} + +variable "evenhub_namespace_name" { + default = "overwatch" + description = "Eventhub Namespace Name" +} + +variable "evenhub_name" { + default = "shared-databricks-ds" + description = "Eventhub Databricks" +} + +variable "workspace_name"{ + default = "DatabricksOverwatch" + description = "Name of the Azure Databricks Workspace" +} + +# Overwatch specific variables +variable 
"overwatch_home_dir" { + default = "/Overwatch" + description = "Overwatch home directory in the workspace" +} + +variable "overwatch_job_name"{ + default = "Overwatch Job" + description = "Overwatch Job Name" +} + +variable "overwatch_job_notification_email"{ + default = "your@email.com" + description = "Overwatch Job Notification Email" +} + +variable "logs_dbfs_mount_point" { + default = "dbfs:/mnt/logs" + description = "Path in dbfs where the logs storage account will be mounted" +} + +variable "overwatch_mount_point" { + default = "/mnt/overwatch" + description = "Path in the driver where the Overwatch storage account will be mounted" +} + +variable "overwatch_job_scopes" { + default = "jobs,clusters,clusterEvents,sparkEvents,audit,notebooks,accounts" + description = "All the scopes you want to read for overwatch. Check documentation for more details. We are not using pools" +} + +variable "overwatch_primordial_date" { + default = "2022-03-08" + description = "The initial date load for overwatch in string format YYYY-MM-DD" +} + +variable "cron_job_schedule" { + default = "0 0 8 * * ?" + description = "Cron expression to schedule the Overwatch Job" +} + +variable "cron_timezone_id" { + default = "Europe/Brussels" + description = "Timezone for the cron schedule. Check documentation about supported timezone formats" +} \ No newline at end of file