diff --git a/.github/workflows/deploy-dev.yml b/.github/workflows/deploy-dev.yml
new file mode 100644
index 0000000..f9f3415
--- /dev/null
+++ b/.github/workflows/deploy-dev.yml
@@ -0,0 +1,96 @@
+# Deploy the dabdbt bundle to dev, run dabdbt_job, then fix the docs blob's HTTP metadata.
+name: Deploy dabdbt (dev)
+
+on:
+  push:
+    branches: [dev]
+  workflow_dispatch:
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    environment: dev
+    permissions:
+      contents: read
+      id-token: write
+    env:
+      DATABRICKS_HOST: ${{ vars.DATABRICKS_HOST }}
+      DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }}
+      DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }}
+      DATABRICKS_TENANT_ID: ${{ secrets.DATABRICKS_TENANT_ID }}
+      DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
+      DATABRICKS_PROFILE: dev-github
+      DBT_DEV_HTTP_PATH: ${{ secrets.DBT_DEV_HTTP_PATH }}
+      AZURE_STORAGE_ACCOUNT: ${{ vars.AZURE_STORAGE_ACCOUNT }}
+      AZURE_STORAGE_CONTAINER: '$web'
+      AZURE_BLOB_PATH: 'dabdbt/index.html'
+      AZURE_BLOB_CONTENT_TYPE: 'text/html; charset=utf-8'
+      AZURE_BLOB_CONTENT_DISPOSITION: inline
+      AZURE_SUBSCRIPTION_ID: '${{ secrets.AZURE_SUBSCRIPTION_ID }}'
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Install Databricks CLI
+        run: |
+          curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | bash
+          echo "${HOME}/.databricks/bin" >> "$GITHUB_PATH"
+          echo "${HOME}/bin" >> "$GITHUB_PATH"
+
+      - name: Configure Databricks profile (Service Principal or PAT)
+        run: |
+          set -euo pipefail
+          mkdir -p ~/.databricks
+          # The profile file holds credentials, so create it with owner-only permissions.
+          umask 077
+          if [ -n "${DATABRICKS_CLIENT_ID:-}" ] && [ -n "${DATABRICKS_CLIENT_SECRET:-}" ]; then
+            printf '[%s]\nhost = %s\nclient_id = %s\nclient_secret = %s\n' "${DATABRICKS_PROFILE}" "${DATABRICKS_HOST}" "${DATABRICKS_CLIENT_ID}" "${DATABRICKS_CLIENT_SECRET}" > ~/.databrickscfg
+            if [ -n "${DATABRICKS_TENANT_ID:-}" ]; then
+              printf 'tenant_id = %s\n' "${DATABRICKS_TENANT_ID}" >> ~/.databrickscfg
+            fi
+          elif [ -n "${DATABRICKS_TOKEN:-}" ]; then
+            printf '[%s]\nhost = %s\ntoken = %s\n' "${DATABRICKS_PROFILE}" "${DATABRICKS_HOST}" "${DATABRICKS_TOKEN}" > ~/.databrickscfg
+          else
+            echo "Missing Databricks credentials: set secrets.DATABRICKS_CLIENT_ID/SECRET or secrets.DATABRICKS_TOKEN" >&2
+            exit 1
+          fi
+
+      - name: Deploy bundle to Databricks (dev)
+        working-directory: dabdbt
+        run: |
+          databricks bundle deploy --target dev --profile "${DATABRICKS_PROFILE}"
+
+      - name: Run dabdbt_job (dev)
+        working-directory: dabdbt
+        run: |
+          databricks bundle run dabdbt_job --target dev --profile "${DATABRICKS_PROFILE}"
+
+      - name: Authenticate with Azure
+        uses: azure/login@v2
+        with:
+          creds: ${{ secrets.AZURE_CREDENTIALS }}
+
+      - name: Set Azure subscription
+        if: env.AZURE_SUBSCRIPTION_ID != ''
+        run: |
+          az account set --subscription "${AZURE_SUBSCRIPTION_ID}"
+
+      - name: Fix static website MIME metadata
+        run: |
+          az storage blob update \
+            --account-name "${AZURE_STORAGE_ACCOUNT}" \
+            --container-name "${AZURE_STORAGE_CONTAINER}" \
+            --name "${AZURE_BLOB_PATH}" \
+            --content-type "${AZURE_BLOB_CONTENT_TYPE}" \
+            --content-disposition "${AZURE_BLOB_CONTENT_DISPOSITION}" \
+            --auth-mode login
diff --git a/.github/workflows/deploy-prd.yml b/.github/workflows/deploy-prd.yml
new file mode 100644
index 0000000..388be85
--- /dev/null
+++ b/.github/workflows/deploy-prd.yml
@@ -0,0 +1,96 @@
+# Deploy the dabdbt bundle to prd, run dabdbt_job, then fix the docs blob's HTTP metadata.
+name: Deploy dabdbt (prd)
+
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    environment: prd
+    permissions:
+      contents: read
+      id-token: write
+    env:
+      DATABRICKS_HOST: ${{ vars.DATABRICKS_HOST }}
+      DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }}
+      DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }}
+      DATABRICKS_TENANT_ID: ${{ secrets.DATABRICKS_TENANT_ID }}
+      DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
+      DATABRICKS_PROFILE: prd-github
+      AZURE_STORAGE_ACCOUNT: ${{ vars.AZURE_STORAGE_ACCOUNT }}
+      AZURE_STORAGE_CONTAINER: '$web'
+      AZURE_BLOB_PATH: 'dabdbt/index.html'
+      AZURE_BLOB_CONTENT_TYPE: 'text/html; charset=utf-8'
+      AZURE_BLOB_CONTENT_DISPOSITION: inline
+      AZURE_SUBSCRIPTION_ID: '${{ secrets.AZURE_SUBSCRIPTION_ID }}'
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Install Databricks CLI
+        run: |
+          curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | bash
+          echo "${HOME}/.databricks/bin" >> "$GITHUB_PATH"
+          echo "${HOME}/bin" >> "$GITHUB_PATH"
+
+      - name: Configure Databricks profile (Service Principal or PAT)
+        run: |
+          set -euo pipefail
+          mkdir -p ~/.databricks
+          # The profile file holds credentials, so create it with owner-only permissions.
+          umask 077
+          if [ -n "${DATABRICKS_CLIENT_ID:-}" ] && [ -n "${DATABRICKS_CLIENT_SECRET:-}" ]; then
+            printf '[%s]\nhost = %s\nclient_id = %s\nclient_secret = %s\n' "${DATABRICKS_PROFILE}" "${DATABRICKS_HOST}" "${DATABRICKS_CLIENT_ID}" "${DATABRICKS_CLIENT_SECRET}" > ~/.databrickscfg
+            if [ -n "${DATABRICKS_TENANT_ID:-}" ]; then
+              printf 'tenant_id = %s\n' "${DATABRICKS_TENANT_ID}" >> ~/.databrickscfg
+            fi
+          elif [ -n "${DATABRICKS_TOKEN:-}" ]; then
+            printf '[%s]\nhost = %s\ntoken = %s\n' "${DATABRICKS_PROFILE}" "${DATABRICKS_HOST}" "${DATABRICKS_TOKEN}" > ~/.databrickscfg
+          else
+            echo "Missing Databricks credentials: set secrets.DATABRICKS_CLIENT_ID/SECRET or secrets.DATABRICKS_TOKEN" >&2
+            exit 1
+          fi
+
+      - name: Deploy bundle to Databricks (prd)
+        working-directory: dabdbt
+        run: |
+          databricks bundle deploy --target prd --profile "${DATABRICKS_PROFILE}"
+
+      - name: Run dabdbt_job (prd)
+        working-directory: dabdbt
+        run: |
+          databricks bundle run dabdbt_job --target prd --profile "${DATABRICKS_PROFILE}"
+
+      - name: Authenticate with Azure
+        uses: azure/login@v2
+        with:
+          creds: ${{ secrets.AZURE_CREDENTIALS }}
+
+      - name: Set Azure subscription
+        if: env.AZURE_SUBSCRIPTION_ID != ''
+        run: |
+          az account set --subscription "${AZURE_SUBSCRIPTION_ID}"
+
+      - name: Fix static website MIME metadata
+        run: |
+          az storage blob update \
+            --account-name "${AZURE_STORAGE_ACCOUNT}" \
+            --container-name "${AZURE_STORAGE_CONTAINER}" \
+            --name "${AZURE_BLOB_PATH}" \
+            --content-type "${AZURE_BLOB_CONTENT_TYPE}" \
+            --content-disposition "${AZURE_BLOB_CONTENT_DISPOSITION}" \
+            --auth-mode login
diff --git a/dabdbt/databricks.yml b/dabdbt/databricks.yml
index c3a377a..855c381 100644
--- a/dabdbt/databricks.yml
+++ b/dabdbt/databricks.yml
@@ -13,6 +13,9 @@ variables:
   prod_root_path:
     description: Workspace path for production bundle deployment
     default: /Workspace/Users/anselmoborges@gmail.com/.bundle/${bundle.name}/${bundle.target}
+  source_olist_env:
+    description: Target suffix used by Unity Catalog foreign catalog
+    default: ${bundle.target}
   prod_owner_user:
     description: User principal that will retain CAN_MANAGE in production
     default: anselmoborges@gmail.com
@@ -28,7 +31,7 @@ targets:
     mode: development
     default: true
     workspace:
-      host: https://adb-1293581597272291.11.azuredatabricks.net
+      host: https://adb-1293581597272291.11.azuredatabricks.net/
 
   prd:
     mode: production
diff --git a/dabdbt/notebooks/publish_docs.py b/dabdbt/notebooks/publish_docs.py
deleted file mode 100644
index 7f9bec9..0000000
--- a/dabdbt/notebooks/publish_docs.py
+++ /dev/null
@@ -1,126 +0,0 @@
-# Databricks notebook source
-# MAGIC %md
-# MAGIC ## Publica documentação do dbt
-# MAGIC Atualiza o static website do Storage Account `sarescuedev` com o conteúdo gerado pelo dbt.
- -# COMMAND ---------- - -from azure.storage.blob import BlobServiceClient, ContentSettings -from pathlib import Path -import os -import mimetypes - -# Configuração dinâmica: nome do scope e chave podem ser passados via env -# Defaults: scope='pocdocdbt' e key='sasdocsdbt' (conforme criação no AKV) -ACCOUNT_NAME = os.environ.get("AZURE_STORAGE_ACCOUNT", "sarescuedev") -CONTAINER_NAME = os.environ.get("AZURE_STORAGE_CONTAINER", "$web") -SECRET_SCOPE = os.environ.get("SECRET_SCOPE", os.environ.get("DBT_SECRET_SCOPE", "pocdocdbt")) -SECRET_KEY = os.environ.get("SECRET_KEY", os.environ.get("DBT_SECRET_KEY", "sasdocsdbt")) - -# Detect bundle name/target a partir do caminho do notebook no workspace -notebook_path = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get() -nb_path = Path(notebook_path) -try: - notebook_path = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get() - nb_path = Path(notebook_path) - bundle_name = None - bundle_target = None - parts = list(nb_path.parts) - if ".bundle" in parts: - i = parts.index(".bundle") - # expect structure: .../.bundle///files/... 
- if len(parts) > i + 1: - bundle_name = parts[i + 1] - if len(parts) > i + 2: - bundle_target = parts[i + 2] -except NameError: - # Fallback para execução local (ou quando dbutils não estiver disponível) - bundle_name = os.environ.get("BUNDLE_NAME") - bundle_target = os.environ.get("BUNDLE_TARGET") - -bundle_name = bundle_name or os.environ.get("BUNDLE_NAME", "dabdbt") -bundle_target = bundle_target or os.environ.get("BUNDLE_TARGET", os.environ.get("RESCUE_ENV", "dev")) - -# Construir o path do volume conforme convenção usada no bundle -BASE_DIR = Path(f"/Volumes/rescue_{bundle_target}/rescue_b/vol_docs/{bundle_name}") -PROJECT_SUBDIR = BASE_DIR / bundle_name -PORTAL_PATH = BASE_DIR / "portal.html" - -# COMMAND ---------- - -# Garante que o diretório base existe -BASE_DIR.mkdir(parents=True, exist_ok=True) - -# Copia o portal para dentro do volume (usa o arquivo do bundle ou fallback padrão) -notebook_path = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get() -workspace_root = Path(notebook_path).parents[1] # .../.bundle/dabdbt//files -portal_source = workspace_root / "src/docs/portal.html" - - -PORTAL_PATH = BASE_DIR / "portal.html" -PORTAL_HTML_FALLBACK = """Portal

Portal não encontrado.

""" - -sas_token = None -try: - # Tenta obter o token do KeyVault via dbutils (execução no Databricks) - sas_token = dbutils.secrets.get(SECRET_SCOPE, SECRET_KEY) -except Exception: - # Fallback: variáveis de ambiente (execução local / debugging) - sas_token = os.environ.get("SAS_DOCS_TOKEN") - -if not sas_token: - raise RuntimeError(f"SAS token not found. Checked Databricks secret {SECRET_SCOPE}/{SECRET_KEY} and env SAS_DOCS_TOKEN") -service_client = BlobServiceClient( - account_url=f"https://{ACCOUNT_NAME}.blob.core.windows.net", - credential=sas_token, -) -container_client = service_client.get_container_client(CONTAINER_NAME) - -# COMMAND ---------- - -CONTENT_TYPES = { - ".html": "text/html; charset=utf-8", - ".htm": "text/html; charset=utf-8", - ".js": "application/javascript", - ".css": "text/css", - ".json": "application/json", - ".txt": "text/plain", - ".svg": "image/svg+xml", - ".png": "image/png", - ".ico": "image/x-icon", - ".woff": "font/woff", - ".woff2": "font/woff2", - ".eot": "application/vnd.ms-fontobject", - ".ttf": "font/ttf", - ".map": "application/json", -} - -uploaded = [] -for path in BASE_DIR.rglob("*"): - if path.is_file(): - rel_path = path.relative_to(BASE_DIR).as_posix() - - guessed_type, _ = mimetypes.guess_type(rel_path) - content_type = CONTENT_TYPES.get( - path.suffix.lower(), - guessed_type or CONTENT_TYPES.get(rel_path, "application/octet-stream"), - ) - blob_client = container_client.get_blob_client(rel_path) - data = path.read_bytes() - settings = ContentSettings( - content_type=content_type, - content_disposition="inline", - ) - - blob_client.upload_blob( - data, - overwrite=True, - content_settings=settings, - ) - # Reaplica o cabeçalho explicitamente para garantir o MIME correto em ambientes que ignoram o parâmetro durante o upload. 
-        blob_client.set_http_headers(
-            content_settings=settings,
-        )
-        uploaded.append((rel_path, content_type))
-
-uploaded
diff --git a/dabdbt/resources/dabdbt.job.yml b/dabdbt/resources/dabdbt.job.yml
index fd01d0a..b3d26dd 100644
--- a/dabdbt/resources/dabdbt.job.yml
+++ b/dabdbt/resources/dabdbt.job.yml
@@ -6,9 +6,9 @@ resources:
         POC de Databricks Asset Bundles + dbt + Lakehouse Federation: consome tabelas do Azure SQL expostas como foreign catalog no Unity Catalog, transforma a camada silver com enriquecimento via IBGE e gera o golden record para dashboards versionados.
 
       schedule:
-        pause_status: PAUSED
         quartz_cron_expression: "0 0 8 ? * WED"
-        timezone_id: America/Sao_Paulo
+        timezone_id: "America/Sao_Paulo"
+        pause_status: PAUSED
 
       email_notifications:
         on_failure:
@@ -21,66 +21,66 @@ resources:
         data_owner: "Rescue Point"
         destino: "POC"
 
+      git_source:
+        git_url: https://github.com/AnselmoBorges/pocdabdbt.git
+        git_provider: gitHub
+        # Follow the branch being deployed so the prd job does not run dbt code from dev.
+        git_branch: ${bundle.git.branch}
+
       tasks:
         - task_key: dbt_seed
           dbt_task:
-            project_directory: ../
-            profiles_directory: dbt_profiles/
+            project_directory: dabdbt
             commands:
-              - 'dbt seed --target=${bundle.target} --select ibge_municipios'
-          libraries:
-            - pypi:
-                package: dbt-databricks>=1.8.0,<2.0.0
-          existing_cluster_id: "1011-215228-19jfjeyf"
-
+              - 'dbt seed --target=${bundle.target} --select ibge_municipios --profiles-dir=dbt_profiles'
+            warehouse_id: f38fa7279458bb21
+            catalog: rescue_${bundle.target}
+            schema: rescue_b
+            source: GIT
+          environment_key: dbt-default
+
         - task_key: dbt_silver
           depends_on:
             - task_key: dbt_seed
           dbt_task:
-            project_directory: ../
-            profiles_directory: dbt_profiles/
+            project_directory: dabdbt
             commands:
-              - 'dbt run --target=${bundle.target} --select tag:silver'
-          libraries:
-            - pypi:
-                package: dbt-databricks>=1.8.0,<2.0.0
-          existing_cluster_id: "1011-215228-19jfjeyf"
+              - 'dbt run --target=${bundle.target} --select tag:silver --profiles-dir=dbt_profiles'
+            source: GIT
+            warehouse_id: f38fa7279458bb21
+            catalog: rescue_${bundle.target}
+            schema: rescue_s
+          environment_key: dbt-default
 
         - task_key: dbt_gold
           depends_on:
             - task_key: dbt_silver
           dbt_task:
-            project_directory: ../
-            profiles_directory: dbt_profiles/
+            project_directory: dabdbt
             commands:
-              - 'dbt run --target=${bundle.target} --select tag:gold'
-          libraries:
-            - pypi:
-                package: dbt-databricks>=1.8.0,<2.0.0
-          existing_cluster_id: "1011-215228-19jfjeyf"
-
+              - 'dbt run --target=${bundle.target} --select tag:gold --profiles-dir=dbt_profiles'
+            source: GIT
+            warehouse_id: f38fa7279458bb21
+            catalog: rescue_${bundle.target}
+            schema: rescue_g
+          environment_key: dbt-default
         - task_key: dbt_docs
           depends_on:
             - task_key: dbt_gold
           dbt_task:
-            project_directory: ../
-            profiles_directory: dbt_profiles/
+            project_directory: dabdbt
             commands:
-              # Gere a documentação e escreva para o volume Rescue cujo sufixo
-              # depende do target do bundle (ex.: dev ou prd). O caminho final
-              # será: /Volumes/rescue_/rescue_b/vol_docs/
-              - 'dbt docs generate --target=${bundle.target} --target-path=/Volumes/rescue_${bundle.target}/rescue_b/vol_docs/${bundle.name}'
-          libraries:
-            - pypi:
-                package: dbt-databricks>=1.8.0,<2.0.0
-          existing_cluster_id: "1011-215228-19jfjeyf"
+              - 'dbt docs generate --target=${bundle.target} --target-path=/Volumes/rescue_${bundle.target}/rescue_b/vol_docs/${bundle.name} --profiles-dir=dbt_profiles'
+            source: GIT
+            warehouse_id: f38fa7279458bb21
+            catalog: rescue_${bundle.target}
+          environment_key: dbt-default
 
-        - task_key: publish_docs
-          depends_on:
-            - task_key: dbt_docs
-          notebook_task:
-            notebook_path: ../notebooks/publish_docs.py
-          libraries:
-            - pypi:
-                package: azure-storage-blob>=12.19.0
-          existing_cluster_id: "1011-215228-19jfjeyf"
+      queue:
+        enabled: true
+      environments:
+        - environment_key: dbt-default
+          spec:
+            client: "4"
+            dependencies:
+              - dbt-databricks>=1.0.0,<2.0.0
diff --git a/dabdbt/resources/dbt_single_node_cluster_spot.json b/dabdbt/resources/dbt_single_node_cluster_spot.json
deleted file mode 100644
index 6334be6..0000000
--- a/dabdbt/resources/dbt_single_node_cluster_spot.json
+++ /dev/null
@@ -1,25 +0,0 @@
-{
-  "cluster_name": "dbt-executor",
-  "spark_version": "15.4.x-scala2.12",
-  "node_type_id": "Standard_D3_v2",
-  "driver_node_type_id": "Standard_D3_v2",
-  "autotermination_minutes": 10,
-  "num_workers": 0,
-  "data_security_mode": "SINGLE_USER",
-  "single_user_name": "anselmoborges@gmail.com",
-  "spark_conf": {
-    "spark.master": "local[*, 4]",
-    "spark.databricks.cluster.profile": "singleNode"
-  },
-  "azure_attributes": {
-    "availability": "SPOT",
-    "first_on_demand": 1,
-    "spot_bid_max_price": -1
-  },
-  "custom_tags": {
-    "ResourceClass": "SingleNode",
-    "Owner": "Rescue Point",
-    "Project": "DAB com DBT",
-    "Function": "Executor de jobs DBT"
-  }
-}
diff --git a/dabdbt/scripts/publish_docs.sh b/dabdbt/scripts/publish_docs.sh
deleted file mode 100644
index 2e4ea50..0000000
--- a/dabdbt/scripts/publish_docs.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env bash
-# Script simples para publicar artefatos gerados por `dbt docs generate`
-# para o volume padrão usado por operações Rescue.
-# Uso:
-# RESCUE_ENV=dev ./scripts/publish_docs.sh
-# ou
-# DBT_DOCS_VOLUME=/Volumes/rescue_dev/rescue_b/vol_docs ./scripts/publish_docs.sh
-
-set -euo pipefail
-
-ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-BUILD_DIR="$ROOT_DIR/target/docs"
-
-# Prefer explicit path if fornecido
-if [[ -n "${DBT_DOCS_VOLUME:-}" ]]; then
-  DEST="$DBT_DOCS_VOLUME"
-else
-  RESCUE_ENV="${RESCUE_ENV:-dev}"
-  DEST="/Volumes/rescue_${RESCUE_ENV}/rescue_b/vol_docs"
-fi
-
-echo "Publicando dbt docs gerados de: $BUILD_DIR -> $DEST"
-
-if [[ ! -d "$BUILD_DIR" ]]; then
-  echo "Diretório $BUILD_DIR não encontrado. Execute 'dbt docs generate' antes de publicar." >&2
-  exit 2
-fi
-
-mkdir -p "$DEST"
-
-# Copia recursivamente mantendo permissões mínimas necessárias
-rsync -av --delete "$BUILD_DIR/" "$DEST/"
-
-echo "Publicação concluída com sucesso em: $DEST"
diff --git a/dabdbt/src/models/silver/schema.yml b/dabdbt/src/models/silver/schema.yml
index 63785bb..bd317f2 100644
--- a/dabdbt/src/models/silver/schema.yml
+++ b/dabdbt/src/models/silver/schema.yml
@@ -3,7 +3,7 @@ version: 2
 sources:
   - name: olist
     description: "{{ doc('source_olist_catalog') }}"
-    database: "{{ env_var('DBT_SOURCE_DATABASE', 'sqlolistdev') }}"
+    database: "{{ var('source_olist_database', 'sqlolist' ~ (target.name | default('dev'))) }}"
     schema: olist
     tables:
       - name: customers
diff --git a/dabdbt/vars/prod.yml b/dabdbt/vars/prod.yml
deleted file mode 100644
index a7fc239..0000000
--- a/dabdbt/vars/prod.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-# Overrides para deployments em produção realizados pelo usuário de serviço
-prod_root_path: /Workspace/Users/system.user@rescuepoint.com.br/.bundle/${bundle.name}/${bundle.target}
-prod_owner_user: system.user@rescuepoint.com.br