diff --git a/contrib/azure_cicd_quickstart/.gitignore b/contrib/azure_cicd_quickstart/.gitignore
new file mode 100644
index 0000000..eedf926
--- /dev/null
+++ b/contrib/azure_cicd_quickstart/.gitignore
@@ -0,0 +1,127 @@
+# Terraform
+.terraform/
+.terraform.lock.hcl
+*.lock.hcl
+*.tfstate
+*.tfstate.*
+*.tfvars
+terraform.tfstate.d/
+crash.log
+override.tf
+override.tf.json
+*_override.tf
+*_override.tf.json
+
+# Local .terraform directories
+**/.terraform/*
+
+# .tfstate files
+*.tfstate
+*.tfstate.*
+
+# Crash log files
+crash.log
+
+# Exclude all .tfvars files, which are likely to contain sensitive data
+*.tfvars
+
+# Ignore override files as they are usually used to override resources locally
+override.tf
+override.tf.json
+*_override.tf
+*_override.tf.json
+
+# Include override files you do wish to add to version control using negated pattern
+# !example_override.tf
+
+# Include .tflock.hcl files you do wish to add to version control using negated pattern
+!.terraform.lock.hcl
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Logs
+*.log
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Coverage directory used by tools like istanbul
+coverage/
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# Dependency directories
+node_modules/
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variables file
+.env
+.env.test
+.env.local
+.env.production
+
+# Temporary folders
+tmp/
+temp/
\ No newline at end of file
diff --git a/contrib/azure_cicd_quickstart/README.md b/contrib/azure_cicd_quickstart/README.md
new file mode 100644
index 0000000..6e6d07e
--- /dev/null
+++ b/contrib/azure_cicd_quickstart/README.md
@@ -0,0 +1,128 @@
+# Azure DevOps CI/CD for Databricks Asset Bundles (DABs)
+
+A complete solution for deploying Databricks Asset Bundles using Azure DevOps pipelines with managed identity authentication and multi-environment support.
+The azure_cicd_quickstart project deploys Azure resources to facilitate a safe CI/CD process with Databricks Asset Bundles. To learn more about when to use Terraform, APIs, and Databricks Asset Bundles, read https://medium.com/@alexott_en/terraform-vs-databricks-asset-bundles-6256aa70e387
+## Quick Start
+
+This solution automatically creates everything you need for DAB CI/CD in Azure DevOps:
+
+- **Azure DevOps project and pipeline**
+- **Multi-environment variable groups** (dev/test/prod)
+- **Managed identities** with federated credentials
+- **Service connections** for each environment
+- **Automated pipeline configuration** - no manual setup required
+
+### Prerequisites
+
+- Azure CLI (logged in)
+- Terraform >= 1.0
+- Azure DevOps organization access
+- Owner/Contributor permissions on target Azure subscriptions
+
+### Setup
+
+1. **Configure your environment**:
+   ```bash
+   cd terraform/
+   cp terraform.tfvars.template terraform.tfvars
+   # Edit terraform.tfvars with your values
+   ```
+
+2. **Deploy infrastructure**:
+   ```bash
+   terraform init
+   terraform apply
+   ```
+
+3. **Add Managed Identity to Databricks Workspace**:
+   - View the Managed Identities in the Terraform outputs
+   - Add the Managed Identities into their respective workspaces
+
+4. **Start using the pipeline**:
+   - Pipeline is automatically created and configured
+   - Add your DAB folders anywhere in the repository
+   - Create PRs to trigger validation
+   - Merge to main/test/dev to deploy
+
+## What Gets Created
+
+| Component | Description |
+|-----------|-------------|
+| **Azure DevOps Project** | Single project containing pipeline and repository |
+| **Dynamic Pipeline** | Automatically detects changed DABs and deploys only what's needed |
+| **Variable Groups** | Environment-specific configuration (dev/test/prod) |
+| **Managed Identities** | Secure, password-less authentication for each environment |
+| **Service Connections** | Azure subscription connections using workload identity |
+
+The pipeline automatically:
+1. **Detects changed DAB folders** using git diff
+2. **Selects environment** based on branch (dev/test/main)
+3. **Authenticates** using managed identity
+4. **Deploys only changed bundles** for efficiency
+5. **Provides detailed logging** and error handling
+
+## Repository Structure
+
+After deployment, your repository will look like:
+
+```
+your-repo/
+├── azure-pipelines.yml      # Auto-generated pipeline
+├── my-data-pipeline/        # Your DAB folders
+│   ├── databricks.yml       # (anywhere in repo)
+│   └── src/
+├── another-bundle/
+│   ├── databricks.yml
+│   └── notebooks/
+└── terraform/               # Infrastructure code
+    └── README.md            # Detailed setup guide
+```
+
+## Branch-Based Deployments
+
+| Branch | Environment | Variable Group | Databricks Workspace |
+|--------|------------|----------------|----------------------|
+| `dev` | Development | `{pipeline_name}-Dev-Variables` | Dev workspace |
+| `test` | Testing | `{pipeline_name}-Test-Variables` | Test workspace |
+| `main` | Production | `{pipeline_name}-Prod-Variables` | Prod workspace |
+
+## Detailed Documentation
+
+For complete setup instructions, troubleshooting, and advanced configuration:
+
+**[See Terraform README](terraform/README.md)** for the detailed deployment guide
+
+## Troubleshooting
+
+### Common Issues
+
+- **"No matching federated identity found"** → Check organization GUID in terraform.tfvars
+- **"Resource group not found"** → Ensure resource group exists before running Terraform
+- **"Pipeline not triggering"** → Verify DAB folders have `databricks.yml` files
+
+### Getting Help
+
+1. Check the [detailed troubleshooting guide](terraform/README.md#troubleshooting)
+2. Verify all prerequisite permissions are in place
+3. Review Azure DevOps pipeline logs for specific error messages
+
+## Architecture
+
+This solution follows enterprise DevOps patterns:
+
+- **Single DevOps Project**: Centralized pipeline management
+- **Environment Isolation**: Separate subscriptions/workspaces per environment
+- **Managed Identity**: Secure, password-less authentication
+- **Conditional Deployment**: Only changed DABs are deployed
+- **Branch Protection**: Production deployments only from main branch
+
+## Next Steps
+
+After successful deployment:
+
+1. **Test the pipeline** - Create a test DAB and commit changes (see the sketch below)
+2. **Set up branch policies** - Protect main branch, require PR reviews
+3. **Add your DABs** - Place Databricks Asset Bundles anywhere in the repo
+4. **Monitor deployments** - Use Azure DevOps pipeline history and logs
+5. **Scale up** - Add more environments or customize the pipeline
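+
+For step 1, the Databricks CLI can scaffold a throwaway bundle from a built-in template. A minimal sketch (assumes Databricks CLI v0.205+ is installed and the repository is cloned; the init command prompts for a project name):
+
+```bash
+# Scaffold a test DAB from the built-in default-python template
+databricks bundle init default-python
+
+# Commit it on a feature branch; opening a PR exercises change detection,
+# and merging to dev/test/main triggers a deployment
+git checkout -b add-test-dab
+git add .
+git commit -m "Add test DAB"
+git push origin add-test-dab
+```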
\ No newline at end of file
diff --git a/contrib/azure_cicd_quickstart/terraform/README.md b/contrib/azure_cicd_quickstart/terraform/README.md
new file mode 100644
index 0000000..20c5257
--- /dev/null
+++ b/contrib/azure_cicd_quickstart/terraform/README.md
@@ -0,0 +1,363 @@
+# Azure DevOps CI/CD for Databricks Asset Bundles - Deployment Guide
+
+## Overview
+
+This Terraform configuration creates a complete Azure DevOps CI/CD solution for deploying Databricks Asset Bundles (DABs) with:
+- **Automated pipeline creation** - pipeline YAML is generated and committed automatically
+- **Multi-environment support** - separate dev/test/prod environments with dynamic variable group selection
+- **Managed identity authentication** - secure, password-less authentication using workload identity federation
+- **Smart deployment** - only deploys changed DAB folders for efficiency
+
+## Prerequisites
+
+### Required Tools
+- **Terraform** (>= 1.0)
+- **Azure CLI** (logged in with appropriate permissions)
+- **Access to Azure DevOps** organization with admin permissions
+
+### Required Permissions
+- **Azure Subscription**: Owner or Contributor role
+- **Azure DevOps**: Project Collection Administrator or Organization Owner
+- **Azure Active Directory**: Ability to create managed identities and federated credentials
+
+## Step-by-Step Deployment
+
+### 1. Prepare Configuration Files
+
+This Terraform configuration creates all resources for all environments in a **single deployment**. It will create:
+- One Azure DevOps project and pipeline
+- Three variable groups (Dev, Test, Prod)
+- Three managed identities (one per environment)
+- Three service connections (one per subscription)
+
+1. **Copy the template file**:
+   ```bash
+   cp terraform.tfvars.template terraform.tfvars
+   ```
+
+2. **Edit terraform.tfvars** with your values:
+   ```hcl
+   # Azure DevOps Organization
+   organization_name          = "your-org-name"
+   organization_id            = "12345678-1234-1234-1234-123456789abc"
+   azdo_personal_access_token = "your-azdo-pat-token"
+
+   # Project Configuration
+   project_name  = "dab-deployment-project"
+   pipeline_name = "DAB-CI-Pipeline"
+
+   # Management Resource Group (where identities will be created)
+   resource_group_name = "your-management-resource-group"
+
+   # Dev Environment
+   azure_subscription_id_dev   = "your-dev-subscription-id"
+   azure_subscription_name_dev = "your-dev-subscription-name"
+   service_connection_name_dev = "dev-Service-Connection"
+   databricks_host_dev         = "https://adb-1234567890123456.1.azuredatabricks.net/"
+
+   # Test Environment
+   azure_subscription_id_test   = "your-test-subscription-id"
+   azure_subscription_name_test = "your-test-subscription-name"
+   service_connection_name_test = "test-Service-Connection"
+   databricks_host_test         = "https://adb-1234567890123456.1.azuredatabricks.net/"
+
+   # Prod Environment
+   azure_subscription_id_prod   = "your-prod-subscription-id"
+   azure_subscription_name_prod = "your-prod-subscription-name"
+   service_connection_name_prod = "prod-Service-Connection"
+   databricks_host_prod         = "https://adb-1234567890123456.1.azuredatabricks.net/"
+   ```
+
+### 2. Get Required Values
+
+#### Azure DevOps Organization GUID
+The organization GUID is required for workload identity federation. Get it from:
+
+**Method 1: From the Azure DevOps UI**
+- Go to Azure DevOps organization settings
+- Look in the browser developer tools for API calls containing the org GUID
+
+**Method 2: From Error Messages**
+- Try to access a non-existent resource in your organization
+- The error URL will contain your organization GUID
+
+**Method 3: PowerShell/CLI**
+```powershell
+# Using the Azure DevOps CLI
+az devops project list --org https://dev.azure.com/{org-name}
+```
+
+#### Azure DevOps Personal Access Token (PAT)
+1. Go to Azure DevOps → User Settings → Personal Access Tokens
+2. Create new token with permissions:
+   - **Project and Team**: Read & Write
+   - **Service Connections**: Read & Write
+   - **Build**: Read & Execute
+3. Copy the token value
+
+### 3. Validate Prerequisites
+
+1. **Azure CLI Login**:
+   ```bash
+   az login
+   az account set --subscription "your-subscription-id"
+   ```
+
+2. **Verify Resource Group Exists**:
+   ```bash
+   az group show --name "your-resource-group"
+   ```
+
+3. **Test Azure DevOps Access**:
+   ```bash
+   # Verify you can access the organization
+   curl -u :YOUR_PAT_TOKEN https://dev.azure.com/your-org/_apis/projects
+   ```
+
+### 4. Deploy Infrastructure
+
+**Single deployment** creates all resources for all environments.
+
+1. **Set Azure CLI to management subscription** (where resource group exists):
+   ```bash
+   az account set --subscription "your-management-subscription-id"
+   az account show  # Verify you're in the correct subscription
+   ```
+
+2. **Initialize and Deploy**:
+   ```bash
+   terraform init
+   terraform validate
+   terraform plan
+   terraform apply
+   ```
+
+**Note**: The managed identities are created in your management subscription and granted Reader on their respective target subscriptions (dev/test/prod); access to each Databricks workspace is granted separately by adding the identity to the workspace.
+
+### 5. Post-Deployment - Pipeline Ready to Use
+
+#### Fully Automated Pipeline Setup
+The Terraform deployment automatically creates and configures everything needed for CI/CD:
+
+1. **Auto-generated `azure-pipelines.yml`** - Pipeline configuration is created and committed to your repository
+2. **Variable group authorization** - Pipeline is pre-authorized to access all variable groups
+3. **Service connection permissions** - Managed identities are configured with proper access
+4. **Repository initialization** - Empty repository is initialized with the pipeline file
+
+The pipeline is **immediately ready to use** with:
+
+- **Conditional variable group selection** based on branch (dev/test/main)
+- **Dynamic environment targeting** using the variable groups created by Terraform:
+  - `{pipeline_name}-Dev-Variables` (for dev branch)
+  - `{pipeline_name}-Test-Variables` (for test branch)
+  - `{pipeline_name}-Prod-Variables` (for main branch)
+- **DAB change detection** to deploy only modified bundles
+- **Sequential deployment** with comprehensive error handling
+- **Skip logic** to avoid unnecessary deployments when no changes are detected
+
+Each variable group contains:
+- `env` - Environment name (dev/test/prod)
+- `DATABRICKS_HOST` - Environment-specific Databricks workspace URL
+- `SERVICE_CONNECTION_NAME` - Environment-specific service connection name
+
+**No manual pipeline configuration required** - everything works immediately after `terraform apply`!
+
+#### Add Your DAB Projects
+1. **Clone the created repository**:
+   ```bash
+   git clone https://dev.azure.com/{org}/{project}/_git/{project}
+   cd {project}
+   ```
+
+2. **Create DAB folders anywhere in the repository**:
+   ```
+   your-repo/
+   ├── azure-pipelines.yml     # ✨ Already created by Terraform
+   ├── data-pipeline/          # Your DAB folders can be anywhere
+   │   ├── databricks.yml
+   │   └── src/
+   ├── ml-workflows/
+   │   ├── databricks.yml
+   │   └── notebooks/
+   └── analytics/
+       ├── databricks.yml
+       └── queries/
+   ```
+
+   The pipeline automatically detects **any folder containing `databricks.yml`** (searches up to 7 levels deep).
+
+### 6. Test the Pipeline
+
+1. **Create a test DAB**:
+   ```yaml
+   # data_eng_bundles/test-dab/databricks.yml
+   bundle:
+     name: test-dab
+
+   targets:
+     dev:
+       workspace:
+         host: https://your-databricks-workspace
+   ```
+
+2. **Create a feature branch and PR**:
+   ```bash
+   git checkout -b test-feature
+   # Make changes to your DAB
+   git add .
+   git commit -m "Test DAB changes"
+   git push origin test-feature
+   # Create PR in Azure DevOps
+   ```
+
+3. **Verify**:
+   - PR should trigger change detection (no deployment)
+   - Merge to main should trigger deployment pipeline
+
+## Troubleshooting
+
+### Common Issues
+
+#### 1. "No matching federated identity record found"
+**Problem**: Authentication error with managed identity
+
+**Solution**:
+- Verify `organization_id` in tfvars matches your actual Azure DevOps org GUID
+- Check that service connection names follow the pattern: `{env}-Service-Connection`
+- Ensure managed identity has proper federated credential configuration
+- Verify the service connection names in variable groups match the actual service connections created
+
+#### 2. "Resource group not found"
+**Problem**: Terraform can't find the specified resource group
+
+**Solution**:
+- Verify resource group exists: `az group show --name "your-resource-group"`
+- Check Azure CLI is logged into correct subscription
+- Ensure you have permissions to the resource group
+
+#### 3. "Pipeline not triggering"
+**Problem**: PR or push doesn't trigger pipeline
+
+**Solution**:
+- Check pipeline trigger settings in Azure DevOps
+- Verify changes include files with `databricks.yml` (DAB folders can be anywhere in repo)
+- Ensure pipeline YAML path is correct in Terraform configuration
+- Check branch names match trigger configuration (dev/test/main)
+
+#### 4. Pipeline variables are empty or malformed
+**Problem**: Debug output shows empty values like `Build.Reason: `, `env variable: `, or malformed `DATABRICKS_HOST: ://workspace.net`
+
+**Solution**:
+- This indicates variable group selection is not working properly
+- Check that your branch names match exactly: `dev`, `test`, `main`
+- Verify terraform.tfvars contains complete values (no unfilled `<...>` placeholders)
+- Ensure service connection names follow pattern: `dev-Service-Connection`, `test-Service-Connection`, `prod-Service-Connection`
+- Re-run `terraform apply` if you updated terraform.tfvars after initial deployment
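+
+If only the variable group values were wrong, a targeted re-apply is usually enough. A minimal sketch (resource addresses taken from `azure_pipeline.tf` in this quickstart):
+
+```bash
+# Re-apply just the variable groups after correcting terraform.tfvars
+terraform apply \
+  -target=azuredevops_variable_group.dev_variables \
+  -target=azuredevops_variable_group.test_variables \
+  -target=azuredevops_variable_group.prod_variables
+```
+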
"Access denied to Azure DevOps" +**Problem**: PAT token doesn't have sufficient permissions + +**Solution**: +- Regenerate PAT with required scopes: + - Project and Team (Read & Write) + - Service Connections (Read & Write) + - Build (Read & Execute) + +### Validation Commands + +```bash +# Test Azure authentication +az account show + +# Test Terraform configuration +terraform validate +terraform plan -var-file="terraform.tfvars" + +# Test Azure DevOps access +az devops project list --org https://dev.azure.com/{org-name} + +# Verify managed identity +az identity show --name "{project-name}-pipeline-identity" --resource-group "{resource-group}" +``` + +## Security Best Practices + +1. **PAT Token Management**: + - Use minimum required permissions + - Set appropriate expiration dates + - Store securely (consider Azure Key Vault) + +2. **Resource Group Security**: + - Use dedicated resource group for DAB resources + - Apply appropriate RBAC permissions + - Enable resource group locks if needed + +3. **Service Connection**: + - Limit service connection scope to specific subscriptions/resource groups + - Regularly rotate credentials + - Monitor usage and access + +## Support + +For issues related to: +- **Terraform Configuration**: Check variable validation messages +- **Azure DevOps**: Verify PAT permissions and organization access +- **Azure Resources**: Ensure proper RBAC and resource group permissions +- **Databricks Integration**: Verify workspace configuration and authentication + +## Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Azure DevOps (Single Org) │ +│ │ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ Project │ │ Variable Groups │ │ Pipeline │ │ +│ │ - Repository │ │ │ │ │ │ +│ │ - Pipeline │ │ MyProject-Dev │ │ Conditional │ │ +│ │ │ │ MyProject-Test │ │ Variable Group │ │ +│ │ │ │ MyProject-Prod │ │ Selection │ │ +│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ │ │ + │ │ │ +┌─────────────▼─────────────┐ ┌────────▼────────┐ ┌─────────────▼─────────────┐ +│ Dev Subscription │ │ Test Subscription│ │ Prod Subscription │ +│ │ │ │ │ │ +│ ┌─────────────────────────┐ │ ┌───────────────┐ │ ┌─────────────────────────┐│ +│ │ MyProject-dev-identity │ │ │MyProject-test │ │ │ MyProject-prod-identity ││ +│ │ + Service Connection │ │ │+ Service Conn │ │ │ + Service Connection ││ +│ │ + Federated Credential │ │ │+ Fed Cred │ │ │ + Federated Credential ││ +│ └─────────────────────────┘ │ └───────────────┘ │ └─────────────────────────┘│ +│ │ │ │ │ │ +│ ┌─────────────────────────┐ │ ┌───────────────┐ │ ┌─────────────────────────┐│ +│ │ Databricks Dev │ │ │Databricks Test│ │ │ Databricks Prod ││ +│ │ Workspace │ │ │Workspace │ │ │ Workspace ││ +│ └─────────────────────────┘ │ └───────────────┘ │ └─────────────────────────┘│ +└───────────────────────────┘ └─────────────────┘ └───────────────────────────┘ +``` + +## Next Steps + +After successful deployment: +1. **Clone your repository** and add DAB folders anywhere in the repo structure +2. **Create your first DAB** with a `databricks.yml` file +3. **Test the pipeline** by creating a branch and making changes +4. **Set up branch policies** for production deployments (require PR reviews for main branch) +5. **Configure notifications** for pipeline results (Azure DevOps → Project Settings → Notifications) +6. 
**Add monitoring** for deployed DABs using Databricks system tables or Azure Monitor +7. **Scale up** by adding more DAB projects - pipeline automatically detects all `databricks.yml` files + +## Summary + +This fully automated solution provides: + +- **Complete Automation**: Single `terraform apply` creates everything - no manual configuration needed +- **Enterprise Architecture**: Environment isolation with centralized CI/CD management +- **Zero Secrets**: Managed identity authentication - no passwords or keys to manage +- **Smart Deployment**: Only deploys changed DAB folders for efficiency +- **Multi-Environment**: Automatic dev/test/prod environment selection based on git branch +- **Production Ready**: Comprehensive error handling, logging, and pipeline authorization +- **Immediate Use**: Pipeline is configured and ready to use as soon as Terraform completes + +**Perfect for teams who want enterprise-grade DAB CI/CD without the complexity!** \ No newline at end of file diff --git a/contrib/azure_cicd_quickstart/terraform/azure_pipeline.tf b/contrib/azure_cicd_quickstart/terraform/azure_pipeline.tf new file mode 100644 index 0000000..3cd6596 --- /dev/null +++ b/contrib/azure_cicd_quickstart/terraform/azure_pipeline.tf @@ -0,0 +1,142 @@ +resource "azuredevops_build_definition" "pipeline" { + project_id = azuredevops_project.project.id + name = var.pipeline_name + path = "\\" + + repository { + repo_type = "TfsGit" + repo_id = data.azuredevops_git_repository.default_repo.id + branch_name = "refs/heads/main" + yml_path = var.pipeline_yml_path + } + + ci_trigger { + use_yaml = true + } + + variable_groups = [ + azuredevops_variable_group.dev_variables.id, + azuredevops_variable_group.test_variables.id, + azuredevops_variable_group.prod_variables.id + ] + + depends_on = [ + azuredevops_variable_group.dev_variables, + azuredevops_variable_group.test_variables, + azuredevops_variable_group.prod_variables, + null_resource.initialize_repo, + azuredevops_git_repository_file.azure_pipeline_yml + ] +} + + +# Dev Environment Variable Group +resource "azuredevops_variable_group" "dev_variables" { + project_id = azuredevops_project.project.id + name = "${var.pipeline_name}-Dev-Variables" + description = "Variable group for dev environment DAB deployment" + + variable { + name = "env" + value = "dev" + } + + variable { + name = "DATABRICKS_HOST" + value = var.databricks_host_dev + } + + variable { + name = "SERVICE_CONNECTION_NAME" + value = var.service_connection_name_dev + } +} + +# Test Environment Variable Group +resource "azuredevops_variable_group" "test_variables" { + project_id = azuredevops_project.project.id + name = "${var.pipeline_name}-Test-Variables" + description = "Variable group for test environment DAB deployment" + + variable { + name = "env" + value = "test" + } + + variable { + name = "DATABRICKS_HOST" + value = var.databricks_host_test + } + + variable { + name = "SERVICE_CONNECTION_NAME" + value = var.service_connection_name_test + } +} + +# Prod Environment Variable Group +resource "azuredevops_variable_group" "prod_variables" { + project_id = azuredevops_project.project.id + name = "${var.pipeline_name}-Prod-Variables" + description = "Variable group for prod environment DAB deployment" + + variable { + name = "env" + value = "prod" + } + + variable { + name = "DATABRICKS_HOST" + value = var.databricks_host_prod + } + + variable { + name = "SERVICE_CONNECTION_NAME" + value = var.service_connection_name_prod + } +} + + +# Pipeline authorization for all variable groups 
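+# Without authorization, the first run of the pipeline pauses on a
+# "This pipeline needs permission to access a resource" prompt that must be
+# approved manually; granting it here keeps the setup fully automated.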
+resource "azuredevops_pipeline_authorization" "dev_variables_auth" { + project_id = azuredevops_project.project.id + resource_id = azuredevops_variable_group.dev_variables.id + type = "variablegroup" + pipeline_id = azuredevops_build_definition.pipeline.id +} + +resource "azuredevops_pipeline_authorization" "test_variables_auth" { + project_id = azuredevops_project.project.id + resource_id = azuredevops_variable_group.test_variables.id + type = "variablegroup" + pipeline_id = azuredevops_build_definition.pipeline.id +} + +resource "azuredevops_pipeline_authorization" "prod_variables_auth" { + project_id = azuredevops_project.project.id + resource_id = azuredevops_variable_group.prod_variables.id + type = "variablegroup" + pipeline_id = azuredevops_build_definition.pipeline.id +} + +# Pipeline authorization for service connections +resource "azuredevops_pipeline_authorization" "dev_service_connection_auth" { + project_id = azuredevops_project.project.id + resource_id = azuredevops_serviceendpoint_azurerm.dev_pipeline_service_connection.id + type = "endpoint" + pipeline_id = azuredevops_build_definition.pipeline.id +} + +resource "azuredevops_pipeline_authorization" "test_service_connection_auth" { + project_id = azuredevops_project.project.id + resource_id = azuredevops_serviceendpoint_azurerm.test_pipeline_service_connection.id + type = "endpoint" + pipeline_id = azuredevops_build_definition.pipeline.id +} + +resource "azuredevops_pipeline_authorization" "prod_service_connection_auth" { + project_id = azuredevops_project.project.id + resource_id = azuredevops_serviceendpoint_azurerm.prod_pipeline_service_connection.id + type = "endpoint" + pipeline_id = azuredevops_build_definition.pipeline.id +} \ No newline at end of file diff --git a/contrib/azure_cicd_quickstart/terraform/devops_project_repo.tf b/contrib/azure_cicd_quickstart/terraform/devops_project_repo.tf new file mode 100644 index 0000000..0e4affb --- /dev/null +++ b/contrib/azure_cicd_quickstart/terraform/devops_project_repo.tf @@ -0,0 +1,84 @@ +# Note: Azure DevOps organizations are typically created through the Azure portal +# This configuration assumes the organization already exists and focuses on project creation + +resource "azuredevops_project" "project" { + name = var.project_name + description = var.project_description + visibility = var.project_visibility + version_control = "Git" + work_item_template = "Agile" + + features = { + "boards" = "enabled" + "repositories" = "enabled" + "pipelines" = "enabled" + "testplans" = "disabled" + "artifacts" = "enabled" + } +} +# Note: If you have an existing Azure DevOps project, comment out the above azuredevops_project resource and use this data source to reference your project. 
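+# When switching to the data source, also update references such as
+# azuredevops_project.project.id to data.azuredevops_project.project.id
+# in azure_pipeline.tf, service_connection.tf, and outputs.tf.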
+# data "azuredevops_project" "project" { +# name = "Example Project" +# } + +# Use the default repository created with the project +data "azuredevops_git_repository" "default_repo" { + project_id = azuredevops_project.project.id + name = var.project_name +} + +# Initialize the default repository by creating a simple initial commit using local-exec +resource "null_resource" "initialize_repo" { + provisioner "local-exec" { + command = <<-EOT + # Create a temporary directory for initialization + TEMP_DIR=$(mktemp -d) + cd "$TEMP_DIR" + + # Initialize git and create initial commit + git init + git config user.email "terraform@example.com" + git config user.name "Terraform" + + # Create initial README + echo "# ${var.project_name}" > README.md + echo "" >> README.md + echo "This repository contains Databricks Asset Bundles (DABs) with automated CI/CD deployment." >> README.md + + git add README.md + git commit -m "Initial commit" + + # Add remote and push to initialize the repository + git remote add origin "${data.azuredevops_git_repository.default_repo.remote_url}" + git branch -M main + git push -u origin main + + # Cleanup + cd / + rm -rf "$TEMP_DIR" + EOT + } + + depends_on = [azuredevops_project.project] +} + +# Create the azure-pipelines.yml file after repository initialization +resource "azuredevops_git_repository_file" "azure_pipeline_yml" { + repository_id = data.azuredevops_git_repository.default_repo.id + file = var.pipeline_yml_path + content = replace( + replace( + replace( + file("${path.module}/templates/azure-pipelines.yml.tpl"), + "PIPELINE_NAME_DEV", "${var.pipeline_name}-Dev-Variables" + ), + "PIPELINE_NAME_TEST", "${var.pipeline_name}-Test-Variables" + ), + "PIPELINE_NAME_PROD", "${var.pipeline_name}-Prod-Variables" + ) + branch = "refs/heads/main" + commit_message = "Add DAB CI/CD pipeline configuration via Terraform" + overwrite_on_create = true + depends_on = [null_resource.initialize_repo] +} + diff --git a/contrib/azure_cicd_quickstart/terraform/outputs.tf b/contrib/azure_cicd_quickstart/terraform/outputs.tf new file mode 100644 index 0000000..a1119fa --- /dev/null +++ b/contrib/azure_cicd_quickstart/terraform/outputs.tf @@ -0,0 +1,108 @@ +output "project_id" { + description = "The ID of the created project" + value = azuredevops_project.project.id +} + +output "repository_id" { + description = "The ID of the default repository" + value = data.azuredevops_git_repository.default_repo.id +} + +output "repository_clone_url" { + description = "The clone URL of the repository" + value = data.azuredevops_git_repository.default_repo.remote_url +} + +output "pipeline_id" { + description = "The ID of the created pipeline" + value = azuredevops_build_definition.pipeline.id +} + +output "pipeline_url" { + description = "The URL of the pipeline" + value = "https://dev.azure.com/${var.organization_name}/${azuredevops_project.project.name}/_build?definitionId=${azuredevops_build_definition.pipeline.id}" +} + +# Variable Group Outputs +output "dev_variable_group_name" { + description = "Name of the dev variable group" + value = azuredevops_variable_group.dev_variables.name +} + +output "test_variable_group_name" { + description = "Name of the test variable group" + value = azuredevops_variable_group.test_variables.name +} + +output "prod_variable_group_name" { + description = "Name of the prod variable group" + value = azuredevops_variable_group.prod_variables.name +} + +# Service Connection Outputs +output "dev_service_connection_name" { + description = "Name of the dev service 
connection" + value = azuredevops_serviceendpoint_azurerm.dev_pipeline_service_connection.service_endpoint_name +} + +output "test_service_connection_name" { + description = "Name of the test service connection" + value = azuredevops_serviceendpoint_azurerm.test_pipeline_service_connection.service_endpoint_name +} + +output "prod_service_connection_name" { + description = "Name of the prod service connection" + value = azuredevops_serviceendpoint_azurerm.prod_pipeline_service_connection.service_endpoint_name +} + +# Managed Identity Outputs +output "dev_managed_identity_name" { + description = "Name of the dev managed identity" + value = azurerm_user_assigned_identity.dev_pipeline_identity.name +} + +output "test_managed_identity_name" { + description = "Name of the test managed identity" + value = azurerm_user_assigned_identity.test_pipeline_identity.name +} + +output "prod_managed_identity_name" { + description = "Name of the prod managed identity" + value = azurerm_user_assigned_identity.prod_pipeline_identity.name +} + +# Repository Files Output +output "readme_file" { + description = "README file created in repository" + value = "README.md" +} + +output "pipeline_file_path" { + description = "Path to the auto-created pipeline YAML file" + value = var.pipeline_yml_path +} + +# Summary Output +output "deployment_summary" { + description = "Summary of all created resources" + value = { + project_name = var.project_name + pipeline_name = var.pipeline_name + pipeline_file = var.pipeline_yml_path + variable_groups = { + dev = azuredevops_variable_group.dev_variables.name + test = azuredevops_variable_group.test_variables.name + prod = azuredevops_variable_group.prod_variables.name + } + service_connections = { + dev = azuredevops_serviceendpoint_azurerm.dev_pipeline_service_connection.service_endpoint_name + test = azuredevops_serviceendpoint_azurerm.test_pipeline_service_connection.service_endpoint_name + prod = azuredevops_serviceendpoint_azurerm.prod_pipeline_service_connection.service_endpoint_name + } + managed_identities = { + dev = azurerm_user_assigned_identity.dev_pipeline_identity.name + test = azurerm_user_assigned_identity.test_pipeline_identity.name + prod = azurerm_user_assigned_identity.prod_pipeline_identity.name + } + } +} \ No newline at end of file diff --git a/contrib/azure_cicd_quickstart/terraform/providers.tf b/contrib/azure_cicd_quickstart/terraform/providers.tf new file mode 100644 index 0000000..334f813 --- /dev/null +++ b/contrib/azure_cicd_quickstart/terraform/providers.tf @@ -0,0 +1,34 @@ +terraform { + required_providers { + azuredevops = { + source = "microsoft/azuredevops" + version = ">= 0.1.0" + } + azuread = { + source = "hashicorp/azuread" + version = ">= 2.0" + } + azurerm = { + source = "hashicorp/azurerm" + version = ">= 3.0" + } + } +} + +provider "azuredevops" { + # Authentication via environment variables: + # AZDO_PERSONAL_ACCESS_TOKEN + # AZDO_ORG_SERVICE_URL + org_service_url = "https://dev.azure.com/${var.organization_name}" + personal_access_token = var.azdo_personal_access_token +} + +provider "azuread" { + # Authentication via Azure CLI user context + # Uses current user's az login credentials +} + +provider "azurerm" { + features {} + subscription_id = "" # Replace with your Azure subscription ID +} diff --git a/contrib/azure_cicd_quickstart/terraform/service_connection.tf b/contrib/azure_cicd_quickstart/terraform/service_connection.tf new file mode 100644 index 0000000..6c46e54 --- /dev/null +++ 
b/contrib/azure_cicd_quickstart/terraform/service_connection.tf @@ -0,0 +1,191 @@ +# User-Assigned Managed Identities for Each Environment +resource "azurerm_user_assigned_identity" "dev_pipeline_identity" { + name = "${var.project_name}-dev-identity" + location = data.azurerm_resource_group.main.location + resource_group_name = data.azurerm_resource_group.main.name + + lifecycle { + create_before_destroy = true + } + + timeouts { + create = "5m" + update = "5m" + delete = "5m" + } +} + +resource "azurerm_user_assigned_identity" "test_pipeline_identity" { + name = "${var.project_name}-test-identity" + location = data.azurerm_resource_group.main.location + resource_group_name = data.azurerm_resource_group.main.name + + lifecycle { + create_before_destroy = true + } + + timeouts { + create = "5m" + update = "5m" + delete = "5m" + } +} + +resource "azurerm_user_assigned_identity" "prod_pipeline_identity" { + name = "${var.project_name}-prod-identity" + location = data.azurerm_resource_group.main.location + resource_group_name = data.azurerm_resource_group.main.name + + lifecycle { + create_before_destroy = true + } + + timeouts { + create = "5m" + update = "5m" + delete = "5m" + } +} + +# Federated Identity Credentials for Each Environment +resource "azurerm_federated_identity_credential" "dev_pipeline_federated_credential" { + name = "${var.project_name}-dev-federated-credential" + resource_group_name = data.azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.dev_pipeline_identity.id + + audience = ["api://AzureADTokenExchange"] + issuer = "https://vstoken.dev.azure.com/${var.organization_id}" + subject = "sc://${var.organization_name}/${var.project_name}/${var.service_connection_name_dev}" + + timeouts { + create = "5m" + update = "5m" + delete = "5m" + } +} + +resource "azurerm_federated_identity_credential" "test_pipeline_federated_credential" { + name = "${var.project_name}-test-federated-credential" + resource_group_name = data.azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.test_pipeline_identity.id + + audience = ["api://AzureADTokenExchange"] + issuer = "https://vstoken.dev.azure.com/${var.organization_id}" + subject = "sc://${var.organization_name}/${var.project_name}/${var.service_connection_name_test}" + + timeouts { + create = "5m" + update = "5m" + delete = "5m" + } +} + +resource "azurerm_federated_identity_credential" "prod_pipeline_federated_credential" { + name = "${var.project_name}-prod-federated-credential" + resource_group_name = data.azurerm_resource_group.main.name + parent_id = azurerm_user_assigned_identity.prod_pipeline_identity.id + + audience = ["api://AzureADTokenExchange"] + issuer = "https://vstoken.dev.azure.com/${var.organization_id}" + subject = "sc://${var.organization_name}/${var.project_name}/${var.service_connection_name_prod}" + + timeouts { + create = "5m" + update = "5m" + delete = "5m" + } +} + +# Role Assignments - Reader on Each Subscription +resource "azurerm_role_assignment" "dev_pipeline_identity_reader" { + scope = "/subscriptions/${var.azure_subscription_id_dev}" + role_definition_name = "Reader" + principal_id = azurerm_user_assigned_identity.dev_pipeline_identity.principal_id + + depends_on = [azurerm_user_assigned_identity.dev_pipeline_identity] +} + +resource "azurerm_role_assignment" "test_pipeline_identity_reader" { + scope = "/subscriptions/${var.azure_subscription_id_test}" + role_definition_name = "Reader" + principal_id = 
azurerm_user_assigned_identity.test_pipeline_identity.principal_id + + depends_on = [azurerm_user_assigned_identity.test_pipeline_identity] +} + +resource "azurerm_role_assignment" "prod_pipeline_identity_reader" { + scope = "/subscriptions/${var.azure_subscription_id_prod}" + role_definition_name = "Reader" + principal_id = azurerm_user_assigned_identity.prod_pipeline_identity.principal_id + + depends_on = [azurerm_user_assigned_identity.prod_pipeline_identity] +} + +# Azure DevOps Service Connections for Each Environment +resource "azuredevops_serviceendpoint_azurerm" "dev_pipeline_service_connection" { + project_id = azuredevops_project.project.id + service_endpoint_name = var.service_connection_name_dev + description = "Service connection for ${var.project_name} dev environment using managed identity" + service_endpoint_authentication_scheme = "WorkloadIdentityFederation" + + credentials { + serviceprincipalid = azurerm_user_assigned_identity.dev_pipeline_identity.client_id + } + + azurerm_spn_tenantid = data.azurerm_client_config.current.tenant_id + azurerm_subscription_id = var.azure_subscription_id_dev + azurerm_subscription_name = var.azure_subscription_name_dev + + depends_on = [ + azurerm_federated_identity_credential.dev_pipeline_federated_credential, + azurerm_role_assignment.dev_pipeline_identity_reader + ] +} + +resource "azuredevops_serviceendpoint_azurerm" "test_pipeline_service_connection" { + project_id = azuredevops_project.project.id + service_endpoint_name = var.service_connection_name_test + description = "Service connection for ${var.project_name} test environment using managed identity" + service_endpoint_authentication_scheme = "WorkloadIdentityFederation" + + credentials { + serviceprincipalid = azurerm_user_assigned_identity.test_pipeline_identity.client_id + } + + azurerm_spn_tenantid = data.azurerm_client_config.current.tenant_id + azurerm_subscription_id = var.azure_subscription_id_test + azurerm_subscription_name = var.azure_subscription_name_test + + depends_on = [ + azurerm_federated_identity_credential.test_pipeline_federated_credential, + azurerm_role_assignment.test_pipeline_identity_reader + ] +} + +resource "azuredevops_serviceendpoint_azurerm" "prod_pipeline_service_connection" { + project_id = azuredevops_project.project.id + service_endpoint_name = var.service_connection_name_prod + description = "Service connection for ${var.project_name} prod environment using managed identity" + service_endpoint_authentication_scheme = "WorkloadIdentityFederation" + + credentials { + serviceprincipalid = azurerm_user_assigned_identity.prod_pipeline_identity.client_id + } + + azurerm_spn_tenantid = data.azurerm_client_config.current.tenant_id + azurerm_subscription_id = var.azure_subscription_id_prod + azurerm_subscription_name = var.azure_subscription_name_prod + + depends_on = [ + azurerm_federated_identity_credential.prod_pipeline_federated_credential, + azurerm_role_assignment.prod_pipeline_identity_reader + ] +} + +# Data source to get current Azure configuration +data "azurerm_client_config" "current" {} + +# Data source to get the resource group (assuming it exists) +data "azurerm_resource_group" "main" { + name = var.resource_group_name +} \ No newline at end of file diff --git a/contrib/azure_cicd_quickstart/terraform/templates/azure-pipelines.yml.tpl b/contrib/azure_cicd_quickstart/terraform/templates/azure-pipelines.yml.tpl new file mode 100644 index 0000000..177ffcc --- /dev/null +++ 
b/contrib/azure_cicd_quickstart/terraform/templates/azure-pipelines.yml.tpl
@@ -0,0 +1,458 @@
+trigger:
+  batch: false
+  branches:
+    include:
+    - dev
+    - test
+    - main
+  paths:
+    exclude:
+    - README.md
+    - LICENSE
+    - .github
+
+pr:
+  branches:
+    include:
+    - '*'
+  paths:
+    exclude:
+    - README.md
+    - LICENSE
+    - .github
+
+variables:
+  # Dynamic variable group selection based on target branch for PRs and source branch for pushes
+
+  # For Pull Requests: Use target branch to determine variable group
+  - ${{ if and(eq(variables['Build.Reason'], 'PullRequest'), eq(variables['System.PullRequest.TargetBranch'], 'refs/heads/main')) }}:
+    - group: PIPELINE_NAME_PROD
+  - ${{ if and(eq(variables['Build.Reason'], 'PullRequest'), eq(variables['System.PullRequest.TargetBranch'], 'refs/heads/test')) }}:
+    - group: PIPELINE_NAME_TEST
+  - ${{ if and(eq(variables['Build.Reason'], 'PullRequest'), eq(variables['System.PullRequest.TargetBranch'], 'refs/heads/dev')) }}:
+    - group: PIPELINE_NAME_DEV
+  - ${{ if and(eq(variables['Build.Reason'], 'PullRequest'), not(or(eq(variables['System.PullRequest.TargetBranch'], 'refs/heads/main'), eq(variables['System.PullRequest.TargetBranch'], 'refs/heads/test'), eq(variables['System.PullRequest.TargetBranch'], 'refs/heads/dev')))) }}:
+    - group: PIPELINE_NAME_DEV
+
+  # For Direct Pushes: Use source branch to determine variable group
+  - ${{ if and(ne(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranchName'], 'main')) }}:
+    - group: PIPELINE_NAME_PROD
+  - ${{ if and(ne(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranchName'], 'test')) }}:
+    - group: PIPELINE_NAME_TEST
+  - ${{ if and(ne(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranchName'], 'dev')) }}:
+    - group: PIPELINE_NAME_DEV
+  - ${{ if and(ne(variables['Build.Reason'], 'PullRequest'), not(or(eq(variables['Build.SourceBranchName'], 'main'), eq(variables['Build.SourceBranchName'], 'test'), eq(variables['Build.SourceBranchName'], 'dev')))) }}:
+    - group: PIPELINE_NAME_DEV
+
+stages:
+# Stage 1: Detect changed DAB folders
+- stage: DetectChanges
+  jobs:
+  - job: DetectChangedDABs
+    pool:
+      vmImage: 'ubuntu-latest'
+    steps:
+    - checkout: self
+      fetchDepth: 0 # Fetch full history for git diff
+      displayName: 'Checkout with full history'
+
+    - script: |
+        echo "Detecting changed DAB folders..."
+
+        # Determine the correct comparison point based on commit type
+        echo "Current HEAD: $(git rev-parse HEAD)"
+
+        # Check if current commit is a merge commit
+        if git rev-parse --verify HEAD^2 >/dev/null 2>&1; then
+          # This is a merge commit, compare against first parent (previous state of target branch)
+          COMPARE_AGAINST="HEAD^1"
+          echo "Merge commit detected, comparing against first parent: $(git rev-parse HEAD^1)"
+        else
+          # Regular commit, compare against previous commit
+          COMPARE_AGAINST="HEAD~1"
+          echo "Regular commit, comparing against previous commit: $(git rev-parse HEAD~1)"
+        fi
+
+        echo "Comparing changes from $COMPARE_AGAINST to HEAD"
+
+        # Find all folders in all root directories that have a databricks.yml file
+        ALL_DABS=$(find . 
-maxdepth 7 -name "databricks.yml" -type f | sed 's|/databricks.yml||' | sed 's|^\./||' | sort) + echo "All DAB folders found:" + echo "$ALL_DABS" + + # Find changed DAB folders and create list + CHANGED_DABS_LIST="" + CHANGED_COUNT=0 + + for dab_folder in $ALL_DABS; do + # Check if any files in this DAB folder have changed + CHANGES=$(git diff --name-only $COMPARE_AGAINST HEAD -- "$dab_folder/" | wc -l) + if [ $CHANGES -gt 0 ]; then + echo "Changes detected in: $dab_folder ($CHANGES files changed)" + echo " Changed files:" + git diff --name-only $COMPARE_AGAINST HEAD -- "$dab_folder/" | sed 's/^/ /' + CHANGED_COUNT=$((CHANGED_COUNT + 1)) + + if [ -z "$CHANGED_DABS_LIST" ]; then + CHANGED_DABS_LIST="$dab_folder" + else + CHANGED_DABS_LIST="$CHANGED_DABS_LIST,$dab_folder" + fi + else + echo "No changes in: $dab_folder" + fi + done + + if [ $CHANGED_COUNT -eq 0 ]; then + echo "No DAB folders have changes" + echo "##vso[task.setvariable variable=CHANGED_DABS;isOutput=true]" + echo "##vso[task.setvariable variable=HAS_CHANGES;isOutput=true]false" + echo "##vso[task.setvariable variable=CHANGED_COUNT;isOutput=true]0" + else + echo "" + echo "=========================================" + echo "SUMMARY: $CHANGED_COUNT DAB(s) with changes" + echo "=========================================" + echo "$CHANGED_DABS_LIST" | tr ',' '\n' | nl -w2 -s'. ' + echo "=========================================" + echo "" + echo "##vso[task.setvariable variable=CHANGED_DABS;isOutput=true]$CHANGED_DABS_LIST" + echo "##vso[task.setvariable variable=HAS_CHANGES;isOutput=true]true" + echo "##vso[task.setvariable variable=CHANGED_COUNT;isOutput=true]$CHANGED_COUNT" + fi + name: detectChanges + displayName: 'Detect changed DAB folders' + +# Stage 2: Deploy changed DABs sequentially (push to dev/test/main only) +- stage: DeployChangedDABs + condition: ne(variables['Build.Reason'], 'PullRequest') + dependsOn: DetectChanges + jobs: + - job: DeployDABsSequentially + pool: + vmImage: 'ubuntu-latest' + timeoutInMinutes: 120 # Increase timeout for multiple DABs + variables: + changed_dabs: $[ stageDependencies.DetectChanges.DetectChangedDABs.outputs['detectChanges.CHANGED_DABS'] ] + changed_count: $[ stageDependencies.DetectChanges.DetectChangedDABs.outputs['detectChanges.CHANGED_COUNT'] ] + + steps: + - script: | + echo "Job started successfully!" 
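+        # NOTE: $(changed_dabs) and $(changed_count) are Azure DevOps macro
+        # expressions mapped from the DetectChanges stage outputs above; they
+        # are expanded before bash runs, so empty values here point at the
+        # stage dependency wiring rather than at this script.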
+ echo "changed_dabs value: '$(changed_dabs)'" + echo "changed_count value: '$(changed_count)'" + echo "=== ENVIRONMENT DEBUG ===" + echo "Build.Reason: $(Build.Reason)" + echo "Build.SourceBranchName: $(Build.SourceBranchName)" + echo "System.PullRequest.TargetBranch: $(System.PullRequest.TargetBranch)" + echo "env variable: $(env)" + echo "DATABRICKS_HOST: $(DATABRICKS_HOST)" + echo "SERVICE_CONNECTION_NAME: $(SERVICE_CONNECTION_NAME)" + echo "==========================" + displayName: 'Job Start Confirmation and Debug Info' + + - script: | + # Enhanced check for changes with better logging + CHANGED_COUNT="$(changed_count)" + CHANGED_DABS="$(changed_dabs)" + + echo "=== DEPLOYMENT CHECK ===" + echo "Changed count from job variable: '$CHANGED_COUNT'" + echo "Changed DABs list: '$CHANGED_DABS'" + + # Check if we have any changes (multiple conditions for robustness) + if [ -z "$CHANGED_COUNT" ] || [ "$CHANGED_COUNT" = "0" ] || [ "$CHANGED_COUNT" = "" ] || [ -z "$CHANGED_DABS" ] || [ "$CHANGED_DABS" = "" ]; then + echo "" + echo "##[section]No DAB changes detected - skipping all deployment tasks" + echo "##vso[task.setvariable variable=SKIP_DEPLOYMENT;isOutput=true]true" + echo "##vso[task.complete result=Succeeded;]No changes to deploy" + exit 0 + fi + + echo "" + echo "=== DEPLOYMENT OVERVIEW ===" + echo "Total DABs to deploy: $CHANGED_COUNT" + echo "Environment: $(env)" + echo "Databricks Host: $(DATABRICKS_HOST)" + echo "" + echo "DABs to be deployed:" + echo "$CHANGED_DABS" | tr ',' '\n' | nl -w2 -s'. ' + echo "==================================" + echo "##vso[task.setvariable variable=SKIP_DEPLOYMENT;isOutput=true]false" + displayName: 'Check for changes and deployment overview' + name: checkChanges + + - task: UsePythonVersion@0 + condition: ne(variables['checkChanges.SKIP_DEPLOYMENT'], 'true') + displayName: 'Use Python 3.10' + inputs: + versionSpec: '3.10' + + - task: AzureCLI@2 + condition: ne(variables['checkChanges.SKIP_DEPLOYMENT'], 'true') + inputs: + azureSubscription: $(SERVICE_CONNECTION_NAME) + scriptType: 'bash' + scriptLocation: 'inlineScript' + inlineScript: | + echo "Getting access token..." + DATABRICKS_TOKEN=$(az account get-access-token --resource 2ff814a6-3304-4ab8-85cb-cd0e6f879c1d --query "accessToken" -o tsv) + echo "##vso[task.setvariable variable=DATABRICKS_TOKEN]$DATABRICKS_TOKEN" + displayName: 'Get Databricks Token' + + - checkout: self + condition: ne(variables['checkChanges.SKIP_DEPLOYMENT'], 'true') + displayName: 'Checkout repository' + + - script: | + # Install uv (faster method) + echo "Installing uv..." + curl -LsSf https://astral.sh/uv/install.sh | sh + export PATH="$HOME/.local/bin:$PATH" + + # Create virtual environment and install dependencies with optimizations + echo "Creating virtual environment with uv..." + uv venv .venv --python $(which python) + + echo "Installing Python dependencies with uv..." + # Install dependencies with standard uv (no binary restrictions) + source .venv/bin/activate && \ + uv pip install \ + nutter \ + wheel \ + setuptools \ + pytest \ + pyspark + + # Make the virtual environment available for subsequent steps + echo "##vso[task.setvariable variable=VIRTUAL_ENV]$(pwd)/.venv" + echo "##vso[task.setvariable variable=PATH]$(pwd)/.venv/bin:$PATH" + condition: ne(variables['checkChanges.SKIP_DEPLOYMENT'], 'true') + displayName: 'Install Python dependencies with uv (optimized)' + + - script: | + echo "Installing Databricks CLI..." 
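+        # The install script places the CLI in /usr/local/bin; the PATH export
+        # below ensures it takes precedence over any older databricks CLI
+        # preinstalled on the hosted image.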
+ curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh + + # Ensure we use the newer CLI version + export PATH="/usr/local/bin:$PATH" + echo "Using Databricks CLI at: $(which databricks)" + databricks --version + condition: ne(variables['checkChanges.SKIP_DEPLOYMENT'], 'true') + displayName: 'Install Databricks CLI' + + - script: | + echo "Configuring Databricks CLI..." + export PATH="/usr/local/bin:$PATH" + + # Test databricks auth + echo "Testing databricks authentication..." + databricks auth describe + condition: ne(variables['checkChanges.SKIP_DEPLOYMENT'], 'true') + env: + DATABRICKS_HOST: $(DATABRICKS_HOST) + DATABRICKS_TOKEN: $(DATABRICKS_TOKEN) + displayName: 'Configure and test Databricks CLI' + + - script: | + echo "Starting sequential DAB deployment..." + echo "==================================" + + # Ensure we use the correct Databricks CLI version + export PATH="/usr/local/bin:$PATH" + + # Split the comma-separated list of DABs + IFS=',' read -ra DABS_ARRAY <<< "$(changed_dabs)" + + # Initialize counters + TOTAL_DABS=${#DABS_ARRAY[@]} + CURRENT_DAB=0 + SUCCESS_COUNT=0 + FAILED_COUNT=0 + FAILED_DABS="" + + # Process each DAB + for DAB_FOLDER in "${DABS_ARRAY[@]}"; do + CURRENT_DAB=$((CURRENT_DAB + 1)) + + echo "" + echo "##[section]=========================================" + echo "##[section]Processing DAB $CURRENT_DAB of $TOTAL_DABS: $DAB_FOLDER" + echo "##[section]=========================================" + + # Validate directory exists + if [ ! -d "$DAB_FOLDER" ]; then + echo "##[error]Directory $DAB_FOLDER does not exist!" + echo "[$CURRENT_DAB/$TOTAL_DABS] $DAB_FOLDER - FAILED (Directory not found)" + FAILED_COUNT=$((FAILED_COUNT + 1)) + FAILED_DABS="$FAILED_DABS\n - $DAB_FOLDER (Directory not found)" + continue + fi + + # Change to DAB directory + cd "$DAB_FOLDER" + + # Validate databricks.yml exists + if [ ! -f "databricks.yml" ]; then + echo "##[error]databricks.yml not found in $DAB_FOLDER!" + echo "[$CURRENT_DAB/$TOTAL_DABS] $DAB_FOLDER - FAILED (databricks.yml not found)" + FAILED_COUNT=$((FAILED_COUNT + 1)) + FAILED_DABS="$FAILED_DABS\n - $DAB_FOLDER (databricks.yml not found)" + cd /home/vsts/work/1/s + continue + fi + + # Validate bundle + echo "" + echo "##[group]Validating bundle..." + echo "Running: databricks bundle validate -t $(env)" + echo "Environment variables:" + echo " DATABRICKS_HOST: $(DATABRICKS_HOST)" + echo " Target environment: $(env)" + echo " Working directory: $(pwd)" + echo "" + + # Show databricks.yml content for debugging + echo "databricks.yml content:" + cat databricks.yml | head -50 + echo "" + + if databricks bundle validate -t $(env); then + echo "✓ Validation successful" + echo "##[endgroup]" + else + VALIDATION_EXIT_CODE=$? + echo "##[error]Validation failed for $DAB_FOLDER with exit code: $VALIDATION_EXIT_CODE" + echo "##[error]Re-running validation with maximum verbosity for debugging:" + databricks bundle validate -t $(env) --debug || true + echo "##[endgroup]" + echo "[$CURRENT_DAB/$TOTAL_DABS] $DAB_FOLDER - FAILED (Validation error - exit code: $VALIDATION_EXIT_CODE)" + FAILED_COUNT=$((FAILED_COUNT + 1)) + FAILED_DABS="$FAILED_DABS\n - $DAB_FOLDER (Validation error - exit code: $VALIDATION_EXIT_CODE)" + cd /home/vsts/work/1/s + continue + fi + + # Deploy bundle + echo "" + echo "##[group]Deploying bundle..." 
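+          # $(env) is an Azure Pipelines macro resolved from the branch-selected
+          # variable group before this script runs, so the same command deploys
+          # to dev/test/prod automatically. The cd /home/vsts/work/1/s below
+          # assumes a Microsoft-hosted Ubuntu agent (it is the default
+          # Build.SourcesDirectory).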
+ if databricks bundle deploy -t $(env); then + echo "✓ Deployment successful" + echo "##[endgroup]" + echo "[$CURRENT_DAB/$TOTAL_DABS] $DAB_FOLDER - SUCCESS" + SUCCESS_COUNT=$((SUCCESS_COUNT + 1)) + else + echo "##[error]Deployment failed for $DAB_FOLDER!" + echo "##[endgroup]" + echo "[$CURRENT_DAB/$TOTAL_DABS] $DAB_FOLDER - FAILED (Deployment error)" + FAILED_COUNT=$((FAILED_COUNT + 1)) + FAILED_DABS="$FAILED_DABS\n - $DAB_FOLDER (Deployment error)" + fi + + # Return to root directory + cd /home/vsts/work/1/s + + echo "" + echo "Progress: $CURRENT_DAB/$TOTAL_DABS completed (Success: $SUCCESS_COUNT, Failed: $FAILED_COUNT)" + echo "=========================================" + done + + # Final summary + echo "" + echo "##[section]=========================================" + echo "##[section]DEPLOYMENT COMPLETE" + echo "##[section]=========================================" + echo "Total DABs processed: $TOTAL_DABS" + echo "Successful: $SUCCESS_COUNT" + echo "Failed: $FAILED_COUNT" + + echo "End Time: $(date)" + echo "Total: $TOTAL_DABS | Success: $SUCCESS_COUNT | Failed: $FAILED_COUNT" + + # Set output variables for summary stage + echo "##vso[task.setvariable variable=DEPLOYMENT_SUMMARY;isOutput=true]" + echo "##vso[task.setvariable variable=SUCCESS_COUNT;isOutput=true]$SUCCESS_COUNT" + echo "##vso[task.setvariable variable=FAILED_COUNT;isOutput=true]$FAILED_COUNT" + + # Fail the job if any DAB failed + if [ $FAILED_COUNT -gt 0 ]; then + echo "" + echo "##[error]$FAILED_COUNT DAB(s) failed to deploy:" + echo -e "$FAILED_DABS" + echo "" + echo "##vso[task.logissue type=error]$FAILED_COUNT out of $TOTAL_DABS DAB deployments failed" + exit 1 + else + echo "" + echo "##[section]All DABs deployed successfully! 🎉" + fi + condition: ne(variables['checkChanges.SKIP_DEPLOYMENT'], 'true') + env: + DATABRICKS_HOST: $(DATABRICKS_HOST) + DATABRICKS_TOKEN: $(DATABRICKS_TOKEN) + name: deployAll + displayName: 'Deploy all changed bundles sequentially' + +# Stage 3: Summary and notifications +- stage: Summary + condition: always() + dependsOn: + - DetectChanges + - DeployChangedDABs + jobs: + - job: PublishResults + pool: + vmImage: 'ubuntu-latest' + variables: + has_changes: $[ stageDependencies.DetectChanges.DetectChangedDABs.outputs['detectChanges.HAS_CHANGES'] ] + changed_count: $[ stageDependencies.DetectChanges.DetectChangedDABs.outputs['detectChanges.CHANGED_COUNT'] ] + deploy_success_count: $[ stageDependencies.DeployChangedDABs.DeployDABsSequentially.outputs['deployAll.SUCCESS_COUNT'] ] + deploy_failed_count: $[ stageDependencies.DeployChangedDABs.DeployDABsSequentially.outputs['deployAll.FAILED_COUNT'] ] + steps: + - script: | + echo "=== PIPELINE SUMMARY ===" + echo "========================" + + if [ "$(has_changes)" = "false" ]; then + echo "No DAB changes detected." + else + echo "Changed DABs: $(changed_count)" + + # Check if this was a PR or deployment + if [ "$(System.PullRequest.PullRequestId)" != "" ]; then + echo "Pull Request - Changes detected but no deployment performed." 
+          else
+            echo "Deployment Results:"
+            if [ -n "$(deploy_success_count)" ]; then
+              echo "  Successfully deployed: $(deploy_success_count)"
+            fi
+            if [ -n "$(deploy_failed_count)" ]; then
+              echo "  Failed deployments: $(deploy_failed_count)"
+            fi
+
+            if [ -n "$(deploy_failed_count)" ] && [ "$(deploy_failed_count)" -gt 0 ]; then
+              echo ""
+              echo "##vso[task.logissue type=error]Pipeline completed with $(deploy_failed_count) failed deployment(s)"
+            fi
+          fi
+        fi
+
+        echo ""
+        echo "Check the stage logs for detailed information."
+      displayName: 'Pipeline Summary'
+
+    - task: PublishTestResults@2
+      condition: succeededOrFailed()
+      inputs:
+        testResultsFormat: 'JUnit'
+        testResultsFiles: '**/test-*.xml'
+        failTaskOnFailedTests: false
+      displayName: 'Publish test results'
\ No newline at end of file
diff --git a/contrib/azure_cicd_quickstart/terraform/terraform.tfvars.template b/contrib/azure_cicd_quickstart/terraform/terraform.tfvars.template
new file mode 100644
index 0000000..31a3bc4
--- /dev/null
+++ b/contrib/azure_cicd_quickstart/terraform/terraform.tfvars.template
@@ -0,0 +1,86 @@
+# Update with your actual values
+
+# ==============================================================================
+# AZURE DEVOPS ORGANIZATION CONFIGURATION
+# ==============================================================================
+# Your Azure DevOps organization name (from https://dev.azure.com/{org_name})
+organization_name = "YOUR_ORGANIZATION_NAME"
+
+# Your Azure DevOps organization GUID
+# Get this from Azure DevOps organization settings -> Microsoft Entra -> Download
+organization_id = "YOUR_ORGANIZATION_GUID"
+
+# Azure DevOps Personal Access Token with required permissions:
+# - Project and Team: Read & Write
+# - Service Connections: Read & Write
+# - Build: Read & Execute
+azdo_personal_access_token = "YOUR_AZDO_PAT_TOKEN"
+
+# ==============================================================================
+# PROJECT CONFIGURATION
+# ==============================================================================
+project_name        = "YOUR_PROJECT_NAME"
+project_description = "Description of your Azure DevOps project"
+project_visibility  = "private" # or "public"
+
+pipeline_name     = "DAB-CI-Pipeline"
+pipeline_yml_path = "azure-pipelines.yml"
+
+# ==============================================================================
+# MANAGEMENT RESOURCE GROUP
+# ==============================================================================
+# Resource Group where managed identities will be created (can be in any subscription)
+resource_group_name = "YOUR_MANAGEMENT_RESOURCE_GROUP_NAME"
+
+# ==============================================================================
+# DEV ENVIRONMENT CONFIGURATION
+# ==============================================================================
+# Dev Azure subscription details
+azure_subscription_id_dev   = "YOUR_DEV_AZURE_SUBSCRIPTION_ID"
+azure_subscription_name_dev = "YOUR_DEV_AZURE_SUBSCRIPTION_NAME"
+service_connection_name_dev = "YOUR_PROJECT_NAME-Dev-Connection"
+databricks_host_dev         = "https://<your-workspace>.azuredatabricks.net/"
+
+# ==============================================================================
+# TEST ENVIRONMENT CONFIGURATION
+# ==============================================================================
+# Test Azure subscription details
+azure_subscription_id_test   = "YOUR_TEST_AZURE_SUBSCRIPTION_ID"
+azure_subscription_name_test = "YOUR_TEST_AZURE_SUBSCRIPTION_NAME"
+service_connection_name_test = "YOUR_PROJECT_NAME-Test-Connection"
+databricks_host_test         = 
"https://.azuredatabricks.net/" + +# ============================================================================== +# PROD ENVIRONMENT CONFIGURATION +# ============================================================================== +# Prod Azure subscription details +azure_subscription_id_prod = "YOUR_PROD_AZURE_SUBSCRIPTION_ID" +azure_subscription_name_prod = "YOUR_PROD_AZURE_SUBSCRIPTION_NAME" +service_connection_name_prod = "YOUR_PROJECT_NAME-Prod-Connection" +databricks_host_prod = "https://.azuredatabricks.net/" + +# ============================================================================== +# INSTRUCTIONS FOR CUSTOMERS +# ============================================================================== +# 1. Copy this file to terraform.tfvars +# 2. Replace all YOUR_* placeholders with actual values +# 3. Ensure the management resource group exists before running terraform +# 4. Generate Azure DevOps PAT with required permissions +# 5. Run: terraform init && terraform plan && terraform apply +# +# This single deployment will create: +# - Azure DevOps project and pipeline +# - 3 variable groups (Dev, Test, Prod) +# - 3 managed identities (one per environment) +# - 3 service connections (one per environment/subscription) +# +# Required Azure DevOps PAT Permissions: +# - Project and Team (Read & Write) +# - Service Connections (Read & Write) +# - Build (Read & Execute) +# +# Prerequisites: +# - Azure CLI installed and logged in (az login) +# - Terraform installed +# - Access to all target subscriptions as Owner or Contributor +# - Azure DevOps organization with appropriate permissions \ No newline at end of file diff --git a/contrib/azure_cicd_quickstart/terraform/variables.tf b/contrib/azure_cicd_quickstart/terraform/variables.tf new file mode 100644 index 0000000..cf3e406 --- /dev/null +++ b/contrib/azure_cicd_quickstart/terraform/variables.tf @@ -0,0 +1,176 @@ +variable "organization_name" { + description = "The name of the Azure DevOps organization" + type = string + + validation { + condition = length(var.organization_name) > 0 + error_message = "Organization name cannot be empty." + } +} + +variable "organization_id" { + description = "The GUID of the Azure DevOps organization (for workload identity federation)" + type = string + + validation { + condition = can(regex("^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", var.organization_id)) + error_message = "Organization ID must be a valid GUID format (e.g., 12345678-1234-1234-1234-123456789abc)." 
diff --git a/contrib/azure_cicd_quickstart/terraform/variables.tf b/contrib/azure_cicd_quickstart/terraform/variables.tf
new file mode 100644
index 0000000..cf3e406
--- /dev/null
+++ b/contrib/azure_cicd_quickstart/terraform/variables.tf
@@ -0,0 +1,176 @@
+variable "organization_name" {
+  description = "The name of the Azure DevOps organization"
+  type        = string
+
+  validation {
+    condition     = length(var.organization_name) > 0
+    error_message = "Organization name cannot be empty."
+  }
+}
+
+variable "organization_id" {
+  description = "The GUID of the Azure DevOps organization (for workload identity federation)"
+  type        = string
+
+  validation {
+    condition     = can(regex("^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", var.organization_id))
+    error_message = "Organization ID must be a valid GUID (e.g., 12345678-1234-1234-1234-123456789abc)."
+  }
+}
+
+variable "project_name" {
+  description = "The name of the Azure DevOps project"
+  type        = string
+}
+
+variable "project_description" {
+  description = "The description of the Azure DevOps project"
+  type        = string
+  default     = ""
+}
+
+variable "project_visibility" {
+  description = "The visibility of the project (private or public)"
+  type        = string
+  default     = "private"
+}
+
+variable "pipeline_name" {
+  description = "The name of the Azure DevOps pipeline"
+  type        = string
+}
+
+variable "pipeline_yml_path" {
+  description = "Path to the azure-pipelines.yml file in the repository"
+  type        = string
+  default     = "azure-pipelines.yml"
+}
+
+# Service connection names for each environment
+variable "service_connection_name_dev" {
+  description = "Name for the Azure DevOps service connection for the dev environment"
+  type        = string
+}
+
+variable "service_connection_name_test" {
+  description = "Name for the Azure DevOps service connection for the test environment"
+  type        = string
+}
+
+variable "service_connection_name_prod" {
+  description = "Name for the Azure DevOps service connection for the prod environment"
+  type        = string
+}
+
+# Dev Environment Azure Subscription
+variable "azure_subscription_id_dev" {
+  description = "Azure subscription ID for the dev environment"
+  type        = string
+
+  validation {
+    condition     = can(regex("^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", var.azure_subscription_id_dev))
+    error_message = "Dev Azure subscription ID must be a valid GUID."
+  }
+}
+
+variable "azure_subscription_name_dev" {
+  description = "Azure subscription name for the dev environment"
+  type        = string
+
+  validation {
+    condition     = length(var.azure_subscription_name_dev) > 0
+    error_message = "Dev Azure subscription name cannot be empty."
+  }
+}
+
+# Test Environment Azure Subscription
+variable "azure_subscription_id_test" {
+  description = "Azure subscription ID for the test environment"
+  type        = string
+
+  validation {
+    condition     = can(regex("^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", var.azure_subscription_id_test))
+    error_message = "Test Azure subscription ID must be a valid GUID."
+  }
+}
+
+variable "azure_subscription_name_test" {
+  description = "Azure subscription name for the test environment"
+  type        = string
+
+  validation {
+    condition     = length(var.azure_subscription_name_test) > 0
+    error_message = "Test Azure subscription name cannot be empty."
+  }
+}
+
+# Prod Environment Azure Subscription
+variable "azure_subscription_id_prod" {
+  description = "Azure subscription ID for the prod environment"
+  type        = string
+
+  validation {
+    condition     = can(regex("^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", var.azure_subscription_id_prod))
+    error_message = "Prod Azure subscription ID must be a valid GUID."
+  }
+}
+
+variable "azure_subscription_name_prod" {
+  description = "Azure subscription name for the prod environment"
+  type        = string
+
+  validation {
+    condition     = length(var.azure_subscription_name_prod) > 0
+    error_message = "Prod Azure subscription name cannot be empty."
+  }
+}
+
+variable "azdo_personal_access_token" {
+  description = "Azure DevOps Personal Access Token"
+  type        = string
+  sensitive   = true
+}
+
+variable "resource_group_name" {
+  description = "Name of the Azure Resource Group where the managed identities will be created"
+  type        = string
+
+  validation {
+    condition     = length(var.resource_group_name) > 0
+    error_message = "Resource group name cannot be empty."
+  }
+}
+
+# Environment-specific Databricks workspace URLs
+variable "databricks_host_dev" {
+  description = "Databricks workspace URL for the development environment"
+  type        = string
+
+  validation {
+    condition     = can(regex("^https://.*\\.azuredatabricks\\.net/?$", var.databricks_host_dev))
+    error_message = "Databricks host must be a valid Azure Databricks URL."
+  }
+}
+
+variable "databricks_host_test" {
+  description = "Databricks workspace URL for the test environment"
+  type        = string
+
+  validation {
+    condition     = can(regex("^https://.*\\.azuredatabricks\\.net/?$", var.databricks_host_test))
+    error_message = "Databricks host must be a valid Azure Databricks URL."
+  }
+}
+
+variable "databricks_host_prod" {
+  description = "Databricks workspace URL for the production environment"
+  type        = string
+
+  validation {
+    condition     = can(regex("^https://.*\\.azuredatabricks\\.net/?$", var.databricks_host_prod))
+    error_message = "Databricks host must be a valid Azure Databricks URL."
+  }
+}
+
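The three `databricks_host_*` validations share one regex. To sanity-check a workspace URL against it before running `terraform plan`, `terraform console` can evaluate the expression directly; the hostnames below are made up:

```bash
# Prints "true": a well-formed Azure Databricks workspace URL.
echo 'can(regex("^https://.*\\.azuredatabricks\\.net/?$", "https://adb-1234567890123456.7.azuredatabricks.net/"))' \
  | terraform console

# Prints "false": an AWS-style Databricks host fails the Azure-only check.
echo 'can(regex("^https://.*\\.azuredatabricks\\.net/?$", "https://example.cloud.databricks.com"))' \
  | terraform console
```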
diff --git a/contrib/data_engineering/databricks.yml b/contrib/data_engineering/databricks.yml
index 0577aa4..13b9f27 100644
--- a/contrib/data_engineering/databricks.yml
+++ b/contrib/data_engineering/databricks.yml
@@ -29,6 +29,7 @@ targets:
       catalog: catalog
       schema: ${workspace.current_user.short_name}
       notifications: []
+
   prod:
     mode: production
     workspace: