Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 142 additions & 0 deletions dev-infrastructure/Makefile.sre-tooling
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#
# SRE Tooling AKS Cluster
# Standalone Makefile - does not require main Makefile
#
# Environment variables (each target validates the ones it needs):
# SRE_TOOLING_ENV: dev or pers
# SRE_TOOLING_RG: Resource group name (e.g., hcp-dev-sre-tooling or hcp-pers-sre-tooling)
# SRE_TOOLING_SUBSCRIPTION_ID: Subscription ID
# SERVICE_KEYVAULT_NAME: Name of existing service key vault
# SERVICE_KEYVAULT_RG: Resource group of service key vault
# REGIONAL_RG: Regional resource group name
# SVC_ACR_RESOURCE_ID: Resource ID of SVC ACR
# GLOBAL_MSI_ID: Resource ID of global MSI
# KV_CERT_OFFICER_PRINCIPAL_ID: Principal ID for KV certificate officer
# AZURE_MONITORING_WORKSPACE_ID: Resource ID of Azure Monitor Workspace (optional)
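# ADMIN_API_MI_NAME: Name of Admin API managed identity
# AKS_CLUSTER_NAME: AKS cluster name override (optional; defaults to sre-tooling-aks for dev, pers-westus3-sre-tooling for pers)
# SKIP_CONFIRM: Set to a non-empty value to skip "what-if" confirmation prompts (optional)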
#
# Usage: make -f Makefile.sre-tooling <target>
#

# Deployment targets pass $(PROMPT_TO_CONFIRM) to `az deployment group create`.
# It is only defined while SKIP_CONFIRM is unset or empty, so setting
# SKIP_CONFIRM to any non-empty value drops the "what-if" confirmation prompt.
ifndef SKIP_CONFIRM
PROMPT_TO_CONFIRM := "--confirm-with-what-if"
endif

# Names of the supported environments (the values accepted for SRE_TOOLING_ENV).
SRE_TOOLING_ENVS := dev pers

# sre-tooling-infra: create the resource group, then deploy
# templates/sre-tooling-infra.bicep into it.
#
# Variables validated below, in this order: SRE_TOOLING_ENV (dev or pers),
# SRE_TOOLING_RG, SRE_TOOLING_SUBSCRIPTION_ID, SERVICE_KEYVAULT_NAME,
# SERVICE_KEYVAULT_RG, GLOBAL_MSI_ID, KV_CERT_OFFICER_PRINCIPAL_ID.
# Optional: SKIP_CONFIRM (non-empty removes the what-if confirmation prompt).
#
# Any failure of `az group create` is ignored (`|| true`).
#
# The deployment uses az's default Incremental mode on purpose. In Complete
# mode Azure Resource Manager deletes every resource in the target group that
# the template does not define, and sre-tooling-cluster deploys
# templates/sre-tooling-cluster.bicep into this same ${SRE_TOOLING_RG}, so a
# Complete-mode run of this target would remove those resources.
sre-tooling-infra:
	@[ "${SRE_TOOLING_ENV}" ] || ( echo ">> SRE_TOOLING_ENV is not set (dev or pers)"; exit 1 )
	@[ "${SRE_TOOLING_ENV}" = "dev" ] || [ "${SRE_TOOLING_ENV}" = "pers" ] || ( echo ">> SRE_TOOLING_ENV must be 'dev' or 'pers', got: ${SRE_TOOLING_ENV}"; exit 1 )
	@[ "${SRE_TOOLING_RG}" ] || ( echo ">> SRE_TOOLING_RG is not set"; exit 1 )
	@[ "${SRE_TOOLING_SUBSCRIPTION_ID}" ] || ( echo ">> SRE_TOOLING_SUBSCRIPTION_ID is not set"; exit 1 )
	@[ "${SERVICE_KEYVAULT_NAME}" ] || ( echo ">> SERVICE_KEYVAULT_NAME is not set"; exit 1 )
	@[ "${SERVICE_KEYVAULT_RG}" ] || ( echo ">> SERVICE_KEYVAULT_RG is not set"; exit 1 )
	@[ "${GLOBAL_MSI_ID}" ] || ( echo ">> GLOBAL_MSI_ID is not set"; exit 1 )
	@[ "${KV_CERT_OFFICER_PRINCIPAL_ID}" ] || ( echo ">> KV_CERT_OFFICER_PRINCIPAL_ID is not set"; exit 1 )
	az group create \
		--resource-group ${SRE_TOOLING_RG} --subscription ${SRE_TOOLING_SUBSCRIPTION_ID} \
		--location westus3 --tags persist=true environment=${SRE_TOOLING_ENV} || true
	az deployment group create \
		--name sre-tooling-infra-${SRE_TOOLING_ENV} \
		--resource-group ${SRE_TOOLING_RG} \
		--subscription ${SRE_TOOLING_SUBSCRIPTION_ID} \
		--template-file templates/sre-tooling-infra.bicep \
		$(PROMPT_TO_CONFIRM) \
		--parameters configurations/sre-tooling-infra.bicepparam \
		--parameters serviceKeyVaultName=${SERVICE_KEYVAULT_NAME} \
		--parameters serviceKeyVaultResourceGroup=${SERVICE_KEYVAULT_RG} \
		--parameters globalMSIId=${GLOBAL_MSI_ID} \
		--parameters kvCertOfficerPrincipalId=${KV_CERT_OFFICER_PRINCIPAL_ID} \
		--parameters serviceKeyVaultTagValue=${SRE_TOOLING_ENV}
.PHONY: sre-tooling-infra

# sre-tooling-infra.what-if: run `az deployment group what-if` for
# templates/sre-tooling-infra.bicep against ${SRE_TOOLING_RG}.
#
# Validates SRE_TOOLING_ENV (dev or pers), SRE_TOOLING_RG,
# SRE_TOOLING_SUBSCRIPTION_ID, SERVICE_KEYVAULT_NAME, SERVICE_KEYVAULT_RG,
# GLOBAL_MSI_ID and KV_CERT_OFFICER_PRINCIPAL_ID before calling az.
# No --mode is passed, so az's default deployment mode is previewed.
sre-tooling-infra.what-if:
	@test -n "$(SRE_TOOLING_ENV)" || { echo ">> SRE_TOOLING_ENV is not set (dev or pers)"; exit 1; }
	@case "$(SRE_TOOLING_ENV)" in dev|pers) ;; *) echo ">> SRE_TOOLING_ENV must be 'dev' or 'pers', got: $(SRE_TOOLING_ENV)"; exit 1 ;; esac
	@test -n "$(SRE_TOOLING_RG)" || { echo ">> SRE_TOOLING_RG is not set"; exit 1; }
	@test -n "$(SRE_TOOLING_SUBSCRIPTION_ID)" || { echo ">> SRE_TOOLING_SUBSCRIPTION_ID is not set"; exit 1; }
	@test -n "$(SERVICE_KEYVAULT_NAME)" || { echo ">> SERVICE_KEYVAULT_NAME is not set"; exit 1; }
	@test -n "$(SERVICE_KEYVAULT_RG)" || { echo ">> SERVICE_KEYVAULT_RG is not set"; exit 1; }
	@test -n "$(GLOBAL_MSI_ID)" || { echo ">> GLOBAL_MSI_ID is not set"; exit 1; }
	@test -n "$(KV_CERT_OFFICER_PRINCIPAL_ID)" || { echo ">> KV_CERT_OFFICER_PRINCIPAL_ID is not set"; exit 1; }
	az deployment group what-if \
		--subscription $(SRE_TOOLING_SUBSCRIPTION_ID) \
		--resource-group $(SRE_TOOLING_RG) \
		--name sre-tooling-infra-$(SRE_TOOLING_ENV) \
		--template-file templates/sre-tooling-infra.bicep \
		--parameters configurations/sre-tooling-infra.bicepparam \
		--parameters serviceKeyVaultName=$(SERVICE_KEYVAULT_NAME) \
		--parameters serviceKeyVaultResourceGroup=$(SERVICE_KEYVAULT_RG) \
		--parameters globalMSIId=$(GLOBAL_MSI_ID) \
		--parameters kvCertOfficerPrincipalId=$(KV_CERT_OFFICER_PRINCIPAL_ID) \
		--parameters serviceKeyVaultTagValue=$(SRE_TOOLING_ENV)
.PHONY: sre-tooling-infra.what-if

# sre-tooling-cluster: deploy templates/sre-tooling-cluster.bicep into
# ${SRE_TOOLING_RG}.
#
# Validates SRE_TOOLING_ENV (dev or pers), SRE_TOOLING_RG,
# SRE_TOOLING_SUBSCRIPTION_ID, SERVICE_KEYVAULT_NAME, SERVICE_KEYVAULT_RG,
# REGIONAL_RG, SVC_ACR_RESOURCE_ID, GLOBAL_MSI_ID and ADMIN_API_MI_NAME.
# Optional: AKS_CLUSTER_NAME (otherwise derived from SRE_TOOLING_ENV),
# AZURE_MONITORING_WORKSPACE_ID (parameter is only passed when non-empty),
# SKIP_CONFIRM (non-empty removes the what-if confirmation prompt).
#
# NOTE(review): the deployment runs with --mode complete, which deletes
# resources in the group that the template does not define. Confirm nothing
# else (e.g. from sre-tooling-infra) is expected to live in ${SRE_TOOLING_RG}.
sre-tooling-cluster:
	@test -n "$(SRE_TOOLING_ENV)" || { echo ">> SRE_TOOLING_ENV is not set (dev or pers)"; exit 1; }
	@case "$(SRE_TOOLING_ENV)" in dev|pers) ;; *) echo ">> SRE_TOOLING_ENV must be 'dev' or 'pers', got: $(SRE_TOOLING_ENV)"; exit 1 ;; esac
	@test -n "$(SRE_TOOLING_RG)" || { echo ">> SRE_TOOLING_RG is not set"; exit 1; }
	@test -n "$(SRE_TOOLING_SUBSCRIPTION_ID)" || { echo ">> SRE_TOOLING_SUBSCRIPTION_ID is not set"; exit 1; }
	@test -n "$(SERVICE_KEYVAULT_NAME)" || { echo ">> SERVICE_KEYVAULT_NAME is not set"; exit 1; }
	@test -n "$(SERVICE_KEYVAULT_RG)" || { echo ">> SERVICE_KEYVAULT_RG is not set"; exit 1; }
	@test -n "$(REGIONAL_RG)" || { echo ">> REGIONAL_RG is not set"; exit 1; }
	@test -n "$(SVC_ACR_RESOURCE_ID)" || { echo ">> SVC_ACR_RESOURCE_ID is not set"; exit 1; }
	@test -n "$(GLOBAL_MSI_ID)" || { echo ">> GLOBAL_MSI_ID is not set"; exit 1; }
	@test -n "$(ADMIN_API_MI_NAME)" || { echo ">> ADMIN_API_MI_NAME is not set"; exit 1; }
	@$(eval DEFAULT_CLUSTER_NAME = $(if $(filter pers,$(SRE_TOOLING_ENV)),pers-westus3-sre-tooling,sre-tooling-aks))
	@$(eval AKS_CLUSTER_NAME = $(or $(AKS_CLUSTER_NAME),$(DEFAULT_CLUSTER_NAME)))
	@echo "Using cluster name: $(AKS_CLUSTER_NAME)"
	az deployment group create \
		--subscription $(SRE_TOOLING_SUBSCRIPTION_ID) \
		--resource-group $(SRE_TOOLING_RG) \
		--name sre-tooling-cluster-$(SRE_TOOLING_ENV) \
		--mode complete \
		--template-file templates/sre-tooling-cluster.bicep \
		$(PROMPT_TO_CONFIRM) \
		--parameters configurations/sre-tooling-cluster.bicepparam \
		--parameters serviceKeyVaultName=$(SERVICE_KEYVAULT_NAME) \
		--parameters serviceKeyVaultResourceGroup=$(SERVICE_KEYVAULT_RG) \
		--parameters regionalResourceGroup=$(REGIONAL_RG) \
		--parameters svcAcrResourceId=$(SVC_ACR_RESOURCE_ID) \
		--parameters globalMSIId=$(GLOBAL_MSI_ID) \
		--parameters adminApiMIName=$(ADMIN_API_MI_NAME) \
		--parameters aksKeyVaultName=sre-tooling-$(SRE_TOOLING_ENV)-etcd-kv \
		--parameters aksKeyVaultTagValue=$(SRE_TOOLING_ENV) \
		--parameters aksClusterName=$(AKS_CLUSTER_NAME) \
		$(if $(AZURE_MONITORING_WORKSPACE_ID),--parameters azureMonitoringWorkspaceId=$(AZURE_MONITORING_WORKSPACE_ID))
.PHONY: sre-tooling-cluster

# sre-tooling-cluster.what-if: run `az deployment group what-if` for
# templates/sre-tooling-cluster.bicep against ${SRE_TOOLING_RG}.
#
# Validates SRE_TOOLING_ENV (dev or pers), SRE_TOOLING_RG,
# SRE_TOOLING_SUBSCRIPTION_ID, SERVICE_KEYVAULT_NAME, SERVICE_KEYVAULT_RG,
# REGIONAL_RG, SVC_ACR_RESOURCE_ID, GLOBAL_MSI_ID and ADMIN_API_MI_NAME.
# Optional: AKS_CLUSTER_NAME (otherwise derived from SRE_TOOLING_ENV) and
# AZURE_MONITORING_WORKSPACE_ID (parameter is only passed when non-empty).
#
# --mode complete mirrors the sre-tooling-cluster target, which deploys in
# complete mode. Without it the preview would use az's default mode and would
# not list the resources that the real deployment deletes.
sre-tooling-cluster.what-if:
	@[ "${SRE_TOOLING_ENV}" ] || ( echo ">> SRE_TOOLING_ENV is not set (dev or pers)"; exit 1 )
	@[ "${SRE_TOOLING_ENV}" = "dev" ] || [ "${SRE_TOOLING_ENV}" = "pers" ] || ( echo ">> SRE_TOOLING_ENV must be 'dev' or 'pers', got: ${SRE_TOOLING_ENV}"; exit 1 )
	@[ "${SRE_TOOLING_RG}" ] || ( echo ">> SRE_TOOLING_RG is not set"; exit 1 )
	@[ "${SRE_TOOLING_SUBSCRIPTION_ID}" ] || ( echo ">> SRE_TOOLING_SUBSCRIPTION_ID is not set"; exit 1 )
	@[ "${SERVICE_KEYVAULT_NAME}" ] || ( echo ">> SERVICE_KEYVAULT_NAME is not set"; exit 1 )
	@[ "${SERVICE_KEYVAULT_RG}" ] || ( echo ">> SERVICE_KEYVAULT_RG is not set"; exit 1 )
	@[ "${REGIONAL_RG}" ] || ( echo ">> REGIONAL_RG is not set"; exit 1 )
	@[ "${SVC_ACR_RESOURCE_ID}" ] || ( echo ">> SVC_ACR_RESOURCE_ID is not set"; exit 1 )
	@[ "${GLOBAL_MSI_ID}" ] || ( echo ">> GLOBAL_MSI_ID is not set"; exit 1 )
	@[ "${ADMIN_API_MI_NAME}" ] || ( echo ">> ADMIN_API_MI_NAME is not set"; exit 1 )
	@$(eval DEFAULT_CLUSTER_NAME = $(if $(filter pers,${SRE_TOOLING_ENV}),pers-westus3-sre-tooling,sre-tooling-aks))
	@$(eval AKS_CLUSTER_NAME = $(or ${AKS_CLUSTER_NAME},${DEFAULT_CLUSTER_NAME}))
	@echo "Using cluster name: ${AKS_CLUSTER_NAME}"
	az deployment group what-if \
		--name sre-tooling-cluster-${SRE_TOOLING_ENV} \
		--resource-group ${SRE_TOOLING_RG} \
		--mode complete \
		--subscription ${SRE_TOOLING_SUBSCRIPTION_ID} \
		--template-file templates/sre-tooling-cluster.bicep \
		--parameters configurations/sre-tooling-cluster.bicepparam \
		--parameters serviceKeyVaultName=${SERVICE_KEYVAULT_NAME} \
		--parameters serviceKeyVaultResourceGroup=${SERVICE_KEYVAULT_RG} \
		--parameters regionalResourceGroup=${REGIONAL_RG} \
		--parameters svcAcrResourceId=${SVC_ACR_RESOURCE_ID} \
		--parameters globalMSIId=${GLOBAL_MSI_ID} \
		--parameters adminApiMIName=${ADMIN_API_MI_NAME} \
		--parameters aksKeyVaultName=sre-tooling-${SRE_TOOLING_ENV}-etcd-kv \
		--parameters aksKeyVaultTagValue=${SRE_TOOLING_ENV} \
		--parameters aksClusterName=${AKS_CLUSTER_NAME} \
		$(if $(AZURE_MONITORING_WORKSPACE_ID),--parameters azureMonitoringWorkspaceId=${AZURE_MONITORING_WORKSPACE_ID})
.PHONY: sre-tooling-cluster.what-if

66 changes: 66 additions & 0 deletions dev-infrastructure/configurations/sre-tooling-cluster.bicepparam
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
using '../templates/sre-tooling-cluster.bicep'

// ---------------------------------------------------------------------------
// Values replaced by Makefile.sre-tooling through `--parameters name=value`.
// The literals below only apply when the template is deployed without it.
// ---------------------------------------------------------------------------

// Makefile passes AKS_CLUSTER_NAME, defaulting to 'sre-tooling-aks' for dev and
// 'pers-westus3-sre-tooling' for pers.
param aksClusterName = 'sre-tooling-aks'

// Makefile passes 'sre-tooling-<env>-etcd-kv' and the environment name.
param aksKeyVaultName = ''
param aksKeyVaultTagValue = 'dev'

param serviceKeyVaultName = ''
param serviceKeyVaultResourceGroup = ''
param regionalResourceGroup = ''
param svcAcrResourceId = ''
param globalMSIId = ''
param adminApiMIName = ''

// Only passed by the Makefile when AZURE_MONITORING_WORKSPACE_ID is non-empty.
param azureMonitoringWorkspaceId = ''

// ---------------------------------------------------------------------------
// Static settings (not passed on the command line by the Makefile).
// ---------------------------------------------------------------------------

// Location
param location = 'westus3'

// AKS cluster version and address space
param kubernetesVersion = '1.32'
param vnetAddressPrefix = '10.0.0.0/16'
param subnetPrefix = '10.0.0.0/24'
param podSubnetPrefix = '10.0.1.0/24'

// Network
param aksNetworkDataplane = 'azure'
param aksNetworkPolicy = 'azure'
param aksClusterOutboundIPAddressIPTags = ''

// System agent pool
param systemAgentPoolCount = 1
param systemAgentMinCount = 2
param systemAgentMaxCount = 3
param systemAgentVMSize = 'Standard_D2s_v3'
param systemAgentPoolZones = '1,2,3'
param systemZoneRedundantMode = 'Zone'
param aksSystemOsDiskSizeGB = 32

// User agent pool
param userAgentPoolCount = 1
param userAgentMinCount = 1
param userAgentMaxCount = 3
param userAgentVMSize = 'Standard_D2s_v3'
param userAgentPoolZones = '1,2,3'
param userZoneRedundantMode = 'Zone'
param userOsDiskSizeGB = 32

// Infra agent pool (for Prometheus)
param infraAgentPoolCount = 1
param infraAgentMinCount = 1
param infraAgentMaxCount = 2
param infraAgentVMSize = 'Standard_D4s_v3'
param infraAgentPoolZones = '1,2,3'
param infraZoneRedundantMode = 'Zone'
param infraOsDiskSizeGB = 64

// Key Vault for AKS etcd
param aksKeyVaultTagName = 'aro-hcp-environment'
param aksEtcdKVEnableSoftDelete = true

// Logs identity and workload settings
param logsNamespace = 'logs'
param logsMSI = 'logs-msi'
param logsServiceAccount = 'logs-service-account'

// Admin API workload settings
param adminApiNamespace = 'admin-api'
param adminApiServiceAccountName = 'admin-api-service-account'

13 changes: 13 additions & 0 deletions dev-infrastructure/configurations/sre-tooling-infra.bicepparam
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using '../templates/sre-tooling-infra.bicep'

// Replaced by Makefile.sre-tooling through `--parameters name=value`; the
// literals below only apply when the template is deployed without it.
param serviceKeyVaultName = ''
param serviceKeyVaultResourceGroup = ''
param serviceKeyVaultTagValue = 'dev'
param globalMSIId = ''
param kvCertOfficerPrincipalId = ''

// Static service Key Vault settings (not passed on the command line).
param serviceKeyVaultLocation = 'westus3'
param serviceKeyVaultSoftDelete = true
param serviceKeyVaultPrivate = true
param serviceKeyVaultTagName = 'aro-hcp-environment'

42 changes: 42 additions & 0 deletions dev-infrastructure/templates/output-sre-tooling-cluster.bicep
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Looks up resources that already exist in the current resource group and
// exposes selected properties of them as deployment outputs. Nothing is
// created here: every resource below is declared `existing`.
import { safeTake } from '../modules/common.bicep'

@description('Azure Region Location')
param location string = resourceGroup().location

@description('AKS cluster name')
param aksClusterName string

@description('The managed identity name of the logs')
param logsMSI string

@description('The name of the Admin API managed identity')
param adminApiMIName string

// The data collection endpoint and the data collection rule are looked up
// under the same derived name.
// NOTE(review): safeTake is defined in ../modules/common.bicep (not shown);
// presumably it limits the name to 44 characters - confirm.
var promCollectionName = safeTake('MSProm-${location}-${aksClusterName}', 44)

// User-assigned managed identities
resource prometheusUAMI 'Microsoft.ManagedIdentity/userAssignedIdentities@2024-11-30' existing = {
  name: 'prometheus'
}

resource logsUAMI 'Microsoft.ManagedIdentity/userAssignedIdentities@2024-11-30' existing = {
  name: logsMSI
}

resource adminApiUAMI 'Microsoft.ManagedIdentity/userAssignedIdentities@2024-11-30' existing = {
  name: adminApiMIName
}

// Azure Monitor data collection endpoint and rule
resource dce 'Microsoft.Insights/dataCollectionEndpoints@2022-06-01' existing = {
  name: promCollectionName
}

resource dcr 'Microsoft.Insights/dataCollectionRules@2022-06-01' existing = {
  name: promCollectionName
}

// Remote-write URL built from the endpoint's metrics ingestion address and the
// rule's immutable id.
output dcrRemoteWriteUrl string = '${dce.properties.metricsIngestion.endpoint}/dataCollectionRules/${dcr.properties.immutableId}/streams/Microsoft-PrometheusMetrics/api/v1/write?api-version=2023-04-24'

// Fixed placeholder value; no HCP data collection rule is looked up here.
output hcpDcrRemoteWriteUrl string = 'NONE'

output prometheusUAMIClientId string = prometheusUAMI.properties.clientId
output clusterLogPrincipalId string = logsUAMI.properties.principalId
output adminApiPrincipalId string = adminApiUAMI.properties.principalId
Loading
Loading