diff --git a/Makefile b/Makefile index 531336a34a0..f3733464d13 100644 --- a/Makefile +++ b/Makefile @@ -186,6 +186,7 @@ E2E_CONF_FILE_ENVSUBST := $(ROOT_DIR)/test/e2e/config/azure-dev-envsubst.yaml SKIP_CLEANUP ?= false AZWI_SKIP_CLEANUP ?= false SKIP_LOG_COLLECTION ?= false +MGMT_CLUSTER_TYPE ?= kind # @sonasingh46: Skip creating mgmt cluster for ci as workload identity needs kind cluster # to be created with extra mounts for key pairs which is not yet supported # by existing e2e framework. A mgmt cluster(kind) is created as part of e2e suite @@ -317,8 +318,12 @@ install-tools: $(ENVSUBST) $(KUSTOMIZE) $(KUBECTL) $(HELM) $(GINKGO) $(KIND) $(A .PHONY: create-management-cluster create-management-cluster: $(KUSTOMIZE) $(ENVSUBST) $(KUBECTL) $(KIND) ## Create a management cluster. - # Create kind management cluster. - $(MAKE) kind-create + # Create management cluster based on type + @if [ "$(MGMT_CLUSTER_TYPE)" = "aks" ]; then \ + $(MAKE) aks-create; \ + else \ + $(MAKE) kind-create; \ + fi # Install cert manager and wait for availability ./hack/install-cert-manager.sh @@ -334,7 +339,9 @@ create-management-cluster: $(KUSTOMIZE) $(ENVSUBST) $(KUBECTL) $(KIND) ## Create timeout --foreground 300 bash -c "until curl --retry $(CURL_RETRIES) -sSL https://github.com/kubernetes-sigs/cluster-api-addon-provider-helm/releases/download/v0.2.5/addon-components.yaml | $(ENVSUBST) | $(KUBECTL) apply -f -; do sleep 5; done" # Deploy CAPZ - $(KIND) load docker-image $(CONTROLLER_IMG)-$(ARCH):$(TAG) --name=$(KIND_CLUSTER_NAME) + if [ "$(MGMT_CLUSTER_TYPE)" != "aks" ]; then \ + $(KIND) load docker-image $(CONTROLLER_IMG)-$(ARCH):$(TAG) --name=$(KIND_CLUSTER_NAME); \ + fi timeout --foreground 300 bash -c "until $(KUSTOMIZE) build config/default | $(ENVSUBST) | $(KUBECTL) apply -f - --server-side=true; do sleep 5; done" # Wait for CAPI deployments @@ -360,7 +367,10 @@ create-management-cluster: $(KUSTOMIZE) $(ENVSUBST) $(KUBECTL) $(KIND) ## Create timeout --foreground 300 bash -c "until $(KUBECTL) get clusters -A; do sleep 3; done" timeout --foreground 300 bash -c "until $(KUBECTL) get azureclusters -A; do sleep 3; done" timeout --foreground 300 bash -c "until $(KUBECTL) get kubeadmcontrolplanes -A; do sleep 3; done" - @echo 'Set kubectl context to the kind management cluster by running "$(KUBECTL) config set-context kind-$(KIND_CLUSTER_NAME)"' + + @if [ "$(MGMT_CLUSTER_TYPE)" != "aks" ]; then \ + echo 'Set kubectl context to the kind management cluster by running "$(KUBECTL) config set-context kind-$(KIND_CLUSTER_NAME)"'; \ + fi .PHONY: create-workload-cluster create-workload-cluster: $(ENVSUBST) $(KUBECTL) ## Create a workload cluster. @@ -726,6 +736,17 @@ test-cover: test ## Run tests with code coverage and generate reports. kind-create-bootstrap: $(KUBECTL) ## Create capz kind bootstrap cluster. KIND_CLUSTER_NAME=capz-e2e ./scripts/kind-with-registry.sh +.PHONY: create-bootstrap +create-bootstrap: $(KUBECTL) ## Create bootstrap cluster (AKS or KIND) for CAPZ testing. Default is KIND. 
+	@echo "Creating bootstrap cluster with type: $(MGMT_CLUSTER_TYPE)"
+	@if [ "$(MGMT_CLUSTER_TYPE)" == "aks" ]; then \
+		MGMT_CLUSTER_NAME="$${MGMT_CLUSTER_NAME:-capz-e2e-$(shell date +%s)}" \
+		./scripts/aks-as-mgmt.sh || { echo "Failed to create AKS bootstrap cluster" >&2; exit 1; }; \
+	else \
+		KIND_CLUSTER_NAME=capz-e2e ./scripts/kind-with-registry.sh || { echo "Failed to create KIND bootstrap cluster" >&2; exit 1; }; \
+	fi
+	@echo "Bootstrap cluster created successfully"
+
 .PHONY: cleanup-workload-identity
 cleanup-workload-identity: ## Cleanup CI workload-identity infra
 	@if ! [ "$(AZWI_SKIP_CLEANUP)" == "true" ]; then \
diff --git a/docs/book/src/developers/development.md b/docs/book/src/developers/development.md
index 9660e7a393c..b55e8019c89 100644
--- a/docs/book/src/developers/development.md
+++ b/docs/book/src/developers/development.md
@@ -507,6 +507,8 @@ To run E2E locally, set `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET`, `AZURE_SUBSCRI
 ./scripts/ci-e2e.sh
 ```

+Note: Users in a restrictive network environment who want to leverage the API Server ILB in their flavors while running e2e tests locally should refer to the detailed explanation in [running e2e tests locally using the API Server ILB networking solution](./tilt-with-aks-as-mgmt-ilb.md#running-e2e-tests-locally-using-api-server-ilbs-networking-solution).
+
 You can optionally set the following variables:

 | Variable | Description | Default |
diff --git a/docs/book/src/developers/tilt-with-aks-as-mgmt-ilb.md b/docs/book/src/developers/tilt-with-aks-as-mgmt-ilb.md
index 1095edd172f..185c85b31b0 100644
--- a/docs/book/src/developers/tilt-with-aks-as-mgmt-ilb.md
+++ b/docs/book/src/developers/tilt-with-aks-as-mgmt-ilb.md
@@ -38,6 +38,7 @@ While the default Tilt setup recommends using a KIND cluster as the management c
    export AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY=""
    export AZURE_CLIENT_ID="${AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY}"
    export AZURE_OBJECT_ID_USER_ASSIGNED_IDENTITY=""
+   export AZURE_USER_ASSIGNED_IDENTITY_RESOURCE_ID=""
    export AZURE_LOCATION=""
    export REGISTRY=
    ```
@@ -76,6 +77,158 @@ While the default Tilt setup recommends using a KIND cluster as the management c
    - [apiserver-ilb](https://github.com/kubernetes-sigs/cluster-api-provider-azure/blob/main/templates/cluster-template-apiserver-ilb.yaml): VM-based default flavor that brings up native K8s clusters with Linux nodes.
    - [apiserver-ilb-windows](https://github.com/kubernetes-sigs/cluster-api-provider-azure/blob/main/templates/cluster-template-windows-apiserver-ilb.yaml): VM-based flavor that brings up native K8s clusters with Linux and Windows nodes.

+## Running e2e tests locally using API Server ILB's networking solution
+
+Running an e2e test locally in a restricted environment calls for some workarounds in the prow templates and the e2e test itself.
+
+1. We need to add the apiserver ILB with private endpoints and predetermined CIDRs to the workload cluster's VNet & Subnets, and pre-kubeadm commands updating the `/etc/hosts` file of the nodes of the workload cluster.
+
+2. Once the template has been modified to run in a local environment using AKS as the management cluster, we need to peer the VNets, create a private DNS zone for the FQDN of the workload cluster, and re-enable blocked NSG ports.
+
+**Note:**
+
+- The following guidance is only for debugging, and is not a recommendation for any production environment.
+
+- The steps below are for self-managed templates only and do not apply to AKS workload clusters.
+
+- If you are going to run the local tests from a dev machine in Azure, you will have to use a user-assigned managed identity and assign it to the management cluster. Follow the steps below before proceeding.
+  1. Create a user-assigned managed identity
+  2. Assign that managed identity the Contributor role on your subscription
+  3. Set `AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY`, `AZURE_OBJECT_ID_USER_ASSIGNED_IDENTITY`, and `AZURE_USER_ASSIGNED_IDENTITY_RESOURCE_ID` to the client ID, object ID, and resource ID of the user-assigned managed identity, respectively.
+
+#### Update prow template with apiserver ILB networking solution
+
+There are three sections of a prow template that need updating:
+
+1. AzureCluster
+   - `/spec/networkSpec/apiServerLB`
+     - Add a frontend IP
+     - Add an associated private IP to be used by the internal load balancer
+   - `/spec/networkSpec/vnet/cidrBlocks`
+     - Add the VNet CIDR
+   - `/spec/networkSpec/subnets/0/cidrBlocks`
+     - Add the subnet CIDR for the control plane
+   - `/spec/networkSpec/subnets/1/cidrBlocks`
+     - Add the subnet CIDR for the worker nodes
+2. `KubeadmConfigTemplate` - Linux nodes; identifiable by `name: .*-md-0`
+   - `/spec/template/spec/preKubeadmCommands/0`
+     - Add a pre-kubeadm command updating the `/etc/hosts` file of the Linux worker nodes.
+3. `KubeadmConfigTemplate` - Windows nodes; identifiable by `name: .*-md-win`
+   - `/spec/template/spec/preKubeadmCommands/0`
+     - Add a pre-kubeadm command updating the `/etc/hosts` file of the Windows worker nodes.
+
+A sample set of kustomize patches for updating a prow template via its kustomization.yaml is shown below.
+
+```yaml
+- target:
+    kind: AzureCluster
+  patch: |-
+    - op: add
+      path: /spec/networkSpec/apiServerLB
+      value:
+        frontendIPs:
+        - name: ${CLUSTER_NAME}-api-lb
+          publicIP:
+            dnsName: ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX}.${AZURE_LOCATION}.cloudapp.azure.com
+            name: ${CLUSTER_NAME}-api-lb
+        - name: ${CLUSTER_NAME}-internal-lb-private-ip
+          privateIP: ${AZURE_INTERNAL_LB_PRIVATE_IP}
+- target:
+    kind: AzureCluster
+  patch: |-
+    - op: add
+      path: /spec/networkSpec/vnet/cidrBlocks
+      value: []
+    - op: add
+      path: /spec/networkSpec/vnet/cidrBlocks/-
+      value: ${AZURE_VNET_CIDR}
+- target:
+    kind: AzureCluster
+  patch: |-
+    - op: add
+      path: /spec/networkSpec/subnets/0/cidrBlocks
+      value: []
+    - op: add
+      path: /spec/networkSpec/subnets/0/cidrBlocks/-
+      value: ${AZURE_CP_SUBNET_CIDR}
+- target:
+    kind: AzureCluster
+  patch: |-
+    - op: add
+      path: /spec/networkSpec/subnets/1/cidrBlocks
+      value: []
+    - op: add
+      path: /spec/networkSpec/subnets/1/cidrBlocks/-
+      value: ${AZURE_NODE_SUBNET_CIDR}
+- target:
+    kind: KubeadmConfigTemplate
+    name: .*-md-0
+  patch: |-
+    - op: add
+      path: /spec/template/spec/preKubeadmCommands
+      value: []
+    - op: add
+      path: /spec/template/spec/preKubeadmCommands/-
+      value: echo '${AZURE_INTERNAL_LB_PRIVATE_IP} ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX}.${AZURE_LOCATION}.cloudapp.azure.com' >> /etc/hosts
+- target:
+    kind: KubeadmConfigTemplate
+    name: .*-md-win
+  patch: |-
+    - op: add
+      path: /spec/template/spec/preKubeadmCommands/-
+      value:
+        powershell -Command "Add-Content -Path 'C:\\Windows\\System32\\drivers\\etc\\hosts' -Value '${AZURE_INTERNAL_LB_PRIVATE_IP} ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX}.${AZURE_LOCATION}.cloudapp.azure.com'"
+```
+
+#### Peer VNets of the management cluster and the workload cluster
+
+Peering VNets, creating a private DNS zone with the FQDN of the workload cluster, and updating NSGs of the management and workload clusters can be achieved by running `scripts/peer-vnets.sh`.
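+
+For illustration, here are two invocations sketched from the script's own usage text (the `SKIP_*` variables are optional; all steps run by default):
+
+```bash
+# Run every step: peer the VNets, create the private DNS zone, and update NSG rules
+./scripts/peer-vnets.sh tilt-settings.yaml
+
+# Re-run only the NSG rule fix-up, skipping peering and private DNS zone creation
+SKIP_PEER_VNETS=true SKIP_CREATE_PRIVATE_DNS_ZONE=true ./scripts/peer-vnets.sh tilt-settings.yaml
+```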
+
+This script, `scripts/peer-vnets.sh`, should be run after triggering the test run locally, and from a separate terminal.
+
+#### Running the test locally
+
+We recommend running each test individually while debugging a test failure, which means `GINKGO_FOCUS` should be as unique as possible. So, for instance, if you want to run `periodic-cluster-api-provider-azure-e2e-main`'s "With 3 control-plane nodes and 2 Linux and 2 Windows worker nodes" test:
+
+1. First, add the following environment variables to the test itself. For example:
+
+   ```go
+   Expect(os.Setenv("EXP_APISERVER_ILB", "true")).To(Succeed())
+   Expect(os.Setenv("AZURE_INTERNAL_LB_PRIVATE_IP", "10.0.0.101")).To(Succeed())
+   Expect(os.Setenv("AZURE_VNET_CIDR", "10.0.0.0/8")).To(Succeed())
+   Expect(os.Setenv("AZURE_CP_SUBNET_CIDR", "10.0.0.0/16")).To(Succeed())
+   Expect(os.Setenv("AZURE_NODE_SUBNET_CIDR", "10.1.0.0/16")).To(Succeed())
+   ```
+
+   The above lines should be added before `clusterctl.ApplyClusterTemplateAndWait()` is invoked.
+
+2. Open a terminal and run the command below:
+
+   ```bash
+   GINKGO_FOCUS="With 3 control-plane nodes and 2 Linux and 2 Windows worker nodes" USE_LOCAL_KIND_REGISTRY=false SKIP_CLEANUP="true" SKIP_LOG_COLLECTION="true" REGISTRY="<>" MGMT_CLUSTER_TYPE="aks" EXP_APISERVER_ILB=true AZURE_LOCATION="<>" ARCH="amd64" scripts/ci-e2e.sh
+   ```
+
+   **Note:**
+
+   - Set `MGMT_CLUSTER_TYPE` to `"aks"` to leverage `AKS` as the management cluster.
+   - Set `EXP_APISERVER_ILB` to `true` to enable the API Server ILB feature gate.
+   - Set `AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY`, `AZURE_OBJECT_ID_USER_ASSIGNED_IDENTITY` and `AZURE_USER_ASSIGNED_IDENTITY_RESOURCE_ID` to use the user-assigned managed identity instead of the AKS-created managed identity.
+
+3. In a new terminal, wait for the AzureClusters to be created by the above command. Check using `kubectl get AzureClusters -A`. Note that this command will fail or output nothing until the `GINKGO_FOCUS` command above has deployed the worker template and initiated workload cluster creation.
+
+   Once the workload cluster has been created, `export` the `CLUSTER_NAME` and `CLUSTER_NAMESPACE`.
+   It is recommended that `AZURE_INTERNAL_LB_PRIVATE_IP` be set to an IP of the form `10.0.0.x`, say `10.0.0.101`, to avoid any test updates.
+
+   Then open a new terminal at the root of the cluster-api-provider-azure repo and run the command below:
+
+   ```bash
+   AZURE_INTERNAL_LB_PRIVATE_IP="" CLUSTER_NAME="" CLUSTER_NAMESPACE="" ./scripts/peer-vnets.sh ./tilt-settings.yaml
+   ```
+
+You will see the test progress in the first terminal window, the one that invoked `GINKGO_FOCUS=...`.
+
 ## Leveraging internal load balancer

 By default using Tilt with Cluster API Provider Azure (CAPZ), the management cluster is exposed via a public endpoint. This works well for many development scenarios but presents challenges in environments with strict network security requirements.
@@ -127,4 +280,3 @@ By default using Tilt with Cluster API Provider Azure (CAPZ), the management clu
 **Solution:**
 - Use 3 control plane nodes in a stacked etcd setup.
 - Using aks as management cluster sets `CONTROL_PLANE_MACHINE_COUNT` to 3 by default.
-
diff --git a/e2e.mk b/e2e.mk
index 78f321fd1ed..9a72e4c27a5 100644
--- a/e2e.mk
+++ b/e2e.mk
@@ -5,8 +5,11 @@
 ##@ E2E Testing:

 .PHONY: test-e2e-run
-test-e2e-run: generate-e2e-templates install-tools kind-create-bootstrap ## Run e2e tests.
- @$(ENVSUBST) < $(E2E_CONF_FILE) > $(E2E_CONF_FILE_ENVSUBST) && \ +test-e2e-run: generate-e2e-templates install-tools create-bootstrap ## Run e2e tests. + if [ "$(MGMT_CLUSTER_TYPE)" == "aks" ]; then \ + source ./scripts/peer-vnets.sh && source_tilt_settings tilt-settings.yaml; \ + fi; \ + $(ENVSUBST) < $(E2E_CONF_FILE) > $(E2E_CONF_FILE_ENVSUBST) && \ if [ -z "${AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY}" ]; then \ export AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY=$(shell cat $(AZURE_IDENTITY_ID_FILEPATH)); \ fi; \ diff --git a/scripts/aks-as-mgmt.sh b/scripts/aks-as-mgmt.sh index 0cf1b0e6bcd..9bf012629e8 100755 --- a/scripts/aks-as-mgmt.sh +++ b/scripts/aks-as-mgmt.sh @@ -23,9 +23,9 @@ source "${REPO_ROOT}/hack/ensure-azcli.sh" # install az cli and login using WI # shellcheck source=hack/ensure-tags.sh source "${REPO_ROOT}/hack/ensure-tags.sh" # set the right timestamp and job name -export MGMT_CLUSTER_NAME="${MGMT_CLUSTER_NAME:-aks-mgmt-capz-${RANDOM_SUFFIX}}" # management cluster name -export AKS_RESOURCE_GROUP="${AKS_RESOURCE_GROUP:-aks-mgmt-capz-${RANDOM_SUFFIX}}" # resource group name -export AKS_NODE_RESOURCE_GROUP="node-${AKS_RESOURCE_GROUP}" +export MGMT_CLUSTER_NAME="${MGMT_CLUSTER_NAME:-aks-mgmt-$(date +%s)}" # management cluster name +export AKS_RESOURCE_GROUP="${AKS_RESOURCE_GROUP:-"${MGMT_CLUSTER_NAME}"}" # resource group name +export AKS_NODE_RESOURCE_GROUP="${AKS_RESOURCE_GROUP}-nodes" export AKS_MGMT_KUBERNETES_VERSION="${AKS_MGMT_KUBERNETES_VERSION:-v1.30.2}" export AZURE_LOCATION="${AZURE_LOCATION:-westus2}" export AKS_NODE_VM_SIZE="${AKS_NODE_VM_SIZE:-"Standard_B2s"}" @@ -33,11 +33,11 @@ export AKS_NODE_COUNT="${AKS_NODE_COUNT:-2}" export MGMT_CLUSTER_KUBECONFIG="${MGMT_CLUSTER_KUBECONFIG:-$REPO_ROOT/aks-mgmt.config}" export AZURE_IDENTITY_ID_FILEPATH="${AZURE_IDENTITY_ID_FILEPATH:-$REPO_ROOT/azure_identity_id}" export REGISTRY="${REGISTRY:-}" -export AKS_MGMT_VNET_NAME="${AKS_MGMT_VNET_NAME:-"aks-mgmt-vnet-${RANDOM_SUFFIX}"}" +export AKS_MGMT_VNET_NAME="${AKS_MGMT_VNET_NAME:-"${MGMT_CLUSTER_NAME}-vnet"}" export AKS_MGMT_VNET_CIDR="${AKS_MGMT_VNET_CIDR:-"20.255.0.0/16"}" export AKS_MGMT_SERVICE_CIDR="${AKS_MGMT_SERVICE_CIDR:-"20.255.254.0/24"}" export AKS_MGMT_DNS_SERVICE_IP="${AKS_MGMT_DNS_SERVICE_IP:-"20.255.254.100"}" -export AKS_MGMT_SUBNET_NAME="${AKS_MGMT_SUBNET_NAME:-"aks-mgmt-subnet-${RANDOM_SUFFIX}"}" +export AKS_MGMT_SUBNET_NAME="${AKS_MGMT_SUBNET_NAME:-"${MGMT_CLUSTER_NAME}-subnet"}" export AKS_MGMT_SUBNET_CIDR="${AKS_MGMT_SUBNET_CIDR:-"20.255.0.0/24"}" @@ -58,34 +58,36 @@ export SKIP_AKS_CREATE="${SKIP_AKS_CREATE:-false}" main() { echo "--------------------------------" - echo "MGMT_CLUSTER_NAME: $MGMT_CLUSTER_NAME" - echo "AKS_RESOURCE_GROUP: $AKS_RESOURCE_GROUP" - echo "AKS_NODE_RESOURCE_GROUP: $AKS_NODE_RESOURCE_GROUP" - echo "AKS_MGMT_KUBERNETES_VERSION: $AKS_MGMT_KUBERNETES_VERSION" - echo "AZURE_LOCATION: $AZURE_LOCATION" - echo "AKS_NODE_VM_SIZE: $AKS_NODE_VM_SIZE" - echo "AKS_NODE_COUNT: $AKS_NODE_COUNT" - echo "MGMT_CLUSTER_KUBECONFIG: $MGMT_CLUSTER_KUBECONFIG" - echo "AZURE_IDENTITY_ID_FILEPATH: $AZURE_IDENTITY_ID_FILEPATH" - echo "REGISTRY: $REGISTRY" - echo "AKS_MGMT_VNET_NAME: $AKS_MGMT_VNET_NAME" - echo "AKS_MGMT_VNET_CIDR: $AKS_MGMT_VNET_CIDR" - echo "AKS_MGMT_SERVICE_CIDR: $AKS_MGMT_SERVICE_CIDR" - echo "AKS_MGMT_DNS_SERVICE_IP: $AKS_MGMT_DNS_SERVICE_IP" - echo "AKS_MGMT_SUBNET_NAME: $AKS_MGMT_SUBNET_NAME" - echo "AKS_MGMT_SUBNET_CIDR: $AKS_MGMT_SUBNET_CIDR" - - echo "AZURE_SUBSCRIPTION_ID: $AZURE_SUBSCRIPTION_ID" - echo "AZURE_CLIENT_ID: 
$AZURE_CLIENT_ID" - echo "AZURE_TENANT_ID: $AZURE_TENANT_ID" - echo "APISERVER_LB_DNS_SUFFIX: $APISERVER_LB_DNS_SUFFIX" - echo "AKS_MI_CLIENT_ID: $AKS_MI_CLIENT_ID" - echo "AKS_MI_OBJECT_ID: $AKS_MI_OBJECT_ID" - echo "AKS_MI_RESOURCE_ID: $AKS_MI_RESOURCE_ID" - echo "MANAGED_IDENTITY_NAME: $MANAGED_IDENTITY_NAME" - echo "MANAGED_IDENTITY_RG: $MANAGED_IDENTITY_RG" - echo "ASO_CREDENTIAL_SECRET_MODE: $ASO_CREDENTIAL_SECRET_MODE" - echo "SKIP_AKS_CREATE: $SKIP_AKS_CREATE" + echo "MGMT_CLUSTER_NAME: $MGMT_CLUSTER_NAME" + echo "AKS_RESOURCE_GROUP: $AKS_RESOURCE_GROUP" + echo "AKS_NODE_RESOURCE_GROUP: $AKS_NODE_RESOURCE_GROUP" + echo "AKS_MGMT_KUBERNETES_VERSION: $AKS_MGMT_KUBERNETES_VERSION" + echo "AZURE_LOCATION: $AZURE_LOCATION" + echo "AKS_NODE_VM_SIZE: $AKS_NODE_VM_SIZE" + echo "AKS_NODE_COUNT: $AKS_NODE_COUNT" + echo "MGMT_CLUSTER_KUBECONFIG: $MGMT_CLUSTER_KUBECONFIG" + echo "AZURE_IDENTITY_ID_FILEPATH: $AZURE_IDENTITY_ID_FILEPATH" + echo "REGISTRY: $REGISTRY" + echo "AKS_MGMT_VNET_NAME: $AKS_MGMT_VNET_NAME" + echo "AKS_MGMT_VNET_CIDR: $AKS_MGMT_VNET_CIDR" + echo "AKS_MGMT_SERVICE_CIDR: $AKS_MGMT_SERVICE_CIDR" + echo "AKS_MGMT_DNS_SERVICE_IP: $AKS_MGMT_DNS_SERVICE_IP" + echo "AKS_MGMT_SUBNET_NAME: $AKS_MGMT_SUBNET_NAME" + echo "AKS_MGMT_SUBNET_CIDR: $AKS_MGMT_SUBNET_CIDR" + echo "AZURE_SUBSCRIPTION_ID: $AZURE_SUBSCRIPTION_ID" + echo "AZURE_CLIENT_ID: $AZURE_CLIENT_ID" + echo "AZURE_TENANT_ID: $AZURE_TENANT_ID" + echo "APISERVER_LB_DNS_SUFFIX: $APISERVER_LB_DNS_SUFFIX" + echo "AKS_MI_CLIENT_ID: $AKS_MI_CLIENT_ID" + echo "AKS_MI_OBJECT_ID: $AKS_MI_OBJECT_ID" + echo "AKS_MI_RESOURCE_ID: $AKS_MI_RESOURCE_ID" + echo "MANAGED_IDENTITY_NAME: $MANAGED_IDENTITY_NAME" + echo "MANAGED_IDENTITY_RG: $MANAGED_IDENTITY_RG" + echo "ASO_CREDENTIAL_SECRET_MODE: $ASO_CREDENTIAL_SECRET_MODE" + echo "SKIP_AKS_CREATE: $SKIP_AKS_CREATE" + echo "AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY: ${AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY:-}" + echo "AZURE_OBJECT_ID_USER_ASSIGNED_IDENTITY: ${AZURE_OBJECT_ID_USER_ASSIGNED_IDENTITY:-}" + echo "AZURE_USER_ASSIGNED_IDENTITY_RESOURCE_ID: ${AZURE_USER_ASSIGNED_IDENTITY_RESOURCE_ID:-}" echo "--------------------------------" # if using SKIP_AKS_CREATE=true, skip creating the AKS cluster @@ -131,6 +133,7 @@ create_aks_cluster() { --node-vm-size "${AKS_NODE_VM_SIZE}" \ --node-resource-group "${AKS_NODE_RESOURCE_GROUP}" \ --vm-set-type VirtualMachineScaleSets \ + --enable-managed-identity \ --generate-ssh-keys \ --network-plugin azure \ --vnet-subnet-id "/subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${AKS_RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${AKS_MGMT_VNET_NAME}/subnets/${AKS_MGMT_SUBNET_NAME}" \ @@ -155,31 +158,69 @@ create_aks_cluster() { az aks get-credentials --name "${MGMT_CLUSTER_NAME}" --resource-group "${AKS_RESOURCE_GROUP}" \ --overwrite-existing --only-show-errors - # echo "fetching Client ID for ${MGMT_CLUSTER_NAME}" - AKS_MI_CLIENT_ID=$(az aks show -n "${MGMT_CLUSTER_NAME}" -g "${AKS_RESOURCE_GROUP}" --output json \ - --only-show-errors | jq -r '.identityProfile.kubeletidentity.clientId') - export AKS_MI_CLIENT_ID - echo "mgmt client identity: ${AKS_MI_CLIENT_ID}" - echo "${AKS_MI_CLIENT_ID}" > "${AZURE_IDENTITY_ID_FILEPATH}" - - # echo "fetching Object ID for ${MGMT_CLUSTER_NAME}" - AKS_MI_OBJECT_ID=$(az aks show -n "${MGMT_CLUSTER_NAME}" -g "${AKS_RESOURCE_GROUP}" --output json \ - --only-show-errors | jq -r '.identityProfile.kubeletidentity.objectId') - export AKS_MI_OBJECT_ID - echo "mgmt object identity: ${AKS_MI_OBJECT_ID}" - - # echo 
"fetching Resource ID for ${MGMT_CLUSTER_NAME}" - AKS_MI_RESOURCE_ID=$(az aks show -n "${MGMT_CLUSTER_NAME}" -g "${AKS_RESOURCE_GROUP}" --output json \ - --only-show-errors | jq -r '.identityProfile.kubeletidentity.resourceId') - export AKS_MI_RESOURCE_ID - echo "mgmt resource identity: ${AKS_MI_RESOURCE_ID}" - - # save resource identity name and resource group - MANAGED_IDENTITY_NAME=$(az identity show --ids "${AKS_MI_RESOURCE_ID}" --output json | jq -r '.name') - # export MANAGED_IDENTITY_NAME - echo "mgmt resource identity name: ${MANAGED_IDENTITY_NAME}" - USER_IDENTITY=$MANAGED_IDENTITY_NAME - export USER_IDENTITY + if [[ -n "${AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY:-}" ]] && \ + [[ -n "${AZURE_OBJECT_ID_USER_ASSIGNED_IDENTITY:-}" ]] && \ + [[ -n "${AZURE_USER_ASSIGNED_IDENTITY_RESOURCE_ID:-}" ]]; then + echo "using user-provided Managed Identity" + # echo "fetching Client ID for ${MGMT_CLUSTER_NAME}" + AKS_MI_CLIENT_ID=${AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY} + export AKS_MI_CLIENT_ID + echo "mgmt client identity: ${AKS_MI_CLIENT_ID}" + echo "${AKS_MI_CLIENT_ID}" > "${AZURE_IDENTITY_ID_FILEPATH}" + + # echo "fetching Object ID for ${MGMT_CLUSTER_NAME}" + AKS_MI_OBJECT_ID=${AZURE_OBJECT_ID_USER_ASSIGNED_IDENTITY} + export AKS_MI_OBJECT_ID + echo "mgmt object identity: ${AKS_MI_OBJECT_ID}" + + # echo "fetching Resource ID for ${MGMT_CLUSTER_NAME}" + AKS_MI_RESOURCE_ID=${AZURE_USER_ASSIGNED_IDENTITY_RESOURCE_ID} + export AKS_MI_RESOURCE_ID + echo "mgmt resource identity: ${AKS_MI_RESOURCE_ID}" + + # save resource identity name and resource group + MANAGED_IDENTITY_NAME=$(az identity show --ids "${AKS_MI_RESOURCE_ID}" --output json | jq -r '.name') + # export MANAGED_IDENTITY_NAME + echo "mgmt resource identity name: ${MANAGED_IDENTITY_NAME}" + USER_IDENTITY=$MANAGED_IDENTITY_NAME + export USER_IDENTITY + + echo "assigning user-assigned managed identity to the AKS cluster" + az aks update --resource-group "${AKS_RESOURCE_GROUP}" \ + --name "${MGMT_CLUSTER_NAME}" \ + --enable-managed-identity \ + --assign-identity "${AKS_MI_RESOURCE_ID}" \ + --assign-kubelet-identity "${AKS_MI_RESOURCE_ID}" \ + --output none --only-show-errors --yes + + else + # echo "fetching Client ID for ${MGMT_CLUSTER_NAME}" + AKS_MI_CLIENT_ID=$(az aks show -n "${MGMT_CLUSTER_NAME}" -g "${AKS_RESOURCE_GROUP}" --output json \ + --only-show-errors | jq -r '.identityProfile.kubeletidentity.clientId') + export AKS_MI_CLIENT_ID + echo "mgmt client identity: ${AKS_MI_CLIENT_ID}" + echo "${AKS_MI_CLIENT_ID}" > "${AZURE_IDENTITY_ID_FILEPATH}" + + # echo "fetching Object ID for ${MGMT_CLUSTER_NAME}" + AKS_MI_OBJECT_ID=$(az aks show -n "${MGMT_CLUSTER_NAME}" -g "${AKS_RESOURCE_GROUP}" --output json \ + --only-show-errors | jq -r '.identityProfile.kubeletidentity.objectId') + export AKS_MI_OBJECT_ID + echo "mgmt object identity: ${AKS_MI_OBJECT_ID}" + + # echo "fetching Resource ID for ${MGMT_CLUSTER_NAME}" + AKS_MI_RESOURCE_ID=$(az aks show -n "${MGMT_CLUSTER_NAME}" -g "${AKS_RESOURCE_GROUP}" --output json \ + --only-show-errors | jq -r '.identityProfile.kubeletidentity.resourceId') + export AKS_MI_RESOURCE_ID + echo "mgmt resource identity: ${AKS_MI_RESOURCE_ID}" + + # save resource identity name and resource group + MANAGED_IDENTITY_NAME=$(az identity show --ids "${AKS_MI_RESOURCE_ID}" --output json | jq -r '.name') + # export MANAGED_IDENTITY_NAME + echo "mgmt resource identity name: ${MANAGED_IDENTITY_NAME}" + USER_IDENTITY=$MANAGED_IDENTITY_NAME + export USER_IDENTITY + + fi MANAGED_IDENTITY_RG=$(az identity show --ids 
"${AKS_MI_RESOURCE_ID}" --output json | jq -r '.resourceGroup') export MANAGED_IDENTITY_RG diff --git a/scripts/peer-vnets.sh b/scripts/peer-vnets.sh new file mode 100755 index 00000000000..b7b4b8fda0a --- /dev/null +++ b/scripts/peer-vnets.sh @@ -0,0 +1,426 @@ +#!/usr/bin/env bash +# Copyright 2024 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit # exit immediately if a command exits with a non-zero status. +set -o nounset # exit when script tries to use undeclared variables. +set -o pipefail # make the pipeline fail if any command in it fails. + +# Color definitions +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color +BOLD='\033[1m' + +# Function to print colored messages +print_header() { + echo -e "\n${BOLD}${BLUE}-------- $1 --------${NC}\n" +} + +print_success() { + echo -e "${GREEN}✓ $1${NC}" +} + +print_info() { + echo -e "${CYAN}ℹ $1${NC}" +} + +print_warning() { + echo -e "${YELLOW}⚠ $1${NC}" +} + +print_error() { + echo -e "${RED}✗ $1${NC}" >&2 +} + +print_step() { + echo -e "${BOLD}${CYAN}Step $1:${NC} $2" +} + +usage() { + cat < + +This script peers Azure VNets and sets up DNS for AKS clusters. + +It requires a tilt-settings.yaml file with aks_as_mgmt_settings dict populated with env variables created from running make aks-create. The following +environment variables will be sourced from the file: + - AKS_RESOURCE_GROUP + - AKS_MGMT_VNET_NAME + - CLUSTER_NAME + - CLUSTER_NAMESPACE + - AZURE_INTERNAL_LB_PRIVATE_IP + - APISERVER_LB_DNS_SUFFIX + - AZURE_LOCATION + - AKS_NODE_RESOURCE_GROUP + +Additionally, you may optionally skip individual steps by setting these environment variables: + + SKIP_PEER_VNETS: Set to "true" to skip the VNET peering operations. + SKIP_CREATE_PRIVATE_DNS_ZONE: Set to "true" to skip the private DNS zone creation. + SKIP_NSG_RULES: Set to "true" to skip the NSG rule checking and updates. + + +Examples: + Run all steps: + ./$(basename "$0") tilt-settings.yaml + + Skip the VNET peering operations: + SKIP_PEER_VNETS=true ./$(basename "$0") tilt-settings.yaml + + Skip the NSG rule check and update: + SKIP_NSG_RULES=true ./$(basename "$0") tilt-settings.yaml + + Skip the VNET peering operations and the NSG rule check and update: + SKIP_PEER_VNETS=true SKIP_NSG_RULES=true ./$(basename "$0") tilt-settings.yaml + + Skip the VNET peering operations and the private DNS zone creation: + SKIP_PEER_VNETS=true SKIP_CREATE_PRIVATE_DNS_ZONE=true ./$(basename "$0") tilt-settings.yaml + +EOF + exit 1 +} + +error() { + echo "ERROR: $1" >&2 + exit 1 +} + +source_tilt_settings() { + [ $# -eq 0 ] && usage + + TILT_SETTINGS_FILE="$1" + [ ! -f "$TILT_SETTINGS_FILE" ] && error "File not found: $TILT_SETTINGS_FILE" + + # Function to process settings from a specific section + process_settings() { + local section="$1" + echo "Reading variables from $TILT_SETTINGS_FILE under '$section'..." 
+
+        # Get the list of keys under the section
+        local VAR_KEYS
+        VAR_KEYS=$(yq e ".$section | keys | .[]" "$TILT_SETTINGS_FILE" 2>/dev/null || true)
+
+        # If there's no such key or it's empty, VAR_KEYS will be empty
+        if [ -z "$VAR_KEYS" ]; then
+            echo "No variables found under '$section'."
+        else
+            for key in $VAR_KEYS; do
+                # Read the value of each key
+                value=$(yq e ".${section}[\"$key\"]" "$TILT_SETTINGS_FILE")
+                # Export the key/value pair
+                export "$key=$value"
+                echo "Exported $key=$value"
+            done
+        fi
+    }
+
+    # Process both sections
+    process_settings "aks_as_mgmt_settings"
+    process_settings "kustomize_substitutions"
+
+    echo "All variables exported"
+}
+
+# Check that all required environment variables are set
+check_required_vars() {
+    required_vars=(
+        "AKS_RESOURCE_GROUP"
+        "AKS_MGMT_VNET_NAME"
+        "CLUSTER_NAME"
+        "CLUSTER_NAMESPACE"
+        "AZURE_INTERNAL_LB_PRIVATE_IP"
+        "APISERVER_LB_DNS_SUFFIX"
+        "AZURE_LOCATION"
+        "AKS_NODE_RESOURCE_GROUP"
+    )
+
+    print_info "Checking required environment variables..."
+    for var in "${required_vars[@]}"; do
+        [ -z "${!var:-}" ] && error "$var is not set"
+    done
+    print_success "All required environment variables are set"
+
+    # Timeout (in seconds) shared by the "az ... wait" calls below
+    WAIT_TIMEOUT=600
+
+    # DNS zone name, defined once to avoid repetition
+    DNS_ZONE="${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX}.${AZURE_LOCATION}.cloudapp.azure.com"
+}
+
+# Peers the mgmt and workload clusters' VNETs
+peer_vnets() {
+    print_header "Peering VNETs"
+
+    # Get VNET IDs, failing fast if either VNET never becomes available
+    az network vnet wait --resource-group "${AKS_RESOURCE_GROUP}" --name "${AKS_MGMT_VNET_NAME}" --created --timeout "${WAIT_TIMEOUT}" || error "Timeout waiting for management VNET"
+    MGMT_VNET_ID=$(az network vnet show --resource-group "${AKS_RESOURCE_GROUP}" --name "${AKS_MGMT_VNET_NAME}" --query id --output tsv) || error "Failed to get management VNET ID"
+    print_step "1/4" "management VNET ${AKS_MGMT_VNET_NAME} found (ID: ${MGMT_VNET_ID})"
+
+    az network vnet wait --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}-vnet" --created --timeout "${WAIT_TIMEOUT}" || error "Timeout waiting for workload VNET"
+    WORKLOAD_VNET_ID=$(az network vnet show --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}-vnet" --query id --output tsv) || error "Failed to get workload VNET ID"
+    print_step "2/4" "workload VNET ${CLUSTER_NAME}-vnet found (ID: ${WORKLOAD_VNET_ID})"
+
+    # Peer the mgmt vnet to the workload vnet
+    az network vnet peering create \
+        --name "mgmt-to-${CLUSTER_NAME}" \
+        --resource-group "${AKS_RESOURCE_GROUP}" \
+        --vnet-name "${AKS_MGMT_VNET_NAME}" \
+        --remote-vnet "${WORKLOAD_VNET_ID}" \
+        --allow-vnet-access true \
+        --allow-forwarded-traffic true \
+        --only-show-errors --output none || error "Failed to create management peering"
+    print_step "3/4" "mgmt-to-${CLUSTER_NAME} peering created in ${AKS_MGMT_VNET_NAME}"
+
+    # Peer the workload vnet back to the mgmt vnet
+    az network vnet peering create \
+        --name "${CLUSTER_NAME}-to-mgmt" \
+        --resource-group "${CLUSTER_NAME}" \
+        --vnet-name "${CLUSTER_NAME}-vnet" \
+        --remote-vnet "${MGMT_VNET_ID}" \
+        --allow-vnet-access true \
+        --allow-forwarded-traffic true \
+        --only-show-errors --output none || error "Failed to create workload peering"
+    print_step "4/4" "${CLUSTER_NAME}-to-mgmt peering created in ${CLUSTER_NAME}-vnet"
+    print_success "VNET peering completed successfully"
+}
+
+# Creates a private DNS zone and links it to the workload and mgmt VNETs
+create_private_dns_zone() {
+    print_header "Creating private DNS zone"
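+    # The zone name matches the API server's public FQDN, so lookups from the peered
+    # VNets resolve to the internal LB private IP (set via the apex record below)
+    # instead of the public address.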
+
+    # Create the private DNS zone
+    az network private-dns zone create \
+        --resource-group "${CLUSTER_NAME}" \
+        --name "${DNS_ZONE}" \
+        --only-show-errors --output none || error "Failed to create private DNS zone"
+    az network private-dns zone wait \
+        --resource-group "${CLUSTER_NAME}" \
+        --name "${DNS_ZONE}" \
+        --created --timeout "${WAIT_TIMEOUT}" \
+        --only-show-errors --output none || error "Timeout waiting for private DNS zone"
+    print_step "1/4" "${DNS_ZONE} private DNS zone created in ${CLUSTER_NAME}"
+
+    # Link the private DNS zone to the workload vnet
+    az network private-dns link vnet create \
+        --resource-group "${CLUSTER_NAME}" \
+        --zone-name "${DNS_ZONE}" \
+        --name "${CLUSTER_NAME}-to-mgmt" \
+        --virtual-network "${WORKLOAD_VNET_ID}" \
+        --registration-enabled false \
+        --only-show-errors --output none || error "Failed to create workload DNS link"
+    az network private-dns link vnet wait \
+        --resource-group "${CLUSTER_NAME}" \
+        --zone-name "${DNS_ZONE}" \
+        --name "${CLUSTER_NAME}-to-mgmt" \
+        --created --timeout "${WAIT_TIMEOUT}" \
+        --only-show-errors --output none || error "Timeout waiting for workload DNS link"
+    print_step "2/4" "workload cluster vnet ${CLUSTER_NAME}-vnet linked with private DNS zone"
+
+    # Link the private DNS zone to the mgmt vnet
+    az network private-dns link vnet create \
+        --resource-group "${CLUSTER_NAME}" \
+        --zone-name "${DNS_ZONE}" \
+        --name "mgmt-to-${CLUSTER_NAME}" \
+        --virtual-network "${MGMT_VNET_ID}" \
+        --registration-enabled false \
+        --only-show-errors --output none || error "Failed to create management DNS link"
+    az network private-dns link vnet wait \
+        --resource-group "${CLUSTER_NAME}" \
+        --zone-name "${DNS_ZONE}" \
+        --name "mgmt-to-${CLUSTER_NAME}" \
+        --created --timeout "${WAIT_TIMEOUT}" \
+        --only-show-errors --output none || error "Timeout waiting for management DNS link"
+    print_step "3/4" "management cluster vnet ${AKS_MGMT_VNET_NAME} linked with private DNS zone"
+
+    # Create the apex A record pointing the zone at the internal LB private IP
+    az network private-dns record-set a add-record \
+        --resource-group "${CLUSTER_NAME}" \
+        --zone-name "${DNS_ZONE}" \
+        --record-set-name "@" \
+        --ipv4-address "${AZURE_INTERNAL_LB_PRIVATE_IP}" \
+        --only-show-errors --output none || error "Failed to create DNS record"
+    print_step "4/4" "\"@\" private DNS zone record created to point ${DNS_ZONE} to ${AZURE_INTERNAL_LB_PRIVATE_IP}"
+    print_success "Private DNS zone creation completed successfully"
+}
+
+# Waits for NSG rules with prefix "NRMS-Rule-101" in the relevant resource groups,
+# then creates or modifies NRMS-Rule-101 to allow the specified ports.
+wait_and_fix_nsg_rules() {
+    local tcp_ports="443 5986 6443"
+    local udp_ports="53 123"
+    local timeout=3000      # seconds to wait per NSG for the appearance of an NRMS-Rule-101 rule
+    local sleep_interval=10 # seconds between checks
+
+    print_header "Checking and Updating NSG Rules"
+
+    print_info "Waiting for NSG rules with prefix 'NRMS-Rule-101' to appear..."
+
+    local resource_groups=("$AKS_NODE_RESOURCE_GROUP" "$AKS_RESOURCE_GROUP" "$CLUSTER_NAME")
+
+    for rg in "${resource_groups[@]}"; do
+        echo
+        print_info "Processing NSGs in resource group: '$rg'"
+        local nsg_list=""
+        local rg_start_time
+        rg_start_time=$(date +%s)
+        # Wait until at least one NSG is present in the resource group.
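+        # NSGs are created asynchronously while the clusters provision, so poll with a
+        # deadline rather than querying once; an empty list usually means "not yet".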
+ while :; do + nsg_list=$(az network nsg list --resource-group "$rg" --query "[].name" --output tsv) + if [ -n "$nsg_list" ]; then + break + fi + if (( $(date +%s) - rg_start_time >= timeout )); then + error "Timeout waiting for NSGs in resource group '$rg'" + fi + print_warning "No NSGs found in '$rg' yet, waiting..." + sleep "$sleep_interval" + done + + for nsg in $nsg_list; do + echo + print_info "Checking for NRMS-Rule-101 rules in NSG: '$nsg' (Resource Group: '$rg')" + local rule_found="" + local rule_start_time + rule_start_time=$(date +%s) + while :; do + # Query NSG rules with names that start with "NRMS-Rule-101". + rule_found=$(az network nsg rule list --resource-group "$rg" --nsg-name "$nsg" --query "[?starts_with(name, 'NRMS-Rule-101')].name" --output tsv) + if [ -n "$rule_found" ]; then + print_success "Found NRMS rule(s): $rule_found in NSG '$nsg'" + break + fi + if (( $(date +%s) - rule_start_time >= timeout )); then + print_warning "Timeout waiting for NRMS-Rule-101 rules in NSG '$nsg' in RG '$rg'. Skipping NSG." + break + fi + print_warning "NRMS-Rule-101 rules not found in NSG '$nsg', waiting..." + sleep "$sleep_interval" + done + + # If an NRMS-Rule-101 rule is found in the NSG, then ensure NRMS-Rule-101 is updated. + if [ -n "$rule_found" ]; then + echo + print_info "Configuring NRMS-Rule-101 in NSG '$nsg' (Resource Group: '$rg')" + print_info "Allowed TCP ports: $tcp_ports" + if az network nsg rule show --resource-group "$rg" --nsg-name "$nsg" --name "NRMS-Rule-101" --output none 2>/dev/null; then + # shellcheck disable=SC2086 + az network nsg rule update \ + --resource-group "$rg" \ + --nsg-name "$nsg" \ + --name "NRMS-Rule-101" \ + --access Allow \ + --direction Inbound \ + --protocol "TCP" \ + --destination-port-ranges $tcp_ports \ + --destination-address-prefixes "*" \ + --source-address-prefixes "*" \ + --source-port-ranges "*" \ + --only-show-errors --output none || error "Failed to update NRMS-Rule-101 in NSG '$nsg' in resource group '$rg'" + print_success "Successfully updated NRMS-Rule-101 in NSG '$nsg'" + + echo + print_info "Configuring NRMS-Rule-103 in NSG '$nsg' (Resource Group: '$rg')" + print_info "Allowed UDP ports: $udp_ports" + # shellcheck disable=SC2086 + az network nsg rule update \ + --resource-group "$rg" \ + --nsg-name "$nsg" \ + --name "NRMS-Rule-103" \ + --access Allow \ + --direction Inbound \ + --protocol "UDP" \ + --destination-port-ranges $udp_ports \ + --destination-address-prefixes "*" \ + --source-address-prefixes "*" \ + --source-port-ranges "*" \ + --only-show-errors --output none || error "Failed to update NRMS-Rule-103 in NSG '$nsg' in resource group '$rg'" + print_success "Successfully updated NRMS-Rule-103 in NSG '$nsg'" + fi + fi + done + done + print_success "NSG Rule Check and Update Complete" +} + +# Waits for the controlplane of the workload cluster to be ready +wait_for_controlplane_ready() { + print_header "Waiting for Workload Cluster Control Plane" + + print_info "Waiting for secret: ${CLUSTER_NAME}-kubeconfig to be available in the management cluster" + until kubectl get secret "${CLUSTER_NAME}-kubeconfig" -n "${CLUSTER_NAMESPACE}" > /dev/null 2>&1; do + sleep 5 + done + kubectl get secret "${CLUSTER_NAME}-kubeconfig" -n "${CLUSTER_NAMESPACE}" -o jsonpath='{.data.value}' | base64 --decode > "./${CLUSTER_NAME}.kubeconfig" + chmod 600 "./${CLUSTER_NAME}.kubeconfig" + + # Save the current (management) kubeconfig. + # If KUBECONFIG was not set, assume the default is $HOME/.kube/config. 
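+    # (KUBECONFIG is restored from MANAGEMENT_KUBECONFIG at the end of this function,
+    # so later kubectl calls target the management cluster again.)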
+ MANAGEMENT_KUBECONFIG="${KUBECONFIG:-$HOME/.kube/config}" + + # Now switch to the workload cluster kubeconfig. + export KUBECONFIG="./${CLUSTER_NAME}.kubeconfig" # Set kubeconfig for subsequent kubectl commands + + print_info "Waiting for controlplane of the workload cluster to be ready..." + + # Wait for the API server to be responsive and for control plane nodes to be Ready + until kubectl get nodes --selector='node-role.kubernetes.io/control-plane' --no-headers 2>/dev/null | grep -q "Ready"; do + print_warning "Waiting for control plane nodes to be responsive and Ready..." + sleep 10 + done + + # Reset KUBECONFIG back to the management cluster kubeconfig. + export KUBECONFIG="$MANAGEMENT_KUBECONFIG" + print_info "Reset KUBECONFIG to management cluster kubeconfig: $KUBECONFIG" + print_success "Workload Cluster Control Plane is Ready" +} + +main() { + source_tilt_settings "$@" + check_required_vars + + # SKIP_PEER_VNETS can be set to true to skip the VNET peering operations + if [ "${SKIP_PEER_VNETS:-false}" != "true" ]; then + peer_vnets + else + print_header "Skipping VNET Peering" + print_info "Skipping peer_vnets as requested via SKIP_PEER_VNETS." + fi + + # wait for controlplane of the workload cluster to be ready and then create the private DNS zone + # SKIP_CREATE_PRIVATE_DNS_ZONE can be set to true to skip the private DNS zone creation + if [ "${SKIP_CREATE_PRIVATE_DNS_ZONE:-false}" != "true" ]; then + wait_for_controlplane_ready + create_private_dns_zone + else + print_header "Skipping Private DNS Zone Creation" + print_info "Skipping create_private_dns_zone as requested via SKIP_CREATE_PRIVATE_DNS_ZONE." + fi + + # SKIP_NSG_RULES can be set to true to skip the NSG rule checking and updates + if [ "${SKIP_NSG_RULES:-false}" != "true" ]; then + wait_and_fix_nsg_rules + else + print_header "Skipping NSG Rule Updates" + print_info "Skipping wait_and_fix_nsg_rules as requested via SKIP_NSG_RULES." + fi +} + +# Only run main if script is executed directly (not sourced) +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git a/test/e2e/config/azure-dev.yaml b/test/e2e/config/azure-dev.yaml index 330e25b279e..53b1d031f6c 100644 --- a/test/e2e/config/azure-dev.yaml +++ b/test/e2e/config/azure-dev.yaml @@ -226,9 +226,9 @@ variables: CONFORMANCE_IMAGE: "${CONFORMANCE_IMAGE:-}" CONFORMANCE_NODES: "${CONFORMANCE_NODES:-1}" IP_FAMILY: "IPv4" - CLUSTER_IDENTITY_NAME: "cluster-identity-ci" - ASO_CREDENTIAL_SECRET_NAME: "aso-credentials" - ASO_CREDENTIAL_SECRET_MODE: workloadidentity + CLUSTER_IDENTITY_NAME: "${CLUSTER_IDENTITY_NAME:-cluster-identity-ci}" + ASO_CREDENTIAL_SECRET_NAME: "${ASO_CREDENTIAL_SECRET_NAME:-aso-credentials}" + ASO_CREDENTIAL_SECRET_MODE: "${ASO_CREDENTIAL_SECRET_MODE:-workloadidentity}" NODE_DRAIN_TIMEOUT: "60s" CI_VERSION: "" KUBETEST_CONFIGURATION: "./data/kubetest/conformance.yaml" @@ -240,8 +240,8 @@ variables: LATEST_PROVIDER_UPGRADE_VERSION: "v1.19.1" OLD_CAAPH_UPGRADE_VERSION: "v0.1.0-alpha.10" LATEST_CAAPH_UPGRADE_VERSION: "v0.2.5" - CI_RG: capz-ci - USER_IDENTITY: cloud-provider-user-identity + CI_RG: "${CI_RG:-capz-ci}" + USER_IDENTITY: "${USER_IDENTITY:-cloud-provider-user-identity}" EXP_APISERVER_ILB: "true" intervals: