Skip to content

Commit 391fd42

Browse files
authored
Script for batch migration legacy to msi (#1517)
* script for batch migration legacy to msi
1 parent c9f5dfa commit 391fd42

File tree

1 file changed

+395
-0
lines changed

1 file changed

+395
-0
lines changed
Lines changed: 395 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,395 @@
1+
#!/bin/bash
2+
#
3+
# Script to migrate container insights monitoring to managed identity authentication
4+
# For network and firewall endpoint setup, refer to the following doc for detailed configuration:
5+
# https://learn.microsoft.com/en-us/azure/azure-monitor/containers/kubernetes-monitoring-firewall
6+
# High Level Steps:
7+
# 1. Prerequisites Check:
8+
# - Validate Azure CLI and login status
9+
# - Verify subscription access
10+
# - Check cluster identity type
11+
# - Verify container insights MSI status
12+
# - Check cluster health
13+
#
14+
# 2. Cluster Discovery:
15+
# - Scan specified subscriptions
16+
# - Filter by cluster types (AKS/Arc)
17+
# - List eligible clusters
18+
#
19+
# 3. Migration Process:
20+
# - Get Log Analytics workspace ID
21+
# - Disable monitoring addon
22+
# - Re-enable with MSI auth
23+
#
24+
# Usage:
25+
# ./migrate-to-container-insights-msi.sh -s <subscriptionIds> -c <clusterTypes>
26+
#
27+
# Example:
28+
# ./migrate-to-container-insights-msi.sh -s "sub1,sub2" -c "aks,arc"
29+
#
30+
31+
# Print the command-line help text and stop the script.
# Outputs: synopsis, option descriptions and an example invocation on stdout.
# Exits:   always with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 -s <subscriptionIds> -c <clusterTypes>" \
    " -s : Comma-separated list of subscription IDs" \
    " -c : Comma-separated list of cluster types (aks,arc)" \
    "Example:" \
    " $0 -s \"subId1,subId2\" -c \"aks,arc\""
  exit 1
}
40+
41+
# Check whether a cluster is eligible for migration of container insights to
# managed identity (MSI) authentication.
#
# Arguments:
#   $1 - cluster type: "aks" or "arc"
#   $2 - resource group of the cluster
#   $3 - cluster name
# Outputs:
#   The reason for a rejection is written to stderr.
# Returns:
#   0 when every check passes; 1 when a check fails, a lookup fails, or the
#   cluster type is not "aks" or "arc".
check_prerequisites() {
  local cluster_type=$1
  local resource_group=$2
  local name=$3
  # Declared apart from the assignments so "local" cannot hide the exit
  # status of a command substitution.
  local provisioning_state identity_type monitoring_enabled auth_mode
  local cluster_state extension_state use_aad_auth
  # The addon key has been seen both as "omsagent" and "omsAgent", and the
  # config key in both camel case and lower case, so every spelling is tried.
  local enabled_query="addonProfiles.omsagent.enabled || addonProfiles.omsAgent.enabled"
  local aad_query="addonProfiles.omsagent.config.useAADAuth || addonProfiles.omsAgent.config.useAADAuth || addonProfiles.omsagent.config.useaadauth || addonProfiles.omsAgent.config.useaadauth || ''"

  if [ "$cluster_type" = "aks" ]; then
    # 1. Check AKS cluster health first
    if ! provisioning_state=$(az aks show -g "$resource_group" -n "$name" \
      --query "provisioningState" -o tsv); then
      echo "Could not query AKS cluster $name in resource group $resource_group" >&2
      return 1
    fi
    if [ "$provisioning_state" != "Succeeded" ]; then
      echo "Cluster not ready (current state: $provisioning_state)" >&2
      return 1
    fi

    # 2. Check if cluster uses managed identity
    identity_type=$(az aks show -g "$resource_group" -n "$name" \
      --query "identity.type" -o tsv)
    if [ "$identity_type" != "SystemAssigned" ] && [ "$identity_type" != "UserAssigned" ]; then
      echo "Current identity type: $identity_type (requires SystemAssigned or UserAssigned)" >&2
      echo "To migrate to managed identity, visit: https://learn.microsoft.com/en-us/azure/aks/use-managed-identity" >&2
      echo "Please migrate to managed identity and then rerun this script" >&2
      return 1
    fi

    # 3. Check if monitoring is enabled (value is lower-cased before comparing)
    monitoring_enabled=$(az aks show -g "$resource_group" -n "$name" \
      --query "$enabled_query" -o tsv | tr '[:upper:]' '[:lower:]')
    if [ "$monitoring_enabled" != "true" ]; then
      echo "Container insights not enabled on this cluster" >&2
      return 1
    fi

    # 4. Check if monitoring is already using MSI
    auth_mode=$(az aks show -g "$resource_group" -n "$name" \
      --query "$aad_query" -o tsv | tr '[:upper:]' '[:lower:]')
    if [ "$auth_mode" = "true" ]; then
      echo "Monitoring already using MSI authentication" >&2
      return 1
    fi

  elif [ "$cluster_type" = "arc" ]; then
    # 1. Check Arc cluster health first
    if ! cluster_state=$(az connectedk8s show -g "$resource_group" -n "$name" \
      --query "provisioningState" -o tsv); then
      echo "Could not query Arc cluster $name in resource group $resource_group" >&2
      return 1
    fi
    if [ "$cluster_state" != "Succeeded" ]; then
      echo "Arc cluster not ready (current state: $cluster_state)" >&2
      return 1
    fi

    # 2. Check if cluster uses managed identity
    identity_type=$(az connectedk8s show -g "$resource_group" -n "$name" \
      --query "identity.type" -o tsv)
    if [ "$identity_type" != "SystemAssigned" ] && [ "$identity_type" != "UserAssigned" ]; then
      echo "Current identity type: $identity_type (requires SystemAssigned or UserAssigned)" >&2
      echo "Arc-enabled clusters require managed identity for monitoring. Current authentication method: Service Principal" >&2
      echo "To use managed identity:" >&2
      echo "1. Offboard monitoring" >&2
      echo "2. Delete and re-register Arc connection using managed identity" >&2
      echo "3. Re-onboard monitoring with the new identity" >&2
      echo "Please migrate to managed identity and then rerun this script" >&2
      return 1
    fi

    # 3. Check if extension exists and its state. stderr is discarded on
    # purpose: a failing lookup is reported below as "not installed".
    extension_state=$(az k8s-extension show --name azuremonitor-containers \
      --cluster-name "$name" \
      --resource-group "$resource_group" \
      --cluster-type connectedClusters \
      --query "provisioningState" -o tsv 2>/dev/null)
    if [ -z "$extension_state" ]; then
      echo "Container insights extension not installed" >&2
      return 1
    fi
    if [ "$extension_state" != "Succeeded" ]; then
      echo "Container insights extension not ready (current state: $extension_state)" >&2
      return 1
    fi

    # 4. Check if already using MSI authentication
    use_aad_auth=$(az k8s-extension show --name azuremonitor-containers \
      --cluster-name "$name" \
      --resource-group "$resource_group" \
      --cluster-type connectedClusters \
      --query "configurationSettings.\"amalogs.useAADAuth\"" -o tsv | tr '[:upper:]' '[:lower:]')
    if [ "$use_aad_auth" = "true" ]; then
      echo "Monitoring already using MSI authentication" >&2
      return 1
    fi

  else
    # Nothing can be verified for an unknown type, so it must not pass.
    echo "Unsupported cluster type: $cluster_type (must be aks or arc)" >&2
    return 1
  fi

  return 0
}
129+
130+
# Look up the Azure Monitor Private Link Scope (AMPLS) that a Log Analytics
# workspace is linked to.
#
# Arguments:
#   $1 - full resource ID of the workspace, in the form
#        /subscriptions/<sub>/resourceGroups/<rg>/providers/<ns>/workspaces/<name>
# Outputs:
#   The AMPLS resource ID on stdout; nothing when the workspace reports no
#   private link scoped resource.
# Returns:
#   1 when the workspace ID cannot be parsed or the workspace lookup fails,
#   0 otherwise.
get_ampls_id() {
  local workspace_id=$1
  local workspace_sub workspace_rg workspace_name scoped_resource_id

  # Parse workspace details. The ID starts with "/", so field 1 is empty and
  # the subscription, resource group and name are fields 3, 5 and 9.
  workspace_sub=$(printf '%s\n' "$workspace_id" | cut -d'/' -f3)
  workspace_rg=$(printf '%s\n' "$workspace_id" | cut -d'/' -f5)
  workspace_name=$(printf '%s\n' "$workspace_id" | cut -d'/' -f9)

  if [ -z "$workspace_sub" ] || [ -z "$workspace_rg" ] || [ -z "$workspace_name" ]; then
    echo "Cannot parse workspace resource ID: $workspace_id" >&2
    return 1
  fi

  # The workspace may live in a different subscription than the one that is
  # currently active, so the lookup is pinned to the workspace's own.
  scoped_resource_id=$(az monitor log-analytics workspace show \
    --subscription "$workspace_sub" \
    --workspace-name "$workspace_name" \
    --resource-group "$workspace_rg" \
    --query "privateLinkScopedResources[0].resourceId" -o tsv) || return 1

  [ -n "$scoped_resource_id" ] || return 0

  # Drop the trailing "/scopedresources/<name>" to get the AMPLS ID. The
  # segment is matched case-insensitively; the original casing is kept.
  printf '%s\n' "$scoped_resource_id" | awk '{
    cut_at = index(tolower($0), "/scopedresources/")
    if (cut_at > 0) {
      print substr($0, 1, cut_at - 1)
    } else {
      print $0
    }
  }'
}
144+
145+
# List the clusters of one type in a subscription.
#
# Arguments:
#   $1 - subscription ID to scan; when empty, the currently active
#        subscription is used
#   $2 - cluster type: "aks" or "arc"
# Outputs:
#   One line per cluster on stdout: "<name><TAB><resourceGroup>".
#   Error messages go to stderr so callers that capture stdout never
#   mistake them for cluster entries.
# Returns:
#   The exit status of the az list command; 1 for an invalid cluster type.
discover_clusters() {
  local subscription_id=$1
  local cluster_type=$2
  local list_query="[].{name:name,resourceGroup:resourceGroup}"
  local -a scope=()

  if [ -n "$subscription_id" ]; then
    scope=(--subscription "$subscription_id")
  fi

  case "$cluster_type" in
    aks)
      az aks list "${scope[@]}" --query "$list_query" -o tsv
      ;;
    arc)
      az connectedk8s list "${scope[@]}" --query "$list_query" -o tsv
      ;;
    *)
      echo "[Error] Invalid cluster type: $cluster_type (must be aks or arc)" >&2
      return 1
      ;;
  esac
}
163+
164+
# Migrate one cluster's container insights to MSI authentication by removing
# the monitoring addon/extension and onboarding it again.
#
# Arguments:
#   $1 - cluster type: "aks"; any other value is handled as an Arc cluster
#   $2 - resource group of the cluster
#   $3 - cluster name
#   $4 - Log Analytics workspace resource ID to onboard against
# Outputs:
#   Progress and "[Error]" messages on stdout.
# Returns:
#   0 when monitoring was re-enabled (and, for AKS, verified to use MSI);
#   1 on any failure.
perform_migration() {
  local cluster_type=$1
  local resource_group=$2
  local name=$3
  local workspace_id=$4
  local max_attempts=3
  local attempt=1
  local is_private=""
  local ampls_id=""
  local monitoring_state auth_mode
  local enabled_query="addonProfiles.omsagent.enabled || addonProfiles.omsAgent.enabled"
  local aad_query="addonProfiles.omsagent.config.useAADAuth || addonProfiles.omsAgent.config.useAADAuth || addonProfiles.omsagent.config.useaadauth || addonProfiles.omsAgent.config.useaadauth || ''"
  local -a enable_args

  # 0. Pre-flight (AKS only): resolve everything the re-enable step needs
  # BEFORE monitoring is removed, so a failed lookup leaves the cluster
  # untouched instead of stranded with monitoring disabled.
  if [ "$cluster_type" = "aks" ]; then
    # Lower-cased like every other boolean query in this script.
    is_private=$(az aks show -g "$resource_group" -n "$name" \
      --query "apiServerAccessProfile.enablePrivateCluster" -o tsv \
      | tr '[:upper:]' '[:lower:]')

    if [ "$is_private" = "true" ]; then
      echo "[$(date)] Private cluster detected, preserving AMPLS configuration"
      if ! ampls_id=$(get_ampls_id "$workspace_id"); then
        echo "[Error] Could not get AMPLS ID for private cluster"
        return 1
      fi
      if [ -n "$ampls_id" ]; then
        echo "[$(date)] Using AMPLS: $ampls_id"
      else
        # A private API server does not imply the workspace is behind a
        # private link scope; with none linked there is nothing to preserve.
        echo "[$(date)] No AMPLS linked to the workspace, proceeding without AMPLS"
      fi
    else
      echo "[$(date)] Non-private cluster, proceeding without AMPLS"
    fi
  fi

  # 1. Disable monitoring with retries
  echo "[$(date)] Disabling monitoring for $name"
  while [ "$attempt" -le "$max_attempts" ]; do
    if [ "$cluster_type" = "aks" ]; then
      az aks disable-addons -a monitoring -g "$resource_group" -n "$name"

      # Wait for 3 minutes
      echo "[$(date)] Waiting 3 minutes for disable operation to complete (attempt $attempt/$max_attempts)..."
      sleep 180

      # Check if monitoring is actually disabled
      monitoring_state=$(az aks show -g "$resource_group" -n "$name" \
        --query "$enabled_query" -o tsv | tr '[:upper:]' '[:lower:]')
      if [ "$monitoring_state" != "true" ]; then
        echo "[$(date)] Successfully disabled monitoring"
        break
      fi

      echo "[$(date)] Monitoring is still enabled after attempt $attempt"
      if [ "$attempt" -eq "$max_attempts" ]; then
        echo "[Error] Failed to disable monitoring after $max_attempts attempts"
        return 1
      fi
      attempt=$((attempt + 1))
    else
      if az k8s-extension delete --name azuremonitor-containers \
        -g "$resource_group" -c "$name" \
        --cluster-type connectedClusters --yes; then
        break
      fi

      if [ "$attempt" -eq "$max_attempts" ]; then
        echo "[Error] Could not disable monitoring after $max_attempts attempts"
        return 1
      fi
      attempt=$((attempt + 1))
      echo "[$(date)] Retrying disable operation (attempt $attempt/$max_attempts)..."
      sleep 180
    fi
  done

  # 2. Re-enable monitoring with MSI
  echo "[$(date)] Re-enabling monitoring with MSI for $name"
  if [ "$cluster_type" = "aks" ]; then
    enable_args=(-a monitoring -g "$resource_group" -n "$name"
      --workspace-resource-id "$workspace_id")

    if [ -n "$ampls_id" ]; then
      enable_args+=(--ampls-resource-id "$ampls_id")
      if ! az aks enable-addons "${enable_args[@]}"; then
        echo "[Error] Could not enable monitoring with MSI and AMPLS"
        return 1
      fi
    else
      if ! az aks enable-addons "${enable_args[@]}"; then
        echo "[Error] Could not enable monitoring with MSI"
        return 1
      fi
    fi

    # Verify MSI auth is enabled
    auth_mode=$(az aks show -g "$resource_group" -n "$name" \
      --query "$aad_query" -o tsv | tr '[:upper:]' '[:lower:]')
    if [ "$auth_mode" != "true" ]; then
      echo "[Error] MSI authentication not enabled after configuration"
      return 1
    fi
  else
    # amalogs.useAADAuth is the same setting check_prerequisites reads back
    # to decide whether an Arc cluster is already on MSI authentication.
    if ! az k8s-extension create --name azuremonitor-containers \
      -g "$resource_group" -c "$name" \
      --cluster-type connectedClusters \
      --extension-type Microsoft.AzureMonitor.Containers \
      --configuration-settings logAnalyticsWorkspaceResourceID="$workspace_id" \
      --configuration-settings amalogs.useAADAuth=true; then
      echo "[Error] Could not configure monitoring with MSI"
      return 1
    fi
  fi

  return 0
}
262+
263+
# Parse command line arguments: -s and -c carry a value, anything else
# (-h, or the "?" getopts reports for a bad option) shows the usage text.
while getopts "s:c:h" opt; do
  if [ "$opt" = "s" ]; then
    subscription_ids="$OPTARG"
  elif [ "$opt" = "c" ]; then
    cluster_types="$OPTARG"
  else
    usage
  fi
done

# Validate required parameters: both lists must be non-empty
if ! { [ -n "$subscription_ids" ] && [ -n "$cluster_types" ]; }; then
  echo "[Error] Missing required parameters"
  usage
fi
278+
279+
# Arrays to track cluster status (filled by the migration loop, printed in
# the summary)
successful_clusters=()
skipped_clusters=()
failed_clusters=()

echo "=== STEP 1: Prerequisites Check ==="
# Check Azure CLI installation
if ! command -v az > /dev/null; then
  echo "[Error] Azure CLI not found. Please install Azure CLI"
  exit 1
fi

# Check Azure CLI version. "sort -V" orders version strings, so the minimum
# supported version must sort first when the installed one is new enough.
az_version=$(az version --query \"azure-cli\" -o tsv)
if [ "$(printf '%s\n' "2.49.0" "$az_version" | sort -V | head -n1)" != "2.49.0" ]; then
  echo "[Error] Azure CLI version must be 2.49.0 or higher (current version: $az_version)"
  exit 1
fi

# Check login status and verify subscriptions
if ! az account show > /dev/null; then
  echo "[Error] Azure login required. Run 'az login'"
  exit 1
fi

# Verify all subscriptions exist and are accessible
IFS=',' read -ra SUBS <<< "$subscription_ids"
available_subs=$(az account list --query "[].id" -o tsv)

for sub in "${SUBS[@]}"; do
  # xargs trims surrounding whitespace from the list entry
  sub=$(echo "$sub" | xargs)
  # Whole-line, fixed-string, case-insensitive match: the ID is compared as
  # text rather than as a regex, and letter case does not matter. An empty
  # entry (e.g. from a trailing comma) is rejected explicitly because it
  # would otherwise match a blank line.
  if [ -z "$sub" ] || ! printf '%s\n' "$available_subs" | grep -qixF -- "$sub"; then
    echo "[Error] Cannot access subscription: $sub"
    exit 1
  fi
done
315+
316+
echo "=== STEP 2: Cluster Discovery ==="
# Process each subscription
IFS=',' read -ra SUBS <<< "$subscription_ids"
for subscription_id in "${SUBS[@]}"; do
  subscription_id=$(echo "$subscription_id" | xargs)
  echo "[$(date)] Processing subscription: $subscription_id"

  # Set subscription
  if ! az account set -s "$subscription_id"; then
    echo "[Error] Cannot access subscription: $subscription_id"
    continue
  fi

  # Process each cluster type
  IFS=',' read -ra TYPES <<< "$cluster_types"
  for cluster_type in "${TYPES[@]}"; do
    cluster_type=$(echo "$cluster_type" | xargs | tr '[:upper:]' '[:lower:]')

    # Reject unknown types up front so that no error text from discovery
    # can ever be parsed as a cluster entry below.
    case "$cluster_type" in
      aks | arc) ;;
      *)
        echo "[Error] Invalid cluster type: $cluster_type (must be aks or arc)"
        continue
        ;;
    esac

    # Discover clusters
    clusters=$(discover_clusters "$subscription_id" "$cluster_type")

    echo "=== STEP 3: Migration Process ==="
    # Process each discovered cluster
    if [ -n "$clusters" ]; then
      # The list is fed on file descriptor 3, not stdin, so a command in the
      # loop body that reads stdin cannot swallow the remaining clusters.
      while IFS=$'\t' read -r -u 3 name resource_group; do
        [ -z "$name" ] && continue

        echo "[$(date)] Processing $cluster_type cluster: $name"

        # Check prerequisites (the reason for a rejection arrives on stderr)
        if ! prereq_result=$(check_prerequisites "$cluster_type" "$resource_group" "$name" 2>&1); then
          echo "[Skip] $name - Prerequisites failed: $prereq_result"
          skipped_clusters+=("$name - Prerequisites failed: $prereq_result")
          continue
        fi

        # Get workspace ID
        echo "[$(date)] Getting workspace ID for $name"
        if [ "$cluster_type" = "aks" ]; then
          workspace_id=$(az aks show -g "$resource_group" -n "$name" --query "addonProfiles.omsagent.config.logAnalyticsWorkspaceResourceID || addonProfiles.omsAgent.config.logAnalyticsWorkspaceResourceID || addonProfiles.omsagent.config.loganalyticsworkspaceresourceid || addonProfiles.omsAgent.config.loganalyticsworkspaceresourceid" -o tsv)
        else
          workspace_id=$(az k8s-extension show --name azuremonitor-containers --resource-group "$resource_group" --cluster-name "$name" --cluster-type connectedClusters --query "configurationSettings.logAnalyticsWorkspaceResourceID" -o tsv)
        fi

        if [ -z "$workspace_id" ] || [ "$workspace_id" = "null" ]; then
          echo "[Skip] No workspace ID found for $name"
          skipped_clusters+=("$name - No workspace ID configured")
          continue
        fi

        # Perform migration
        if perform_migration "$cluster_type" "$resource_group" "$name" "$workspace_id"; then
          echo "[$(date)] Successfully migrated $name"
          successful_clusters+=("$name")
        else
          echo "[$(date)] Failed to migrate $name"
          failed_clusters+=("$name")
        fi
      done 3<<< "$clusters"
    fi
  done
done
378+
379+
# Print summary: one section per outcome, each with a count in its heading
# followed by one line per recorded cluster.
#
# Arguments: $1 - section heading; remaining arguments - the section entries
print_cluster_group() {
  local heading=$1
  shift
  echo -e "\n${heading} ($#):"
  local entry
  for entry in "$@"; do
    echo "$entry"
  done
}

echo -e "\n[$(date)] Migration Summary:"
print_cluster_group "Successful clusters" "${successful_clusters[@]}"
print_cluster_group "Skipped clusters" "${skipped_clusters[@]}"
print_cluster_group "Failed clusters" "${failed_clusters[@]}"

0 commit comments

Comments
 (0)