Skip to content

Commit d7b012b

Browse files
authored
Update AgentLogCollection.sh (#935)
- Resolved an issue where the script did not recognize daemonset and replicaset pods and therefore skipped their log collection.
- Added logging lines that show where logs are copied from.
- Added a section to other_logCollection that collects both the container-azm-ms-aks-k8scluster and the ama-logs-rs-config configmaps.
1 parent a02cacd commit d7b012b

File tree

1 file changed

+56
-27
lines changed

1 file changed

+56
-27
lines changed

scripts/troubleshoot/LogCollection/AgentLogCollection.sh

Lines changed: 56 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,36 +1,35 @@
11
#!/bin/bash
2-
#
3-
# Copyright (c) Microsoft Corporation.
4-
#
5-
# This script will collect all logs from the replicaset agent pod and a random daemonset pod, also collect onboard logs with processes
6-
#
7-
# Author Nina Li
2+
#Copyright (c) Microsoft Corporation.
3+
# Description: This script will collect all logs from the Replicaset Agent Pod and Daemonset Agent Pod.
4+
# Also collects onboarding logs with processes.
5+
# Original Author: Nina Li
6+
# Modified By: Brandon DeGolier
87

98
Red='\033[0;31m'
109
Cyan='\033[0;36m'
11-
NC='\033[0m' # No Color
10+
NC='\033[0m'
1211

1312
init()
1413
{
1514
echo -e "Preparing for log collection..." | tee -a Tool.log
1615

1716
if ! cmd="$(type -p kubectl)" || [[ -z $cmd ]]; then
18-
echo -e "${Red}Command kubectl not found, please install it firstly, exit...${NC}"
17+
echo -e "${Red}Command kubectl not found, please install to continue, exiting...${NC}"
1918
cd ..
2019
rm -rf $output_path
2120
exit
2221
fi
2322

2423
if ! cmd="$(type -p tar)" || [[ -z $cmd ]]; then
25-
echo -e "${Red}Command tar not found, please install it firstly, exit...${NC}"
24+
echo -e "${Red}Command tar not found, please install to continue, exiting...${NC}"
2625
cd ..
2726
rm -rf $output_path
2827
exit
2928
fi
3029

3130
cmd=`kubectl get nodes 2>&1`
3231
if [[ $cmd == *"refused"* ]];then
33-
echo -e "${Red}Fail to connect your AKS, please fisrlty connect to cluster by command: az aks get-credentials --resource-group myResourceGroup --name myAKSCluster${NC}"
32+
echo -e "${Red}Failed to connect your AKS, please connect to your cluster by command: az aks get-credentials --resource-group myResourceGroup --name myAKSCluster, exiting...${NC}"
3433
cd ..
3534
rm -rf $output_path
3635
exit
@@ -41,19 +40,19 @@ init()
4140
do
4241
if [ `echo $node | tr -s '[:upper:]' '[:lower:]'` != "ready" ]; then
4342
kubectl get nodes
44-
echo -e "${Red} One or more AKS node is not ready, please start this node firstly for log collection, exit...${NC}"
43+
echo -e "${Red}One or more AKS node is not ready, please start this node for log collection, exiting...${NC}"
4544
cd ..
4645
rm -rf $output_path
4746
exit
4847
fi
4948
done
50-
echo -e "Prerequistes check is done, all good" | tee -a Tool.log
49+
echo -e "Prerequisites check complete!" | tee -a Tool.log
5150

52-
echo -e "Saving cluster information" | tee -a Tool.log
51+
echo -e "Saving cluster information..." | tee -a Tool.log
5352

5453
cmd=`kubectl cluster-info 2>&1`
5554
if [[ $cmd == *"refused"* ]];then
56-
echo -e "${Red}Fail to get cluster info, please check your AKS status fistly, exit...${NC}"
55+
echo -e "${Red}Failed to get cluster info, please check your AKS status, exiting...${NC}"
5756
cd ..
5857
rm -rf $output_path
5958
exit
@@ -76,6 +75,10 @@ ds_logCollection()
7675
if [[ $cmd == *"cannot access"* ]];then
7776
echo -e "${Red}/var/opt/microsoft not exist on ${ds_pod}${NC}" | tee -a Tool.log
7877
else
78+
echo -e "Collecting the following logs from ${ds_pod}:"
79+
echo -e "/var/opt/microsoft/docker-cimprov/log | Containers "ama-logs, ama-logs-prometheus""
80+
echo -e "/var/opt/microsoft/linuxmonagent/log | Containers "ama-logs, ama-logs-prometheus""
81+
echo -e "/etc/mdsd.d/config-cache/configchunks/ | Data Collection Rule Config"
7982
kubectl cp ${ds_pod}:/var/opt/microsoft/docker-cimprov/log ama-logs-daemonset --namespace=kube-system --container ama-logs > /dev/null
8083
kubectl cp ${ds_pod}:/var/opt/microsoft/docker-cimprov/log ama-logs-prom-daemonset --namespace=kube-system --container ama-logs-prometheus > /dev/null
8184
kubectl cp ${ds_pod}:/var/opt/microsoft/linuxmonagent/log ama-logs-daemonset-mdsd --namespace=kube-system --container ama-logs > /dev/null
@@ -89,6 +92,8 @@ ds_logCollection()
8992
if [[ $cmd == *"cannot access"* ]];then
9093
echo -e "${Red}/etc/fluent not exist on ${ds_pod}${NC}" | tee -a Tool.log
9194
else
95+
echo -e "Collecting the following logs from ${ds_pod}:"
96+
echo -e "/etc/fluent/container.conf | Containers "ama-logs, ama-logs-prometheus""
9297
kubectl cp ${ds_pod}:/etc/fluent/container.conf ama-logs-daemonset/container_${ds_pod}.conf --namespace=kube-system --container ama-logs > /dev/null
9398
kubectl cp ${ds_pod}:/etc/fluent/container.conf ama-logs-prom-daemonset/container_${ds_pod}_prom.conf --namespace=kube-system --container ama-logs-prometheus > /dev/null
9499
fi
@@ -97,6 +102,9 @@ ds_logCollection()
97102
if [[ $cmd == *"cannot access"* ]];then
98103
echo -e "${Red}/etc/opt/microsoft/docker-cimprov not exist on ${ds_pod}${NC}" | tee -a Tool.log
99104
else
105+
echo -e "Collecting the following logs from ${ds_pod}:"
106+
echo -e "/etc/opt/microsoft/docker-cimprov/fluent-bit.conf | Containers "ama-logs, ama-logs-prometheus""
107+
echo -e "/etc/opt/microsoft/docker-cimprov/telegraf.conf | Containers "ama-logs, ama-logs-prometheus""
100108
kubectl cp ${ds_pod}:/etc/opt/microsoft/docker-cimprov/fluent-bit.conf ama-logs-daemonset/fluent-bit.conf --namespace=kube-system --container ama-logs > /dev/null
101109
kubectl cp ${ds_pod}:/etc/opt/microsoft/docker-cimprov/telegraf.conf ama-logs-daemonset/telegraf.conf --namespace=kube-system --container ama-logs > /dev/null
102110
kubectl cp ${ds_pod}:/etc/opt/microsoft/docker-cimprov/telegraf.conf ama-logs-prom-daemonset/telegraf.conf --namespace=kube-system --container ama-logs-prometheus > /dev/null
@@ -107,7 +115,7 @@ ds_logCollection()
107115

108116
win_logCollection()
109117
{
110-
echo -e "Collecting logs from ${ds_win_pod}, windows pod will take several minutes for log collection, please dont exit forcely..." | tee -a Tool.log
118+
echo -e "Collecting logs from ${ds_win_pod}, Windows pod will take several minutes for log collection, please don't exit..." | tee -a Tool.log
111119
kubectl describe pod ${ds_win_pod} --namespace=kube-system > describe_${ds_win_pod}.txt
112120
kubectl logs ${ds_win_pod} --container ama-logs-windows --namespace=kube-system > logs_${ds_win_pod}.txt
113121
kubectl exec ${ds_win_pod} -n kube-system --request-timeout=10m -- powershell Get-Process > process_${ds_win_pod}.txt
@@ -116,6 +124,9 @@ win_logCollection()
116124
if [[ $cmd == *"cannot access"* ]];then
117125
echo -e "${Red}/etc/ not exist on ${ds_pod}${NC}" | tee -a Tool.log
118126
else
127+
echo -e "Collecting the following logs from ${ds_win_pod}:"
128+
echo -e "/etc/fluent-bit"
129+
echo -e "/etc/telegraf/telegraf.conf"
119130
kubectl cp ${ds_win_pod}:/etc/fluent-bit ama-logs-windows-daemonset-fbit --namespace=kube-system > /dev/null
120131
kubectl cp ${ds_win_pod}:/etc/telegraf/telegraf.conf ama-logs-windows-daemonset-fbit/telegraf.conf --namespace=kube-system > /dev/null
121132

@@ -146,6 +157,9 @@ rs_logCollection()
146157
if [[ $cmd == *"cannot access"* ]];then
147158
echo -e "${Red}/var/opt/microsoft not exist on ${rs_pod}${NC}" | tee -a Tool.log
148159
else
160+
echo -e "Collecting the following logs from ${rs_pod}:"
161+
echo -e "/var/opt/microsoft/docker-cimprov/log"
162+
echo -e "/var/opt/microsoft/linuxmonagent/log"
149163
kubectl cp ${rs_pod}:/var/opt/microsoft/docker-cimprov/log ama-logs-replicaset --namespace=kube-system > /dev/null
150164
kubectl cp ${rs_pod}:/var/opt/microsoft/linuxmonagent/log ama-logs-replicaset-mdsd --namespace=kube-system > /dev/null
151165
fi
@@ -154,13 +168,18 @@ rs_logCollection()
154168
if [[ $cmd == *"cannot access"* ]];then
155169
echo -e "${Red}/etc/fluent not exist on ${rs_pod}${NC}" | tee -a Tool.log
156170
else
171+
echo -e "Collecting the following logs from ${rs_pod}:"
172+
echo -e "/etc/fluent/kube.conf"
157173
kubectl cp ${rs_pod}:/etc/fluent/kube.conf ama-logs-replicaset/kube_${rs_pod}.conf --namespace=kube-system --container ama-logs > /dev/null
158174
fi
159175

160176
cmd=`kubectl exec ${rs_pod} -n kube-system -- ls /etc/opt/microsoft/docker-cimprov 2>&1`
161177
if [[ $cmd == *"cannot access"* ]];then
162178
echo -e "${Red}/etc/opt/microsoft/docker-cimprov not exist on ${rs_pod}${NC}" | tee -a Tool.log
163179
else
180+
echo -e "Collecting the following logs from ${rs_pod}:"
181+
echo -e "/etc/opt/microsoft/docker-cimprov/fluent-bit-rs.conf"
182+
echo -e "/etc/opt/microsoft/docker-cimprov/telegraf-rs.conf"
164183
kubectl cp ${rs_pod}:/etc/opt/microsoft/docker-cimprov/fluent-bit-rs.conf ama-logs-replicaset/fluent-bit.conf --namespace=kube-system --container ama-logs > /dev/null
165184
kubectl cp ${rs_pod}:/etc/opt/microsoft/docker-cimprov/telegraf-rs.conf ama-logs-replicaset/telegraf-rs.conf --namespace=kube-system --container ama-logs > /dev/null
166185
fi
@@ -169,55 +188,65 @@ rs_logCollection()
169188

170189
other_logCollection()
171190
{
172-
echo -e "Collecting onboard logs..."
191+
echo -e "Collecting onboarding logs..."
173192
export deploy=$(kubectl get deployment --namespace=kube-system | grep -E ama-logs | head -n 1 | awk '{print $1}')
174193
if [ -z "$deploy" ];then
175-
echo -e "${Red}there is not ama-logs deployment, skipping log collection of deployment${NC}" | tee -a Tool.log
194+
echo -e "${Red}There is no ama-logs deployment present, skipping log collection of deployment${NC}" | tee -a Tool.log
176195
else
196+
echo -e "Collecting deployment info..."
177197
kubectl get deployment $deploy --namespace=kube-system -o yaml > deployment_${deploy}.txt
178198
fi
179199

180-
export config=$(kubectl get configmaps --namespace=kube-system | grep -E container-azm-ms-agentconfig | head -n 1 | awk '{print $1}')
200+
export config=$(kubectl get configmaps --namespace=kube-system | grep -E container-azm-ms-aks-k8scluster | head -n 1 | awk '{print $1}')
181201
if [ -z "$config" ];then
182-
echo -e "${Red}configMap named container-azm-ms-agentconfig is not found, if you created configMap for ama-logs, please manually save your custom configMap of ama-logs by command: kubectl get configmaps <configMap name> --namespace=kube-system -o yaml > configMap.yaml${NC}" | tee -a Tool.log
202+
echo -e "${Red}configMap named container-azm-ms-aks-k8scluster is not found, if you created configMap for ama-logs, please manually save your custom configMap of ama-logs by command: kubectl get configmaps <configMap name> --namespace=kube-system -o yaml > configMap.yaml${NC}" | tee -a Tool.log
183203
else
204+
echo -e "Collecting container-azm-ms-aks-k8scluster configmap..."
184205
kubectl get configmaps $config --namespace=kube-system -o yaml > ${config}.yaml
185206
fi
186207

208+
export config2=$(kubectl get configmaps --namespace=kube-system | grep -E ama-logs-rs-config | head -n 1 | awk '{print $1}')
209+
if [ -z "$config2" ];then
210+
echo -e "${Red}configMap named ama-logs-rs-config is not found, if you created configMap for ama-logs, please manually save your custom configMap of ama-logs by command: kubectl get configmaps <configMap name> --namespace=kube-system -o yaml > configMap.yaml${NC}" | tee -a Tool.log
211+
else
212+
echo -e "Collecting ama-logs-rs-config configmap..."
213+
kubectl get configmaps $config2 --namespace=kube-system -o yaml > ${config2}.yaml
214+
fi
215+
187216
kubectl get nodes > node.txt
188217
# contains info regarding node image version, images present on disk, etc
189218
# TODO: add syslog doc link
190219
echo -e "If syslog collection is enabled please make sure that the node pool image is Nov 2022 or later.\
191220
To check current version and upgrade: https://learn.microsoft.com/en-us/azure/aks/node-image-upgrade"
192221
kubectl get nodes -o json > node-detailed.json
193222

194-
echo -e "Complete onboard log collection!" | tee -a Tool.log
223+
echo -e "Complete onboarding log collection!" | tee -a Tool.log
195224
}
196225

197-
#main
226+
#Main
198227
output_path="AKSInsights-logs.$(date +%s).`hostname`"
199228
mkdir -p $output_path
200229
cd $output_path
201230

202231
init
203232

204233
export ds_pod=$(kubectl get pods -n kube-system -o custom-columns=NAME:.metadata.name | grep -E ama-logs-[a-z0-9]{5} | head -n 1)
205-
if [ -z "$ds_pod" ];then
206-
echo -e "${Red}daemonset pod do not exist, skipping log collection for daemonset pod${NC}" | tee -a Tool.log
234+
if [[ -z "$ds_pod" ]];then
235+
echo -e "${Red}Daemonset pod does not exist, skipping log collection for daemonset pod${NC}" | tee -a Tool.log
207236
else
208237
ds_logCollection
209238
fi
210239

211240
export ds_win_pod=$(kubectl get pods -n kube-system -o custom-columns=NAME:.metadata.name | grep -E ama-logs-windows-[a-z0-9]{5} | head -n 1)
212-
if [ -z "$ds_win_pod" ];then
213-
echo -e "${Cyan} windows agent pod do not exist, skipping log collection for windows agent pod ${NC}" | tee -a Tool.log
241+
if [[ -z "$ds_win_pod" ]];then
242+
echo -e "${Cyan}Windows agent pod does not exist, skipping log collection for windows agent pod ${NC}" | tee -a Tool.log
214243
else
215244
win_logCollection
216245
fi
217246

218247
export rs_pod=$(kubectl get pods -n kube-system -o custom-columns=NAME:.metadata.name | grep -E ama-logs-rs-[a-z0-9]{5} | head -n 1)
219-
if [ -z "$rs_pod" ];then
220-
echo -e "${Red}replicaset pod do not exist, skipping log collection for replicaset pod ${NC}" | tee -a Tool.log
248+
if [[ -z "$rs_pod" ]];then
249+
echo -e "${Red}replicaset pod does not exist, skipping log collection for replicaset pod ${NC}" | tee -a Tool.log
221250
else
222251
rs_logCollection
223252
fi

0 commit comments

Comments
 (0)