Skip to content

Commit 2432180

Browse files
ksubrmnnchewong
authored and committed
feat: create debugging script
1 parent 9fc12dc commit 2432180

File tree

1 file changed

+98
-0
lines changed

1 file changed

+98
-0
lines changed

hack/collect-logs.sh

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright 2020 The Kubernetes Authors.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# Strict mode: abort on the first failing command, on expansion of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

# Cluster name, used below to locate "${CLUSTER_NAME}.kubeconfig".
# Defaults to "test-<unix timestamp>" when not provided by the environment.
CLUSTER_NAME="${CLUSTER_NAME:-test-$(date +%s)}"
# Root directory for collected output; logs are written to "${ARTIFACTS}/logs".
# Defaults to "_artifacts" under the current working directory.
ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"
23+
24+
# Dump logs from kind and all the nodes into "${ARTIFACTS}/logs".
#
# Globals (read): CLUSTER_NAME, ARTIFACTS, AZURE_RESOURCE_GROUP
# Outputs:        version information on stdout; everything else is appended
#                 to files below "${ARTIFACTS}/logs".
# Requires:       kind, kubectl, docker, jq, az and the ssh-to-node function.
dump-logs() {
  local kubeconfig="${CLUSTER_NAME}.kubeconfig"
  local log_dir="${ARTIFACTS}/logs"
  local nodes node dir

  # log version information
  echo "=== versions ==="
  echo "kind : $(kind version)" || true
  echo "kubectl: "
  kubectl --kubeconfig="${kubeconfig}" version || true
  echo ""

  # dump all the info from the CAPI related CRDs
  mkdir -p "${log_dir}"
  kubectl get \
    clusters,azureclusters,machines,azuremachines,kubeadmconfigs,machinedeployments,azuremachinetemplates,kubeadmconfigtemplates,machinesets,kubeadmcontrolplanes \
    --all-namespaces -o yaml >> "${log_dir}/capz.info" || true

  # dump images info
  {
    echo "images in docker"
    docker images
    echo "images from bootstrap using containerd CLI"
    docker exec kind-control-plane ctr -n k8s.io images list
    echo "images in bootstrap cluster using kubectl CLI"
    kubectl get pods --all-namespaces -o json \
      | jq --raw-output '.items[].spec.containers[].image' | sort
    echo "images in deployed cluster using kubectl CLI"
    kubectl --kubeconfig="${kubeconfig}" get pods --all-namespaces -o json \
      | jq --raw-output '.items[].spec.containers[].image' | sort
  } >> "${log_dir}/images.info"

  # dump cluster info for kind
  {
    echo "kind cluster-info"
    kubectl cluster-info dump
  } >> "${log_dir}/kind-cluster.info"

  # dump cluster info for capz
  {
    echo "=== VMs in ${AZURE_RESOURCE_GROUP} ==="
    az vm list --resource-group "${AZURE_RESOURCE_GROUP}"
    echo "=== cluster-info dump ==="
    kubectl --kubeconfig="${kubeconfig}" cluster-info dump
  } >> "${log_dir}/capz-cluster.info"

  # export all logs from kind
  kind "export" logs --name="kind" "${log_dir}"

  # NOTE(review): the ownership tag below hard-codes the cluster name
  # "capi-quickstart" instead of using CLUSTER_NAME - confirm this is intended.
  nodes=$(az vm list --resource-group "${AZURE_RESOURCE_GROUP}" \
    --query "[?tags.\"sigs.k8s.io_cluster-api-provider-azure_cluster_capi-quickstart\" == 'owned'].name" \
    -o tsv)

  # We used to pipe this output to 'tail -n +2' but for some reason this was
  # sometimes (all the time?) only finding the bastion host. For now, omit the
  # tail and gather logs for every VM returned by the query above. This may
  # include the bastion, but that's better than not getting logs from all VMs.
  #
  # The names are normalised to one per line and read on fd 3, so that:
  #  - no unquoted expansion (and therefore no globbing) is involved,
  #  - an empty result simply yields zero iterations instead of tripping
  #    'nounset' on an empty array with older bash releases,
  #  - commands in the loop body cannot consume the list through stdin.
  while IFS= read -r -u 3 node; do
    [[ -n "${node}" ]] || continue
    echo "collecting logs from ${node}"
    dir="${log_dir}/${node}"
    mkdir -p "${dir}"
    ssh-to-node "${node}" "sudo journalctl --output=short-precise -k" "${dir}/kern.log"
    ssh-to-node "${node}" "sudo journalctl --output=short-precise" "${dir}/systemd.log"
    ssh-to-node "${node}" "sudo crictl version && sudo crictl info" "${dir}/containerd.info"
    ssh-to-node "${node}" "sudo journalctl --no-pager -u cloud-final" "${dir}/cloud-final.log"
    ssh-to-node "${node}" "sudo journalctl --no-pager -u kubelet.service" "${dir}/kubelet.log"
    ssh-to-node "${node}" "sudo journalctl --no-pager -u containerd.service" "${dir}/containerd.log"
  done 3< <(tr -s '[:space:]' '\n' <<< "${nodes}")
}
88+
89+
# Run a shell command on an Azure VM through 'az vm run-command invoke' and
# append the message it reports to a log file.
#
# Globals (read): AZURE_RESOURCE_GROUP
# Arguments:
#   $1 - name of the VM inside ${AZURE_RESOURCE_GROUP}
#   $2 - shell command line to run on the VM
#   $3 - path of the file the command output is appended to
# Returns: non-zero when the az invocation or the jq extraction fails.
function ssh-to-node() {
  local node="$1"
  local cmd="$2"
  local logfile="$3"
  local output message

  # The command must reach az as ONE argument: unquoted it is word-split and
  # fragments such as '--output=short-precise' are parsed as az's own options.
  output=$(az vm run-command invoke -g "${AZURE_RESOURCE_GROUP}" -n "${node}" \
    --command-id RunShellScript --scripts "${cmd}") || return

  # 'jq -r' emits the decoded string (real newlines, no surrounding quotes),
  # so no 'echo -e' post-processing of JSON escapes is needed.
  message=$(printf '%s\n' "${output}" | jq -r '.value[0].message') || return

  # Quoted printf keeps whitespace intact and prevents glob expansion of
  # characters such as '*' that may appear in log text.
  printf '%s\n' "${message}" >> "${logfile}"
}

0 commit comments

Comments
 (0)