Skip to content

Commit bb2e24b

Browse files
committed
Use DaemonSet as a proxy to collect logs from CI
1 parent 2432180 commit bb2e24b

File tree

8 files changed

+259
-107
lines changed

8 files changed

+259
-107
lines changed

hack/collect-logs.sh

Lines changed: 0 additions & 98 deletions
This file was deleted.

hack/log/log-dump-daemonset.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# DaemonSet that keeps one idle pod on every Linux node with the host's log
# directories mounted, so that hack/log/log-dump.sh can `kubectl exec` into it
# and read node-level logs.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: log-dump-node
spec:
  selector:
    matchLabels:
      app: log-dump-node
  template:
    metadata:
      labels:
        app: log-dump-node
    spec:
      containers:
        - name: log-dump-node
          image: fedora  # A base image that has 'journalctl' binary
          # The container only has to stay alive long enough to be exec'd into.
          args: ["sleep", "3600"]
          volumeMounts:
            - name: varlog
              mountPath: /var/log
            - name: runlog
              mountPath: /run/log
      nodeSelector:
        kubernetes.io/os: linux
      # Tolerate every taint listed below so the pod is also scheduled on
      # master / tainted nodes.
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
          operator: Equal
          value: "true"
        - effect: NoExecute
          operator: Exists
        - effect: NoSchedule
          operator: Exists
        - key: CriticalAddonsOnly
          operator: Exists
      # Host directories exposed to the container at the same paths.
      volumes:
        - name: varlog
          hostPath:
            path: /var/log
        - name: runlog
          hostPath:
            path: /run/log

hack/log/log-dump.sh

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright 2020 The Kubernetes Authors.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
set -o errexit
set -o nounset
set -o pipefail

# Resolve the repository root to an ABSOLUTE path. The script changes into it
# right below, so a relative value (e.g. "./../..") would afterwards be
# interpreted against the new working directory and every later
# "${REPO_ROOT}/..." reference would point at the wrong place.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
cd "${REPO_ROOT}" || exit 1

# shellcheck source=../hack/ensure-kind.sh
source "${REPO_ROOT}/hack/ensure-kind.sh"
# shellcheck source=../hack/ensure-kubectl.sh
source "${REPO_ROOT}/hack/ensure-kubectl.sh"

# Destination for everything that gets collected; one sub-directory per cluster.
export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"
mkdir -p "${ARTIFACTS}/management-cluster" "${ARTIFACTS}/workload-cluster"

# Plain `kubectl` calls in this script use this kubeconfig.
export KUBECONFIG="${KUBECONFIG:-${PWD}/kubeconfig}"
33+
34+
#######################################
# Prints the name of the node a pod is scheduled on.
# Arguments: $1 - name of the pod to look up
# Outputs:   the pod's .spec.nodeName on stdout, newline-terminated
# Returns:   non-zero (and prints nothing) when the kubectl lookup fails
#######################################
get_node_name() {
  local -r pod_name="${1}"
  local node_name
  # Capture first instead of `echo "$(...)"`, which would always succeed and
  # hide a failed lookup. The jsonpath is quoted so the shell never treats the
  # braces as anything but literal text.
  node_name="$(kubectl get pod "${pod_name}" -o jsonpath='{.spec.nodeName}')" || return
  echo "${node_name}"
}
38+
39+
#######################################
# Dumps diagnostics of the management (kind) cluster into
# "${ARTIFACTS}/management-cluster": Cluster API resources, container image
# lists, `kubectl cluster-info dump` output and `kind export logs`.
# Globals:   ARTIFACTS (read), PWD (read; kind.kubeconfig is written there),
#            KUBECONFIG (used implicitly by the plain `kubectl` call)
# Arguments: none
# Outputs:   a notice on stdout when no kind cluster exists
#######################################
dump_mgmt_cluster_logs() {
  # Assume the first kind cluster is the management cluster
  local mgmt_cluster_name
  # A failing lookup is treated the same as "no cluster": report and return.
  mgmt_cluster_name="$(kind get clusters | head -n 1)" || true
  if [[ -z "${mgmt_cluster_name}" ]]; then
    echo "No kind cluster is found"
    return
  fi

  kind get kubeconfig --name "${mgmt_cluster_name}" > "${PWD}/kind.kubeconfig"
  # Kept as an array so the command survives a PWD that contains whitespace.
  local -ra kubectl_kind=( kubectl "--kubeconfig=${PWD}/kind.kubeconfig" )

  local -ra resources=(
    "clusters"
    "azureclusters"
    "machines"
    "azuremachines"
    "kubeadmconfigs"
    "machinedeployments"
    "azuremachinetemplates"
    "kubeadmconfigtemplates"
    "machinesets"
    "kubeadmcontrolplanes"
    "machinepools"
    "azuremachinepools"
  )
  local resource
  mkdir -p "${ARTIFACTS}/management-cluster/resources"
  for resource in "${resources[@]}"; do
    # Best-effort: a resource kind that is not installed must not stop the dump.
    "${kubectl_kind[@]}" get --all-namespaces "${resource}" -oyaml \
      > "${ARTIFACTS}/management-cluster/resources/${resource}.log" || true
  done

  {
    echo "images in docker"
    docker images
    echo "images in bootstrap cluster using kubectl CLI"
    "${kubectl_kind[@]}" get pods --all-namespaces -ojson \
      | jq --raw-output '.items[].spec.containers[].image' | sort
    echo "images in deployed cluster using kubectl CLI"
    # Plain kubectl (i.e. ${KUBECONFIG}) addresses the deployed cluster; the
    # kind kubeconfig here would merely repeat the bootstrap list above.
    # Best-effort because the deployed cluster may be unreachable.
    (kubectl get pods --all-namespaces -ojson \
      | jq --raw-output '.items[].spec.containers[].image' | sort) || true
  } > "${ARTIFACTS}/management-cluster/images.info"

  {
    echo "kind cluster-info"
    "${kubectl_kind[@]}" cluster-info dump
  } > "${ARTIFACTS}/management-cluster/kind-cluster.info"

  kind export logs --name="${mgmt_cluster_name}" "${ARTIFACTS}/management-cluster"
}
87+
88+
#######################################
# Collects node-level logs from the workload cluster. Deploys the log-dump
# DaemonSet, then for every DaemonSet pod runs a fixed set of commands through
# `kubectl exec` and stores the results under
# "${ARTIFACTS}/workload-cluster/<node name>/".
# Globals:   REPO_ROOT (read), ARTIFACTS (read)
# Arguments: none
# Outputs:   progress messages on stdout, skipped pods on stderr
#######################################
dump_workload_cluster_logs() {
  echo "Deploying log-dump-daemonset"
  kubectl apply -f "${REPO_ROOT}/hack/log/log-dump-daemonset.yaml"
  kubectl wait pod -l app=log-dump-node --for=condition=Ready --timeout=5m

  # The jsonpath is quoted: unquoted, "[*]" is a glob pattern for the shell.
  local pod_names
  pod_names="$(kubectl get pod -l app=log-dump-node -o jsonpath='{.items[*].metadata.name}')"
  local -a log_dump_pods=()
  read -r -a log_dump_pods <<< "${pod_names}"
  if (( ${#log_dump_pods[@]} == 0 )); then
    echo "No log-dump-node pod is found"
    return
  fi

  # Each entry is appended to "kubectl exec <pod> --" and evaluated by a local
  # bash, so the redirections and the extracting tar run on this machine and
  # write into the current directory.
  local -a log_dump_commands=(
    "journalctl --output=short-precise -u kubelet > kubelet.log"
    "journalctl --output=short-precise -u containerd > containerd.log"
    "journalctl --output=short-precise -k > kern.log"
    "journalctl --output=short-precise > journal.log"
    "cat /var/log/cloud-init.log > cloud-init.log"
    "cat /var/log/cloud-init-output.log > cloud-init-output.log"
  )

  if [[ "$(uname)" == "Darwin" ]]; then
    # tar on Mac OS does not support --wildcards flag
    log_dump_commands+=( "tar -cf - var/log/pods --ignore-failed-read | tar xf - --strip-components=2 -C . '*kube-system*'" )
  else
    log_dump_commands+=( "tar -cf - var/log/pods --ignore-failed-read | tar xf - --strip-components=2 -C . --wildcards '*kube-system*'" )
  fi

  local log_dump_pod node_name log_dump_dir cmd
  local -a dumped_nodes=()
  for log_dump_pod in "${log_dump_pods[@]}"; do
    node_name="$(get_node_name "${log_dump_pod}")" || node_name=""
    if [[ -z "${node_name}" ]]; then
      # Without a node name all pods would write into the same directory.
      echo "Unable to determine the node of pod \"${log_dump_pod}\", skipping" >&2
      continue
    fi

    log_dump_dir="${ARTIFACTS}/workload-cluster/${node_name}"
    mkdir -p "${log_dump_dir}"
    for cmd in "${log_dump_commands[@]}"; do
      # Background subshell: its own working directory, all commands in parallel.
      ( cd "${log_dump_dir}" && bash -c "kubectl exec ${log_dump_pod} -- ${cmd}" ) &
    done
    dumped_nodes+=( "${node_name}" )
  done

  # Wait for log-dumping commands running in the background to complete
  wait

  # Report only now, once the files have actually been written.
  for node_name in ${dumped_nodes[@]+"${dumped_nodes[@]}"}; do
    echo "Exported logs for node \"${node_name}\""
  done
}
127+
128+
#######################################
# EXIT handler: removes the log-dump DaemonSet and then redacts credentials
# from the collected artifacts by sourcing hack/log/redact.sh.
# Globals:   REPO_ROOT (read)
# Arguments: none
# NOTE(review): scripts/ci-entrypoint.sh defines its own cleanup() and sources
# this script from on_exit, so this definition replaces that one there —
# confirm that is intended.
#######################################
cleanup() {
  # Best-effort: the DaemonSet may never have been created or the cluster may
  # be gone. Under errexit a failure here would end the handler before the
  # redaction below and leave secrets in the artifacts.
  kubectl delete --ignore-not-found -f "${REPO_ROOT}/hack/log/log-dump-daemonset.yaml" || true
  source "${REPO_ROOT}/hack/log/redact.sh"
}
132+
133+
# Register the tear-down handler before any work starts so that it runs on
# every exit path of the script.
trap cleanup EXIT

# Management (kind) cluster first, then the workload cluster.
printf '%s\n' "================ DUMPING LOGS FOR MANAGEMENT CLUSTER ================"
dump_mgmt_cluster_logs

printf '%s\n' "================ DUMPING LOGS FOR WORKLOAD CLUSTER ================"
dump_workload_cluster_logs

hack/log/redact.sh

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright 2020 The Kubernetes Authors.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
set -o errexit
set -o nounset
set -o pipefail

# Verify the required Environment Variables are present.
: "${AZURE_SUBSCRIPTION_ID:?Environment variable empty or not defined.}"
: "${AZURE_TENANT_ID:?Environment variable empty or not defined.}"
: "${AZURE_CLIENT_ID:?Environment variable empty or not defined.}"
: "${AZURE_CLIENT_SECRET:?Environment variable empty or not defined.}"

echo "================ REDACTING LOGS ================"

#######################################
# Escapes a literal string for use as the pattern of a sed `s/.../.../`
# command (basic regular expression, '/' delimiter).
# Arguments: $1 - literal text
# Outputs:   escaped text on stdout, without trailing newline
#######################################
escape_sed_pattern() {
  local escaped="${1}"
  local ch
  # Backslashes first, otherwise the ones added below would be doubled again.
  escaped=${escaped//\\/\\\\}
  for ch in '.' '*' '[' '^' '$' '/'; do
    escaped=${escaped//"${ch}"/\\${ch}}
  done
  printf '%s' "${escaped}"
}

# Raw values plus their base64 encodings, since either form can end up in logs.
redact_vars=(
  "${AZURE_CLIENT_ID}"
  "${AZURE_CLIENT_SECRET}"
  "${AZURE_SUBSCRIPTION_ID}"
  "${AZURE_TENANT_ID}"
  "${AZURE_STANDARD_JSON_B64:-}"
  "${AZURE_VMSS_JSON_B64:-}"
  "$(printf '%s' "${AZURE_SUBSCRIPTION_ID}" | base64 | tr -d '\n')"
  "$(printf '%s' "${AZURE_TENANT_ID}" | base64 | tr -d '\n')"
  "$(printf '%s' "${AZURE_CLIENT_ID}" | base64 | tr -d '\n')"
  "$(printf '%s' "${AZURE_CLIENT_SECRET}" | base64 | tr -d '\n')"
)

# Build ONE sed program with a substitution per secret so that every file is
# rewritten by a single sed process.
sed_script=()
for redact_var in "${redact_vars[@]}"; do
  # The optional variables may be empty; an empty pattern is not valid.
  if [[ -z "${redact_var}" ]]; then
    continue
  fi
  # sed works line by line and cannot match a value that spans lines; such a
  # value would also corrupt the whole sed program.
  if [[ "${redact_var}" == *$'\n'* ]]; then
    echo "Skipping a multi-line value that cannot be redacted with sed" >&2
    continue
  fi
  # Escaped so that secrets containing regex metacharacters are matched
  # literally instead of producing a broken or non-matching expression.
  sed_script+=( -e "s/$(escape_sed_pattern "${redact_var}")/===REDACTED===/g" )
done

if [[ "$(uname)" == "Darwin" ]]; then
  # sed on Mac OS requires an empty string for -i flag
  sed_inplace=( -i "" )
else
  sed_inplace=( -i )
fi

# NUL-delimited so that paths containing whitespace stay intact.
while IFS= read -r -d '' log_file; do
  # LC_CTYPE=C and LANG=C will prevent "illegal byte sequence" error from sed
  LC_CTYPE=C LANG=C sed "${sed_inplace[@]}" "${sed_script[@]}" "${log_file}" || true
done < <(find "${ARTIFACTS:-${PWD}/_artifacts}" -type f -print0)

echo "All sensitive variables are redacted"

scripts/ci-e2e.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ export AZURE_SSH_PUBLIC_KEY=$(cat "${AZURE_SSH_PUBLIC_KEY_FILE}" | base64 | tr -
7070
export TIMESTAMP="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
7171
export JOB_NAME="${JOB_NAME:-"cluster-api-provider-azure-e2e"}"
7272

73+
cleanup() {
74+
source "${REPO_ROOT}/hack/log/redact.sh"
75+
}
76+
77+
trap cleanup EXIT
78+
7379
make test-e2e
7480
test_status="${?}"
7581

scripts/ci-entrypoint.sh

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,6 @@ run_upstream_e2e_tests() {
127127
unset KUBERNETES_CONFORMANCE_TEST
128128
}
129129

130-
get_logs() {
131-
kubectl logs deploy/capz-controller-manager -n capz-system manager > "${ARTIFACTS}/logs/capz-manager.log" || true
132-
}
133-
134130
# cleanup all resources we use
135131
cleanup() {
136132
timeout 600 kubectl \
@@ -144,16 +140,15 @@ cleanup() {
144140

145141
on_exit() {
146142
unset KUBECONFIG
147-
get_logs
143+
source "${REPO_ROOT}/hack/log/log-dump.sh"
148144
# cleanup
149145
if [[ -z "${SKIP_CLEANUP:-}" ]]; then
150146
cleanup
151147
fi
152148
}
153149

154150
trap on_exit EXIT
155-
ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"
156-
mkdir -p "${ARTIFACTS}/logs"
151+
export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"
157152

158153
# create cluster
159154
if [[ -z "${SKIP_CREATE_WORKLOAD_CLUSTER:-}" ]]; then

test/e2e/common.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,15 @@ func setupSpecNamespace(ctx context.Context, specName string, clusterProxy frame
5454
return namespace, cancelWatches
5555
}
5656

57-
func cleanup(ctx context.Context, specName string, clusterProxy framework.ClusterProxy, artifactFolder string, namespace *corev1.Namespace, cancelWatches context.CancelFunc, cluster *clusterv1.Cluster, intervalsGetter func(spec, key string) []interface{}, skipCleanup bool) {
57+
func dumpSpecResourcesAndCleanup(ctx context.Context, specName string, clusterProxy framework.ClusterProxy, artifactFolder string, namespace *corev1.Namespace, cancelWatches context.CancelFunc, cluster *clusterv1.Cluster, intervalsGetter func(spec, key string) []interface{}, skipCleanup bool) {
58+
Byf("Dumping all the Cluster API resources in the %q namespace", namespace.Name)
59+
// Dump all Cluster API related resources to artifacts before deleting them.
60+
framework.DumpAllResources(ctx, framework.DumpAllResourcesInput{
61+
Lister: clusterProxy.GetClient(),
62+
Namespace: namespace.Name,
63+
LogPath: filepath.Join(artifactFolder, "clusters", clusterProxy.GetName(), "resources"),
64+
})
65+
5866
if !skipCleanup {
5967
Byf("Deleting cluster %s/%s", cluster.Namespace, cluster.Name)
6068
// While https://github.com/kubernetes-sigs/cluster-api/issues/2955 is addressed in future iterations, there is a chance

0 commit comments

Comments
 (0)