@@ -65,7 +65,7 @@ readonly max_dump_processes=25
 function setup() {
   KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
   if [[ -z "${use_custom_instance_list}" ]]; then
-    : "${KUBE_CONFIG_FILE:="config-test.sh"}"
+    : ${KUBE_CONFIG_FILE:="config-test.sh"}
     echo "Sourcing kube-util.sh"
     source "${KUBE_ROOT}/cluster/kube-util.sh"
     echo "Detecting project"
@@ -87,7 +87,7 @@ function setup() {
 }
 
 function log-dump-ssh() {
-  if [[ "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
+  if [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
     ssh-to-node "$@"
     return
   fi
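
A note on the =~ change in this hunk (the same edit recurs in several hunks below): quoting the right-hand side of =~ makes bash match it as a literal string, while leaving it unquoted treats it as a regular expression. A minimal sketch, with an illustrative provider list that is not taken from this script:

    providers="gce gke kubemark"
    pattern="g.e"
    [[ "${providers}" =~ ${pattern} ]]   && echo "unquoted: regex, so 'g.e' matches 'gce'"
    [[ "${providers}" =~ "${pattern}" ]] || echo "quoted: literal, so 'g.e' is not found"

For plain provider names without regex metacharacters the two forms behave the same, which is why both work for this check.
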
@@ -104,7 +104,7 @@ function log-dump-ssh() {
 function copy-logs-from-node() {
   local -r node="${1}"
   local -r dir="${2}"
-  read -r -a files <<< "${3}"
+  local files=( ${3} )
   # Append "*"
   # The * at the end is needed to also copy rotated logs (which happens
   # in large clusters and long runs).
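
For context on the swap above: both read -r -a files <<< "${3}" and local files=( ${3} ) split a space-separated string into an array, but the unquoted assignment also runs glob expansion on each word. A small sketch with made-up log names (not from the script):

    list="kern.log docker.log kubelet.log*"
    read -r -a a <<< "${list}"   # three elements; the trailing '*' stays literal
    b=( ${list} )                # word-split, and 'kubelet.log*' globs if matching files exist
    printf 'read -r -a: <%s>\n' "${a[@]}"
    printf 'unquoted:   <%s>\n' "${b[@]}"
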
@@ -114,13 +114,12 @@ function copy-logs-from-node() {
   # Comma delimit (even the singleton, or scp does the wrong thing), surround by braces.
   local -r scp_files="{$(printf "%s," "${files[@]}")}"
 
-  if [[ "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
+  if [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
     # get-serial-port-output lets you ask for ports 1-4, but currently (11/21/2016) only port 1 contains useful information
     gcloud compute instances get-serial-port-output --project "${PROJECT}" --zone "${ZONE}" --port 1 "${node}" > "${dir}/serial-1.log" || true
     gcloud compute scp --recurse --project "${PROJECT}" --zone "${ZONE}" "${node}:${scp_files}" "${dir}" > /dev/null || true
   elif [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
-    local ip
-    ip=$(get_ssh_hostname "${node}")
+    local ip=$(get_ssh_hostname "${node}")
     scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}:${scp_files}" "${dir}" > /dev/null || true
   elif [[ -n "${use_custom_instance_list}" ]]; then
     scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${LOG_DUMP_SSH_KEY}" "${LOG_DUMP_SSH_USER}@${node}:${scp_files}" "${dir}" > /dev/null || true
@@ -137,25 +136,25 @@ function copy-logs-from-node() {
 function save-logs() {
   local -r node_name="${1}"
   local -r dir="${2}"
-  local save_files="${3}"
+  local files="${3}"
   local opt_systemd_services="${4:-""}"
   local on_master="${5:-"false"}"
 
   if [[ -n "${use_custom_instance_list}" ]]; then
     if [[ -n "${LOG_DUMP_SAVE_LOGS:-}" ]]; then
-      save_files="${save_files} ${LOG_DUMP_SAVE_LOGS:-}"
+      files="${files} ${LOG_DUMP_SAVE_LOGS:-}"
     fi
   else
     case "${KUBERNETES_PROVIDER}" in
       gce|gke|kubernetes-anywhere)
-        save_files="${save_files} ${gce_logfiles}"
+        files="${files} ${gce_logfiles}"
         ;;
       aws)
-        save_files="${save_files} ${aws_logfiles}"
+        files="${files} ${aws_logfiles}"
         ;;
     esac
   fi
-  read -r -a services <<< "${systemd_services} ${opt_systemd_services} ${LOG_DUMP_SAVE_SERVICES:-}"
+  local -r services=( ${systemd_services} ${opt_systemd_services} ${LOG_DUMP_SAVE_SERVICES:-} )
 
   if log-dump-ssh "${node_name}" "command -v journalctl" &> /dev/null; then
     if [[ "${on_master}" == "true" ]]; then
@@ -175,7 +174,7 @@ function save-logs() {
       log-dump-ssh "${node_name}" "sudo journalctl --output=short-precise" > "${dir}/systemd.log" || true
     fi
   else
-    save_files="${kern_logfile} ${save_files} ${initd_logfiles} ${supervisord_logfiles}"
+    files="${kern_logfile} ${files} ${initd_logfiles} ${supervisord_logfiles}"
   fi
 
   # Try dumping coverage profiles, if it looks like coverage is enabled in the first place.
@@ -196,8 +195,8 @@ function save-logs() {
   echo "Changing logfiles to be world-readable for download"
   log-dump-ssh "${node_name}" "sudo chmod -R a+r /var/log" || true
 
-  echo "Copying '${save_files}' from ${node_name}"
-  copy-logs-from-node "${node_name}" "${dir}" "${save_files}"
+  echo "Copying '${files}' from ${node_name}"
+  copy-logs-from-node "${node_name}" "${dir}" "${files}"
 }
 
 # Saves a copy of the Windows Docker event log to ${WINDOWS_LOGS_DIR}\docker.log
@@ -224,9 +223,8 @@ function save-windows-logs-via-diagnostics-tool() {
   local node="${1}"
   local dest_dir="${2}"
 
-  gcloud compute instances add-metadata "${node}" --metadata enable-diagnostics=true --project="${PROJECT}" --zone="${ZONE}"
-  local logs_archive_in_gcs
-  logs_archive_in_gcs=$(gcloud alpha compute diagnose export-logs "${node}" --zone="${ZONE}" --project="${PROJECT}" | tail -n 1)
+  gcloud compute instances add-metadata ${node} --metadata enable-diagnostics=true --project=${PROJECT} --zone=${ZONE}
+  local logs_archive_in_gcs=$(gcloud alpha compute diagnose export-logs ${node} --zone=${ZONE} --project=${PROJECT} | tail -n 1)
   local temp_local_path="${node}.zip"
   for retry in {1..20}; do
     if gsutil mv "${logs_archive_in_gcs}" "${temp_local_path}" > /dev/null 2>&1; then
@@ -238,8 +236,8 @@ function save-windows-logs-via-diagnostics-tool() {
   done
 
   if [[ -f "${temp_local_path}" ]]; then
-    unzip "${temp_local_path}" -d "${dest_dir}" > /dev/null
-    rm -f "${temp_local_path}"
+    unzip ${temp_local_path} -d "${dest_dir}" > /dev/null
+    rm -f ${temp_local_path}
   fi
 }
 
@@ -251,14 +249,14 @@ function save-windows-logs-via-ssh() {
   export-windows-docker-event-log "${node}"
 
   local remote_files=()
-  for file in ${windows_node_logfiles}; do
+  for file in ${windows_node_logfiles[@]}; do
     remote_files+=( "${WINDOWS_LOGS_DIR}\\${file}" )
   done
   remote_files+=( "${windows_node_otherfiles[@]}" )
 
   # TODO(pjh, yujuhong): handle rotated logs and copying multiple files at the
   # same time.
-  for remote_file in "${remote_files[@]}"; do
+  for remote_file in ${remote_files[@]}; do
     # Retry up to 3 times to allow ssh keys to be properly propagated and
     # stored.
     for retry in {1..3}; do
@@ -280,7 +278,7 @@ function save-logs-windows() {
   local -r node="${1}"
   local -r dest_dir="${2}"
 
-  if [[ ! "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
+  if [[ ! "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
     echo "Not saving logs for ${node}, Windows log dumping requires gcloud support"
     return
   fi
@@ -302,14 +300,14 @@ function run-in-docker-container() {
   local node_name="$1"
   local container="$2"
   shift 2
-  log-dump-ssh "${node_name}" "docker exec \"\$(docker ps -f label=io.kubernetes.container.name=${container} --format \"{{.ID}}\")\" $*"
+  log-dump-ssh "${node_name}" "docker exec \"\$(docker ps -f label=io.kubernetes.container.name=${container} --format \"{{.ID}}\")\" $@"
 }
 
 function dump_masters() {
   local master_names
   if [[ -n "${use_custom_instance_list}" ]]; then
-    while IFS='' read -r line; do master_names+=("$line"); done < <(log_dump_custom_get_instances master)
-  elif [[ ! "${master_ssh_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
+    master_names=( $(log_dump_custom_get_instances master) )
+  elif [[ ! "${master_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
     echo "Master SSH not supported for ${KUBERNETES_PROVIDER}"
     return
   elif [[ -n "${KUBEMARK_MASTER_NAME:-}" ]]; then
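
The $* to $@ change above sits inside a double-quoted string, where the two expand differently: "$*" joins all remaining arguments into one word, while "$@" (even with a prefix) keeps them as separate words. A quick illustrative sketch using a throwaway helper that just counts its arguments (not part of the script):

    show() { printf '%d args:' "$#"; printf ' <%s>' "$@"; echo; }
    set -- one "two words" three
    show "docker exec id $*"   # 1 arg:  <docker exec id one two words three>
    show "docker exec id $@"   # 3 args: <docker exec id one> <two words> <three>
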
@@ -360,8 +358,8 @@ function dump_nodes() {
     node_names=( "$@" )
   elif [[ -n "${use_custom_instance_list}" ]]; then
     echo "Dumping logs for nodes provided by log_dump_custom_get_instances() function"
-    while IFS='' read -r line; do node_names+=("$line"); done < <(log_dump_custom_get_instances node)
-  elif [[ ! "${node_ssh_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
+    node_names=( $(log_dump_custom_get_instances node) )
+  elif [[ ! "${node_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
     echo "Node SSH not supported for ${KUBERNETES_PROVIDER}"
     return
   else
@@ -388,7 +386,7 @@ function dump_nodes() {
   linux_nodes_selected_for_logs=()
   if [[ -n "${LOGDUMP_ONLY_N_RANDOM_NODES:-}" ]]; then
     # We randomly choose 'LOGDUMP_ONLY_N_RANDOM_NODES' many nodes for fetching logs.
-    for index in $(shuf -i 0-$(( ${#node_names[*]} - 1 )) -n "${LOGDUMP_ONLY_N_RANDOM_NODES}")
+    for index in `shuf -i 0-$(( ${#node_names[*]} - 1 )) -n ${LOGDUMP_ONLY_N_RANDOM_NODES}`
     do
       linux_nodes_selected_for_logs+=("${node_names[$index]}")
     done
@@ -437,11 +435,11 @@ function dump_nodes() {
 # Sets:
 #   NON_LOGEXPORTED_NODES
 function find_non_logexported_nodes() {
-  succeeded_nodes=$(gsutil ls "${gcs_artifacts_dir}/logexported-nodes-registry") || return 1
+  succeeded_nodes=$(gsutil ls ${gcs_artifacts_dir}/logexported-nodes-registry) || return 1
   echo "Successfully listed marker files for successful nodes"
   NON_LOGEXPORTED_NODES=()
   for node in "${NODE_NAMES[@]}"; do
-    if [[ ! "${succeeded_nodes}" =~ ${node} ]]; then
+    if [[ ! "${succeeded_nodes}" =~ "${node}" ]]; then
       NON_LOGEXPORTED_NODES+=("${node}")
     fi
   done
@@ -452,7 +450,7 @@ function find_non_logexported_nodes() {
 function dump_nodes_with_logexporter() {
   if [[ -n "${use_custom_instance_list}" ]]; then
     echo "Dumping logs for nodes provided by log_dump_custom_get_instances() function"
-    while IFS='' read -r line; do NODE_NAMES+=("$line"); done < <(log_dump_custom_get_instances node)
+    NODE_NAMES=( $(log_dump_custom_get_instances node) )
   else
     echo "Detecting nodes in the cluster"
     detect-node-names &> /dev/null
@@ -464,7 +462,7 @@ function dump_nodes_with_logexporter() {
   fi
 
   # Obtain parameters required by logexporter.
-  local -r service_account_credentials="$(base64 "${GOOGLE_APPLICATION_CREDENTIALS}" | tr -d '\n')"
+  local -r service_account_credentials="$(cat ${GOOGLE_APPLICATION_CREDENTIALS} | base64 | tr -d '\n')"
   local -r cloud_provider="${KUBERNETES_PROVIDER}"
   local -r enable_hollow_node_logs="${ENABLE_HOLLOW_NODE_LOGS:-false}"
   local -r logexport_sleep_seconds="$(( 90 + NUM_NODES / 3 ))"
@@ -506,10 +504,10 @@ function dump_nodes_with_logexporter() {
   # Store logs from logexporter pods to allow debugging log exporting process
   # itself.
   proc=${max_dump_processes}
-  while read -r pod node; do
+  "${KUBECTL}" get pods -n "${logexporter_namespace}" -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.nodeName}{"\n"}{end}' | while read pod node; do
     echo "Fetching logs from ${pod} running on ${node}"
-    mkdir -p "${report_dir}/${node}"
-    "${KUBECTL}" logs -n "${logexporter_namespace}" "${pod}" > "${report_dir}/${node}/${pod}.log" &
+    mkdir -p ${report_dir}/${node}
+    "${KUBECTL}" logs -n "${logexporter_namespace}" ${pod} > ${report_dir}/${node}/${pod}.log &
 
     # We don't want to run more than ${max_dump_processes} at a time, so
     # wait once we hit that many nodes. This isn't ideal, since one might
@@ -519,13 +517,14 @@ function dump_nodes_with_logexporter() {
       proc=${max_dump_processes}
       wait
     fi
-  done < <("${KUBECTL}" get pods -n "${logexporter_namespace}" -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.nodeName}{"\n"}{end}')
+  done
   # Wait for any remaining processes.
   if [[ proc -gt 0 && proc -lt ${max_dump_processes} ]]; then
     wait
   fi
 
   # List registry of marker files (of nodes whose logexporter succeeded) from GCS.
+  local nodes_succeeded
   for retry in {1..10}; do
     if find_non_logexported_nodes; then
       break
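
One behavioral consequence of the change in the two hunks above is worth spelling out: piping kubectl get pods ... | while read ... runs the loop body in a subshell, so the proc counter updated inside the loop is not visible to the if [[ proc -gt 0 ... ]] check after done, whereas the done < <(...) form keeps the loop in the current shell. A reduced sketch of that scoping difference (no kubectl involved):

    count=0
    printf 'a\nb\nc\n' | while read -r line; do count=$((count + 1)); done
    echo "after pipe: ${count}"                  # prints 0 - the loop body ran in a subshell
    count=0
    while read -r line; do count=$((count + 1)); done < <(printf 'a\nb\nc\n')
    echo "after process substitution: ${count}"  # prints 3
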
@@ -555,33 +554,32 @@ function dump_nodes_with_logexporter() {
   "${KUBECTL}" get pods --namespace "${logexporter_namespace}" || true
   "${KUBECTL}" delete namespace "${logexporter_namespace}" || true
   if [[ "${#failed_nodes[@]}" != 0 ]]; then
-    echo -e "Dumping logs through SSH for the following nodes:\n${failed_nodes[*]}"
+    echo -e "Dumping logs through SSH for the following nodes:\n${failed_nodes[@]}"
     dump_nodes "${failed_nodes[@]}"
   fi
 }
 
 function detect_node_failures() {
-  if ! [[ "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
+  if ! [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
     return
   fi
 
   detect-node-names
   if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
-    local all_instance_groups=("${INSTANCE_GROUPS[@]}" "${WINDOWS_INSTANCE_GROUPS[@]}")
+    local all_instance_groups=(${INSTANCE_GROUPS[@]} ${WINDOWS_INSTANCE_GROUPS[@]})
   else
-    local all_instance_groups=("${INSTANCE_GROUPS[@]}")
+    local all_instance_groups=(${INSTANCE_GROUPS[@]})
   fi
 
   if [ -z "${all_instance_groups:-}" ]; then
     return
   fi
   for group in "${all_instance_groups[@]}"; do
-    local creation_timestamp
-    creation_timestamp=$(gcloud compute instance-groups managed describe \
-      "${group}" \
-      --project "${PROJECT}" \
-      --zone "${ZONE}" \
-      --format='value(creationTimestamp)')
+    local creation_timestamp=$(gcloud compute instance-groups managed describe \
+      "${group}" \
+      --project "${PROJECT}" \
+      --zone "${ZONE}" \
+      --format='value(creationTimestamp)')
     echo "Failures for ${group} (if any):"
     gcloud logging read --order=asc \
       --format='table(timestamp,jsonPayload.resource.name,jsonPayload.event_subtype)' \