@@ -65,7 +65,7 @@ readonly max_dump_processes=25
 function setup() {
   KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
   if [[ -z "${use_custom_instance_list}" ]]; then
-    : ${KUBE_CONFIG_FILE:="config-test.sh"}
+    : "${KUBE_CONFIG_FILE:="config-test.sh"}"
     echo "Sourcing kube-util.sh"
     source "${KUBE_ROOT}/cluster/kube-util.sh"
     echo "Detecting project"
@@ -87,7 +87,7 @@ function setup() {
 }
 
 function log-dump-ssh() {
-  if [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+  if [[ "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     ssh-to-node "$@"
     return
   fi
@@ -104,7 +104,7 @@ function log-dump-ssh() {
 function copy-logs-from-node() {
   local -r node="${1}"
   local -r dir="${2}"
-  local files=( ${3} )
+  read -r -a files <<< "${3}"
   # Append "*"
   # The * at the end is needed to also copy rotated logs (which happens
   # in large clusters and long runs).
@@ -114,12 +114,13 @@ function copy-logs-from-node() {
   # Comma delimit (even the singleton, or scp does the wrong thing), surround by braces.
   local -r scp_files="{$(printf "%s," "${files[@]}")}"
 
-  if [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+  if [[ "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     # get-serial-port-output lets you ask for ports 1-4, but currently (11/21/2016) only port 1 contains useful information
     gcloud compute instances get-serial-port-output --project "${PROJECT}" --zone "${ZONE}" --port 1 "${node}" > "${dir}/serial-1.log" || true
     gcloud compute scp --recurse --project "${PROJECT}" --zone "${ZONE}" "${node}:${scp_files}" "${dir}" > /dev/null || true
   elif [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
-    local ip=$(get_ssh_hostname "${node}")
+    local ip
+    ip=$(get_ssh_hostname "${node}")
     scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}:${scp_files}" "${dir}" > /dev/null || true
   elif [[ -n "${use_custom_instance_list}" ]]; then
     scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${LOG_DUMP_SSH_KEY}" "${LOG_DUMP_SSH_USER}@${node}:${scp_files}" "${dir}" > /dev/null || true
@@ -136,25 +137,25 @@ function copy-logs-from-node() {
 function save-logs() {
   local -r node_name="${1}"
   local -r dir="${2}"
-  local files="${3}"
+  local save_files="${3}"
   local opt_systemd_services="${4:-""}"
   local on_master="${5:-"false"}"
 
   if [[ -n "${use_custom_instance_list}" ]]; then
     if [[ -n "${LOG_DUMP_SAVE_LOGS:-}" ]]; then
-      files="${files} ${LOG_DUMP_SAVE_LOGS:-}"
+      save_files="${save_files} ${LOG_DUMP_SAVE_LOGS:-}"
     fi
   else
     case "${KUBERNETES_PROVIDER}" in
       gce|gke|kubernetes-anywhere)
-        files="${files} ${gce_logfiles}"
+        save_files="${save_files} ${gce_logfiles}"
         ;;
       aws)
-        files="${files} ${aws_logfiles}"
+        save_files="${save_files} ${aws_logfiles}"
         ;;
     esac
   fi
-  local -r services=( ${systemd_services} ${opt_systemd_services} ${LOG_DUMP_SAVE_SERVICES:-} )
+  read -r -a services <<< "${systemd_services} ${opt_systemd_services} ${LOG_DUMP_SAVE_SERVICES:-}"
 
   if log-dump-ssh "${node_name}" "command -v journalctl" &> /dev/null; then
     if [[ "${on_master}" == "true" ]]; then
@@ -174,7 +175,7 @@ function save-logs() {
       log-dump-ssh "${node_name}" "sudo journalctl --output=short-precise" > "${dir}/systemd.log" || true
     fi
   else
-    files="${kern_logfile} ${files} ${initd_logfiles} ${supervisord_logfiles}"
+    save_files="${kern_logfile} ${save_files} ${initd_logfiles} ${supervisord_logfiles}"
   fi
 
   # Try dumping coverage profiles, if it looks like coverage is enabled in the first place.
@@ -195,8 +196,8 @@ function save-logs() {
   echo "Changing logfiles to be world-readable for download"
   log-dump-ssh "${node_name}" "sudo chmod -R a+r /var/log" || true
 
-  echo "Copying '${files}' from ${node_name}"
-  copy-logs-from-node "${node_name}" "${dir}" "${files}"
+  echo "Copying '${save_files}' from ${node_name}"
+  copy-logs-from-node "${node_name}" "${dir}" "${save_files}"
 }
 
 # Saves a copy of the Windows Docker event log to ${WINDOWS_LOGS_DIR}\docker.log
@@ -223,8 +224,9 @@ function save-windows-logs-via-diagnostics-tool() {
   local node="${1}"
   local dest_dir="${2}"
 
-  gcloud compute instances add-metadata ${node} --metadata enable-diagnostics=true --project=${PROJECT} --zone=${ZONE}
-  local logs_archive_in_gcs=$(gcloud alpha compute diagnose export-logs ${node} --zone=${ZONE} --project=${PROJECT} | tail -n 1)
+  gcloud compute instances add-metadata "${node}" --metadata enable-diagnostics=true --project="${PROJECT}" --zone="${ZONE}"
+  local logs_archive_in_gcs
+  logs_archive_in_gcs=$(gcloud alpha compute diagnose export-logs "${node}" --zone="${ZONE}" --project="${PROJECT}" | tail -n 1)
   local temp_local_path="${node}.zip"
   for retry in {1..20}; do
     if gsutil mv "${logs_archive_in_gcs}" "${temp_local_path}" > /dev/null 2>&1; then
@@ -236,8 +238,8 @@ function save-windows-logs-via-diagnostics-tool() {
   done
 
   if [[ -f "${temp_local_path}" ]]; then
-    unzip ${temp_local_path} -d "${dest_dir}" > /dev/null
-    rm -f ${temp_local_path}
+    unzip "${temp_local_path}" -d "${dest_dir}" > /dev/null
+    rm -f "${temp_local_path}"
   fi
 }
 
@@ -249,14 +251,14 @@ function save-windows-logs-via-ssh() {
   export-windows-docker-event-log "${node}"
 
   local remote_files=()
-  for file in ${windows_node_logfiles[@]}; do
+  for file in ${windows_node_logfiles}; do
     remote_files+=( "${WINDOWS_LOGS_DIR}\\${file}" )
   done
   remote_files+=( "${windows_node_otherfiles[@]}" )
 
   # TODO(pjh, yujuhong): handle rotated logs and copying multiple files at the
   # same time.
-  for remote_file in ${remote_files[@]}; do
+  for remote_file in "${remote_files[@]}"; do
     # Retry up to 3 times to allow ssh keys to be properly propagated and
     # stored.
     for retry in {1..3}; do
@@ -278,7 +280,7 @@ function save-logs-windows() {
   local -r node="${1}"
   local -r dest_dir="${2}"
 
-  if [[ ! "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+  if [[ ! "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
    echo "Not saving logs for ${node}, Windows log dumping requires gcloud support"
    return
   fi
@@ -300,14 +302,14 @@ function run-in-docker-container() {
   local node_name="$1"
   local container="$2"
   shift 2
-  log-dump-ssh "${node_name}" "docker exec \"\$(docker ps -f label=io.kubernetes.container.name=${container} --format \"{{.ID}}\")\" $@"
+  log-dump-ssh "${node_name}" "docker exec \"\$(docker ps -f label=io.kubernetes.container.name=${container} --format \"{{.ID}}\")\" $*"
 }
 
 function dump_masters() {
   local master_names
   if [[ -n "${use_custom_instance_list}" ]]; then
-    master_names=( $(log_dump_custom_get_instances master) )
-  elif [[ ! "${master_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+    while IFS='' read -r line; do master_names+=("$line"); done < <(log_dump_custom_get_instances master)
+  elif [[ ! "${master_ssh_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     echo "Master SSH not supported for ${KUBERNETES_PROVIDER}"
     return
   elif [[ -n "${KUBEMARK_MASTER_NAME:-}" ]]; then
@@ -358,8 +360,8 @@ function dump_nodes() {
     node_names=( "$@" )
   elif [[ -n "${use_custom_instance_list}" ]]; then
     echo "Dumping logs for nodes provided by log_dump_custom_get_instances() function"
-    node_names=( $(log_dump_custom_get_instances node) )
-  elif [[ ! "${node_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+    while IFS='' read -r line; do node_names+=("$line"); done < <(log_dump_custom_get_instances node)
+  elif [[ ! "${node_ssh_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     echo "Node SSH not supported for ${KUBERNETES_PROVIDER}"
     return
   else
@@ -386,7 +388,7 @@ function dump_nodes() {
   linux_nodes_selected_for_logs=()
   if [[ -n "${LOGDUMP_ONLY_N_RANDOM_NODES:-}" ]]; then
     # We randomly choose 'LOGDUMP_ONLY_N_RANDOM_NODES' many nodes for fetching logs.
-    for index in `shuf -i 0-$(( ${#node_names[*]} - 1 )) -n ${LOGDUMP_ONLY_N_RANDOM_NODES}`
+    for index in $(shuf -i 0-$(( ${#node_names[*]} - 1 )) -n "${LOGDUMP_ONLY_N_RANDOM_NODES}")
     do
       linux_nodes_selected_for_logs+=("${node_names[$index]}")
     done
@@ -435,11 +437,11 @@ function dump_nodes() {
 # Sets:
 #   NON_LOGEXPORTED_NODES
 function find_non_logexported_nodes() {
-  succeeded_nodes=$(gsutil ls ${gcs_artifacts_dir}/logexported-nodes-registry) || return 1
+  succeeded_nodes=$(gsutil ls "${gcs_artifacts_dir}/logexported-nodes-registry") || return 1
   echo "Successfully listed marker files for successful nodes"
   NON_LOGEXPORTED_NODES=()
   for node in "${NODE_NAMES[@]}"; do
-    if [[ ! "${succeeded_nodes}" =~ "${node}" ]]; then
+    if [[ ! "${succeeded_nodes}" =~ ${node} ]]; then
      NON_LOGEXPORTED_NODES+=("${node}")
     fi
   done
@@ -450,7 +452,7 @@ function find_non_logexported_nodes() {
 function dump_nodes_with_logexporter() {
   if [[ -n "${use_custom_instance_list}" ]]; then
     echo "Dumping logs for nodes provided by log_dump_custom_get_instances() function"
-    NODE_NAMES=( $(log_dump_custom_get_instances node) )
+    while IFS='' read -r line; do NODE_NAMES+=("$line"); done < <(log_dump_custom_get_instances node)
   else
     echo "Detecting nodes in the cluster"
     detect-node-names &> /dev/null
@@ -462,7 +464,7 @@ function dump_nodes_with_logexporter() {
   fi
 
   # Obtain parameters required by logexporter.
-  local -r service_account_credentials="$(cat ${GOOGLE_APPLICATION_CREDENTIALS} | base64 | tr -d '\n')"
+  local -r service_account_credentials="$(base64 "${GOOGLE_APPLICATION_CREDENTIALS}" | tr -d '\n')"
   local -r cloud_provider="${KUBERNETES_PROVIDER}"
   local -r enable_hollow_node_logs="${ENABLE_HOLLOW_NODE_LOGS:-false}"
   local -r logexport_sleep_seconds="$(( 90 + NUM_NODES / 3 ))"
@@ -504,10 +506,10 @@ function dump_nodes_with_logexporter() {
   # Store logs from logexporter pods to allow debugging log exporting process
   # itself.
   proc=${max_dump_processes}
-  "${KUBECTL}" get pods -n "${logexporter_namespace}" -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.nodeName}{"\n"}{end}' | while read pod node; do
+  while read -r pod node; do
     echo "Fetching logs from ${pod} running on ${node}"
-    mkdir -p ${report_dir}/${node}
-    "${KUBECTL}" logs -n "${logexporter_namespace}" ${pod} > ${report_dir}/${node}/${pod}.log &
+    mkdir -p "${report_dir}/${node}"
+    "${KUBECTL}" logs -n "${logexporter_namespace}" "${pod}" > "${report_dir}/${node}/${pod}.log" &
 
     # We don't want to run more than ${max_dump_processes} at a time, so
     # wait once we hit that many nodes. This isn't ideal, since one might
@@ -517,14 +519,13 @@ function dump_nodes_with_logexporter() {
       proc=${max_dump_processes}
       wait
     fi
-  done
+  done < <("${KUBECTL}" get pods -n "${logexporter_namespace}" -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.nodeName}{"\n"}{end}')
   # Wait for any remaining processes.
   if [[ proc -gt 0 && proc -lt ${max_dump_processes} ]]; then
     wait
   fi
 
   # List registry of marker files (of nodes whose logexporter succeeded) from GCS.
-  local nodes_succeeded
   for retry in {1..10}; do
     if find_non_logexported_nodes; then
       break
@@ -554,32 +555,33 @@ function dump_nodes_with_logexporter() {
   "${KUBECTL}" get pods --namespace "${logexporter_namespace}" || true
   "${KUBECTL}" delete namespace "${logexporter_namespace}" || true
   if [[ "${#failed_nodes[@]}" != 0 ]]; then
-    echo -e "Dumping logs through SSH for the following nodes:\n${failed_nodes[@]}"
+    echo -e "Dumping logs through SSH for the following nodes:\n${failed_nodes[*]}"
     dump_nodes "${failed_nodes[@]}"
   fi
 }
 
 function detect_node_failures() {
-  if ! [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+  if ! [[ "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     return
   fi
 
   detect-node-names
   if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
-    local all_instance_groups=(${INSTANCE_GROUPS[@]} ${WINDOWS_INSTANCE_GROUPS[@]})
+    local all_instance_groups=("${INSTANCE_GROUPS[@]}" "${WINDOWS_INSTANCE_GROUPS[@]}")
   else
-    local all_instance_groups=(${INSTANCE_GROUPS[@]})
+    local all_instance_groups=("${INSTANCE_GROUPS[@]}")
   fi
 
   if [ -z "${all_instance_groups:-}" ]; then
     return
   fi
   for group in "${all_instance_groups[@]}"; do
-    local creation_timestamp=$(gcloud compute instance-groups managed describe \
-      "${group}" \
-      --project "${PROJECT}" \
-      --zone "${ZONE}" \
-      --format='value(creationTimestamp)')
+    local creation_timestamp
+    creation_timestamp=$(gcloud compute instance-groups managed describe \
+      "${group}" \
+      --project "${PROJECT}" \
+      --zone "${ZONE}" \
+      --format='value(creationTimestamp)')
     echo "Failures for ${group} (if any):"
     gcloud logging read --order=asc \
       --format='table(timestamp,jsonPayload.resource.name,jsonPayload.event_subtype)' \