@@ -30,7 +30,7 @@ readonly logexporter_namespace="${3:-logexporter}"
30
30
# check for a function named log_dump_custom_get_instances. If it's
31
31
# defined, we assume the function can me called with one argument, the
32
32
# role, which is either "master" or "node".
33
- echo " Checking for custom logdump instances, if any"
33
+ echo ' Checking for custom logdump instances, if any'
34
34
if [[ $( type -t log_dump_custom_get_instances) == " function" ]]; then
35
35
readonly use_custom_instance_list=yes
36
36
else
@@ -67,10 +67,10 @@ readonly max_dump_processes=25
67
67
function setup() {
68
68
KUBE_ROOT=$( dirname " ${BASH_SOURCE[0]} " ) /../..
69
69
if [[ -z " ${use_custom_instance_list} " ]]; then
70
- : ${KUBE_CONFIG_FILE:= " config-test.sh" }
71
- echo " Sourcing kube-util.sh"
70
+ : " ${KUBE_CONFIG_FILE:= ' config-test.sh' } "
71
+ echo ' Sourcing kube-util.sh'
72
72
source " ${KUBE_ROOT} /cluster/kube-util.sh"
73
- echo " Detecting project"
73
+ echo ' Detecting project'
74
74
detect-project 2>&1
75
75
elif [[ " ${KUBERNETES_PROVIDER} " == " gke" ]]; then
76
76
echo " Using 'use_custom_instance_list' with gke, skipping check for LOG_DUMP_SSH_KEY and LOG_DUMP_SSH_USER"
@@ -80,17 +80,17 @@ function setup() {
80
80
source " ${KUBE_ROOT} /cluster/gce/util.sh"
81
81
ZONE=" ${gke_zone} "
82
82
elif [[ -z " ${LOG_DUMP_SSH_KEY:- } " ]]; then
83
- echo " LOG_DUMP_SSH_KEY not set, but required when using log_dump_custom_get_instances"
83
+ echo ' LOG_DUMP_SSH_KEY not set, but required when using log_dump_custom_get_instances'
84
84
exit 1
85
85
elif [[ -z " ${LOG_DUMP_SSH_USER:- } " ]]; then
86
- echo " LOG_DUMP_SSH_USER not set, but required when using log_dump_custom_get_instances"
86
+ echo ' LOG_DUMP_SSH_USER not set, but required when using log_dump_custom_get_instances'
87
87
exit 1
88
88
fi
89
89
source " ${KUBE_ROOT} /hack/lib/util.sh"
90
90
}
91
91
92
92
function log-dump-ssh() {
93
- if [[ " ${gcloud_supported_providers} " =~ " ${KUBERNETES_PROVIDER} " ]]; then
93
+ if [[ " ${gcloud_supported_providers} " =~ ${KUBERNETES_PROVIDER} ]]; then
94
94
ssh-to-node " $@ "
95
95
return
96
96
fi
@@ -102,12 +102,14 @@ function log-dump-ssh() {
102
102
}
103
103
104
104
# Copy all files /var/log/{$3}.log on node $1 into local dir $2.
105
- # $3 should be a space-separated string of files .
105
+ # $3 should be a string array of file names .
106
106
# This function shouldn't ever trigger errexit, but doesn't block stderr.
107
107
function copy-logs-from-node() {
108
108
local -r node=" ${1} "
109
109
local -r dir=" ${2} "
110
- local files=( ${3} )
110
+ shift
111
+ shift
112
+ local files=(" $@ " )
111
113
# Append "*"
112
114
# The * at the end is needed to also copy rotated logs (which happens
113
115
# in large clusters and long runs).
@@ -117,12 +119,13 @@ function copy-logs-from-node() {
117
119
# Comma delimit (even the singleton, or scp does the wrong thing), surround by braces.
118
120
local -r scp_files=" {$( printf " %s," " ${files[@]} " ) }"
119
121
120
- if [[ " ${gcloud_supported_providers} " =~ " ${KUBERNETES_PROVIDER} " ]]; then
122
+ if [[ " ${gcloud_supported_providers} " =~ ${KUBERNETES_PROVIDER} ]]; then
121
123
# get-serial-port-output lets you ask for ports 1-4, but currently (11/21/2016) only port 1 contains useful information
122
124
gcloud compute instances get-serial-port-output --project " ${PROJECT} " --zone " ${ZONE} " --port 1 " ${node} " > " ${dir} /serial-1.log" || true
123
125
gcloud compute scp --recurse --project " ${PROJECT} " --zone " ${ZONE} " " ${node} :${scp_files} " " ${dir} " > /dev/null || true
124
126
elif [[ " ${KUBERNETES_PROVIDER} " == " aws" ]]; then
125
- local ip=$( get_ssh_hostname " ${node} " )
127
+ local ip
128
+ ip=$( get_ssh_hostname " ${node} " )
126
129
scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i " ${AWS_SSH_KEY} " " ${SSH_USER} @${ip} :${scp_files} " " ${dir} " > /dev/null || true
127
130
elif [[ -n " ${use_custom_instance_list} " ]]; then
128
131
scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i " ${LOG_DUMP_SSH_KEY} " " ${LOG_DUMP_SSH_USER} @${node} :${scp_files} " " ${dir} " > /dev/null || true
@@ -139,26 +142,34 @@ function copy-logs-from-node() {
139
142
function save-logs() {
140
143
local -r node_name=" ${1} "
141
144
local -r dir=" ${2} "
142
- local files=" ${3} "
145
+ local files=()
146
+ IFS=' ' read -r -a files <<< " $3"
143
147
local opt_systemd_services=" ${4:- " " } "
144
148
local on_master=" ${5:- " false" } "
145
149
146
- files=" ${files} ${extra_log_files} "
150
+ local extra=()
151
+ IFS=' ' read -r -a extra <<< " $extra_log_files"
152
+ files+=(" ${extra[@]} " )
147
153
if [[ -n " ${use_custom_instance_list} " ]]; then
148
154
if [[ -n " ${LOG_DUMP_SAVE_LOGS:- } " ]]; then
149
- files=" ${files} ${LOG_DUMP_SAVE_LOGS:- } "
155
+ local dump=()
156
+ IFS=' ' read -r -a dump <<< " ${LOG_DUMP_SAVE_LOGS:-}"
157
+ files+=(" ${dump[@]} " )
150
158
fi
151
159
else
160
+ local providerlogs=()
152
161
case " ${KUBERNETES_PROVIDER} " in
153
162
gce|gke)
154
- files= " ${files} ${gce_logfiles} "
163
+ IFS= ' ' read -r -a providerlogs <<< " ${gce_logfiles}"
155
164
;;
156
165
aws)
157
- files= " ${files} ${aws_logfiles} "
166
+ IFS= ' ' read -r -a providerlogs <<< " ${aws_logfiles}"
158
167
;;
159
168
esac
169
+ files+=(" ${providerlogs[@]} " )
160
170
fi
161
- local -r services=( ${systemd_services} ${opt_systemd_services} ${extra_systemd_services} )
171
+ local services
172
+ read -r -a services <<< " ${systemd_services} ${opt_systemd_services} ${extra_systemd_services}"
162
173
163
174
if log-dump-ssh " ${node_name} " " command -v journalctl" & > /dev/null; then
164
175
if [[ " ${on_master} " == " true" ]]; then
@@ -178,7 +189,11 @@ function save-logs() {
178
189
log-dump-ssh " ${node_name} " " sudo journalctl --output=short-precise" > " ${dir} /systemd.log" || true
179
190
fi
180
191
else
181
- files=" ${kern_logfile} ${files} ${initd_logfiles} ${supervisord_logfiles} "
192
+ local tmpfiles=()
193
+ for f in " ${kern_logfile} " " ${initd_logfiles} " " ${supervisord_logfiles} " ; do
194
+ IFS=' ' read -r -a tmpfiles <<< " $f"
195
+ files+=(" ${tmpfiles[@]} " )
196
+ done
182
197
fi
183
198
184
199
# Try dumping coverage profiles, if it looks like coverage is enabled in the first place.
@@ -192,15 +207,15 @@ function save-logs() {
192
207
run-in-docker-container " ${node_name} " " kube-proxy" " cat /tmp/k8s-kube-proxy.cov" > " ${dir} /kube-proxy.cov" || true
193
208
fi
194
209
else
195
- echo " Coverage profiles seem to exist, but cannot be retrieved from inside containers."
210
+ echo ' Coverage profiles seem to exist, but cannot be retrieved from inside containers.'
196
211
fi
197
212
fi
198
213
199
- echo " Changing logfiles to be world-readable for download"
214
+ echo ' Changing logfiles to be world-readable for download'
200
215
log-dump-ssh " ${node_name} " " sudo chmod -R a+r /var/log" || true
201
216
202
- echo " Copying '${files} ' from ${node_name} "
203
- copy-logs-from-node " ${node_name} " " ${dir} " " ${files} "
217
+ echo " Copying '${files[*] } ' from ${node_name} "
218
+ copy-logs-from-node " ${node_name} " " ${dir} " " ${files[@] } "
204
219
}
205
220
206
221
# Saves a copy of the Windows Docker event log to ${WINDOWS_LOGS_DIR}\docker.log
@@ -246,8 +261,9 @@ function save-windows-logs-via-diagnostics-tool() {
246
261
local node=" ${1} "
247
262
local dest_dir=" ${2} "
248
263
249
- gcloud compute instances add-metadata ${node} --metadata enable-diagnostics=true --project=${PROJECT} --zone=${ZONE}
250
- local logs_archive_in_gcs=$( gcloud alpha compute diagnose export-logs ${node} --zone=${ZONE} --project=${PROJECT} | tail -n 1)
264
+ gcloud compute instances add-metadata " ${node} " --metadata enable-diagnostics=true --project=" ${PROJECT} " --zone=" ${ZONE} "
265
+ local logs_archive_in_gcs
266
+ logs_archive_in_gcs=$( gcloud alpha compute diagnose export-logs " ${node} " " --zone=${ZONE} " " --project=${PROJECT} " | tail -n 1)
251
267
local temp_local_path=" ${node} .zip"
252
268
for retry in {1..20}; do
253
269
if gsutil mv " ${logs_archive_in_gcs} " " ${temp_local_path} " > /dev/null 2>&1 ; then
@@ -259,8 +275,8 @@ function save-windows-logs-via-diagnostics-tool() {
259
275
done
260
276
261
277
if [[ -f " ${temp_local_path} " ]]; then
262
- unzip ${temp_local_path} -d " ${dest_dir} " > /dev/null
263
- rm -f ${temp_local_path}
278
+ unzip " ${temp_local_path} " -d " ${dest_dir} " > /dev/null
279
+ rm -f " ${temp_local_path} "
264
280
fi
265
281
}
266
282
@@ -273,14 +289,14 @@ function save-windows-logs-via-ssh() {
273
289
export-windows-docker-images-list " ${node} "
274
290
275
291
local remote_files=()
276
- for file in ${windows_node_logfiles[@]} ; do
292
+ for file in " ${windows_node_logfiles[@]} " ; do
277
293
remote_files+=( " ${WINDOWS_LOGS_DIR} \\ ${file} " )
278
294
done
279
295
remote_files+=( " ${windows_node_otherfiles[@]} " )
280
296
281
297
# TODO(pjh, yujuhong): handle rotated logs and copying multiple files at the
282
298
# same time.
283
- for remote_file in ${remote_files[@]} ; do
299
+ for remote_file in " ${remote_files[@]} " ; do
284
300
# Retry up to 3 times to allow ssh keys to be properly propagated and
285
301
# stored.
286
302
for retry in {1..3}; do
@@ -302,7 +318,7 @@ function save-logs-windows() {
302
318
local -r node=" ${1} "
303
319
local -r dest_dir=" ${2} "
304
320
305
- if [[ ! " ${gcloud_supported_providers} " =~ " ${KUBERNETES_PROVIDER} " ]]; then
321
+ if [[ ! " ${gcloud_supported_providers} " =~ ${KUBERNETES_PROVIDER} ]]; then
306
322
echo " Not saving logs for ${node} , Windows log dumping requires gcloud support"
307
323
return
308
324
fi
@@ -324,28 +340,28 @@ function run-in-docker-container() {
324
340
local node_name=" $1 "
325
341
local container=" $2 "
326
342
shift 2
327
- log-dump-ssh " ${node_name} " " docker exec \"\$ (docker ps -f label=io.kubernetes.container.name=${container} --format \" {{.ID}}\" )\" $@ "
343
+ log-dump-ssh " ${node_name} " " docker exec \"\$ (docker ps -f label=io.kubernetes.container.name=${container} --format \" {{.ID}}\" )\" $* "
328
344
}
329
345
330
346
function dump_masters() {
331
347
local master_names
332
348
if [[ -n " ${use_custom_instance_list} " ]]; then
333
- master_names=( $ ( log_dump_custom_get_instances master) )
334
- elif [[ ! " ${master_ssh_supported_providers} " =~ " ${KUBERNETES_PROVIDER} " ]]; then
349
+ while IFS= ' ' read -r line ; do master_names+=( " $line " ) ; done < < ( log_dump_custom_get_instances master)
350
+ elif [[ ! " ${master_ssh_supported_providers} " =~ ${KUBERNETES_PROVIDER} ]]; then
335
351
echo " Master SSH not supported for ${KUBERNETES_PROVIDER} "
336
352
return
337
353
elif [[ -n " ${KUBEMARK_MASTER_NAME:- } " ]]; then
338
354
master_names=( " ${KUBEMARK_MASTER_NAME} " )
339
355
else
340
356
if ! (detect-master); then
341
- echo " Master not detected. Is the cluster up?"
357
+ echo ' Master not detected. Is the cluster up?'
342
358
return
343
359
fi
344
360
master_names=( " ${MASTER_NAME} " )
345
361
fi
346
362
347
363
if [[ " ${# master_names[@]} " == 0 ]]; then
348
- echo " No masters found?"
364
+ echo ' No masters found?'
349
365
return
350
366
fi
351
367
@@ -378,16 +394,16 @@ function dump_nodes() {
378
394
local node_names=()
379
395
local windows_node_names=()
380
396
if [[ -n " ${1:- } " ]]; then
381
- echo " Dumping logs for nodes provided as args to dump_nodes() function"
397
+ echo ' Dumping logs for nodes provided as args to dump_nodes() function'
382
398
node_names=( " $@ " )
383
399
elif [[ -n " ${use_custom_instance_list} " ]]; then
384
- echo " Dumping logs for nodes provided by log_dump_custom_get_instances() function"
385
- node_names=( $ ( log_dump_custom_get_instances node) )
386
- elif [[ ! " ${node_ssh_supported_providers} " =~ " ${KUBERNETES_PROVIDER} " ]]; then
400
+ echo ' Dumping logs for nodes provided by log_dump_custom_get_instances() function'
401
+ while IFS= ' ' read -r line ; do node_names+=( " $line " ) ; done < < ( log_dump_custom_get_instances node)
402
+ elif [[ ! " ${node_ssh_supported_providers} " =~ ${KUBERNETES_PROVIDER} ]]; then
387
403
echo " Node SSH not supported for ${KUBERNETES_PROVIDER} "
388
404
return
389
405
else
390
- echo " Detecting nodes in the cluster"
406
+ echo ' Detecting nodes in the cluster'
391
407
detect-node-names & > /dev/null
392
408
if [[ -n " ${NODE_NAMES:- } " ]]; then
393
409
node_names=( " ${NODE_NAMES[@]} " )
@@ -398,7 +414,7 @@ function dump_nodes() {
398
414
fi
399
415
400
416
if [[ " ${# node_names[@]} " == 0 && " ${# windows_node_names[@]} " == 0 ]]; then
401
- echo " No nodes found!"
417
+ echo ' No nodes found!'
402
418
return
403
419
fi
404
420
@@ -410,7 +426,7 @@ function dump_nodes() {
410
426
linux_nodes_selected_for_logs=()
411
427
if [[ -n " ${LOGDUMP_ONLY_N_RANDOM_NODES:- } " ]]; then
412
428
# We randomly choose 'LOGDUMP_ONLY_N_RANDOM_NODES' many nodes for fetching logs.
413
- for index in ` shuf -i 0-$(( ${# node_names[*]} - 1 )) -n ${LOGDUMP_ONLY_N_RANDOM_NODES} `
429
+ for index in $( shuf -i 0-$(( ${# node_names[*]} - 1 )) -n " ${LOGDUMP_ONLY_N_RANDOM_NODES} " )
414
430
do
415
431
linux_nodes_selected_for_logs+=(" ${node_names[$index]} " )
416
432
done
@@ -473,10 +489,10 @@ function find_non_logexported_nodes() {
473
489
local file=" ${gcs_artifacts_dir} /logexported-nodes-registry"
474
490
echo " Listing marker files ($file ) for successful nodes..."
475
491
succeeded_nodes=$( gsutil ls " ${file} " ) || return 1
476
- echo " Successfully listed marker files for successful nodes"
492
+ echo ' Successfully listed marker files for successful nodes'
477
493
NON_LOGEXPORTED_NODES=()
478
494
for node in " ${NODE_NAMES[@]} " ; do
479
- if [[ ! " ${succeeded_nodes} " =~ " ${node} " ]]; then
495
+ if [[ ! " ${succeeded_nodes} " =~ ${node} ]]; then
480
496
NON_LOGEXPORTED_NODES+=(" ${node} " )
481
497
fi
482
498
done
@@ -486,20 +502,21 @@ function find_non_logexported_nodes() {
486
502
# does not run on Windows nodes.
487
503
function dump_nodes_with_logexporter() {
488
504
if [[ -n " ${use_custom_instance_list} " ]]; then
489
- echo " Dumping logs for nodes provided by log_dump_custom_get_instances() function"
490
- NODE_NAMES=( $( log_dump_custom_get_instances node) )
505
+ echo ' Dumping logs for nodes provided by log_dump_custom_get_instances() function'
506
+ NODE_NAMES=()
507
+ while IFS=' ' read -r line; do NODE_NAMES+=(" $line " ); done < <( log_dump_custom_get_instances node)
491
508
else
492
- echo " Detecting nodes in the cluster"
509
+ echo ' Detecting nodes in the cluster'
493
510
detect-node-names & > /dev/null
494
511
fi
495
512
496
513
if [[ -z " ${NODE_NAMES:- } " ]]; then
497
- echo " No nodes found!"
514
+ echo ' No nodes found!'
498
515
return
499
516
fi
500
517
501
518
# Obtain parameters required by logexporter.
502
- local -r service_account_credentials=" $( cat ${GOOGLE_APPLICATION_CREDENTIALS} | base64 | tr -d ' \n' ) "
519
+ local -r service_account_credentials=" $( base64 " ${GOOGLE_APPLICATION_CREDENTIALS} " | tr -d ' \n' ) "
503
520
local -r cloud_provider=" ${KUBERNETES_PROVIDER} "
504
521
local -r enable_hollow_node_logs=" ${ENABLE_HOLLOW_NODE_LOGS:- false} "
505
522
local -r logexport_sleep_seconds=" $(( 90 + NUM_NODES / 3 )) "
@@ -526,7 +543,7 @@ function dump_nodes_with_logexporter() {
526
543
# Create the logexporter namespace, service-account secret and the logexporter daemonset within that namespace.
527
544
KUBECTL=" ${KUBE_ROOT} /cluster/kubectl.sh"
528
545
if ! " ${KUBECTL} " create -f " ${manifest_yaml} " ; then
529
- echo " Failed to create logexporter daemonset.. falling back to logdump through SSH"
546
+ echo ' Failed to create logexporter daemonset.. falling back to logdump through SSH'
530
547
" ${KUBECTL} " delete namespace " ${logexporter_namespace} " || true
531
548
dump_nodes " ${NODE_NAMES[@]} "
532
549
return
@@ -538,7 +555,7 @@ function dump_nodes_with_logexporter() {
538
555
while true ; do
539
556
now=" $( date +%s) "
540
557
if [[ $(( now - start)) -gt ${logexport_sleep_seconds} ]]; then
541
- echo " Waiting for all nodes to be logexported timed out."
558
+ echo ' Waiting for all nodes to be logexported timed out.'
542
559
break
543
560
fi
544
561
if find_non_logexported_nodes; then
@@ -569,14 +586,13 @@ function dump_nodes_with_logexporter() {
569
586
done ; wait)
570
587
571
588
# List registry of marker files (of nodes whose logexporter succeeded) from GCS.
572
- local nodes_succeeded
573
589
for retry in {1..10}; do
574
590
if find_non_logexported_nodes; then
575
591
break
576
592
else
577
593
echo " Attempt ${retry} failed to list marker files for successful nodes"
578
594
if [[ " ${retry} " == 10 ]]; then
579
- echo " Final attempt to list marker files failed.. falling back to logdump through SSH"
595
+ echo ' Final attempt to list marker files failed.. falling back to logdump through SSH'
580
596
" ${KUBECTL} " delete namespace " ${logexporter_namespace} " || true
581
597
dump_nodes " ${NODE_NAMES[@]} "
582
598
return
@@ -599,32 +615,33 @@ function dump_nodes_with_logexporter() {
599
615
" ${KUBECTL} " get pods --namespace " ${logexporter_namespace} " || true
600
616
" ${KUBECTL} " delete namespace " ${logexporter_namespace} " || true
601
617
if [[ " ${# failed_nodes[@]} " != 0 ]]; then
602
- echo -e " Dumping logs through SSH for the following nodes:\n${failed_nodes[@ ]} "
618
+ echo -e " Dumping logs through SSH for the following nodes:\n${failed_nodes[* ]} "
603
619
dump_nodes " ${failed_nodes[@]} "
604
620
fi
605
621
}
606
622
607
623
function detect_node_failures() {
608
- if ! [[ " ${gcloud_supported_providers} " =~ " ${KUBERNETES_PROVIDER} " ]]; then
624
+ if ! [[ " ${gcloud_supported_providers} " =~ ${KUBERNETES_PROVIDER} ]]; then
609
625
return
610
626
fi
611
627
612
628
detect-node-names
613
629
if [[ " ${KUBERNETES_PROVIDER} " == " gce" ]]; then
614
- local all_instance_groups=(${INSTANCE_GROUPS[@]} ${WINDOWS_INSTANCE_GROUPS[@]} )
630
+ local all_instance_groups=(" ${INSTANCE_GROUPS[@]} " " ${WINDOWS_INSTANCE_GROUPS[@]} " )
615
631
else
616
- local all_instance_groups=(${INSTANCE_GROUPS[@]} )
632
+ local all_instance_groups=(" ${INSTANCE_GROUPS[@]} " )
617
633
fi
618
634
619
635
if [ -z " ${all_instance_groups:- } " ]; then
620
636
return
621
637
fi
622
638
for group in " ${all_instance_groups[@]} " ; do
623
- local creation_timestamp=$( gcloud compute instance-groups managed describe \
624
- " ${group} " \
625
- --project " ${PROJECT} " \
626
- --zone " ${ZONE} " \
627
- --format=' value(creationTimestamp)' )
639
+ local creation_timestamp
640
+ creation_timestamp=$( gcloud compute instance-groups managed describe \
641
+ " ${group} " \
642
+ --project " ${PROJECT} " \
643
+ --zone " ${ZONE} " \
644
+ --format=' value(creationTimestamp)' )
628
645
echo " Failures for ${group} (if any):"
629
646
gcloud logging read --order=asc \
630
647
--format=' table(timestamp,jsonPayload.resource.name,jsonPayload.event_subtype)' \
@@ -644,7 +661,7 @@ function main() {
644
661
echo " Dumping logs from master locally to '${report_dir} '"
645
662
dump_masters
646
663
if [[ " ${DUMP_ONLY_MASTER_LOGS:- } " == " true" ]]; then
647
- echo " Skipping dumping of node logs"
664
+ echo ' Skipping dumping of node logs'
648
665
return
649
666
fi
650
667
0 commit comments