Skip to content

Commit 65abb91

Browse files
authored
Merge pull request #243 from ndokos/wip-ssh-status
Catch pipeline failures and return proper error status.
2 parents 53d5901 + f4f5618 commit 65abb91

File tree

16 files changed

+225
-37
lines changed

16 files changed

+225
-37
lines changed

agent/base

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#!/bin/bash
22

33
export PBENCH_debug_mode=0
4+
# pipeline status is set to the status of the last command that *failed*
5+
# in the pipeline (or 0 if all succeed): this way "ssh foo | sed '...' "
6+
# will catch any ssh failure
7+
set -o pipefail
48

59
# very first thing to do is figure out which pbench
610
# we are

agent/bench-scripts/test-bin/ssh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,24 @@ echo "$0 $*" >> $_testlog
55
if [[ "$4" == "netstat" && "$5" == "-tlpn" ]]; then
66
echo "tcp 0 0 0.0.0.0:21000 0.0.0.0:* LISTEN 5830/uperf"
77
fi
8+
9+
while true ;do
10+
case $1 in
11+
-o)
12+
shift 2
13+
;;
14+
-n)
15+
shift 1
16+
;;
17+
*)
18+
break
19+
;;
20+
esac
21+
done
22+
remote=$1
23+
24+
if [[ "$remote" == "fubar" ]] ;then
25+
exit 255
26+
else
27+
exit 0
28+
fi
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
+++ Running test-12 pbench-clear-tools
2+
removing tools-default/mpstat
3+
running . /opt/pbench-agent/profile; pbench-clear-tools --group default on fubar
4+
Removing the remote remote@fubar
5+
--- Finished test-12 pbench-clear-tools (status=1}
6+
+++ pbench tree state
7+
/var/tmp/pbench-test-utils/pbench
8+
/var/tmp/pbench-test-utils/pbench/tmp
9+
/var/tmp/pbench-test-utils/pbench/tools-default
10+
--- pbench tree state
11+
+++ pbench.log file contents
12+
grep: /var/tmp/pbench-test-utils/pbench/pbench.log: No such file or directory
13+
--- pbench.log file contents
14+
+++ test-execution.log file contents
15+
/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no -o ConnectTimeout=1 -n fubar . /opt/pbench-agent/profile; pbench-clear-tools --group default
16+
--- test-execution.log file contents
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
+++ Running test-13 pbench-postprocess-tools
2+
tar: This does not look like a tar archive
3+
tar: Exiting with failure status due to previous errors
4+
--- Finished test-13 pbench-postprocess-tools (status=2}
5+
+++ pbench tree state
6+
/var/tmp/pbench-test-utils/pbench
7+
/var/tmp/pbench-test-utils/pbench/pbench.log
8+
/var/tmp/pbench-test-utils/pbench/tmp
9+
/var/tmp/pbench-test-utils/pbench/tmp/tools-default
10+
/var/tmp/pbench-test-utils/pbench/tools-default
11+
/var/tmp/pbench-test-utils/pbench/tools-default/remote@fubar
12+
/var/tmp/pbench-test-utils/pbench/tools-default/remote@fubar:
13+
--- pbench tree state
14+
+++ pbench.log file contents
15+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]started: --dir=/var/tmp/pbench-test-utils/pbench/tmp --group=default --iteration=1
16+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]running this tool on fubar: ssh -o StrictHostKeyChecking=no -n fubar pbench-postprocess-tools --iteration=1 --group=default --dir=/var/tmp/pbench-test-utils/pbench/tmp
17+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]started: copying tool data ( MB) from fubar
18+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]copying tool data failed for remote fubar
19+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] [pbench-postprocess-tools]completed:
20+
--- pbench.log file contents
21+
+++ test-execution.log file contents
22+
/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no -n fubar pbench-postprocess-tools --iteration=1 --group=default --dir=/var/tmp/pbench-test-utils/pbench/tmp
23+
/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no -n fubar du -sm /var/tmp/pbench-test-utils/pbench/tmp/tools-default
24+
/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no -n fubar cd /var/tmp/pbench-test-utils/pbench/tmp/tools-default && tar cf - *
25+
--- test-execution.log file contents
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
+++ Running test-11 pbench-register-tool-set
2+
--- Finished test-11 pbench-register-tool-set (status=4}
3+
+++ pbench tree state
4+
/var/tmp/pbench-test-utils/pbench
5+
/var/tmp/pbench-test-utils/pbench/pbench.log
6+
/var/tmp/pbench-test-utils/pbench/tmp
7+
/var/tmp/pbench-test-utils/pbench/tools-default
8+
/var/tmp/pbench-test-utils/pbench/tools-default/remote@fubar
9+
/var/tmp/pbench-test-utils/pbench/tools-default/remote@fubar:
10+
--- pbench tree state
11+
+++ pbench.log file contents
12+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts: --interval=3
13+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts[0]="--interval=3"; pbench-register-tool --name=mpstat --group=default $label_opt -- "${tool_opts[@]}" 2>&1
14+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts: --interval=3
15+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts[0]="--interval=3"; pbench-register-tool --name=vmstat --group=default $label_opt -- "${tool_opts[@]}" 2>&1
16+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts: --interval=3
17+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts[0]="--interval=3"; pbench-register-tool --name=iostat --group=default $label_opt -- "${tool_opts[@]}" 2>&1
18+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts: --record-opts=record -a --freq=100
19+
/var/tmp/pbench-test-utils/pbench/pbench.log:[debug][1900-01-01T00:00:00.000000] tool_opts[0]="--record-opts=record -a --freq=100"; pbench-register-tool --name=perf --group=default $label_opt -- "${tool_opts[@]}" 2>&1
20+
--- pbench.log file contents
21+
+++ test-execution.log file contents
22+
/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no fubar tool_opts[0]="--interval=3"; pbench-register-tool --name=mpstat --group=default -- "${tool_opts[@]}" 2>&1
23+
/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no fubar tool_opts[0]="--interval=3"; pbench-register-tool --name=vmstat --group=default -- "${tool_opts[@]}" 2>&1
24+
/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no fubar tool_opts[0]="--interval=3"; pbench-register-tool --name=iostat --group=default -- "${tool_opts[@]}" 2>&1
25+
/var/tmp/pbench-test-utils/test-execution.log:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no fubar tool_opts[0]="--record-opts=record -a --freq=100"; pbench-register-tool --name=perf --group=default -- "${tool_opts[@]}" 2>&1
26+
--- test-execution.log file contents

agent/util-scripts/gold/pbench-stop-tools/test-05.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
+++ Running test-05 pbench-stop-tools
22
[warn][1900-01-01T00:00:00.000000] Too many pids for turbostat: 123463 123464 123465 123466 123467 -- maybe old tools running? Use pbench-kill-tools.
3-
--- Finished test-05 pbench-stop-tools (status=0}
3+
--- Finished test-05 pbench-stop-tools (status=1}
44
+++ pbench tree state
55
/var/tmp/pbench-test-utils/pbench
66
/var/tmp/pbench-test-utils/pbench/pbench.log

agent/util-scripts/gold/pbench-stop-tools/test-06.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
+++ Running test-06 pbench-stop-tools
2-
--- Finished test-06 pbench-stop-tools (status=0}
2+
--- Finished test-06 pbench-stop-tools (status=1}
33
+++ pbench tree state
44
/var/tmp/pbench-test-utils/pbench
55
/var/tmp/pbench-test-utils/pbench/pbench.log

agent/util-scripts/pbench-clear-tools

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/bin/bash
2+
# -*- mode: shell-script; indent-tabs-mode: t; sh-basic-offset: 8; sh-indentation: 8; sh-indent-for-case-alt: + -*-
23

34
script_path=`dirname $0`
45
script_name=`basename $0`
@@ -58,6 +59,8 @@ pushd $pbench_run >/dev/null
5859
# this tool group's directory which stores options, etc.
5960
tool_group_dir="tools-$group"
6061
if [ -d "$tool_group_dir" ]; then
62+
typeset -i nerrs=0
63+
ssh_opts="$ssh_opts -o ConnectTimeout=1"
6164
for this_tool_file in `/bin/ls $tool_group_dir`; do
6265
if [ "$this_tool_file" == "label" ]; then
6366
continue;
@@ -72,6 +75,17 @@ if [ -d "$tool_group_dir" ]; then
7275
remote_hostname=`echo $this_tool_file | awk -F@ '{print $2}'`
7376
echo running ". ${pbench_install_dir}/profile; pbench-clear-tools $group_opt $name_opt" on $remote_hostname
7477
ssh $ssh_opts -n $remote_hostname ". ${pbench_install_dir}/profile; pbench-clear-tools $group_opt $name_opt" | sed -e 's/\(.*\)/['$remote_hostname']\1/g'
78+
rc=$?
79+
if [ $rc != 0 ] ;then
80+
nerrs=$nerrs+1
81+
# we can't get to the remote but if the intent is to clear all tools
82+
# then we remove the local entry for the remote
83+
if [ -z "$name" ] ;then
84+
echo "Removing the remote $this_tool_file"
85+
/bin/rm -f "$tool_group_dir/$this_tool_file"
86+
fi
87+
continue
88+
fi
7589
remaining_remote_tools=`ssh $ssh_opts -n $remote_hostname ". ${pbench_install_dir}/profile; pbench-list-tools $group_opt"`
7690
if [ -z "$remaining_remote_tools" ]; then
7791
echo "The remote host $remote_hostname no longer has tools registered for $group group, so the "remote@$remote" entry in the local $tool_group_dir directory will be removed"
@@ -96,3 +110,4 @@ if [ -d "$tool_group_dir" ]; then
96110
fi
97111
fi
98112
popd >/dev/null
113+
exit $nerrs

agent/util-scripts/pbench-postprocess-tools

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ if [ $? -ne 0 ]; then
4040
printf -- "\t will store and process data\n"
4141
printf "\n"
4242
printf -- "\t-i str --iteration=num, num = a number representing the\n"
43-
printf -- "\t iteration data was collected for"
43+
printf -- "\t iteration data was collected for\n"
4444
exit 1
4545
fi
4646
eval set -- "$opts";
@@ -74,6 +74,7 @@ while true; do
7474
esac
7575
done
7676

77+
typeset -i nerrs=0
7778
iteration_num=`echo $iteration | awk -F- '{print $1}'`
7879
# this tool group's directory which stores options, etc.
7980
if [ -d "$pbench_run/tools-$group" ]; then
@@ -92,6 +93,13 @@ fi
9293
tool_output_dir="$dir/tools-$group"
9394
mkdir -p $tool_output_dir
9495

96+
# Try to prevent a cascade of tools running.
97+
if [ "$action" == "start" ] ;then
98+
# Kill any tools running from a previous invocation before
99+
# starting this one.
100+
pbench-kill-tools --group $group
101+
fi
102+
95103
function move_tool_data {
96104
local remote_host=$1
97105
local remote_label=$2
@@ -100,6 +108,12 @@ function move_tool_data {
100108
tool_data_size=`ssh $ssh_opts -n $remote_host du -sm $tool_output_dir | awk '{print $1}'`
101109
debug_log "[$script_name]started: copying tool data ($tool_data_size MB) from $remote_host"
102110
ssh $ssh_opts -n $remote_host "cd $tool_output_dir && tar cf - *" | tar mxf -
111+
rc=$?
112+
if [ $rc != 0 ] ;then
113+
debug_log "[$script_name]copying tool data failed for remote $remote_host"
114+
popd > /dev/null
115+
return $rc
116+
fi
103117
# if the full hostname was used in pbench-register-tool --remote, make sure that is preserved in the directory name
104118
if [ $remote_host != "$remote_shost" ]; then
105119
if [ -e "$remote_label:$remote_shost" ]; then
@@ -112,11 +126,14 @@ function move_tool_data {
112126
debug_log "[$script_name]completed: copying of tool data on $remote_host"
113127
debug_log "[$script_name]started: deleting tool data on $remote_host"
114128
ssh $ssh_opts -n $remote_host "cd $tool_output_dir && /bin/rm -rf *"
129+
rc=$?
115130
debug_log "[$script_name]completed: deleting tool data on $remote_host"
116131
popd >/dev/null
132+
return $rc
117133
}
118134

119135
### phase 1: for each tool, call the tool script with --$action (start, stop, or postprocess)
136+
pids=""
120137
for this_tool_file in `/bin/ls $tool_group_dir`; do
121138
if [ "$this_tool_file" == "label" ]; then
122139
continue;
@@ -128,6 +145,7 @@ for this_tool_file in `/bin/ls $tool_group_dir`; do
128145
# tool options are stored in the remote host's tool file, so no need to pass them here
129146
debug_log "[$script_name]running this tool on $remote: ssh $ssh_opts -n $remote pbench-$action-tools --iteration=$iteration --group=$group --dir=$dir"
130147
ssh $ssh_opts -n $remote pbench-$action-tools --iteration=$iteration --group=$group --dir=$dir &
148+
pids="$pids $!"
131149
else
132150
# tool is local
133151
# assemble the tool options into an array
@@ -159,12 +177,19 @@ for this_tool_file in `/bin/ls $tool_group_dir`; do
159177
fi
160178
else
161179
$pbench_bin/tool-scripts/$name --$action --iteration=$iteration --group=$group --dir=$dir "${tool_opts[@]}" &
180+
pids="$pids $!"
162181
fi
163182
fi
164183

165184
fi
166185
done
167-
wait
186+
for p in $pids ;do
187+
wait $p
188+
rc=$?
189+
if [[ $rc -ne 0 ]] ;then
190+
nerrs=$nerrs+1
191+
fi
192+
done
168193
if [ "$action" == "postprocess" ]; then
169194
# phase 2: now that the local results are ready, move them
170195
# down to $tool_output_dir/[$label:]$hostname.
@@ -185,12 +210,21 @@ if [ "$action" == "postprocess" ]; then
185210

186211
### phase 3: copy over data from remote hosts
187212
# for the remote tools, copy over the postprocess data
213+
pids=""
188214
for this_tool_file in `/bin/ls $tool_group_dir | grep "^remote"`; do
189215
remote_hostname=`echo "$this_tool_file" | awk -F@ '{print $2}'`
190216
label=`cat $tool_group_dir/$this_tool_file`
191217
# copy over the data from postprocessing
192218
move_tool_data $remote_hostname $label &
219+
pids="$pids $!"
220+
done
221+
for p in $pids ;do
222+
wait $p
223+
rc=$?
224+
if [[ $rc -ne 0 ]] ;then
225+
nerrs=$nerrs+1
226+
fi
193227
done
194-
wait
195228
fi
196229
debug_log "[$script_name]completed: $@"
230+
exit $nerrs

agent/util-scripts/pbench-register-tool

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,4 @@ else # register this tool on the remote host
179179
echo "$label" >"$this_tool_file"
180180
fi
181181
fi
182+
exit $rc

0 commit comments

Comments
 (0)