@@ -40,7 +40,7 @@ if [ $? -ne 0 ]; then
4040 printf -- " \t will store and process data\n"
4141 printf " \n"
4242 printf -- " \t-i str --iteration=num, num = a number representing the\n"
43- printf -- " \t iteration data was collected for"
43+ printf -- " \t iteration data was collected for\n "
4444 exit 1
4545fi
4646eval set -- " $opts " ;
@@ -74,6 +74,7 @@ while true; do
7474 esac
7575done
7676
77+ typeset -i nerrs=0
7778iteration_num=` echo $iteration | awk -F- ' {print $1}' `
7879# this tool group's directory which stores options, etc.
7980if [ -d " $pbench_run /tools-$group " ]; then
9293tool_output_dir=" $dir /tools-$group "
9394mkdir -p $tool_output_dir
9495
# Try to prevent a cascade of tools running: when the action is "start",
# kill any tools still running from a previous invocation of this group
# before starting new ones.  The exit status of pbench-kill-tools is not
# checked here.
if [[ "$action" == "start" ]]; then
    # Quote the group name so it reaches pbench-kill-tools as one argument.
    pbench-kill-tools --group "$group"
fi
102+
95103function move_tool_data {
96104 local remote_host=$1
97105 local remote_label=$2
@@ -100,6 +108,12 @@ function move_tool_data {
100108 tool_data_size=` ssh $ssh_opts -n $remote_host du -sm $tool_output_dir | awk ' {print $1}' `
101109 debug_log " [$script_name ]started: copying tool data ($tool_data_size MB) from $remote_host "
102110 ssh $ssh_opts -n $remote_host " cd $tool_output_dir && tar cf - *" | tar mxf -
111+ rc=$?
112+ if [ $rc != 0 ] ; then
113+ debug_log " [$script_name ]copying tool data failed for remote $remote_host "
114+ popd > /dev/null
115+ return $rc
116+ fi
103117 # if the full hostname was used in pbench-register-tool --remote, make sure that is preserved in the directory name
104118 if [ $remote_host != " $remote_shost " ]; then
105119 if [ -e " $remote_label :$remote_shost " ]; then
@@ -112,11 +126,14 @@ function move_tool_data {
112126 debug_log " [$script_name ]completed: copying of tool data on $remote_host "
113127 debug_log " [$script_name ]started: deleting tool data on $remote_host "
114128 ssh $ssh_opts -n $remote_host " cd $tool_output_dir && /bin/rm -rf *"
129+ rc=$?
115130 debug_log " [$script_name ]completed: deleting tool data on $remote_host "
116131 popd > /dev/null
132+ return $rc
117133}
118134
119135# ## phase 1: for each tool, call the tool script with --$action (start, stop, or postprocess)
136+ pids=" "
120137for this_tool_file in ` /bin/ls $tool_group_dir ` ; do
121138 if [ " $this_tool_file " == " label" ]; then
122139 continue ;
@@ -128,6 +145,7 @@ for this_tool_file in `/bin/ls $tool_group_dir`; do
128145 # tool options are stored on the remote host's tool file, so no need to pass it here
129146 debug_log " [$script_name ]running this tool on $remote : ssh $ssh_opts -n $remote pbench-$action -tools --iteration=$iteration --group=$group --dir=$dir "
130147 ssh $ssh_opts -n $remote pbench-$action -tools --iteration=$iteration --group=$group --dir=$dir &
148+ pids=" $pids $! "
131149 else
132150 # tool is local
133151 # assemble the tool options in to an array
@@ -159,12 +177,19 @@ for this_tool_file in `/bin/ls $tool_group_dir`; do
159177 fi
160178 else
161179 $pbench_bin /tool-scripts/$name --$action --iteration=$iteration --group=$group --dir=$dir " ${tool_opts[@]} " &
180+ pids=" $pids $! "
162181 fi
163182 fi
164183
165184 fi
166185done
167- wait
# Reap every background job recorded in $pids one at a time so that each
# job's exit status can be inspected; every non-zero status bumps nerrs.
# $pids is intentionally unquoted: it is a space-separated list of PIDs.
for p in $pids; do
    wait "$p"
    rc=$?
    if (( rc != 0 )); then
        # Explicit arithmetic: does not depend on nerrs carrying the
        # integer attribute from a declaration elsewhere in the script.
        nerrs=$(( nerrs + 1 ))
    fi
done
168193if [ " $action " == " postprocess" ]; then
169194 # phase 2: now that the local results are ready, move them
170195 # down to $tool_output_dir/[$label:]$hostname.
@@ -185,12 +210,21 @@ if [ "$action" == "postprocess" ]; then
185210
186211 # ## phase 3: copy over data from remote hosts
187212 # for the remote tools, copy over the postprocess data
# Launch one background move_tool_data per remote tool file, then reap each
# transfer individually so that a failed copy is counted in nerrs.
pids=""
for this_tool_path in "$tool_group_dir"/remote*; do
    # An unmatched glob is left as the literal pattern; skip it when the
    # group has no remote tool files.
    [[ -e "$this_tool_path" ]] || continue
    this_tool_file=${this_tool_path##*/}
    # The host is the second "@"-separated field of the file name
    # (presumably the files are named remote@<host> -- verify against
    # pbench-register-tool).
    remote_hostname=$(printf '%s\n' "$this_tool_file" | awk -F@ '{print $2}')
    label=$(cat "$tool_group_dir/$this_tool_file")
    # copy over the data from postprocessing; quoting keeps an empty host
    # or label from shifting the positional arguments
    move_tool_data "$remote_hostname" "$label" &
    pids="$pids $!"
done
# $pids is intentionally unquoted: it is a space-separated list of PIDs.
for p in $pids; do
    wait "$p"
    rc=$?
    if (( rc != 0 )); then
        nerrs=$(( nerrs + 1 ))
    fi
done
194- wait
195228fi
# Log completion.  "$*" joins the remaining positional parameters into the
# single message string; "$@" inside a larger quoted string would expand to
# several separate words.
debug_log "[$script_name]completed: $*"
# Exit with the number of failed jobs, clamped to 255: exit statuses wrap
# modulo 256, so 256 failures would otherwise be reported as success.
exit $(( nerrs > 255 ? 255 : nerrs ))
0 commit comments