|
3 | 3 | # Copyright (c) 2012 Los Alamos National Security, Inc. |
4 | 4 | # All rights reserved. |
5 | 5 | # Copyright (c) 2015-2016 Intel, Inc. All rights reserved. |
| 6 | +# Copyright (c) 2017-2018 The University of Tennessee and The University |
| 7 | +# of Tennessee Research Foundation. All rights |
| 8 | +# reserved. |
| 9 | + |
6 | 10 |
|
7 | 11 | use strict; |
8 | 12 | use Getopt::Long; |
|
21 | 25 | my $rawoutput = 0; |
22 | 26 | my $myresults = "myresults"; |
23 | 27 | my $ppn = 1; |
| 28 | +my $npmin = 1; |
24 | 29 | my @csvrow; |
25 | 30 | my $multiplier = 1; |
26 | 31 |
|
|
29 | 34 | my @starterlist = qw(mpirun prun srun aprun); |
30 | 35 | my @starteroptionlist = (" --novm --timeout 600", |
31 | 36 | " --system-server-only", |
32 | | - " --distribution=cyclic --ntasks-per-node=", |
33 | | - " -N"); |
| 37 | + " --distribution=cyclic", |
| 38 | + ""); |
34 | 39 |
|
35 | 40 | # Set to true if the script should merely print the cmds |
36 | 41 | # it would run, but don't run them |
|
54 | 59 | "rawout" => \$rawoutput, |
55 | 60 | "ppn=s" => \$ppn, |
56 | 61 | "multiplier=s" => \$multiplier, |
| 62 | + "npmin=s" => \$npmin, |
57 | 63 | ) or die "unable to parse options, stopped"; |
58 | 64 |
|
59 | 65 | if ($HELP) { |
|
72 | 78 | --rawout Provide raw timing output to the file |
73 | 79 | --ppn=n Run n procs/node |
74 | 80 | --multiplier=n Run n daemons/node (only for DVM and mpirun) |
| 81 | +--npmin=n Minimal number of nodes |
75 | 82 | "; |
76 | 83 | exit(0); |
77 | 84 | } |
|
133 | 140 | push @starteroptions, $opt; |
134 | 141 | } elsif ($useaprun && $starter eq "aprun") { |
135 | 142 | push @starters, $starter; |
136 | | - $opt = $starteroptionlist[$idx] . " " . $ppn; |
| 143 | + $opt = $starteroptionlist[$idx] . " -N " . $ppn; |
137 | 144 | push @starteroptions, $opt; |
138 | 145 | } elsif ($usesrun && $starter eq "srun") { |
139 | 146 | push @starters, $starter; |
140 | | - $opt = $starteroptionlist[$idx] . $ppn; |
| 147 | + $opt = $starteroptionlist[$idx] . " --ntasks-per-node " . $ppn; |
141 | 148 | push @starteroptions, $opt; |
142 | 149 | } |
143 | 150 | } |
|
191 | 198 |
|
192 | 199 | sub runcmd() |
193 | 200 | { |
| 201 | + my $rc; |
194 | 202 | for (1..$reps) { |
195 | 203 | $output = `$cmd`; |
| 204 | + # Check the exit status of the command ($? is 0 on success). On
| 205 | + # success, prefix the timing with a 0 to neutrally mark it valid;
| 206 | + # on failure, prefix it with a ! to mark the value invalid. The
| 207 | + # same marker is appended to each raw output record.
| 208 | + if ($? == 0) {
| 209 | + $rc = "0";
| 210 | + }
| 211 | + else {
| 212 | + $rc = "!";
| 213 | + }
196 | 214 | if ($myresults && $rawoutput) {
197 | | - print FILE $n . " " . $output . "\n";
| 215 | + print FILE $n . " " . $output . " $rc\n";
198 | 216 | }
199 | 217 | @lines = split(/\n/, $output);
200 | 218 | foreach $line (@lines) {

216 | 234 | if (0 == $strloc) {
217 | 235 | if (0 == $idx) {
218 | 236 | # it must be in the next location
219 | | - push @csvrow,$results[1];
| 237 | + push @csvrow,$rc . $results[1];
220 | 238 | } else {
221 | 239 | # it must be in the prior location
222 | | - push @csvrow,$results[$idx-1];
| 240 | + push @csvrow,$rc . $results[$idx-1];
223 | 241 | }
224 | 242 | } else {
225 | 243 | # take the portion of the string up to the tag
226 | | - push @csvrow,substr($res, 0, $strloc);
| 244 | + push @csvrow,$rc . substr($res, 0, $strloc);
227 | 245 | }
228 | 246 | } else {
229 | 247 | $strloc = index($res, "elapsed");

234 | 252 | if (0 == $strloc) {
235 | 253 | if (0 == $idx) {
236 | 254 | # it must be in the next location
237 | | - push @csvrow,$results[1];
| 255 | + push @csvrow,$rc . $results[1];
238 | 256 | } else {
239 | 257 | # it must be in the prior location
240 | | - push @csvrow,$results[$idx-1];
| 258 | + push @csvrow,$rc . $results[$idx-1];
241 | 259 | }
242 | 260 | } else {
243 | 261 | # take the portion of the string up to the tag
244 | | - push @csvrow,substr($res, 0, $strloc);
| 262 | + push @csvrow,$rc . substr($res, 0, $strloc);
246 | 264 | } |
247 | 265 | } |
|
270 | 288 | } |
271 | 289 |
|
272 | 290 | foreach $starter (@starters) { |
| 291 | + my $dvmout; |
273 | 292 | print "STARTER: $starter\n"; |
274 | 293 | # if we are going to use the dvm, then we |
275 | 294 | if ($starter eq "prun") { |
|
278 | 297 | $dvm = $dvm . " --mca rtc ^hwloc --mca ras_base_multiplier " . $multiplier; |
279 | 298 | } |
280 | 299 | # need to start it |
| 300 | + print "##DVM: Launching $dvm\n"; |
281 | 301 | if ($myresults) { |
282 | 302 | print FILE "\n\n$dvm\n"; |
283 | 303 | } |
284 | 304 | if (!$SHOWME) { |
285 | | - unless ($pid = fork) { |
286 | | - unless (fork) { |
287 | | - exec "$dvm 2>&1"; |
288 | | - die "no exec"; |
289 | | - } |
290 | | - exit 0; |
| 305 | + $havedvm = open($dvmout, $dvm."|") or die "##DVM: Spawn error $!\n"; |
| 306 | + print "##DVM: pid=$havedvm\n"; |
| 307 | + # Wait until the DVM reports that it is ready
| 308 | + my $waitready = <$dvmout>; |
| 309 | + if($waitready =~ /DVM ready/i) { |
| 310 | + print "##DVM: $waitready\n"; |
| 311 | + } |
| 312 | + else { |
| 313 | + die "##DVM: error: $waitready\n"; |
291 | 314 | } |
292 | | - $havedvm = 1; |
293 | 315 | } |
294 | | - # give it a couple of seconds to start |
295 | | - sleep 2; |
296 | 316 | } else { |
297 | 317 | if ($myresults) { |
298 | 318 | print FILE "\n\n"; |
|
305 | 325 | my $testnum = 0; |
306 | 326 | foreach $test (@tests) { |
307 | 327 | $option = $options[$testnum]; |
| 328 | + if ($starter eq "aprun") { # rewrite each "-mca name value" (or "--mca") as "-e OMPI_MCA_name=value"
| 329 | + $option =~ s/--?mca\s+(\S+)\s+(\S+)/-e OMPI_MCA_$1=$2/g;
| 330 | + }
| 331 | + if ($starter eq "srun") { # collapse all MCA params into one "--export=OMPI_MCA_a=x,...,ALL"
| 332 | + $option =~ s/--?mca\s+(\S+)\s+(\S+)\s*/OMPI_MCA_$1=$2,/g; # accept one or two leading dashes
| 333 | + $option =~ s/\s*(OMPI_MCA\S+)/ --export=$1ALL/g; # the trailing comma from above precedes ALL
| 334 | + }
308 | 335 | if (-e $test) { |
309 | 336 | if ($myresults) { |
310 | 337 | print FILE "#nodes,$test,$option\n"; |
|
322 | 349 | } |
323 | 350 | } |
324 | 351 | } |
325 | | - $n = 1; |
| 352 | + $n = $npmin; |
326 | 353 | while ($n <= $num_nodes) { |
327 | 354 | push @csvrow,$n; |
328 | | - if ($starter eq "prun" or $starter eq "mpirun") { |
329 | | - $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -n $n $test 2>&1"; |
| 355 | + if ($starter eq "prun" or $starter eq "mpirun" or $starter eq "aprun") { |
| 356 | + my $np = $n * $ppn; |
| 357 | + $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -n $np $test 2>&1"; |
330 | 358 | } else { |
331 | 359 | $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -N $n $test 2>&1"; |
332 | 360 | } |
|
358 | 386 | if ($havedvm) { |
359 | 387 | if (!$SHOWME) { |
360 | 388 | $cmd = "prun --system-server-only --terminate"; |
361 | | - my $rc = `$cmd`; |
362 | | - waitpid($pid, 0); |
| 389 | + system($cmd); |
| 390 | + waitpid($havedvm, 0); |
363 | 391 | } |
364 | 392 | $havedvm = 0; |
365 | 393 | } |
|
0 commit comments