Skip to content

Commit e08e580

Browse files
authored
Merge pull request #4916 from abouteiller/topic/scaling.pl-m
Scaling.pl: Fix Srun options and wait for DVM launch
2 parents 7f4872d + 9e23d24 commit e08e580

File tree

2 files changed

+54
-26
lines changed

2 files changed

+54
-26
lines changed

contrib/scaling/scaling.pl

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
# Copyright (c) 2012 Los Alamos National Security, Inc.
44
# All rights reserved.
55
# Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
6+
# Copyright (c) 2017-2018 The University of Tennessee and The University
7+
# of Tennessee Research Foundation. All rights
8+
# reserved.
9+
610

711
use strict;
812
use Getopt::Long;
@@ -21,6 +25,7 @@
2125
my $rawoutput = 0;
2226
my $myresults = "myresults";
2327
my $ppn = 1;
28+
my $npmin = 1;
2429
my @csvrow;
2530
my $multiplier = 1;
2631

@@ -29,8 +34,8 @@
2934
my @starterlist = qw(mpirun prun srun aprun);
3035
my @starteroptionlist = (" --novm --timeout 600",
3136
" --system-server-only",
32-
" --distribution=cyclic --ntasks-per-node=",
33-
" -N");
37+
" --distribution=cyclic",
38+
"");
3439

3540
# Set to true if the script should merely print the cmds
3641
# it would run, but don't run them
@@ -54,6 +59,7 @@
5459
"rawout" => \$rawoutput,
5560
"ppn=s" => \$ppn,
5661
"multiplier=s" => \$multiplier,
62+
"npmin=s" => \$npmin,
5763
) or die "unable to parse options, stopped";
5864

5965
if ($HELP) {
@@ -72,6 +78,7 @@
7278
--rawout Provide raw timing output to the file
7379
--ppn=n Run n procs/node
7480
--multiplier=n Run n daemons/node (only for DVM and mpirun)
81+
--npmin=n Minimal number of nodes
7582
";
7683
exit(0);
7784
}
@@ -133,11 +140,11 @@
133140
push @starteroptions, $opt;
134141
} elsif ($useaprun && $starter eq "aprun") {
135142
push @starters, $starter;
136-
$opt = $starteroptionlist[$idx] . " " . $ppn;
143+
$opt = $starteroptionlist[$idx] . " -N " . $ppn;
137144
push @starteroptions, $opt;
138145
} elsif ($usesrun && $starter eq "srun") {
139146
push @starters, $starter;
140-
$opt = $starteroptionlist[$idx] . $ppn;
147+
$opt = $starteroptionlist[$idx] . " --ntasks-per-node " . $ppn;
141148
push @starteroptions, $opt;
142149
}
143150
}
@@ -191,10 +198,21 @@
191198

192199
sub runcmd()
193200
{
201+
my $rc;
194202
for (1..$reps) {
195203
$output = `$cmd`;
204+
# Check the error code of the command; if the error code is alright
205+
# just add a 0 in front of the number to neutrally mark the success;
206+
# If the code is not correct, add a ! in front of the number to mark
207+
# it invalid.
208+
if($? != 0) {
209+
$rc = "0";
210+
}
211+
else {
212+
$rc = "!";
213+
}
196214
if ($myresults && $rawoutput) {
197-
print FILE $n . " " . $output . "\n";
215+
print FILE $n . " " . $output . " $rc\n";
198216
}
199217
@lines = split(/\n/, $output);
200218
foreach $line (@lines) {
@@ -216,14 +234,14 @@ ()
216234
if (0 == $strloc) {
217235
if (0 == $idx) {
218236
# it must be in the next location
219-
push @csvrow,$results[1];
237+
push @csvrow,join $rc,$results[1];
220238
} else {
221239
# it must be in the prior location
222-
push @csvrow,$results[$idx-1];
240+
push @csvrow,join $rc,$results[$idx-1];
223241
}
224242
} else {
225243
# take the portion of the string up to the tag
226-
push @csvrow,substr($res, 0, $strloc);
244+
push @csvrow,join $rc,substr($res, 0, $strloc);
227245
}
228246
} else {
229247
$strloc = index($res, "elapsed");
@@ -234,14 +252,14 @@ ()
234252
if (0 == $strloc) {
235253
if (0 == $idx) {
236254
# it must be in the next location
237-
push @csvrow,$results[1];
255+
push @csvrow,join $rc,$results[1];
238256
} else {
239257
# it must be in the prior location
240-
push @csvrow,$results[$idx-1];
258+
push @csvrow,join $rc,$results[$idx-1];
241259
}
242260
} else {
243261
# take the portion of the string up to the tag
244-
push @csvrow,substr($res, 0, $strloc);
262+
push @csvrow,join $rc,substr($res, 0, $strloc);
245263
}
246264
}
247265
}
@@ -270,6 +288,7 @@ ()
270288
}
271289

272290
foreach $starter (@starters) {
291+
my $dvmout;
273292
print "STARTER: $starter\n";
274293
# if we are going to use the dvm, then we
275294
if ($starter eq "prun") {
@@ -278,21 +297,22 @@ ()
278297
$dvm = $dvm . " --mca rtc ^hwloc --mca ras_base_multiplier " . $multiplier;
279298
}
280299
# need to start it
300+
print "##DVM: Launching $dvm\n";
281301
if ($myresults) {
282302
print FILE "\n\n$dvm\n";
283303
}
284304
if (!$SHOWME) {
285-
unless ($pid = fork) {
286-
unless (fork) {
287-
exec "$dvm 2>&1";
288-
die "no exec";
289-
}
290-
exit 0;
305+
$havedvm = open($dvmout, $dvm."|") or die "##DVM: Spawn error $!\n";
306+
print "##DVM: pid=$havedvm\n";
307+
# Wait until the DVM reports that it is ready
308+
my $waitready = <$dvmout>;
309+
if($waitready =~ /DVM ready/i) {
310+
print "##DVM: $waitready\n";
311+
}
312+
else {
313+
die "##DVM: error: $waitready\n";
291314
}
292-
$havedvm = 1;
293315
}
294-
# give it a couple of seconds to start
295-
sleep 2;
296316
} else {
297317
if ($myresults) {
298318
print FILE "\n\n";
@@ -305,6 +325,13 @@ ()
305325
my $testnum = 0;
306326
foreach $test (@tests) {
307327
$option = $options[$testnum];
328+
if ($starter eq "aprun") {
329+
$option =~ s/-mca\s+(\S+)\s+(\S+)/-e OMPI_MCA_$1=$2/g;
330+
}
331+
if ($starter eq "srun") {
332+
$option =~ s/-mca\s+(\S+)\s+(\S+)\s*/OMPI_MCA_$1=$2,/g;
333+
$option =~ s/\s*(OMPI_MCA\S+)/ --export=$1ALL/g;
334+
}
308335
if (-e $test) {
309336
if ($myresults) {
310337
print FILE "#nodes,$test,$option\n";
@@ -322,11 +349,12 @@ ()
322349
}
323350
}
324351
}
325-
$n = 1;
352+
$n = $npmin;
326353
while ($n <= $num_nodes) {
327354
push @csvrow,$n;
328-
if ($starter eq "prun" or $starter eq "mpirun") {
329-
$cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -n $n $test 2>&1";
355+
if ($starter eq "prun" or $starter eq "mpirun" or $starter eq "aprun") {
356+
my $np = $n * $ppn;
357+
$cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -n $np $test 2>&1";
330358
} else {
331359
$cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -N $n $test 2>&1";
332360
}
@@ -358,8 +386,8 @@ ()
358386
if ($havedvm) {
359387
if (!$SHOWME) {
360388
$cmd = "prun --system-server-only --terminate";
361-
my $rc = `$cmd`;
362-
waitpid($pid, 0);
389+
system($cmd);
390+
waitpid($havedvm, 0);
363391
}
364392
$havedvm = 0;
365393
}

orte/mca/state/dvm/state_dvm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ static void vm_ready(int fd, short args, void *cbdata)
410410
OBJ_RELEASE(buf);
411411
}
412412
/* notify that the vm is ready */
413-
fprintf(stdout, "DVM ready\n");
413+
fprintf(stdout, "DVM ready\n"); fflush(stdout);
414414
OBJ_RELEASE(caddy);
415415
return;
416416
}

0 commit comments

Comments
 (0)