Skip to content

Commit aa2d22a

Browse files
authored
Merge pull request #4915 from abouteiller/topic/fix-scaling.pl
Scaling.pl: Fix Srun options and wait for DVM launch
2 parents bfeeaf4 + b36e3cd commit aa2d22a

File tree

2 files changed

+55
-20
lines changed

2 files changed

+55
-20
lines changed

contrib/scaling/scaling.pl

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
# Copyright (c) 2012 Los Alamos National Security, Inc.
44
# All rights reserved.
55
# Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
6+
# Copyright (c) 2017-2018 The University of Tennessee and The University
7+
# of Tennessee Research Foundation. All rights
8+
# reserved.
9+
610

711
use strict;
812
use Getopt::Long;
@@ -21,15 +25,16 @@
2125
my $rawoutput = 0;
2226
my $myresults = "myresults";
2327
my $ppn = 1;
28+
my $npmin = 1;
2429
my @csvrow;
2530

2631
my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
2732
my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1");
2833
my @starterlist = qw(mpirun prun srun aprun);
2934
my @starteroptionlist = ("--novm",
3035
"",
31-
"--distribution=cyclic -N",
32-
"-N");
36+
"--distribution=cyclic",
37+
"");
3338

3439
# Set to true if the script should merely print the cmds
3540
# it would run, but don't run them
@@ -52,6 +57,7 @@
5257
"results=s" => \$myresults,
5358
"rawout" => \$rawoutput,
5459
"ppn=s" => \$ppn,
60+
"npmin=s" => \$npmin,
5561
) or die "unable to parse options, stopped";
5662

5763
if ($HELP) {
@@ -69,6 +75,7 @@
6975
--results=file File where results are to be stored in comma-separated value format
7076
--rawout Provide raw timing output to the file
7177
--ppn=n Run n procs/node
78+
--npmin=n Minimal number of nodes
7279
";
7380
exit(0);
7481
}
@@ -126,11 +133,11 @@
126133
push @starteroptions, $opt;
127134
} elsif ($useaprun && $starter eq "aprun") {
128135
push @starters, $starter;
129-
$opt = $starteroptionlist[$idx] . " " . $ppn;
136+
$opt = $starteroptionlist[$idx] . " -N " . $ppn;
130137
push @starteroptions, $opt;
131138
} elsif ($usesrun && $starter eq "srun") {
132139
push @starters, $starter;
133-
$opt = $starteroptionlist[$idx] . " " . $ppn;
140+
$opt = $starteroptionlist[$idx] . " --ntasks-per-node " . $ppn;
134141
push @starteroptions, $opt;
135142
}
136143
}
@@ -184,10 +191,21 @@
184191

185192
sub runcmd()
186193
{
194+
my $rc;
187195
for (1..$reps) {
188196
$output = `$cmd`;
197+
# Check the error code of the command; if the error code is alright
198+
# just add a 0 in front of the number to neutrally mark the success;
199+
# If the code is not correct, add a ! in front of the number to mark
200+
# it invalid.
201+
if($? != 0) {
202+
$rc = "0";
203+
}
204+
else {
205+
$rc = "!";
206+
}
189207
if ($myresults && $rawoutput) {
190-
print FILE $n . " " . $output . "\n";
208+
print FILE $n . " " . $output . " $rc\n";
191209
}
192210
@lines = split(/\n/, $output);
193211
foreach $line (@lines) {
@@ -209,14 +227,14 @@ ()
209227
if (0 == $strloc) {
210228
if (0 == $idx) {
211229
# it must be in the next location
212-
push @csvrow,$results[1];
230+
push @csvrow,join $rc,$results[1];
213231
} else {
214232
# it must be in the prior location
215-
push @csvrow,$results[$idx-1];
233+
push @csvrow,join $rc,$results[$idx-1];
216234
}
217235
} else {
218236
# take the portion of the string up to the tag
219-
push @csvrow,substr($res, 0, $strloc);
237+
push @csvrow,join $rc,substr($res, 0, $strloc);
220238
}
221239
} else {
222240
$strloc = index($res, "elapsed");
@@ -227,14 +245,14 @@ ()
227245
if (0 == $strloc) {
228246
if (0 == $idx) {
229247
# it must be in the next location
230-
push @csvrow,$results[1];
248+
push @csvrow,join $rc,$results[1];
231249
} else {
232250
# it must be in the prior location
233-
push @csvrow,$results[$idx-1];
251+
push @csvrow,join $rc,$results[$idx-1];
234252
}
235253
} else {
236254
# take the portion of the string up to the tag
237-
push @csvrow,substr($res, 0, $strloc);
255+
push @csvrow,join $rc,substr($res, 0, $strloc);
238256
}
239257
}
240258
}
@@ -263,20 +281,28 @@ ()
263281
}
264282

265283
foreach $starter (@starters) {
284+
my $dvmout;
266285
print "STARTER: $starter\n";
267286
# if we are going to use the dvm, then we
268287
if ($starter eq "prun") {
269288
# need to start it
270-
$cmd = "orte-dvm -mca pmix_system_server 1 2>&1 &";
289+
$cmd = "orte-dvm -mca pmix_system_server 1";
290+
print "##DVM: Launching $cmd\n";
271291
if ($myresults) {
272292
print FILE "\n\n$cmd\n";
273293
}
274294
if (!$SHOWME) {
275-
system($cmd);
276-
$havedvm = 1;
295+
$havedvm = open($dvmout, $cmd."|") or die "##DVM: Spawn error $!\n";
296+
print "##DVM: pid=$havedvm\n";
297+
# Wait until the DVM reports that it is ready
298+
my $waitready = <$dvmout>;
299+
if($waitready =~ /DVM ready/i) {
300+
print "##DVM: $waitready\n";
301+
}
302+
else {
303+
die "##DVM: error: $waitready\n";
304+
}
277305
}
278-
# give it a couple of seconds to start
279-
sleep 2;
280306
}
281307

282308
if ($myresults) {
@@ -285,6 +311,13 @@ ()
285311
my $testnum = 0;
286312
foreach $test (@tests) {
287313
$option = $options[$testnum];
314+
if ($starter eq "aprun") {
315+
$option =~ s/-mca\s+(\S+)\s+(\S+)/-e OMPI_MCA_$1=$2/g;
316+
}
317+
if ($starter eq "srun") {
318+
$option =~ s/-mca\s+(\S+)\s+(\S+)\s*/OMPI_MCA_$1=$2,/g;
319+
$option =~ s/\s*(OMPI_MCA\S+)/ --export=$1ALL/g;
320+
}
288321
if (-e $test) {
289322
if ($myresults) {
290323
print FILE "#nodes,$test,$option\n";
@@ -294,11 +327,12 @@ ()
294327
$cmd = $starter . $starteroptions[$index] . " $test 2>&1";
295328
system($cmd);
296329
}
297-
$n = 1;
330+
$n = $npmin;
298331
while ($n <= $num_nodes) {
299332
push @csvrow,$n;
300-
if ($starter eq "prun" or $starter eq "mpirun") {
301-
$cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -n $n $test 2>&1";
333+
if ($starter eq "prun" or $starter eq "mpirun" or $starter eq "aprun") {
334+
my $np = $n * $ppn;
335+
$cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -n $np $test 2>&1";
302336
} else {
303337
$cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -N $n $test 2>&1";
304338
}
@@ -326,6 +360,7 @@ ()
326360
if (!$SHOWME) {
327361
$cmd = "prun --terminate";
328362
system($cmd);
363+
waitpid($havedvm, 0);
329364
}
330365
}
331366
$index = $index + 1;

orte/mca/state/dvm/state_dvm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,7 @@ static void vm_ready(int fd, short args, void *cbdata)
408408
OBJ_RELEASE(buf);
409409
}
410410
/* notify that the vm is ready */
411-
fprintf(stdout, "DVM ready\n");
411+
fprintf(stdout, "DVM ready\n"); fflush(stdout);
412412
OBJ_RELEASE(caddy);
413413
return;
414414
}

0 commit comments

Comments
 (0)