Skip to content

Commit 9b287e9

Browse files
committed
Make printf on node rank 0 the default for all bench apps
1 parent 6b2396b commit 9b287e9

File tree

5 files changed

+39
-19
lines changed

5 files changed

+39
-19
lines changed

apps/bench1.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ int main(int argc, char *argv[]) {
197197

198198
if (myrank == 0) {
199199
double send_BW = total_size_bytes / timer[0] / 1024 / 1024;
200-
#if false
200+
#if true
201201
printf("%i, %i, %i, %.2f, %.2f, %.2f, %.2f\n",
202202
conf.num_tasks,
203203
conf.num_threads,
@@ -208,13 +208,15 @@ int main(int argc, char *argv[]) {
208208
send_BW);
209209
#endif
210210
} else {
211+
#if false
211212
double recv_BW = total_size_bytes / timer[1] / 1024 / 1024;
212213
printf("%i, %i, %i, %.2f, %.2f, %.2f, %.2f\n",
213214
conf.num_tasks,
214215
conf.num_threads,
215216
conf.num_partitions,
216217
((double)patition_size_bytes) / 1024,
217218
((double)total_size_bytes) / 1024, timer[1] /*rank1*/, recv_BW);
219+
#endif
218220
}
219221

220222
MPI_Barrier(MPI_COMM_WORLD);

apps/bench2.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,21 @@
1111
//@HEADER
1212
*/
1313

14+
/*
15+
Similar to bench1 but delays n-1 tasks by an equal amount of time
16+
*/
17+
1418
#include "mpi.h"
1519
#include <cstdio>
1620
#include <partix.h>
1721
#include <stdlib.h>
1822
#include <unistd.h>
1923

20-
#define DEFAULT_ITERS 5
24+
#define DEFAULT_ITERS 10
2125
#define DATA_TYPE MPI_DOUBLE
2226
#define USE_PARRIVED
2327

24-
#define ALL_TASKS_SLEEP
28+
//#define ALL_TASKS_SLEEP_SAME_AMMOUNT
2529
#define DEFAULT_RECV_SEND_PARTITION_RATIO 1
2630

2731
double timer[3] = {0.0, 0.0, 0.0};
@@ -64,15 +68,15 @@ void send_task(partix_task_args_t *args) {
6468
send_task_args_t *task_args = (send_task_args_t *)args->user_task_args;
6569

6670
// First partition completion is delayed by sleep_time_ms
67-
#ifdef ALL_TASKS_SLEEP
68-
size_t sleep_time_ms = global_conf->overlap_duration;
69-
usleep(sleep_time_ms * 1000);
71+
size_t sleep_time_ms = 0;
72+
#ifdef ALL_TASKS_SLEEP_SAME_AMMOUNT
73+
sleep_time_ms = global_conf->overlap_duration;
7074
#else
7175
if (task_args->partition_id == 0) {
72-
size_t sleep_time_ms = global_conf->overlap_duration;
73-
usleep(sleep_time_ms * 1000);
76+
sleep_time_ms = global_conf->overlap_duration;
7477
}
7578
#endif
79+
usleep(sleep_time_ms * 1000);
7680
MPI_Pready(task_args->partition_id, *task_args->request);
7781
}
7882

@@ -202,6 +206,11 @@ int main(int argc, char *argv[]) {
202206
}
203207
}
204208

209+
// Measure perceived BW, that is, communication as if it were in the critical
210+
// path, by subtracting overlap
211+
timer[0] -= iterations * (float)global_conf->overlap_duration / 1000;
212+
timer[1] -= iterations * (float)global_conf->overlap_duration / 1000;
213+
205214
timer[0] /= iterations;
206215
timer[1] /= iterations;
207216

@@ -211,22 +220,22 @@ int main(int argc, char *argv[]) {
211220
if (myrank == 0) {
212221
double send_BW = total_size_bytes / timer[0] / 1024 / 1024;
213222
#if true
214-
printf("%i, %i, %i, %.1f, %.2f, %.2f, %.2f, %.2f\n",
223+
printf("%i, %i, %i, %.3f, %.2f, %.2f, %.2f, %.2f\n",
215224
conf.num_tasks,
216225
conf.num_threads,
217226
conf.num_partitions,
218-
(float)global_conf->overlap_duration,
227+
(float)global_conf->overlap_duration / 1000.0,
219228
((double)patition_size_bytes) / 1024,
220229
((double)total_size_bytes) / 1024, timer[0] /*rank0*/, send_BW);
221230
#endif
222231
} else {
223232
#if false
224233
double recv_BW = total_size_bytes / timer[1] / 1024 / 1024;
225-
printf("%i, %i, %i, %.1f, %.2f, %.2f, %.2f, %.2f\n",
234+
printf("%i, %i, %i, %.3f, %.2f, %.2f, %.2f, %.2f\n",
226235
conf.num_tasks,
227236
conf.num_threads,
228237
conf.num_partitions,
229-
(float)global_conf->overlap_duration,
238+
(float)global_conf->overlap_duration / 1000.0,
230239
((double)patition_size_bytes) / 1024,
231240
((double)total_size_bytes) / 1024,
232241
timer[1] /*rank1*/,

apps/bench3.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
//@HEADER
1212
*/
1313

14+
/*
15+
Similar to bench2 but delays n-1 tasks by a random amount of time
16+
*/
17+
1418
#include "mpi.h"
1519
#include <cstdio>
1620
#include <partix.h>
@@ -207,6 +211,11 @@ int main(int argc, char *argv[]) {
207211
}
208212
}
209213

214+
// Measure perceived BW, that is, communication as if it were in the critical
215+
// path, by subtracting overlap
216+
timer[0] -= iterations * (float)global_conf->overlap_duration / 1000;
217+
timer[1] -= iterations * (float)global_conf->overlap_duration / 1000;
218+
210219
timer[0] /= iterations;
211220
timer[1] /= iterations;
212221

@@ -216,21 +225,21 @@ int main(int argc, char *argv[]) {
216225
if (myrank == 0) {
217226
double send_BW = total_size_bytes / timer[0] / 1024 / 1024;
218227
#if true
219-
printf("%i, %i, %i, %.1f, %.2f, %.2f, %.2f, %.2f\n", conf.num_tasks,
228+
printf("%i, %i, %i, %.3f, %.2f, %.2f, %.2f, %.2f\n", conf.num_tasks,
220229
conf.num_threads,
221230
conf.num_partitions,
222-
(float)global_conf->overlap_duration,
231+
(float)global_conf->overlap_duration / 1000.0,
223232
((double)patition_size_bytes) / 1024,
224233
((double)total_size_bytes) / 1024, timer[0] /*rank0*/, send_BW);
225234
#endif
226235
} else {
227236
#if false
228237
double recv_BW = total_size_bytes / timer[1] / 1024 / 1024;
229-
printf("%i, %i, %i, %.1f, %.2f, %.2f, %.2f, %.2f\n",
238+
printf("%i, %i, %i, %.3f, %.2f, %.2f, %.2f, %.2f\n",
230239
conf.num_tasks,
231240
conf.num_threads,
232241
conf.num_partitions,
233-
(float)global_conf->overlap_duration,
242+
(float)global_conf->overlap_duration / 1000.0,
234243
((double)patition_size_bytes) / 1024,
235244
((double)total_size_bytes) / 1024,
236245
timer[1] /*rank1*/,

scripts/run_over_overlap.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ export QTHREAD_STACK_SIZE=8192
2323
export OMP_PROC_BIND=true
2424
export OMP_PLACES=cores
2525

26-
overlap_default=0 #msec
26+
overlap_default=1 #msec
2727

2828
FLAGS="--bind-to core --rank-by core"
2929
PRELOAD="-x LD_PRELOAD=/home/projects/x86-64/gcc/10.2.0/lib64/libstdc++.so.6"
@@ -46,7 +46,7 @@ for threads in {1..9..1}; do
4646
$FLAGS $PRELOAD -x OMP_PLACES=cores -x OMP_NUM_THREADS=$num_threads \
4747
-x QTHREAD_STACK_SIZE=8196 -x OMP_PROC_BIND=true \
4848
$binary $num_tasks $num_threads $num_part $num_partlen $overlp
49-
if [[ $overlp -eq 0 ]]
49+
if [[ $overlp -eq 1 ]]
5050
then
5151
overlp=10
5252
fi

src/partix.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
#define OVERLAP_IN_MSEC_DEFAULT 100
3636

3737
/* Used to add task duration divergence as a % of OVERLAP_IN_MSEC_DEFAULT */
38-
#define NOISE_IN_PERCENTAGE_OF_OVERLAP 30
38+
#define NOISE_IN_PERCENTAGE_OF_OVERLAP 0
3939

4040
#include <thread.h>
4141

0 commit comments

Comments
 (0)