Skip to content

Commit 3796307

Browse files
committed
timings: added new timing points
Signed-off-by: Boris Karasev <[email protected]>
1 parent cc4bb43 commit 3796307

File tree

10 files changed

+176
-21
lines changed

10 files changed

+176
-21
lines changed

ompi/runtime/ompi_mpi_init.c

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
2121
* Copyright (c) 2014-2016 Research Organization for Information Science
2222
* and Technology (RIST). All rights reserved.
23-
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
23+
* Copyright (c) 2016-2018 Mellanox Technologies Ltd. All rights reserved.
2424
*
2525
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
2626
* $COPYRIGHT$
@@ -384,7 +384,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
384384
volatile bool active;
385385
bool background_fence = false;
386386

387-
OMPI_TIMING_INIT(32);
387+
OMPI_TIMING_INIT(64);
388388

389389
ompi_hook_base_mpi_init_top(argc, argv, requested, provided);
390390

@@ -423,6 +423,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
423423
error = "ompi_mpi_init: opal_init_util failed";
424424
goto error;
425425
}
426+
OMPI_TIMING_IMPORT_OPAL("opal_init_util");
426427

427428
/* If thread support was enabled, then setup OPAL to allow for them. This must be done
428429
* early to prevent a race condition that can occur with orte_init(). */
@@ -512,8 +513,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
512513
error = "ompi_mpi_init: ompi_rte_init failed";
513514
goto error;
514515
}
515-
516516
OMPI_TIMING_NEXT("rte_init");
517+
OMPI_TIMING_IMPORT_OPAL("orte_ess_base_app_setup");
518+
OMPI_TIMING_IMPORT_OPAL("rte_init");
517519

518520
ompi_rte_initialized = true;
519521

@@ -643,16 +645,23 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
643645
}
644646

645647
OMPI_TIMING_IMPORT_OPAL("orte_init");
646-
OMPI_TIMING_IMPORT_OPAL("opal_init_util");
647648
OMPI_TIMING_NEXT("rte_init-commit");
648649

649-
650650
/* exchange connection info - this function may also act as a barrier
651651
* if data exchange is required. The modex occurs solely across procs
652652
* in our job. If a barrier is required, the "modex" function will
653653
* perform it internally */
654654
opal_pmix.commit();
655655
OMPI_TIMING_NEXT("commit");
656+
#if (OPAL_ENABLE_TIMING)
657+
if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex &&
658+
opal_pmix_collect_all_data) {
659+
opal_pmix.fence(NULL, 0);
660+
OMPI_TIMING_NEXT("pmix-barrier-1");
661+
opal_pmix.fence(NULL, 0);
662+
OMPI_TIMING_NEXT("pmix-barrier-2");
663+
}
664+
#endif
656665

657666
/* If we have a non-blocking fence:
658667
* if we are doing an async modex, but we are collecting all

ompi/util/timings.h

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
2+
* Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved.
33
* Copyright (c) 2017 Intel, Inc. All rights reserved.
44
* $COPYRIGHT$
55
*
@@ -21,6 +21,7 @@ typedef struct {
2121
double ts;
2222
char *file;
2323
char *prefix;
24+
int imported;
2425
} ompi_timing_val_t;
2526

2627
typedef struct {
@@ -36,11 +37,15 @@ typedef struct ompi_timing_t {
3637
int cnt;
3738
int error;
3839
int enabled;
40+
int import_cnt;
3941
opal_timing_ts_func_t get_ts;
4042
ompi_timing_list_t *timing;
4143
ompi_timing_list_t *cur_timing;
4244
} ompi_timing_t;
4345

46+
#define OMPI_TIMING_ENABLED \
47+
(getenv("OMPI_TIMING_ENABLE") ? atoi(getenv("OMPI_TIMING_ENABLE")) : 0)
48+
4449
#define OMPI_TIMING_INIT(_size) \
4550
ompi_timing_t OMPI_TIMING; \
4651
OMPI_TIMING.prefix = __func__; \
@@ -50,6 +55,7 @@ typedef struct ompi_timing_t {
5055
OMPI_TIMING.error = 0; \
5156
OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \
5257
OMPI_TIMING.enabled = 0; \
58+
OMPI_TIMING.import_cnt = 0; \
5359
{ \
5460
char *ptr; \
5561
ptr = getenv("OMPI_TIMING_ENABLE"); \
@@ -94,7 +100,8 @@ typedef struct ompi_timing_t {
94100
#define OMPI_TIMING_NEXT(...) \
95101
do { \
96102
if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \
97-
char *f = strrchr(__FILE__, '/') + 1; \
103+
char *f = strrchr(__FILE__, '/'); \
104+
f = (f == NULL) ? strdup(__FILE__) : f+1; \
98105
int len = 0; \
99106
if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \
100107
OMPI_TIMING_ITEM_EXTEND; \
@@ -135,10 +142,13 @@ typedef struct ompi_timing_t {
135142
int cnt; \
136143
int i; \
137144
double ts; \
145+
OMPI_TIMING.import_cnt++; \
138146
OPAL_TIMING_ENV_CNT(func, cnt); \
139147
OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \
140148
for(i = 0; i < cnt; i++){ \
141149
char *desc, *filename; \
150+
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].imported= \
151+
OMPI_TIMING.import_cnt; \
142152
OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts); \
143153
OMPI_TIMING_APPEND(filename, func, desc, ts); \
144154
} \
@@ -155,6 +165,7 @@ typedef struct ompi_timing_t {
155165
MPI_Comm_size(MPI_COMM_WORLD, &size); \
156166
MPI_Comm_rank(MPI_COMM_WORLD, &rank); \
157167
int error = 0; \
168+
int imported = 0; \
158169
\
159170
MPI_Reduce(&OMPI_TIMING.error, &error, 1, \
160171
MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \
@@ -171,6 +182,7 @@ typedef struct ompi_timing_t {
171182
char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
172183
char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
173184
char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
185+
double total_avg = 0, total_min = 0, total_max = 0; \
174186
\
175187
if( OMPI_TIMING.cnt > 0 ) { \
176188
OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \
@@ -193,21 +205,53 @@ typedef struct ompi_timing_t {
193205
timing = (ompi_timing_list_t*)timing->next; \
194206
} while (timing != NULL); \
195207
\
196-
if( 0 == rank ){ \
208+
if( 0 == rank ) { \
197209
if (OMPI_TIMING.timing->next) { \
198210
printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \
199211
"==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \
200212
} \
201213
\
202214
printf("------------------ %s ------------------\n", \
203-
OMPI_TIMING.prefix); \
215+
OMPI_TIMING.prefix); \
216+
imported = OMPI_TIMING.timing->val[0].imported; \
204217
for(i=0; i< OMPI_TIMING.cnt; i++){ \
218+
bool print_total = 0; \
219+
imported = OMPI_TIMING.timing->val[i].imported; \
205220
avg[i] /= size; \
206-
printf("[%s:%s:%s]: %lf / %lf / %lf\n", \
221+
printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \
222+
imported ? " -- " : "", \
207223
file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \
224+
if (OMPI_TIMING.timing->val[i].imported) { \
225+
total_avg += avg[i]; \
226+
total_min += min[i]; \
227+
total_max += max[i]; \
228+
} \
229+
if (i == (OMPI_TIMING.cnt-1)) { \
230+
print_total = true; \
231+
} else { \
232+
print_total = imported != OMPI_TIMING.timing->val[i+1].imported; \
233+
} \
234+
if (print_total && OMPI_TIMING.timing->val[i].imported) { \
235+
printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \
236+
imported ? " !! " : "", \
237+
file[i], prefix[i], "total", \
238+
total_avg, total_min, total_max); \
239+
total_avg = 0; total_min = 0; total_max = 0; \
240+
} \
241+
} \
242+
total_avg = 0; total_min = 0; total_max = 0; \
243+
for(i=0; i< OMPI_TIMING.cnt; i++) { \
244+
if (!OMPI_TIMING.timing->val[i].imported) { \
245+
total_avg += avg[i]; \
246+
total_min += min[i]; \
247+
total_max += max[i]; \
248+
} \
208249
} \
250+
printf("[%s:total] %lf / %lf / %lf\n", \
251+
OMPI_TIMING.prefix, \
252+
total_avg, total_min, total_max); \
209253
printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \
210-
OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \
254+
OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \
211255
} \
212256
} \
213257
free(avg); \
@@ -233,6 +277,8 @@ typedef struct ompi_timing_t {
233277

234278
#define OMPI_TIMING_FINALIZE
235279

280+
#define OMPI_TIMING_ENABLED 0
281+
236282
#endif
237283

238284
#endif

opal/runtime/opal_init.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
* and Technology (RIST). All rights reserved.
2121
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
2222
* All Rights reserved.
23+
* Copyright (c) 2018 Mellanox Technologies, Inc.
24+
* All rights reserved.
2325
* $COPYRIGHT$
2426
*
2527
* Additional copyrights may follow
@@ -479,12 +481,16 @@ opal_init_util(int* pargc, char*** pargv)
479481
goto return_error;
480482
}
481483

484+
OPAL_TIMING_ENV_NEXT(otmng, "opal_dss_open");
485+
482486
/* initialize the mca */
483487
if (OPAL_SUCCESS != (ret = mca_base_open())) {
484488
error = "mca_base_open";
485489
goto return_error;
486490
}
487491

492+
OPAL_TIMING_ENV_NEXT(otmng, "mca_base_open");
493+
488494
/* initialize if framework */
489495
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_if_base_framework, 0))) {
490496
fprintf(stderr, "opal_if_base_open() failed -- process will likely abort (%s:%d, returned %d instead of OPAL_SUCCESS)\n",

opal/util/timings.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* Copyright (C) 2014 Artem Polyakov <[email protected]>
33
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
4-
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
4+
* Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved.
55
* $COPYRIGHT$
66
*
77
* Additional copyrights may follow
@@ -91,7 +91,7 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type);
9191
#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \
9292
do { \
9393
opal_timing_env_t name ## _val, *name = &(name ## _val); \
94-
*name = OPAL_TIMING_ENV_START_TYPE(__func__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \
94+
*name = OPAL_TIMING_ENV_START_TYPE(__func__, name, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \
9595
} while(0)
9696

9797
#define OPAL_TIMING_ENV_NEXT(h, ...) \
@@ -121,7 +121,8 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type);
121121
h->error = 1; \
122122
} \
123123
setenv(buf1, buf2, 1); \
124-
filename = strrchr(__FILE__, '/') + 1; \
124+
filename = strrchr(__FILE__, '/'); \
125+
filename = (filename == NULL) ? strdup(__FILE__) : filename+1; \
125126
n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_FILE_%d", h->id, h->cntr); \
126127
if ( n > OPAL_TIMING_STR_LEN ){ \
127128
h->error = 1; \
@@ -207,7 +208,7 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type);
207208

208209
#define OPAL_TIMING_ENV_INIT(name)
209210

210-
#define OPAL_TIMING_ENV_INIT_PREFIX(prefix)
211+
#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name)
211212

212213
#define OPAL_TIMING_ENV_NEXT(h, ... )
213214

orte/mca/ess/base/ess_base_std_app.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
* Copyright (c) 2014-2016 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
19+
* Copyright (c) 2018 Mellanox Technologies, Inc.
20+
* All rights reserved.
1921
* $COPYRIGHT$
2022
*
2123
* Additional copyrights may follow
@@ -62,6 +64,7 @@
6264
#include "orte/util/session_dir.h"
6365
#include "orte/util/name_fns.h"
6466
#include "orte/util/show_help.h"
67+
#include "opal/util/timings.h"
6568

6669
#include "orte/runtime/orte_globals.h"
6770
#include "orte/runtime/orte_wait.h"
@@ -74,6 +77,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
7477
char *error = NULL;
7578
opal_list_t transports;
7679

80+
OPAL_TIMING_ENV_INIT(ess_base_setup);
7781
/*
7882
* stdout/stderr buffering
7983
* If the user requested to override the default setting then do
@@ -116,13 +120,16 @@ int orte_ess_base_app_setup(bool db_restrict_local)
116120
error = "orte_state_base_select";
117121
goto error;
118122
}
123+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "state_framework_open");
119124

120125
/* open the errmgr */
121126
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
122127
ORTE_ERROR_LOG(ret);
123128
error = "orte_errmgr_base_open";
124129
goto error;
125130
}
131+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_framework_open");
132+
126133
/* setup my session directory */
127134
if (orte_create_session_dirs) {
128135
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
@@ -157,6 +164,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
157164
}
158165
}
159166
}
167+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "create_session_dirs");
168+
160169
/* Setup the communication infrastructure */
161170
/* Routed system */
162171
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
@@ -169,6 +178,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
169178
error = "orte_routed_base_select";
170179
goto error;
171180
}
181+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "routed_framework_open");
182+
172183
/*
173184
* OOB Layer
174185
*/
@@ -182,6 +193,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
182193
error = "orte_oob_base_select";
183194
goto error;
184195
}
196+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "oob_framework_open");
197+
185198
/* Runtime Messaging Layer */
186199
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
187200
ORTE_ERROR_LOG(ret);
@@ -193,6 +206,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
193206
error = "orte_rml_base_select";
194207
goto error;
195208
}
209+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_framework_open");
210+
196211
/* if we have info on the HNP and local daemon, process it */
197212
if (NULL != orte_process_info.my_hnp_uri) {
198213
/* we have to set the HNP's name, even though we won't route messages directly
@@ -243,6 +258,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
243258
error = "orte_errmgr_base_select";
244259
goto error;
245260
}
261+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_select");
246262

247263
/* get a conduit for our use - we never route IO over fabric */
248264
OBJ_CONSTRUCT(&transports, opal_list_t);
@@ -264,6 +280,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
264280
goto error;
265281
}
266282
OPAL_LIST_DESTRUCT(&transports);
283+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_open_conduit");
267284

268285
/*
269286
* Group communications
@@ -278,6 +295,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
278295
error = "orte_grpcomm_base_select";
279296
goto error;
280297
}
298+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "grpcomm_framework_open");
281299

282300
/* open the distributed file system */
283301
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
@@ -290,6 +308,8 @@ int orte_ess_base_app_setup(bool db_restrict_local)
290308
error = "orte_dfs_base_select";
291309
goto error;
292310
}
311+
OPAL_TIMING_ENV_NEXT(ess_base_setup, "dfs_framework_open");
312+
293313
return ORTE_SUCCESS;
294314
error:
295315
orte_show_help("help-orte-runtime.txt",

0 commit comments

Comments
 (0)