11/*
2- * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
2+ * Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved.
33 * Copyright (c) 2017 Intel, Inc. All rights reserved.
44 * $COPYRIGHT$
55 *
@@ -21,6 +21,7 @@ typedef struct {
2121 double ts ;
2222 char * file ;
2323 char * prefix ;
24+ int imported ;
2425} ompi_timing_val_t ;
2526
2627typedef struct {
@@ -36,11 +37,15 @@ typedef struct ompi_timing_t {
3637 int cnt ;
3738 int error ;
3839 int enabled ;
40+ int import_cnt ;
3941 opal_timing_ts_func_t get_ts ;
4042 ompi_timing_list_t * timing ;
4143 ompi_timing_list_t * cur_timing ;
4244} ompi_timing_t ;
4345
/*
 * Helper behind OMPI_TIMING_ENABLED.
 *
 * Returns the integer value of the OMPI_TIMING_ENABLE environment variable,
 * or 0 when the variable is unset or does not start with a decimal number.
 * The environment is read on every call, so a change made between two
 * evaluations is observed.
 *
 * The variable is looked up once per evaluation and parsed with strtol(),
 * whose behaviour is defined for out-of-range input (atoi() is undefined
 * there). Values that do not fit in an int are clamped to the int range, so
 * an over-large setting still reads as non-zero.
 */
static inline int ompi_timing_enabled_from_env(void)
{
    /* Largest int value, derived without pulling in <limits.h>. */
    const long int_max = (long)(~0u >> 1);
    const char *value = getenv("OMPI_TIMING_ENABLE");
    long parsed;

    if (NULL == value) {
        return 0;
    }

    parsed = strtol(value, NULL, 10);
    if (parsed > int_max) {
        return (int)int_max;
    }
    if (parsed < -int_max - 1) {
        return (int)(-int_max - 1);
    }
    return (int)parsed;
}

/* Non-zero when timing output was requested through OMPI_TIMING_ENABLE. */
#define OMPI_TIMING_ENABLED (ompi_timing_enabled_from_env())
48+
4449#define OMPI_TIMING_INIT (_size ) \
4550 ompi_timing_t OMPI_TIMING; \
4651 OMPI_TIMING.prefix = __func__; \
@@ -50,6 +55,7 @@ typedef struct ompi_timing_t {
5055 OMPI_TIMING.error = 0; \
5156 OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \
5257 OMPI_TIMING.enabled = 0; \
58+ OMPI_TIMING.import_cnt = 0; \
5359 { \
5460 char *ptr; \
5561 ptr = getenv("OMPI_TIMING_ENABLE"); \
@@ -94,7 +100,8 @@ typedef struct ompi_timing_t {
94100#define OMPI_TIMING_NEXT (...) \
95101 do { \
96102 if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \
97- char *f = strrchr(__FILE__, '/') + 1; \
103+ char *f = strrchr(__FILE__, '/'); \
104+ f = (f == NULL) ? strdup(__FILE__) : f+1; \
98105 int len = 0; \
99106 if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \
100107 OMPI_TIMING_ITEM_EXTEND; \
@@ -135,10 +142,13 @@ typedef struct ompi_timing_t {
135142 int cnt; \
136143 int i; \
137144 double ts; \
145+ OMPI_TIMING.import_cnt++; \
138146 OPAL_TIMING_ENV_CNT(func, cnt); \
139147 OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \
140148 for(i = 0; i < cnt; i++){ \
141149 char *desc, *filename; \
150+ OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].imported= \
151+ OMPI_TIMING.import_cnt; \
142152 OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts); \
143153 OMPI_TIMING_APPEND(filename, func, desc, ts); \
144154 } \
@@ -155,6 +165,7 @@ typedef struct ompi_timing_t {
155165 MPI_Comm_size(MPI_COMM_WORLD, &size); \
156166 MPI_Comm_rank(MPI_COMM_WORLD, &rank); \
157167 int error = 0; \
168+ int imported = 0; \
158169 \
159170 MPI_Reduce(&OMPI_TIMING.error, &error, 1, \
160171 MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \
@@ -171,6 +182,7 @@ typedef struct ompi_timing_t {
171182 char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
172183 char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
173184 char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
185+ double total_avg = 0, total_min = 0, total_max = 0; \
174186 \
175187 if( OMPI_TIMING.cnt > 0 ) { \
176188 OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \
@@ -193,21 +205,53 @@ typedef struct ompi_timing_t {
193205 timing = (ompi_timing_list_t*)timing->next; \
194206 } while (timing != NULL); \
195207 \
196- if( 0 == rank ){ \
208+ if( 0 == rank ) { \
197209 if (OMPI_TIMING.timing->next) { \
198210 printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \
199211 "==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \
200212 } \
201213 \
202214 printf("------------------ %s ------------------\n", \
203- OMPI_TIMING.prefix); \
215+ OMPI_TIMING.prefix); \
216+ imported = OMPI_TIMING.timing->val[0].imported; \
204217 for(i=0; i< OMPI_TIMING.cnt; i++){ \
218+ bool print_total = 0; \
219+ imported = OMPI_TIMING.timing->val[i].imported; \
205220 avg[i] /= size; \
206- printf("[%s:%s:%s]: %lf / %lf / %lf\n", \
221+ printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \
222+ imported ? " -- " : "", \
207223 file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \
224+ if (OMPI_TIMING.timing->val[i].imported) { \
225+ total_avg += avg[i]; \
226+ total_min += min[i]; \
227+ total_max += max[i]; \
228+ } \
229+ if (i == (OMPI_TIMING.cnt-1)) { \
230+ print_total = true; \
231+ } else { \
232+ print_total = imported != OMPI_TIMING.timing->val[i+1].imported; \
233+ } \
234+ if (print_total && OMPI_TIMING.timing->val[i].imported) { \
235+ printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \
236+ imported ? " !! " : "", \
237+ file[i], prefix[i], "total", \
238+ total_avg, total_min, total_max); \
239+ total_avg = 0; total_min = 0; total_max = 0; \
240+ } \
241+ } \
242+ total_avg = 0; total_min = 0; total_max = 0; \
243+ for(i=0; i< OMPI_TIMING.cnt; i++) { \
244+ if (!OMPI_TIMING.timing->val[i].imported) { \
245+ total_avg += avg[i]; \
246+ total_min += min[i]; \
247+ total_max += max[i]; \
248+ } \
208249 } \
250+ printf("[%s:total] %lf / %lf / %lf\n", \
251+ OMPI_TIMING.prefix, \
252+ total_avg, total_min, total_max); \
209253 printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \
210- OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \
254+ OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \
211255 } \
212256 } \
213257 free(avg); \
@@ -233,6 +277,8 @@ typedef struct ompi_timing_t {
233277
/* Empty definition: OMPI_TIMING_FINALIZE expands to nothing here, so a use
 * written as `OMPI_TIMING_FINALIZE;` reduces to an empty statement.
 * NOTE(review): this looks like the timing-disabled branch of the header;
 * the controlling #if/#else is outside the visible lines -- confirm. */
234278#define OMPI_TIMING_FINALIZE
235279
/* Constant variant of OMPI_TIMING_ENABLED: always evaluates to 0, so code
 * guarded by it is never taken and the environment is not consulted.
 * NOTE(review): presumably paired with the getenv()-based definition in the
 * other preprocessor branch -- the branch condition is not visible here. */
280+ #define OMPI_TIMING_ENABLED 0
281+
236282#endif
237283
238284#endif
0 commit comments