Skip to content

Commit 9373071

Browse files
authored
Merge pull request pmodels#7271 from hzhou/2501_prog_timeout
request: always enable the progress timeout cvar. Approved-by: Ken Raffenetti
2 parents f9d685f + 6e738c8 commit 9373071

File tree

4 files changed

+33
-23
lines changed

4 files changed

+33
-23
lines changed

CHANGES

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
# MPIR_CHKLMEM_ and MPIR_CHKPMEM_ macros are simplified, removing non-essential
55
arguments such as type casts and custom error messages.
66

7+
# Rename MPIR_CVAR_DEBUG_PROGRESS_TIMEOUT to MPIR_CVAR_PROGRESS_TIMEOUT, and
8+
enable it whether or not --enable-g=progress is used in configure.
9+
710
===============================================================================
811
Changes in 4.3
912
===============================================================================

configure.ac

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ AC_ARG_ENABLE(g,
406406
performance impacts. Recommended for typical development.
407407
progress - Enable debugging progress status
408408
yes - synonym for "most" (*not* "all")
409-
all - All of the above choices
409+
all - Most of the above choices
410410
],,enable_g=none)
411411

412412
AC_ARG_ENABLE([mpit-pvars],
@@ -1290,6 +1290,7 @@ for option in $enable_g ; do
12901290
perform_dbgmutex=yes
12911291
perform_handlealloc=yes
12921292
perform_handle=yes
1293+
perform_dbgprogress=yes
12931294
;;
12941295
*)
12951296
IFS=$save_IFS

src/include/mpir_request.h

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,24 @@
88

99
#include "mpir_process.h"
1010

11+
/*
12+
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
13+
14+
cvars:
15+
- name : MPIR_CVAR_PROGRESS_TIMEOUT
16+
category : CH4
17+
type : int
18+
default : 0
19+
class : none
20+
verbosity : MPI_T_VERBOSITY_USER_BASIC
21+
scope : MPI_T_SCOPE_LOCAL
22+
description : >-
23+
Sets the timeout in seconds to dump outstanding requests when progress wait is not making progress for some time.
24+
25+
26+
=== END_MPI_T_CVAR_INFO_BLOCK ===
27+
*/
28+
1129
/* NOTE-R1: MPIR_REQUEST_KIND__MPROBE signifies that this is a request created by
1230
* MPI_Mprobe or MPI_Improbe. Since we use MPI_Request objects as our
1331
* MPI_Message objects, we use this separate kind in order to provide stronger
@@ -319,41 +337,39 @@ extern MPIR_Request MPIR_Request_direct[MPIR_REQUEST_PREALLOC];
319337
} \
320338
} while (0)
321339

340+
#else
341+
342+
#define MPIR_REQUEST_SET_INFO(req, ...) do { } while (0)
343+
#define MPIR_REQUEST_DEBUG(req) do { } while (0)
344+
#endif
345+
322346
#define DEBUG_PROGRESS_START \
323347
int iter = 0; \
324348
bool progress_timed_out = false; \
325349
MPL_time_t time_start; \
326-
if (MPIR_CVAR_DEBUG_PROGRESS_TIMEOUT > 0) { \
350+
if (MPIR_CVAR_PROGRESS_TIMEOUT > 0) { \
327351
MPL_wtime(&time_start); \
328352
}
329353

330354
#define DEBUG_PROGRESS_CHECK \
331-
if (MPIR_CVAR_DEBUG_PROGRESS_TIMEOUT > 0) { \
355+
if (MPIR_CVAR_PROGRESS_TIMEOUT > 0) { \
332356
iter++; \
333357
if (iter == 0xffff) {\
334358
double time_diff = 0.0; \
335359
MPL_time_t time_cur; \
336360
MPL_wtime(&time_cur); \
337361
MPL_wtime_diff(&time_start, &time_cur, &time_diff); \
338-
if (time_diff > MPIR_CVAR_DEBUG_PROGRESS_TIMEOUT && !progress_timed_out) { \
362+
if (time_diff > MPIR_CVAR_PROGRESS_TIMEOUT && !progress_timed_out) { \
339363
MPIR_Request_debug(); \
340364
MPL_backtrace_show(stdout); \
341365
progress_timed_out = true; \
342-
} else if (time_diff > MPIR_CVAR_DEBUG_PROGRESS_TIMEOUT * 2) { \
366+
} else if (time_diff > MPIR_CVAR_PROGRESS_TIMEOUT * 2) { \
343367
MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**timeout"); \
344368
} \
345369
iter = 0; \
346370
} \
347371
}
348372

349-
#else
350-
351-
#define MPIR_REQUEST_SET_INFO(req, ...) do { } while (0)
352-
#define MPIR_REQUEST_DEBUG(req) do { } while (0)
353-
#define DEBUG_PROGRESS_START do {} while (0)
354-
#define DEBUG_PROGRESS_CHECK do {} while (0)
355-
#endif
356-
357373
void MPII_init_request(void);
358374

359375
/* To get the benefit of multiple request pool, device layer need register their per-vci lock

src/mpi/request/request_impl.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,6 @@ categories :
5353
in MPI_Waitall and MPI_Testall implementation. A large number
5454
is likely to cause more cache misses.
5555
56-
- name : MPIR_CVAR_DEBUG_PROGRESS_TIMEOUT
57-
category : CH4
58-
type : int
59-
default : 0
60-
class : none
61-
verbosity : MPI_T_VERBOSITY_USER_BASIC
62-
scope : MPI_T_SCOPE_LOCAL
63-
description : >-
64-
Sets the timeout in seconds to dump outstanding requests when progress wait is not making progress for some time.
65-
6656
=== END_MPI_T_CVAR_INFO_BLOCK ===
6757
*/
6858

0 commit comments

Comments
 (0)