Skip to content

Commit 27899b0

Browse files
authored
Merge pull request #6486 from hoopoepg/topic/check-ucx-params-v4.0
PML/SPML/UCX: added evaluation of mmap events - v4.0
2 parents 27c0e95 + bed8141 commit 27899b0

File tree

5 files changed

+33
-4
lines changed

5 files changed

+33
-4
lines changed

config/ompi_check_ucx.m4

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ AC_DEFUN([OMPI_CHECK_UCX],[
112112
ucp_request_check_status, ucp_put_nb, ucp_get_nb],
113113
[], [],
114114
[#include <ucp/api/ucp.h>])
115+
AC_CHECK_DECLS([ucm_test_events],
116+
[], [],
117+
[#include <ucm/api/ucm.h>])
115118
AC_CHECK_DECLS([UCP_ATOMIC_POST_OP_AND,
116119
UCP_ATOMIC_POST_OP_OR,
117120
UCP_ATOMIC_POST_OP_XOR,

ompi/mca/pml/ucx/pml_ucx.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,7 @@ int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs)
422422
}
423423
}
424424

425+
opal_common_ucx_mca_proc_added();
425426
return OMPI_SUCCESS;
426427
}
427428

opal/mca/common/ucx/common_ucx.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,27 @@ static void opal_common_ucx_mca_fence_complete_cb(int status, void *fenced)
132132
*(int*)fenced = 1;
133133
}
134134

135+
/**
 * Probe whether UCX can observe VM_UNMAPPED memory events and request a
 * warning if it cannot.
 *
 * The whole body is compiled out when HAVE_DECL_UCM_TEST_EVENTS is false,
 * in which case this function does nothing.
 *
 * The probe is repeated on every call for as long as it succeeds.  After
 * the first failure the warning is requested once and `warned` latches,
 * so later calls return without probing again.
 *
 * NOTE(review): `warned` is a plain static with no synchronization;
 * confirm that callers never invoke this concurrently.
 */
void opal_common_ucx_mca_proc_added(void)
{
#if HAVE_DECL_UCM_TEST_EVENTS
    static int warned = 0;
    /* Points at a string literal, so it is const-qualified. */
    static const char *mem_hooks_suggestion = "Pls try adding --mca opal_common_ucx_opal_mem_hooks 1 "
                                              "to mpirun/oshrun command line to resolve this issue.";
    ucs_status_t status;

    if (!warned) {
        status = ucm_test_events(UCM_EVENT_VM_UNMAPPED);
        if (status != UCS_OK) {
            /* The suggestion is appended only when opal_mem_hooks is off. */
            MCA_COMMON_UCX_WARN("UCX is unable to handle VM_UNMAP event. "
                                "This may cause performance degradation or data "
                                "corruption. %s",
                                opal_common_ucx.opal_mem_hooks ? "" : mem_hooks_suggestion);
            warned = 1;
        }
    }
#endif
}
155+
135156
OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker)
136157
{
137158
volatile int fenced = 0;

opal/mca/common/ucx/common_ucx.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,11 @@ BEGIN_C_DECLS
3939
#define MCA_COMMON_UCX_QUOTE(_x) \
4040
_MCA_COMMON_UCX_QUOTE(_x)
4141

42-
#define MCA_COMMON_UCX_ERROR(...) \
43-
opal_output_verbose(0, opal_common_ucx.output, \
44-
__FILE__ ":" MCA_COMMON_UCX_QUOTE(__LINE__) \
45-
" Error: " __VA_ARGS__)
42+
#define MCA_COMMON_UCX_ERROR(...) \
43+
MCA_COMMON_UCX_VERBOSE(0, " Error: " __VA_ARGS__)
44+
45+
#define MCA_COMMON_UCX_WARN(...) \
46+
MCA_COMMON_UCX_VERBOSE(0, " Warning: " __VA_ARGS__)
4647

4748
#define MCA_COMMON_UCX_VERBOSE(_level, ... ) \
4849
if (((_level) <= MCA_COMMON_UCX_MAX_VERBOSE) && \
@@ -96,6 +97,7 @@ extern opal_common_ucx_module_t opal_common_ucx;
9697

9798
OPAL_DECLSPEC void opal_common_ucx_mca_register(void);
9899
OPAL_DECLSPEC void opal_common_ucx_mca_deregister(void);
100+
OPAL_DECLSPEC void opal_common_ucx_mca_proc_added(void);
99101
OPAL_DECLSPEC void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status);
100102
OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker);
101103
OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component);

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs)
138138

139139
mca_spml_ucx_ctx_default.ucp_peers = NULL;
140140

141+
opal_common_ucx_mca_proc_added();
142+
141143
return ret;
142144
}
143145

0 commit comments

Comments
 (0)