Skip to content

Commit 1f32fa2

Browse files
Author: Rolf vandeVaart
Commit message: Fix arguments to error message, remove tabs and trailing spaces
1 parent 773b509 commit 1f32fa2

File tree

2 files changed: 16 additions (+16) and 7 deletions (-7).

opal/mca/common/cuda/common_cuda.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,9 +1749,9 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t
17491749
void *attrdata[] = {(void *)&memType, (void *)&ctx, (void *)&isManaged};
17501750

17511751
res = cuFunc.cuPointerGetAttributes(3, attributes, attrdata, dbuf);
1752-
OPAL_OUTPUT_VERBOSE((101, mca_common_cuda_output,
1753-
"dbuf=%p, memType=%d, ctx=%p, isManaged=%d, res=%d",
1754-
(void *)dbuf, (int)memType, (void *)ctx, isManaged, res));
1752+
OPAL_OUTPUT_VERBOSE((101, mca_common_cuda_output,
1753+
"dbuf=%p, memType=%d, ctx=%p, isManaged=%d, res=%d",
1754+
(void *)dbuf, (int)memType, (void *)ctx, isManaged, res));
17551755

17561756
/* Mark unified memory buffers with a flag. This will allow all unified
17571757
* memory to be forced through host buffers. Note that this memory can
@@ -1836,7 +1836,7 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t
18361836
size_t psize;
18371837
res = cuFunc.cuMemGetAddressRange(&pbase, &psize, dbuf);
18381838
if (CUDA_SUCCESS != res) {
1839-
opal_output_verbose(5, mca_common_cuda_output,
1839+
opal_output_verbose(5, mca_common_cuda_output,
18401840
"CUDA: cuMemGetAddressRange failed on this pointer: res=%d, buf=%p "
18411841
"Overriding check and setting to host pointer. ",
18421842
res, (void *)dbuf);
@@ -1994,8 +1994,8 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
19941994
CUresult result;
19951995
result = cuFunc.cuMemGetAddressRange((CUdeviceptr *)pbase, psize, (CUdeviceptr)base);
19961996
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
1997-
opal_show_help("help-mpi-common-cuda.txt", "cuMemGetAddressRange failed",
1998-
true, result, base);
1997+
opal_show_help("help-mpi-common-cuda.txt", "cuMemGetAddressRange failed 2",
1998+
true, OPAL_PROC_MY_HOSTNAME, result, base);
19991999
return OPAL_ERROR;
20002000
} else {
20012001
opal_output_verbose(50, mca_common_cuda_output,
@@ -2054,7 +2054,8 @@ void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg)
20542054
res = cuFunc.cuPointerGetAttribute(&bufID, CU_POINTER_ATTRIBUTE_BUFFER_ID,
20552055
(CUdeviceptr)dbuf);
20562056
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
2057-
opal_show_help("help-mpi-common-cuda.txt", "bufferID failed", true, res);
2057+
opal_show_help("help-mpi-common-cuda.txt", "bufferID failed",
2058+
true, OPAL_PROC_MY_HOSTNAME, res);
20582059
}
20592060
reg->gpu_bufID = bufID;
20602061

opal/mca/common/cuda/help-mpi-common-cuda.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,14 @@ cannot be used.
6565
Check the cuda.h file for what the return value means. Perhaps a reboot
6666
of the node will clear the problem.
6767
#
68+
[cuMemGetAddressRange failed 2]
69+
The call to cuMemGetAddressRange failed during the GPU RDMA protocol.
70+
Host: %s
71+
cuMemGetAddressRange return value: %d
72+
address: %p
73+
Check the cuda.h file for what the return value means. This is highly
74+
unusual and should not happen. The program will probably abort.
75+
#
6876
[Out of cuEvent handles]
6977
The library has exceeded its number of outstanding event handles.
7078
For better performance, this number should be increased.

Comments (0): there are no comments on this commit.