
Commit 773b509

Author: Rolf vandeVaart

Merge pull request #737 from rolfv/pr/add-cuda-war

Add a workaround for an issue in the libcuda.so library

2 parents 967907f + 7703c96, commit 773b509

File tree

1 file changed (+24, -0 lines changed)

opal/mca/common/cuda/common_cuda.c

Lines changed: 24 additions & 0 deletions
@@ -1749,6 +1749,9 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t
     void *attrdata[] = {(void *)&memType, (void *)&ctx, (void *)&isManaged};
 
     res = cuFunc.cuPointerGetAttributes(3, attributes, attrdata, dbuf);
+    OPAL_OUTPUT_VERBOSE((101, mca_common_cuda_output,
+                         "dbuf=%p, memType=%d, ctx=%p, isManaged=%d, res=%d",
+                         (void *)dbuf, (int)memType, (void *)ctx, isManaged, res));
 
     /* Mark unified memory buffers with a flag. This will allow all unified
      * memory to be forced through host buffers. Note that this memory can
@@ -1822,13 +1825,34 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t
         }
     }
 
+    /* WORKAROUND - There are times when the above code determines a piece of memory
+     * is GPU memory, but it actually is not. That has been seen on multi-GPU systems
+     * with 6 or 8 GPUs on them. Therefore, we will do this extra check. Note that if we
+     * made it this far, the assumption at this point is that we have GPU memory.
+     * Unfortunately, this extra call costs us another 100 ns, almost doubling
+     * the cost of this entire function. */
+    {
+        CUdeviceptr pbase;
+        size_t psize;
+        res = cuFunc.cuMemGetAddressRange(&pbase, &psize, dbuf);
+        if (CUDA_SUCCESS != res) {
+            opal_output_verbose(5, mca_common_cuda_output,
+                                "CUDA: cuMemGetAddressRange failed on this pointer: res=%d, buf=%p "
+                                "Overriding check and setting to host pointer.",
+                                res, (void *)dbuf);
+            /* This cannot be GPU memory if the previous call failed. */
+            return 0;
+        }
+    }
+
     /* First access on a device pointer finalizes CUDA support initialization.
      * If initialization fails, disable support. */
     if (!stage_three_init_complete) {
        if (0 != mca_common_cuda_stage_three_init()) {
            opal_cuda_support = 0;
        }
     }
+
     return 1;
 }
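
The added hunk follows a two-step pattern: cuPointerGetAttributes() classifies the pointer, and cuMemGetAddressRange() then confirms that the pointer really maps to a CUDA allocation, falling back to the host path when it does not. Below is a minimal standalone sketch of that pattern, separate from the Open MPI code: it calls the CUDA driver API directly rather than through Open MPI's cuFunc dispatch table, assumes a current CUDA context, and the helper name is_gpu_buffer() and its error message are illustrative only.

#include <cuda.h>
#include <stdio.h>

/* Returns 1 if buf looks like usable GPU memory, 0 if it should be treated
 * as host memory (illustrative helper, not part of Open MPI or CUDA). */
static int is_gpu_buffer(CUdeviceptr buf)
{
    CUmemorytype mem_type = (CUmemorytype)0;
    CUcontext ctx = NULL;
    unsigned int is_managed = 0;
    CUpointer_attribute attrs[3] = {
        CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
        CU_POINTER_ATTRIBUTE_CONTEXT,
        CU_POINTER_ATTRIBUTE_IS_MANAGED
    };
    void *attr_data[3] = { &mem_type, &ctx, &is_managed };

    /* First pass: ask the driver what kind of pointer this is. */
    CUresult res = cuPointerGetAttributes(3, attrs, attr_data, buf);
    if (CUDA_SUCCESS != res || CU_MEMORYTYPE_DEVICE != mem_type) {
        return 0;  /* plain host memory, or the query itself failed */
    }

    /* Second pass (the workaround): the first query can misreport host
     * memory as device memory on some multi-GPU systems, so verify that the
     * pointer falls inside a real allocation range. */
    CUdeviceptr base = 0;
    size_t size = 0;
    res = cuMemGetAddressRange(&base, &size, buf);
    if (CUDA_SUCCESS != res) {
        fprintf(stderr, "cuMemGetAddressRange failed (res=%d); "
                        "treating %p as host memory\n",
                (int)res, (void *)buf);
        return 0;  /* cannot be GPU memory if the range lookup failed */
    }
    return 1;
}

The trade-off is the one the commit comment spells out: the extra driver call adds roughly 100 ns per invocation, nearly doubling the cost of the check, in exchange for not misrouting host buffers through the GPU path on large multi-GPU systems.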

0 commit comments