@@ -1749,6 +1749,9 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t
17491749 void * attrdata [] = {(void * )& memType , (void * )& ctx , (void * )& isManaged };
17501750
17511751 res = cuFunc .cuPointerGetAttributes (3 , attributes , attrdata , dbuf );
1752+ OPAL_OUTPUT_VERBOSE ((101 , mca_common_cuda_output ,
1753+ "dbuf=%p, memType=%d, ctx=%p, isManaged=%d, res=%d" ,
1754+ (void * )dbuf , (int )memType , (void * )ctx , isManaged , res ));
17521755
17531756 /* Mark unified memory buffers with a flag. This will allow all unified
17541757 * memory to be forced through host buffers. Note that this memory can
@@ -1822,13 +1825,34 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t
18221825 }
18231826 }
18241827
1828+ /* WORKAROUND - There are times when the above code determines a piece of memory
1829+ * is GPU memory, but it actually is not. That has been seen on multi-GPU systems
1830+ * with 6 or 8 GPUs on them. Therefore, we will do this extra check. Note if we
1831+ * made it this far, then the assumption at this point is we have GPU memory.
1832+ * Unfortunately, this extra call is costing us another 100 ns, almost doubling
1833+ * the cost of this entire function. */
1834+ {
1835+ CUdeviceptr pbase ;
1836+ size_t psize ;
1837+ res = cuFunc .cuMemGetAddressRange (& pbase , & psize , dbuf );
1838+ if (CUDA_SUCCESS != res ) {
1839+ opal_output_verbose (5 , mca_common_cuda_output ,
1840+ "CUDA: cuMemGetAddressRange failed on this pointer: res=%d, buf=%p "
1841+ "Overriding check and setting to host pointer. " ,
1842+ res , (void * )dbuf );
1843+ /* This cannot be GPU memory if the previous call failed */
1844+ return 0 ;
1845+ }
1846+ }
1847+
18251848 /* First access on a device pointer finalizes CUDA support initialization.
18261849 * If initialization fails, disable support. */
18271850 if (!stage_three_init_complete ) {
18281851 if (0 != mca_common_cuda_stage_three_init ()) {
18291852 opal_cuda_support = 0 ;
18301853 }
18311854 }
1855+
18321856 return 1 ;
18331857}
18341858
0 commit comments