
Commit 139369a

Merge pull request open-mpi#728 from rolfv/pr/cuda-require-41-2.x
Make CUDA 4.1 a requirement for CUDA-aware support.
2 parents 8e00524 + bd1d925

3 files changed: 8 additions and 28 deletions

config/opal_check_cuda.m4

Lines changed: 8 additions & 9 deletions
@@ -16,7 +16,7 @@ dnl Copyright (c) 2009 IBM Corporation. All rights reserved.
 dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights
 dnl                    reserved.
 dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved.
-dnl Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
+dnl Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
 dnl Copyright (c) 2015      Research Organization for Information Science
 dnl                         and Technology (RIST). All rights reserved.
 dnl
@@ -79,10 +79,13 @@ dnl common framework, and likely configured first). So we have to
 dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4
 dnl macro, below). :-(

-# If we have CUDA support, check to see if we have CUDA 4.1 support
-AS_IF([test "$opal_check_cuda_happy"="yes"],
-    AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], [CUDA_SUPPORT_41=1], [CUDA_SUPPORT_41=0],
-        [#include <$opal_cuda_incdir/cuda.h>]),
+# We require CUDA IPC support which started in CUDA 4.1. Error
+# out if the support is not there.
+AS_IF([test "$opal_check_cuda_happy" = "yes"],
+      [AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved],
+          [],
+          [AC_MSG_ERROR([Cannot continue because CUDA 4.1 or later is required])],
+          [#include <$opal_cuda_incdir/cuda.h>])],
       [])

 # If we have CUDA support, check to see if we have support for SYNC_MEMOPS
@@ -125,10 +128,6 @@ AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT,
                    [Whether we want cuda device pointer support])

-AM_CONDITIONAL([OPAL_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"])
-AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_41],$CUDA_SUPPORT_41,
-                   [Whether we have CUDA 4.1 support available])
-
 AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
                    [Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
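For context: the AC_CHECK_MEMBER probe above works by compiling a small test program that references the struct member, and configure now aborts if that compile fails. A rough hand-written C sketch of what such a probe amounts to (not the exact autoconf-generated code) is:

/* Rough equivalent of the AC_CHECK_MEMBER probe: this translation unit
 * only compiles against CUDA 4.1 or later headers, which introduced the
 * IPC memory-handle type. */
#include <cuda.h>

int main(void)
{
    static struct CUipcMemHandle_st handle;
    (void) sizeof(handle.reserved);   /* member reference fails on pre-4.1 headers */
    return 0;
}

It would be compiled with the same include path configure uses ($opal_cuda_incdir), so a too-old cuda.h produces the AC_MSG_ERROR path above.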

ompi/mca/pml/ob1/pml_ob1_cuda.c

Lines changed: 0 additions & 5 deletions
@@ -56,7 +56,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
                                         mca_bml_base_btl_t* bml_btl,
                                         size_t size) {
     int rc;
-#if OPAL_CUDA_SUPPORT_41
 #if OPAL_CUDA_GDR_SUPPORT
     /* With some BTLs, switch to RNDV from RGET at large messages */
     if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
@@ -95,10 +94,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
         sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
         rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
     }
-#else
-    /* Just do the rendezvous but set initial data to be sent to zero */
-    rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     return rc;
 }


opal/mca/common/cuda/common_cuda.c

Lines changed: 0 additions & 14 deletions
@@ -88,13 +88,11 @@ struct cudaFunctionTable {
     int (*cuEventDestroy)(CUevent);
     int (*cuStreamWaitEvent)(CUstream, CUevent, unsigned int);
     int (*cuMemGetAddressRange)(CUdeviceptr*, size_t*, CUdeviceptr);
-#if OPAL_CUDA_SUPPORT_41
     int (*cuIpcGetEventHandle)(CUipcEventHandle*, CUevent);
     int (*cuIpcOpenEventHandle)(CUevent*, CUipcEventHandle);
     int (*cuIpcOpenMemHandle)(CUdeviceptr*, CUipcMemHandle, unsigned int);
     int (*cuIpcCloseMemHandle)(CUdeviceptr);
     int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     int (*cuCtxGetDevice)(CUdevice *);
     int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
     int (*cuDeviceGet)(CUdevice *, int);
@@ -156,7 +154,6 @@ OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t,
                    NULL,
                    NULL);

-#if OPAL_CUDA_SUPPORT_41
 static int mca_common_cuda_async = 1;
 static int mca_common_cuda_cumemcpy_async;
 #if OPAL_ENABLE_DEBUG
@@ -223,8 +220,6 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
 #define CUDA_DUMP_EVTHANDLE(a)
 #endif /* OPAL_ENABLE_DEBUG */

-#endif /* OPAL_CUDA_SUPPORT_41 */
-
 /* This is a seperate function so we can see these variables with ompi_info and
  * also set them with the tools interface */
 void mca_common_cuda_register_mca_variables(void)
@@ -263,7 +258,6 @@ void mca_common_cuda_register_mca_variables(void)
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &mca_common_cuda_warning);

-#if OPAL_CUDA_SUPPORT_41
     /* Use this flag to test async vs sync copies */
     mca_common_cuda_async = 1;
     (void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async",
@@ -280,7 +274,6 @@ void mca_common_cuda_register_mca_variables(void)
                                  OPAL_INFO_LVL_9,
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &cuda_event_max);
-#endif /* OPAL_CUDA_SUPPORT_41 */

     /* Use this flag to test cuMemcpyAsync vs cuMemcpy */
     mca_common_cuda_cumemcpy_async = 1;
@@ -465,13 +458,11 @@ int mca_common_cuda_stage_one_init(void)
     OPAL_CUDA_DLSYM(libcuda_handle, cuMemFree);
     OPAL_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
     OPAL_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
-#if OPAL_CUDA_SUPPORT_41
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     OPAL_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
     OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
     OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
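The hunk above now loads the cuIpc* entry points unconditionally, since a 4.1-capable driver is required. OPAL_CUDA_DLSYM resolves each driver symbol at run time rather than linking libcuda at build time; a minimal standalone sketch of that dlopen/dlsym pattern (assuming the usual Linux driver name libcuda.so.1, and not the Open MPI macro itself) looks like:

#include <dlfcn.h>
#include <stdio.h>
#include <cuda.h>

/* Function-pointer type matching the CUDA driver's cuIpcGetMemHandle. */
typedef CUresult (*cuIpcGetMemHandle_t)(CUipcMemHandle *, CUdeviceptr);

int main(void)
{
    void *libcuda = dlopen("libcuda.so.1", RTLD_LAZY);
    if (NULL == libcuda) {
        fprintf(stderr, "could not open libcuda: %s\n", dlerror());
        return 1;
    }
    cuIpcGetMemHandle_t get_handle =
        (cuIpcGetMemHandle_t) dlsym(libcuda, "cuIpcGetMemHandle");
    if (NULL == get_handle) {
        /* A pre-4.1 driver does not export the IPC entry points. */
        fprintf(stderr, "cuIpcGetMemHandle not found: %s\n", dlerror());
        dlclose(libcuda);
        return 1;
    }
    printf("CUDA IPC entry points are available\n");
    dlclose(libcuda);
    return 0;
}

Only cuda.h is needed from the toolkit at build time (plus -ldl when linking); the driver symbols themselves are looked up dynamically, which is exactly why the configure-time header check is what enforces the 4.1 minimum.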
@@ -595,7 +586,6 @@ static int mca_common_cuda_stage_three_init(void)
         return OPAL_ERROR;
     }

-#if OPAL_CUDA_SUPPORT_41
     if (true == mca_common_cuda_enabled) {
         /* Set up an array to store outstanding IPC async copy events */
         cuda_event_ipc_num_used = 0;
@@ -633,7 +623,6 @@ static int mca_common_cuda_stage_three_init(void)
         }
     }

-#endif /* OPAL_CUDA_SUPPORT_41 */
     if (true == mca_common_cuda_enabled) {
         /* Set up an array to store outstanding async dtoh events. Used on the
          * sending side for asynchronous copies. */
@@ -1006,7 +995,6 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
         }
     }

-#if OPAL_CUDA_SUPPORT_41
 /*
  * Get the memory handle of a local section of memory that can be sent
  * to the remote size so it can access the memory. This is the
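The code that this hunk un-guards is built around the CUDA 4.1 IPC calls: the owning process exports a handle for a device allocation, and a peer process on the same node maps it. A minimal sketch of that exchange (error handling and the shipping of the handle between processes omitted; this is an illustration, not the Open MPI code path):

#include <cuda.h>

/* Exporting side: produce a handle describing the device allocation;
 * the handle is plain data that can be sent to a peer process. */
static CUresult export_ipc_handle(CUdeviceptr dbuf, CUipcMemHandle *handle)
{
    return cuIpcGetMemHandle(handle, dbuf);
}

/* Importing side: map the peer's allocation into this process so it can
 * be used as a source or destination for device-to-device copies. */
static CUresult import_ipc_handle(CUipcMemHandle handle, CUdeviceptr *mapped)
{
    return cuIpcOpenMemHandle(mapped, handle,
                              CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
}

The importer later releases the mapping with cuIpcCloseMemHandle, which is also among the symbols resolved in the stage-one init above.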
@@ -1739,8 +1727,6 @@ static float mydifftime(opal_timer_t ts_start, opal_timer_t ts_end) {
 }
 #endif /* OPAL_ENABLE_DEBUG */

-#endif /* OPAL_CUDA_SUPPORT_41 */
-
 /* Routines that get plugged into the opal datatype code */
 static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t *convertor)
 {
