@@ -143,21 +143,29 @@ static inline void efa_hmem_info_check_p2p_support_cuda(struct efa_hmem_info *in
 	}
 
 #if HAVE_EFA_DMABUF_MR
-	ret = cuda_get_dmabuf_fd(ptr, len, &dmabuf_fd, &dmabuf_offset);
-	if (ret == FI_SUCCESS) {
-		ibv_mr = ibv_reg_dmabuf_mr(ibv_pd, dmabuf_offset,
-					   len, (uint64_t)ptr, dmabuf_fd, ibv_access);
-		(void)cuda_put_dmabuf_fd(dmabuf_fd);
-		if (!ibv_mr) {
+	if (ofi_hmem_is_dmabuf_env_var_enabled(FI_HMEM_CUDA)) {
+		ret = ofi_hmem_get_dmabuf_fd(FI_HMEM_CUDA, ptr, len, &dmabuf_fd, &dmabuf_offset);
+		if (ret == FI_SUCCESS) {
+			ibv_mr = ibv_reg_dmabuf_mr(ibv_pd, dmabuf_offset,
+						   len, (uint64_t)ptr, dmabuf_fd, ibv_access);
+			(void)ofi_hmem_put_dmabuf_fd(FI_HMEM_CUDA, dmabuf_fd);
+			if (!ibv_mr) {
+				EFA_INFO(FI_LOG_CORE,
+					 "Unable to register CUDA device buffer via dmabuf: %s. "
+					 "Fall back to ibv_reg_mr\n", fi_strerror(-errno));
+				ibv_mr = ibv_reg_mr(ibv_pd, ptr, len, ibv_access);
+			} else {
+				info->dmabuf_supported_by_device = EFA_DMABUF_SUPPORTED;
+			}
+		} else {
 			EFA_INFO(FI_LOG_CORE,
-				 "Unable to register CUDA device buffer via dmabuf: %s. "
-				 "Fall back to ibv_reg_mr\n", fi_strerror(-errno));
+				 "Unable to retrieve dmabuf fd of CUDA device buffer: %d. "
+				 "Fall back to ibv_reg_mr\n", ret);
 			ibv_mr = ibv_reg_mr(ibv_pd, ptr, len, ibv_access);
+			info->dmabuf_supported_by_device = EFA_DMABUF_NOT_SUPPORTED;
 		}
 	} else {
-		EFA_INFO(FI_LOG_CORE,
-			 "Unable to retrieve dmabuf fd of CUDA device buffer: %d. "
-			 "Fall back to ibv_reg_mr\n", ret);
+		EFA_INFO(FI_LOG_CORE, "FI_HMEM_CUDA_USE_DMABUF set to false. Not using DMABUF for CUDA.\n");
 		ibv_mr = ibv_reg_mr(ibv_pd, ptr, len, ibv_access);
 	}
 #else
@@ -216,21 +224,29 @@ static inline void efa_hmem_info_check_p2p_support_rocr(struct efa_hmem_info *in
 	}
 
 #if HAVE_EFA_DMABUF_MR
-	ret = rocr_hmem_get_dmabuf_fd(ptr, len, &dmabuf_fd, &dmabuf_offset);
-	if (ret == FI_SUCCESS) {
-		ibv_mr = ibv_reg_dmabuf_mr(ibv_pd, dmabuf_offset,
-					   len, (uint64_t) ptr, dmabuf_fd, ibv_access);
-		(void) rocr_hmem_put_dmabuf_fd(dmabuf_fd);
-		if (!ibv_mr) {
+	if (ofi_hmem_is_dmabuf_env_var_enabled(FI_HMEM_ROCR)) {
+		ret = rocr_hmem_get_dmabuf_fd(ptr, len, &dmabuf_fd, &dmabuf_offset);
+		if (ret == FI_SUCCESS) {
+			ibv_mr = ibv_reg_dmabuf_mr(ibv_pd, dmabuf_offset,
+						   len, (uint64_t) ptr, dmabuf_fd, ibv_access);
+			(void) rocr_hmem_put_dmabuf_fd(dmabuf_fd);
+			if (!ibv_mr) {
+				EFA_INFO(FI_LOG_CORE,
+					 "Unable to register ROCr device buffer via dmabuf: %s. "
+					 "Fall back to ibv_reg_mr\n", fi_strerror(-errno));
+				ibv_mr = ibv_reg_mr(ibv_pd, ptr, len, ibv_access);
+			} else {
+				info->dmabuf_supported_by_device = EFA_DMABUF_SUPPORTED;
+			}
+		} else {
 			EFA_INFO(FI_LOG_CORE,
-				 "Unable to register ROCr device buffer via dmabuf: %s. "
-				 "Fall back to ibv_reg_mr\n", fi_strerror(-errno));
+				 "Unable to retrieve dmabuf fd of ROCr device buffer: %d. "
+				 "Fall back to ibv_reg_mr\n", ret);
 			ibv_mr = ibv_reg_mr(ibv_pd, ptr, len, ibv_access);
+			info->dmabuf_supported_by_device = EFA_DMABUF_NOT_SUPPORTED;
 		}
 	} else {
-		EFA_INFO(FI_LOG_CORE,
-			 "Unable to retrieve dmabuf fd of ROCr device buffer: %d. "
-			 "Fall back to ibv_reg_mr\n", ret);
+		EFA_INFO(FI_LOG_CORE, "FI_HMEM_ROCR_USE_DMABUF set to false. Not using DMABUF for ROCr.\n");
 		ibv_mr = ibv_reg_mr(ibv_pd, ptr, len, ibv_access);
 	}
 #else
@@ -263,84 +279,6 @@ static inline void efa_hmem_info_check_p2p_support_rocr(struct efa_hmem_info *in
 	return;
 }
 
-static inline void efa_hmem_info_check_p2p_support_neuron(struct efa_hmem_info *info) {
-#if HAVE_NEURON
-	struct ibv_mr *ibv_mr = NULL;
-	struct ibv_pd *ibv_pd;
-	int ibv_access = IBV_ACCESS_LOCAL_WRITE;
-	void *handle;
-	void *ptr = NULL;
-	size_t len = ofi_get_page_size() * 2;
-	int dmabuf_fd;
-	uint64_t offset;
-	int ret;
-
-	if (g_efa_selected_device_list[0].device_caps & EFADV_DEVICE_ATTR_CAPS_RDMA_READ) {
-		ibv_access |= IBV_ACCESS_REMOTE_READ;
-	}
-
-	ptr = neuron_alloc(&handle, len);
-	/*
-	 * neuron_alloc will fail if application did not call nrt_init,
-	 * which is ok if it's not running neuron workloads. libfabric
-	 * will move on and leave info->initialized as false.
-	 */
-	if (!ptr) {
-		info->initialized = false;
-		EFA_INFO(FI_LOG_CORE, "Cannot allocate Neuron buffer\n");
-		return;
-	}
-
-	ibv_pd = ibv_alloc_pd(g_efa_selected_device_list[0].ibv_ctx);
-	if (!ibv_pd) {
-		EFA_WARN(FI_LOG_CORE, "failed to allocate ibv_pd: %d", errno);
-		neuron_free(&handle);
-		return;
-	}
-
-#if HAVE_EFA_DMABUF_MR
-	ret = neuron_get_dmabuf_fd(ptr, (uint64_t)len, &dmabuf_fd, &offset);
-	if (ret == FI_SUCCESS) {
-		ibv_mr = ibv_reg_dmabuf_mr(
-				ibv_pd, offset,
-				len, (uint64_t)ptr, dmabuf_fd, ibv_access);
-	} else if (ret == -FI_EOPNOTSUPP) {
-		EFA_INFO(FI_LOG_MR,
-			 "Unable to retrieve dmabuf fd of Neuron device buffer, "
-			 "Fall back to ibv_reg_mr\n");
-		ibv_mr = ibv_reg_mr(ibv_pd, ptr, len, ibv_access);
-	}
-#else
-	ibv_mr = ibv_reg_mr(ibv_pd, ptr, len, ibv_access);
-#endif
-
-	if (!ibv_mr) {
-		info->p2p_supported_by_device = false;
-		/* We do not expect to support Neuron on non p2p systems */
-		EFA_WARN(FI_LOG_CORE,
-			 "Failed to register Neuron buffer with the EFA device, "
-			 "FI_HMEM transfers that require peer to peer support will fail.\n");
-		neuron_free(&handle);
-		(void) ibv_dealloc_pd(ibv_pd);
-		return;
-	}
-
-	ret = ibv_dereg_mr(ibv_mr);
-	neuron_free(&handle);
-	(void) ibv_dealloc_pd(ibv_pd);
-	if (ret) {
-		EFA_WARN(FI_LOG_CORE,
-			 "Failed to deregister Neuron buffer: %s\n",
-			 fi_strerror(-ret));
-		return;
-	}
-
-	info->p2p_supported_by_device = true;
-	return;
-#endif
-	return;
-}
-
 /**
  * @brief Initialize the efa_hmem_info state for iface
  *
@@ -366,9 +304,26 @@ efa_hmem_info_init_iface(enum fi_hmem_iface iface)
 	}
 
 	info->initialized = true;
+	info->dmabuf_supported_by_device = EFA_DMABUF_ASSUMED;
+	info->dmabuf_fallback_enabled = false;
+
+	if (iface == FI_HMEM_SYNAPSEAI || iface == FI_HMEM_SYSTEM ||
+	    iface == FI_HMEM_NEURON) {
+		/* It is not recommended to allocate Neuron buffers this
+		 * early in initialization, so we skip the explicit check
+		 * of whether p2p will work. Instead, assume it works and
+		 * enable the fallback.
+		 */
+		if (iface == FI_HMEM_NEURON)
+			info->dmabuf_fallback_enabled = true;
 
-	if (iface == FI_HMEM_SYNAPSEAI || iface == FI_HMEM_SYSTEM) {
 		info->p2p_supported_by_device = true;
+
+		if (!ofi_hmem_is_dmabuf_env_var_enabled(iface)) {
+			info->dmabuf_supported_by_device = EFA_DMABUF_NOT_SUPPORTED;
+			EFA_INFO(FI_LOG_CORE, "%s DMABUF disabled by environment variable\n",
+				 fi_tostr(&iface, FI_TYPE_HMEM_IFACE));
+		}
 	} else if (ofi_hmem_p2p_disabled()) {
 		info->p2p_supported_by_device = false;
 	} else {
@@ -379,9 +334,6 @@ efa_hmem_info_init_iface(enum fi_hmem_iface iface)
 		case FI_HMEM_ROCR:
 			efa_hmem_info_check_p2p_support_rocr(info);
 			break;
-		case FI_HMEM_NEURON:
-			efa_hmem_info_check_p2p_support_neuron(info);
-			break;
 		default:
 			break;
 		}
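
For illustration only, and not part of the commit: a minimal standalone sketch of the register-via-dmabuf-with-fallback pattern that the CUDA and ROCr hunks above introduce. The get_fd callback is a hypothetical stand-in for ofi_hmem_get_dmabuf_fd(); the verbs calls (ibv_reg_dmabuf_mr, ibv_reg_mr) are standard rdma-core APIs.

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <infiniband/verbs.h>

/* Hypothetical callback that exports a device buffer as a dmabuf fd.
 * Returns 0 on success, like FI_SUCCESS in the hunks above. */
typedef int (*get_dmabuf_fd_fn)(void *ptr, size_t len, int *fd, uint64_t *offset);

static struct ibv_mr *
reg_mr_with_dmabuf_fallback(struct ibv_pd *pd, void *ptr, size_t len,
			    int access, get_dmabuf_fd_fn get_fd,
			    bool *dmabuf_supported)
{
	struct ibv_mr *mr = NULL;
	uint64_t offset = 0;
	int fd = -1;

	*dmabuf_supported = false;
	if (get_fd && !get_fd(ptr, len, &fd, &offset)) {
		/* iova is the buffer's virtual address, as in the hunks above;
		 * the real code also releases the fd via ofi_hmem_put_dmabuf_fd(). */
		mr = ibv_reg_dmabuf_mr(pd, offset, len, (uint64_t)ptr, fd, access);
		if (mr) {
			*dmabuf_supported = true;
			return mr;
		}
		fprintf(stderr, "dmabuf registration failed (%d), falling back\n", errno);
	}

	/* Fallback: plain virtual-address registration. */
	return ibv_reg_mr(pd, ptr, len, access);
}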