@@ -195,14 +195,18 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
195
195
return 0 ;
196
196
}
197
197
}
198
-
198
+ /* First access on a device pointer finalizes CUDA support initialization. */
199
+ opal_accelerator_cuda_delayed_init ();
199
200
return 1 ;
200
201
}
201
202
202
203
static int accelerator_cuda_create_stream (int dev_id , opal_accelerator_stream_t * * stream )
203
204
{
204
205
CUresult result ;
205
-
206
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
207
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
208
+ return delayed_init ;
209
+ }
206
210
* stream = (opal_accelerator_stream_t * )OBJ_NEW (opal_accelerator_cuda_stream_t );
207
211
if (NULL == * stream ) {
208
212
return OPAL_ERR_OUT_OF_RESOURCE ;
@@ -248,6 +252,10 @@ OBJ_CLASS_INSTANCE(
248
252
static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event )
249
253
{
250
254
CUresult result ;
255
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
256
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
257
+ return delayed_init ;
258
+ }
251
259
252
260
* event = (opal_accelerator_event_t * )OBJ_NEW (opal_accelerator_cuda_event_t );
253
261
if (NULL == * event ) {
@@ -340,6 +348,11 @@ static int accelerator_cuda_memcpy_async(int dest_dev_id, int src_dev_id, void *
340
348
{
341
349
CUresult result ;
342
350
351
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
352
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
353
+ return delayed_init ;
354
+ }
355
+
343
356
if (NULL == stream || NULL == dest || NULL == src || size <= 0 ) {
344
357
return OPAL_ERR_BAD_PARAM ;
345
358
}
@@ -358,6 +371,11 @@ static int accelerator_cuda_memcpy(int dest_dev_id, int src_dev_id, void *dest,
358
371
{
359
372
CUresult result ;
360
373
374
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
375
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
376
+ return delayed_init ;
377
+ }
378
+
361
379
if (NULL == dest || NULL == src || size <= 0 ) {
362
380
return OPAL_ERR_BAD_PARAM ;
363
381
}
@@ -391,6 +409,11 @@ static int accelerator_cuda_memmove(int dest_dev_id, int src_dev_id, void *dest,
391
409
CUdeviceptr tmp ;
392
410
CUresult result ;
393
411
412
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
413
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
414
+ return delayed_init ;
415
+ }
416
+
394
417
if (NULL == dest || NULL == src || size <= 0 ) {
395
418
return OPAL_ERR_BAD_PARAM ;
396
419
}
@@ -425,6 +448,11 @@ static int accelerator_cuda_mem_alloc(int dev_id, void **ptr, size_t size)
425
448
{
426
449
CUresult result ;
427
450
451
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
452
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
453
+ return delayed_init ;
454
+ }
455
+
428
456
if (NULL == ptr || 0 == size ) {
429
457
return OPAL_ERR_BAD_PARAM ;
430
458
}
@@ -434,7 +462,7 @@ static int accelerator_cuda_mem_alloc(int dev_id, void **ptr, size_t size)
434
462
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
435
463
opal_show_help ("help-accelerator-cuda.txt" , "cuMemAlloc failed" , true,
436
464
OPAL_PROC_MY_HOSTNAME , result );
437
- return result ;
465
+ return OPAL_ERROR ;
438
466
}
439
467
}
440
468
return 0 ;
@@ -448,7 +476,7 @@ static int accelerator_cuda_mem_release(int dev_id, void *ptr)
448
476
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
449
477
opal_show_help ("help-accelerator-cuda.txt" , "cuMemFree failed" , true,
450
478
OPAL_PROC_MY_HOSTNAME , result );
451
- return result ;
479
+ return OPAL_ERROR ;
452
480
}
453
481
}
454
482
return 0 ;
@@ -459,6 +487,11 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
459
487
{
460
488
CUresult result ;
461
489
490
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
491
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
492
+ return delayed_init ;
493
+ }
494
+
462
495
if (NULL == ptr || NULL == base || NULL == size ) {
463
496
return OPAL_ERR_BAD_PARAM ;
464
497
}
@@ -479,6 +512,11 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
479
512
static int accelerator_cuda_host_register (int dev_id , void * ptr , size_t size )
480
513
{
481
514
CUresult result ;
515
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
516
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
517
+ return delayed_init ;
518
+ }
519
+
482
520
if (NULL == ptr && size > 0 ) {
483
521
return OPAL_ERR_BAD_PARAM ;
484
522
}
@@ -487,7 +525,7 @@ static int accelerator_cuda_host_register(int dev_id, void *ptr, size_t size)
487
525
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
488
526
opal_show_help ("help-accelerator-cuda.txt" , "cuMemHostRegister failed" , true,
489
527
ptr , size , OPAL_PROC_MY_HOSTNAME , result );
490
- return result ;
528
+ return OPAL_ERROR ;
491
529
}
492
530
493
531
return OPAL_SUCCESS ;
@@ -501,7 +539,7 @@ static int accelerator_cuda_host_unregister(int dev_id, void *ptr)
501
539
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
502
540
opal_show_help ("help-accelerator-cuda.txt" , "cuMemHostUnregister failed" , true,
503
541
ptr , OPAL_PROC_MY_HOSTNAME , result );
504
- return result ;
542
+ return OPAL_ERROR ;
505
543
}
506
544
}
507
545
return OPAL_SUCCESS ;
@@ -512,6 +550,11 @@ static int accelerator_cuda_get_device(int *dev_id)
512
550
CUdevice cuDev ;
513
551
CUresult result ;
514
552
553
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
554
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
555
+ return delayed_init ;
556
+ }
557
+
515
558
if (NULL == dev_id ) {
516
559
return OPAL_ERR_BAD_PARAM ;
517
560
}
@@ -520,7 +563,7 @@ static int accelerator_cuda_get_device(int *dev_id)
520
563
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
521
564
opal_show_help ("help-accelerator-cuda.txt" , "cuCtxGetDevice failed" , true,
522
565
result );
523
- return result ;
566
+ return OPAL_ERROR ;
524
567
}
525
568
* dev_id = cuDev ;
526
569
return 0 ;
@@ -530,6 +573,11 @@ static int accelerator_cuda_device_can_access_peer(int *access, int dev1, int de
530
573
{
531
574
CUresult result ;
532
575
576
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
577
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
578
+ return delayed_init ;
579
+ }
580
+
533
581
if (NULL == access ) {
534
582
return OPAL_ERR_BAD_PARAM ;
535
583
}
@@ -538,7 +586,7 @@ static int accelerator_cuda_device_can_access_peer(int *access, int dev1, int de
538
586
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
539
587
opal_show_help ("help-accelerator-cuda.txt" , "cuDeviceCanAccessPeer failed" , true,
540
588
OPAL_PROC_MY_HOSTNAME , result );
541
- return result ;
589
+ return OPAL_ERROR ;
542
590
}
543
591
return 0 ;
544
592
}
@@ -554,18 +602,24 @@ static int accelerator_cuda_get_buffer_id(int dev_id, const void *addr, opal_acc
554
602
{
555
603
CUresult result ;
556
604
int enable = 1 ;
605
+
606
+ int delayed_init = opal_accelerator_cuda_delayed_init ();
607
+ if (OPAL_UNLIKELY (0 != delayed_init )) {
608
+ return delayed_init ;
609
+ }
610
+
557
611
result = cuPointerGetAttribute ((unsigned long long * )buf_id , CU_POINTER_ATTRIBUTE_BUFFER_ID , (CUdeviceptr ) addr );
558
612
if (OPAL_UNLIKELY (result != CUDA_SUCCESS )) {
559
613
opal_show_help ("help-accelerator-cuda.txt" , "bufferID failed" , true, OPAL_PROC_MY_HOSTNAME ,
560
614
result );
561
- return result ;
615
+ return OPAL_ERROR ;
562
616
}
563
617
result = cuPointerSetAttribute (& enable , CU_POINTER_ATTRIBUTE_SYNC_MEMOPS ,
564
618
(CUdeviceptr ) addr );
565
619
if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
566
620
opal_show_help ("help-accelerator-cuda.txt" , "cuPointerSetAttribute failed" , true,
567
621
OPAL_PROC_MY_HOSTNAME , result , addr );
568
- return result ;
622
+ return OPAL_ERROR ;
569
623
}
570
624
return OPAL_SUCCESS ;
571
625
}
0 commit comments