@@ -2377,6 +2377,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2377
2377
if (Error Err = AMDImage->loadExecutable (*this ))
2378
2378
return std::move (Err);
2379
2379
2380
+ Plugin::get ().checkAndAdjustUsmModeForTargetImage (TgtImage);
2381
+
2380
2382
return AMDImage;
2381
2383
}
2382
2384
@@ -2682,6 +2684,20 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2682
2684
return coarse_grain_mem_tab->contains ((const uintptr_t )ptr, size);
2683
2685
}
2684
2686
2687
/// Pre-populate the GPU page table for the host range [ptr, ptr+size-1].
/// Returns Plugin::success() when hsa_amd_svm_attributes_set reports
/// HSA_STATUS_SUCCESS, and a Plugin::error otherwise.
+ Error prepopulatePageTableImpl (void *ptr, int64_t size) override final {
2688
+ // Set the AGENT_ACCESSIBLE_IN_PLACE SVM attribute on the [ptr, ptr+size-1]
2689
+ // pages, passing this device's Agent handle as the attribute value.
2690
+ hsa_amd_svm_attribute_pair_t tt;
2691
+ tt.attribute = HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE;
2692
+ tt.value = Agent.handle ;
// A single attribute/value pair is passed (last argument is the pair count).
2693
+ hsa_status_t err = hsa_amd_svm_attributes_set (ptr, size, &tt, 1 );
2694
+ if (err != HSA_STATUS_SUCCESS) {
2695
+ return Plugin::error (" Failed to prepopulate GPU page table." );
2696
+ }
2697
+
2698
+ return Plugin::success ();
2699
+ }
2700
+
2685
2701
// / Create an event.
2686
2702
Error createEventImpl (void **EventPtrStorage) override {
2687
2703
AMDGPUEventTy **Event = reinterpret_cast <AMDGPUEventTy **>(EventPtrStorage);
@@ -3419,8 +3435,10 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3419
3435
#endif
3420
3436
3421
3437
// Initialize flags for device type:
3422
- hasGfx90aDevice ();
3423
3438
hasAPUDevice ();
3439
+ // check for dGPUs with USM support
3440
+ hasGfx90aDevice ();
3441
+ hasMI300xDevice ();
3424
3442
3425
3443
readEnvVars ();
3426
3444
@@ -3463,7 +3481,19 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3463
3481
#define ALDEBARAN_MAJOR 9
3464
3482
#define ALDEBARAN_STEPPING 10
3465
3483
3466
- bool hasGfx90aDevice () override final {
3484
/// Report whether one of the kernel agents is named gfx941. The answer is
/// computed once and cached in HasMi300xDevice (-1 means "not yet computed").
/// Aborts via FATAL_MESSAGE if the plugin has not been initialized.
+ bool hasMI300xDevice () {
3485
// Fast path: reuse the cached answer from an earlier call.
+ if (HasMi300xDevice != -1 )
3486
+ return HasMi300xDevice;
3487
+
3488
+ if (!Initialized)
3489
+ FATAL_MESSAGE (1 , " %s" , " hasMI300xDevice called on uninitialized plugin" );
3490
+ // On splinter the MI300X identifies itself as a GFX941. Use the GFX name to
3491
+ // distinguish it for testing.
3492
+ HasMi300xDevice = checkForDeviceByGFXName (" gfx941" );
3493
+ return HasMi300xDevice;
3494
+ }
3495
+
3496
+ bool hasGfx90aDevice () {
3467
3497
if (HasGFX90ADevice != -1 )
3468
3498
return HasGFX90ADevice;
3469
3499
@@ -3474,10 +3504,18 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3474
3504
return HasGFX90ADevice;
3475
3505
}
3476
3506
3507
/// Returns true when hasGfx90aDevice() or hasMI300xDevice() reports a
/// matching device, i.e. a discrete GPU this plugin treats as USM capable.
+ bool hasDGpuWithUsmSupport () override final {
3508
+ return hasGfx90aDevice () || hasMI300xDevice ();
3509
+ }
3510
+
3477
3511
/// Returns the current DisableAllocationsForMapsOnApus flag (true means maps
/// are handled zero-copy, as marked where the flag is set).
bool AreAllocationsForMapsOnApusDisabled () override final {
3478
3512
return DisableAllocationsForMapsOnApus;
3479
3513
}
3480
3514
3515
/// Returns the current PrepopulateGPUPageTable flag, i.e. whether map
/// clauses should pre-fault the GPU page table.
+ bool requestedPrepopulateGPUPageTable () override final {
3516
+ return PrepopulateGPUPageTable;
3517
+ }
3518
+
3481
3519
/// Returns the NoUSMMapChecks flag.
bool IsNoMapsCheck () override final { return NoUSMMapChecks; }
3482
3520
3483
3521
bool IsFineGrainedMemoryEnabled () override final {
@@ -3491,6 +3529,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3491
3529
NoMapChecks = BoolEnvar (" OMPX_DISABLE_MAPS" , true );
3492
3530
DisableUsmMaps = BoolEnvar (" OMPX_DISABLE_USM_MAPS" , false );
3493
3531
HsaXnack = BoolEnvar (" HSA_XNACK" , false );
3532
+ APUPrefault = BoolEnvar (" OMPX_EAGER_ZERO_COPY_MAPS" , false );
3533
+ ZeroCopyForMapsOnUsm = BoolEnvar (" OMPX_APU_MAPS" , false );
3494
3534
}
3495
3535
3496
3536
void setUpEnv () override final {
@@ -3502,6 +3542,15 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3502
3542
if (DisableUsmMaps.get () == true ) {
3503
3543
EnableFineGrainedMemory = true ;
3504
3544
}
3545
+
3546
+ if (hasAPUDevice ()) {
3547
+ // OMPX_EAGER_ZERO_COPY_MAPS=1 && HSA_XNACK=0 (XNACK-disabled)
3548
+ // && default (non-USM) program
3549
+ if ((APUPrefault.get () == true ) && !IsXnackEnabled () &&
3550
+ !(Plugin::get ().getRequiresFlags () & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
3551
+ PrepopulateGPUPageTable = true ;
3552
+ }
3553
+ }
3505
3554
}
3506
3555
3507
3556
// / Check whether the image is compatible with an AMDGPU device.
@@ -3545,38 +3594,74 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3545
3594
}
3546
3595
3547
3596
/// Choose how maps are handled for the given target image. Nothing is changed
/// unless an APU or a dGPU with USM support is present. Otherwise the XNACK
/// build mode is extracted from the image and the decision is delegated to
/// handleImageRequiresUsmMode (program requires unified shared memory) or
/// handleDefaultMode (all other programs).
void checkAndAdjustUsmModeForTargetImage (
3548
- __tgt_device_image *TgtImage) override final {
3597
+ const __tgt_device_image *TgtImage) override final {
3549
3598
assert ((TgtImage != nullptr ) && " TgtImage is nullptr" );
3550
3599
assert (!(Plugin::get ().getRequiresFlags () & OMP_REQ_UNDEFINED) &&
3551
3600
" Requires flags are not set." );
3552
3601
3553
- if (!(hasAPUDevice () || hasGfx90aDevice ()))
3602
+ if (!(hasAPUDevice () || hasDGpuWithUsmSupport ()))
3554
3603
return ;
3555
3604
// True when the requires flags carry OMP_REQ_UNIFIED_SHARED_MEMORY.
3556
3605
bool IsXnackRequired =
3557
3606
Plugin::get ().getRequiresFlags () & OMP_REQ_UNIFIED_SHARED_MEMORY;
3558
-
3559
3607
utils::XnackBuildMode BinaryXnackMode =
3560
3608
utils::extractXnackModeFromBinary (TgtImage);
3561
3609
3610
+ if (IsXnackRequired) {
3611
+ handleImageRequiresUsmMode (BinaryXnackMode);
3612
+ } else {
3613
+ handleDefaultMode (BinaryXnackMode);
3614
+ }
3615
+ }
3616
+
3617
+ // Mode selection for an image whose program requires unified shared memory.
+ // Zero-copy is enabled when the image's XNACK build mode matches the
+ // system's XNACK state (XNACK_ANY always matches); on APUs with
+ // OMPX_EAGER_ZERO_COPY_MAPS set, GPU page-table pre-faulting is enabled too.
+ // A warning is printed when XNACK is off and the image is not XNACK_PLUS.
+ void handleImageRequiresUsmMode (utils::XnackBuildMode xnackImageMode) {
3618
+ bool IsXnackActiveOnSystem = IsXnackEnabled ();
3619
+
3620
+ if ((xnackImageMode == utils::XnackBuildMode::XNACK_ANY) ||
3621
+ (xnackImageMode == utils::XnackBuildMode::XNACK_PLUS &&
3622
+ IsXnackActiveOnSystem) ||
3623
+ (xnackImageMode == utils::XnackBuildMode::XNACK_MINUS &&
3624
+ !IsXnackActiveOnSystem)) {
3625
+ DisableAllocationsForMapsOnApus = true ; // Zero-copy
3626
+
3627
+ if (APUPrefault.get () && hasAPUDevice ())
3628
+ PrepopulateGPUPageTable = true ; // Pre-faulting
3629
+ }
3630
+
3631
+ if (!IsXnackActiveOnSystem &&
3632
+ (xnackImageMode != utils::XnackBuildMode::XNACK_PLUS)) {
3633
+ FAILURE_MESSAGE (
3634
+ " Running a program that requires XNACK on a system where XNACK is "
3635
+ " disabled! This may potentially cause memory errors! Just saying.\n " );
3636
+ }
3637
+ }
3638
+
3639
/// Mode selection for an image whose program does not require unified shared
/// memory. Resets DisableAllocationsForMapsOnApus to false and then enables
/// zero-copy (and, where requested, pre-faulting) in the two cases below.
+ void handleDefaultMode (utils::XnackBuildMode xnackImageMode) {
3640
+ // Start from the copying default; the branches below may enable zero-copy.
3562
3641
DisableAllocationsForMapsOnApus = false ;
3642
+ bool IsXnackActiveOnSystem = IsXnackEnabled ();
3563
3643
3564
- if (IsXnackEnabled ()) {
3565
- if (!IsXnackRequired) {
3566
- switch (BinaryXnackMode) {
3567
- case utils::XnackBuildMode::XNACK_PLUS:
3568
- case utils::XnackBuildMode::XNACK_ANY:
3569
- DisableAllocationsForMapsOnApus = true ; // Zero-copy
3570
- }
3571
- return ;
3572
- }
3573
- } else {
3574
- if (IsXnackRequired) {
3575
- FAILURE_MESSAGE (
3576
- " XNACK is disabled. However, the program requires XNACK "
3577
- " support. Enable XNACK and re-run the program.\n " );
// Case 1: XNACK is on, the device is an APU or OMPX_APU_MAPS is set, and the
// image was built as XNACK_ANY or XNACK_PLUS.
3644
+ if (IsXnackActiveOnSystem &&
3645
+ (hasAPUDevice () || ZeroCopyForMapsOnUsm.get ()) &&
3646
+ ((xnackImageMode == utils::XnackBuildMode::XNACK_ANY) ||
3647
+ (xnackImageMode == utils::XnackBuildMode::XNACK_PLUS))) {
3648
+ DisableAllocationsForMapsOnApus = true ; // Zero-copy
3649
+
3650
+ if (hasAPUDevice () && APUPrefault.get ()) {
3651
+ PrepopulateGPUPageTable = true ; // Pre-faulting
3578
3652
}
3653
+ return ;
3654
+ }
3655
+
// Case 2: XNACK is off on an APU with OMPX_EAGER_ZERO_COPY_MAPS set, and the
// image was built as XNACK_ANY or XNACK_MINUS.
3656
+ if (!IsXnackActiveOnSystem && hasAPUDevice () && APUPrefault.get () &&
3657
+ ((xnackImageMode == utils::XnackBuildMode::XNACK_ANY) ||
3658
+ (xnackImageMode == utils::XnackBuildMode::XNACK_MINUS))) {
3659
+ DisableAllocationsForMapsOnApus = true ; // Zero-copy
3660
+ PrepopulateGPUPageTable = true ; // Pre-faulting
3661
+ return ;
3579
3662
}
3663
+
// Otherwise the copying default set at the top stays in effect.
3664
+ return ;
3580
3665
}
3581
3666
3582
3667
// / This plugin does not support exchanging data between two devices.
@@ -3649,11 +3734,10 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3649
3734
return ((HsaXnack.get ()) || (utils::IsXnackEnabledViaKernelParam ()));
3650
3735
}
3651
3736
3652
- bool checkForDeviceByGFXName (const llvm::StringRef GfxLookUpName) {
3653
- bool CheckForMI300A =
3654
- (GfxLookUpName. find_insensitive ( " gfx940 " ) != llvm::StringRef::npos);
3737
+ bool checkForDeviceByGFXName (const llvm::StringRef GfxLookUpName,
3738
+ char mi300Specifier = ' ' ) {
3739
+
3655
3740
char GfxName[64 ];
3656
- llvm::StringRef GfxNameRef = llvm::StringRef (GfxName);
3657
3741
3658
3742
for (hsa_agent_t GPUAgent : KernelAgents) {
3659
3743
std::memset ((void *)&GfxName, 0 , sizeof (char ) * 64 );
@@ -3664,22 +3748,37 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3664
3748
if (Status != HSA_STATUS_SUCCESS)
3665
3749
continue ;
3666
3750
3667
- if (GfxLookUpName.find_insensitive (GfxNameRef) != llvm::StringRef::npos) {
3668
- // Special handling for MI300. We will have to distinguish between an
3669
- // MI300A and X
3670
- if (CheckForMI300A) {
3671
- uint32_t ChipID = 0 ;
3672
- Status = hsa_agent_get_info (
3673
- GPUAgent, (hsa_agent_info_t )HSA_AMD_AGENT_INFO_CHIP_ID, &ChipID);
3751
+ llvm::StringRef GfxNameRef = llvm::StringRef (GfxName);
3752
+
3753
+ if (GfxLookUpName.equals_insensitive (GfxNameRef)) {
3754
+ if (mi300Specifier == ' ' )
3755
+ return true ;
3674
3756
3675
- if (Status != HSA_STATUS_SUCCESS) {
3676
- continue ;
3677
- }
3757
+ // Special handling for MI300. We will have to distinguish between
3758
+ // an MI300A and X
3759
+ uint32_t ChipID = 0 ;
3760
+ Status = hsa_agent_get_info (
3761
+ GPUAgent, (hsa_agent_info_t )HSA_AMD_AGENT_INFO_CHIP_ID, &ChipID);
3678
3762
3679
- if ((ChipID & 0x1 ))
3680
- continue ;
3763
+ if (Status != HSA_STATUS_SUCCESS) {
3764
+ continue ;
3765
+ }
3766
+
3767
+ bool IsMi300X = ChipID & 0x1 ;
3768
+
3769
+ switch (mi300Specifier) {
3770
+ case ' A' :
3771
+ case ' a' :
3772
+ if (!IsMi300X)
3773
+ return true ;
3774
+ break ;
3775
+ case ' x' :
3776
+ if (IsMi300X) // We are looking for a MI300X
3777
+ return true ;
3778
+ break ;
3779
+ default :
3780
+ FAILURE_MESSAGE (" Unknown MI300 specifier!\n " );
3681
3781
}
3682
- return true ;
3683
3782
}
3684
3783
}
3685
3784
return false ;
@@ -3693,18 +3792,31 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
3693
3792
// / Flag that shows if device is a GFX90A AMD GPU
3694
3793
int16_t HasGFX90ADevice{-1 };
3695
3794
3795
/// Cached result of hasMI300xDevice(); -1 until the first query.
+ int16_t HasMi300xDevice{-1 };
3796
+
3696
3797
// / Flag that shows if device is an APU device
3697
3798
int16_t HasAPUDevice{-1 };
3698
3799
3699
3800
BoolEnvar NoMapChecks;
3700
3801
BoolEnvar DisableUsmMaps;
3701
3802
BoolEnvar HsaXnack;
3803
// Set by OMPX_EAGER_ZERO_COPY_MAPS (default false). Requests pre-faulting of
// the GPU page table; only honoured when an APU device is present.
+ BoolEnvar APUPrefault;
3804
+
3805
+ // Set by OMPX_APU_MAPS (default false).
3806
+ // Enables code that detects if zero copying is possible. If so, the variable
3807
+ // DisableAllocationsForMapsOnApus is set to 'true'.
3808
+ BoolEnvar ZeroCopyForMapsOnUsm;
3702
3809
3703
- // Set by OMPX_APU_MAPS environment variable.
3704
3810
// If set, maps cause no copy operations. USM is used instead. Allocated
3705
- // memory remains coarse grained.
3811
+ // memory remains coarse grained. Computed per image by handleDefaultMode and
3812
+ // handleImageRequiresUsmMode; OMPX_APU_MAPS is one input to the former.
3706
3813
bool DisableAllocationsForMapsOnApus{false };
3707
3814
3815
+ // Derived from the OMPX_EAGER_ZERO_COPY_MAPS environment variable.
3816
+ // If set, map clauses provoke prefaulting of the GPU
3817
+ // page table. Only ever set to true when an APU device is present.
3818
+ bool PrepopulateGPUPageTable{false };
3819
+
3708
3820
// Set by OMPX_DISABLE_MAPS environment variable.
3709
3821
// When active (default value), maps are ignored by the runtime
3710
3822
bool NoUSMMapChecks{true };
0 commit comments