Skip to content

Commit 19560a4

Browse files
ThorBl authored and ronlieb committed
[OpenMP][MI300] Revised the APU detection algorithm. The last bit of the chip ID is used to distinguish between an MI300A and an MI300X only for a GFX942. Only a single iteration over the agent vector is now necessary, instead of three as before.
Change-Id: Id297e4bcf5eb25f9b724bd1b995981a84409d9d8
1 parent 8f0cb36 commit 19560a4

File tree

1 file changed

+27
-52
lines changed
  • openmp/libomptarget/plugins-nextgen/amdgpu/src

1 file changed

+27
-52
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 27 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -3430,6 +3430,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
34303430
if (auto Err = HostDevice->init())
34313431
return std::move(Err);
34323432

3433+
#if 0//<<<<<<< HEAD
34333434
#ifdef OMPT_SUPPORT
34343435
::OmptCallbackInit();
34353436
#endif
@@ -3439,6 +3440,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
34393440
// check for dGPUs with USM support
34403441
hasGfx90aDevice();
34413442
hasMI300xDevice();
3443+
#else//=======
3444+
scanForUSMCapableDevices();
3445+
#endif//>>>>>>> 1abd5de10b93 ([OpenMP][MI300] Revised the APU detection algorithm. Only for a GFX942 the last bit of the chip id is used to distinguish between an MI300A and MI300X. There is only a single iteration over the agent vector necessary instead of three, as before.)
34423446

34433447
readEnvVars();
34443448

@@ -3468,40 +3472,27 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
34683472
uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }
34693473

34703474
bool hasAPUDevice() override final {
3471-
if (HasAPUDevice != -1)
3472-
return HasAPUDevice;
3473-
34743475
if (!Initialized)
34753476
FATAL_MESSAGE(1, "%s", "hasAPUDevice called on uninitialized plugin");
34763477

3477-
HasAPUDevice = checkForDeviceByGFXName("gfx940");
3478-
return HasAPUDevice;
3478+
return IsEquippedWithMI300A;
34793479
}
34803480

34813481
#define ALDEBARAN_MAJOR 9
34823482
#define ALDEBARAN_STEPPING 10
34833483

34843484
bool hasMI300xDevice() {
3485-
if (HasMi300xDevice != -1)
3486-
return HasMi300xDevice;
3487-
34883485
if (!Initialized)
34893486
FATAL_MESSAGE(1, "%s", "hasMI300xDevice called on uninitialized plugin");
3490-
// On splinter the MI300X identifies itself as a GFX941. Use GFX name to
3491-
// distinguish for testing.
3492-
HasMi300xDevice = checkForDeviceByGFXName("gfx941");
3493-
return HasMi300xDevice;
3487+
3488+
return IsEquippedWithMI300X;
34943489
}
34953490

34963491
bool hasGfx90aDevice() {
3497-
if (HasGFX90ADevice != -1)
3498-
return HasGFX90ADevice;
3499-
35003492
if (!Initialized)
35013493
FATAL_MESSAGE(1, "%s", "hasGfx90aDevice called on uninitialized plugin");
35023494

3503-
HasGFX90ADevice = checkForDeviceByGFXName("gfx90a");
3504-
return HasGFX90ADevice;
3495+
return IsEquippedWithGFX90A;
35053496
}
35063497

35073498
bool hasDGpuWithUsmSupport() override final {
@@ -3734,28 +3725,28 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
37343725
return ((HsaXnack.get()) || (utils::IsXnackEnabledViaKernelParam()));
37353726
}
37363727

3737-
bool checkForDeviceByGFXName(const llvm::StringRef GfxLookUpName,
3738-
char mi300Specifier = ' ') {
3728+
void scanForUSMCapableDevices() {
37393729

37403730
char GfxName[64];
3741-
37423731
for (hsa_agent_t GPUAgent : KernelAgents) {
37433732
std::memset((void *)&GfxName, 0, sizeof(char) * 64);
37443733

37453734
hsa_status_t Status = hsa_agent_get_info(
37463735
GPUAgent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME, GfxName);
37473736

3748-
if (Status != HSA_STATUS_SUCCESS)
3749-
continue;
3750-
3751-
llvm::StringRef GfxNameRef = llvm::StringRef(GfxName);
3737+
std::string StrGfxName(GfxName);
37523738

3753-
if (GfxLookUpName.equals_insensitive(GfxNameRef)) {
3754-
if (mi300Specifier == ' ')
3755-
return true;
3739+
std::transform(std::begin(StrGfxName), std::end(StrGfxName),
3740+
std::begin(StrGfxName),
3741+
[](char c) { return std::tolower(c); });
37563742

3757-
// Special handling for MI300. We will have to distinguish between
3758-
// an MI300A and X
3743+
if (StrGfxName == "gfx90a") {
3744+
IsEquippedWithGFX90A = true;
3745+
} else if (StrGfxName == "gfx940") {
3746+
IsEquippedWithMI300A = true;
3747+
} else if (StrGfxName == "gfx941") {
3748+
IsEquippedWithMI300X = true;
3749+
} else if (StrGfxName == "gfx942") {
37593750
uint32_t ChipID = 0;
37603751
Status = hsa_agent_get_info(
37613752
GPUAgent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &ChipID);
@@ -3764,38 +3755,22 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
37643755
continue;
37653756
}
37663757

3767-
bool IsMi300X = ChipID & 0x1;
3768-
3769-
switch (mi300Specifier) {
3770-
case 'A':
3771-
case 'a':
3772-
if (!IsMi300X)
3773-
return true;
3774-
break;
3775-
case 'x':
3776-
if (IsMi300X) // We are looking for a MI300X
3777-
return true;
3778-
break;
3779-
default:
3780-
FAILURE_MESSAGE("Unknown MI300 specifier!\n");
3781-
}
3758+
if (ChipID & 0x1)
3759+
IsEquippedWithMI300X = true;
3760+
else
3761+
IsEquippedWithMI300A = true;
37823762
}
37833763
}
3784-
return false;
37853764
}
37863765

37873766
/// Indicate whether the HSA runtime was correctly initialized. Even if there
37883767
/// is no available devices this boolean will be true. It indicates whether
37893768
/// we can safely call HSA functions (e.g., hsa_shut_down).
37903769
bool Initialized;
37913770

3792-
/// Flag that shows if device is a GFX90A AMD GPU
3793-
int16_t HasGFX90ADevice{-1};
3794-
3795-
int16_t HasMi300xDevice{-1};
3796-
3797-
/// Flag that shows if device is an APU device
3798-
int16_t HasAPUDevice{-1};
3771+
bool IsEquippedWithMI300A{false};
3772+
bool IsEquippedWithMI300X{false};
3773+
bool IsEquippedWithGFX90A{false};
37993774

38003775
BoolEnvar NoMapChecks;
38013776
BoolEnvar DisableUsmMaps;

0 commit comments

Comments
 (0)