From 07d439c245b333f516316bff72f98c9b35b6e3f8 Mon Sep 17 00:00:00 2001 From: Hideki Saito Date: Thu, 5 Dec 2024 21:46:06 -0600 Subject: [PATCH 1/4] [Offload][AMDGPU] accept generic target --- offload/DeviceRTL/CMakeLists.txt | 15 ++++-- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 48 +++++++++++-------- .../plugins-nextgen/common/src/Utils/ELF.cpp | 3 +- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt index 32a7510be980d..b9659ca3d7a9c 100644 --- a/offload/DeviceRTL/CMakeLists.txt +++ b/offload/DeviceRTL/CMakeLists.txt @@ -42,11 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR}) set(include_directory ${devicertl_base_directory}/include) set(source_directory ${devicertl_base_directory}/src) -set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906" - "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010" - "gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035" - "gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150" - "gfx1151;gfx1152;gfx1153") +set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803" + "gfx9-generic;gfx900;gfx902;gfx906;gfx908" + "gfx90a;gfx90c" + "gfx9-4-generic;gfx940;gfx941;gfx942;gfx950" + "gfx10-1-generic;gfx1010;gfx1012" + "gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033" + "gfx1034;gfx1035;gfx1036" + "gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103" + "gfx1150;gfx1151;gfx1152;gfx1153" + "gfx12-generic") set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62" "sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90") set(all_gpu_architectures diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index d74e65d416567..cdc7f5ae0427a 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -190,8 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent, #endif } -Expected getTargetTripleAndFeatures(hsa_agent_t Agent) { - std::string Target; +Error getTargetTripleAndFeatures(hsa_agent_t Agent, + SmallVector &Targets) { auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) { uint32_t Length; hsa_status_t Status; @@ -205,13 +205,16 @@ Expected getTargetTripleAndFeatures(hsa_agent_t Agent) { return Status; llvm::StringRef TripleTarget(ISAName.begin(), Length); - if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) - Target = TripleTarget.ltrim('-').rtrim('\0').str(); - return HSA_STATUS_INFO_BREAK; + if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) { + auto Target = TripleTarget.ltrim('-').rtrim('\0').str(); + if (Target.find("generic") != std::string::npos) + Targets.push_back(Target); + else + Targets[0] = Target; + } + return HSA_STATUS_SUCCESS; }); - if (Err) - return Err; - return Target; + return Err; } } // namespace hsa_utils @@ -1988,12 +1991,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Err; // Detect if XNACK is enabled - auto TargeTripleAndFeaturesOrError = - hsa_utils::getTargetTripleAndFeatures(Agent); - if (!TargeTripleAndFeaturesOrError) - return TargeTripleAndFeaturesOrError.takeError(); - if (static_cast(*TargeTripleAndFeaturesOrError) - .contains("xnack+")) + SmallVector Targets; + Targets.push_back(""); + if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets)) + return Err; + if (Targets[0].find("xnack+") != std::string::npos) IsXnackEnabled = true; // detect if device is an APU. @@ -3207,13 +3209,17 @@ struct AMDGPUPluginTy final : public GenericPluginTy { if (!Processor) return false; - auto TargeTripleAndFeaturesOrError = - hsa_utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId)); - if (!TargeTripleAndFeaturesOrError) - return TargeTripleAndFeaturesOrError.takeError(); - return offloading::amdgpu::isImageCompatibleWithEnv( - Processor ? *Processor : "", ElfOrErr->getPlatformFlags(), - *TargeTripleAndFeaturesOrError); + SmallVector Targets; + Targets.push_back(""); + if (auto Err = hsa_utils::getTargetTripleAndFeatures( + getKernelAgent(DeviceId), Targets)) + return Err; + for (auto &Target : Targets) + if (offloading::amdgpu::isImageCompatibleWithEnv( + Processor ? *Processor : "", ElfOrErr->getPlatformFlags(), + Target)) + return true; + return false; } bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override { diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index f5037611e72e0..10b32440dc877 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -68,7 +68,8 @@ checkMachineImpl(const object::ELFObjectFile &ELFObj, uint16_t EMachine) { Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6) return createError("Invalid AMD ABI version, must be version 4 or above"); if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 || - (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201) + (Header.e_flags & EF_AMDGPU_MACH) > + EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC) return createError("Unsupported AMDGPU architecture"); } else if (Header.e_machine == EM_CUDA) { if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS) From 84399a95253478736ff25ae8f23745a3febf37d8 Mon Sep 17 00:00:00 2001 From: Hideki Saito Date: Fri, 6 Dec 2024 18:50:56 -0600 Subject: [PATCH 2/4] Code Review adjustments --- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 44 ++++++++++++---------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index cdc7f5ae0427a..5fc4a19d8ae14 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -190,8 +190,9 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent, #endif } -Error getTargetTripleAndFeatures(hsa_agent_t Agent, - SmallVector &Targets) { +Expected +getTargetTripleAndFeatures(hsa_agent_t Agent, SmallVector &Targets) { + StringRef SpecificTarget; auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) { uint32_t Length; hsa_status_t Status; @@ -206,15 +207,18 @@ Error getTargetTripleAndFeatures(hsa_agent_t Agent, llvm::StringRef TripleTarget(ISAName.begin(), Length); if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) { - auto Target = TripleTarget.ltrim('-').rtrim('\0').str(); - if (Target.find("generic") != std::string::npos) - Targets.push_back(Target); - else - Targets[0] = Target; + auto Target = TripleTarget.ltrim('-').rtrim('\0'); + Targets.push_back(Target); + if (!Target.ends_with("generic")) + SpecificTarget = Target; // Expect one (and only one) to be found } return HSA_STATUS_SUCCESS; }); - return Err; + if (Err) + return Err; + if (SpecificTarget.empty()) + return Plugin::error("Specific Target ISA not found"); + return SpecificTarget; } } // namespace hsa_utils @@ -1991,11 +1995,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Err; // Detect if XNACK is enabled - SmallVector Targets; - Targets.push_back(""); - if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets)) - return Err; - if (Targets[0].find("xnack+") != std::string::npos) + SmallVector Targets; + auto TargeTripleAndFeaturesOrError = + hsa_utils::getTargetTripleAndFeatures(Agent, Targets); + if (!TargeTripleAndFeaturesOrError) + return TargeTripleAndFeaturesOrError.takeError(); + if (static_cast(*TargeTripleAndFeaturesOrError) + .contains("xnack+")) IsXnackEnabled = true; // detect if device is an APU. @@ -3209,15 +3215,15 @@ struct AMDGPUPluginTy final : public GenericPluginTy { if (!Processor) return false; - SmallVector Targets; - Targets.push_back(""); - if (auto Err = hsa_utils::getTargetTripleAndFeatures( - getKernelAgent(DeviceId), Targets)) - return Err; + SmallVector Targets; + auto TargetTripleAndFeaturesOrError = hsa_utils::getTargetTripleAndFeatures( + getKernelAgent(DeviceId), Targets); + if (!TargetTripleAndFeaturesOrError) + return TargetTripleAndFeaturesOrError.takeError(); for (auto &Target : Targets) if (offloading::amdgpu::isImageCompatibleWithEnv( Processor ? *Processor : "", ElfOrErr->getPlatformFlags(), - Target)) + Target.str())) return true; return false; } From 1ac5b2d90a287dd52c7fece7f196303a928235e7 Mon Sep 17 00:00:00 2001 From: Hideki Saito Date: Mon, 9 Dec 2024 11:33:03 -0600 Subject: [PATCH 3/4] Code Review adjustments --- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 29 +++++++--------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 5fc4a19d8ae14..492b47a909d79 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -190,9 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent, #endif } -Expected -getTargetTripleAndFeatures(hsa_agent_t Agent, SmallVector &Targets) { - StringRef SpecificTarget; +Error getTargetTripleAndFeatures(hsa_agent_t Agent, + SmallVector &Targets) { auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) { uint32_t Length; hsa_status_t Status; @@ -209,16 +208,10 @@ getTargetTripleAndFeatures(hsa_agent_t Agent, SmallVector &Targets) { if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) { auto Target = TripleTarget.ltrim('-').rtrim('\0'); Targets.push_back(Target); - if (!Target.ends_with("generic")) - SpecificTarget = Target; // Expect one (and only one) to be found } return HSA_STATUS_SUCCESS; }); - if (Err) - return Err; - if (SpecificTarget.empty()) - return Plugin::error("Specific Target ISA not found"); - return SpecificTarget; + return Err; } } // namespace hsa_utils @@ -1996,12 +1989,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { // Detect if XNACK is enabled SmallVector Targets; - auto TargeTripleAndFeaturesOrError = - hsa_utils::getTargetTripleAndFeatures(Agent, Targets); - if (!TargeTripleAndFeaturesOrError) - return TargeTripleAndFeaturesOrError.takeError(); - if (static_cast(*TargeTripleAndFeaturesOrError) - .contains("xnack+")) + if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets)) + return Err; + if (!Targets.empty() && Targets[0].contains("xnack+")) IsXnackEnabled = true; // detect if device is an APU. @@ -3216,10 +3206,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy { return false; SmallVector Targets; - auto TargetTripleAndFeaturesOrError = hsa_utils::getTargetTripleAndFeatures( - getKernelAgent(DeviceId), Targets); - if (!TargetTripleAndFeaturesOrError) - return TargetTripleAndFeaturesOrError.takeError(); + if (auto Err = hsa_utils::getTargetTripleAndFeatures( + getKernelAgent(DeviceId), Targets)) + return Err; for (auto &Target : Targets) if (offloading::amdgpu::isImageCompatibleWithEnv( Processor ? *Processor : "", ElfOrErr->getPlatformFlags(), From 8eab21c0279c19bdf631cec141bd55d8ffe6ac34 Mon Sep 17 00:00:00 2001 From: Hideki Saito Date: Mon, 9 Dec 2024 14:28:04 -0600 Subject: [PATCH 4/4] Code Review adjustments --- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 492b47a909d79..789b2031ab4a8 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -191,7 +191,7 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent, } Error getTargetTripleAndFeatures(hsa_agent_t Agent, - SmallVector &Targets) { + SmallVector> &Targets) { auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) { uint32_t Length; hsa_status_t Status; @@ -1988,10 +1988,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Err; // Detect if XNACK is enabled - SmallVector Targets; + SmallVector> Targets; if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets)) return Err; - if (!Targets.empty() && Targets[0].contains("xnack+")) + if (!Targets.empty() && Targets[0].str().contains("xnack+")) IsXnackEnabled = true; // detect if device is an APU. @@ -3205,7 +3205,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy { if (!Processor) return false; - SmallVector Targets; + SmallVector> Targets; if (auto Err = hsa_utils::getTargetTripleAndFeatures( getKernelAgent(DeviceId), Targets)) return Err;