diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt index 32a7510be980d..b9659ca3d7a9c 100644 --- a/offload/DeviceRTL/CMakeLists.txt +++ b/offload/DeviceRTL/CMakeLists.txt @@ -42,11 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR}) set(include_directory ${devicertl_base_directory}/include) set(source_directory ${devicertl_base_directory}/src) -set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906" - "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010" - "gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035" - "gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150" - "gfx1151;gfx1152;gfx1153") +set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803" + "gfx9-generic;gfx900;gfx902;gfx906;gfx908" + "gfx90a;gfx90c" + "gfx9-4-generic;gfx940;gfx941;gfx942;gfx950" + "gfx10-1-generic;gfx1010;gfx1012" + "gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033" + "gfx1034;gfx1035;gfx1036" + "gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103" + "gfx1150;gfx1151;gfx1152;gfx1153" + "gfx12-generic") set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62" "sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90") set(all_gpu_architectures diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index d74e65d416567..789b2031ab4a8 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -190,8 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent, #endif } -Expected getTargetTripleAndFeatures(hsa_agent_t Agent) { - std::string Target; +Error getTargetTripleAndFeatures(hsa_agent_t Agent, + SmallVector> &Targets) { auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) { uint32_t Length; hsa_status_t Status; @@ -205,13 +205,13 @@ Expected getTargetTripleAndFeatures(hsa_agent_t Agent) { return Status; llvm::StringRef TripleTarget(ISAName.begin(), Length); - if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) - Target = TripleTarget.ltrim('-').rtrim('\0').str(); - return HSA_STATUS_INFO_BREAK; + if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) { + auto Target = TripleTarget.ltrim('-').rtrim('\0'); + Targets.push_back(Target); + } + return HSA_STATUS_SUCCESS; }); - if (Err) - return Err; - return Target; + return Err; } } // namespace hsa_utils @@ -1988,12 +1988,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Err; // Detect if XNACK is enabled - auto TargeTripleAndFeaturesOrError = - hsa_utils::getTargetTripleAndFeatures(Agent); - if (!TargeTripleAndFeaturesOrError) - return TargeTripleAndFeaturesOrError.takeError(); - if (static_cast(*TargeTripleAndFeaturesOrError) - .contains("xnack+")) + SmallVector> Targets; + if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets)) + return Err; + if (!Targets.empty() && Targets[0].str().contains("xnack+")) IsXnackEnabled = true; // detect if device is an APU. @@ -3207,13 +3205,16 @@ struct AMDGPUPluginTy final : public GenericPluginTy { if (!Processor) return false; - auto TargeTripleAndFeaturesOrError = - hsa_utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId)); - if (!TargeTripleAndFeaturesOrError) - return TargeTripleAndFeaturesOrError.takeError(); - return offloading::amdgpu::isImageCompatibleWithEnv( - Processor ? *Processor : "", ElfOrErr->getPlatformFlags(), - *TargeTripleAndFeaturesOrError); + SmallVector> Targets; + if (auto Err = hsa_utils::getTargetTripleAndFeatures( + getKernelAgent(DeviceId), Targets)) + return Err; + for (auto &Target : Targets) + if (offloading::amdgpu::isImageCompatibleWithEnv( + Processor ? *Processor : "", ElfOrErr->getPlatformFlags(), + Target.str())) + return true; + return false; } bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override { diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index f5037611e72e0..10b32440dc877 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -68,7 +68,8 @@ checkMachineImpl(const object::ELFObjectFile &ELFObj, uint16_t EMachine) { Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6) return createError("Invalid AMD ABI version, must be version 4 or above"); if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 || - (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201) + (Header.e_flags & EF_AMDGPU_MACH) > + EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC) return createError("Unsupported AMDGPU architecture"); } else if (Header.e_machine == EM_CUDA) { if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)