Skip to content

Commit 07d439c

Browse files
committed
[Offload][AMDGPU] accept generic target
1 parent 36c2940 commit 07d439c

File tree

3 files changed: +39 additions, -27 deletions (lines changed)

offload/DeviceRTL/CMakeLists.txt

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -42,11 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
4242
set(include_directory ${devicertl_base_directory}/include)
4343
set(source_directory ${devicertl_base_directory}/src)
4444

45-
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
46-
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
47-
"gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
48-
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
49-
"gfx1151;gfx1152;gfx1153")
45+
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
46+
"gfx9-generic;gfx900;gfx902;gfx906;gfx908"
47+
"gfx90a;gfx90c"
48+
"gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
49+
"gfx10-1-generic;gfx1010;gfx1012"
50+
"gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
51+
"gfx1034;gfx1035;gfx1036"
52+
"gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
53+
"gfx1150;gfx1151;gfx1152;gfx1153"
54+
"gfx12-generic")
5055
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
5156
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
5257
set(all_gpu_architectures

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 27 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -190,8 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
190190
#endif
191191
}
192192

193-
Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
194-
std::string Target;
193+
Error getTargetTripleAndFeatures(hsa_agent_t Agent,
194+
SmallVector<std::string> &Targets) {
195195
auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
196196
uint32_t Length;
197197
hsa_status_t Status;
@@ -205,13 +205,16 @@ Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
205205
return Status;
206206

207207
llvm::StringRef TripleTarget(ISAName.begin(), Length);
208-
if (TripleTarget.consume_front("amdgcn-amd-amdhsa"))
209-
Target = TripleTarget.ltrim('-').rtrim('\0').str();
210-
return HSA_STATUS_INFO_BREAK;
208+
if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
209+
auto Target = TripleTarget.ltrim('-').rtrim('\0').str();
210+
if (Target.find("generic") != std::string::npos)
211+
Targets.push_back(Target);
212+
else
213+
Targets[0] = Target;
214+
}
215+
return HSA_STATUS_SUCCESS;
211216
});
212-
if (Err)
213-
return Err;
214-
return Target;
217+
return Err;
215218
}
216219
} // namespace hsa_utils
217220

@@ -1988,12 +1991,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
19881991
return Err;
19891992

19901993
// Detect if XNACK is enabled
1991-
auto TargeTripleAndFeaturesOrError =
1992-
hsa_utils::getTargetTripleAndFeatures(Agent);
1993-
if (!TargeTripleAndFeaturesOrError)
1994-
return TargeTripleAndFeaturesOrError.takeError();
1995-
if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
1996-
.contains("xnack+"))
1994+
SmallVector<std::string> Targets;
1995+
Targets.push_back("");
1996+
if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
1997+
return Err;
1998+
if (Targets[0].find("xnack+") != std::string::npos)
19971999
IsXnackEnabled = true;
19982000

19992001
// detect if device is an APU.
@@ -3207,13 +3209,17 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
32073209
if (!Processor)
32083210
return false;
32093211

3210-
auto TargeTripleAndFeaturesOrError =
3211-
hsa_utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
3212-
if (!TargeTripleAndFeaturesOrError)
3213-
return TargeTripleAndFeaturesOrError.takeError();
3214-
return offloading::amdgpu::isImageCompatibleWithEnv(
3215-
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
3216-
*TargeTripleAndFeaturesOrError);
3212+
SmallVector<std::string> Targets;
3213+
Targets.push_back("");
3214+
if (auto Err = hsa_utils::getTargetTripleAndFeatures(
3215+
getKernelAgent(DeviceId), Targets))
3216+
return Err;
3217+
for (auto &Target : Targets)
3218+
if (offloading::amdgpu::isImageCompatibleWithEnv(
3219+
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
3220+
Target))
3221+
return true;
3222+
return false;
32173223
}
32183224

32193225
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {

offload/plugins-nextgen/common/src/Utils/ELF.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,8 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
6868
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
6969
return createError("Invalid AMD ABI version, must be version 4 or above");
7070
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
71-
(Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
71+
(Header.e_flags & EF_AMDGPU_MACH) >
72+
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
7273
return createError("Unsupported AMDGPU architecture");
7374
} else if (Header.e_machine == EM_CUDA) {
7475
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)

0 commit comments

Comments
 (0)