Skip to content

Commit d113e76

Browse files
author
git apple-llvm automerger
committed
Merge commit 'e625a781211e' from llvm.org/release/21.x into stable/21.x
2 parents 553b519 + e625a78 commit d113e76

File tree

6 files changed

+158
-46
lines changed

6 files changed

+158
-46
lines changed

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ enum {
362362
ELFOSABI_FENIXOS = 16, // FenixOS
363363
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
364364
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
365+
ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture.
365366
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
366367
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
367368
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
@@ -385,6 +386,12 @@ enum {
385386
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
386387
};
387388

389+
// CUDA OS ABI Version identification.
390+
enum {
391+
ELFABIVERSION_CUDA_V1 = 7,
392+
ELFABIVERSION_CUDA_V2 = 8,
393+
};
394+
388395
#define ELF_RELOC(name, value) name = value,
389396

390397
// X86_64 relocations.
@@ -921,9 +928,15 @@ enum {
921928

922929
// NVPTX specific e_flags.
923930
enum : unsigned {
924-
// Processor selection mask for EF_CUDA_SM* values.
931+
// Processor selection mask for EF_CUDA_SM* values prior to Blackwell.
925932
EF_CUDA_SM = 0xff,
926933

934+
// Processor selection mask for EF_CUDA_SM* values following Blackwell.
935+
EF_CUDA_SM_MASK = 0xff00,
936+
937+
// Bit offset (right-shift amount) of the EF_CUDA_SM_MASK processor field following Blackwell.
938+
EF_CUDA_SM_OFFSET = 8,
939+
927940
// SM based processor values.
928941
EF_CUDA_SM20 = 0x14,
929942
EF_CUDA_SM21 = 0x15,
@@ -943,9 +956,15 @@ enum : unsigned {
943956
EF_CUDA_SM80 = 0x50,
944957
EF_CUDA_SM86 = 0x56,
945958
EF_CUDA_SM87 = 0x57,
959+
EF_CUDA_SM88 = 0x58,
946960
EF_CUDA_SM89 = 0x59,
947-
// The sm_90a variant uses the same machine flag.
948961
EF_CUDA_SM90 = 0x5a,
962+
EF_CUDA_SM100 = 0x64,
963+
EF_CUDA_SM101 = 0x65,
964+
EF_CUDA_SM103 = 0x67,
965+
EF_CUDA_SM110 = 0x6e,
966+
EF_CUDA_SM120 = 0x78,
967+
EF_CUDA_SM121 = 0x79,
949968

950969
// Unified texture binding is enabled.
951970
EF_CUDA_TEXMODE_UNIFIED = 0x100,
@@ -954,12 +973,15 @@ enum : unsigned {
954973
// The target is using 64-bit addressing.
955974
EF_CUDA_64BIT_ADDRESS = 0x400,
956975
// Set when using the sm_90a processor.
957-
EF_CUDA_ACCELERATORS = 0x800,
976+
EF_CUDA_ACCELERATORS_V1 = 0x800,
958977
// Undocumented software feature.
959978
EF_CUDA_SW_FLAG_V2 = 0x1000,
960979

961980
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
962981
EF_CUDA_VIRTUAL_SM = 0xff0000,
982+
983+
// Set when using an accelerator variant like sm_100a.
984+
EF_CUDA_ACCELERATORS = 0x8,
963985
};
964986

965987
// ELF Relocation types for BPF

llvm/include/llvm/Object/ELFObjectFile.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
14791479
case ELF::ELFOSABI_OPENBSD:
14801480
return Triple::OpenBSD;
14811481
case ELF::ELFOSABI_CUDA:
1482+
case ELF::ELFOSABI_CUDA_V2:
14821483
return Triple::CUDA;
14831484
case ELF::ELFOSABI_AMDGPU_HSA:
14841485
return Triple::AMDHSA;

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,10 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
620620

621621
StringRef ELFObjectFileBase::getNVPTXCPUName() const {
622622
assert(getEMachine() == ELF::EM_CUDA);
623-
unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
623+
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
624+
? getPlatformFlags() & ELF::EF_CUDA_SM
625+
: (getPlatformFlags() & ELF::EF_CUDA_SM_MASK) >>
626+
ELF::EF_CUDA_SM_OFFSET;
624627

625628
switch (SM) {
626629
// Fermi architecture.
@@ -672,14 +675,39 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
672675
return "sm_86";
673676
case ELF::EF_CUDA_SM87:
674677
return "sm_87";
678+
case ELF::EF_CUDA_SM88:
679+
return "sm_88";
675680

676681
// Ada architecture.
677682
case ELF::EF_CUDA_SM89:
678683
return "sm_89";
679684

680685
// Hopper architecture.
681686
case ELF::EF_CUDA_SM90:
682-
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
687+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a"
688+
: "sm_90";
689+
690+
// Blackwell architecture.
691+
case ELF::EF_CUDA_SM100:
692+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a"
693+
: "sm_100";
694+
case ELF::EF_CUDA_SM101:
695+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_101a"
696+
: "sm_101";
697+
case ELF::EF_CUDA_SM103:
698+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_103a"
699+
: "sm_103";
700+
case ELF::EF_CUDA_SM110:
701+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_110a"
702+
: "sm_110";
703+
704+
// Blackwell architecture.
705+
case ELF::EF_CUDA_SM120:
706+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_120a"
707+
: "sm_120";
708+
case ELF::EF_CUDA_SM121:
709+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_121a"
710+
: "sm_121";
683711
default:
684712
llvm_unreachable("Unknown EF_CUDA_SM value");
685713
}

llvm/tools/llvm-readobj/ELFDumper.cpp

Lines changed: 82 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,26 +1083,26 @@ const EnumEntry<unsigned> ElfObjectFileType[] = {
10831083
};
10841084

10851085
const EnumEntry<unsigned> ElfOSABI[] = {
1086-
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
1087-
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
1088-
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
1089-
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
1090-
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
1091-
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
1092-
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
1093-
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
1094-
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
1095-
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
1096-
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
1097-
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
1098-
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
1099-
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
1100-
{"AROS", "AROS", ELF::ELFOSABI_AROS},
1101-
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
1102-
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
1103-
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1104-
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
1105-
};
1086+
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
1087+
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
1088+
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
1089+
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
1090+
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
1091+
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
1092+
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
1093+
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
1094+
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
1095+
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
1096+
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
1097+
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
1098+
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
1099+
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
1100+
{"AROS", "AROS", ELF::ELFOSABI_AROS},
1101+
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
1102+
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
1103+
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1104+
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
1105+
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};
11061106

11071107
const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
11081108
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
@@ -1666,16 +1666,60 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
16661666
};
16671667

16681668
const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
1669-
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1670-
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1671-
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1672-
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1673-
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1674-
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1675-
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1676-
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1677-
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1678-
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1669+
ENUM_ENT(EF_CUDA_SM20, "sm_20"),
1670+
ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1671+
ENUM_ENT(EF_CUDA_SM30, "sm_30"),
1672+
ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1673+
ENUM_ENT(EF_CUDA_SM35, "sm_35"),
1674+
ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1675+
ENUM_ENT(EF_CUDA_SM50, "sm_50"),
1676+
ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1677+
ENUM_ENT(EF_CUDA_SM53, "sm_53"),
1678+
ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1679+
ENUM_ENT(EF_CUDA_SM61, "sm_61"),
1680+
ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1681+
ENUM_ENT(EF_CUDA_SM70, "sm_70"),
1682+
ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1683+
ENUM_ENT(EF_CUDA_SM75, "sm_75"),
1684+
ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1685+
ENUM_ENT(EF_CUDA_SM86, "sm_86"),
1686+
ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1687+
ENUM_ENT(EF_CUDA_SM88, "sm_88"),
1688+
ENUM_ENT(EF_CUDA_SM89, "sm_89"),
1689+
ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1690+
ENUM_ENT(EF_CUDA_SM100, "sm_100"),
1691+
ENUM_ENT(EF_CUDA_SM101, "sm_101"),
1692+
ENUM_ENT(EF_CUDA_SM103, "sm_103"),
1693+
ENUM_ENT(EF_CUDA_SM110, "sm_110"),
1694+
ENUM_ENT(EF_CUDA_SM120, "sm_120"),
1695+
ENUM_ENT(EF_CUDA_SM121, "sm_121"),
1696+
ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"),
1697+
ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"),
1698+
ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"),
1699+
ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"),
1700+
ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"),
1701+
ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"),
1702+
ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"),
1703+
ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"),
1704+
ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"),
1705+
ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"),
1706+
ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"),
1707+
ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"),
1708+
ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"),
1709+
ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"),
1710+
ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"),
1711+
ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"),
1712+
ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"),
1713+
ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, "sm_87"),
1714+
ENUM_ENT(EF_CUDA_SM88 << EF_CUDA_SM_OFFSET, "sm_88"),
1715+
ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"),
1716+
ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"),
1717+
ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"),
1718+
ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"),
1719+
ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"),
1720+
ENUM_ENT(EF_CUDA_SM110 << EF_CUDA_SM_OFFSET, "sm_110"),
1721+
ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"),
1722+
ENUM_ENT(EF_CUDA_SM121 << EF_CUDA_SM_OFFSET, "sm_121"),
16791723
};
16801724

16811725
const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
@@ -3650,10 +3694,16 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
36503694
else if (e.e_machine == EM_XTENSA)
36513695
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags),
36523696
unsigned(ELF::EF_XTENSA_MACH));
3653-
else if (e.e_machine == EM_CUDA)
3697+
else if (e.e_machine == EM_CUDA) {
36543698
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags),
36553699
unsigned(ELF::EF_CUDA_SM));
3656-
else if (e.e_machine == EM_AMDGPU) {
3700+
if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 &&
3701+
(e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1))
3702+
ElfFlags += "a";
3703+
else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 &&
3704+
(e.e_flags & ELF::EF_CUDA_ACCELERATORS))
3705+
ElfFlags += "a";
3706+
} else if (e.e_machine == EM_AMDGPU) {
36573707
switch (e.e_ident[ELF::EI_ABIVERSION]) {
36583708
default:
36593709
break;

offload/plugins-nextgen/common/src/Utils/ELF.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,23 +60,30 @@ static Expected<bool>
6060
checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
6161
const auto Header = ELFObj.getELFFile().getHeader();
6262
if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN)
63-
return createError("Only executable ELF files are supported");
63+
return createError("only executable ELF files are supported");
6464

6565
if (Header.e_machine == EM_AMDGPU) {
6666
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
67-
return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
67+
return createError("invalid AMD OS/ABI, must be AMDGPU_HSA");
6868
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
6969
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
70-
return createError("Invalid AMD ABI version, must be version 5 or above");
70+
return createError("invalid AMD ABI version, must be version 5 or above");
7171
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
7272
(Header.e_flags & EF_AMDGPU_MACH) >
7373
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
74-
return createError("Unsupported AMDGPU architecture");
74+
return createError("unsupported AMDGPU architecture");
7575
} else if (Header.e_machine == EM_CUDA) {
76-
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
77-
return createError("Invalid CUDA addressing mode");
78-
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
79-
return createError("Unsupported NVPTX architecture");
76+
if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) {
77+
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
78+
return createError("invalid CUDA addressing mode");
79+
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
80+
return createError("unsupported NVPTX architecture");
81+
} else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) {
82+
if ((Header.e_flags & EF_CUDA_SM_MASK) < EF_CUDA_SM100)
83+
return createError("unsupported NVPTX architecture");
84+
} else {
85+
return createError("invalid CUDA ABI version");
86+
}
8087
}
8188

8289
return Header.e_machine == EMachine;

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1442,7 +1442,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
14421442
return ElfOrErr.takeError();
14431443

14441444
// Get the numeric value for the image's `sm_` value.
1445-
auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
1445+
const auto Header = ElfOrErr->getELFFile().getHeader();
1446+
unsigned SM =
1447+
Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
1448+
? Header.e_flags & ELF::EF_CUDA_SM
1449+
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET;
14461450

14471451
CUdevice Device;
14481452
CUresult Res = cuDeviceGet(&Device, DeviceId);

0 commit comments

Comments
 (0)