Skip to content

Commit dffd7f3

Browse files
authored
[LLVM] Fix offload and update CUDA ABI for all SM values (#159354)
Summary: Turns out the new CUDA ABI now applies retroactively to all the other SMs if you upgrade to CUDA 13.0. This patch changes the scheme, keeping all the SM flags consistent but using an offset. Fixes: #159088
1 parent 5cb7bf6 commit dffd7f3

File tree

4 files changed

+76
-26
lines changed

4 files changed

+76
-26
lines changed

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,12 @@ enum : unsigned {
931931
// Processor selection mask for EF_CUDA_SM* values prior to blackwell.
932932
EF_CUDA_SM = 0xff,
933933

934+
// Processor selection mask for EF_CUDA_SM* values in the new (non-V1) CUDA ABI, where the SM value is stored shifted into bits 8-15.
935+
EF_CUDA_SM_MASK = 0xff00,
936+
937+
// Bit offset of the EF_CUDA_SM* value within the flags in the new (non-V1) CUDA ABI; shift right by this after masking with EF_CUDA_SM_MASK.
938+
EF_CUDA_SM_OFFSET = 8,
939+
934940
// SM based processor values.
935941
EF_CUDA_SM20 = 0x14,
936942
EF_CUDA_SM21 = 0x15,
@@ -950,9 +956,15 @@ enum : unsigned {
950956
EF_CUDA_SM80 = 0x50,
951957
EF_CUDA_SM86 = 0x56,
952958
EF_CUDA_SM87 = 0x57,
959+
EF_CUDA_SM88 = 0x58,
953960
EF_CUDA_SM89 = 0x59,
954-
// The sm_90a variant uses the same machine flag.
955961
EF_CUDA_SM90 = 0x5a,
962+
EF_CUDA_SM100 = 0x64,
963+
EF_CUDA_SM101 = 0x65,
964+
EF_CUDA_SM103 = 0x67,
965+
EF_CUDA_SM110 = 0x6e,
966+
EF_CUDA_SM120 = 0x78,
967+
EF_CUDA_SM121 = 0x79,
956968

957969
// Unified texture binding is enabled.
958970
EF_CUDA_TEXMODE_UNIFIED = 0x100,
@@ -968,17 +980,7 @@ enum : unsigned {
968980
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
969981
EF_CUDA_VIRTUAL_SM = 0xff0000,
970982

971-
// Processor selection mask for EF_CUDA_SM* values following blackwell.
972-
EF_CUDA_SM_MASK = 0xff00,
973-
974-
// SM based processor values.
975-
EF_CUDA_SM100 = 0x6400,
976-
EF_CUDA_SM101 = 0x6500,
977-
EF_CUDA_SM103 = 0x6700,
978-
EF_CUDA_SM120 = 0x7800,
979-
EF_CUDA_SM121 = 0x7900,
980-
981-
// Set when using an accelerator variant like sm_100a.
983+
// Set when using an accelerator variant like sm_100a in the new ABI.
982984
EF_CUDA_ACCELERATORS = 0x8,
983985
};
984986

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -622,7 +622,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
622622
assert(getEMachine() == ELF::EM_CUDA);
623623
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
624624
? getPlatformFlags() & ELF::EF_CUDA_SM
625-
: getPlatformFlags() & ELF::EF_CUDA_SM_MASK;
625+
: (getPlatformFlags() & ELF::EF_CUDA_SM_MASK) >>
626+
ELF::EF_CUDA_SM_OFFSET;
626627

627628
switch (SM) {
628629
// Fermi architecture.
@@ -674,6 +675,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
674675
return "sm_86";
675676
case ELF::EF_CUDA_SM87:
676677
return "sm_87";
678+
case ELF::EF_CUDA_SM88:
679+
return "sm_88";
677680

678681
// Ada architecture.
679682
case ELF::EF_CUDA_SM89:
@@ -694,6 +697,9 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
694697
case ELF::EF_CUDA_SM103:
695698
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_103a"
696699
: "sm_103";
700+
case ELF::EF_CUDA_SM110:
701+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_110a"
702+
: "sm_110";
697703

698704
// Rubin architecture.
699705
case ELF::EF_CUDA_SM120:

llvm/tools/llvm-readobj/ELFDumper.cpp

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,7 @@ const EnumEntry<unsigned> ElfOSABI[] = {
11141114
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
11151115
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
11161116
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1117+
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
11171118
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};
11181119

11191120
const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
@@ -1679,19 +1680,60 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
16791680
};
16801681

16811682
const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
1682-
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1683-
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1684-
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1685-
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1686-
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1687-
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1688-
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1689-
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1690-
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1691-
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1692-
ENUM_ENT(EF_CUDA_SM100, "sm_100"), ENUM_ENT(EF_CUDA_SM101, "sm_101"),
1693-
ENUM_ENT(EF_CUDA_SM103, "sm_103"), ENUM_ENT(EF_CUDA_SM120, "sm_120"),
1683+
ENUM_ENT(EF_CUDA_SM20, "sm_20"),
1684+
ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1685+
ENUM_ENT(EF_CUDA_SM30, "sm_30"),
1686+
ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1687+
ENUM_ENT(EF_CUDA_SM35, "sm_35"),
1688+
ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1689+
ENUM_ENT(EF_CUDA_SM50, "sm_50"),
1690+
ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1691+
ENUM_ENT(EF_CUDA_SM53, "sm_53"),
1692+
ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1693+
ENUM_ENT(EF_CUDA_SM61, "sm_61"),
1694+
ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1695+
ENUM_ENT(EF_CUDA_SM70, "sm_70"),
1696+
ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1697+
ENUM_ENT(EF_CUDA_SM75, "sm_75"),
1698+
ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1699+
ENUM_ENT(EF_CUDA_SM86, "sm_86"),
1700+
ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1701+
ENUM_ENT(EF_CUDA_SM88, "sm_88"),
1702+
ENUM_ENT(EF_CUDA_SM89, "sm_89"),
1703+
ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1704+
ENUM_ENT(EF_CUDA_SM100, "sm_100"),
1705+
ENUM_ENT(EF_CUDA_SM101, "sm_101"),
1706+
ENUM_ENT(EF_CUDA_SM103, "sm_103"),
1707+
ENUM_ENT(EF_CUDA_SM110, "sm_110"),
1708+
ENUM_ENT(EF_CUDA_SM120, "sm_120"),
16941709
ENUM_ENT(EF_CUDA_SM121, "sm_121"),
1710+
ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"),
1711+
ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"),
1712+
ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"),
1713+
ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"),
1714+
ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"),
1715+
ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"),
1716+
ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"),
1717+
ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"),
1718+
ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"),
1719+
ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"),
1720+
ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"),
1721+
ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"),
1722+
ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"),
1723+
ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"),
1724+
ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"),
1725+
ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"),
1726+
ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"),
1727+
ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, "sm_87"),
1728+
ENUM_ENT(EF_CUDA_SM88 << EF_CUDA_SM_OFFSET, "sm_88"),
1729+
ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"),
1730+
ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"),
1731+
ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"),
1732+
ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"),
1733+
ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"),
1734+
ENUM_ENT(EF_CUDA_SM110 << EF_CUDA_SM_OFFSET, "sm_110"),
1735+
ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"),
1736+
ENUM_ENT(EF_CUDA_SM121 << EF_CUDA_SM_OFFSET, "sm_121"),
16951737
};
16961738

16971739
const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1581,7 +1581,7 @@ struct CUDAPluginTy final : public GenericPluginTy {
15811581
unsigned SM =
15821582
Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
15831583
? Header.e_flags & ELF::EF_CUDA_SM
1584-
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 8;
1584+
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET;
15851585

15861586
CUdevice Device;
15871587
CUresult Res = cuDeviceGet(&Device, DeviceId);

0 commit comments

Comments
 (0)