[ELF][LLDB] Add an nvsass triple #159459

walter-erquinigo · 2025-09-17T21:26:13Z

When handling CUDA ELF files via objdump or LLDB, the ELF parser in LLVM needs to distinguish if an ELF file is sass or not, which requires a triple for sass to exist in llvm. This patch includes all the necessary changes for LLDB and objdump to correctly identify these files with the correct triple.

llvmbot · 2025-09-17T21:26:44Z

@llvm/pr-subscribers-llvm-binary-utilities

@llvm/pr-subscribers-lldb

Author: Walter Erquinigo (walter-erquinigo)

Changes

When handling CUDA ELF files via objdump or LLDB, the ELF parser in LLVM needs to distinguish if an ELF file is sass or not, which requires a triple for sass to exist in llvm. This patch includes all the necessary changes for LLDB and objdump to correctly identify these files with the correct triple.

Full diff: https://github.com/llvm/llvm-project/pull/159459.diff

7 Files Affected:

(modified) lldb/include/lldb/Utility/ArchSpec.h (+5-1)
(modified) lldb/source/Utility/ArchSpec.cpp (+3)
(modified) llvm/include/llvm/Object/ELFObjectFile.h (+2-4)
(modified) llvm/include/llvm/TargetParser/Triple.h (+4-1)
(modified) llvm/lib/Object/ELFObjectFile.cpp (+2-2)
(modified) llvm/lib/TargetParser/TargetDataLayout.cpp (+1)
(modified) llvm/lib/TargetParser/Triple.cpp (+13)

diff --git a/lldb/include/lldb/Utility/ArchSpec.h b/lldb/include/lldb/Utility/ArchSpec.h
index 361108fd8f0e7..8415eca96ea69 100644
--- a/lldb/include/lldb/Utility/ArchSpec.h
+++ b/lldb/include/lldb/Utility/ArchSpec.h
@@ -236,6 +236,8 @@ class ArchSpec {
 
     eCore_wasm32,
 
+    eCore_nvsass,
+
     kNumCores,
 
     kCore_invalid,
@@ -282,8 +284,10 @@ class ArchSpec {
     kCore_mips64el_last = eCore_mips64r6el,
 
     kCore_mips_first = eCore_mips32,
-    kCore_mips_last = eCore_mips64r6el
+    kCore_mips_last = eCore_mips64r6el,
 
+    kCore_nvsass_first = eCore_nvsass,
+    kCore_nvsass_last = eCore_nvsass,
   };
 
   /// Default constructor.
diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp
index 2a87cc6bf7de9..5ce66a4135d78 100644
--- a/lldb/source/Utility/ArchSpec.cpp
+++ b/lldb/source/Utility/ArchSpec.cpp
@@ -248,6 +248,8 @@ static const CoreDefinition g_core_definitions[] = {
 
     {eByteOrderLittle, 4, 1, 4, llvm::Triple::wasm32, ArchSpec::eCore_wasm32,
      "wasm32"},
+
+    {eByteOrderLittle, 8, 4, 4, llvm::Triple::nvsass, ArchSpec::eCore_nvsass, "nvsass"},
 };
 
 // Ensure that we have an entry in the g_core_definitions for each core. If you
@@ -403,6 +405,7 @@ static const ArchDefinitionEntry g_elf_arch_entries[] = {
     {ArchSpec::eCore_riscv64,         llvm::ELF::EM_RISCV,      ArchSpec::eRISCVSubType_riscv64}, // riscv64
     {ArchSpec::eCore_loongarch32,     llvm::ELF::EM_LOONGARCH,  ArchSpec::eLoongArchSubType_loongarch32}, // loongarch32
     {ArchSpec::eCore_loongarch64,     llvm::ELF::EM_LOONGARCH,  ArchSpec::eLoongArchSubType_loongarch64}, // loongarch64
+    {ArchSpec::eCore_nvsass,   llvm::ELF::EM_CUDA,       }, // nvsass
 };
 // clang-format on
 
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index ced1afdd4cc6a..7f8f4a2a01fe4 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -69,7 +69,7 @@ class LLVM_ABI ELFObjectFileBase : public ObjectFile {
   SubtargetFeatures getLoongArchFeatures() const;
 
   StringRef getAMDGPUCPUName() const;
-  StringRef getNVPTXCPUName() const;
+  StringRef getCUDACPUName() const;
 
 protected:
   ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
@@ -1431,9 +1431,7 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
   }
 
   case ELF::EM_CUDA: {
-    if (EF.getHeader().e_ident[ELF::EI_CLASS] == ELF::ELFCLASS32)
-      return Triple::nvptx;
-    return Triple::nvptx64;
+    return Triple::nvsass;
   }
 
   case ELF::EM_BPF:
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index f9b4fc3aa2010..f5043baa68f27 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -110,6 +110,7 @@ class Triple {
     renderscript32, // 32-bit RenderScript
     renderscript64, // 64-bit RenderScript
     ve,             // NEC SX-Aurora Vector Engine
+    nvsass,         // NVIDIA SASS
     LastArchType = ve
   };
   enum SubArchType {
@@ -905,6 +906,8 @@ class Triple {
 
   bool isAMDGPU() const { return getArch() == Triple::r600 || isAMDGCN(); }
 
+  bool isNVSASS() const { return getArch() == Triple::nvsass; }
+
   /// Tests whether the target is Thumb (little and big endian).
   bool isThumb() const {
     return getArch() == Triple::thumb || getArch() == Triple::thumbeb;
@@ -1267,7 +1270,7 @@ class Triple {
   LLVM_ABI bool isCompatibleWith(const Triple &Other) const;
 
   /// Test whether the target triple is for a GPU.
-  bool isGPU() const { return isSPIRV() || isNVPTX() || isAMDGPU(); }
+  bool isGPU() const { return isSPIRV() || isNVPTX() || isAMDGPU() || isNVSASS(); }
 
   /// Merge target triples.
   LLVM_ABI std::string merge(const Triple &Other) const;
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index f9fda23469ee5..f083efe1a25b1 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -438,7 +438,7 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
   case ELF::EM_AMDGPU:
     return getAMDGPUCPUName();
   case ELF::EM_CUDA:
-    return getNVPTXCPUName();
+    return getCUDACPUName();
   case ELF::EM_PPC:
   case ELF::EM_PPC64:
     return StringRef("future");
@@ -620,7 +620,7 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
   }
 }
 
-StringRef ELFObjectFileBase::getNVPTXCPUName() const {
+StringRef ELFObjectFileBase::getCUDACPUName() const {
   assert(getEMachine() == ELF::EM_CUDA);
   unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
                     ? getPlatformFlags() & ELF::EF_CUDA_SM
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index e222588ea389b..40a8a057ef18d 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -618,6 +618,7 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
   case Triple::shave:
   case Triple::renderscript32:
   case Triple::renderscript64:
+  case Triple::nvsass:
     // These are all virtual ISAs with no LLVM backend, and therefore no fixed
     // LLVM data layout.
     return "";
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ac3626db46ea9..8fb9ba76d2474 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -54,6 +54,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
   case msp430:         return "msp430";
   case nvptx64:        return "nvptx64";
   case nvptx:          return "nvptx";
+  case nvsass:         return "nvsass";
   case ppc64:          return "powerpc64";
   case ppc64le:        return "powerpc64le";
   case ppc:            return "powerpc";
@@ -242,6 +243,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
   case wasm32:
   case wasm64:      return "wasm";
 
+  case nvsass:      return "nvsass";
+
   case riscv32:
   case riscv64:
   case riscv32be:
@@ -486,6 +489,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
       .Case("xcore", xcore)
       .Case("nvptx", nvptx)
       .Case("nvptx64", nvptx64)
+      .Case("nvsass", nvsass)
       .Case("amdil", amdil)
       .Case("amdil64", amdil64)
       .Case("hsail", hsail)
@@ -627,6 +631,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
                 .Case("xcore", Triple::xcore)
                 .Case("nvptx", Triple::nvptx)
                 .Case("nvptx64", Triple::nvptx64)
+                .Case("nvsass", Triple::nvsass)
                 .Case("amdil", Triple::amdil)
                 .Case("amdil64", Triple::amdil64)
                 .Case("hsail", Triple::hsail)
@@ -985,6 +990,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
   case Triple::msp430:
   case Triple::nvptx64:
   case Triple::nvptx:
+  case Triple::nvsass:
   case Triple::ppc64le:
   case Triple::ppcle:
   case Triple::r600:
@@ -1745,6 +1751,9 @@ unsigned Triple::getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   case llvm::Triple::mips64:
   case llvm::Triple::mips64el:
   case llvm::Triple::nvptx64:
+  // nvsass can represent both 32- and 64-bit pointers, but assume
+  // 64-bit for the triple
+  case llvm::Triple::nvsass:
   case llvm::Triple::ppc64:
   case llvm::Triple::ppc64le:
   case llvm::Triple::renderscript64:
@@ -1823,6 +1832,7 @@ Triple Triple::get32BitArchVariant() const {
   case Triple::mips:
   case Triple::mipsel:
   case Triple::nvptx:
+  case Triple::nvsass:
   case Triple::ppc:
   case Triple::ppcle:
   case Triple::r600:
@@ -1910,6 +1920,7 @@ Triple Triple::get64BitArchVariant() const {
   case Triple::mips64:
   case Triple::mips64el:
   case Triple::nvptx64:
+  case Triple::nvsass:
   case Triple::ppc64:
   case Triple::ppc64le:
   case Triple::renderscript64:
@@ -1980,6 +1991,7 @@ Triple Triple::getBigEndianArchVariant() const {
   case Triple::msp430:
   case Triple::nvptx64:
   case Triple::nvptx:
+  case Triple::nvsass:
   case Triple::r600:
   case Triple::renderscript32:
   case Triple::renderscript64:
@@ -2095,6 +2107,7 @@ bool Triple::isLittleEndian() const {
   case Triple::msp430:
   case Triple::nvptx64:
   case Triple::nvptx:
+  case Triple::nvsass:
   case Triple::ppcle:
   case Triple::ppc64le:
   case Triple::r600:

github-actions · 2025-09-17T21:31:17Z

✅ With the latest revision this PR passed the C/C++ code formatter.

When handling CUDA ELF files via objdump or LLDB, the ELF parser in LLVM needs to distinguish if an ELF file is sass or not, which requires a triple for sass to exist in llvm. This patch includes all the necessary changes for LLDB and objdump to correctly identify these files with the correct triple.

jholewinski-nv · 2025-09-19T16:05:09Z

llvm/unittests/Object/ELFObjectFileTest.cpp

      {ELF::EM_X86_64, ELF::ELFOSABI_FREEBSD, "x86_64--freebsd"},
      {ELF::EM_X86_64, ELF::ELFOSABI_OPENBSD, "x86_64--openbsd"},
-      {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"}};
+      {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvsass-nvidia-cuda"}};


Can we keep both targets tested here?

After inspecting that code, I realized that this can only work with nvsass. I actually got the impression that folks used nvptx here because of the lack of a proper sass triple.
If there are indeed object files with ptx and no sass, a flag would be needed somewhere in the ELF file for the parser to distinguish the actual triple. And this seems not to be the case.

Ok, thanks!

What is this nvsass target supposed to do exactly? It's just nvptx64 but duplicating the name. The reason you can't test both is because your change replaces the old handling with a new name.

jhuber6 · 2025-09-19T23:16:12Z

llvm/include/llvm/Object/ELFObjectFile.h

  SubtargetFeatures getLoongArchFeatures() const;

  StringRef getAMDGPUCPUName() const;
-  StringRef getNVPTXCPUName() const;


Why was this changed?

jhuber6 · 2025-09-19T23:17:43Z

This caused all of the libc tests to fail because the triple was changed. It's a relatively simple change to check against nvsass instead of nvptx64. But I have to wonder why we made this change in the first place? nvptx64 was already used for the SASS target triple.

jhuber6 · 2025-09-19T23:24:06Z

ELF parser in LLVM needs to distinguish if an ELF file is sass or not, which requires a triple for sass to exist in llvm.

Maybe I'm not familiar with CUDA, but there are ELF targets that aren't SASS? This corresponds to the triple nvptx64-nvidia-cuda which is what all GPU code uses. If you want to make this change then you should add nvsass-nvidia-cuda and have it be an entirely separate toolchain I'd wager.

Artem-B · 2025-09-19T23:42:54Z

@walter-erquinigo I agree with @jhuber6. Addition of a new triple looks unnecessary for the stated purpose.

Summary: This patch has broken the `libc` build bot. I could work around that but the changes seem unnecessary. This reverts commit 9ba844e.

jhuber6 · 2025-09-20T14:11:12Z

I'm assuming the lldb changes are fine but should probably just use nvptx64. We could probably entirely remove nvptx32 since NVIDIA's removed it AFAIK.

walter-erquinigo · 2025-09-21T01:21:47Z

Interesting. Thanks for the feedback and thanks for reverting this change.

jhuber6 · 2025-09-21T01:23:59Z

Interesting. Thanks for the feedback and thanks for reverting this change.

If you can make a PR for just the lldb stuff I can review it. I appreciate NVIDIA trying to upstream more support, so I'm not trying to dissuade you.

walter-erquinigo · 2025-09-21T01:34:15Z

@jhuber6 , something I care about for LLDB is being able to use different disassemblers for ptx and sass. The unique situation here is that sass is not part of LLVM code generation, and it seems that's why LLVM is unaware of sass. LLVM can only generate ptx and folks rely on an nvidia proprietary compiler to go from ptx to sass. But at runtime, LLDB sees both sass and nvptx.

I guess that a simpler patch that would exist only within LLDB is to add two flavors to the nvptx architecture, one for nvptx and for sass in the ArchSpec class in LLDB. I for sure can do that without doing major architectural changes like this.

jhuber6 · 2025-09-21T01:38:16Z

@jhuber6 , something I care about for LLDB is being able to use different disassemblers for ptx and sass. The unique situation here is that sass is not part of LLVM code generation, and it seems that's why LLVM is unaware of sass. LLVM can only generate ptx and folks rely on an nvidia proprietary compiler to go from ptx to sass. But at runtime, LLDB sees both sass and nvptx.

I guess that a simpler patch that would exist only within LLDB is to add two flavors to the nvptx architecture, one for nvptx and for sass in the ArchSpec class in LLDB. I for sure can do that without doing major architectural changes like this.

LLVM's backend can only emit PTX, but as a toolchain we simply export that handling to ptxas as our assembler. The toolchain handles the binaries as the nvptx64 triple because they correspond to ELF files created with --target=nvptx64-nvidia-cuda. You can generate SASS yourself with something similar to https://godbolt.org/z/ofWE6baPT. This is as far as I understand it at least, because as far as I'm aware, if a file has ELF magic and the EM_CUDA machine, it is SASS.

walter-erquinigo · 2025-09-21T02:12:08Z

I see. Thanks! I'll think more about this next week

walter-erquinigo requested review from clayborg, dmpots and jholewinski September 17, 2025 21:26

walter-erquinigo requested a review from JDevlieghere as a code owner September 17, 2025 21:26

llvmbot added lldb llvm:binary-utilities labels Sep 17, 2025

walter-erquinigo force-pushed the llvm-fork/werquinigo/nvsass branch from 93ae87d to d5f269b Compare September 17, 2025 21:27

walter-erquinigo force-pushed the llvm-fork/werquinigo/nvsass branch from d5f269b to a14b6f1 Compare September 17, 2025 23:57

alexrp mentioned this pull request Sep 14, 2025

Tracking issue for the LLVM 22 upgrade ziglang/zig#24542

Open

21 tasks

jholewinski-nv reviewed Sep 19, 2025

View reviewed changes

jholewinski approved these changes Sep 19, 2025

View reviewed changes

walter-erquinigo merged commit 9ba844e into llvm:main Sep 19, 2025
9 checks passed

jhuber6 reviewed Sep 19, 2025

View reviewed changes

jhuber6 added a commit that referenced this pull request Sep 20, 2025

Revert "[ELF][LLDB] Add an nvsass triple (#159459)" (#159879)

0fa3061

Summary: This patch has broken the `libc` build bot. I could work around that but the changes seem unnecessary. This reverts commit 9ba844e.

[ELF][LLDB] Add an nvsass triple #159459

[ELF][LLDB] Add an nvsass triple #159459

Uh oh!

Conversation

walter-erquinigo commented Sep 17, 2025

Uh oh!

llvmbot commented Sep 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Sep 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jholewinski-nv Sep 19, 2025

Choose a reason for hiding this comment

Uh oh!

walter-erquinigo Sep 19, 2025

Choose a reason for hiding this comment

Uh oh!

walter-erquinigo Sep 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

jholewinski-nv Sep 19, 2025

Choose a reason for hiding this comment

Uh oh!

jhuber6 Sep 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

jhuber6 Sep 19, 2025

Choose a reason for hiding this comment

Uh oh!

jhuber6 commented Sep 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jhuber6 commented Sep 19, 2025

Uh oh!

Artem-B commented Sep 19, 2025

Uh oh!

jhuber6 commented Sep 20, 2025

Uh oh!

walter-erquinigo commented Sep 21, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jhuber6 commented Sep 21, 2025

Uh oh!

walter-erquinigo commented Sep 21, 2025

Uh oh!

jhuber6 commented Sep 21, 2025

Uh oh!

walter-erquinigo commented Sep 21, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

6 participants

llvmbot commented Sep 17, 2025 •

edited

Loading

github-actions bot commented Sep 17, 2025 •

edited

Loading

walter-erquinigo Sep 19, 2025 •

edited

Loading

jhuber6 Sep 19, 2025 •

edited

Loading

jhuber6 commented Sep 19, 2025 •

edited

Loading

walter-erquinigo commented Sep 21, 2025 •

edited

Loading