llvm · walter-erquinigo · Sep 19, 2025 · Sep 16, 2025 · jhuber6 · Sep 19, 2025
@@ -236,6 +236,8 @@ class ArchSpec {
 
     eCore_wasm32,
 
+    eCore_nvsass,
+
     kNumCores,
 
     kCore_invalid,
@@ -282,8 +284,10 @@ class ArchSpec {
     kCore_mips64el_last = eCore_mips64r6el,
 
     kCore_mips_first = eCore_mips32,
-    kCore_mips_last = eCore_mips64r6el
+    kCore_mips_last = eCore_mips64r6el,
 
+    kCore_nvsass_first = eCore_nvsass,
+    kCore_nvsass_last = eCore_nvsass,
   };
 
   /// Default constructor.

@@ -248,6 +248,9 @@ static const CoreDefinition g_core_definitions[] = {
 
     {eByteOrderLittle, 4, 1, 4, llvm::Triple::wasm32, ArchSpec::eCore_wasm32,
      "wasm32"},
+
+    {eByteOrderLittle, 8, 4, 4, llvm::Triple::nvsass, ArchSpec::eCore_nvsass,
+     "nvsass"},
 };
 
 // Ensure that we have an entry in the g_core_definitions for each core. If you
@@ -403,6 +406,7 @@ static const ArchDefinitionEntry g_elf_arch_entries[] = {
     {ArchSpec::eCore_riscv64,         llvm::ELF::EM_RISCV,      ArchSpec::eRISCVSubType_riscv64}, // riscv64
     {ArchSpec::eCore_loongarch32,     llvm::ELF::EM_LOONGARCH,  ArchSpec::eLoongArchSubType_loongarch32}, // loongarch32
     {ArchSpec::eCore_loongarch64,     llvm::ELF::EM_LOONGARCH,  ArchSpec::eLoongArchSubType_loongarch64}, // loongarch64
+    {ArchSpec::eCore_nvsass,          llvm::ELF::EM_CUDA,       }, // nvsass
 };
 // clang-format on
 

diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -69,7 +69,7 @@ class LLVM_ABI ELFObjectFileBase : public ObjectFile {
   SubtargetFeatures getLoongArchFeatures() const;
 
   StringRef getAMDGPUCPUName() const;
-  StringRef getNVPTXCPUName() const;
+  StringRef getCUDACPUName() const;
 
 protected:
   ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
@@ -1431,9 +1431,7 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
   }
 
   case ELF::EM_CUDA: {
-    if (EF.getHeader().e_ident[ELF::EI_CLASS] == ELF::ELFCLASS32)
-      return Triple::nvptx;
-    return Triple::nvptx64;
+    return Triple::nvsass;
   }
 
   case ELF::EM_BPF:

diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
@@ -110,6 +110,7 @@ class Triple {
     renderscript32, // 32-bit RenderScript
     renderscript64, // 64-bit RenderScript
     ve,             // NEC SX-Aurora Vector Engine
+    nvsass,         // NVIDIA SASS
     LastArchType = ve
   };
   enum SubArchType {
@@ -905,6 +906,8 @@ class Triple {
 
   bool isAMDGPU() const { return getArch() == Triple::r600 || isAMDGCN(); }
 
+  bool isNVSASS() const { return getArch() == Triple::nvsass; }
+
   /// Tests whether the target is Thumb (little and big endian).
   bool isThumb() const {
     return getArch() == Triple::thumb || getArch() == Triple::thumbeb;
@@ -1267,7 +1270,9 @@ class Triple {
   LLVM_ABI bool isCompatibleWith(const Triple &Other) const;
 
   /// Test whether the target triple is for a GPU.
-  bool isGPU() const { return isSPIRV() || isNVPTX() || isAMDGPU(); }
+  bool isGPU() const {
+    return isSPIRV() || isNVPTX() || isAMDGPU() || isNVSASS();
+  }
 
   /// Merge target triples.
   LLVM_ABI std::string merge(const Triple &Other) const;

diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
@@ -438,7 +438,7 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
   case ELF::EM_AMDGPU:
     return getAMDGPUCPUName();
   case ELF::EM_CUDA:
-    return getNVPTXCPUName();
+    return getCUDACPUName();
   case ELF::EM_PPC:
   case ELF::EM_PPC64:
     return StringRef("future");
@@ -620,7 +620,7 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
   }
 }
 
-StringRef ELFObjectFileBase::getNVPTXCPUName() const {
+StringRef ELFObjectFileBase::getCUDACPUName() const {
   assert(getEMachine() == ELF::EM_CUDA);
   unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
                     ? getPlatformFlags() & ELF::EF_CUDA_SM

diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
@@ -139,7 +139,7 @@ Triple ObjectFile::makeTriple() const {
     TheTriple.setObjectFormat(Triple::GOFF);
   } else if (TheTriple.isAMDGPU()) {
     TheTriple.setVendor(Triple::AMD);
-  } else if (TheTriple.isNVPTX()) {
+  } else if (TheTriple.isNVPTX() || TheTriple.isNVSASS()) {
     TheTriple.setVendor(Triple::NVIDIA);
   }
 

diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -618,6 +618,7 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
   case Triple::shave:
   case Triple::renderscript32:
   case Triple::renderscript64:
+  case Triple::nvsass:
     // These are all virtual ISAs with no LLVM backend, and therefore no fixed
     // LLVM data layout.
     return "";

diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
@@ -54,6 +54,8 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
   case msp430:         return "msp430";
   case nvptx64:        return "nvptx64";
   case nvptx:          return "nvptx";
+  case nvsass:
+    return "nvsass";
   case ppc64:          return "powerpc64";
   case ppc64le:        return "powerpc64le";
   case ppc:            return "powerpc";
@@ -242,6 +244,9 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
   case wasm32:
   case wasm64:      return "wasm";
 
+  case nvsass:
+    return "nvsass";
+
   case riscv32:
   case riscv64:
   case riscv32be:
@@ -486,6 +491,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
       .Case("xcore", xcore)
       .Case("nvptx", nvptx)
       .Case("nvptx64", nvptx64)
+      .Case("nvsass", nvsass)
       .Case("amdil", amdil)
       .Case("amdil64", amdil64)
       .Case("hsail", hsail)
@@ -627,6 +633,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
                 .Case("xcore", Triple::xcore)
                 .Case("nvptx", Triple::nvptx)
                 .Case("nvptx64", Triple::nvptx64)
+                .Case("nvsass", Triple::nvsass)
                 .Case("amdil", Triple::amdil)
                 .Case("amdil64", Triple::amdil64)
                 .Case("hsail", Triple::hsail)
@@ -985,6 +992,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
   case Triple::msp430:
   case Triple::nvptx64:
   case Triple::nvptx:
+  case Triple::nvsass:
   case Triple::ppc64le:
   case Triple::ppcle:
   case Triple::r600:
@@ -1745,6 +1753,9 @@ unsigned Triple::getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   case llvm::Triple::mips64:
   case llvm::Triple::mips64el:
   case llvm::Triple::nvptx64:
+  // nvsass can represent both 32- and 64-bit pointers, but assume
+  // 64-bit for the triple
+  case llvm::Triple::nvsass:
   case llvm::Triple::ppc64:
   case llvm::Triple::ppc64le:
   case llvm::Triple::renderscript64:
@@ -1823,6 +1834,7 @@ Triple Triple::get32BitArchVariant() const {
   case Triple::mips:
   case Triple::mipsel:
   case Triple::nvptx:
+  case Triple::nvsass:
   case Triple::ppc:
   case Triple::ppcle:
   case Triple::r600:
@@ -1910,6 +1922,7 @@ Triple Triple::get64BitArchVariant() const {
   case Triple::mips64:
   case Triple::mips64el:
   case Triple::nvptx64:
+  case Triple::nvsass:
   case Triple::ppc64:
   case Triple::ppc64le:
   case Triple::renderscript64:
@@ -1980,6 +1993,7 @@ Triple Triple::getBigEndianArchVariant() const {
   case Triple::msp430:
   case Triple::nvptx64:
   case Triple::nvptx:
+  case Triple::nvsass:
   case Triple::r600:
   case Triple::renderscript32:
   case Triple::renderscript64:
@@ -2095,6 +2109,7 @@ bool Triple::isLittleEndian() const {
   case Triple::msp430:
   case Triple::nvptx64:
   case Triple::nvptx:
+  case Triple::nvsass:
   case Triple::ppcle:
   case Triple::ppc64le:
   case Triple::r600:

diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp
@@ -295,7 +295,7 @@ TEST(ELFObjectFileTest, CheckOSAndTriple) {
       {ELF::EM_X86_64, ELF::ELFOSABI_AIX, "x86_64--aix"},
       {ELF::EM_X86_64, ELF::ELFOSABI_FREEBSD, "x86_64--freebsd"},
       {ELF::EM_X86_64, ELF::ELFOSABI_OPENBSD, "x86_64--openbsd"},
-      {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"}};
+      {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvsass-nvidia-cuda"}};
   for (auto [Machine, OS, Triple] : Formats) {
     const DataForTest D(ELF::ELFCLASS64, ELF::ELFDATA2LSB, Machine, OS,
                         ELF::EF_AMDGPU_MACH_AMDGCN_LAST);