-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[ELF][LLDB] Add an nvsass triple #159459
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ELF][LLDB] Add an nvsass triple #159459
Conversation
@llvm/pr-subscribers-llvm-binary-utilities @llvm/pr-subscribers-lldb Author: Walter Erquinigo (walter-erquinigo) ChangesWhen handling CUDA ELF files via objdump or LLDB, the ELF parser in LLVM needs to distinguish if an ELF file is sass or not, which requires a triple for sass to exist in llvm. This patch includes all the necessary changes for LLDB and objdump to correctly identify these files with the correct triple. Full diff: https://github.com/llvm/llvm-project/pull/159459.diff 7 Files Affected:
diff --git a/lldb/include/lldb/Utility/ArchSpec.h b/lldb/include/lldb/Utility/ArchSpec.h
index 361108fd8f0e7..8415eca96ea69 100644
--- a/lldb/include/lldb/Utility/ArchSpec.h
+++ b/lldb/include/lldb/Utility/ArchSpec.h
@@ -236,6 +236,8 @@ class ArchSpec {
eCore_wasm32,
+ eCore_nvsass,
+
kNumCores,
kCore_invalid,
@@ -282,8 +284,10 @@ class ArchSpec {
kCore_mips64el_last = eCore_mips64r6el,
kCore_mips_first = eCore_mips32,
- kCore_mips_last = eCore_mips64r6el
+ kCore_mips_last = eCore_mips64r6el,
+ kCore_nvsass_first = eCore_nvsass,
+ kCore_nvsass_last = eCore_nvsass,
};
/// Default constructor.
diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp
index 2a87cc6bf7de9..5ce66a4135d78 100644
--- a/lldb/source/Utility/ArchSpec.cpp
+++ b/lldb/source/Utility/ArchSpec.cpp
@@ -248,6 +248,8 @@ static const CoreDefinition g_core_definitions[] = {
{eByteOrderLittle, 4, 1, 4, llvm::Triple::wasm32, ArchSpec::eCore_wasm32,
"wasm32"},
+
+ {eByteOrderLittle, 8, 4, 4, llvm::Triple::nvsass, ArchSpec::eCore_nvsass, "nvsass"},
};
// Ensure that we have an entry in the g_core_definitions for each core. If you
@@ -403,6 +405,7 @@ static const ArchDefinitionEntry g_elf_arch_entries[] = {
{ArchSpec::eCore_riscv64, llvm::ELF::EM_RISCV, ArchSpec::eRISCVSubType_riscv64}, // riscv64
{ArchSpec::eCore_loongarch32, llvm::ELF::EM_LOONGARCH, ArchSpec::eLoongArchSubType_loongarch32}, // loongarch32
{ArchSpec::eCore_loongarch64, llvm::ELF::EM_LOONGARCH, ArchSpec::eLoongArchSubType_loongarch64}, // loongarch64
+ {ArchSpec::eCore_nvsass, llvm::ELF::EM_CUDA, }, // nvsass
};
// clang-format on
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index ced1afdd4cc6a..7f8f4a2a01fe4 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -69,7 +69,7 @@ class LLVM_ABI ELFObjectFileBase : public ObjectFile {
SubtargetFeatures getLoongArchFeatures() const;
StringRef getAMDGPUCPUName() const;
- StringRef getNVPTXCPUName() const;
+ StringRef getCUDACPUName() const;
protected:
ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
@@ -1431,9 +1431,7 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
}
case ELF::EM_CUDA: {
- if (EF.getHeader().e_ident[ELF::EI_CLASS] == ELF::ELFCLASS32)
- return Triple::nvptx;
- return Triple::nvptx64;
+ return Triple::nvsass;
}
case ELF::EM_BPF:
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index f9b4fc3aa2010..f5043baa68f27 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -110,6 +110,7 @@ class Triple {
renderscript32, // 32-bit RenderScript
renderscript64, // 64-bit RenderScript
ve, // NEC SX-Aurora Vector Engine
+ nvsass, // NVIDIA SASS
LastArchType = ve
};
enum SubArchType {
@@ -905,6 +906,8 @@ class Triple {
bool isAMDGPU() const { return getArch() == Triple::r600 || isAMDGCN(); }
+ bool isNVSASS() const { return getArch() == Triple::nvsass; }
+
/// Tests whether the target is Thumb (little and big endian).
bool isThumb() const {
return getArch() == Triple::thumb || getArch() == Triple::thumbeb;
@@ -1267,7 +1270,7 @@ class Triple {
LLVM_ABI bool isCompatibleWith(const Triple &Other) const;
/// Test whether the target triple is for a GPU.
- bool isGPU() const { return isSPIRV() || isNVPTX() || isAMDGPU(); }
+ bool isGPU() const { return isSPIRV() || isNVPTX() || isAMDGPU() || isNVSASS(); }
/// Merge target triples.
LLVM_ABI std::string merge(const Triple &Other) const;
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index f9fda23469ee5..f083efe1a25b1 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -438,7 +438,7 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
case ELF::EM_AMDGPU:
return getAMDGPUCPUName();
case ELF::EM_CUDA:
- return getNVPTXCPUName();
+ return getCUDACPUName();
case ELF::EM_PPC:
case ELF::EM_PPC64:
return StringRef("future");
@@ -620,7 +620,7 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
}
}
-StringRef ELFObjectFileBase::getNVPTXCPUName() const {
+StringRef ELFObjectFileBase::getCUDACPUName() const {
assert(getEMachine() == ELF::EM_CUDA);
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
? getPlatformFlags() & ELF::EF_CUDA_SM
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index e222588ea389b..40a8a057ef18d 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -618,6 +618,7 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
case Triple::shave:
case Triple::renderscript32:
case Triple::renderscript64:
+ case Triple::nvsass:
// These are all virtual ISAs with no LLVM backend, and therefore no fixed
// LLVM data layout.
return "";
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ac3626db46ea9..8fb9ba76d2474 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -54,6 +54,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case msp430: return "msp430";
case nvptx64: return "nvptx64";
case nvptx: return "nvptx";
+ case nvsass: return "nvsass";
case ppc64: return "powerpc64";
case ppc64le: return "powerpc64le";
case ppc: return "powerpc";
@@ -242,6 +243,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case wasm32:
case wasm64: return "wasm";
+ case nvsass: return "nvsass";
+
case riscv32:
case riscv64:
case riscv32be:
@@ -486,6 +489,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("xcore", xcore)
.Case("nvptx", nvptx)
.Case("nvptx64", nvptx64)
+ .Case("nvsass", nvsass)
.Case("amdil", amdil)
.Case("amdil64", amdil64)
.Case("hsail", hsail)
@@ -627,6 +631,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("xcore", Triple::xcore)
.Case("nvptx", Triple::nvptx)
.Case("nvptx64", Triple::nvptx64)
+ .Case("nvsass", Triple::nvsass)
.Case("amdil", Triple::amdil)
.Case("amdil64", Triple::amdil64)
.Case("hsail", Triple::hsail)
@@ -985,6 +990,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::msp430:
case Triple::nvptx64:
case Triple::nvptx:
+ case Triple::nvsass:
case Triple::ppc64le:
case Triple::ppcle:
case Triple::r600:
@@ -1745,6 +1751,9 @@ unsigned Triple::getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::nvptx64:
+ // nvsass can represent both 32- and 64-bit pointers, but assume
+ // 64-bit for the triple
+ case llvm::Triple::nvsass:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::renderscript64:
@@ -1823,6 +1832,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::mips:
case Triple::mipsel:
case Triple::nvptx:
+ case Triple::nvsass:
case Triple::ppc:
case Triple::ppcle:
case Triple::r600:
@@ -1910,6 +1920,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::mips64:
case Triple::mips64el:
case Triple::nvptx64:
+ case Triple::nvsass:
case Triple::ppc64:
case Triple::ppc64le:
case Triple::renderscript64:
@@ -1980,6 +1991,7 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::msp430:
case Triple::nvptx64:
case Triple::nvptx:
+ case Triple::nvsass:
case Triple::r600:
case Triple::renderscript32:
case Triple::renderscript64:
@@ -2095,6 +2107,7 @@ bool Triple::isLittleEndian() const {
case Triple::msp430:
case Triple::nvptx64:
case Triple::nvptx:
+ case Triple::nvsass:
case Triple::ppcle:
case Triple::ppc64le:
case Triple::r600:
|
93ae87d
to
d5f269b
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
When handling CUDA ELF files via objdump or LLDB, the ELF parser in LLVM needs to distinguish if an ELF file is sass or not, which requires a triple for sass to exist in llvm. This patch includes all the necessary changes for LLDB and objdump to correctly identify these files with the correct triple.
d5f269b
to
a14b6f1
Compare
{ELF::EM_X86_64, ELF::ELFOSABI_FREEBSD, "x86_64--freebsd"}, | ||
{ELF::EM_X86_64, ELF::ELFOSABI_OPENBSD, "x86_64--openbsd"}, | ||
{ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"}}; | ||
{ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvsass-nvidia-cuda"}}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we keep both targets tested here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After inspecting that code, I realized that this can only work with nvsass
. I actually got the impression that folks used nvptx
here because of the lack of a proper sass
triple.
If there are indeed object files with ptx and no sass, a flag would be needed somewhere in the ELF file for the parser to distinguish the actual triple. And this seems not to be the case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this nvsass
target supposed to do exactly? It's just nvptx64
but duplicating the name. The reason you can't test both is because your change replaces the old handling with a new name.
SubtargetFeatures getLoongArchFeatures() const; | ||
|
||
StringRef getAMDGPUCPUName() const; | ||
StringRef getNVPTXCPUName() const; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why was this changed?
This caused all of the |
Maybe I'm not familiar with CUDA, but there are ELF targets that aren't SASS? This corresponds to the triple |
@walter-erquinigo I agree with @jhuber6. Addition of a new triple looks unnecessary for the stated purpose. |
Summary: This patch has broken the `libc` build bot. I could work around that but the changes seem unnecessary. This reverts commit 9ba844e.
I'm assuming the |
Interesting. Thanks for the feedback and thanks for reverting this change. |
If you can make a PR for just the lldb stuff I can review it. I appreciate NVIDIA trying to upstream more support, so I'm not trying to dissuade you. |
@jhuber6 , something I care about for LLDB is being able to use different disassemblers for ptx and sass. The unique situation here is that sass is not part of LLVM code generation, and it seems that's why LLVM is unaware of sass. LLVM can only generate ptx and folks rely on an nvidia proprietary compiler to go from ptx to sass. But at runtime, LLDB sees both sass and nvptx. I guess that a simpler patch that would exist only within LLDB is to add two flavors to the nvptx architecture, one for nvptx and for sass in the ArchSpec class in LLDB. I for sure can do that without doing major architectural changes like this. |
LLVM's backend can only emit PTX, but as a toolchain we simply export that handling to |
I see. Thanks! I'll think more about this next week |
When handling CUDA ELF files via objdump or LLDB, the ELF parser in LLVM needs to distinguish if an ELF file is sass or not, which requires a triple for sass to exist in llvm. This patch includes all the necessary changes for LLDB and objdump to correctly identify these files with the correct triple.