-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LLVM][IR] Add support for address space names in DataLayout #170559
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
cfa645f to
ebac1cc
Compare
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
|
@llvm/pr-subscribers-llvm-ir Author: Rahul Joshi (jurahul) ChangesAdd support for specifying the names of address spaces when specifying pointer properties for an address space. Update LLVM's AsmParser and LLParser to print and read these symbolic address space name. Patch is 26.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170559.diff 6 Files Affected:
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 54458201af0b3..9e67f726a92b3 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -92,7 +93,15 @@ class DataLayout {
/// of this would be CHERI capabilities where the validity bit is stored
/// separately from the pointer address+bounds information.
bool HasExternalState;
- LLVM_ABI bool operator==(const PointerSpec &Other) const;
+ // Symbolic name of the address space. We can store a StringRef here
+ // directly (backed by StringRepresentation) but then the copy construtor
+ // for DataLayout has to be updated to redirect these StringRefs to the new
+ // copy of StringRepresentation. To avoid that, we store just the offset and
+ // size of the address space name witin the StringRepresentation.
+ size_t AddrSpaceNameOffset;
+ size_t AddrSpaceNameSize;
+
+ StringRef getAddrSpaceName(const DataLayout &DL) const;
};
enum class FunctionPtrAlignType {
@@ -158,7 +167,8 @@ class DataLayout {
/// Sets or updates the specification for pointer in the given address space.
void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign,
Align PrefAlign, uint32_t IndexBitWidth,
- bool HasUnstableRepr, bool HasExternalState);
+ bool HasUnstableRepr, bool HasExternalState,
+ StringRef AddrSpaceName);
/// Internal helper to get alignment for integer of given bitwidth.
LLVM_ABI Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const;
@@ -173,11 +183,13 @@ class DataLayout {
Error parseAggregateSpec(StringRef Spec);
/// Attempts to parse pointer specification ('p').
- Error parsePointerSpec(StringRef Spec);
+ Error parsePointerSpec(StringRef Spec,
+ SmallDenseSet<StringRef, 8> &AddrSpaceNames);
/// Attempts to parse a single specification.
Error parseSpecification(StringRef Spec,
- SmallVectorImpl<unsigned> &NonIntegralAddressSpaces);
+ SmallVectorImpl<unsigned> &NonIntegralAddressSpaces,
+ SmallDenseSet<StringRef, 8> &AddrSpaceNames);
/// Attempts to parse a data layout string.
Error parseLayoutString(StringRef LayoutString);
@@ -324,9 +336,13 @@ class DataLayout {
return false;
}
- /// Layout pointer alignment
+ /// Layout pointer alignment.
LLVM_ABI Align getPointerABIAlignment(unsigned AS) const;
+ LLVM_ABI StringRef getAddressSpaceName(unsigned AS) const;
+
+ LLVM_ABI std::optional<unsigned> getNamedAddressSpace(StringRef Name) const;
+
/// Return target's alignment for stack-based pointers
/// FIXME: The defaults need to be removed once all of
/// the backends/clients are updated.
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 2a0246074a462..a09ab4fc7828c 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1958,13 +1958,16 @@ bool LLParser::parseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS) {
auto ParseAddrspaceValue = [&](unsigned &AddrSpace) -> bool {
if (Lex.getKind() == lltok::StringConstant) {
- auto AddrSpaceStr = Lex.getStrVal();
+ const std::string &AddrSpaceStr = Lex.getStrVal();
if (AddrSpaceStr == "A") {
AddrSpace = M->getDataLayout().getAllocaAddrSpace();
} else if (AddrSpaceStr == "G") {
AddrSpace = M->getDataLayout().getDefaultGlobalsAddressSpace();
} else if (AddrSpaceStr == "P") {
AddrSpace = M->getDataLayout().getProgramAddressSpace();
+ } else if (std::optional<unsigned> AS =
+ M->getDataLayout().getNamedAddressSpace(AddrSpaceStr)) {
+ AddrSpace = *AS;
} else {
return tokError("invalid symbolic addrspace '" + AddrSpaceStr + "'");
}
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 7932765db8359..10577fceee239 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -543,7 +543,8 @@ namespace {
class TypePrinting {
public:
- TypePrinting(const Module *M = nullptr) : DeferredM(M) {}
+ TypePrinting(const Module *M = nullptr)
+ : M(M), TypesIncorporated(M == nullptr) {}
TypePrinting(const TypePrinting &) = delete;
TypePrinting &operator=(const TypePrinting &) = delete;
@@ -563,8 +564,9 @@ class TypePrinting {
private:
void incorporateTypes();
- /// A module to process lazily when needed. Set to nullptr as soon as used.
- const Module *DeferredM;
+ /// A module to process lazily.
+ const Module *M;
+ bool TypesIncorporated;
TypeFinder NamedTypes;
@@ -605,11 +607,11 @@ bool TypePrinting::empty() {
}
void TypePrinting::incorporateTypes() {
- if (!DeferredM)
+ if (TypesIncorporated)
return;
- NamedTypes.run(*DeferredM, false);
- DeferredM = nullptr;
+ NamedTypes.run(*M, false);
+ TypesIncorporated = true;
// The list of struct types we got back includes all the struct types, split
// the unnamed ones out to a numbering and remove the anonymous structs.
@@ -630,6 +632,20 @@ void TypePrinting::incorporateTypes() {
NamedTypes.erase(NextToUse, NamedTypes.end());
}
+static void printAddressSpace(const Module *M, unsigned AS, raw_ostream &OS,
+ StringRef Prefix = " ", StringRef Suffix = "",
+ bool ForcePrint = false) {
+ if (AS == 0 && !ForcePrint)
+ return;
+ OS << Prefix << "addrspace(";
+ StringRef ASName = M ? M->getDataLayout().getAddressSpaceName(AS) : "";
+ if (!ASName.empty())
+ OS << "\"" << ASName << "\"";
+ else
+ OS << AS;
+ OS << ")" << Suffix;
+}
+
/// Write the specified type to the specified raw_ostream, making use of type
/// names or up references to shorten the type name where possible.
void TypePrinting::print(Type *Ty, raw_ostream &OS) {
@@ -686,8 +702,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
case Type::PointerTyID: {
PointerType *PTy = cast<PointerType>(Ty);
OS << "ptr";
- if (unsigned AddressSpace = PTy->getAddressSpace())
- OS << " addrspace(" << AddressSpace << ')';
+ printAddressSpace(M, PTy->getAddressSpace(), OS);
return;
}
case Type::ArrayTyID: {
@@ -3896,10 +3911,10 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
printThreadLocalModel(GV->getThreadLocalMode(), Out);
StringRef UA = getUnnamedAddrEncoding(GV->getUnnamedAddr());
if (!UA.empty())
- Out << UA << ' ';
+ Out << UA << ' ';
- if (unsigned AddressSpace = GV->getType()->getAddressSpace())
- Out << "addrspace(" << AddressSpace << ") ";
+ printAddressSpace(GV->getParent(), GV->getType()->getAddressSpace(), Out,
+ /*Prefix=*/"", /*Suffix=*/" ");
if (GV->isExternallyInitialized()) Out << "externally_initialized ";
Out << (GV->isConstant() ? "constant " : "global ");
TypePrinter.print(GV->getValueType(), Out);
@@ -4174,9 +4189,10 @@ void AssemblyWriter::printFunction(const Function *F) {
// a module with a non-zero program address space or if there is no valid
// Module* so that the file can be parsed without the datalayout string.
const Module *Mod = F->getParent();
- if (F->getAddressSpace() != 0 || !Mod ||
- Mod->getDataLayout().getProgramAddressSpace() != 0)
- Out << " addrspace(" << F->getAddressSpace() << ")";
+ bool ForcePrintAddressSpace =
+ !Mod || Mod->getDataLayout().getProgramAddressSpace() != 0;
+ printAddressSpace(Mod, F->getAddressSpace(), Out, /*Prefix=*/" ",
+ /*Suffix=*/"", ForcePrintAddressSpace);
if (Attrs.hasFnAttrs())
Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttrs());
if (F->hasSection()) {
@@ -4352,23 +4368,21 @@ void AssemblyWriter::printInfoComment(const Value &V, bool isMaterializable) {
static void maybePrintCallAddrSpace(const Value *Operand, const Instruction *I,
raw_ostream &Out) {
- // We print the address space of the call if it is non-zero.
if (Operand == nullptr) {
Out << " <cannot get addrspace!>";
return;
}
+
+ // We print the address space of the call if it is non-zero.
+ // We also print it if it is zero but not equal to the program address space
+ // or if we can't find a valid Module* to make it possible to parse
+ // the resulting file even without a datalayout string.
unsigned CallAddrSpace = Operand->getType()->getPointerAddressSpace();
- bool PrintAddrSpace = CallAddrSpace != 0;
- if (!PrintAddrSpace) {
- const Module *Mod = getModuleFromVal(I);
- // We also print it if it is zero but not equal to the program address space
- // or if we can't find a valid Module* to make it possible to parse
- // the resulting file even without a datalayout string.
- if (!Mod || Mod->getDataLayout().getProgramAddressSpace() != 0)
- PrintAddrSpace = true;
- }
- if (PrintAddrSpace)
- Out << " addrspace(" << CallAddrSpace << ")";
+ const Module *Mod = getModuleFromVal(I);
+ bool ForcePrintAddrSpace =
+ !Mod || Mod->getDataLayout().getProgramAddressSpace() != 0;
+ printAddressSpace(Mod, CallAddrSpace, Out, /*Prefix=*/" ", /*Suffix=*/"",
+ ForcePrintAddrSpace);
}
// This member is called for each Instruction in a function..
@@ -4735,9 +4749,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", align " << A->value();
}
- unsigned AddrSpace = AI->getAddressSpace();
- if (AddrSpace != 0)
- Out << ", addrspace(" << AddrSpace << ')';
+ printAddressSpace(AI->getModule(), AI->getAddressSpace(), Out,
+ /*Prefix=*/", ");
} else if (isa<CastInst>(I)) {
if (Operand) {
Out << ' ';
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 49e1f898ca594..52b77e39ffe5b 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -147,12 +147,10 @@ bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const {
PrefAlign == Other.PrefAlign;
}
-bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
- return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
- ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
- IndexBitWidth == Other.IndexBitWidth &&
- HasUnstableRepresentation == Other.HasUnstableRepresentation &&
- HasExternalState == Other.HasExternalState;
+StringRef
+DataLayout::PointerSpec::getAddrSpaceName(const DataLayout &DL) const {
+ return StringRef(DL.StringRepresentation)
+ .substr(AddrSpaceNameOffset, AddrSpaceNameSize);
}
namespace {
@@ -195,7 +193,7 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
// Default pointer type specifications.
constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
// p0:64:64:64:64
- {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, false},
+ {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, false, 0, 0},
};
DataLayout::DataLayout()
@@ -233,6 +231,15 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
bool DataLayout::operator==(const DataLayout &Other) const {
// NOTE: StringRepresentation might differ, it is not canonicalized.
+ auto IsPointerSpecsEqual = [this, &Other](const PointerSpec &A,
+ const PointerSpec &B) {
+ return A.AddrSpace == B.AddrSpace && A.BitWidth == B.BitWidth &&
+ A.ABIAlign == B.ABIAlign && A.PrefAlign == B.PrefAlign &&
+ A.IndexBitWidth == B.IndexBitWidth &&
+ A.HasUnstableRepresentation == B.HasUnstableRepresentation &&
+ A.HasExternalState == B.HasExternalState &&
+ A.getAddrSpaceName(*this) == B.getAddrSpaceName(Other);
+ };
return BigEndian == Other.BigEndian &&
AllocaAddrSpace == Other.AllocaAddrSpace &&
ProgramAddrSpace == Other.ProgramAddrSpace &&
@@ -243,9 +250,9 @@ bool DataLayout::operator==(const DataLayout &Other) const {
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths && IntSpecs == Other.IntSpecs &&
FloatSpecs == Other.FloatSpecs && VectorSpecs == Other.VectorSpecs &&
- PointerSpecs == Other.PointerSpecs &&
StructABIAlignment == Other.StructABIAlignment &&
- StructPrefAlignment == Other.StructPrefAlignment;
+ StructPrefAlignment == Other.StructPrefAlignment &&
+ llvm::equal(PointerSpecs, Other.PointerSpecs, IsPointerSpecsEqual);
}
Expected<DataLayout> DataLayout::parse(StringRef LayoutString) {
@@ -271,6 +278,48 @@ static Error parseAddrSpace(StringRef Str, unsigned &AddrSpace) {
return Error::success();
}
+/// Attempts to parse an address space component of a specification allowing
+/// name to be specified as well. The input is expected to be of the form
+/// <number> '(' name ' )', with the name otional and the number is optional as
+/// well.
+static Error parseAddrSpaceAndName(StringRef Str, unsigned &AddrSpace,
+ StringRef &AddrSpaceName) {
+ if (Str.empty())
+ return createStringError("address space component cannot be empty");
+
+ if (isDigit(Str.front())) {
+ if (Str.consumeInteger(10, AddrSpace) || !isUInt<24>(AddrSpace))
+ return createStringError("address space must be a 24-bit integer");
+ }
+
+ if (Str.empty())
+ return Error::success();
+
+ if (Str.front() != '(')
+ return createStringError("address space must be a 24-bit integer");
+
+ // Expect atleast one character in between the ( and ).
+ if (Str.back() != ')' || Str.size() == 2)
+ return createStringError("Expected `( address space name )`");
+
+ AddrSpaceName = Str.drop_front().drop_back();
+ // TODO: Do we need any additional verification for address space name? Like
+ // should be a valid identifier of some sort? Its not strictly needed.
+
+ // LLVM's assembly parser used names "P", "G" and "A" to represent the
+ // program, default global, and alloca address space. This mapping is not 1:1
+ // in the sense that all of them can map to the same numberic address space.
+ // Diallow using these predefined symbolic address space names as address
+ // space names specified in the data layout.
+ if (AddrSpaceName.size() == 1) {
+ char C = AddrSpaceName.front();
+ if (C == 'P' || C == 'G' || C == 'A')
+ return createStringError(
+ "Cannot use predefined address space names P/G/A in data layout");
+ }
+ return Error::success();
+}
+
/// Attempts to parse a size component of a specification.
static Error parseSize(StringRef Str, unsigned &BitWidth,
StringRef Name = "size") {
@@ -395,7 +444,8 @@ Error DataLayout::parseAggregateSpec(StringRef Spec) {
return Error::success();
}
-Error DataLayout::parsePointerSpec(StringRef Spec) {
+Error DataLayout::parsePointerSpec(
+ StringRef Spec, SmallDenseSet<StringRef, 8> &AddrSpaceNames) {
// p[<n>]:<size>:<abi>[:<pref>[:<idx>]]
SmallVector<StringRef, 5> Components;
assert(Spec.front() == 'p');
@@ -408,6 +458,7 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
unsigned AddrSpace = 0;
bool ExternalState = false;
bool UnstableRepr = false;
+ StringRef AddrSpaceName;
StringRef AddrSpaceStr = Components[0];
while (!AddrSpaceStr.empty()) {
char C = AddrSpaceStr.front();
@@ -424,12 +475,18 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
AddrSpaceStr = AddrSpaceStr.drop_front(1);
}
if (!AddrSpaceStr.empty())
- if (Error Err = parseAddrSpace(AddrSpaceStr, AddrSpace))
+ if (Error Err =
+ parseAddrSpaceAndName(AddrSpaceStr, AddrSpace, AddrSpaceName))
return Err; // Failed to parse the remaining characters as a number
if (AddrSpace == 0 && (ExternalState || UnstableRepr))
return createStringError(
"address space 0 cannot be unstable or have external state");
+ // Check for duplicate address space names.
+ if (!AddrSpaceName.empty() && !AddrSpaceNames.insert(AddrSpaceName).second)
+ return createStringError("address space name `" + AddrSpaceName +
+ "` already used");
+
// Size. Required, cannot be zero.
unsigned BitWidth;
if (Error Err = parseSize(Components[1], BitWidth, "pointer size"))
@@ -462,12 +519,13 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
"index size cannot be larger than the pointer size");
setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth,
- UnstableRepr, ExternalState);
+ UnstableRepr, ExternalState, AddrSpaceName);
return Error::success();
}
Error DataLayout::parseSpecification(
- StringRef Spec, SmallVectorImpl<unsigned> &NonIntegralAddressSpaces) {
+ StringRef Spec, SmallVectorImpl<unsigned> &NonIntegralAddressSpaces,
+ SmallDenseSet<StringRef, 8> &AddrSpaceNames) {
// The "ni" specifier is the only two-character specifier. Handle it first.
if (Spec.starts_with("ni")) {
// ni:<address space>[:<address space>]...
@@ -499,7 +557,7 @@ Error DataLayout::parseSpecification(
return parseAggregateSpec(Spec);
if (Specifier == 'p')
- return parsePointerSpec(Spec);
+ return parsePointerSpec(Spec, AddrSpaceNames);
StringRef Rest = Spec.drop_front();
switch (Specifier) {
@@ -616,7 +674,7 @@ Error DataLayout::parseSpecification(
}
Error DataLayout::parseLayoutString(StringRef LayoutString) {
- StringRepresentation = std::string(LayoutString);
+ StringRepresentation = LayoutString.str();
if (LayoutString.empty())
return Error::success();
@@ -624,10 +682,12 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
// Split the data layout string into specifications separated by '-' and
// parse each specification individually, updating internal data structures.
SmallVector<unsigned, 8> NonIntegralAddressSpaces;
- for (StringRef Spec : split(LayoutString, '-')) {
+ SmallDenseSet<StringRef, 8> AddessSpaceNames;
+ for (StringRef Spec : split(StringRepresentation, '-')) {
if (Spec.empty())
return createStringError("empty specification is not allowed");
- if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces))
+ if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces,
+ AddessSpaceNames))
return Err;
}
// Mark all address spaces that were qualified as non-integral now. This has
@@ -638,7 +698,8 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
// the spec for AS0, and we then update that to mark it non-integral.
const PointerSpec &PS = getPointerSpec(AS);
setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth,
- /*HasUnstableRepr=*/true, /*HasExternalState=*/false);
+ /*HasUnstableRepr=*/true, /*HasExternalState=*/false,
+ getAddressSpaceName(AS));
}
return Error::success();
@@ -687,12 +748,28 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const {
void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
Align ABIAlign, Align PrefAlign,
uint32_t IndexBitWidth, bool HasUnstableRepr,
- bool HasExternalState) {
+ bool HasExternalState,
+ StringRef AddrSpaceName) {
auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace());
+ size_t AddrSpaceNameOffset = 0, AddrSpaceNameSize = AddrSpaceName.size();
+ if (!AddrSpaceName.empty()) {
+ // Validate that AddrSpaceName points to data within the
+ // StringRepresentation.
+ const char *RepStart = StringRepresentation.data();
+ const char *ASStart = AddrSpaceName.data();
+ [[maybe_unused]] const char *RepEnd =
+ RepStart + StringRepresentation.size();
+ [[maybe_unused]] const char *ASEnd = ASStart + AddrSpaceNameSize;
+
+ assert(RepStart <= ASStart && ASStart < RepEnd && Rep...
[truncated]
|
ebac1cc to
7ba46b8
Compare
arichardson
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Haven't had time to review in detail but generally looks good to me. Two small comments for now and will try to finish reviewing tomorrow
|
Thanks, will wait for @nikic's approval as well before merging. |
Add support for specifying the names of address spaces when specifying pointer properties for an address space. Update LLVM's AsmPrinter and LLParser to print and read these symbolic address space name.