diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 5f14726c36672..06eccc80ab606 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3147,14 +3147,21 @@ as follows:
``A
``
Specifies the address space of objects created by '``alloca``'.
Defaults to the default address space of 0.
-``p[n]::[:][:]``
- This specifies the *size* of a pointer and its ```` and
- ````\erred alignments for address space ``n``.
- The fourth parameter ```` is the size of the
- index that used for address calculation, which must be less than or equal
- to the pointer size. If not
- specified, the default index size is equal to the pointer size. All sizes
- are in bits. The address space, ``n``, is optional, and if not specified,
+``p[n]:<size>:<abi>[:<pref>[:<idx>]]``
+ This specifies the properties of a pointer in address space ``n``.
+ The ``<size>`` parameter specifies the size of the bitwise representation.
+ For :ref:`non-integral pointers <nointptrtype>` the representation size may
+ be larger than the address width of the underlying address space (e.g. to
+ accommodate additional metadata).
+ The alignment requirements are specified via the ``<abi>`` and
+ ``<pref>``\erred alignment parameters.
+ The fourth parameter ``<idx>`` is the size of the index that is used for
+ address calculations such as :ref:`getelementptr <i_getelementptr>`.
+ It must be less than or equal to the pointer size. If not specified, the
+ default index size is equal to the pointer size.
+ The index size also specifies the width of addresses in this address space.
+ All sizes are in bits.
+ The address space, ``n``, is optional, and if not specified,
denotes the default address space 0. The value of ``n`` must be
in the range [1,2^24).
``i:[:]``
@@ -4266,6 +4273,16 @@ address spaces defined in the :ref:`datalayout string`.
the default globals address space and ``addrspace("P")`` the program address
space.
+The representation of pointers can be different for each address space and does
+not necessarily need to be a plain integer address (e.g. for
+:ref:`non-integral pointers <nointptrtype>`). In addition to a representation
+size in bits, pointers in each address space also have an index size which
+defines the bitwidth of indexing operations as well as the size of
+`integer addresses` in this address space. For example, CHERI capabilities are
+twice the size of the underlying addresses to accommodate additional metadata
+such as bounds and permissions: on a 32-bit system the pointer representation
+size is 64 bits, but the underlying address width remains 32 bits.
+
The default address space is number zero.
The semantics of non-zero address spaces are target-specific. Memory
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 2ad080e6d0cd2..ec54bab9ae3f6 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -92,6 +92,7 @@ class DataLayout {
/// The function pointer alignment is a multiple of the function alignment.
MultipleOfFunctionAlign,
};
+
private:
bool BigEndian = false;
@@ -324,16 +325,38 @@ class DataLayout {
/// the backends/clients are updated.
Align getPointerPrefAlignment(unsigned AS = 0) const;
- /// Layout pointer size in bytes, rounded up to a whole
- /// number of bytes.
+ /// The pointer representation size in bytes, rounded up to a whole number of
+ /// bytes. The difference between this function and getAddressSize() is that
+ /// this one returns the size of the entire pointer representation (including
+ /// metadata bits for fat pointers) and the latter only returns the number of
+ /// address bits.
+ /// \sa DataLayout::getAddressSizeInBits
/// FIXME: The defaults need to be removed once all of
/// the backends/clients are updated.
unsigned getPointerSize(unsigned AS = 0) const;
- // Index size in bytes used for address calculation,
- /// rounded up to a whole number of bytes.
+ /// The index size in bytes used for address calculation, rounded up to a
+ /// whole number of bytes. This not only defines the size used in
+ /// getelementptr operations, but also the size of addresses in this \p AS.
+ /// For example, a 64-bit CHERI-enabled target has 128-bit pointers of which
+ /// only 64 are used to represent the address and the remaining ones are used
+ /// for metadata such as bounds and access permissions. In this case
+ /// getPointerSize() returns 16, but getIndexSize() returns 8.
+ /// To help with code understanding, the alias getAddressSize() can be used
+ /// instead of getIndexSize() to clarify that an address width is needed.
unsigned getIndexSize(unsigned AS) const;
+ /// The integral size of a pointer in a given address space in bytes, which
+ /// is defined to be the same as getIndexSize(). This exists as a separate
+ /// function to make it clearer when reading code that the size of an address
+ /// is being requested. While targets exist where index size and the
+ /// underlying address width are not identical (e.g. AMDGPU fat pointers with
+ /// 48-bit addresses and 32-bit offset indexing), there is currently no need
+ /// to differentiate these properties in LLVM.
+ /// \sa DataLayout::getIndexSize
+ /// \sa DataLayout::getAddressSizeInBits
+ unsigned getAddressSize(unsigned AS) const { return getIndexSize(AS); }
+
/// Return the address spaces containing non-integral pointers. Pointers in
/// this address space don't have a well-defined bitwise representation.
SmallVector getNonIntegralAddressSpaces() const {
@@ -358,29 +381,53 @@ class DataLayout {
return PTy && isNonIntegralPointerType(PTy);
}
- /// Layout pointer size, in bits
+ /// The size in bits of the pointer representation in a given address space.
+ /// This is not necessarily the same as the size of the integer address of a
+ /// pointer (e.g. for fat pointers).
+ /// \sa DataLayout::getAddressSizeInBits()
/// FIXME: The defaults need to be removed once all of
/// the backends/clients are updated.
unsigned getPointerSizeInBits(unsigned AS = 0) const {
return getPointerSpec(AS).BitWidth;
}
- /// Size in bits of index used for address calculation in getelementptr.
+ /// The size in bits of indices used for address calculation in getelementptr
+ /// and for addresses in the given AS. See getIndexSize() for more
+ /// information.
+ /// \sa DataLayout::getAddressSizeInBits()
unsigned getIndexSizeInBits(unsigned AS) const {
return getPointerSpec(AS).IndexBitWidth;
}
- /// Layout pointer size, in bits, based on the type. If this function is
+ /// The size in bits of an address for the given AS. This is defined to
+ /// return the same value as getIndexSizeInBits() since there is currently no
+ /// target that requires these two properties to have different values. See
+ /// getIndexSize() for more information.
+ /// \sa DataLayout::getIndexSizeInBits()
+ unsigned getAddressSizeInBits(unsigned AS) const {
+ return getIndexSizeInBits(AS);
+ }
+
+ /// The pointer representation size in bits for this type. If this function is
/// called with a pointer type, then the type size of the pointer is returned.
/// If this function is called with a vector of pointers, then the type size
/// of the pointer is returned. This should only be called with a pointer or
/// vector of pointers.
unsigned getPointerTypeSizeInBits(Type *) const;
- /// Layout size of the index used in GEP calculation.
+ /// The size in bits of the index used in GEP calculation for this type.
/// The function should be called with pointer or vector of pointers type.
+ /// This is defined to return the same value as getAddressSizeInBits(),
+ /// but separate functions exist for code clarity.
unsigned getIndexTypeSizeInBits(Type *Ty) const;
+ /// The size in bits of an address for this type.
+ /// This is defined to return the same value as getIndexTypeSizeInBits(),
+ /// but separate functions exist for code clarity.
+ unsigned getAddressSizeInBits(Type *Ty) const {
+ return getIndexTypeSizeInBits(Ty);
+ }
+
unsigned getPointerTypeSize(Type *Ty) const {
return getPointerTypeSizeInBits(Ty) / 8;
}
@@ -515,15 +562,21 @@ class DataLayout {
/// are set.
unsigned getLargestLegalIntTypeSizeInBits() const;
- /// Returns the type of a GEP index in AddressSpace.
+ /// Returns the type of a GEP index in \p AddressSpace.
/// If it was not specified explicitly, it will be the integer type of the
/// pointer width - IntPtrType.
IntegerType *getIndexType(LLVMContext &C, unsigned AddressSpace) const;
+ /// Returns the type of an address in \p AddressSpace.
+ IntegerType *getAddressType(LLVMContext &C, unsigned AddressSpace) const {
+ return getIndexType(C, AddressSpace);
+ }
/// Returns the type of a GEP index.
/// If it was not specified explicitly, it will be the integer type of the
/// pointer width - IntPtrType.
Type *getIndexType(Type *PtrTy) const;
+ /// Returns the type of an address for the pointer type \p PtrTy.
+ Type *getAddressType(Type *PtrTy) const { return getIndexType(PtrTy); }
/// Returns the offset from the beginning of the type for the specified
/// indices.