diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 5f14726c36672..06eccc80ab606 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3147,14 +3147,21 @@ as follows: ``A
`` Specifies the address space of objects created by '``alloca``'. Defaults to the default address space of 0. -``p[n]::[:][:]`` - This specifies the *size* of a pointer and its ```` and - ````\erred alignments for address space ``n``. - The fourth parameter ```` is the size of the - index that used for address calculation, which must be less than or equal - to the pointer size. If not - specified, the default index size is equal to the pointer size. All sizes - are in bits. The address space, ``n``, is optional, and if not specified, +``p[n]::[:[:]]`` + This specifies the properties of a pointer in address space ``n``. + The ```` parameter specifies the size of the bitwise representation. + For :ref:`non-integral pointers ` the representation size may + be larger than the address width of the underlying address space (e.g. to + accommodate additional metadata). + The alignment requirements are specified via the ```` and + ````\erred alignments parameters. + The fourth parameter ```` is the size of the index that used for + address calculations such as :ref:`getelementptr `. + It must be less than or equal to the pointer size. If not specified, the + default index size is equal to the pointer size. + The index size also specifies the width of addresses in this address space. + All sizes are in bits. + The address space, ``n``, is optional, and if not specified, denotes the default address space 0. The value of ``n`` must be in the range [1,2^24). ``i:[:]`` @@ -4266,6 +4273,16 @@ address spaces defined in the :ref:`datalayout string`. the default globals address space and ``addrspace("P")`` the program address space. +The representation of pointers can be different for each address space and does +not necessarily need to be a plain integer address (e.g. for +:ref:`non-integral pointers `). In addition to a representation +bits size, pointers in each address space also have an index size which defines +the bitwidth of indexing operations as well as the size of `integer addresses` +in this address space. For example, CHERI capabilities are twice the size of the +underlying addresses to accommodate for additional metadata such as bounds and +permissions: on a 32-bit system the bitwidth of the pointer representation size +is 64, but the underlying address width remains 32 bits. + The default address space is number zero. The semantics of non-zero address spaces are target-specific. Memory diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 2ad080e6d0cd2..ec54bab9ae3f6 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -92,6 +92,7 @@ class DataLayout { /// The function pointer alignment is a multiple of the function alignment. MultipleOfFunctionAlign, }; + private: bool BigEndian = false; @@ -324,16 +325,38 @@ class DataLayout { /// the backends/clients are updated. Align getPointerPrefAlignment(unsigned AS = 0) const; - /// Layout pointer size in bytes, rounded up to a whole - /// number of bytes. + /// The pointer representation size in bytes, rounded up to a whole number of + /// bytes. The difference between this function and getAddressSize() is that + /// this one returns the size of the entire pointer representation (including + /// metadata bits for fat pointers) and the latter only returns the number of + /// address bits. + /// \sa DataLayout::getAddressSizeInBits /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. unsigned getPointerSize(unsigned AS = 0) const; - // Index size in bytes used for address calculation, - /// rounded up to a whole number of bytes. + /// The index size in bytes used for address calculation, rounded up to a + /// whole number of bytes. This not only defines the size used in + /// getelementptr operations, but also the size of addresses in this \p AS. + /// For example, a 64-bit CHERI-enabled target has 128-bit pointers of which + /// only 64 are used to represent the address and the remaining ones are used + /// for metadata such as bounds and access permissions. In this case + /// getPointerSize() returns 16, but getIndexSize() returns 8. + /// To help with code understanding, the alias getAddressSize() can be used + /// instead of getIndexSize() to clarify that an address width is needed. unsigned getIndexSize(unsigned AS) const; + /// The integral size of a pointer in a given address space in bytes, which + /// is defined to be the same as getIndexSize(). This exists as a separate + /// function to make it clearer when reading code that the size of an address + /// is being requested. While targets exist where index size and the + /// underlying address width are not identical (e.g. AMDGPU fat pointers with + /// 48-bit addresses and 32-bit offsets indexing), there is currently no need + /// to differentiate these properties in LLVM. + /// \sa DataLayout::getIndexSize + /// \sa DataLayout::getAddressSizeInBits + unsigned getAddressSize(unsigned AS) const { return getIndexSize(AS); } + /// Return the address spaces containing non-integral pointers. Pointers in /// this address space don't have a well-defined bitwise representation. SmallVector getNonIntegralAddressSpaces() const { @@ -358,29 +381,53 @@ class DataLayout { return PTy && isNonIntegralPointerType(PTy); } - /// Layout pointer size, in bits + /// The size in bits of the pointer representation in a given address space. + /// This is not necessarily the same as the integer address of a pointer (e.g. + /// for fat pointers). + /// \sa DataLayout::getAddressSizeInBits() /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. unsigned getPointerSizeInBits(unsigned AS = 0) const { return getPointerSpec(AS).BitWidth; } - /// Size in bits of index used for address calculation in getelementptr. + /// The size in bits of indices used for address calculation in getelementptr + /// and for addresses in the given AS. See getIndexSize() for more + /// information. + /// \sa DataLayout::getAddressSizeInBits() unsigned getIndexSizeInBits(unsigned AS) const { return getPointerSpec(AS).IndexBitWidth; } - /// Layout pointer size, in bits, based on the type. If this function is + /// The size in bits of an address in for the given AS. This is defined to + /// return the same value as getIndexSizeInBits() since there is currently no + /// target that requires these two properties to have different values. See + /// getIndexSize() for more information. + /// \sa DataLayout::getIndexSizeInBits() + unsigned getAddressSizeInBits(unsigned AS) const { + return getIndexSizeInBits(AS); + } + + /// The pointer representation size in bits for this type. If this function is /// called with a pointer type, then the type size of the pointer is returned. /// If this function is called with a vector of pointers, then the type size /// of the pointer is returned. This should only be called with a pointer or /// vector of pointers. unsigned getPointerTypeSizeInBits(Type *) const; - /// Layout size of the index used in GEP calculation. + /// The size in bits of the index used in GEP calculation for this type. /// The function should be called with pointer or vector of pointers type. + /// This is defined to return the same value as getAddressSizeInBits(), + /// but separate functions exist for code clarity. unsigned getIndexTypeSizeInBits(Type *Ty) const; + /// The size in bits of an address for this type. + /// This is defined to return the same value as getIndexTypeSizeInBits(), + /// but separate functions exist for code clarity. + unsigned getAddressSizeInBits(Type *Ty) const { + return getIndexTypeSizeInBits(Ty); + } + unsigned getPointerTypeSize(Type *Ty) const { return getPointerTypeSizeInBits(Ty) / 8; } @@ -515,15 +562,21 @@ class DataLayout { /// are set. unsigned getLargestLegalIntTypeSizeInBits() const; - /// Returns the type of a GEP index in AddressSpace. + /// Returns the type of a GEP index in \p AddressSpace. /// If it was not specified explicitly, it will be the integer type of the /// pointer width - IntPtrType. IntegerType *getIndexType(LLVMContext &C, unsigned AddressSpace) const; + /// Returns the type of an address in \p AddressSpace + IntegerType *getAddressType(LLVMContext &C, unsigned AddressSpace) const { + return getIndexType(C, AddressSpace); + } /// Returns the type of a GEP index. /// If it was not specified explicitly, it will be the integer type of the /// pointer width - IntPtrType. Type *getIndexType(Type *PtrTy) const; + /// Returns the type of an address in \p AddressSpace + Type *getAddressType(Type *PtrTy) const { return getIndexType(PtrTy); } /// Returns the offset from the beginning of the type for the specified /// indices.