From 493cc09369390df34c52a11992cbe339ac744955 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 12 May 2025 12:23:42 -0700 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?= =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 [skip ci] --- clang/lib/Basic/Targets/AMDGPU.cpp | 7 +- clang/test/CodeGen/target-data.c | 4 +- clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl | 2 +- llvm/docs/LangRef.rst | 98 ++++++++- llvm/include/llvm-c/Core.h | 1 + .../llvm/Analysis/TargetTransformInfoImpl.h | 8 + llvm/include/llvm/AsmParser/LLToken.h | 1 + llvm/include/llvm/Bitcode/LLVMBitCodes.h | 3 +- .../llvm/CodeGen/GlobalISel/IRTranslator.h | 1 + llvm/include/llvm/IR/DataLayout.h | 62 +++++- llvm/include/llvm/IR/InstVisitor.h | 1 + llvm/include/llvm/IR/Instruction.def | 53 ++--- llvm/include/llvm/IR/Instructions.h | 35 +++- llvm/include/llvm/SandboxIR/Instruction.h | 1 + llvm/lib/AsmParser/LLLexer.cpp | 1 + llvm/lib/AsmParser/LLParser.cpp | 2 + llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 1 + llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 1 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 14 ++ .../SelectionDAG/SelectionDAGBuilder.cpp | 30 ++- .../SelectionDAG/SelectionDAGBuilder.h | 1 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 1 + llvm/lib/IR/AutoUpgrade.cpp | 2 +- llvm/lib/IR/Instruction.cpp | 1 + llvm/lib/IR/Instructions.cpp | 55 ++++-- llvm/lib/SandboxIR/Context.cpp | 1 + .../AMDGPU/AMDGPULowerBufferFatPointers.cpp | 17 ++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 +- .../StackSafetyAnalysis/extend-ptr.ll | 2 +- llvm/test/Assembler/ptrtoaddr-const.ll | 15 ++ llvm/test/Assembler/ptrtoaddr.ll | 6 + .../GlobalISel/ptrtoint-ptrtoaddr-p8.ll | 187 ++++++++++++++++++ .../AMDGPU/GlobalISel/unsupported-ptr-add.ll | 5 +- .../lower-buffer-fat-pointers-pointer-ops.ll | 47 +++++ llvm/test/CodeGen/AMDGPU/ptrmask.ll | 46 ++--- 
.../CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll | 85 ++++++++ llvm/test/CodeGen/X86/GlobalISel/ptrtoaddr.ll | 66 +++++++ llvm/test/CodeGen/X86/ptrtoaddr.ll | 66 +++++++ .../AlignmentFromAssumptions/amdgpu-crash.ll | 2 +- .../Transforms/EarlyCSE/AMDGPU/memrealtime.ll | 2 +- .../FunctionAttrs/make-buffer-rsrc.ll | 2 +- .../regression-convergence-tokens.ll | 6 +- .../IRNormalizer/regression-infinite-loop.ll | 44 ++--- .../IRNormalizer/reordering-basic.ll | 14 +- .../Transforms/IRNormalizer/reordering.ll | 8 +- .../AMDGPU/noop-ptrint-pair.ll | 2 +- .../X86/noop-ptrint-pair.ll | 2 +- .../LoopLoadElim/pr46854-adress-spaces.ll | 2 +- .../OpenMP/attributor_pointer_offset_crash.ll | 2 +- .../OpenMP/indirect_call_kernel_info_crash.ll | 2 +- .../OpenMP/spmdization_constant_prop.ll | 2 +- .../OpenMP/spmdization_kernel_env_dep.ll | 2 +- .../OpenMP/values_in_offload_arrays.alloca.ll | 2 +- .../Bitcode/DataLayoutUpgradeTest.cpp | 51 ++--- .../Frontend/OpenMPIRBuilderTest.cpp | 6 +- .../Transforms/Utils/CodeExtractorTest.cpp | 2 +- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 3 +- .../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 2 +- .../omptarget-memcpy-align-metadata.mlir | 2 +- .../LLVMIR/omptarget-multi-reduction.mlir | 2 +- .../LLVMIR/omptarget-parallel-llvm.mlir | 2 +- .../LLVMIR/omptarget-parallel-wsloop.mlir | 2 +- .../Target/LLVMIR/omptarget-private-llvm.mlir | 2 +- .../omptarget-teams-distribute-reduction.mlir | 2 +- .../LLVMIR/omptarget-teams-reduction.mlir | 2 +- .../LLVMIR/omptarget-wsloop-collapsed.mlir | 2 +- mlir/test/Target/LLVMIR/omptarget-wsloop.mlir | 2 +- 67 files changed, 920 insertions(+), 192 deletions(-) create mode 100644 llvm/test/Assembler/ptrtoaddr-const.ll create mode 100644 llvm/test/Assembler/ptrtoaddr.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll create mode 100644 llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll create mode 100644 llvm/test/CodeGen/X86/GlobalISel/ptrtoaddr.ll create mode 100644 
llvm/test/CodeGen/X86/ptrtoaddr.ll diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index c368200f3f739..056a3d6579fa5 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" - "32-v48:64-v96:128" - "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" - "-ni:7:8:9"; + "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" + "v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 9cb00e8ee73d3..41a3f59b0fc81 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" // Test default -target-cpu // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SIDefault -// R600SIDefault: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=AARCH64 diff --git a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl index bb52f87615214..713ae48648aa4 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s -// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" void foo(void) {} diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 7296bb84b7d95..2d87211ee3538 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3147,14 +3147,21 @@ as follows: ``A
`` Specifies the address space of objects created by '``alloca``'. Defaults to the default address space of 0. -``p[n]::[:][:]`` - This specifies the *size* of a pointer and its ```` and - ````\erred alignments for address space ``n``. - The fourth parameter ```` is the size of the - index that used for address calculation, which must be less than or equal - to the pointer size. If not - specified, the default index size is equal to the pointer size. All sizes - are in bits. The address space, ``n``, is optional, and if not specified, +``p[n]::[:[:]]`` + This specifies the properties of a pointer in address space ``n``. + The ```` parameter specifies the size of the bitwise representation. + For :ref:`non-integral pointers ` the representation size may + be larger than the address width of the underlying address space (e.g. to + accommodate additional metadata). + The alignment requirements are specified via the ```` and + ````\erred alignments parameters. + The fourth parameter ```` is the size of the index that used for + address calculations such as :ref:`getelementptr `. + It must be less than or equal to the pointer size. If not specified, the + default index size is equal to the pointer size. + The index size also specifies the width of addresses in this address space. + All sizes are in bits. + The address space, ``n``, is optional, and if not specified, denotes the default address space 0. The value of ``n`` must be in the range [1,2^24). ``i:[:]`` @@ -4266,6 +4273,16 @@ address spaces defined in the :ref:`datalayout string`. the default globals address space and ``addrspace("P")`` the program address space. +The representation of pointers can be different for each address space and does +not necessarily need to be a plain integer address (e.g. for +:ref:`non-integral pointers `). 
In addition to a representation +bits size, pointers in each address space also have an index size which defines +the bitwidth of indexing operations as well as the size of `integer addresses` +in this address space. For example, CHERI capabilities are twice the size of the +underlying addresses to accommodate for additional metadata such as bounds and +permissions: on a 32-bit system the bitwidth of the pointer representation size +is 64, but the underlying address width remains 32 bits. + The default address space is number zero. The semantics of non-zero address spaces are target-specific. Memory @@ -12396,12 +12413,15 @@ Semantics: """""""""" The '``ptrtoint``' instruction converts ``value`` to integer type -``ty2`` by interpreting the pointer value as an integer and either -truncating or zero extending that value to the size of the integer type. +``ty2`` by interpreting the all pointer representation bits as an integer +(equivalent to a ``bitcast``) and either truncating or zero extending that value +to the size of the integer type. If ``value`` is smaller than ``ty2`` then a zero extension is done. If ``value`` is larger than ``ty2`` then a truncation is done. If they are the same size, then nothing is done (*no-op cast*) other than a type change. +The ``ptrtoint`` always :ref:`captures address and provenance ` +of the pointer argument. Example: """""""" @@ -12412,6 +12432,61 @@ Example: %Y = ptrtoint ptr %P to i64 ; yields zero extension on 32-bit architecture %Z = ptrtoint <4 x ptr> %P to <4 x i64>; yields vector zero extension for a vector of addresses on 32-bit architecture +.. _i_ptrtoaddr: + +'``ptrtoaddr .. to``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = ptrtoaddr to ; yields ty2 + +Overview: +""""""""" + +The '``ptrtoaddr``' instruction converts the pointer or a vector of +pointers ``value`` to the underlying integer address (or vector of integers) of +type ``ty2``. 
This is different from :ref:`ptrtoint ` in that it +only operates on the index bits of the pointer and ignores all other bits. + +Arguments: +"""""""""" + +The '``ptrtoaddr``' instruction takes a ``value`` to cast, which must be +a value of type :ref:`pointer ` or a vector of pointers, and a +type to cast it to ``ty2``, which must be an :ref:`integer ` or +a vector of integers type. + +Semantics: +"""""""""" + +The '``ptrtoaddr``' instruction converts ``value`` to integer type +``ty2`` by interpreting the lowest index-width pointer representation bits as an +integer and either truncating or zero extending that value to the size of the +integer type. +If the address of ``value`` is smaller than ``ty2`` then a zero extension is +done. If the address of ``value`` is larger than ``ty2`` then a truncation is +done. If the address size and the pointer representation size are the same and +``value`` and ``ty2`` are the same size, then nothing is done (*no-op cast*) +other than a type change. + +The ``ptrtoaddr`` always :ref:`captures the address (but not provenance) ` +of the pointer argument. + +Example: +"""""""" +This example assumes pointers in address space 1 are 64 bits in size with an +address width of 32 bits (``p1:64:64:64:32`` :ref:`datalayout string`) +.. code-block:: llvm + + %X = ptrtoaddr ptr addrspace(1) %P to i8 ; extracts low 32 bits and truncates + %Y = ptrtoaddr ptr addrspace(1) %P to i64 ; extracts low 32 bits and zero extends + %Z = ptrtoaddr <4 x ptr addrspace(1)> %P to <4 x i64>; yields vector zero extension of low 32 bits for each pointer + + .. _i_inttoptr: '``inttoptr .. to``' Instruction @@ -12456,6 +12531,9 @@ of the integer ``value``. If ``value`` is larger than the size of a pointer then a truncation is done. If ``value`` is smaller than the size of a pointer then a zero extension is done. If they are the same size, nothing is done (*no-op cast*). 
+The behavior is equivalent to a ``bitcast``, however, the resulting value is not +guaranteed to be dereferenceable (e.g. if the result type is a +:ref:`non-integral pointers `). Example: """""""" diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 6857944e6875f..dd74c68bd20c1 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -110,6 +110,7 @@ typedef enum { LLVMFPTrunc = 37, LLVMFPExt = 38, LLVMPtrToInt = 39, + LLVMPtrToAddr = 69, LLVMIntToPtr = 40, LLVMBitCast = 41, LLVMAddrSpaceCast = 60, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index a440b6484e94d..d6e194fb21b88 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -732,6 +732,13 @@ class TargetTransformInfoImplBase { return 0; break; } + case Instruction::PtrToAddr: { + unsigned DstSize = Dst->getScalarSizeInBits(); + if (DL.isLegalInteger(DstSize) && + DstSize >= DL.getPointerAddressSizeInBits(Src)) + return 0; + break; + } case Instruction::PtrToInt: { unsigned DstSize = Dst->getScalarSizeInBits(); if (DL.isLegalInteger(DstSize) && @@ -1441,6 +1448,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { Op2Info, Operands, I); } case Instruction::IntToPtr: + case Instruction::PtrToAddr: case Instruction::PtrToInt: case Instruction::SIToFP: case Instruction::UIToFP: diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index c7e4bdf3ff811..13ff9e773dfbe 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -318,6 +318,7 @@ enum Kind { kw_fptoui, kw_fptosi, kw_inttoptr, + kw_ptrtoaddr, kw_ptrtoint, kw_bitcast, kw_addrspacecast, diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index b362a88963f6c..e00c9eeb2fd0e 100644 --- 
a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -456,7 +456,8 @@ enum CastOpcodes { CAST_PTRTOINT = 9, CAST_INTTOPTR = 10, CAST_BITCAST = 11, - CAST_ADDRSPACECAST = 12 + CAST_ADDRSPACECAST = 12, + CAST_PTRTOADDR = 13, }; /// UnaryOpcodes - These are values used in the bitcode files to encode which diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 6fd05c8fddd5f..41d03c9fb3ed5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -486,6 +486,7 @@ class IRTranslator : public MachineFunctionPass { bool translatePtrToInt(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_PTRTOINT, U, MIRBuilder); } + bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder); bool translateTrunc(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_TRUNC, U, MIRBuilder); } diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 2ad080e6d0cd2..09ba6b54cf721 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -324,16 +324,38 @@ class DataLayout { /// the backends/clients are updated. Align getPointerPrefAlignment(unsigned AS = 0) const; - /// Layout pointer size in bytes, rounded up to a whole - /// number of bytes. + /// The pointer representation size in bytes, rounded up to a whole number of + /// bytes. The difference between this function and getPointerAddressSize() is + /// this one returns the size of the entire pointer type (this includes + /// metadata bits for fat pointers) and the latter only returns the number of + /// address bits. + /// \sa DataLayout::getPointerAddressSizeInBits /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. 
unsigned getPointerSize(unsigned AS = 0) const; - // Index size in bytes used for address calculation, - /// rounded up to a whole number of bytes. + /// The index size in bytes used for address calculation, rounded up to a + /// whole number of bytes. This not only defines the size used in + /// getelementptr operations, but also the size of addresses in this \p AS. + /// For example, a 64-bit CHERI-enabled target has 128-bit pointers of which + /// only 64 are used to represent the address and the remaining ones are used + /// for metadata such as bounds and access permissions. In this case + /// getPointerSize() returns 16, but getIndexSize() returns 8. + /// To help with code understanding, the alias getPointerAddressSize() can be + /// used instead of getIndexSize() to clarify that an address width is needed. unsigned getIndexSize(unsigned AS) const; + /// The integral size of a pointer in a given address space in bytes, which + /// is defined to be the same as getIndexSize(). This exists as a separate + /// function to make it clearer when reading code that the size of an address + /// is being requested. While targets exist where index size and the + /// underlying address width are not identical (e.g. AMDGPU fat pointers with + /// 48-bit addresses and 32-bit offsets indexing), there is currently no need + /// to differentiate these properties in LLVM. + /// \sa DataLayout::getIndexSize + /// \sa DataLayout::getPointerAddressSizeInBits + unsigned getPointerAddressSize(unsigned AS) const { return getIndexSize(AS); } + /// Return the address spaces containing non-integral pointers. Pointers in /// this address space don't have a well-defined bitwise representation. SmallVector getNonIntegralAddressSpaces() const { @@ -358,29 +380,53 @@ class DataLayout { return PTy && isNonIntegralPointerType(PTy); } - /// Layout pointer size, in bits + /// The size in bits of the pointer representation in a given address space. 
+ /// This is not necessarily the same as the integer address of a pointer (e.g. + /// for fat pointers). + /// \sa DataLayout::getPointerAddressSizeInBits() /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. unsigned getPointerSizeInBits(unsigned AS = 0) const { return getPointerSpec(AS).BitWidth; } - /// Size in bits of index used for address calculation in getelementptr. + /// The size in bits of indices used for address calculation in getelementptr + /// and for addresses in the given AS. See getIndexSize() for more + /// information. + /// \sa DataLayout::getPointerAddressSizeInBits() unsigned getIndexSizeInBits(unsigned AS) const { return getPointerSpec(AS).IndexBitWidth; } - /// Layout pointer size, in bits, based on the type. If this function is + /// The size in bits of an address in for the given AS. This is defined to + /// return the same value as getIndexSizeInBits() since there is currently no + /// target that requires these two properties to have different values. See + /// getIndexSize() for more information. + /// \sa DataLayout::getIndexSizeInBits() + unsigned getPointerAddressSizeInBits(unsigned AS) const { + return getIndexSizeInBits(AS); + } + + /// The pointer representation size in bits for this type. If this function is /// called with a pointer type, then the type size of the pointer is returned. /// If this function is called with a vector of pointers, then the type size /// of the pointer is returned. This should only be called with a pointer or /// vector of pointers. unsigned getPointerTypeSizeInBits(Type *) const; - /// Layout size of the index used in GEP calculation. + /// The size in bits of the index used in GEP calculation for this type. /// The function should be called with pointer or vector of pointers type. + /// This is defined to return the same value as getPointerAddressSizeInBits(), + /// but separate functions exist for code clarity. 
unsigned getIndexTypeSizeInBits(Type *Ty) const; + /// The size in bits of an address for this type. + /// This is defined to return the same value as getIndexTypeSizeInBits(), + /// but separate functions exist for code clarity. + unsigned getPointerAddressSizeInBits(Type *Ty) const { + return getIndexTypeSizeInBits(Ty); + } + unsigned getPointerTypeSize(Type *Ty) const { return getPointerTypeSizeInBits(Ty) / 8; } diff --git a/llvm/include/llvm/IR/InstVisitor.h b/llvm/include/llvm/IR/InstVisitor.h index b4eb729c7ce38..181e490b66a85 100644 --- a/llvm/include/llvm/IR/InstVisitor.h +++ b/llvm/include/llvm/IR/InstVisitor.h @@ -183,6 +183,7 @@ class InstVisitor { RetTy visitUIToFPInst(UIToFPInst &I) { DELEGATE(CastInst);} RetTy visitSIToFPInst(SIToFPInst &I) { DELEGATE(CastInst);} RetTy visitPtrToIntInst(PtrToIntInst &I) { DELEGATE(CastInst);} + RetTy visitPtrToAddrInst(PtrToAddrInst &I) { DELEGATE(PtrToIntInst);} RetTy visitIntToPtrInst(IntToPtrInst &I) { DELEGATE(CastInst);} RetTy visitBitCastInst(BitCastInst &I) { DELEGATE(CastInst);} RetTy visitAddrSpaceCastInst(AddrSpaceCastInst &I) { DELEGATE(CastInst);} diff --git a/llvm/include/llvm/IR/Instruction.def b/llvm/include/llvm/IR/Instruction.def index a5ad92f58f94e..face6a93ec7d5 100644 --- a/llvm/include/llvm/IR/Instruction.def +++ b/llvm/include/llvm/IR/Instruction.def @@ -190,35 +190,36 @@ HANDLE_CAST_INST(43, UIToFP , UIToFPInst ) // UInt -> floating point HANDLE_CAST_INST(44, SIToFP , SIToFPInst ) // SInt -> floating point HANDLE_CAST_INST(45, FPTrunc , FPTruncInst ) // Truncate floating point HANDLE_CAST_INST(46, FPExt , FPExtInst ) // Extend floating point -HANDLE_CAST_INST(47, PtrToInt, PtrToIntInst) // Pointer -> Integer -HANDLE_CAST_INST(48, IntToPtr, IntToPtrInst) // Integer -> Pointer -HANDLE_CAST_INST(49, BitCast , BitCastInst ) // Type cast -HANDLE_CAST_INST(50, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast - LAST_CAST_INST(50) +HANDLE_CAST_INST(47, PtrToInt, PtrToIntInst) // Pointer -> Integer 
(bitcast) +HANDLE_CAST_INST(48, PtrToAddr, PtrToAddrInst) // Pointer -> Address +HANDLE_CAST_INST(49, IntToPtr, IntToPtrInst) // Integer -> Pointer +HANDLE_CAST_INST(50, BitCast , BitCastInst ) // Type cast +HANDLE_CAST_INST(51, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast + LAST_CAST_INST(51) - FIRST_FUNCLETPAD_INST(51) -HANDLE_FUNCLETPAD_INST(51, CleanupPad, CleanupPadInst) -HANDLE_FUNCLETPAD_INST(52, CatchPad , CatchPadInst) - LAST_FUNCLETPAD_INST(52) + FIRST_FUNCLETPAD_INST(52) +HANDLE_FUNCLETPAD_INST(52, CleanupPad, CleanupPadInst) +HANDLE_FUNCLETPAD_INST(53, CatchPad , CatchPadInst) + LAST_FUNCLETPAD_INST(53) // Other operators... - FIRST_OTHER_INST(53) -HANDLE_OTHER_INST(53, ICmp , ICmpInst ) // Integer comparison instruction -HANDLE_OTHER_INST(54, FCmp , FCmpInst ) // Floating point comparison instr. -HANDLE_OTHER_INST(55, PHI , PHINode ) // PHI node instruction -HANDLE_OTHER_INST(56, Call , CallInst ) // Call a function -HANDLE_OTHER_INST(57, Select , SelectInst ) // select instruction -HANDLE_USER_INST (58, UserOp1, Instruction) // May be used internally in a pass -HANDLE_USER_INST (59, UserOp2, Instruction) // Internal to passes only -HANDLE_OTHER_INST(60, VAArg , VAArgInst ) // vaarg instruction -HANDLE_OTHER_INST(61, ExtractElement, ExtractElementInst)// extract from vector -HANDLE_OTHER_INST(62, InsertElement, InsertElementInst) // insert into vector -HANDLE_OTHER_INST(63, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. -HANDLE_OTHER_INST(64, ExtractValue, ExtractValueInst)// extract from aggregate -HANDLE_OTHER_INST(65, InsertValue, InsertValueInst) // insert into aggregate -HANDLE_OTHER_INST(66, LandingPad, LandingPadInst) // Landing pad instruction. -HANDLE_OTHER_INST(67, Freeze, FreezeInst) // Freeze instruction. - LAST_OTHER_INST(67) + FIRST_OTHER_INST(54) +HANDLE_OTHER_INST(54, ICmp , ICmpInst ) // Integer comparison instruction +HANDLE_OTHER_INST(55, FCmp , FCmpInst ) // Floating point comparison instr. 
+HANDLE_OTHER_INST(56, PHI , PHINode ) // PHI node instruction +HANDLE_OTHER_INST(57, Call , CallInst ) // Call a function +HANDLE_OTHER_INST(58, Select , SelectInst ) // select instruction +HANDLE_USER_INST (59, UserOp1, Instruction) // May be used internally in a pass +HANDLE_USER_INST (60, UserOp2, Instruction) // Internal to passes only +HANDLE_OTHER_INST(61, VAArg , VAArgInst ) // vaarg instruction +HANDLE_OTHER_INST(62, ExtractElement, ExtractElementInst)// extract from vector +HANDLE_OTHER_INST(63, InsertElement, InsertElementInst) // insert into vector +HANDLE_OTHER_INST(64, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. +HANDLE_OTHER_INST(65, ExtractValue, ExtractValueInst)// extract from aggregate +HANDLE_OTHER_INST(66, InsertValue, InsertValueInst) // insert into aggregate +HANDLE_OTHER_INST(67, LandingPad, LandingPadInst) // Landing pad instruction. +HANDLE_OTHER_INST(68, Freeze, FreezeInst) // Freeze instruction. + LAST_OTHER_INST(68) #undef FIRST_TERM_INST #undef HANDLE_TERM_INST diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index c164f76eb335b..81f93aa1eb77d 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -4881,6 +4881,9 @@ class PtrToIntInst : public CastInst { /// Clone an identical PtrToIntInst. 
PtrToIntInst *cloneImpl() const; + PtrToIntInst(unsigned Op, Value *S, Type *Ty, const Twine &NameStr, + InsertPosition InsertBefore); + public: /// Constructor with insert-before-instruction semantics PtrToIntInst(Value *S, ///< The value to be converted @@ -4904,13 +4907,43 @@ class PtrToIntInst : public CastInst { // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { - return I->getOpcode() == PtrToInt; + return I->getOpcode() == PtrToInt || I->getOpcode() == PtrToAddr; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This class represents a cast from a pointer to an address (non-capturing +/// ptrtoint). Inherits from PtrToIntInst since it is a less restrictive version +/// of ptrtoint, so treating it as ptrtoint is conservatively correct. +class PtrToAddrInst : public PtrToIntInst { +protected: + // Note: Instruction needs to be a friend here to call cloneImpl. + friend class Instruction; + + /// Clone an identical PtrToIntInst. 
+ PtrToAddrInst *cloneImpl() const; + +public: + /// Constructor with insert-before-instruction semantics + PtrToAddrInst(Value *S, ///< The value to be converted + Type *Ty, ///< The type to convert to + const Twine &NameStr = "", ///< A name for the new instruction + InsertPosition InsertBefore = + nullptr ///< Where to insert the new instruction + ); + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const Instruction *I) { + return I->getOpcode() == PtrToAddr; } static bool classof(const Value *V) { return isa(V) && classof(cast(V)); } }; + //===----------------------------------------------------------------------===// // BitCastInst Class //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/SandboxIR/Instruction.h b/llvm/include/llvm/SandboxIR/Instruction.h index ce5a2cbec85bd..14ca7fb20c787 100644 --- a/llvm/include/llvm/SandboxIR/Instruction.h +++ b/llvm/include/llvm/SandboxIR/Instruction.h @@ -2264,6 +2264,7 @@ class CastInst : public UnaryInstruction { return Opcode::FPToSI; case llvm::Instruction::FPExt: return Opcode::FPExt; + case llvm::Instruction::PtrToAddr: case llvm::Instruction::PtrToInt: return Opcode::PtrToInt; case llvm::Instruction::IntToPtr: diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index ce813e1d7b1c4..ce75a75905b21 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -927,6 +927,7 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(fptoui, FPToUI); INSTKEYWORD(fptosi, FPToSI); INSTKEYWORD(inttoptr, IntToPtr); + INSTKEYWORD(ptrtoaddr, PtrToAddr); INSTKEYWORD(ptrtoint, PtrToInt); INSTKEYWORD(bitcast, BitCast); INSTKEYWORD(addrspacecast, AddrSpaceCast); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 2dfc8254d5885..8b8ee00e3c46b 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4275,6 +4275,7 @@ bool 
LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { case lltok::kw_bitcast: case lltok::kw_addrspacecast: case lltok::kw_inttoptr: + // ptrtoaddr not supported in constant exprs (yet?). case lltok::kw_ptrtoint: { unsigned Opc = Lex.getUIntVal(); Type *DestTy = nullptr; @@ -7237,6 +7238,7 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_fptoui: case lltok::kw_fptosi: case lltok::kw_inttoptr: + case lltok::kw_ptrtoaddr: case lltok::kw_ptrtoint: return parseCast(Inst, PFS, KeywordVal); case lltok::kw_fptrunc: diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 1e07f060d72cb..5786b650f46f4 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1278,6 +1278,7 @@ static int getDecodedCastOpcode(unsigned Val) { case bitc::CAST_SITOFP : return Instruction::SIToFP; case bitc::CAST_FPTRUNC : return Instruction::FPTrunc; case bitc::CAST_FPEXT : return Instruction::FPExt; + case bitc::CAST_PTRTOADDR: return Instruction::PtrToAddr; case bitc::CAST_PTRTOINT: return Instruction::PtrToInt; case bitc::CAST_INTTOPTR: return Instruction::IntToPtr; case bitc::CAST_BITCAST : return Instruction::BitCast; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 158b0a669acb1..f9c6e55d0d329 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -640,6 +640,7 @@ static unsigned getEncodedCastOpcode(unsigned Opcode) { case Instruction::SIToFP : return bitc::CAST_SITOFP; case Instruction::FPTrunc : return bitc::CAST_FPTRUNC; case Instruction::FPExt : return bitc::CAST_FPEXT; + case Instruction::PtrToAddr: return bitc::CAST_PTRTOADDR; case Instruction::PtrToInt: return bitc::CAST_PTRTOINT; case Instruction::IntToPtr: return bitc::CAST_INTTOPTR; case Instruction::BitCast : return bitc::CAST_BITCAST; diff --git 
a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 8ab2533afc15f..7bd29f1aad3f3 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1583,6 +1583,20 @@ bool IRTranslator::translateCast(unsigned Opcode, const User &U, return true; } +bool IRTranslator::translatePtrToAddr(const User &U, + MachineIRBuilder &MIRBuilder) { + Register Op = getOrCreateVReg(*U.getOperand(0)); + Type *PtrTy = U.getOperand(0)->getType(); + LLT AddrTy = getLLTForType(*DL->getIndexType(PtrTy), *DL); + auto IntPtrTy = getLLTForType(*DL->getIntPtrType(PtrTy), *DL); + auto PtrToInt = MIRBuilder.buildPtrToInt(IntPtrTy, Op); + auto Addr = PtrToInt; + if (AddrTy != IntPtrTy) + Addr = MIRBuilder.buildTrunc(AddrTy, PtrToInt.getReg(0)); + MIRBuilder.buildZExtOrTrunc(getOrCreateVReg(U), Addr.getReg(0)); + return true; +} + bool IRTranslator::translateGetElementPtr(const User &U, MachineIRBuilder &MIRBuilder) { Value &Op0 = *U.getOperand(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9d138d364bad7..66b11030ce5a5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3877,6 +3877,21 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) { setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } +void SelectionDAGBuilder::visitPtrToAddr(const User &I) { + const auto &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &Ctx = *DAG.getContext(); + // ptrtoaddr is equivalent to a truncate of ptrtoint to address/index width + SDValue N = getValue(I.getOperand(0)); + Type *PtrTy = I.getOperand(0)->getType(); + EVT AddrVT = EVT::getIntegerVT(Ctx, DL.getPointerAddressSizeInBits(PtrTy)); + if (auto *VTy = dyn_cast(PtrTy)) + AddrVT = EVT::getVectorVT(Ctx, AddrVT, VTy->getElementCount()); + N 
= DAG.getPtrExtOrTrunc(N, getCurSDLoc(), AddrVT); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), TLI.getValueType(DL, I.getType())); + setValue(&I, N); +} + void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. @@ -7966,17 +7981,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // On arm64_32, pointers are 32 bits when stored in memory, but // zero-extended to 64 bits when in registers. Thus the mask is 32 bits to - // match the index type, but the pointer is 64 bits, so the the mask must be + // match the index type, but the pointer is 64 bits, so the mask must be // zero-extended up to 64 bits to match the pointer. EVT PtrVT = TLI.getValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); assert(PtrVT == Ptr.getValueType()); - assert(MemVT == Mask.getValueType()); - if (MemVT != PtrVT) + if (Mask.getValueType().getFixedSizeInBits() < MemVT.getFixedSizeInBits()) { + // For AMDGPU buffer descriptors the mask is 48 bits, but the pointer is + // 128-bit, so we have to pad the mask with ones for unused bits. 
+ auto HighOnes = + DAG.getNode(ISD::SHL, sdl, PtrVT, DAG.getAllOnesConstant(sdl, PtrVT), + DAG.getConstant(Mask.getValueType().getFixedSizeInBits(), + sdl, PtrVT)); + Mask = DAG.getNode(ISD::OR, sdl, PtrVT, + DAG.getZExtOrTrunc(Mask, sdl, PtrVT), HighOnes); + } else if (Mask.getValueType() != PtrVT) Mask = DAG.getPtrExtOrTrunc(Mask, sdl, PtrVT); + assert(Mask.getValueType() == PtrVT); setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask)); return; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 35c15bc269d4b..108c0141495e3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -575,6 +575,7 @@ class SelectionDAGBuilder { void visitFPToSI(const User &I); void visitUIToFP(const User &I); void visitSIToFP(const User &I); + void visitPtrToAddr(const User &I); void visitPtrToInt(const User &I); void visitIntToPtr(const User &I); void visitBitCast(const User &I); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index c85f0c71ef25f..3d937c4127b82 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1835,6 +1835,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case SIToFP: return ISD::SINT_TO_FP; case FPTrunc: return ISD::FP_ROUND; case FPExt: return ISD::FP_EXTEND; + case PtrToAddr: return ISD::BITCAST; case PtrToInt: return ISD::BITCAST; case IntToPtr: return ISD::BITCAST; case BitCast: return ISD::BITCAST; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 8717cd092b0b5..eb7c2a6c8f125 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5773,7 +5773,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { if (!DL.contains("-p7") && !DL.starts_with("p7")) Res.append("-p7:160:256:256:32"); if (!DL.contains("-p8") && 
!DL.starts_with("p8")) - Res.append("-p8:128:128"); + Res.append("-p8:128:128:128:48"); if (!DL.contains("-p9") && !DL.starts_with("p9")) Res.append("-p9:192:256:256:32"); diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 54e5e6d53e791..27392e4ba6122 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -818,6 +818,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case UIToFP: return "uitofp"; case SIToFP: return "sitofp"; case IntToPtr: return "inttoptr"; + case PtrToAddr: return "ptrtoaddr"; case PtrToInt: return "ptrtoint"; case BitCast: return "bitcast"; case AddrSpaceCast: return "addrspacecast"; diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index f404e11b9c0f0..11cd0799645cc 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2856,24 +2856,25 @@ unsigned CastInst::isEliminableCastPair( const unsigned numCastOps = Instruction::CastOpsEnd - Instruction::CastOpsBegin; static const uint8_t CastResults[numCastOps][numCastOps] = { - // T F F U S F F P I B A -+ - // R Z S P P I I T P 2 N T S | - // U E E 2 2 2 2 R E I T C C +- secondOp - // N X X U S F F N X N 2 V V | - // C T T I I P P C T T P T T -+ - { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // Trunc -+ - { 8, 1, 9,99,99, 2,17,99,99,99, 2, 3, 0}, // ZExt | - { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3, 0}, // SExt | - { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToUI | - { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToSI | - { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // UIToFP +- firstOp - { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // SIToFP | - { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // FPTrunc | - { 99,99,99, 2, 2,99,99, 8, 2,99,99, 4, 0}, // FPExt | - { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3, 0}, // PtrToInt | - { 99,99,99,99,99,99,99,99,99,11,99,15, 0}, // IntToPtr | - { 5, 5, 5, 0, 0, 5, 5, 0, 0,16, 5, 1,14}, // BitCast | - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+ + // T F F 
U S F F P P I B A -+ + // R Z S P P I I T P 2 2 N T S | + // U E E 2 2 2 2 R E I A T C C +- secondOp + // N X X U S F F N X N D 2 V V | + // C T T I I P P C T T R P T T -+ + { 1, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // Trunc -+ + { 8, 1, 9,99,99, 2,17,99,99,99,99, 2, 3, 0}, // ZExt | + { 8, 0, 1,99,99, 0, 2,99,99,99,99, 0, 3, 0}, // SExt | + { 0, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // FPToUI | + { 0, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // FPToSI | + { 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // UIToFP +- firstOp + { 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // SIToFP | + { 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // FPTrunc | + { 99,99,99, 2, 2,99,99, 8, 2,99,99,99, 4, 0}, // FPExt | + { 1, 0, 0,99,99, 0, 0,99,99,99,99, 7, 3, 0}, // PtrToInt | + { 1, 0, 0,99,99, 0, 0,99,99,99,99,99, 3, 0}, // PtrToAddr | + { 99,99,99,99,99,99,99,99,99,11,99,99,15, 0}, // IntToPtr | + { 5, 5, 5, 0, 0, 5, 5, 0, 0,16,16, 5, 1,14}, // BitCast | + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+ }; // TODO: This logic could be encoded into the table above and handled in the @@ -3046,6 +3047,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty, case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore); case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore); case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore); + case PtrToAddr: return new PtrToAddrInst (S, Ty, Name, InsertBefore); case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore); case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore); case BitCast: @@ -3347,6 +3349,7 @@ CastInst::castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy) { case Instruction::FPToSI: return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() && SrcEC == DstEC; + case Instruction::PtrToAddr: case Instruction::PtrToInt: if (SrcEC != DstEC) return false; @@ -3454,12 +3457,20 @@ FPToSIInst::FPToSIInst(Value *S, Type *Ty, const Twine &Name, 
assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI"); } -PtrToIntInst::PtrToIntInst(Value *S, Type *Ty, const Twine &Name, +PtrToIntInst::PtrToIntInst(unsigned Op, Value *S, Type *Ty, const Twine &Name, InsertPosition InsertBefore) - : CastInst(Ty, PtrToInt, S, Name, InsertBefore) { + : CastInst(Ty, Op, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt"); } +PtrToIntInst::PtrToIntInst(Value *S, Type *Ty, const Twine &Name, + InsertPosition InsertBefore) + : PtrToIntInst(PtrToInt, S, Ty, Name, InsertBefore) {} + +PtrToAddrInst::PtrToAddrInst(Value *S, Type *Ty, const Twine &Name, + InsertPosition InsertBefore) + : PtrToIntInst(PtrToAddr, S, Ty, Name, InsertBefore) {} + IntToPtrInst::IntToPtrInst(Value *S, Type *Ty, const Twine &Name, InsertPosition InsertBefore) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) { @@ -4427,6 +4438,10 @@ PtrToIntInst *PtrToIntInst::cloneImpl() const { return new PtrToIntInst(getOperand(0), getType()); } +PtrToAddrInst *PtrToAddrInst::cloneImpl() const { + return new PtrToAddrInst(getOperand(0), getType()); +} + IntToPtrInst *IntToPtrInst::cloneImpl() const { return new IntToPtrInst(getOperand(0), getType()); } diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp index fe67f9ef73fb6..9e6b9981e0c57 100644 --- a/llvm/lib/SandboxIR/Context.cpp +++ b/llvm/lib/SandboxIR/Context.cpp @@ -256,6 +256,7 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { case llvm::Instruction::FPToUI: case llvm::Instruction::FPToSI: case llvm::Instruction::FPExt: + case llvm::Instruction::PtrToAddr: case llvm::Instruction::PtrToInt: case llvm::Instruction::IntToPtr: case llvm::Instruction::SIToFP: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index eb768ed9ad5a1..e5c8df0b162d8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -1361,6 +1361,7 @@ class SplitPtrStructs : public InstVisitor<SplitPtrStructs> { PtrParts visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI); PtrParts visitGetElementPtrInst(GetElementPtrInst &GEP); + PtrParts visitPtrToAddrInst(PtrToAddrInst &PA); PtrParts visitPtrToIntInst(PtrToIntInst &PI); PtrParts visitIntToPtrInst(IntToPtrInst &IP); PtrParts visitAddrSpaceCastInst(AddrSpaceCastInst &I); @@ -1952,6 +1953,22 @@ PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) { return {nullptr, nullptr}; } +PtrParts SplitPtrStructs::visitPtrToAddrInst(PtrToAddrInst &PA) { + Value *Ptr = PA.getPointerOperand(); + if (!isSplitFatPtr(Ptr->getType())) + return {nullptr, nullptr}; + IRB.SetInsertPoint(&PA); + + auto [Rsrc, Off] = getPtrParts(Ptr); + Value *Res = IRB.CreateIntCast(Off, PA.getType(), /*isSigned=*/false, + PA.getName() + ".off"); + copyMetadata(Res, &PA); + Res->takeName(&PA); + SplitUsers.insert(&PA); + PA.replaceAllUsesWith(Res); + return {nullptr, nullptr}; +} + PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) { if (!isSplitFatPtr(IP.getType())) return {nullptr, nullptr}; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index c22b27abdbf6c..390fd0bde1166 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -687,10 +687,9 @@ static StringRef computeDataLayout(const Triple &TT) { // space 8) which cannot be non-trivilally accessed by LLVM memory operations // like getelementptr.
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-" - "v32:32-v48:64-v96:" - "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-" - "G1-ni:7:8:9"; + "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-" + "v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; } LLVM_READNONE diff --git a/llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll b/llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll index 2bfe32c654ff1..39f8ffedc219d 100644 --- a/llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll @@ -8,7 +8,7 @@ ; CHECK-NEXT: x[]: full-set ; CHECK-NEXT: allocas uses: -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" define void @a(ptr addrspace(5) %x) { entry: diff --git a/llvm/test/Assembler/ptrtoaddr-const.ll b/llvm/test/Assembler/ptrtoaddr-const.ll new file mode 100644 index 0000000000000..d57c23446c515 --- /dev/null +++ b/llvm/test/Assembler/ptrtoaddr-const.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +target datalayout = "p1:64:64:64:32" + +define i32 @test_as0(ptr %p) { + %addr = ptrtoaddr ptr %p to i32 + ; CHECK: %addr = ptrtoaddr ptr %p to i32 + ret i32 %addr +} + +define i16 @test_as1(ptr addrspace(1) %p) { + %addr = ptrtoaddr ptr addrspace(1) %p to i16 + ; CHECK: %addr = ptrtoaddr ptr addrspace(1) %p to i16 + ret i16 %addr +} diff --git 
a/llvm/test/Assembler/ptrtoaddr.ll b/llvm/test/Assembler/ptrtoaddr.ll new file mode 100644 index 0000000000000..b274fd29beaec --- /dev/null +++ b/llvm/test/Assembler/ptrtoaddr.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; ptrtoaddr is not currently supported in constant expressions + +@i = global i32 0 +@global_cast = global i32 ptrtoaddr (ptr @i0 to i32) +; CHECK: [[#@LINE-1]]:27: error: expected value token diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll new file mode 100644 index 0000000000000..6722a55e8da92 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ptrtoint-ptrtoaddr-p8.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,GISEL +; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SDAG +;; Check that we can lower ptrtoaddr differently from ptrtoint.
+;; Includes an ignored argument so the registers actually need to be written + +define i128 @ptrtoint(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; GISEL-LABEL: ptrtoint: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GISEL-NEXT: v_mov_b32_e32 v2, v6 +; GISEL-NEXT: v_mov_b32_e32 v3, v7 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoint: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v3, v7 +; SDAG-NEXT: v_mov_b32_e32 v2, v6 +; SDAG-NEXT: v_mov_b32_e32 v1, v5 +; SDAG-NEXT: v_mov_b32_e32 v0, v4 +; SDAG-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoint ptr addrspace(8) %ptr to i128 + ret i128 %ret +} + +define i48 @ptrtoaddr(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; GISEL-LABEL: ptrtoaddr: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoaddr: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v1, v5 +; SDAG-NEXT: v_mov_b32_e32 v0, v4 +; SDAG-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoaddr ptr addrspace(8) %ptr to i48 + ret i48 %ret +} + +define <2 x i128> @ptrtoint_vec(ptr addrspace(8) %ignored, <2 x ptr addrspace(8)> %ptr) { +; GISEL-LABEL: ptrtoint_vec: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GISEL-NEXT: v_mov_b32_e32 v2, v6 +; GISEL-NEXT: v_mov_b32_e32 v3, v7 +; GISEL-NEXT: v_mov_b32_e32 v4, v8 +; GISEL-NEXT: v_mov_b32_e32 v5, v9 +; GISEL-NEXT: v_mov_b32_e32 v6, v10 +; GISEL-NEXT: v_mov_b32_e32 v7, v11 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoint_vec: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v3, v7 +; 
SDAG-NEXT: v_mov_b32_e32 v2, v6 +; SDAG-NEXT: v_mov_b32_e32 v1, v5 +; SDAG-NEXT: v_mov_b32_e32 v0, v4 +; SDAG-NEXT: v_mov_b32_e32 v4, v8 +; SDAG-NEXT: v_mov_b32_e32 v5, v9 +; SDAG-NEXT: v_mov_b32_e32 v6, v10 +; SDAG-NEXT: v_mov_b32_e32 v7, v11 +; SDAG-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoint <2 x ptr addrspace(8)> %ptr to <2 x i128> + ret <2 x i128> %ret +} + +define <2 x i64> @ptrtoaddr_vec(ptr addrspace(8) %ignored, <2 x ptr addrspace(8)> %ptr) { +; GISEL-LABEL: ptrtoaddr_vec: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GISEL-NEXT: v_mov_b32_e32 v2, v8 +; GISEL-NEXT: v_mov_b32_e32 v3, v9 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoaddr_vec: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v2, v8 +; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v5 +; SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v9 +; SDAG-NEXT: v_mov_b32_e32 v0, v4 +; SDAG-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoaddr <2 x ptr addrspace(8)> %ptr to <2 x i64> + ret <2 x i64> %ret +} + +define i256 @ptrtoint_ext(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; GISEL-LABEL: ptrtoint_ext: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GISEL-NEXT: v_mov_b32_e32 v2, v6 +; GISEL-NEXT: v_mov_b32_e32 v3, v7 +; GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GISEL-NEXT: v_mov_b32_e32 v5, 0 +; GISEL-NEXT: v_mov_b32_e32 v6, 0 +; GISEL-NEXT: v_mov_b32_e32 v7, 0 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoint_ext: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v3, v7 +; SDAG-NEXT: v_mov_b32_e32 v2, v6 +; SDAG-NEXT: v_mov_b32_e32 v1, v5 +; SDAG-NEXT: v_mov_b32_e32 v0, v4 +; SDAG-NEXT: v_mov_b32_e32 v4, 0 +; SDAG-NEXT: v_mov_b32_e32 v5, 0 +; SDAG-NEXT: v_mov_b32_e32 v6, 0 +; SDAG-NEXT: 
v_mov_b32_e32 v7, 0 +; SDAG-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoint ptr addrspace(8) %ptr to i256 + ret i256 %ret +} + +;; FIXME: this is wrong for the GlobalISel case, we are removing the trunc: +;; https://github.com/llvm/llvm-project/issues/139598 +define i256 @ptrtoaddr_ext(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; GISEL-LABEL: ptrtoaddr_ext: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GISEL-NEXT: v_mov_b32_e32 v2, v6 +; GISEL-NEXT: v_mov_b32_e32 v3, v7 +; GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GISEL-NEXT: v_mov_b32_e32 v5, 0 +; GISEL-NEXT: v_mov_b32_e32 v6, 0 +; GISEL-NEXT: v_mov_b32_e32 v7, 0 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoaddr_ext: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v0, v4 +; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v5 +; SDAG-NEXT: v_mov_b32_e32 v2, 0 +; SDAG-NEXT: v_mov_b32_e32 v3, 0 +; SDAG-NEXT: v_mov_b32_e32 v4, 0 +; SDAG-NEXT: v_mov_b32_e32 v5, 0 +; SDAG-NEXT: v_mov_b32_e32 v6, 0 +; SDAG-NEXT: v_mov_b32_e32 v7, 0 +; SDAG-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoaddr ptr addrspace(8) %ptr to i256 + ret i256 %ret +} + +define i64 @ptrtoint_trunc(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; GISEL-LABEL: ptrtoint_trunc: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-LABEL: ptrtoint_trunc: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v1, v5 +; SDAG-NEXT: v_mov_b32_e32 v0, v4 +; SDAG-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoint ptr addrspace(8) %ptr to i64 + ret i64 %ret +} + +define i16 @ptrtoaddr_trunc(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) { +; CHECK-LABEL: ptrtoaddr_trunc: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, v4 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoaddr ptr addrspace(8) %ptr to i16 + ret i16 %ret +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll index 82a15f7497f52..316fe65361f47 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll @@ -1,14 +1,13 @@ ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - < %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s -; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p8) = G_PTR_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_(s128) +; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p8) = G_PTR_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_(s48) define float @gep_on_rsrc(ptr addrspace(8) %rsrc) { body: - %next = getelementptr float, ptr addrspace(8) %rsrc, i128 1 + %next = getelementptr float, ptr addrspace(8) %rsrc, i48 1 %res = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %next, i32 0, i32 0, i32 0) ret float %res } declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg) - diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll index fd5bf579cdeb2..538145a11c733 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll @@ -255,6 +255,53 @@ define i32 @ptrtoint_offset(ptr addrspace(7) %ptr) { ret i32 %ret } +define i32 @ptrtoaddr(ptr addrspace(7) %ptr) { +; CHECK-LABEL: define i32 @ptrtoaddr +; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 0 +; CHECK-NEXT: [[RET:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 +; 
CHECK-NEXT: ret i32 [[RET]] +; + %ret = ptrtoaddr ptr addrspace(7) %ptr to i32 + ret i32 %ret +} + +define <2 x i32> @ptrtoaddr_vec(<2 x ptr addrspace(7)> %ptr) { +; CHECK-LABEL: define <2 x i32> @ptrtoaddr_vec +; CHECK-SAME: ({ <2 x ptr addrspace(8)>, <2 x i32> } [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[PTR]], 0 +; CHECK-NEXT: [[RET:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[PTR]], 1 +; CHECK-NEXT: ret <2 x i32> [[RET]] +; + %ret = ptrtoaddr <2 x ptr addrspace(7)> %ptr to <2 x i32> + ret <2 x i32> %ret +} + +;; Check that we extend the offset to i160 instead of reinterpreting all bits. +define i160 @ptrtoaddr_ext(ptr addrspace(7) %ptr) { +; CHECK-LABEL: define i160 @ptrtoaddr_ext +; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 0 +; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 +; CHECK-NEXT: [[RET:%.*]] = zext i32 [[PTR_OFF]] to i160 +; CHECK-NEXT: ret i160 [[RET]] +; + %ret = ptrtoaddr ptr addrspace(7) %ptr to i160 + ret i160 %ret +} + +define i16 @ptrtoaddr_trunc(ptr addrspace(7) %ptr) { +; CHECK-LABEL: define i16 @ptrtoaddr_trunc +; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 0 +; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 +; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[PTR_OFF]] to i16 +; CHECK-NEXT: ret i16 [[RET]] +; + %ret = ptrtoaddr ptr addrspace(7) %ptr to i16 + ret i16 %ret +} + define ptr addrspace(7) @inttoptr(i160 %v) { ; CHECK-LABEL: define { ptr addrspace(8), i32 } @inttoptr ; CHECK-SAME: (i160 [[V:%.*]]) #[[ATTR0]] { diff --git a/llvm/test/CodeGen/AMDGPU/ptrmask.ll b/llvm/test/CodeGen/AMDGPU/ptrmask.ll index 9ad9c80d82ff3..17c4eaa2736f4 100644 --- a/llvm/test/CodeGen/AMDGPU/ptrmask.ll +++ 
b/llvm/test/CodeGen/AMDGPU/ptrmask.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s @@ -145,64 +145,64 @@ define amdgpu_ps ptr addrspace(7) @s_ptrmask_buffer_fat_ptr_i32_neg8(ptr addrspa ret ptr addrspace(7) %masked } -define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) %ptr, i128 %mask) { -; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128: +define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48(ptr addrspace(8) %ptr, i48 %mask) { +; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_or_b32_e32 v5, 0xffff0000, v5 ; GCN-NEXT: v_and_b32_e32 v1, v1, v5 ; GCN-NEXT: v_and_b32_e32 v0, v0, v4 -; GCN-NEXT: v_and_b32_e32 v3, v3, v7 -; GCN-NEXT: v_and_b32_e32 v2, v2, v6 ; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128: +; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_or_b32_e32 v5, 0xffff0000, v5 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5 -; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6 -; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 %mask) ret ptr addrspace(8) %masked } -define 
ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) %ptr) { -; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8: +define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48_neg8(ptr addrspace(8) %ptr) { +; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v0, -8, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8: +; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_and_b32_e32 v0, -8, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 -8) ret ptr addrspace(8) %masked } -define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) inreg %ptr, i128 inreg %mask) { -; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128: +define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i48(ptr addrspace(8) inreg %ptr, i48 inreg %mask) { +; GCN-LABEL: s_ptrmask_buffer_resource_variable_i48: ; GCN: ; %bb.0: -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] +; GCN-NEXT: s_or_b32 s7, s7, 0xffff0000 ; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7] ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: ; return to shader part epilog ; -; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128: +; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i48: ; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_or_b32 s7, s7, 0xffff0000 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7] -; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[4:5], s[8:9] +; GFX10PLUS-NEXT: s_mov_b32 s2, s4 +; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: ; return to shader part epilog - %masked = call ptr addrspace(8) 
@llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 %mask) ret ptr addrspace(8) %masked } -define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) inreg %ptr) { -; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8: +define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i48_neg8(ptr addrspace(8) inreg %ptr) { +; GCN-LABEL: s_ptrmask_buffer_resource_variable_i48_neg8: ; GCN: ; %bb.0: ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_and_b32 s0, s2, -8 @@ -210,14 +210,14 @@ define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128_neg8( ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: ; return to shader part epilog ; -; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8: +; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i48_neg8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_and_b32 s0, s2, -8 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: ; return to shader part epilog - %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 -8) ret ptr addrspace(8) %masked } diff --git a/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll b/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll new file mode 100644 index 0000000000000..da4b531ab5b25 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s + +define i128 @ptrtoint(ptr addrspace(8) %ptr) { +; CHECK-LABEL: ptrtoint: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoint ptr addrspace(8) %ptr to i128 + ret i128 
%ret +} + +define i48 @ptrtoaddr(ptr addrspace(8) %ptr) { +; CHECK-LABEL: ptrtoaddr: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoaddr ptr addrspace(8) %ptr to i48 + ret i48 %ret +} + +define <2 x i128> @ptrtoint_vec(<2 x ptr addrspace(8)> %ptr) { +; CHECK-LABEL: ptrtoint_vec: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoint <2 x ptr addrspace(8)> %ptr to <2 x i128> + ret <2 x i128> %ret +} + +define <2 x i64> @ptrtoaddr_vec(<2 x ptr addrspace(8)> %ptr) { +; CHECK-LABEL: ptrtoaddr_vec: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v2, v4 +; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; CHECK-NEXT: v_and_b32_e32 v3, 0xffff, v5 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoaddr <2 x ptr addrspace(8)> %ptr to <2 x i64> + ret <2 x i64> %ret +} + +define i256 @ptrtoint_ext(ptr addrspace(8) %ptr) { +; CHECK-LABEL: ptrtoint_ext: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: v_mov_b32_e32 v5, 0 +; CHECK-NEXT: v_mov_b32_e32 v6, 0 +; CHECK-NEXT: v_mov_b32_e32 v7, 0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoint ptr addrspace(8) %ptr to i256 + ret i256 %ret +} + +;; Check that we extend the offset to i128 instead of reinterpreting all bits. 
+define i128 @ptrtoaddr_ext(ptr addrspace(8) %ptr) { +; CHECK-LABEL: ptrtoaddr_ext: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoaddr ptr addrspace(8) %ptr to i128 + ret i128 %ret +} + +define i64 @ptrtoint_trunc(ptr addrspace(8) %ptr) { +; CHECK-LABEL: ptrtoint_trunc: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoint ptr addrspace(8) %ptr to i64 + ret i64 %ret +} + +define i16 @ptrtoaddr_trunc(ptr addrspace(8) %ptr) { +; CHECK-LABEL: ptrtoaddr_trunc: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ret = ptrtoaddr ptr addrspace(8) %ptr to i16 + ret i16 %ret +} diff --git a/llvm/test/CodeGen/X86/GlobalISel/ptrtoaddr.ll b/llvm/test/CodeGen/X86/GlobalISel/ptrtoaddr.ll new file mode 100644 index 0000000000000..1430b5c8e56c6 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/ptrtoaddr.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK + +define i1 @ptrtoaddr_1(ptr %p) { +; CHECK-LABEL: ptrtoaddr_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: xorb $1, %al +; CHECK-NEXT: # kill: def $al killed $al killed $rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i1 + %ret = xor i1 %addr, 1 + ret i1 %ret +} + +define i8 @ptrtoaddr_8(ptr %p) { +; CHECK-LABEL: ptrtoaddr_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: notb %al +; CHECK-NEXT: # kill: def $al killed $al killed $rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i8 + %ret = xor i8 %addr, -1 + ret i8 %ret +} + +define i16 @ptrtoaddr_16(ptr %p) { 
+; CHECK-LABEL: ptrtoaddr_16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: notw %ax +; CHECK-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i16 + %ret = xor i16 %addr, -1 + ret i16 %ret +} + +define i32 @ptrtoaddr_s32_p0(ptr %p) { +; CHECK-LABEL: ptrtoaddr_s32_p0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i32 + %ret = xor i32 %addr, -1 + ret i32 %ret +} + +define i64 @ptrtoaddr_s64_p0(ptr %p) { +; CHECK-LABEL: ptrtoaddr_s64_p0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: notq %rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i64 + %ret = xor i64 %addr, -1 + ret i64 %ret +} diff --git a/llvm/test/CodeGen/X86/ptrtoaddr.ll b/llvm/test/CodeGen/X86/ptrtoaddr.ll new file mode 100644 index 0000000000000..80c8548e4d569 --- /dev/null +++ b/llvm/test/CodeGen/X86/ptrtoaddr.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK + +define i1 @ptrtoaddr_1(ptr %p) { +; CHECK-LABEL: ptrtoaddr_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: xorb $1, %al +; CHECK-NEXT: # kill: def $al killed $al killed $rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i1 + %ret = xor i1 %addr, 1 + ret i1 %ret +} + +define i8 @ptrtoaddr_8(ptr %p) { +; CHECK-LABEL: ptrtoaddr_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: notb %al +; CHECK-NEXT: # kill: def $al killed $al killed $rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i8 + %ret = xor i8 %addr, -1 + ret i8 %ret +} + +define i16 @ptrtoaddr_16(ptr %p) { +; CHECK-LABEL: ptrtoaddr_16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, 
%rax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i16 + %ret = xor i16 %addr, -1 + ret i16 %ret +} + +define i32 @ptrtoaddr_s32_p0(ptr %p) { +; CHECK-LABEL: ptrtoaddr_s32_p0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i32 + %ret = xor i32 %addr, -1 + ret i32 %ret +} + +define i64 @ptrtoaddr_s64_p0(ptr %p) { +; CHECK-LABEL: ptrtoaddr_s64_p0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: notq %rax +; CHECK-NEXT: retq +entry: + %addr = ptrtoaddr ptr %p to i64 + %ret = xor i64 %addr, -1 + ret i64 %ret +} diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll index 2c5286edbfe00..6cef895aaf42c 100644 --- a/llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll +++ b/llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll @@ -1,7 +1,7 @@ ; Test that we don't crash. 
; RUN: opt < %s -passes=alignment-from-assumptions -S -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" %"core::str::CharIndices.29.66.90.114.138.149.165.173.181.197.205.213.229.387.398" = type { [0 x i64], i64, [0 x i64], { ptr, ptr }, [0 x i64] } %"unwind::libunwind::_Unwind_Exception.9.51.75.99.123.147.163.171.179.195.203.211.227.385.396" = type { [0 x i64], i64, [0 x i64], ptr, [0 x i64], [6 x i64], [0 x i64] } diff --git a/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll b/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll index 6d313a6cb417d..e603ee980ef38 100644 --- a/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll +++ b/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='early-cse' -earlycse-debug-hash < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" ; CHECK-LABEL: @memrealtime( ; CHECK: call i64 @llvm.amdgcn.s.memrealtime() diff --git a/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll b/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll index 9ef153183cc9e..f09a51c48a52f 100644 --- a/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll +++ 
b/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll @@ -3,7 +3,7 @@ ; RUN: opt -passes=attributor-light -S < %s | FileCheck --check-prefixes=COMMON,ATTRIBUTOR %s ;; target triple = "amdgcn-amd-amdhsa" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" define amdgpu_kernel void @test_make_buffer_rsrc(ptr %p, ptr %q) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) diff --git a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll index 88eff971b9576..0c2db4a2862b2 100644 --- a/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll +++ b/llvm/test/Transforms/IRNormalizer/regression-convergence-tokens.ll @@ -7,9 +7,9 @@ define i32 @nested(i32 %src) #0 { ; CHECK-SAME: i32 [[A0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[BB15160:.*:]] ; CHECK-NEXT: [[T1:%.*]] = call token @llvm.experimental.convergence.entry() -; CHECK-NEXT: %"vl77672llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor() -; CHECK-NEXT: %"op68297(vl77672)" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token %"vl77672llvm.experimental.convergence.anchor()") ] -; CHECK-NEXT: ret i32 %"op68297(vl77672)" +; CHECK-NEXT: %"vl14659llvm.experimental.convergence.anchor()" = call token @llvm.experimental.convergence.anchor() +; CHECK-NEXT: %"op15516(vl14659)" = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[A0]]) [ "convergencectrl"(token 
%"vl14659llvm.experimental.convergence.anchor()") ] +; CHECK-NEXT: ret i32 %"op15516(vl14659)" ; %t1 = call token @llvm.experimental.convergence.entry() %t2 = call token @llvm.experimental.convergence.anchor() diff --git a/llvm/test/Transforms/IRNormalizer/regression-infinite-loop.ll b/llvm/test/Transforms/IRNormalizer/regression-infinite-loop.ll index 35ac0fd8c329a..b9be105bcda48 100644 --- a/llvm/test/Transforms/IRNormalizer/regression-infinite-loop.ll +++ b/llvm/test/Transforms/IRNormalizer/regression-infinite-loop.ll @@ -8,18 +8,18 @@ define void @test(ptr, i32) { ; CHECK-NEXT: %"vl72693([[A1]], 1)" = add i32 [[A1]], 1 ; CHECK-NEXT: br label %[[BB16110:.*]] ; CHECK: [[BB16110]]: -; CHECK-NEXT: %"op10912(op18080, vl72693)" = phi i32 [ %"op18080(op10412, op17645)", %[[BB16110]] ], [ %"vl72693([[A1]], 1)", %[[BB76951]] ] -; CHECK-NEXT: %"op10912(op17645, vl72693)" = phi i32 [ %"op17645(op10912)70", %[[BB16110]] ], [ %"vl72693([[A1]], 1)", %[[BB76951]] ] -; CHECK-NEXT: %"op15084(op10912)" = mul i32 %"op10912(op18080, vl72693)", undef -; CHECK-NEXT: %"op16562(op15084)" = xor i32 -1, %"op15084(op10912)" -; CHECK-NEXT: %"op44627(op10912, op16562)" = add i32 %"op10912(op18080, vl72693)", %"op16562(op15084)" -; CHECK-NEXT: %"op17645(op10912)" = add i32 -1, %"op10912(op17645, vl72693)" -; CHECK-NEXT: %"op18080(op17645, op44627)" = add i32 %"op17645(op10912)", %"op44627(op10912, op16562)" -; CHECK-NEXT: %"op17720(op15084, op18080)" = mul i32 %"op15084(op10912)", %"op18080(op17645, op44627)" -; CHECK-NEXT: %"op16562(op17720)" = xor i32 -1, %"op17720(op15084, op18080)" -; CHECK-NEXT: %"op17430(op16562, op18080)" = add i32 %"op16562(op17720)", %"op18080(op17645, op44627)" +; CHECK-NEXT: %"op81283(op18080, vl72693)" = phi i32 [ %"op18080(op10412, op18131)", %[[BB16110]] ], [ %"vl72693([[A1]], 1)", %[[BB76951]] ] +; CHECK-NEXT: %"op81283(op18131, vl72693)" = phi i32 [ %"op18131(op81283)70", %[[BB16110]] ], [ %"vl72693([[A1]], 1)", %[[BB76951]] ] +; CHECK-NEXT: 
%"op13219(op81283)" = mul i32 %"op81283(op18080, vl72693)", undef +; CHECK-NEXT: %"op16562(op13219)" = xor i32 -1, %"op13219(op81283)" +; CHECK-NEXT: %"op12556(op16562, op81283)" = add i32 %"op16562(op13219)", %"op81283(op18080, vl72693)" +; CHECK-NEXT: %"op18131(op81283)" = add i32 -1, %"op81283(op18131, vl72693)" +; CHECK-NEXT: %"op18080(op12556, op18131)" = add i32 %"op12556(op16562, op81283)", %"op18131(op81283)" +; CHECK-NEXT: %"op17720(op13219, op18080)" = mul i32 %"op13219(op81283)", %"op18080(op12556, op18131)" +; CHECK-NEXT: %"op16562(op17720)" = xor i32 -1, %"op17720(op13219, op18080)" +; CHECK-NEXT: %"op17430(op16562, op18080)" = add i32 %"op16562(op17720)", %"op18080(op12556, op18131)" ; CHECK-NEXT: %"op10412(op17430)" = add i32 %"op17430(op16562, op18080)", undef -; CHECK-NEXT: %"op17720(op10412, op17720)" = mul i32 %"op10412(op17430)", %"op17720(op15084, op18080)" +; CHECK-NEXT: %"op17720(op10412, op17720)" = mul i32 %"op10412(op17430)", %"op17720(op13219, op18080)" ; CHECK-NEXT: %"op16562(op17720)1" = xor i32 -1, %"op17720(op10412, op17720)" ; CHECK-NEXT: %"op17430(op10412, op16562)" = add i32 %"op10412(op17430)", %"op16562(op17720)1" ; CHECK-NEXT: %"op10412(op17430)2" = add i32 %"op17430(op10412, op16562)", undef @@ -45,11 +45,11 @@ define void @test(ptr, i32) { ; CHECK-NEXT: %"op17720(op10412, op17720)21" = mul i32 %"op10412(op17430)20", %"op17720(op10412, op17720)17" ; CHECK-NEXT: %"op16562(op17720)22" = xor i32 -1, %"op17720(op10412, op17720)21" ; CHECK-NEXT: %"op17430(op10412, op16562)23" = add i32 %"op10412(op17430)20", %"op16562(op17720)22" -; CHECK-NEXT: %"op17645(op10912)24" = add i32 -9, %"op10912(op17645, vl72693)" -; CHECK-NEXT: %"op18080(op17430, op17645)" = add i32 %"op17430(op10412, op16562)23", %"op17645(op10912)24" -; CHECK-NEXT: %"op17720(op17720, op18080)" = mul i32 %"op17720(op10412, op17720)21", %"op18080(op17430, op17645)" +; CHECK-NEXT: %"op18131(op81283)24" = add i32 -9, %"op81283(op18131, vl72693)" +; CHECK-NEXT: 
%"op18080(op17430, op18131)" = add i32 %"op17430(op10412, op16562)23", %"op18131(op81283)24" +; CHECK-NEXT: %"op17720(op17720, op18080)" = mul i32 %"op17720(op10412, op17720)21", %"op18080(op17430, op18131)" ; CHECK-NEXT: %"op16562(op17720)25" = xor i32 -1, %"op17720(op17720, op18080)" -; CHECK-NEXT: %"op17430(op16562, op18080)26" = add i32 %"op16562(op17720)25", %"op18080(op17430, op17645)" +; CHECK-NEXT: %"op17430(op16562, op18080)26" = add i32 %"op16562(op17720)25", %"op18080(op17430, op18131)" ; CHECK-NEXT: %"op10412(op17430)27" = add i32 %"op17430(op16562, op18080)26", undef ; CHECK-NEXT: %"op17720(op10412, op17720)28" = mul i32 %"op10412(op17430)27", %"op17720(op17720, op18080)" ; CHECK-NEXT: %"op16562(op17720)29" = xor i32 -1, %"op17720(op10412, op17720)28" @@ -66,11 +66,11 @@ define void @test(ptr, i32) { ; CHECK-NEXT: %"op17720(op10412, op17720)40" = mul i32 %"op10412(op17430)39", %"op17720(op10412, op17720)36" ; CHECK-NEXT: %"op16562(op17720)41" = xor i32 -1, %"op17720(op10412, op17720)40" ; CHECK-NEXT: %"op17430(op10412, op16562)42" = add i32 %"op10412(op17430)39", %"op16562(op17720)41" -; CHECK-NEXT: %"op17645(op10912)43" = add i32 -14, %"op10912(op17645, vl72693)" -; CHECK-NEXT: %"op18080(op17430, op17645)44" = add i32 %"op17430(op10412, op16562)42", %"op17645(op10912)43" -; CHECK-NEXT: %"op17720(op17720, op18080)45" = mul i32 %"op17720(op10412, op17720)40", %"op18080(op17430, op17645)44" +; CHECK-NEXT: %"op18131(op81283)43" = add i32 -14, %"op81283(op18131, vl72693)" +; CHECK-NEXT: %"op18080(op17430, op18131)44" = add i32 %"op17430(op10412, op16562)42", %"op18131(op81283)43" +; CHECK-NEXT: %"op17720(op17720, op18080)45" = mul i32 %"op17720(op10412, op17720)40", %"op18080(op17430, op18131)44" ; CHECK-NEXT: %"op16562(op17720)46" = xor i32 -1, %"op17720(op17720, op18080)45" -; CHECK-NEXT: %"op17430(op16562, op18080)47" = add i32 %"op16562(op17720)46", %"op18080(op17430, op17645)44" +; CHECK-NEXT: %"op17430(op16562, op18080)47" = add i32 
%"op16562(op17720)46", %"op18080(op17430, op18131)44" ; CHECK-NEXT: %"op10412(op17430)48" = add i32 %"op17430(op16562, op18080)47", undef ; CHECK-NEXT: %"op17720(op10412, op17720)49" = mul i32 %"op10412(op17430)48", %"op17720(op17720, op18080)45" ; CHECK-NEXT: %"op16562(op17720)50" = xor i32 -1, %"op17720(op10412, op17720)49" @@ -93,9 +93,9 @@ define void @test(ptr, i32) { ; CHECK-NEXT: %"op17430(op10412, op16562)67" = add i32 %"op10412(op17430)64", %"op16562(op17720)66" ; CHECK-NEXT: %"op10412(op17430)68" = add i32 %"op17430(op10412, op16562)67", undef ; CHECK-NEXT: %"op10412(op10412)69" = add i32 %"op10412(op17430)68", undef -; CHECK-NEXT: %"op17645(op10912)70" = add i32 -21, %"op10912(op17645, vl72693)" -; CHECK-NEXT: %"op18080(op10412, op17645)" = add i32 %"op10412(op10412)69", %"op17645(op10912)70" -; CHECK-NEXT: store i32 %"op18080(op10412, op17645)", ptr [[A0]], align 4 +; CHECK-NEXT: %"op18131(op81283)70" = add i32 -21, %"op81283(op18131, vl72693)" +; CHECK-NEXT: %"op18080(op10412, op18131)" = add i32 %"op10412(op10412)69", %"op18131(op81283)70" +; CHECK-NEXT: store i32 %"op18080(op10412, op18131)", ptr [[A0]], align 4 ; CHECK-NEXT: br label %[[BB16110]] ; bb: diff --git a/llvm/test/Transforms/IRNormalizer/reordering-basic.ll b/llvm/test/Transforms/IRNormalizer/reordering-basic.ll index fd09ce016addb..06e67e0feb7e4 100644 --- a/llvm/test/Transforms/IRNormalizer/reordering-basic.ll +++ b/llvm/test/Transforms/IRNormalizer/reordering-basic.ll @@ -28,16 +28,16 @@ define double @baz(double %x) { ; CHECK-SAME: double [[A0:%.*]]) { ; CHECK-NEXT: [[BB76951:.*:]] ; CHECK-NEXT: [[IFCOND:%.*]] = fcmp one double [[A0]], 0.000000e+00 -; CHECK-NEXT: br i1 [[IFCOND]], label %[[BB91455:.*]], label %[[BB914551:.*]] -; CHECK: [[BB91455]]: -; CHECK-NEXT: %"vl15001bir()" = call double @bir() +; CHECK-NEXT: br i1 [[IFCOND]], label %[[BB47054:.*]], label %[[BB470541:.*]] +; CHECK: [[BB47054]]: +; CHECK-NEXT: %"vl16994bir()" = call double @bir() ; CHECK-NEXT: br label 
%[[BB17254:.*]] -; CHECK: [[BB914551]]: -; CHECK-NEXT: %"vl69719bar()" = call double @bar() +; CHECK: [[BB470541]]: +; CHECK-NEXT: %"vl88592bar()" = call double @bar() ; CHECK-NEXT: br label %[[BB17254]] ; CHECK: [[BB17254]]: -; CHECK-NEXT: %"op19734(vl15001, vl69719)" = phi double [ %"vl15001bir()", %[[BB91455]] ], [ %"vl69719bar()", %[[BB914551]] ] -; CHECK-NEXT: ret double %"op19734(vl15001, vl69719)" +; CHECK-NEXT: %"op16411(vl16994, vl88592)" = phi double [ %"vl16994bir()", %[[BB47054]] ], [ %"vl88592bar()", %[[BB470541]] ] +; CHECK-NEXT: ret double %"op16411(vl16994, vl88592)" ; entry: %ifcond = fcmp one double %x, 0.000000e+00 diff --git a/llvm/test/Transforms/IRNormalizer/reordering.ll b/llvm/test/Transforms/IRNormalizer/reordering.ll index 64abe8eb56ce1..a3dbcb5494875 100644 --- a/llvm/test/Transforms/IRNormalizer/reordering.ll +++ b/llvm/test/Transforms/IRNormalizer/reordering.ll @@ -23,7 +23,7 @@ declare void @effecting() ; Place dead instruction(s) before the terminator define void @call_effecting() { ; CHECK-LABEL: define void @call_effecting() { -; CHECK-NEXT: bb15160: +; CHECK-NEXT: bb14885: ; CHECK-NEXT: call void @effecting() ; CHECK-NEXT: [[TMP0:%.*]] = add i32 0, 1 ; CHECK-NEXT: ret void @@ -51,7 +51,7 @@ exit: define void @dont_move_above_alloca() { ; CHECK-LABEL: define void @dont_move_above_alloca() { -; CHECK-NEXT: bb15160: +; CHECK-NEXT: bb14885: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @effecting() ; CHECK-NEXT: ret void @@ -65,7 +65,7 @@ declare void @effecting1() define void @dont_reorder_effecting() { ; CHECK-LABEL: define void @dont_reorder_effecting() { -; CHECK-NEXT: bb10075: +; CHECK-NEXT: bb45003: ; CHECK-NEXT: call void @effecting() ; CHECK-NEXT: call void @effecting1() ; CHECK-NEXT: ret void @@ -79,7 +79,7 @@ declare void @effecting2(i32) define void @dont_reorder_effecting1() { ; CHECK-LABEL: define void @dont_reorder_effecting1() { -; CHECK-NEXT: bb10075: +; CHECK-NEXT: bb45003: ; CHECK-NEXT: 
[[ONE:%.*]] = add i32 1, 1 ; CHECK-NEXT: call void @effecting2(i32 [[ONE]]) ; CHECK-NEXT: [[TWO:%.*]] = add i32 2, 2 diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll index 422ac0dfd2cd4..f6619b791e7ef 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll @@ -1,7 +1,7 @@ ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -passes=infer-address-spaces %s | FileCheck -check-prefixes=COMMON,AMDGCN %s ; RUN: opt -S -o - -passes=infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck -check-prefixes=COMMON,NOTTI %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" ; COMMON-LABEL: @noop_ptrint_pair( ; AMDGCN-NEXT: store i32 0, ptr addrspace(1) %{{.*}} diff --git a/llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll b/llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll index 0eaf6e32e5a94..fbdac1e07ca16 100644 --- a/llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll +++ b/llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll @@ -2,7 +2,7 @@ ; Check that assert in X86TargetMachine::isNoopAddrSpaceCast is not triggered. 
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" ; CHECK-LABEL: @noop_ptrint_pair( ; CHECK: addrspacecast ptr addrspace(1) %x to ptr addrspace(4) diff --git a/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll b/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll index 83379f6a54fc1..f8a6f843cc249 100644 --- a/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll +++ b/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll @@ -3,7 +3,7 @@ ; RUN: opt -passes='require,loop-simplify,loop-load-elim' -S %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" target triple = "amdgcn-amd-amdhsa" %struct.foo = type { %struct.pluto, i8, ptr, i32 } diff --git a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll index 4dfa8cc828248..1341a5ec57b02 100644 --- a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll +++ b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll @@ -2,7 +2,7 @@ ; Verify the address space cast doesn't cause a crash -target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, ptr } %"struct.(anonymous namespace)::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 } diff --git a/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll b/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll index 19d41f9d1e301..3f4077bd7e027 100644 --- a/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll +++ b/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -passes=openmp-opt < %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" %"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr } diff --git a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll index 63b54bfddf9de..70c0d046dad4e 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll @@ -8,7 +8,7 @@ ; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode ; 
CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode ; -target datalayout = "A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32" +target datalayout = "A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32" target triple = "amdgcn-amd-amdhsa" %struct.ident_t = type { i32, i32, i32, i32, ptr } diff --git a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll index 52be16c41f872..d3e8e98b6f510 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" %struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy.8, ptr, ptr } diff --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll index 74871a2babcb1..4e8bae08c87c6 100644 --- a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll +++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | 
FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" @.__omp_offloading_heavyComputation.region_id = weak constant i8 0 @.offload_maptypes. = private unnamed_addr constant [2 x i64] [i64 35, i64 35] diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index 5eef8ee87b4dd..76fe4daca1e61 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -41,12 +41,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { // Check that AMDGPU targets add -G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1"); // and that ANDGCN adds p7 and p8 as well. - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); // but that r600 does not. 
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G1", "r600"), "e-p:32:32-G1"); @@ -60,7 +60,8 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { "amdgcn"), "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-" "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:" - "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-" + "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:" + "128:48-" "p9:192:256:256:32"); // Check that RISCV64 upgrades -n64 to -n32:64. @@ -144,23 +145,26 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { // Check that AMDGPU targets don't add -G1 if there is already a -G flag. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2"); EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), + "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), + "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), + "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); + + // Check that AMDGCN targets don't add already declared address space 7. 
EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), - "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + UpgradeDataLayoutString("e-p:64:64-p7:64:64", "amdgcn"), + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128:128:48-p9:192:256:256:32"); EXPECT_EQ( - UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), - "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + UpgradeDataLayoutString("p7:64:64-G2-e-p:64:64", "amdgcn"), + "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128:128:48-p9:192:256:256:32"); EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), - "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - - // Check that AMDGCN targets don't add already declared address space 7. - EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ(UpgradeDataLayoutString("p7:64:64-G2-e-p:64:64", "amdgcn"), - "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64-G1", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); + UpgradeDataLayoutString("e-p:64:64-p7:64:64-G1", "amdgcn"), + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128:128:48-p9:192:256:256:32"); // Check that SPIR & SPIRV targets don't add -G1 if there is already a -G // flag. @@ -191,8 +195,9 @@ TEST(DataLayoutUpgradeTest, EmptyDataLayout) { // Check that AMDGPU targets add G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("", "r600"), "G1"); - EXPECT_EQ(UpgradeDataLayoutString("", "amdgcn"), - "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ( + UpgradeDataLayoutString("", "amdgcn"), + "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32"); // Check that SPIR & SPIRV targets add G1 if it's not present. 
EXPECT_EQ(UpgradeDataLayoutString("", "spir"), "G1"); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 1a1bb031afa7c..054c9d6a9bb02 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -644,7 +644,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { std::string oldDLStr = M->getDataLayoutStr(); M->setDataLayout( "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" - "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" + "256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-" + "v192:" "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = true; @@ -2349,7 +2350,8 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; M->setDataLayout( "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" - "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" + "256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-" + "v192:" "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = true; diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index 239e84d6024ff..9ea8de3da1e5b 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -677,7 +677,7 @@ TEST(CodeExtractor, OpenMPAggregateArgs) { LLVMContext Ctx; SMDiagnostic Err; std::unique_ptr M(parseAssemblyString(R"ir( - target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" define void @foo(ptr %0) { diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index dd16ec4b73e9f..c900d4cba612a 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -94,7 +94,8 @@ static Value getLaneId(ConversionPatternRewriter &rewriter, Location loc, } static constexpr StringLiteral amdgcnDataLayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" + "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:" + "32-v32:" "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:" "64-S32-A5-G1-ni:7:8:9"; diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index d28aa9e34c22a..79d0f5dd3e61e 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -3,7 +3,7 @@ // RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s // CHECK-LABEL: @test_module -// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// 
CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() diff --git a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir index 633df96866885..13c18401cafab 100644 --- a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir @@ -4,7 +4,7 @@ // alignment of loaded objects is passed to outlined // functions. -module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { +module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { omp.private {type = private} @_QFEk_private_i32 : i32 llvm.func @_QQmain() { %0 = llvm.mlir.constant(1 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir index 1a4c081414c9e..b8b7c780a74d0 100644 --- a/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir @@ -3,7 +3,7 @@ // Only check the overall shape of the code and the presence of relevant // runtime calls. 
Actual IR checking is done at the OpenMPIRBuilder level. -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { omp.private {type = private} @_QFEj_private_i32 : i32 omp.declare_reduction @add_reduction_f32 : f32 init { ^bb0(%arg0: f32): diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir index 593d8010f55de..60c6fa4dd8f1e 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the LLVM IR codegen for the device // for omp target parallel construct -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout 
= "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget} { %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"} omp.target map_entries(%0 -> %arg2 : !llvm.ptr) { diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir index 649210795ff5c..830610f12a5d2 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the GPU LLVM IR codegen // for nested omp do loop inside omp target region -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget, target_cpu = "gfx90a", 
target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>} diff --git a/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir index a2500f3a579dd..b978354e25329 100644 --- a/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir @@ -3,7 +3,7 @@ // Regression tset for calling a function using pointer alloca'ed on // device for private variable -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { omp.private {type = private} @_QMmodFfailingEi_private_i32 : i32 llvm.func @_QMotherProutine(%arg0: !llvm.ptr {fir.bindc_name = "i", llvm.nocapture}) attributes {frame_pointer = #llvm.framePointerKind, omp.declare_target = #omp.declaretarget, target_cpu = "gfx90a", target_features = #llvm.target_features<["+16-bit-insts", "+atomic-buffer-global-pk-add-f16-insts", "+atomic-fadd-rtn-insts", "+ci-insts", "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts", "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", "+gfx8-insts", "+gfx9-insts", "+gfx90a-insts", "+gws", "+image-insts", "+mai-insts", "+s-memrealtime", 
"+s-memtime-inst", "+wavefrontsize64"]>} { llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir index af8fe7aacc336..9aba72dabf13c 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir @@ -3,7 +3,7 @@ // Only check the overall shape of the code and the presence of relevant // runtime calls. Actual IR checking is done at the OpenMPIRBuilder level. -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { omp.private {type = private} @_QFsimple_target_teams_only_reductionEindex__private_i32 : i32 omp.declare_reduction @add_reduction_i32 : i32 init { ^bb0(%arg0: i32): diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir index edfb2839d6604..dc22fe11666cf 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir @@ -3,7 +3,7 @@ // Only check the overall shape of the code and the presence of relevant // runtime calls. 
Actual IR checking is done at the OpenMPIRBuilder level. -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { omp.declare_reduction @add_reduction_i32 : i32 init { ^bb0(%arg0: i32): %0 = llvm.mlir.constant(0 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir index b7aecec308ef3..0ebcec0e0ec31 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the GPU LLVM IR codegen // for nested omp do loop with collapse clause inside omp target region -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : 
ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget} { %loop_ub = llvm.mlir.constant(99 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir index e2a8d88bd181a..a9f913b744489 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the GPU LLVM IR codegen // for nested omp do loop inside omp target region -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { llvm.func @target_wsloop(%arg0: !llvm.ptr ) attributes {omp.declare_target = #omp.declaretarget} { %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32