diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index f225b9e8bd268..eed68155c7319 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -250,6 +250,36 @@ The ``@llvm.nvvm.fence.proxy.tensormap_generic.*`` is a uni-directional fence us
 The address operand ``addr`` and the operand ``size`` together specify the memory range ``[addr, addr+size)`` on which the ordering guarantees on the memory accesses across the proxies is to be provided. The only supported value for the ``size`` operand is ``128`` and must be an immediate. Generic Addressing is used unconditionally, and the address specified by the operand addr must fall within the ``.global`` state space. Otherwise, the behavior is undefined. For more information, see `PTX ISA `_.
 
+Address Space Intrinsics
+------------------------
+
+'``llvm.nvvm.isspacep.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+  declare i1 @llvm.nvvm.isspacep.const(ptr %p)
+  declare i1 @llvm.nvvm.isspacep.global(ptr %p)
+  declare i1 @llvm.nvvm.isspacep.local(ptr %p)
+  declare i1 @llvm.nvvm.isspacep.shared(ptr %p)
+  declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p)
+
+Overview:
+"""""""""
+
+The '``llvm.nvvm.isspacep.*``' intrinsics determine whether the provided generic
+pointer references memory which falls within a particular address space.
+
+Semantics:
+""""""""""
+
+If the given pointer in the generic address space refers to memory which falls
+within the state space of the intrinsic (and therefore could safely be cast to
+that space via ``addrspacecast``), 1 is returned, otherwise 0 is returned.
+
 Arithmetic Intrinsics
 ---------------------
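For illustration, here is a minimal IR sketch of the semantics documented above (a hypothetical fragment, not part of this patch or its tests): a generic pointer produced by ``addrspacecast`` from ``.shared`` memory answers true for the shared-space query, and that is exactly the origin information the rest of this patch teaches the compiler to exploit.

.. code-block:: llvm

  ; Hypothetical example: %p is known to originate in the .shared state
  ; space, so the intrinsic evaluates to true at run time and can be folded
  ; to true at compile time once that origin is proven.
  define i1 @is_shared_example(ptr addrspace(3) %q) {
    %p = addrspacecast ptr addrspace(3) %q to ptr
    %is_shared = call i1 @llvm.nvvm.isspacep.shared(ptr %p)
    ret i1 %is_shared
  }

  declare i1 @llvm.nvvm.isspacep.shared(ptr)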
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a5a147da8da1c..31eb0b4fd7b72 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -302,6 +302,8 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() {
   // be eliminated by SROA.
   addPass(createSROAPass());
   addPass(createNVPTXLowerAllocaPass());
+  // TODO: Consider running InferAddressSpaces during opt, earlier in the
+  // compilation flow.
   addPass(createInferAddressSpacesPass());
   addPass(createNVPTXAtomicLowerPass());
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 3507573df1869..8d482ffb27b14 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -416,33 +416,38 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
   llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
 }
 
+// Returns true/false when we know the answer, nullopt otherwise.
+static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
+  if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
+      AS == NVPTXAS::ADDRESS_SPACE_PARAM)
+    return std::nullopt; // Got to check at run-time.
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_global:
+    return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
+  case Intrinsic::nvvm_isspacep_local:
+    return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
+  case Intrinsic::nvvm_isspacep_shared:
+    return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
+  case Intrinsic::nvvm_isspacep_shared_cluster:
+    // We can't tell shared from shared_cluster at compile time from AS alone,
+    // but it can't be either if AS is not shared.
+    return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
+                                               : std::optional{false};
+  case Intrinsic::nvvm_isspacep_const:
+    return AS == NVPTXAS::ADDRESS_SPACE_CONST;
+  default:
+    llvm_unreachable("Unexpected intrinsic");
+  }
+}
+
 // Returns an instruction pointer (may be nullptr if we do not know the answer).
 // Returns nullopt if `II` is not one of the `isspacep` intrinsics.
+//
+// TODO: If InferAddressSpaces were run early enough in the pipeline this could
+// be removed in favor of the constant folding that occurs there through
+// rewriteIntrinsicWithAddressSpace
 static std::optional<Instruction *>
 handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
-  // Returns true/false when we know the answer, nullopt otherwise.
-  auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
-    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
-        AS == NVPTXAS::ADDRESS_SPACE_PARAM)
-      return std::nullopt; // Got to check at run-time.
-    switch (IID) {
-    case Intrinsic::nvvm_isspacep_global:
-      return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
-    case Intrinsic::nvvm_isspacep_local:
-      return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
-    case Intrinsic::nvvm_isspacep_shared:
-      return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
-    case Intrinsic::nvvm_isspacep_shared_cluster:
-      // We can't tell shared from shared_cluster at compile time from AS alone,
-      // but it can't be either is AS is not shared.
-      return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
-                                                 : std::optional{false};
-    case Intrinsic::nvvm_isspacep_const:
-      return AS == NVPTXAS::ADDRESS_SPACE_CONST;
-    default:
-      llvm_unreachable("Unexpected intrinsic");
-    }
-  };
 
   switch (auto IID = II.getIntrinsicID()) {
   case Intrinsic::nvvm_isspacep_global:
@@ -458,7 +463,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
     if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
       AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
 
-    if (std::optional<bool> Answer = CheckASMatch(IID, AS))
+    if (std::optional<bool> Answer = evaluateIsSpace(IID, AS))
       return IC.replaceInstUsesWith(II,
                                     ConstantInt::get(II.getType(), *Answer));
     return nullptr; // Don't know the answer, got to check at run time.
@@ -525,3 +530,37 @@ void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::PeelingPreferences &PP) {
   BaseT::getPeelingPreferences(L, SE, PP);
 }
+
+bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+                                              Intrinsic::ID IID) const {
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_const:
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_shared_cluster: {
+    OpIndexes.push_back(0);
+    return true;
+  }
+  }
+  return false;
+}
+
+Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+                                                      Value *OldV,
+                                                      Value *NewV) const {
+  const Intrinsic::ID IID = II->getIntrinsicID();
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_const:
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_shared_cluster: {
+    const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+    if (const auto R = evaluateIsSpace(IID, NewAS))
+      return ConstantInt::get(II->getType(), *R);
+    return nullptr;
+  }
+  }
+  return nullptr;
+}
\ No newline at end of file
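To make the two new TTI hooks concrete, here is a minimal before/after sketch of what InferAddressSpaces can now do (hypothetical function names, not taken from the patch or its tests): ``collectFlatAddressOperands`` marks operand 0 of the intrinsic as a flat pointer worth tracing, and once the pass proves the operand originates in ``addrspace(1)``, ``rewriteIntrinsicWithAddressSpace`` consults ``evaluateIsSpace`` and replaces the call with a constant.

.. code-block:: llvm

  ; Before infer-address-spaces: the intrinsic only sees a generic pointer.
  define i1 @before(ptr addrspace(1) %g) {
    %p = addrspacecast ptr addrspace(1) %g to ptr
    %v = call i1 @llvm.nvvm.isspacep.global(ptr %p)
    ret i1 %v
  }

  ; After the pass: the global-space query has been folded to a constant.
  define i1 @after(ptr addrspace(1) %g) {
    ret i1 true
  }

  declare i1 @llvm.nvvm.isspacep.global(ptr)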
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 86140daa7be48..0f4fb280b2d99 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -123,6 +123,12 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
       return true;
     }
   }
+
+  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+                                  Intrinsic::ID IID) const;
+
+  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+                                          Value *NewV) const;
 };
 
 } // end namespace llvm
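Conversely, when no origin can be proven, for example when the pointer arrives already in the generic address space, ``evaluateIsSpace`` returns ``nullopt`` and the call survives as a run-time check. A minimal sketch of that case (hypothetical, distinct from the tests below):

.. code-block:: llvm

  ; %p has no provable origin, so neither InstCombine nor
  ; InferAddressSpaces can fold the query; it stays a run-time check.
  define i1 @unknown_origin(ptr %p) {
    %v = call i1 @llvm.nvvm.isspacep.local(ptr %p)
    ret i1 %v
  }

  declare i1 @llvm.nvvm.isspacep.local(ptr)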
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
new file mode 100644
index 0000000000000..348fa688770df
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces,bdce %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) readnone noinline
+
+define i1 @test_isspacep_const_true(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_true(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_const_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_global_true(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_true(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_global_false(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_false(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_local_true(ptr addrspace(5) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_true(
+; CHECK-SAME: ptr addrspace(5) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(5) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_local_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_shared_true(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_true(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_unsure(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_unsure(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr i8, ptr addrspace(3) [[ADDR]], i32 10
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[ADDR1]] to ptr
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr [[TMP0]])
+; CHECK-NEXT: ret i1 [[VAL]]
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+  ret i1 %val
+}
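As a closing illustration of why this fold is useful (a hypothetical sketch, not part of the test file above): once ``isspacep.shared`` folds to a constant, later passes such as SimplifyCFG can delete the now-dead generic fallback path entirely.

.. code-block:: llvm

  define void @guarded_store(ptr addrspace(3) %q) {
  entry:
    %p = addrspacecast ptr addrspace(3) %q to ptr
    %is_shared = call i1 @llvm.nvvm.isspacep.shared(ptr %p)
    ; With this patch the condition folds to true, so the %slow branch
    ; becomes dead and can be removed by subsequent cleanup passes.
    br i1 %is_shared, label %fast, label %slow
  fast:
    store i8 1, ptr %p
    ret void
  slow:
    store i8 0, ptr %p
    ret void
  }

  declare i1 @llvm.nvvm.isspacep.shared(ptr)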