From ab58488b42bf45b53123ad74672338231c92ec31 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Mon, 4 Nov 2024 17:09:53 +0000
Subject: [PATCH 1/4] [NVPTX][docs] Add isspacep.* to usage doc

---
 llvm/docs/NVPTXUsage.rst | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index f225b9e8bd268..eed68155c7319 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -250,6 +250,36 @@ The ``@llvm.nvvm.fence.proxy.tensormap_generic.*`` is a uni-directional fence us
 The address operand ``addr`` and the operand ``size`` together specify the memory range ``[addr, addr+size)`` on which the ordering guarantees on the memory accesses across the proxies is to be provided. The only supported value for the ``size`` operand is ``128`` and must be an immediate. Generic Addressing is used unconditionally, and the address specified by the operand addr must fall within the ``.global`` state space. Otherwise, the behavior is undefined. For more information, see `PTX ISA `_.
 
+Address Space Intrinsics
+------------------------
+
+'``llvm.nvvm.isspacep.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+  declare i1 @llvm.nvvm.isspacep.const(ptr %p)
+  declare i1 @llvm.nvvm.isspacep.global(ptr %p)
+  declare i1 @llvm.nvvm.isspacep.local(ptr %p)
+  declare i1 @llvm.nvvm.isspacep.shared(ptr %p)
+  declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p)
+
+Overview:
+"""""""""
+
+The '``llvm.nvvm.isspacep.*``' intrinsics determine whether the provided generic
+pointer references memory which falls within a particular address space.
+
+Semantics:
+""""""""""
+
+If the given pointer in the generic address space refers to memory which falls
+within the state space of the intrinsic (and therefore could safely be cast to
+this space via ``addrspacecast``), 1 is returned; otherwise, 0 is returned.
+
 Arithmetic Intrinsics
 ---------------------
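
(Aside, not part of the patch: a minimal IR sketch of how these intrinsics are
typically used, guarding an ``addrspacecast`` with the matching check. Function
and value names here are illustrative only.)

  ; Branch on isspacep.global before committing to a global-space access.
  define i32 @load_if_global(ptr %p) {
  entry:
    %is.global = call i1 @llvm.nvvm.isspacep.global(ptr %p)
    br i1 %is.global, label %fast, label %generic

  fast:                                     ; the cast is known to be valid here
    %g = addrspacecast ptr %p to ptr addrspace(1)
    %v = load i32, ptr addrspace(1) %g
    ret i32 %v

  generic:                                  ; fall back to a generic-space load
    %v2 = load i32, ptr %p
    ret i32 %v2
  }

  declare i1 @llvm.nvvm.isspacep.global(ptr)
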
From b832e93a6b6aa1a15946e09a41066e86d878b807 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Thu, 31 Oct 2024 20:25:07 +0000
Subject: [PATCH 2/4] [NVPTX] Add TTI support for folding isspacep in InferAS

---
 .../Target/NVPTX/NVPTXTargetTransformInfo.cpp |  83 +++++++---
 .../Target/NVPTX/NVPTXTargetTransformInfo.h   |   6 +
 .../InferAddressSpaces/NVPTX/isspacep.ll      | 144 ++++++++++++++++++
 3 files changed, 209 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 3507573df1869..5df211726882e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -416,33 +416,34 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
   llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
 }
 
+ // Returns true/false when we know the answer, nullopt otherwise.
+static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
+  if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
+      AS == NVPTXAS::ADDRESS_SPACE_PARAM)
+    return std::nullopt; // Got to check at run-time.
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_global:
+    return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
+  case Intrinsic::nvvm_isspacep_local:
+    return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
+  case Intrinsic::nvvm_isspacep_shared:
+    return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
+  case Intrinsic::nvvm_isspacep_shared_cluster:
+    // We can't tell shared from shared_cluster at compile time from AS alone,
+    // but it can't be either if AS is not shared.
+    return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
+                                               : std::optional{false};
+  case Intrinsic::nvvm_isspacep_const:
+    return AS == NVPTXAS::ADDRESS_SPACE_CONST;
+  default:
+    llvm_unreachable("Unexpected intrinsic");
+  }
+}
+
 // Returns an instruction pointer (may be nullptr if we do not know the answer).
 // Returns nullopt if `II` is not one of the `isspacep` intrinsics.
 static std::optional<Instruction *>
 handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
-  // Returns true/false when we know the answer, nullopt otherwise.
-  auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
-    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
-        AS == NVPTXAS::ADDRESS_SPACE_PARAM)
-      return std::nullopt; // Got to check at run-time.
-    switch (IID) {
-    case Intrinsic::nvvm_isspacep_global:
-      return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
-    case Intrinsic::nvvm_isspacep_local:
-      return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
-    case Intrinsic::nvvm_isspacep_shared:
-      return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
-    case Intrinsic::nvvm_isspacep_shared_cluster:
-      // We can't tell shared from shared_cluster at compile time from AS alone,
-      // but it can't be either is AS is not shared.
-      return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
-                                                 : std::optional{false};
-    case Intrinsic::nvvm_isspacep_const:
-      return AS == NVPTXAS::ADDRESS_SPACE_CONST;
-    default:
-      llvm_unreachable("Unexpected intrinsic");
-    }
-  };
 
   switch (auto IID = II.getIntrinsicID()) {
   case Intrinsic::nvvm_isspacep_global:
@@ -458,7 +459,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
     if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
       AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
 
-    if (std::optional<bool> Answer = CheckASMatch(IID, AS))
+    if (std::optional<bool> Answer = evaluateIsSpace(IID, AS))
       return IC.replaceInstUsesWith(II,
                                     ConstantInt::get(II.getType(), *Answer));
     return nullptr; // Don't know the answer, got to check at run time.
@@ -525,3 +526,37 @@ void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::PeelingPreferences &PP) {
   BaseT::getPeelingPreferences(L, SE, PP);
 }
+
+bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+                                              Intrinsic::ID IID) const {
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_const:
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_shared_cluster: {
+    OpIndexes.push_back(0);
+    return true;
+  }
+  }
+  return false;
+}
+
+Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+                                                      Value *OldV,
+                                                      Value *NewV) const {
+  const Intrinsic::ID IID = II->getIntrinsicID();
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_const:
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_shared_cluster: {
+    const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+    if (const auto R = evaluateIsSpace(IID, NewAS))
+      return ConstantInt::get(II->getType(), *R);
+    return nullptr;
+  }
+  }
+  return nullptr;
+}
\ No newline at end of file
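
(Aside, not part of the diff: a simplified sketch of the contract the two hooks
above implement for InferAddressSpaces. The driver below is paraphrased; the
real logic lives in llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp and
differs in detail. Only the two TTI entry points come from this patch, and
`inferAddrSpacePointer` is a hypothetical stand-in.)

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/IntrinsicInst.h"
  using namespace llvm;

  // Hypothetical stand-in for the pass's own inference machinery; returns the
  // operand rewritten into a concrete address space, or null if unknown.
  Value *inferAddrSpacePointer(Value *V);

  // Paraphrased driver: how the pass consults the two hooks added above.
  void rewriteIsSpaceCheck(IntrinsicInst *II, const TargetTransformInfo &TTI) {
    SmallVector<int, 2> OpIndexes;
    // Hook 1: the target lists which operands are flat (generic) pointers
    // worth tracing to a concrete address space.
    if (!TTI.collectFlatAddressOperands(OpIndexes, II->getIntrinsicID()))
      return; // Target does not handle this intrinsic.
    for (int Idx : OpIndexes) {
      Value *OldV = II->getArgOperand(Idx);
      Value *NewV = inferAddrSpacePointer(OldV);
      if (!NewV)
        continue;
      // Hook 2: the target may fold the whole call once the space is known,
      // e.g. isspacep.global of an addrspace(1) pointer becomes `i1 true`.
      if (Value *Folded = TTI.rewriteIntrinsicWithAddressSpace(II, OldV, NewV)) {
        II->replaceAllUsesWith(Folded);
        return;
      }
    }
  }
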
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 86140daa7be48..0f4fb280b2d99 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -123,6 +123,12 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
       return true;
     }
   }
+
+  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+                                  Intrinsic::ID IID) const;
+
+  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+                                          Value *NewV) const;
 };
 
 } // end namespace llvm

diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
new file mode 100644
index 0000000000000..348fa688770df
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces,bdce %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) readnone noinline
+
+define i1 @test_isspacep_const_true(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_true(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_const_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_global_true(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_true(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_global_false(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_false(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_local_true(ptr addrspace(5) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_true(
+; CHECK-SAME: ptr addrspace(5) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(5) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_local_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_shared_true(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_true(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_unsure(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_unsure(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr i8, ptr addrspace(3) [[ADDR]], i32 10
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[ADDR1]] to ptr
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr [[TMP0]])
+; CHECK-NEXT: ret i1 [[VAL]]
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+  ret i1 %val
+}
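
(Aside, not part of the patch: the CHECK lines above were generated with
utils/update_test_checks.py, per the NOTE line. One fold is easy to reproduce
by hand; the file name below is hypothetical.)

  ; repro.ll: run with
  ;   opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces,bdce repro.ll
  ; Expected output body: `ret i1 true`, as in test_isspacep_global_true above.
  define i1 @repro(ptr addrspace(1) %p) {
    %gen = addrspacecast ptr addrspace(1) %p to ptr
    %val = call i1 @llvm.nvvm.isspacep.global(ptr %gen)
    ret i1 %val
  }

  declare i1 @llvm.nvvm.isspacep.global(ptr)
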
From 431f4f771441dbf1d02e15f8ad3b6f0ecd51c658 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Thu, 31 Oct 2024 23:25:48 +0000
Subject: [PATCH 3/4] address formatting

---
 llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 5df211726882e..46c909295ade7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -416,7 +416,7 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
   llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
 }
 
- // Returns true/false when we know the answer, nullopt otherwise.
+// Returns true/false when we know the answer, nullopt otherwise.
 static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
   if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
       AS == NVPTXAS::ADDRESS_SPACE_PARAM)

From 05f4e4ed6e03c8935bb44e631d07bc284bf913c4 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Thu, 7 Nov 2024 23:02:08 +0000
Subject: [PATCH 4/4] address comment

---
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp       | 2 ++
 llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a5a147da8da1c..31eb0b4fd7b72 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -302,6 +302,8 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() {
   // be eliminated by SROA.
   addPass(createSROAPass());
   addPass(createNVPTXLowerAllocaPass());
+  // TODO: Consider running InferAddressSpaces during opt, earlier in the
+  // compilation flow.
   addPass(createInferAddressSpacesPass());
   addPass(createNVPTXAtomicLowerPass());
 }

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 46c909295ade7..8d482ffb27b14 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -442,6 +442,10 @@ static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
 }
 
 // Returns an instruction pointer (may be nullptr if we do not know the answer).
 // Returns nullopt if `II` is not one of the `isspacep` intrinsics.
+//
+// TODO: If InferAddressSpaces were run early enough in the pipeline this could
+// be removed in favor of the constant folding that occurs there through
+// rewriteIntrinsicWithAddressSpace
 static std::optional<Instruction *>
 handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
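
(Closing aside, not part of the series: what the TODO in patch 4 points at. The
new TTI hooks already let a plain opt invocation of InferAddressSpaces perform
the fold, with no help from InstCombine's handleSpaceCheckIntrinsics, which is
why that special case could eventually be retired. The input file name below is
hypothetical.)

  ; fold.ll: run with
  ;   opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces fold.ll
  ; Expected: the call below folds to `i1 true` via
  ; rewriteIntrinsicWithAddressSpace, without InstCombine being involved.
  define i1 @fold_without_instcombine(ptr addrspace(3) %p) {
    %gen = addrspacecast ptr addrspace(3) %p to ptr
    %val = call i1 @llvm.nvvm.isspacep.shared(ptr %gen)
    ret i1 %val
  }

  declare i1 @llvm.nvvm.isspacep.shared(ptr)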