From 673d66954933fc6749fb7932d8d7304b80e0e831 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 17 Oct 2024 15:33:00 -0700 Subject: [PATCH 1/7] [NVPTX] instcombine known pointer AS checks. This avoids crashing on impossible address space casts guarded by `__isGlobal/__isShared`. --- llvm/include/llvm/Support/NVPTXAddrSpace.h | 33 +++ llvm/lib/Analysis/InstructionSimplify.cpp | 30 ++ .../Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 12 +- .../Transforms/InstCombine/NVPTX/isspacep.ll | 261 ++++++++++++++++++ 4 files changed, 326 insertions(+), 10 deletions(-) create mode 100644 llvm/include/llvm/Support/NVPTXAddrSpace.h create mode 100644 llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll diff --git a/llvm/include/llvm/Support/NVPTXAddrSpace.h b/llvm/include/llvm/Support/NVPTXAddrSpace.h new file mode 100644 index 0000000000000..063d2aaffdc57 --- /dev/null +++ b/llvm/include/llvm/Support/NVPTXAddrSpace.h @@ -0,0 +1,33 @@ +//===---------------- AMDGPUAddrSpace.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// AMDGPU address space definition +/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_NVPTXADDRSPACE_H +#define LLVM_SUPPORT_NVPTXADDRSPACE_H + +namespace llvm { +namespace NVPTXAS { +enum AddressSpace : unsigned { + ADDRESS_SPACE_GENERIC = 0, + ADDRESS_SPACE_GLOBAL = 1, + ADDRESS_SPACE_SHARED = 3, + ADDRESS_SPACE_CONST = 4, + ADDRESS_SPACE_LOCAL = 5, + + ADDRESS_SPACE_PARAM = 101, +}; +} // end namespace NVPTXAS + +} // end namespace llvm + +#endif // LLVM_SUPPORT_NVPTXADDRSPACE_H diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index d08be1e55c853..b525bc27d72b8 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -38,10 +38,12 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/NVPTXAddrSpace.h" #include <algorithm> #include <optional> using namespace llvm; @@ -6365,6 +6367,34 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, break; } + case Intrinsic::nvvm_isspacep_global: + case Intrinsic::nvvm_isspacep_local: + case Intrinsic::nvvm_isspacep_shared: + case Intrinsic::nvvm_isspacep_const: { + auto *Ty = F->getReturnType(); + unsigned AS = Op0->getType()->getPointerAddressSpace(); + if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC) { + if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Op0)) + AS = ASC->getSrcAddressSpace(); + else if (auto *CE = dyn_cast<ConstantExpr>(Op0)) { + if (CE->getOpcode() == Instruction::AddrSpaceCast) + AS = CE->getOperand(0)->getType()->getPointerAddressSpace(); + } + } + if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC || + AS == 
NVPTXAS::ADDRESS_SPACE_PARAM) + return nullptr; // Got to check at run-time. + bool ASMatches = (AS == NVPTXAS::ADDRESS_SPACE_GLOBAL && + IID == Intrinsic::nvvm_isspacep_global) || + (AS == NVPTXAS::ADDRESS_SPACE_LOCAL && + IID == Intrinsic::nvvm_isspacep_local) || + (AS == NVPTXAS::ADDRESS_SPACE_SHARED && + IID == Intrinsic::nvvm_isspacep_shared) || + (AS == NVPTXAS::ADDRESS_SPACE_CONST && + IID == Intrinsic::nvvm_isspacep_const); + return ConstantInt::get(Ty, ASMatches); + break; + } default: break; } diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index 815b600fe93a9..d06e2c00ec3f9 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -16,18 +16,10 @@ #ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H #define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H +#include "llvm/Support/NVPTXAddrSpace.h" namespace llvm { -enum AddressSpace { - ADDRESS_SPACE_GENERIC = 0, - ADDRESS_SPACE_GLOBAL = 1, - ADDRESS_SPACE_SHARED = 3, - ADDRESS_SPACE_CONST = 4, - ADDRESS_SPACE_LOCAL = 5, - - // NVVM Internal - ADDRESS_SPACE_PARAM = 101 -}; +using namespace NVPTXAS; namespace NVPTXII { enum { diff --git a/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll new file mode 100644 index 0000000000000..f53ec0120cfb3 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll @@ -0,0 +1,261 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s +target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +; Source data in different AS. 
+@shared_data = dso_local addrspace(3) global i32 undef, align 4 +@global_data = dso_local addrspace(1) externally_initialized global i32 0, align 4 +@const_data = dso_local addrspace(4) externally_initialized constant i32 3, align 4 + +; Results get stored here. +@gen = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@g1 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@g2 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@s1 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@s2 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@c1 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@c2 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@l = dso_local addrspace(1) externally_initialized global i8 0, align 1 + +declare i1 @llvm.nvvm.isspacep.global(ptr nocapture) +declare i1 @llvm.nvvm.isspacep.shared(ptr nocapture) +declare i1 @llvm.nvvm.isspacep.const(ptr nocapture) +declare i1 @llvm.nvvm.isspacep.local(ptr nocapture) + +define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr { +; CHECK-LABEL: define dso_local void @check_global( +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[GENERIC_DATA]]) +; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 +; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 +; 
CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 +; CHECK-NEXT: ret void +; +entry: + ; No constant folding for generic pointers of unknown origin. + %gen0 = tail call i1 @llvm.nvvm.isspacep.global(ptr %generic_data) + %storedv = zext i1 %gen0 to i8 + store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 + + %isg1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) + %isg18 = zext i1 %isg1 to i8 + store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 + + %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %global_data_asc) + %isg28 = zext i1 %isg2 to i8 + store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 + + %iss1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) + %iss18 = zext i1 %iss1 to i8 + store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 + + %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %shared_data_asc) + %iss28 = zext i1 %iss2 to i8 + store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 + + %isc1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) + %isc18 = zext i1 %isc1 to i8 + store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 + + %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %const_data_asc) + %isc28 = zext i1 %isc2 to i8 + store i8 %isc28, ptr addrspacecast (ptr addrspace(1) 
@c2 to ptr), align 1 + + ; Local data can't ihave a constant address, so we can't have a constant ASC expression + ; We can only use an ASC instruction. + %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr + %isl = call i1 @llvm.nvvm.isspacep.global(ptr nonnull %local_data_asc) + %isl8 = zext i1 %isl to i8 + store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 + + ret void +} + +define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr { +; CHECK-LABEL: define dso_local void @check_shared( +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[GENERIC_DATA]]) +; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 +; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 +; CHECK-NEXT: ret void +; +entry: + ; No constant folding for generic pointers of unknown origin. 
+ %gen0 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %generic_data) + %storedv = zext i1 %gen0 to i8 + store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 + + %isg1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) + %isg18 = zext i1 %isg1 to i8 + store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 + + %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %global_data_asc) + %isg28 = zext i1 %isg2 to i8 + store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 + + %iss1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) + %iss18 = zext i1 %iss1 to i8 + store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 + + %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %shared_data_asc) + %iss28 = zext i1 %iss2 to i8 + store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 + + %isc1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) + %isc18 = zext i1 %isc1 to i8 + store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 + + %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %const_data_asc) + %isc28 = zext i1 %isc2 to i8 + store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 + + ; Local data can't have a constant address, so we can't have a constant ASC expression + ; We can only use an ASC instruction. 
+ %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr + %isl = call i1 @llvm.nvvm.isspacep.shared(ptr nonnull %local_data_asc) + %isl8 = zext i1 %isl to i8 + store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 + + ret void +} + +define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr { +; CHECK-LABEL: define dso_local void @check_const( +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.const(ptr [[GENERIC_DATA]]) +; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 +; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 +; CHECK-NEXT: ret void +; +entry: + ; No constant folding for generic pointers of unknown origin. 
+ %gen0 = tail call i1 @llvm.nvvm.isspacep.const(ptr %generic_data) + %storedv = zext i1 %gen0 to i8 + store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 + + %isg1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) + %isg18 = zext i1 %isg1 to i8 + store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 + + %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %global_data_asc) + %isg28 = zext i1 %isg2 to i8 + store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 + + %iss1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) + %iss18 = zext i1 %iss1 to i8 + store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 + + %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %shared_data_asc) + %iss28 = zext i1 %iss2 to i8 + store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 + + %isc1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) + %isc18 = zext i1 %isc1 to i8 + store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 + + %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %const_data_asc) + %isc28 = zext i1 %isc2 to i8 + store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 + + ; Local data can't have a constant address, so we can't have a constant ASC expression + ; We can only use an ASC instruction. 
+ %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr + %isl = call i1 @llvm.nvvm.isspacep.const(ptr nonnull %local_data_asc) + %isl8 = zext i1 %isl to i8 + store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 + + ret void +} + +define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr { +; CHECK-LABEL: define dso_local void @check_local( +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.local(ptr [[GENERIC_DATA]]) +; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 +; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 +; CHECK-NEXT: ret void +; +entry: + ; No constant folding for generic pointers of unknown origin. 
+ %gen0 = tail call i1 @llvm.nvvm.isspacep.local(ptr %generic_data) + %storedv = zext i1 %gen0 to i8 + store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 + + %isg1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) + %isg18 = zext i1 %isg1 to i8 + store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 + + %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %global_data_asc) + %isg28 = zext i1 %isg2 to i8 + store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 + + %iss1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) + %iss18 = zext i1 %iss1 to i8 + store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 + + %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %shared_data_asc) + %iss28 = zext i1 %iss2 to i8 + store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 + + %isc1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) + %isc18 = zext i1 %isc1 to i8 + store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 + + %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %const_data_asc) + %isc28 = zext i1 %isc2 to i8 + store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 + + ; Local data can't have a constant address, so we can't have a constant ASC expression + ; We can only use an ASC instruction. 
+ %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr + %isl = call i1 @llvm.nvvm.isspacep.local(ptr nonnull %local_data_asc) + %isl8 = zext i1 %isl to i8 + store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 + + ret void +} + From 48b3e6f69b64a82ce0ff16ed8a69cb87a7ac1424 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Fri, 18 Oct 2024 12:57:25 -0700 Subject: [PATCH 2/7] Clean up names in comments. --- llvm/include/llvm/Support/NVPTXAddrSpace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Support/NVPTXAddrSpace.h b/llvm/include/llvm/Support/NVPTXAddrSpace.h index 063d2aaffdc57..93eae39e3d230 100644 --- a/llvm/include/llvm/Support/NVPTXAddrSpace.h +++ b/llvm/include/llvm/Support/NVPTXAddrSpace.h @@ -1,4 +1,4 @@ -//===---------------- AMDGPUAddrSpace.h -------------------------*- C++ -*-===// +//===---------------- NVPTXAddrSpace.h -------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // /// \file -/// AMDGPU address space definition +/// NVPTX address space definition /// // //===----------------------------------------------------------------------===// From 401e4f3437e7aeb6a028448b66a07b514900c95b Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Fri, 25 Oct 2024 11:09:22 -0700 Subject: [PATCH 3/7] Cast directly to AddrSpaceCastOperator --- llvm/lib/Analysis/InstructionSimplify.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index b525bc27d72b8..5090e09f20701 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6376,10 +6376,8 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC) { if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Op0)) AS = ASC->getSrcAddressSpace(); - else if (auto *CE = dyn_cast<ConstantExpr>(Op0)) { - if (CE->getOpcode() == Instruction::AddrSpaceCast) - AS = CE->getOperand(0)->getType()->getPointerAddressSpace(); - } + else if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0)) + AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace(); } if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC || AS == NVPTXAS::ADDRESS_SPACE_PARAM) From fffb81ed92a28c6f43d34bc6e63fe1953ac2bdbc Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Mon, 28 Oct 2024 14:08:32 -0700 Subject: [PATCH 4/7] Use AS-specific pointers for ASC instruction testing If we use a pointer to a constant, instcombine collapses it into an ASC operator. 
--- .../Transforms/InstCombine/NVPTX/isspacep.ll | 112 ++++++++++-------- 1 file changed, 64 insertions(+), 48 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll index f53ec0120cfb3..dedd85e1a8cda 100644 --- a/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll +++ b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll @@ -23,11 +23,11 @@ declare i1 @llvm.nvvm.isspacep.shared(ptr nocapture) declare i1 @llvm.nvvm.isspacep.const(ptr nocapture) declare i1 @llvm.nvvm.isspacep.local(ptr nocapture) -define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr { +define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, ; CHECK-LABEL: define dso_local void @check_global( -; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr { +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[GENERIC_DATA]]) +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[GENP]]) ; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 ; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 ; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 @@ -39,9 +39,13 @@ define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr noc ; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 ; CHECK-NEXT: ret void ; + ptr addrspace(1) %gp, + ptr 
addrspace(3) %sp, + ptr addrspace(4) %cp, + ptr addrspace(5) %lp) local_unnamed_addr { entry: ; No constant folding for generic pointers of unknown origin. - %gen0 = tail call i1 @llvm.nvvm.isspacep.global(ptr %generic_data) + %gen0 = tail call i1 @llvm.nvvm.isspacep.global(ptr %genp) %storedv = zext i1 %gen0 to i8 store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 @@ -49,8 +53,8 @@ entry: %isg18 = zext i1 %isg1 to i8 store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 - %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr - %isg2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %global_data_asc) + %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %gp_asc) %isg28 = zext i1 %isg2 to i8 store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 @@ -58,8 +62,8 @@ entry: %iss18 = zext i1 %iss1 to i8 store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 - %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr - %iss2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %shared_data_asc) + %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %sp_asc) %iss28 = zext i1 %iss2 to i8 store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 @@ -67,26 +71,26 @@ entry: %isc18 = zext i1 %isc1 to i8 store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 - %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr - %isc2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %const_data_asc) + %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %cp_asc) %isc28 = zext i1 %isc2 to i8 store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 ; Local data can't ihave a constant address, so we can't have a constant ASC expression ; We can only use an ASC instruction. 
- %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr - %isl = call i1 @llvm.nvvm.isspacep.global(ptr nonnull %local_data_asc) + %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr + %isl = call i1 @llvm.nvvm.isspacep.global(ptr nonnull %lp_asc) %isl8 = zext i1 %isl to i8 store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 ret void } -define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr { +define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, ; CHECK-LABEL: define dso_local void @check_shared( -; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr { +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[GENERIC_DATA]]) +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[GENP]]) ; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 ; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 ; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 @@ -98,9 +102,13 @@ define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr noc ; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 ; CHECK-NEXT: ret void ; + ptr addrspace(1) %gp, + ptr addrspace(3) %sp, + ptr addrspace(4) %cp, + ptr addrspace(5) %lp) local_unnamed_addr { entry: ; No constant folding for generic pointers of unknown origin. 
- %gen0 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %generic_data) + %gen0 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %genp) %storedv = zext i1 %gen0 to i8 store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 @@ -108,8 +116,8 @@ entry: %isg18 = zext i1 %isg1 to i8 store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 - %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr - %isg2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %global_data_asc) + %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %gp_asc) %isg28 = zext i1 %isg2 to i8 store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 @@ -117,8 +125,8 @@ entry: %iss18 = zext i1 %iss1 to i8 store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 - %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr - %iss2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %shared_data_asc) + %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %sp_asc) %iss28 = zext i1 %iss2 to i8 store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 @@ -126,26 +134,26 @@ entry: %isc18 = zext i1 %isc1 to i8 store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 - %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr - %isc2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %const_data_asc) + %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %cp_asc) %isc28 = zext i1 %isc2 to i8 store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 ; Local data can't have a constant address, so we can't have a constant ASC expression ; We can only use an ASC instruction. 
- %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr - %isl = call i1 @llvm.nvvm.isspacep.shared(ptr nonnull %local_data_asc) + %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr + %isl = call i1 @llvm.nvvm.isspacep.shared(ptr nonnull %lp_asc) %isl8 = zext i1 %isl to i8 store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 ret void } -define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr { +define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, ; CHECK-LABEL: define dso_local void @check_const( -; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr { +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.const(ptr [[GENERIC_DATA]]) +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.const(ptr [[GENP]]) ; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 ; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 ; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 @@ -157,9 +165,13 @@ define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr noca ; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 ; CHECK-NEXT: ret void ; + ptr addrspace(1) %gp, + ptr addrspace(3) %sp, + ptr addrspace(4) %cp, + ptr addrspace(5) %lp) local_unnamed_addr { entry: ; No constant folding for generic pointers of unknown origin. 
- %gen0 = tail call i1 @llvm.nvvm.isspacep.const(ptr %generic_data) + %gen0 = tail call i1 @llvm.nvvm.isspacep.const(ptr %genp) %storedv = zext i1 %gen0 to i8 store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 @@ -167,8 +179,8 @@ entry: %isg18 = zext i1 %isg1 to i8 store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 - %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr - %isg2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %global_data_asc) + %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %gp_asc) %isg28 = zext i1 %isg2 to i8 store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 @@ -176,8 +188,8 @@ entry: %iss18 = zext i1 %iss1 to i8 store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 - %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr - %iss2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %shared_data_asc) + %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %sp_asc) %iss28 = zext i1 %iss2 to i8 store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 @@ -185,26 +197,26 @@ entry: %isc18 = zext i1 %isc1 to i8 store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 - %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr - %isc2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %const_data_asc) + %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %cp_asc) %isc28 = zext i1 %isc2 to i8 store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 ; Local data can't have a constant address, so we can't have a constant ASC expression ; We can only use an ASC instruction. 
- %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr - %isl = call i1 @llvm.nvvm.isspacep.const(ptr nonnull %local_data_asc) + %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr + %isl = call i1 @llvm.nvvm.isspacep.const(ptr nonnull %lp_asc) %isl8 = zext i1 %isl to i8 store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 ret void } -define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %generic_data, ptr addrspace(5) %local_data) local_unnamed_addr { +define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, ; CHECK-LABEL: define dso_local void @check_local( -; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENERIC_DATA:%.*]], ptr addrspace(5) [[LOCAL_DATA:%.*]]) local_unnamed_addr { +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.local(ptr [[GENERIC_DATA]]) +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.local(ptr [[GENP]]) ; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 ; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 ; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 @@ -216,9 +228,13 @@ define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr noca ; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 ; CHECK-NEXT: ret void ; + ptr addrspace(1) %gp, + ptr addrspace(3) %sp, + ptr addrspace(4) %cp, + ptr addrspace(5) %lp) local_unnamed_addr { entry: ; No constant folding for generic pointers of unknown origin. 
- %gen0 = tail call i1 @llvm.nvvm.isspacep.local(ptr %generic_data) + %gen0 = tail call i1 @llvm.nvvm.isspacep.local(ptr %genp) %storedv = zext i1 %gen0 to i8 store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 @@ -226,8 +242,8 @@ entry: %isg18 = zext i1 %isg1 to i8 store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 - %global_data_asc = addrspacecast ptr addrspace(1) @global_data to ptr - %isg2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %global_data_asc) + %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %gp_asc) %isg28 = zext i1 %isg2 to i8 store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 @@ -235,8 +251,8 @@ entry: %iss18 = zext i1 %iss1 to i8 store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 - %shared_data_asc = addrspacecast ptr addrspace(3) @shared_data to ptr - %iss2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %shared_data_asc) + %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %sp_asc) %iss28 = zext i1 %iss2 to i8 store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 @@ -244,15 +260,15 @@ entry: %isc18 = zext i1 %isc1 to i8 store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 - %const_data_asc = addrspacecast ptr addrspace(4) @const_data to ptr - %isc2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %const_data_asc) + %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %cp_asc) %isc28 = zext i1 %isc2 to i8 store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 ; Local data can't have a constant address, so we can't have a constant ASC expression ; We can only use an ASC instruction. 
- %local_data_asc = addrspacecast ptr addrspace(5) %local_data to ptr - %isl = call i1 @llvm.nvvm.isspacep.local(ptr nonnull %local_data_asc) + %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr + %isl = call i1 @llvm.nvvm.isspacep.local(ptr nonnull %lp_asc) %isl8 = zext i1 %isl to i8 store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 From cc934b31d49b320cc3c55ed39b8ee6896daf0aaa Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Mon, 28 Oct 2024 14:05:18 -0700 Subject: [PATCH 5/7] Move isspacep intrinsics processing to NVPTX backend --- llvm/lib/Analysis/InstructionSimplify.cpp | 26 -------- .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 63 ++++++++++++++++++- 2 files changed, 60 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 5090e09f20701..90a92b9781bad 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6367,32 +6367,6 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, break; } - case Intrinsic::nvvm_isspacep_global: - case Intrinsic::nvvm_isspacep_local: - case Intrinsic::nvvm_isspacep_shared: - case Intrinsic::nvvm_isspacep_const: { - auto *Ty = F->getReturnType(); - unsigned AS = Op0->getType()->getPointerAddressSpace(); - if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC) { - if (auto *ASC = dyn_cast(Op0)) - AS = ASC->getSrcAddressSpace(); - else if (auto *ASCO = dyn_cast(Op0)) - AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace(); - } - if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC || - AS == NVPTXAS::ADDRESS_SPACE_PARAM) - return nullptr; // Got to check at run-time. 
- bool ASMatches = (AS == NVPTXAS::ADDRESS_SPACE_GLOBAL && - IID == Intrinsic::nvvm_isspacep_global) || - (AS == NVPTXAS::ADDRESS_SPACE_LOCAL && - IID == Intrinsic::nvvm_isspacep_local) || - (AS == NVPTXAS::ADDRESS_SPACE_SHARED && - IID == Intrinsic::nvvm_isspacep_shared) || - (AS == NVPTXAS::ADDRESS_SPACE_CONST && - IID == Intrinsic::nvvm_isspacep_const); - return ConstantInt::get(Ty, ASMatches); - break; - } default: break; } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index e35ba25b47880..31087a0054e9f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -15,10 +15,12 @@ #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include using namespace llvm; @@ -117,7 +119,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { } // Convert NVVM intrinsics to target-generic LLVM code where possible. -static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { +static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC, + IntrinsicInst *II) { // Each NVVM intrinsic we can simplify can be replaced with one of: // // * an LLVM intrinsic, @@ -413,11 +416,65 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { llvm_unreachable("All SpecialCase enumerators should be handled in switch."); } +// Returns an instruction pointer (may be nullptr if we do not know the answer). +// Returns nullopt if `II` is not one of the `isspacep` intrinsics. 
+static std::optional +handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) { + Value *Op0 = II.getArgOperand(0); + // Returns true/false when we know the answer, nullopt otherwise. + auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional { + if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC || + AS == NVPTXAS::ADDRESS_SPACE_PARAM) + return std::nullopt; // Got to check at run-time. + switch (IID) { + case Intrinsic::nvvm_isspacep_global: + return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL; + case Intrinsic::nvvm_isspacep_local: + return AS == NVPTXAS::ADDRESS_SPACE_LOCAL; + case Intrinsic::nvvm_isspacep_shared: + return AS == NVPTXAS::ADDRESS_SPACE_SHARED; + case Intrinsic::nvvm_isspacep_shared_cluster: + // We can't tell shared from shared_cluster at compile time from AS alone, + // but it can't be either if AS is not shared. + return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt + : std::optional{false}; + case Intrinsic::nvvm_isspacep_const: + return AS == NVPTXAS::ADDRESS_SPACE_CONST; + default: + llvm_unreachable("Unexpected intrinsic"); + } + }; + + switch (auto IID = II.getIntrinsicID()) { + case Intrinsic::nvvm_isspacep_global: + case Intrinsic::nvvm_isspacep_local: + case Intrinsic::nvvm_isspacep_shared: + case Intrinsic::nvvm_isspacep_shared_cluster: + case Intrinsic::nvvm_isspacep_const: { + auto *Ty = II.getType(); + unsigned AS = Op0->getType()->getPointerAddressSpace(); + // Peek through ASC to generic AS. + // TODO: we could dig deeper through both ASCs and GEPs. + if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC) + if (auto *ASCO = dyn_cast(Op0)) + AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace(); + + if (std::optional Answer = CheckASMatch(IID, AS)) + return IC.replaceInstUsesWith(II, ConstantInt::get(Ty, *Answer)); + return nullptr; // Don't know the answer, got to check at run time. 
+ } + default: + return std::nullopt; + } +} + std::optional NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { - if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) { + if (std::optional I = handleSpaceCheckIntrinsics(IC, II)) + return *I; + if (Instruction *I = convertNvvmIntrinsicToLlvm(IC, &II)) return I; - } + return std::nullopt; } From fddfffe0db22f8d5ba5b1f009cdb606ab975b5c7 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 30 Oct 2024 14:31:17 -0700 Subject: [PATCH 6/7] revert changes to InstructionSimplify.cpp --- llvm/lib/Analysis/InstructionSimplify.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 90a92b9781bad..d08be1e55c853 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -38,12 +38,10 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Support/NVPTXAddrSpace.h" #include #include using namespace llvm; From 34e800b7b6eceacc6e32c942d145c5ff6aa5192c Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 30 Oct 2024 16:01:05 -0700 Subject: [PATCH 7/7] Do not touch intrinsic operands until we've checked intrinsic ID. Fixes the crash in MLIR and CUDA compilation when they handle some other nvvm intrinsic w/o arguments. 
--- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 31087a0054e9f..3507573df1869 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -420,7 +420,6 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC, // Returns nullopt if `II` is not one of the `isspacep` intrinsics. static std::optional handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) { - Value *Op0 = II.getArgOperand(0); // Returns true/false when we know the answer, nullopt otherwise. auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional { if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC || @@ -451,7 +450,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) { case Intrinsic::nvvm_isspacep_shared: case Intrinsic::nvvm_isspacep_shared_cluster: case Intrinsic::nvvm_isspacep_const: { - auto *Ty = II.getType(); + Value *Op0 = II.getArgOperand(0); unsigned AS = Op0->getType()->getPointerAddressSpace(); // Peek through ASC to generic AS. // TODO: we could dig deeper through both ASCs and GEPs. @@ -460,7 +459,8 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) { AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace(); if (std::optional Answer = CheckASMatch(IID, AS)) - return IC.replaceInstUsesWith(II, ConstantInt::get(Ty, *Answer)); + return IC.replaceInstUsesWith(II, + ConstantInt::get(II.getType(), *Answer)); return nullptr; // Don't know the answer, got to check at run time. } default: