Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9f3cac4
Enable `InferAddressSpaces` for SPIR-V.
AlexVlx Oct 2, 2024
fcab1dd
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 2, 2024
dc1a5f5
Fix formatting.
AlexVlx Oct 2, 2024
d5483cd
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 6, 2024
a28ff5d
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 11, 2024
31a5ebe
Reduce set of tests.
AlexVlx Oct 11, 2024
a01e1bc
Fix formatting.
AlexVlx Oct 12, 2024
ab1fb66
Fix inclusion ordering.
AlexVlx Oct 12, 2024
168149a
Only enable "fancy" stuff fof amdgcnspirv for now.
AlexVlx Oct 12, 2024
e1e57ad
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 12, 2024
102e886
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 15, 2024
797a80a
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 22, 2024
a7d1467
Remove spurious target check, clarify comment.
AlexVlx Oct 22, 2024
770afb8
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 22, 2024
ef95080
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Nov 4, 2024
cb9d363
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Nov 28, 2024
a707363
Implement feedback.
AlexVlx Nov 28, 2024
a3c88f8
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Nov 28, 2024
fe923f2
Update llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
AlexVlx Nov 28, 2024
c7e34e7
Guard AMDGCN specific predicate implementation.
AlexVlx Dec 4, 2024
845d195
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Dec 4, 2024
b15f7ff
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Mar 2, 2025
ac82484
Update test.
AlexVlx Mar 2, 2025
8657436
Update test.
AlexVlx Mar 2, 2025
ce1922a
Update test.
AlexVlx Mar 2, 2025
2bc152a
Do not use magic constant directly.
AlexVlx Mar 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 27 additions & 35 deletions clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,11 @@
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi(
Expand Down Expand Up @@ -126,13 +124,11 @@ __global__ void kernel1(int *x) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri(
Expand Down Expand Up @@ -195,7 +191,7 @@ __global__ void kernel2(int &x) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
// OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
Expand Down Expand Up @@ -261,7 +257,7 @@ __global__ void kernel3(__attribute__((address_space(2))) int *x,
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi(
// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
Expand Down Expand Up @@ -343,7 +339,7 @@ struct S {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
Expand Down Expand Up @@ -446,19 +442,17 @@ __global__ void kernel4(struct S s) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8
// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8
// OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
// OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4
// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[S_COERCE]], align 8
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[Y]], align 8
// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S(
Expand Down Expand Up @@ -551,7 +545,7 @@ struct T {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
Expand Down Expand Up @@ -631,13 +625,11 @@ __global__ void kernel6(struct T t) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi(
Expand Down Expand Up @@ -700,7 +692,7 @@ struct SS {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SPIRV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ add_llvm_target(SPIRVCodeGen
Core
Demangle
GlobalISel
Passes
Scalar
SPIRVAnalysis
MC
SPIRVDesc
Expand Down
91 changes: 91 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,15 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Passes/OptimizationLevel.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Utils.h"
#include <optional>

Expand Down Expand Up @@ -91,6 +97,88 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
setRequiresStructuredCFG(false);
}

enum AddressSpace {
Function = storageClassToAddressSpace(SPIRV::StorageClass::Function),
CrossWorkgroup =
storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup),
UniformConstant =
storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant),
Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup),
Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic)
};

unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move to separate change, not sure this is necessarily valid for spirv

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UniformConstant is pretty much OCL constant (with a bit of handwavium around initializers being allowed depending on an undefined client API). This is just saying that if you have a load from that, and you're loading a pointer, that pointer can only point to global (CrossWorkgroup), which I think holds here as well because there's no legal way to put a private or a local (shared) pointer in there (if you do it at static init, before a kernel executes, you cannot form those types of addresses, if you do it as the kernel executes it's UB). Or are you worried about cases where global does not include constant?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the routine is ok for a vanilla OpenCL environment but extensions may make it invalid.

const auto *LD = dyn_cast<LoadInst>(V);
if (!LD)
return UINT32_MAX;

// It must be a load from a pointer to Generic.
assert(V->getType()->isPointerTy() &&
V->getType()->getPointerAddressSpace() == AddressSpace::Generic);

const auto *Ptr = LD->getPointerOperand();
if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant)
return UINT32_MAX;
// For a loaded from a pointer to UniformConstant, we can infer CrossWorkgroup
// storage, as this could only have been legally initialised with a
// CrossWorkgroup (aka device) constant pointer.
return AddressSpace::CrossWorkgroup;
}

std::pair<const Value *, unsigned>
SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Drop this part from the patch, it's not tested and is questionable enough to do separately

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

using namespace PatternMatch;

if (auto *II = dyn_cast<IntrinsicInst>(V)) {
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_is_shared:
return std::pair(II->getArgOperand(0), AddressSpace::Workgroup);
case Intrinsic::amdgcn_is_private:
return std::pair(II->getArgOperand(0), AddressSpace::Function);
default:
break;
}
return std::pair(nullptr, UINT32_MAX);
}
// Check the global pointer predication based on
// (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and
// the order of 'is_shared' and 'is_private' is not significant.
Value *Ptr;
if (getTargetTriple().getVendor() == Triple::VendorType::AMD &&
match(
const_cast<Value *>(V),
m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
m_Deferred(Ptr))))))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't be looking at the amdgcn intrinsics? Surely spirv has its own operations for this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's AMDGCN flavoured SPIR-V, which'd possibly have these in source; I don't think there's AS predicates in SPIR-V, at least not AFAICS in Clang/LLVM/the spec - happy to add them if they exist, but we'll need both.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I have skimmed SPIRV correctly, it expects invalid addrspacecasts (OpGenericCastToPtrExplicit) to return null. You could implement the same kind of check by looking for icmp ne (addrspacecast x to y), null

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neither the BE nor the Translator handle that at the moment, and I suspect it's meant for implementing some specific bit of OpenCL (SYCL?) functionality. We use the non-explicit flavours, and those don't return null (and are diagnosed as illegal if they are illegal per spec). This is probably a good way of implementing the predicates / handling this, so thank you for it. Having said that, I reiterate that we have AMDGCN flavoured SPIR-V where the actual AMDGCN predicates would manifest / make sense.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could do the same thing for amdgpu. We implement addrspacecast with the same operations.

This also reminds me, we should have a valid flag on addrspacecast.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh just seeing this comment @AlexVlx

I think that we just need to implement the AS predicates (is_local / is_private & friends) atop OpGenericPtrMemSemantics

is that for AMDGCN or something more general ? If the latter, the spec doesn't offer enough guarantee to do that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking about generic (general) predicates for SPIR-V. AFAICS the spec says this about OpGenericPtrMemSemantics: Result is a valid [Memory Semantics](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Memory_Semantics_-id-) which includes mask bits set for the Storage Class for the specific (non-Generic) Storage Class of Pointer. My interpretation (which could be wrong) is that the bits returned in the mask actually indicate the pointee's AS, so the generic predicates would lower to (handwavium alert) OpGenericPtrMemSemantics + bitwise AND.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My interpretation (which could be wrong) is that the bits returned in the mask actually indicate the pointee's AS, so the generic predicates would lower to (handwavium alert) OpGenericPtrMemSemantics + bitwise AND.

The returned value is guaranteed to be a valid combination for the AS but an impl can use the same combination for different AS.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m not quite sure how to parse this, apologies - what is an implementation in this case? It would be rather odd to have a valid implementation use e.g. setting the WorkGroup bit to denote CrossWorkGroup, would it not? Note I’m only considering SPIR-V, not what a target would decide to lower it to.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is an implementation in this case?

a tool consuming the SPIR-V module like an opencl driver

It would be rather odd to have a valid implementation use e.g. setting the WorkGroup bit to denote CrossWorkGroup, would it not?

it is, but it may not make a difference for all platforms (e.g. CPUs don't typically have a dedicated workgroup memory) and checking what you are dealing can be somehow expensive or complex for no clear benefit down the line.

return std::pair(Ptr, AddressSpace::CrossWorkgroup);

return std::pair(nullptr, UINT32_MAX);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the fancy stuff that should go into a follow up patch to add assume support


bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup)
return false;
return DestAS == AddressSpace::Generic ||
DestAS == AddressSpace::CrossWorkgroup;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is separate, I don't think InferAddressSpaces relies on this

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does, please see isNoopPtrIntCastPair in its implementation.


void SPIRVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerCGSCCOptimizerLateEPCallback(
[](CGSCCPassManager &PM, OptimizationLevel Level) {
if (Level == OptimizationLevel::O0)
return;

FunctionPassManager FPM;

// Add infer address spaces pass to the opt pipeline after inlining
// but before SROA to increase SROA opportunities.
FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic));

PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
});
}

namespace {
// SPIR-V Code Generator Pass Configuration Options.
class SPIRVPassConfig : public TargetPassConfig {
Expand Down Expand Up @@ -178,6 +266,9 @@ void SPIRVPassConfig::addIRPasses() {
addPass(createSPIRVStructurizerPass());
}

if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(createInferAddressSpacesPass(AddressSpace::Generic));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why this is a pass parameter to InferAddressSpaces, and a TTI hook

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because if one invokes the pass directly via opt there's no way but the TTI query to set Flat/Generic to anything but 0, and because making it explicit at the point of construction rather than relying on that seems somewhat more self documenting.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, why do invoke this pass twice: in the middle-end and code-gen?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember we had some phase ordering issues where we needed to run this multiple times. I'm not sure what the current status is. We certainly need to run this after inlining


addPass(createSPIRVRegularizerPass());
addPass(createSPIRVPrepareFunctionsPass(TM));
addPass(createSPIRVStripConvergenceIntrinsicsPass());
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ class SPIRVTargetMachine : public LLVMTargetMachine {
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}

unsigned getAssumedAddrSpace(const Value *V) const override;
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override;
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override;

void registerPassBuilderCallbacks(PassBuilder &PB) override;
};
} // namespace llvm

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}

unsigned getFlatAddressSpace() const {
return storageClassToAddressSpace(SPIRV::StorageClass::Generic);
}

TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
// SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it
// is reasonable to assume the Op is fast / preferable to the expanded loop.
Expand Down
31 changes: 31 additions & 0 deletions llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s
; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s

@c0 = addrspace(2) global ptr undef

; CHECK-LABEL: @generic_ptr_from_constant
; CHECK: addrspacecast ptr addrspace(4) %p to ptr addrspace(1)
; CHECK-NEXT: load float, ptr addrspace(1)
define spir_func float @generic_ptr_from_constant() {
%p = load ptr addrspace(4), ptr addrspace(2) @c0
%v = load float, ptr addrspace(4) %p
ret float %v
}

%struct.S = type { ptr addrspace(4), ptr addrspace(4) }

; CHECK-LABEL: @generic_ptr_from_aggregate_argument
; CHECK: addrspacecast ptr addrspace(4) %p0 to ptr addrspace(1)
; CHECK: addrspacecast ptr addrspace(4) %p1 to ptr addrspace(1)
; CHECK: load i32, ptr addrspace(1)
; CHECK: store float %v1, ptr addrspace(1)
; CHECK: ret
define spir_kernel void @generic_ptr_from_aggregate_argument(ptr addrspace(2) byval(%struct.S) align 8 %0) {
%p0 = load ptr addrspace(4), ptr addrspace(2) %0
%f1 = getelementptr inbounds %struct.S, ptr addrspace(2) %0, i64 0, i32 1
%p1 = load ptr addrspace(4), ptr addrspace(2) %f1
%v0 = load i32, ptr addrspace(4) %p0
%v1 = sitofp i32 %v0 to float
store float %v1, ptr addrspace(4) %p1
ret void
}
Loading
Loading