Skip to content
Closed
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9f3cac4
Enable `InferAddressSpaces` for SPIR-V.
AlexVlx Oct 2, 2024
fcab1dd
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 2, 2024
dc1a5f5
Fix formatting.
AlexVlx Oct 2, 2024
d5483cd
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 6, 2024
a28ff5d
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 11, 2024
31a5ebe
Reduce set of tests.
AlexVlx Oct 11, 2024
a01e1bc
Fix formatting.
AlexVlx Oct 12, 2024
ab1fb66
Fix inclusion ordering.
AlexVlx Oct 12, 2024
168149a
Only enable "fancy" stuff fof amdgcnspirv for now.
AlexVlx Oct 12, 2024
e1e57ad
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 12, 2024
102e886
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 15, 2024
797a80a
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 22, 2024
a7d1467
Remove spurious target check, clarify comment.
AlexVlx Oct 22, 2024
770afb8
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Oct 22, 2024
ef95080
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Nov 4, 2024
cb9d363
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Nov 28, 2024
a707363
Implement feedback.
AlexVlx Nov 28, 2024
a3c88f8
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Nov 28, 2024
fe923f2
Update llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
AlexVlx Nov 28, 2024
c7e34e7
Guard AMDGCN specific predicate implementation.
AlexVlx Dec 4, 2024
845d195
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Dec 4, 2024
b15f7ff
Merge branch 'main' of https://github.com/llvm/llvm-project into infe…
AlexVlx Mar 2, 2025
ac82484
Update test.
AlexVlx Mar 2, 2025
8657436
Update test.
AlexVlx Mar 2, 2025
ce1922a
Update test.
AlexVlx Mar 2, 2025
2bc152a
Do not use magic constant directly.
AlexVlx Mar 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 26 additions & 34 deletions clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,11 @@
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi(
Expand Down Expand Up @@ -126,13 +124,11 @@ __global__ void kernel1(int *x) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri(
Expand Down Expand Up @@ -261,7 +257,7 @@ __global__ void kernel3(__attribute__((address_space(2))) int *x,
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi(
// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
Expand Down Expand Up @@ -343,7 +339,7 @@ struct S {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
Expand Down Expand Up @@ -446,19 +442,17 @@ __global__ void kernel4(struct S s) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8
// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8
// OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
// OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4
// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[S_COERCE]], align 8
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[Y]], align 8
// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S(
Expand Down Expand Up @@ -551,7 +545,7 @@ struct T {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
Expand Down Expand Up @@ -631,13 +625,11 @@ __global__ void kernel6(struct T t) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi(
Expand Down Expand Up @@ -700,7 +692,7 @@ struct SS {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SPIRV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ add_llvm_target(SPIRVCodeGen
Core
Demangle
GlobalISel
Passes
Scalar
SPIRVAnalysis
MC
SPIRVDesc
Expand Down
66 changes: 66 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,16 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Passes/OptimizationLevel.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/Reg2Mem.h"
#include "llvm/Transforms/Utils.h"
#include <optional>
Expand Down Expand Up @@ -92,6 +98,63 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
setRequiresStructuredCFG(false);
}

enum AddressSpace {
Function = storageClassToAddressSpace(SPIRV::StorageClass::Function),
CrossWorkgroup =
storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup),
UniformConstant =
storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant),
Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup),
Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic)
};

unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move to separate change, not sure this is necessarily valid for spirv

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UniformConstant is pretty much OCL constant (with a bit of handwavium around initializers being allowed depending on an undefined client API). This is just saying that if you have a load from that, and you're loading a pointer, that pointer can only point to global (CrossWorkgroup), which I think holds here as well because there's no legal way to put a private or a local (shared) pointer in there (if you do it at static init, before a kernel executes, you cannot form those types of addresses, if you do it as the kernel executes it's UB). Or are you worried about cases where global does not include constant?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the routine is ok for a vanilla OpenCL environment but extensions may make it invalid.

// TODO: we only enable this for AMDGCN flavoured SPIR-V, where we know it to
// be correct; this might be relaxed in the future.
if (getTargetTriple().getVendor() != Triple::VendorType::AMD)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move this whole hook to a separate PR. I also do not think we should have any vendor checks

return UINT32_MAX;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add AddressSpace::Unknown/ AddressSpace::Invalid instead of using magic constant or at least document the meaning of UINT32_MAX?


const auto *LD = dyn_cast<LoadInst>(V);
if (!LD)
return UINT32_MAX;

// It must be a load from a pointer to Generic.
assert(V->getType()->isPointerTy() &&
V->getType()->getPointerAddressSpace() == AddressSpace::Generic);

const auto *Ptr = LD->getPointerOperand();
if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant)
return UINT32_MAX;
// For a loaded from a pointer to UniformConstant, we can infer CrossWorkgroup
// storage, as this could only have been legally initialised with a
// CrossWorkgroup (aka device) constant pointer.
return AddressSpace::CrossWorkgroup;
}

bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup)
return false;
return DestAS == AddressSpace::Generic ||
DestAS == AddressSpace::CrossWorkgroup;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is separate, I don't think InferAddressSpaces relies on this

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does, please see isNoopPtrIntCastPair in its implementation.


void SPIRVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerCGSCCOptimizerLateEPCallback(
[](CGSCCPassManager &PM, OptimizationLevel Level) {
if (Level == OptimizationLevel::O0)
return;

FunctionPassManager FPM;

// Add infer address spaces pass to the opt pipeline after inlining
// but before SROA to increase SROA opportunities.
FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic));

PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
});
}

namespace {
// SPIR-V Code Generator Pass Configuration Options.
class SPIRVPassConfig : public TargetPassConfig {
Expand Down Expand Up @@ -198,6 +261,9 @@ void SPIRVPassConfig::addIRPasses() {
addPass(createPromoteMemoryToRegisterPass());
}

if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(createInferAddressSpacesPass(AddressSpace::Generic));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why this is a pass parameter to InferAddressSpaces, and a TTI hook

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because if one invokes the pass directly via opt there's no way but the TTI query to set Flat/Generic to anything but 0, and because making it explicit at the point of construction rather than relying on that seems somewhat more self documenting.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, why do invoke this pass twice: in the middle-end and code-gen?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember we had some phase ordering issues where we needed to run this multiple times. I'm not sure what the current status is. We certainly need to run this after inlining


addPass(createSPIRVRegularizerPass());
addPass(createSPIRVPrepareFunctionsPass(TM));
addPass(createSPIRVStripConvergenceIntrinsicsPass());
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ class SPIRVTargetMachine : public CodeGenTargetMachineImpl {
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}

unsigned getAssumedAddrSpace(const Value *V) const override;
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override;

void registerPassBuilderCallbacks(PassBuilder &PB) override;
};
} // namespace llvm

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}

unsigned getFlatAddressSpace() const {
return storageClassToAddressSpace(SPIRV::StorageClass::Generic);
}

TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
// SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it
// is reasonable to assume the Op is fast / preferable to the expanded loop.
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; RUN: opt -S -mtriple=spirv64-amd-amdhsa -passes=infer-address-spaces -o - %s | FileCheck %s

@c0 = addrspace(2) global ptr undef

; CHECK-LABEL: @generic_ptr_from_constant
; CHECK: addrspacecast ptr addrspace(4) %p to ptr addrspace(1)
; CHECK-NEXT: load float, ptr addrspace(1)
define spir_func float @generic_ptr_from_constant() {
%p = load ptr addrspace(4), ptr addrspace(2) @c0
%v = load float, ptr addrspace(4) %p
ret float %v
}

%struct.S = type { ptr addrspace(4), ptr addrspace(4) }

; CHECK-LABEL: @generic_ptr_from_aggregate_argument
; CHECK: addrspacecast ptr addrspace(4) %p0 to ptr addrspace(1)
; CHECK: addrspacecast ptr addrspace(4) %p1 to ptr addrspace(1)
; CHECK: load i32, ptr addrspace(1)
; CHECK: store float %v1, ptr addrspace(1)
; CHECK: ret
define spir_kernel void @generic_ptr_from_aggregate_argument(ptr addrspace(2) byval(%struct.S) align 8 %0) {
%p0 = load ptr addrspace(4), ptr addrspace(2) %0
%f1 = getelementptr inbounds %struct.S, ptr addrspace(2) %0, i64 0, i32 1
%p1 = load ptr addrspace(4), ptr addrspace(2) %f1
%v0 = load i32, ptr addrspace(4) %p0
%v1 = sitofp i32 %v0 to float
store float %v1, ptr addrspace(4) %p1
ret void
}
Loading
Loading