-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[llvm][opt][Transforms][SPIR-V] Enable InferAddressSpaces for SPIR-V
#110897
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 18 commits
9f3cac4
fcab1dd
dc1a5f5
d5483cd
a28ff5d
31a5ebe
a01e1bc
ab1fb66
168149a
e1e57ad
102e886
797a80a
a7d1467
770afb8
ef95080
cb9d363
a707363
a3c88f8
fe923f2
c7e34e7
845d195
b15f7ff
ac82484
8657436
ce1922a
2bc152a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,6 +52,8 @@ add_llvm_target(SPIRVCodeGen | |
| Core | ||
| Demangle | ||
| GlobalISel | ||
| Passes | ||
| Scalar | ||
| SPIRVAnalysis | ||
| MC | ||
| SPIRVDesc | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,10 +25,16 @@ | |
| #include "llvm/CodeGen/Passes.h" | ||
| #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" | ||
| #include "llvm/CodeGen/TargetPassConfig.h" | ||
| #include "llvm/IR/IntrinsicsAMDGPU.h" | ||
| #include "llvm/IR/PatternMatch.h" | ||
| #include "llvm/InitializePasses.h" | ||
| #include "llvm/MC/TargetRegistry.h" | ||
| #include "llvm/Pass.h" | ||
| #include "llvm/Passes/OptimizationLevel.h" | ||
| #include "llvm/Passes/PassBuilder.h" | ||
| #include "llvm/Target/TargetOptions.h" | ||
| #include "llvm/Transforms/Scalar.h" | ||
| #include "llvm/Transforms/Scalar/InferAddressSpaces.h" | ||
| #include "llvm/Transforms/Scalar/Reg2Mem.h" | ||
| #include "llvm/Transforms/Utils.h" | ||
| #include <optional> | ||
|
|
@@ -92,6 +98,63 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT, | |
| setRequiresStructuredCFG(false); | ||
| } | ||
|
|
||
| enum AddressSpace { | ||
| Function = storageClassToAddressSpace(SPIRV::StorageClass::Function), | ||
| CrossWorkgroup = | ||
| storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup), | ||
| UniformConstant = | ||
| storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant), | ||
| Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup), | ||
| Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic) | ||
| }; | ||
|
|
||
| unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { | ||
| // TODO: we only enable this for AMDGCN flavoured SPIR-V, where we know it to | ||
| // be correct; this might be relaxed in the future. | ||
| if (getTargetTriple().getVendor() != Triple::VendorType::AMD) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please move this whole hook to a separate PR. I also do not think we should have any vendor checks |
||
| return UINT32_MAX; | ||
|
||
|
|
||
| const auto *LD = dyn_cast<LoadInst>(V); | ||
| if (!LD) | ||
| return UINT32_MAX; | ||
|
|
||
| // It must be a load from a pointer to Generic. | ||
| assert(V->getType()->isPointerTy() && | ||
| V->getType()->getPointerAddressSpace() == AddressSpace::Generic); | ||
|
|
||
| const auto *Ptr = LD->getPointerOperand(); | ||
| if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant) | ||
| return UINT32_MAX; | ||
| // For a loaded from a pointer to UniformConstant, we can infer CrossWorkgroup | ||
AlexVlx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // storage, as this could only have been legally initialised with a | ||
| // CrossWorkgroup (aka device) constant pointer. | ||
| return AddressSpace::CrossWorkgroup; | ||
| } | ||
|
|
||
| bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, | ||
| unsigned DestAS) const { | ||
| if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup) | ||
| return false; | ||
| return DestAS == AddressSpace::Generic || | ||
AlexVlx marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| DestAS == AddressSpace::CrossWorkgroup; | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is separate, I don't think InferAddressSpaces relies on this
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does, please see |
||
|
|
||
| void SPIRVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { | ||
| PB.registerCGSCCOptimizerLateEPCallback( | ||
| [](CGSCCPassManager &PM, OptimizationLevel Level) { | ||
| if (Level == OptimizationLevel::O0) | ||
| return; | ||
|
|
||
| FunctionPassManager FPM; | ||
|
|
||
| // Add infer address spaces pass to the opt pipeline after inlining | ||
| // but before SROA to increase SROA opportunities. | ||
| FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic)); | ||
|
|
||
| PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); | ||
| }); | ||
| } | ||
|
|
||
| namespace { | ||
| // SPIR-V Code Generator Pass Configuration Options. | ||
| class SPIRVPassConfig : public TargetPassConfig { | ||
|
|
@@ -198,6 +261,9 @@ void SPIRVPassConfig::addIRPasses() { | |
| addPass(createPromoteMemoryToRegisterPass()); | ||
| } | ||
|
|
||
| if (TM.getOptLevel() > CodeGenOptLevel::None) | ||
| addPass(createInferAddressSpacesPass(AddressSpace::Generic)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure why this is a pass parameter to InferAddressSpaces, and a TTI hook
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because if one invokes the pass directly via opt there's no way but the TTI query to set Flat/Generic to anything but 0, and because making it explicit at the point of construction rather than relying on that seems somewhat more self documenting.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Out of curiosity, why do invoke this pass twice: in the middle-end and code-gen?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remember we had some phase ordering issues where we needed to run this multiple times. I'm not sure what the current status is. We certainly need to run this after inlining |
||
|
|
||
| addPass(createSPIRVRegularizerPass()); | ||
| addPass(createSPIRVPrepareFunctionsPass(TM)); | ||
| addPass(createSPIRVStripConvergenceIntrinsicsPass()); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| ; RUN: opt -S -mtriple=spirv64-amd-amdhsa -passes=infer-address-spaces -o - %s | FileCheck %s | ||
|
|
||
| @c0 = addrspace(2) global ptr undef | ||
|
|
||
| ; CHECK-LABEL: @generic_ptr_from_constant | ||
| ; CHECK: addrspacecast ptr addrspace(4) %p to ptr addrspace(1) | ||
| ; CHECK-NEXT: load float, ptr addrspace(1) | ||
| define spir_func float @generic_ptr_from_constant() { | ||
| %p = load ptr addrspace(4), ptr addrspace(2) @c0 | ||
| %v = load float, ptr addrspace(4) %p | ||
| ret float %v | ||
| } | ||
|
|
||
| %struct.S = type { ptr addrspace(4), ptr addrspace(4) } | ||
|
|
||
| ; CHECK-LABEL: @generic_ptr_from_aggregate_argument | ||
| ; CHECK: addrspacecast ptr addrspace(4) %p0 to ptr addrspace(1) | ||
| ; CHECK: addrspacecast ptr addrspace(4) %p1 to ptr addrspace(1) | ||
| ; CHECK: load i32, ptr addrspace(1) | ||
| ; CHECK: store float %v1, ptr addrspace(1) | ||
| ; CHECK: ret | ||
| define spir_kernel void @generic_ptr_from_aggregate_argument(ptr addrspace(2) byval(%struct.S) align 8 %0) { | ||
| %p0 = load ptr addrspace(4), ptr addrspace(2) %0 | ||
| %f1 = getelementptr inbounds %struct.S, ptr addrspace(2) %0, i64 0, i32 1 | ||
| %p1 = load ptr addrspace(4), ptr addrspace(2) %f1 | ||
| %v0 = load i32, ptr addrspace(4) %p0 | ||
| %v1 = sitofp i32 %v0 to float | ||
| store float %v1, ptr addrspace(4) %p1 | ||
| ret void | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move to separate change, not sure this is necessarily valid for spirv
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
UniformConstantis pretty much OCLconstant(with a bit of handwavium around initializers being allowed depending on an undefined client API). This is just saying that if you have a load from that, and you're loading a pointer, that pointer can only point to global (CrossWorkgroup), which I think holds here as well because there's no legal way to put a private or a local (shared) pointer in there (if you do it at static init, before a kernel executes, you cannot form those types of addresses, if you do it as the kernel executes it's UB). Or are you worried about cases where global does not include constant?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the routine is ok for a vanilla OpenCL environment but extensions may make it invalid.