-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[SPIR-V] Support nonuniformindex
intrsinsic in SPIRV CodeGen.
#162540
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-spir-v Author: Lucie Choi (luciechoi) ChangesSupport
Unit test
RWStructuredBuffer<uint4> StructuredOut[64];
RWBuffer<uint> UnStructuredOut[64];
[numthreads(64,1,1)]
void main(uint3 GTID: SV_GroupThreadID) {
StructuredOut[(NonUniformResourceIndex(GTID.x + 1))][98][0] = 99;
UnStructuredOut[(NonUniformResourceIndex(GTID.x))][96] = 95;
}
Full diff: https://github.com/llvm/llvm-project/pull/162540.diff 2 Files Affected:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 989950fb8f8b5..ba28b3fe1ee5c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -316,6 +316,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectImageWriteIntrinsic(MachineInstr &I) const;
bool selectResourceGetPointer(Register &ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
+ bool selectResourceNonUniformIndex(Register &ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const;
bool selectModf(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
bool selectUpdateCounter(Register &ResVReg, const SPIRVType *ResType,
@@ -347,7 +350,7 @@ class SPIRVInstructionSelector : public InstructionSelector {
SPIRV::StorageClass::StorageClass SC,
uint32_t Set, uint32_t Binding,
uint32_t ArraySize, Register IndexReg,
- bool IsNonUniform, StringRef Name,
+ StringRef Name,
MachineIRBuilder MIRBuilder) const;
SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const;
bool extractSubvector(Register &ResVReg, const SPIRVType *ResType,
@@ -364,6 +367,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
MachineInstr &I) const;
bool loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType,
GIntrinsic &HandleDef, MachineInstr &Pos) const;
+ void recursivelyDecorateChildAsNonUniform(Register &NonUniformReg,
+ const SPIRVType *RegType,
+ MachineInstr &I) const;
};
bool sampledTypeIsSignedInteger(const llvm::Type *HandleType) {
@@ -3465,6 +3471,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_discard: {
return selectDiscard(ResVReg, ResType, I);
}
+ case Intrinsic::spv_resource_nonuniformindex: {
+ return selectResourceNonUniformIndex(ResVReg, ResType, I);
+ }
default: {
std::string DiagMsg;
raw_string_ostream OS(DiagMsg);
@@ -3504,7 +3513,6 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding(
uint32_t Binding = getIConstVal(Intr.getOperand(3).getReg(), MRI);
uint32_t ArraySize = getIConstVal(MainHandleDef->getOperand(4).getReg(), MRI);
Register IndexReg = MainHandleDef->getOperand(5).getReg();
- const bool IsNonUniform = false;
std::string CounterName =
getStringValueFromReg(MainHandleDef->getOperand(6).getReg(), *MRI) +
".counter";
@@ -3513,7 +3521,7 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding(
MachineIRBuilder MIRBuilder(I);
Register CounterVarReg = buildPointerToResource(
GR.getPointeeType(ResType), GR.getPointerStorageClass(ResType), Set,
- Binding, ArraySize, IndexReg, IsNonUniform, CounterName, MIRBuilder);
+ Binding, ArraySize, IndexReg, CounterName, MIRBuilder);
return BuildCOPY(ResVReg, CounterVarReg, I);
}
@@ -3713,6 +3721,58 @@ bool SPIRVInstructionSelector::selectResourceGetPointer(
.constrainAllUses(TII, TRI, RBI);
}
+bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
+ Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const {
+ Register ObjReg = I.getOperand(2).getReg();
+ if (!BuildCOPY(ResVReg, ObjReg, I))
+ return false;
+
+ buildOpDecorate(ResVReg, I, TII, SPIRV::Decoration::NonUniformEXT, {});
+ // Check for the registers that use the index marked as non-uniform
+ // and recursively mark them as non-uniform.
+ // Per the spec, it's necessary that the final argument used for
+ // load/store/sample/atomic must be decorated, so we need to propagate the
+ // decoration through access chains and copies.
+ // https://docs.vulkan.org/samples/latest/samples/extensions/descriptor_indexing/README.html#_when_to_use_non_uniform_indexing_qualifier
+ recursivelyDecorateChildAsNonUniform(ResVReg, ResType, I);
+ return true;
+}
+
+void SPIRVInstructionSelector::recursivelyDecorateChildAsNonUniform(
+ Register &NonUniformReg, const SPIRVType *RegType, MachineInstr &I) const {
+ std::vector<std::tuple<Register, const SPIRVType *, MachineInstr *>> WorkList;
+ bool isDecorated = false;
+ for (MachineInstr &Use :
+ RegType->getMF()->getRegInfo().use_instructions(NonUniformReg)) {
+ if (Use.getOpcode() != SPIRV::OpDecorate &&
+ Use.getOpcode() != SPIRV::OpAccessChain &&
+ Use.getOpcode() != SPIRV::OpCopyObject &&
+ Use.getOpcode() != SPIRV::OpLoad)
+ continue;
+
+ if (Use.getOpcode() == SPIRV::OpDecorate &&
+ Use.getOperand(1).getImm() == SPIRV::Decoration::NonUniformEXT) {
+ isDecorated = true;
+ continue;
+ }
+
+ Register ResultReg = Use.getOperand(0).getReg();
+ SPIRVType *ResultType = GR.getResultType(ResultReg);
+ WorkList.push_back(std::make_tuple(ResultReg, ResultType, &Use));
+ }
+
+ if (!isDecorated) {
+ buildOpDecorate(NonUniformReg, I, TII, SPIRV::Decoration::NonUniformEXT,
+ {});
+ }
+
+ for (auto &Item : WorkList) {
+ recursivelyDecorateChildAsNonUniform(std::get<0>(Item), std::get<1>(Item),
+ *std::get<2>(Item));
+ }
+ return;
+}
+
bool SPIRVInstructionSelector::extractSubvector(
Register &ResVReg, const SPIRVType *ResType, Register &ReadReg,
MachineInstr &InsertionPoint) const {
@@ -3784,7 +3844,7 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic(
Register SPIRVInstructionSelector::buildPointerToResource(
const SPIRVType *SpirvResType, SPIRV::StorageClass::StorageClass SC,
uint32_t Set, uint32_t Binding, uint32_t ArraySize, Register IndexReg,
- bool IsNonUniform, StringRef Name, MachineIRBuilder MIRBuilder) const {
+ StringRef Name, MachineIRBuilder MIRBuilder) const {
const Type *ResType = GR.getTypeForSPIRVType(SpirvResType);
if (ArraySize == 1) {
SPIRVType *PtrType =
@@ -3803,14 +3863,7 @@ Register SPIRVInstructionSelector::buildPointerToResource(
SPIRVType *ResPointerType =
GR.getOrCreateSPIRVPointerType(ResType, MIRBuilder, SC);
-
Register AcReg = MRI->createVirtualRegister(GR.getRegClass(ResPointerType));
- if (IsNonUniform) {
- // It is unclear which value needs to be marked an non-uniform, so both
- // the index and the access changed are decorated as non-uniform.
- buildOpDecorate(IndexReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {});
- buildOpDecorate(AcReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {});
- }
MIRBuilder.buildInstr(SPIRV::OpAccessChain)
.addDef(AcReg)
@@ -4560,9 +4613,6 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
uint32_t Binding = foldImm(HandleDef.getOperand(3), MRI);
uint32_t ArraySize = foldImm(HandleDef.getOperand(4), MRI);
Register IndexReg = HandleDef.getOperand(5).getReg();
- // FIXME: The IsNonUniform flag needs to be set based on resource analysis.
- // https://github.com/llvm/llvm-project/issues/155701
- bool IsNonUniform = false;
std::string Name =
getStringValueFromReg(HandleDef.getOperand(6).getReg(), *MRI);
@@ -4576,13 +4626,8 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
SC = GR.getPointerStorageClass(ResType);
}
- Register VarReg =
- buildPointerToResource(VarType, SC, Set, Binding, ArraySize, IndexReg,
- IsNonUniform, Name, MIRBuilder);
-
- if (IsNonUniform)
- buildOpDecorate(HandleReg, HandleDef, TII, SPIRV::Decoration::NonUniformEXT,
- {});
+ Register VarReg = buildPointerToResource(VarType, SC, Set, Binding, ArraySize,
+ IndexReg, Name, MIRBuilder);
// The handle for the buffer is the pointer to the resource. For an image, the
// handle is the image object. So images get an extra load.
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll
index 5e15aab7ddee0..ee9e2b0eded2b 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll
@@ -1,56 +1,38 @@
-; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %}
-
-; This test depends on llvm.svp.resource.nonuniformindex support (not yet implemented)
-; https://github.com/llvm/llvm-project/issues/160231
-; XFAIL: *
-
-@.str.b0 = private unnamed_addr constant [3 x i8] c"B0\00", align 1
+; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %}
; CHECK-DAG: OpCapability Shader
; CHECK-DAG: OpCapability ShaderNonUniformEXT
-; CHECK-DAG: OpCapability StorageImageArrayNonUniformIndexing
-; CHECK-DAG: OpCapability Image1D
-; CHECK-NOT: OpCapability
-
-; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3
-; CHECK-DAG: OpDecorate [[Var]] Binding 4
-; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform
-; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform
-; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform
-; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform
-; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform
-; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform
-
-; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0
-; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 2 R32i {{$}}
-; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]]
-; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3
-; CHECK-DAG: [[One]] = OpConstant [[int]] 1
-; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0{{$}}
-; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]]
-; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]]
-; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant
-
-; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}}
-; CHECK-NEXT: OpLabel
-define void @main() #0 {
-; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]]
-; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]]
- %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24)
- @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24(
- i32 3, i32 4, i32 3, i32 0, ptr nonnull @.str.b0)
- %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer0, i32 0)
- store i32 0, ptr %ptr0, align 4
-
-; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]]
-; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]]
- %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24)
- @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24(
- i32 3, i32 4, i32 3, i32 1, ptr nonnull @.str.b0)
- %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer1, i32 0)
- store i32 0, ptr %ptr1, align 4
+; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT
+; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT
+; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT
+; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT
+; CHECK-DAG: OpDecorate %[[#access1:]] NonUniformEXT
+@StructuredOut.str = private unnamed_addr constant [14 x i8] c"StructuredOut\00", align 1
+; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT
+; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT
+; CHECK-DAG: OpDecorate {{%[0-9]+}} NonUniformEXT
+; CHECK-DAG: OpDecorate %[[#access2:]] NonUniformEXT
+; CHECK-DAG: OpDecorate %[[#load:]] NonUniformEXT
+@UnStructuredOut.str = private unnamed_addr constant [16 x i8] c"UnStructuredOut\00", align 1
+
+define void @main() local_unnamed_addr #0 {
+entry:
+ %0 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0)
+ %add.i = add i32 %0, 1
+ %1 = tail call noundef i32 @llvm.spv.resource.nonuniformindex(i32 %add.i)
+ %2 = tail call target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_a0v4i32_12_1t(i32 0, i32 0, i32 64, i32 %1, ptr nonnull @StructuredOut.str)
+ %3 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v4i32_12_1t(target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1) %2, i32 98)
+ %4 = load <4 x i32>, ptr addrspace(11) %3, align 16
+ %vecins.i = insertelement <4 x i32> %4, i32 99, i64 0
+; CHECK: OpStore %[[#access1]] {{%[0-9]+}} Aligned 16
+ store <4 x i32> %vecins.i, ptr addrspace(11) %3, align 16
+ %5 = tail call noundef i32 @llvm.spv.resource.nonuniformindex(i32 %0)
+ %6 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_2_33t(i32 1, i32 0, i32 64, i32 %5, ptr nonnull @UnStructuredOut.str)
+ %7 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %6, i32 96)
+; CHECK: %[[#access2:]] = OpAccessChain {{.*}}
+; CHECK: %[[#load]] = OpLoad {{.*}}
+; CHECK: OpImageWrite %[[#load]] {{%[0-9]+}} {{%[0-9]+}}
+ store i32 95, ptr addrspace(11) %7, align 4
ret void
}
-
-attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like the algorithm. It is simple and clean. However, I think the implementation can be trimmed a bit.
The main thing is the testing. There are many cases we need to consider, to make sure the right capabilities are added. Please add tests that would correspond to an array of cbuffer, buffers, RWBuffers, structuredBuffers, and RWStructruedBuffers. Each of those require different capabilities.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You changed this test from a non uniform index into an array of storage images (RWBuffer) into an array of storage buffer (RWStructuredBuffers). Can you look at the full set of tests that we will need? See #122355.
You cannot generate all of these from HLSL yet. For now add that can be generated from HLSL. Look at the tables in https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst#textures and https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst#constanttexturestructuredbyte-buffers to see the links between HLSL type and the Vulkan type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for pointing out!
Based on our discussion
- Separated the tests for
RWStructuredBuffer
andRWBuffer
. - Other resource types https://godbolt.org/z/johhnWse1 and https://godbolt.org/z/Gr8oKjxMb aren't yet added to the frontend or have bugs in their implementation, will open up the issues!
RWStructuredBuffer
should haveStorageBufferArrayNonUniformIndexing
extension added (both in dxc and llvm)- The SPIRV instructions for using
Buffer
doesn't pass spirv-val, we need to replaceOpTypeImageRead
withOpTypeImageFetch
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
High level comment/question: adding an intrinic is a solid approach, but I can't stop myself but wonder, if we want to add new intrinsic for each new decoration. There are some other potential alternatives for this:
- existing spirv.Decoration metadata (which might not work great here, as it require some tricky frontend modifications to be emitted in the first place);
- (don't remember if it's done in SPIR-V BE) intrinsics like var/ptr/global annotation - this can be simply emitted by annotation attribute in clang (and can be incapsulated in higher language API implementation).
Those mechanisms have their cons: both metadata and annotation intrinsics can be dropped by optimizer, hence we shouldn't use them when semantics is affected. But this feature seem to be performance-related, so technically we can reuse one of these mechanisms.
This is not a blocker, I just want to align for the future progress (and I assume, clang already generates the intrinsic, right?)
1b59cd1
to
4934de9
Compare
This is related to correctness. If the decoration and corresponding capability is not present, Vulkan drivers will assume indexing is uniform and simply take the value in the first thread when indexing the resource array. This is incorrect behaviour. It is annoying that SPIR-V defaulted to the fast behaviour instead of the conservative behaviour. The mechanism we use has to be preserved through optimization. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Two minor issue. Otherwise this looks good to me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@s-perron thank you for the explanation. Fine with me then.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/19597 Here is the relevant piece of the build log for the reference
|
…#162540) Support `@llvm.spv.resource.nonuniformindex` in SPIRV Codegen. - Add `NonUniformEXT` decoration to the registers marked as `nonuniformindex`, and recursively decorate its child registers (e.g. Copy, AccessChain, Load) that access such index. - `OpCapability ShaderNonUniformEXT` is already added in the code. - [SPV_EXT_descriptor_indexing](https://github.khronos.org/SPIRV-Registry/extensions/EXT/SPV_EXT_descriptor_indexing.html) is skipped because it's added to SPIRV Core in 1.5. ## Unit test - The unit test checks that the register being used in the final Store/Load/Write instruction is decorated, as required by the spec. - The implementation follows [DXC](https://godbolt.org/z/zhqGThcaf) in that it recursively decorates all the child elements until the end. ```hlsl RWStructuredBuffer<uint4> StructuredOut[64]; RWBuffer<uint> UnStructuredOut[64]; [numthreads(64,1,1)] void main(uint3 GTID: SV_GroupThreadID) { StructuredOut[(NonUniformResourceIndex(GTID.x + 1))][98][0] = 99; UnStructuredOut[(NonUniformResourceIndex(GTID.x))][96] = 95; } ``` Resolves llvm#160231, llvm#161852. Verified [offload-test-suite](https://github.com/llvm/offload-test-suite/blob/cfc37840c8ad0d9c08ee900ecbc0b02cc56478ae/test/Feature/ResourceArrays/unbounded-array-nuri.test) started passing for clang.
Support
@llvm.spv.resource.nonuniformindex
in SPIRV Codegen.NonUniformEXT
decoration to the registers marked asnonuniformindex
, and recursively decorate its child registers (e.g. Copy, AccessChain, Load) that access such index.OpCapability ShaderNonUniformEXT
is already added in the code.Unit test
Resolves #160231, #161852.
Verified offload-test-suite started passing for clang.