Skip to content

Commit c6cf5c9

Browse files
authored
Add execution mode for derivatives in compute shaders (microsoft#5939)
This commit adds the execution mode to compute shaders when derivatives are used. I only added a single test because the derivatives are all handled in one place. The choice of execution mode was tested using the implicit LOD instructions.
1 parent ed1502e commit c6cf5c9

File tree

2 files changed

+41
-4
lines changed

2 files changed

+41
-4
lines changed

tools/clang/lib/SPIRV/SpirvEmitter.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11193,8 +11193,14 @@ SpirvEmitter::processIntrinsicF32ToF16(const CallExpr *callExpr) {
1119311193

1119411194
SpirvInstruction *SpirvEmitter::processIntrinsicUsingSpirvInst(
1119511195
const CallExpr *callExpr, spv::Op opcode, bool actPerRowForMatrices) {
11196-
// Certain opcodes are only allowed in pixel shader
11197-
if (!spvContext.isPS())
11196+
// The derivative opcodes are only allowed in pixel shaders, or in compute
11197+
// shaders when the SPV_NV_compute_shader_derivatives extension is enabled.
11198+
if (!spvContext.isPS()) {
11199+
// For cases where the instructions are known to be invalid, we turn on
11200+
// legalization expecting the invalid use to be optimized away. For compute
11201+
// shaders, we add the execution mode to enable the derivatives. We legalize
11202+
// in this case as well because that is what we did before the extension was
11203+
// used, and we do not want to change previous behaviour too much.
1119811204
switch (opcode) {
1119911205
case spv::Op::OpDPdx:
1120011206
case spv::Op::OpDPdy:
@@ -11205,13 +11211,15 @@ SpirvInstruction *SpirvEmitter::processIntrinsicUsingSpirvInst(
1120511211
case spv::Op::OpFwidth:
1120611212
case spv::Op::OpFwidthFine:
1120711213
case spv::Op::OpFwidthCoarse:
11214+
if (spvContext.isCS())
11215+
addDerivativeGroupExecutionMode();
1120811216
needsLegalization = true;
1120911217
break;
1121011218
default:
11211-
// Only the given opcodes need legalization. Anything else should preserve
11212-
// previous.
11219+
// Only the given opcodes need legalization and the execution mode.
1121311220
break;
1121411221
}
11222+
}
1121511223

1121611224
const auto loc = callExpr->getExprLoc();
1121711225
const auto range = callExpr->getSourceRange();
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_NV_compute_shader_derivatives -fcgl %s -spirv 2>&1 | FileCheck %s
2+
3+
// CHECK: OpCapability ComputeDerivativeGroupQuadsNV
4+
// CHECK: OpExtension "SPV_NV_compute_shader_derivatives"
5+
// CHECK: OpExecutionMode %main DerivativeGroupQuadsNV
6+
7+
8+
SamplerState ss : register(s2);
9+
SamplerComparisonState scs;
10+
11+
RWStructuredBuffer<uint> o;
12+
Texture1D <float> t1;
13+
14+
[numthreads(2,2,1)]
15+
void main(uint3 id : SV_GroupThreadID)
16+
{
17+
// CHECK: OpDPdx %float %float_0_5
18+
o[0] = ddx(0.5);
19+
// CHECK: OpDPdxCoarse %float %float_0_5
20+
o[1] = ddx_coarse(0.5);
21+
// CHECK: OpDPdy %float %float_0_5
22+
o[2] = ddy(0.5);
23+
// CHECK: OpDPdyCoarse %float %float_0_5
24+
o[3] = ddy_coarse(0.5);
25+
// CHECK: OpDPdxFine %float %float_0_5
26+
o[4] = ddx_fine(0.5);
27+
// CHECK: OpDPdyFine %float %float_0_5
28+
o[5] = ddy_fine(0.5);
29+
}

0 commit comments

Comments
 (0)