Skip to content

Commit 1598cd8

Browse files
authored
Pix: Shader debug speedup (microsoft#5666)
Two related changes: (1) Move the value/store annotations before the instruction-number annotations, so that we can avoid giving instruction numbers to the allocas that the value-to-declare pass adds. Those allocas exist only to implement PIX shader debugging and do not need to be debugged by the PIX end user; including them would otherwise disturb the instruction numbering that the second change relies on. (2) Add parameters that allow PIX to limit instrumentation to a smaller range of instruction numbers. This is a crude way to make instrumentation workable for very large shaders (more than roughly 100k instructions), which would otherwise break the GPU driver, either by running out of memory or by triggering TDRs, when the instrumentation is run.
1 parent 94d9a8a commit 1598cd8

File tree

5 files changed

+124
-28
lines changed

5 files changed

+124
-28
lines changed

lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,33 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) {
133133

134134
auto instrumentableFunctions = PIXPassHelpers::GetAllInstrumentableFunctions(*m_DM);
135135

136-
for (auto * F : instrumentableFunctions) {
136+
for (auto *F : instrumentableFunctions) {
137+
for (auto &block : F->getBasicBlockList()) {
138+
for (llvm::Instruction &I : block.getInstList()) {
139+
AnnotateValues(&I);
140+
}
141+
}
142+
}
143+
144+
for (auto *F : instrumentableFunctions) {
145+
for (auto &block : F->getBasicBlockList()) {
146+
for (llvm::Instruction &I : block.getInstList()) {
147+
AnnotateStore(&I);
148+
}
149+
}
150+
}
151+
152+
for (auto *F : instrumentableFunctions) {
137153
int InstructionRangeStart = InstNum;
138154
int InstructionRangeEnd = InstNum;
139155
for (auto &block : F->getBasicBlockList()) {
140156
for (llvm::Instruction &I : block.getInstList()) {
157+
// If the instruction is part of the debug value instrumentation added by this pass,
158+
// it doesn't need to be instrumented for the PIX user.
159+
uint32_t unused1, unused2;
160+
if (auto *Alloca = llvm::dyn_cast<llvm::AllocaInst>(&I))
161+
if (PixAllocaReg::FromInst(Alloca, &unused1, &unused2))
162+
continue;
141163
if (!llvm::isa<llvm::DbgDeclareInst>(&I)) {
142164
pix_dxil::PixDxilInstNum::AddMD(M.getContext(), &I, InstNum++);
143165
InstructionRangeEnd = InstNum;
@@ -163,22 +185,6 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) {
163185
*OSOverride << "\nInstructionCount:" << InstNum << "\n";
164186
}
165187

166-
for (auto * F : instrumentableFunctions) {
167-
for (auto &block : F->getBasicBlockList()) {
168-
for (llvm::Instruction &I : block.getInstList()) {
169-
AnnotateValues(&I);
170-
}
171-
}
172-
}
173-
174-
for (auto * F : instrumentableFunctions) {
175-
for (auto &block : F->getBasicBlockList()) {
176-
for (llvm::Instruction &I : block.getInstList()) {
177-
AnnotateStore(&I);
178-
}
179-
}
180-
}
181-
182188
m_DM = nullptr;
183189
return m_uVReg > 0;
184190
}

lib/DxilPIXPasses/DxilDebugInstrumentation.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,8 @@ class DxilDebugInstrumentation : public ModulePass {
212212
unsigned InstanceId;
213213
} VertexShader;
214214
};
215+
unsigned m_FirstInstruction = 0;
216+
unsigned m_LastInstruction = static_cast<unsigned>(-1);
215217

216218
uint64_t m_UAVSize = 1024 * 1024;
217219
struct PerFunctionValues
@@ -290,6 +292,9 @@ class DxilDebugInstrumentation : public ModulePass {
290292
};
291293

292294
void DxilDebugInstrumentation::applyOptions(PassOptions O) {
295+
GetPassOptionUnsigned(O, "FirstInstruction", &m_FirstInstruction, 0);
296+
GetPassOptionUnsigned(O, "LastInstruction", &m_LastInstruction,
297+
static_cast<unsigned>(-1));
293298
GetPassOptionUnsigned(O, "parameter0", &m_Parameters.Parameters[0], 0);
294299
GetPassOptionUnsigned(O, "parameter1", &m_Parameters.Parameters[1], 0);
295300
GetPassOptionUnsigned(O, "parameter2", &m_Parameters.Parameters[2], 0);
@@ -1008,7 +1013,13 @@ bool DxilDebugInstrumentation::RunOnFunction(
10081013
for (inst_iterator I = inst_begin(entryFunction),
10091014
E = inst_end(entryFunction);
10101015
I != E; ++I) {
1011-
AllInstructions.push_back(&*I);
1016+
std::uint32_t InstructionNumber;
1017+
if (pix_dxil::PixDxilInstNum::FromInst(&*I, &InstructionNumber)) {
1018+
if (InstructionNumber < m_FirstInstruction ||
1019+
InstructionNumber >= m_LastInstruction)
1020+
continue;
1021+
AllInstructions.push_back(&*I);
1022+
}
10121023
}
10131024

10141025
// Branchless instrumentation requires taking care of a few things:
@@ -1121,6 +1132,8 @@ bool DxilDebugInstrumentation::RunOnFunction(
11211132
if (!pix_dxil::PixDxilInstNum::FromInst(ValueNPhi.Phi, &InstNum)) {
11221133
continue;
11231134
}
1135+
if (InstNum < m_FirstInstruction || InstNum >= m_LastInstruction)
1136+
continue;
11241137

11251138
BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
11261139
addStepDebugEntryValue(BC, InstNum, ValueNPhi.Val, RegNum,

tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,13 @@ void ENTRY()
2121
TraceRay(scene, 0 /*rayFlags*/, 0xFF /*rayMask*/, 0 /*sbtRecordOffset*/, 1 /*sbtRecordStride*/, 0 /*missIndex*/, ray, pld);
2222
}
2323

24-
// CHECK: {{.*}} = alloca %struct.RayDesc, align 4, !pix-dxil-inst-num {{.*}}, !pix-alloca-reg [[RDAlloca:![0-9]+]]
25-
// CHECK: {{.*}} = alloca %struct.RayPayload, align 4, !pix-dxil-inst-num {{.*}}, !pix-alloca-reg [[RPAlloca:![0-9]+]]
26-
// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 0, !pix-dxil-inst-num {{.*}}, !pix-dxil-reg [[RDGEP:![0-9]+]]
27-
// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-inst-num {{.*}}, !pix-dxil-reg [[NothGEP:![0-9]+]]
28-
// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 1, !pix-dxil-inst-num {{.*}}, !pix-dxil-reg [[RDGEP2:![0-9]+]]
29-
// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-inst-num {{.*}}, !pix-dxil-reg [[NothGEP2:![0-9]+]]
30-
// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 2, !pix-dxil-inst-num {{.*}}, !pix-dxil-reg [[RDGEP3:![0-9]+]]
31-
// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-inst-num {{.*}}, !pix-dxil-reg [[NothGEP3:![0-9]+]]
24+
// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 0, !pix-dxil-reg [[RDGEP:![0-9]+]], !pix-dxil-inst-num {{.*}}
25+
// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP:![0-9]+]], !pix-dxil-inst-num {{.*}}
26+
// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 1, !pix-dxil-reg [[RDGEP2:![0-9]+]], !pix-dxil-inst-num {{.*}}
27+
// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP2:![0-9]+]], !pix-dxil-inst-num {{.*}}
28+
// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 2, !pix-dxil-reg [[RDGEP3:![0-9]+]], !pix-dxil-inst-num {{.*}}
29+
// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP3:![0-9]+]], !pix-dxil-inst-num {{.*}}
3230

33-
// CHECK-DAG: [[RDAlloca]] = !{i32 1, i32 0, i32 8}
34-
// CHECK-DAG: [[RPAlloca]] = !{i32 1, i32 8, i32 3}
3531
// CHECK-DAG: [[RDGEP]] = !{i32 0, i32 0}
3632
// CHECK-DAG: [[NothGEP]] = !{i32 0, i32 11}
3733
// CHECK-DAG: [[RDGEP2]] = !{i32 0, i32 3}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// The PIX debug instrumentation pass takes optional arguments that limit the range of instruction numbers that will be instrumented.
2+
// (This is to cope with extremely large shaders, the instrumentation of which will break, either by out-of-memory or by TDRing when run.)
3+
4+
// RUN: %dxc -EFlowControlPS -Tps_6_0 %s | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation,FirstInstruction=6,LastInstruction=9 | %FileCheck %s
5+
6+
// The only instrumented instructions should have instruction numbers in the range [6,9):
7+
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, {{.*}}, i32 undef, i32 6
8+
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, {{.*}}, i32 undef, i32 7
9+
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, {{.*}}, i32 undef, i32 8
10+
11+
// Two more stores to finish off the instrumentation for instruction #8:
12+
// CHECK: call void @dx.op.bufferStore.f32
13+
// CHECK: call void @dx.op.bufferStore.i32
14+
15+
// Then no more instrumentation at all:
16+
// CHECK-NOT: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle
17+
18+
struct VS_OUTPUT_ENV {
19+
float4 Pos : SV_Position;
20+
float2 Tex : TEXCOORD0;
21+
};
22+
23+
int i32;
24+
float f32;
25+
26+
float4 Vectorize(float f) {
27+
float4 ret;
28+
29+
if (f < 1024) // testbreakpoint0
30+
ret.x = f;
31+
else
32+
ret.x = f + 100;
33+
34+
if (f < 512)
35+
ret.y = f;
36+
else
37+
ret.y = f + 10;
38+
39+
if (f < 2048)
40+
ret.z = f;
41+
else
42+
ret.z = f + 1000;
43+
44+
if (f < 4096)
45+
ret.w = f + f;
46+
else
47+
ret.w = f + 1;
48+
49+
return ret;
50+
}
51+
52+
float4 FlowControlPS(VS_OUTPUT_ENV input) : SV_Target {
53+
float4 ret = {0, 0, 0, 1}; // FirstExecutableLine
54+
switch (i32) {
55+
case 0:
56+
ret = float4(1, 0, 1, 1);
57+
break;
58+
case 32:
59+
ret = Vectorize(f32);
60+
break;
61+
}
62+
63+
if (i32 > 10) {
64+
ret.r += 0.1f;
65+
} else {
66+
ret.g += 0.1f;
67+
}
68+
69+
for (uint i = 0; i < 3; ++i) // testbreakpoint1
70+
{
71+
ret.b += f32 / 1024.f;
72+
}
73+
74+
for (uint j = 0; j < i32 / 8; ++j) {
75+
ret.b += f32 / 1000.f;
76+
}
77+
78+
return ret; // LastExecutableLine
79+
}

utils/hct/hctdb.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2132,6 +2132,8 @@ def add_pass(name, type_name, doc, opts):
21322132
{'n':'checkForDynamicIndexing','t':'bool','c':1}])
21332133
add_pass('hlsl-dxil-debug-instrumentation', 'DxilDebugInstrumentation', 'HLSL DXIL debug instrumentation for PIX', [
21342134
{'n':'UAVSize','t':'int','c':1},
2135+
{'n':'FirstInstruction','t':'int','c':1},
2136+
{'n':'LastInstruction','t':'int','c':1},
21352137
{'n':'parameter0','t':'int','c':1},
21362138
{'n':'parameter1','t':'int','c':1},
21372139
{'n':'parameter2','t':'int','c':1}])

0 commit comments

Comments
 (0)