Skip to content

Commit 446da19

Browse files
authored
PIX: Optimize debug instrumentation for fewest emitted instructions (microsoft#6281)
This change switches from instrumentation per instruction to instrumentation per basic block. Furthermore, not every instruction in a basic block needs to write debug data to the output UAV: it's enough to know that the basic block was entered, as long as the calling application can figure out which instructions were in that block. To support that knowledge, the pass now emits a text "precis" of each basic block. Also, the previous branchless UAV bounds enforcement was replaced with something similar that emits fewer instructions at the cost of a larger UAV. This tradeoff is WELL worth it. Additionally, the debug pass used to add extra blocks in order to solidify the arguments to phi instructions. This work was unnecessary, and added a lot of complexity to the resulting instrumented shader. The debugger application is only interested in the value of the phi itself and the actual value produced via the actual preceding edge. Full details are in the comments in the code. This change reduces driver-side compilation overhead from "overnight" to 2 minutes on a 160k-instruction shader.
1 parent 627e400 commit 446da19

16 files changed

+799
-457
lines changed

include/dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ static constexpr uint32_t ID = 3;
2828

2929
void AddMD(llvm::LLVMContext &Ctx, llvm::Instruction *pI,
3030
std::uint32_t InstNum);
31-
bool FromInst(llvm::Instruction *pI, std::uint32_t *pInstNum);
31+
bool FromInst(llvm::Instruction const *pI, std::uint32_t *pInstNum);
3232
} // namespace PixDxilInstNum
3333

3434
namespace PixDxilReg {
3535
static constexpr char MDName[] = "pix-dxil-reg";
3636
static constexpr uint32_t ID = 0;
3737

3838
void AddMD(llvm::LLVMContext &Ctx, llvm::Instruction *pI, std::uint32_t RegNum);
39-
bool FromInst(llvm::Instruction *pI, std::uint32_t *pRegNum);
39+
bool FromInst(llvm::Instruction const *pI, std::uint32_t *pRegNum);
4040
} // namespace PixDxilReg
4141

4242
namespace PixAllocaReg {
@@ -45,7 +45,7 @@ static constexpr uint32_t ID = 1;
4545

4646
void AddMD(llvm::LLVMContext &Ctx, llvm::AllocaInst *pAlloca,
4747
std::uint32_t RegNum, std::uint32_t Count);
48-
bool FromInst(llvm::AllocaInst *pAlloca, std::uint32_t *pRegBase,
48+
bool FromInst(llvm::AllocaInst const *pAlloca, std::uint32_t *pRegBase,
4949
std::uint32_t *pRegSize);
5050
} // namespace PixAllocaReg
5151

lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -989,15 +989,6 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M,
989989
}
990990
}
991991

992-
class ScopedInstruction {
993-
llvm::Instruction *m_Instruction;
994-
995-
public:
996-
ScopedInstruction(llvm::Instruction *I) : m_Instruction(I) {}
997-
~ScopedInstruction() { delete m_Instruction; }
998-
llvm::Instruction *Get() const { return m_Instruction; }
999-
};
1000-
1001992
struct GlobalVariableAndStorage {
1002993
llvm::DIGlobalVariable *DIGV;
1003994
OffsetInBits Offset;

lib/DxilPIXPasses/DxilDebugInstrumentation.cpp

Lines changed: 608 additions & 264 deletions
Large diffs are not rendered by default.

lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ void pix_dxil::PixDxilInstNum::AddMD(llvm::LLVMContext &Ctx,
3333
llvm::ConstantAsMetadata::get(B.getInt32(InstNum))}));
3434
}
3535

36-
bool pix_dxil::PixDxilInstNum::FromInst(llvm::Instruction *pI,
36+
bool pix_dxil::PixDxilInstNum::FromInst(llvm::Instruction const *pI,
3737
std::uint32_t *pInstNum) {
3838
*pInstNum = 0;
3939

@@ -73,7 +73,7 @@ void pix_dxil::PixDxilReg::AddMD(llvm::LLVMContext &Ctx, llvm::Instruction *pI,
7373
llvm::ConstantAsMetadata::get(B.getInt32(RegNum))}));
7474
}
7575

76-
bool pix_dxil::PixDxilReg::FromInst(llvm::Instruction *pI,
76+
bool pix_dxil::PixDxilReg::FromInst(llvm::Instruction const *pI,
7777
std::uint32_t *pRegNum) {
7878
*pRegNum = 0;
7979

@@ -141,7 +141,7 @@ void pix_dxil::PixAllocaReg::AddMD(llvm::LLVMContext &Ctx,
141141
llvm::ConstantAsMetadata::get(B.getInt32(Count))}));
142142
}
143143

144-
bool pix_dxil::PixAllocaReg::FromInst(llvm::AllocaInst *pAlloca,
144+
bool pix_dxil::PixAllocaReg::FromInst(llvm::AllocaInst const *pAlloca,
145145
std::uint32_t *pRegBase,
146146
std::uint32_t *pRegSize) {
147147
*pRegBase = 0;

lib/DxilPIXPasses/PixPassHelpers.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ using namespace llvm;
3737
using namespace hlsl;
3838

3939
namespace PIXPassHelpers {
40-
bool IsAllocateRayQueryInstruction(llvm::Value *Val) {
40+
bool IsAllocateRayQueryInstruction(llvm::Value const *Val) {
4141
if (Val != nullptr) {
42-
if (llvm::Instruction *Inst = llvm::dyn_cast<llvm::Instruction>(Val)) {
42+
if (llvm::Instruction const *Inst =
43+
llvm::dyn_cast<llvm::Instruction>(Val)) {
4344
return hlsl::OP::IsDxilOpFuncCallInst(Inst,
4445
hlsl::OP::OpCode::AllocateRayQuery);
4546
}

lib/DxilPIXPasses/PixPassHelpers.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,17 @@
2020
#endif
2121

2222
namespace PIXPassHelpers {
23-
bool IsAllocateRayQueryInstruction(llvm::Value *Val);
23+
24+
class ScopedInstruction {
25+
llvm::Instruction *m_Instruction;
26+
27+
public:
28+
ScopedInstruction(llvm::Instruction *I) : m_Instruction(I) {}
29+
~ScopedInstruction() { delete m_Instruction; }
30+
llvm::Instruction *Get() const { return m_Instruction; }
31+
};
32+
33+
bool IsAllocateRayQueryInstruction(llvm::Value const *Val);
2434
llvm::CallInst *CreateUAV(hlsl::DxilModule &DM, llvm::IRBuilder<> &Builder,
2535
unsigned int registerId, const char *name);
2636
llvm::CallInst *CreateHandleForResource(hlsl::DxilModule &DM,

tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation | %FileCheck %s
1+
// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation,UAVSize=128 | %FileCheck %s
22

33
// Check that the basic starting header is present:
44

@@ -10,17 +10,18 @@
1010
// CHECK: %CompareToX = icmp eq i32 %XIndex, 0
1111
// CHECK: %CompareToY = icmp eq i32 %YIndex, 0
1212
// CHECK: %ComparePos = and i1 %CompareToX, %CompareToY
13-
// CHECK: %OffsetMultiplicand = zext i1 %ComparePos to i32
14-
// CHECK: %ComplementOfMultiplicand = sub i32 1, %OffsetMultiplicand
15-
// CHECK: %OffsetAddend = mul i32 983040, %ComplementOfMultiplicand
16-
// CHECK: %IncrementForThisInvocation = mul i32 8, %OffsetMultiplicand
17-
18-
// Check the first instruction was instrumented:
19-
// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0
20-
// CHECK: %MaskedForUAVLimit = and i32 %UAVIncResult, 983039
21-
// CHECK: %MultipliedForInterest = mul i32 %MaskedForUAVLimit, %OffsetMultiplicand
22-
// CHECK: %AddedForInterest = add i32 %MultipliedForInterest, %OffsetAddend
23-
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, i32 %AddedForInterest
13+
14+
15+
// Check for branches-for-interest and AND value and counter location for a UAV size of 128
16+
// CHECK: br i1 %ComparePos, label %PIXInterestingBlock, label %PIXNonInterestingBlock
17+
// CHECK: %PIXOffsetOr = phi i32 [ 0, %PIXInterestingBlock ], [ 64, %PIXNonInterestingBlock ]
18+
// CHECK: %PIXCounterLocation = phi i32 [ 63, %PIXInterestingBlock ], [ 127, %PIXNonInterestingBlock ]
19+
20+
// Check the first block header was emitted: (increment, AND + OR)
21+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0
22+
// CHECK: and i32
23+
// CHECK: or i32
24+
2425

2526

2627
[RootSignature("")]

tools/clang/test/HLSLFileCheck/pix/DebugFlowControl.hlsl

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,31 @@
22

33
// Check that flow control constructs don't break the instrumentation.
44

5-
// CHECK: %UAVIncResult2 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0
5+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0
66

7-
// CHECK: %MaskedForUAVLimit3 = and i32 %UAVIncResult2, 983039
7+
// There should be several blocks that have instrumentation:
88

9-
// CHECK: %MultipliedForInterest4 = mul i32 %MaskedForUAVLimit3, %OffsetMultiplicand
9+
// CHECK: ; preds =
10+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
1011

11-
// CHECK: %AddedForInterest5 = add i32 %MultipliedForInterest4, %OffsetAddend
12+
// CHECK: ; preds =
13+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
14+
15+
// CHECK: ; preds =
16+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
17+
18+
// CHECK: ; preds =
19+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
20+
21+
// CHECK: ; preds =
22+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
23+
24+
// CHECK: ; preds =
25+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
26+
27+
// CHECK: ; preds =
28+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
1229

13-
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, i32 %AddedForInterest5
1430

1531

1632
struct VS_OUTPUT_ENV {

tools/clang/test/HLSLFileCheck/pix/DebugInstrumentRet.hlsl

Lines changed: 0 additions & 15 deletions
This file was deleted.

tools/clang/test/HLSLFileCheck/pix/DebugLimitedInstructionOverrides.hlsl

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
// The PIX debug instrumentation pass takes optional arguments that limit the range of instruction numbers that will be instrumented.
22
// (This is to cope with extremely large shaders, the instrumentation of which will break, either by out-of-memory or by TDRing when run.)
33

4-
// RUN: %dxc -EFlowControlPS -Tps_6_0 %s | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation,FirstInstruction=6,LastInstruction=9 | %FileCheck %s
4+
// RUN: %dxc -EFlowControlPS -Tps_6_0 %s | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation,FirstInstruction=4,LastInstruction=20 | %FileCheck %s
55

6-
// The only instrumented instructions should have instruction numbers in the range [6,9):
7-
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, {{.*}}, i32 undef, i32 6
8-
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, {{.*}}, i32 undef, i32 7
9-
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, {{.*}}, i32 undef, i32 8
6+
// The only instrumented blocks should have instruction numbers in the range [4,20):
107

11-
// Two more stores to finish off the instrumentation for instruction #8:
12-
// CHECK: call void @dx.op.bufferStore.f32
13-
// CHECK: call void @dx.op.bufferStore.i32
8+
// Skip over the preamble
9+
// CHECK: switch i32
10+
//
11+
// Now there should be exactly two more instrumented blocks (two increments of the counter UAV entry)
12+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
13+
// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
1414

15-
// Then no more instrumentation at all:
16-
// CHECK-NOT: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle
15+
// Then no more instrumentation at all (i.e. no more increments of the counter UAV entry):
16+
// CHECK-NOT: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle
1717

1818
struct VS_OUTPUT_ENV {
1919
float4 Pos : SV_Position;

0 commit comments

Comments
 (0)