Skip to content

Commit a03a77f

Browse files
authored
PIX: split vector alloca writes to enable debug instrumentation (microsoft#6990)
Bit of background: PIX's debug passes add new allocas, stores to which tie debug info to DXIL Values. In the case of a preexisting alloca, PIX will still add its debug writes, but the codegen may have emitted vector-valued stores. Concretely, this happens for the ray payload and RayDesc structs in DXR. Those vector-values stores were being treated incorrectly, resulting in missing values in the PIX shader debugger. This change splits vector-valued stores to such allocas into extractelement/scalar-stores, which enables the rest of the PIX instrumentation to function as expected. (Worth noting that this change only applies to the PIX shader debugging pass.)
1 parent eb1223e commit a03a77f

File tree

3 files changed

+258
-11
lines changed

3 files changed

+258
-11
lines changed

lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class DxilAnnotateWithVirtualRegister : public llvm::ModulePass {
7777
private:
7878
void AnnotateValues(llvm::Instruction *pI);
7979
void AnnotateStore(llvm::Instruction *pI);
80+
void SplitVectorStores(hlsl::OP *HlslOP, llvm::Instruction *pI);
8081
bool IsAllocaRegisterWrite(llvm::Value *V, llvm::AllocaInst **pAI,
8182
llvm::Value **pIdx);
8283
void AnnotateAlloca(llvm::AllocaInst *pAlloca);
@@ -133,6 +134,15 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) {
133134
auto instrumentableFunctions =
134135
PIXPassHelpers::GetAllInstrumentableFunctions(*m_DM);
135136

137+
for (auto *F : instrumentableFunctions) {
138+
for (auto &block : F->getBasicBlockList()) {
139+
for (auto it = block.begin(); it != block.end();) {
140+
llvm::Instruction *I = &*(it++);
141+
SplitVectorStores(m_DM->GetOP(), I);
142+
}
143+
}
144+
}
145+
136146
for (auto *F : instrumentableFunctions) {
137147
for (auto &block : F->getBasicBlockList()) {
138148
for (llvm::Instruction &I : block.getInstList()) {
@@ -297,15 +307,37 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite(
297307
return false;
298308
}
299309
// And of course the member we're after might not be at the beginning of
300-
// the struct:
301-
auto *pStructType = llvm::dyn_cast<llvm::StructType>(
302-
pPointerGEP->getPointerOperandType()->getPointerElementType());
303-
auto *pStructMember =
304-
llvm::dyn_cast<llvm::ConstantInt>(pPointerGEP->getOperand(2));
305-
uint64_t memberIndex = pStructMember->getLimitedValue();
306-
for (uint64_t i = 0; i < memberIndex; ++i) {
307-
precedingMemberCount +=
308-
CountStructMembers(pStructType->getStructElementType(i));
310+
// any containing struct:
311+
if (auto *pStructType = llvm::dyn_cast<llvm::StructType>(
312+
pPointerGEP->getPointerOperandType()
313+
->getPointerElementType())) {
314+
auto *pStructMember =
315+
llvm::dyn_cast<llvm::ConstantInt>(pPointerGEP->getOperand(2));
316+
uint64_t memberIndex = pStructMember->getLimitedValue();
317+
for (uint64_t i = 0; i < memberIndex; ++i) {
318+
precedingMemberCount +=
319+
CountStructMembers(pStructType->getStructElementType(i));
320+
}
321+
}
322+
323+
// And the source pointer may be a vector (floatn) type,
324+
// and if so, that's another offset to consider.
325+
llvm::Type *DestType = pGEP->getPointerOperand()->getType();
326+
// We expect this to be a pointer type (it's a GEP after all):
327+
if (DestType->isPointerTy()) {
328+
llvm::Type *PointedType = DestType->getPointerElementType();
329+
// Being careful to check num operands too in order to avoid false
330+
// positives:
331+
if (PointedType->isVectorTy() && pGEP->getNumOperands() == 3) {
332+
// Fetch the second deref (in operand 2).
333+
// (the first derefs the pointer to the "floatn",
334+
// and the second denotes the index into the floatn.)
335+
llvm::Value *vectorIndex = pGEP->getOperand(2);
336+
if (auto *constIntIIndex =
337+
llvm::cast<llvm::ConstantInt>(vectorIndex)) {
338+
precedingMemberCount += constIntIIndex->getLimitedValue();
339+
}
340+
}
309341
}
310342
} else {
311343
return false;
@@ -365,6 +397,8 @@ void DxilAnnotateWithVirtualRegister::AnnotateAlloca(
365397
AssignNewAllocaRegister(pAlloca, 1);
366398
} else if (auto *AT = llvm::dyn_cast<llvm::ArrayType>(pAllocaTy)) {
367399
AssignNewAllocaRegister(pAlloca, AT->getNumElements());
400+
} else if (auto *VT = llvm::dyn_cast<llvm::VectorType>(pAllocaTy)) {
401+
AssignNewAllocaRegister(pAlloca, VT->getNumElements());
368402
} else if (auto *ST = llvm::dyn_cast<llvm::StructType>(pAllocaTy)) {
369403
AssignNewAllocaRegister(pAlloca, CountStructMembers(ST));
370404
} else {
@@ -433,6 +467,36 @@ void DxilAnnotateWithVirtualRegister::AssignNewAllocaRegister(
433467
m_uVReg += C;
434468
}
435469

470+
void DxilAnnotateWithVirtualRegister::SplitVectorStores(hlsl::OP *HlslOP,
471+
llvm::Instruction *pI) {
472+
auto *pSt = llvm::dyn_cast<llvm::StoreInst>(pI);
473+
if (pSt == nullptr) {
474+
return;
475+
}
476+
477+
llvm::AllocaInst *Alloca;
478+
llvm::Value *Index;
479+
if (!IsAllocaRegisterWrite(pSt->getPointerOperand(), &Alloca, &Index)) {
480+
return;
481+
}
482+
483+
llvm::Type *SourceType = pSt->getValueOperand()->getType();
484+
if (SourceType->isVectorTy()) {
485+
if (auto *constIntIIndex = llvm::cast<llvm::ConstantInt>(Index)) {
486+
// break vector alloca stores up into individual stores
487+
llvm::IRBuilder<> B(pSt);
488+
for (uint64_t el = 0; el < SourceType->getVectorNumElements(); ++el) {
489+
llvm::Value *destPointer = B.CreateGEP(pSt->getPointerOperand(),
490+
{B.getInt32(0), B.getInt32(el)});
491+
llvm::Value *source =
492+
B.CreateExtractElement(pSt->getValueOperand(), el);
493+
B.CreateStore(source, destPointer);
494+
}
495+
pI->eraseFromParent();
496+
}
497+
}
498+
}
499+
436500
} // namespace
437501

438502
using namespace llvm;

tools/clang/unittests/HLSL/PixTest.cpp

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#endif
1515

1616
#include <algorithm>
17+
#include <array>
1718
#include <cassert>
1819
#include <cfloat>
1920
#include <map>
@@ -149,6 +150,8 @@ class PixTest : public ::testing::Test {
149150
TEST_METHOD(DebugInstrumentation_TextOutput)
150151
TEST_METHOD(DebugInstrumentation_BlockReport)
151152

153+
TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs)
154+
152155
dxc::DxcDllSupport m_dllSupport;
153156
VersionSupportInfo m_ver;
154157

@@ -442,6 +445,7 @@ class PixTest : public ::testing::Test {
442445
CComPtr<IDxcBlob> RunDxilPIXDXRInvocationsLog(IDxcBlob *blob);
443446
void TestPixUAVCase(char const *hlsl, wchar_t const *model,
444447
wchar_t const *entry);
448+
std::string Disassemble(IDxcBlob *pProgram);
445449
};
446450

447451
bool PixTest::InitSupport() {
@@ -1151,6 +1155,16 @@ static bool FindStructMemberFromStore(llvm::StoreInst *S,
11511155
return false;
11521156
}
11531157

1158+
std::string PixTest::Disassemble(IDxcBlob *pProgram) {
1159+
CComPtr<IDxcCompiler> pCompiler;
1160+
CComPtr<IDxcOperationResult> pResult;
1161+
CComPtr<IDxcBlobEncoding> pSource;
1162+
VERIFY_SUCCEEDED(CreateCompiler(m_dllSupport, &pCompiler));
1163+
CComPtr<IDxcBlobEncoding> pDisassembly;
1164+
VERIFY_SUCCEEDED(pCompiler->Disassemble(pProgram, &pDisassembly));
1165+
return BlobToUtf8(pDisassembly);
1166+
}
1167+
11541168
// This function lives in lib\DxilPIXPasses\DxilAnnotateWithVirtualRegister.cpp
11551169
// Declared here so we can test it.
11561170
uint32_t CountStructMembers(llvm::Type const *pType);
@@ -3023,3 +3037,172 @@ float4 main() : SV_Target {
30233037
VERIFY_IS_TRUE(foundDoubleAssignment);
30243038
VERIFY_IS_TRUE(found32BitAllocaStore);
30253039
}
3040+
3041+
std::string ExtractBracedSubstring(std::string const &line) {
3042+
auto open = line.find('{');
3043+
auto close = line.find('}');
3044+
if (open != std::string::npos && close != std::string::npos &&
3045+
open + 1 < close) {
3046+
return line.substr(open + 1, close - open - 1);
3047+
}
3048+
return {};
3049+
}
3050+
3051+
int ExtractMetaInt32Value(std::string const &token) {
3052+
auto findi32 = token.find("i32 ");
3053+
if (findi32 != std::string_view::npos) {
3054+
return atoi(
3055+
std::string(token.data() + findi32 + 4, token.length() - (findi32 + 4))
3056+
.c_str());
3057+
}
3058+
return -1;
3059+
}
3060+
3061+
std::vector<std::string> Split(std::string str, char delimeter) {
3062+
std::vector<std::string> lines;
3063+
3064+
auto const *p = str.data();
3065+
auto const *justPastPreviousDelimiter = p;
3066+
while (p < str.data() + str.length()) {
3067+
if (*p == delimeter) {
3068+
lines.emplace_back(std::string(justPastPreviousDelimiter,
3069+
p - justPastPreviousDelimiter));
3070+
justPastPreviousDelimiter = p + 1;
3071+
p = justPastPreviousDelimiter;
3072+
} else {
3073+
p++;
3074+
}
3075+
}
3076+
3077+
lines.emplace_back(
3078+
std::string(justPastPreviousDelimiter, p - justPastPreviousDelimiter));
3079+
3080+
return lines;
3081+
}
3082+
3083+
struct MetadataAllocaDefinition {
3084+
int base;
3085+
int count;
3086+
};
3087+
using AllocaDefinitions = std::map<int, MetadataAllocaDefinition>;
3088+
struct MetadataAllocaWrite {
3089+
int allocaDefMetadataKey;
3090+
int offset;
3091+
int size;
3092+
};
3093+
using AllocaWrites = std::map<int, MetadataAllocaWrite>;
3094+
3095+
struct AllocaMetadata {
3096+
AllocaDefinitions allocaDefinitions;
3097+
AllocaWrites allocaWrites;
3098+
std::vector<int> allocaWritesMetaKeys;
3099+
};
3100+
3101+
AllocaMetadata
3102+
FindAllocaRelatedMetadata(std::vector<std::string> const &lines) {
3103+
3104+
const char *allocaMetaDataAssignment = "= !{i32 1, ";
3105+
const char *allocaRegWRiteAssignment = "= !{i32 2, !";
3106+
const char *allocaRegWriteTag = "!pix-alloca-reg-write !";
3107+
3108+
AllocaMetadata ret;
3109+
for (auto const &line : lines) {
3110+
if (line[0] == '!') {
3111+
auto key = atoi(std::string(line.data() + 1, line.length() - 1).c_str());
3112+
if (key != -1) {
3113+
if (line.find(allocaMetaDataAssignment) != std::string::npos) {
3114+
std::string bitInBraces = ExtractBracedSubstring(line);
3115+
if (bitInBraces != "") {
3116+
auto tokens = Split(bitInBraces, ',');
3117+
if (tokens.size() == 3) {
3118+
auto value0 = ExtractMetaInt32Value(tokens[1]);
3119+
auto value1 = ExtractMetaInt32Value(tokens[2]);
3120+
if (value0 != -1 && value1 != -1) {
3121+
MetadataAllocaDefinition def;
3122+
def.base = value0;
3123+
def.count = value1;
3124+
ret.allocaDefinitions[key] = def;
3125+
}
3126+
}
3127+
}
3128+
} else if (line.find(allocaRegWRiteAssignment) != std::string::npos) {
3129+
std::string bitInBraces = ExtractBracedSubstring(line);
3130+
if (bitInBraces != "") {
3131+
auto tokens = Split(bitInBraces, ',');
3132+
if (tokens.size() == 4 && tokens[1][1] == '!') {
3133+
auto allocaKey = atoi(tokens[1].c_str() + 2);
3134+
auto value0 = ExtractMetaInt32Value(tokens[2]);
3135+
auto value1 = ExtractMetaInt32Value(tokens[3]);
3136+
if (value0 != -1 && value1 != -1) {
3137+
MetadataAllocaWrite aw;
3138+
aw.allocaDefMetadataKey = allocaKey;
3139+
aw.size = value0;
3140+
aw.offset = value1;
3141+
ret.allocaWrites[key] = aw;
3142+
}
3143+
}
3144+
}
3145+
}
3146+
}
3147+
} else {
3148+
auto findAw = line.find(allocaRegWriteTag);
3149+
if (findAw != std::string::npos) {
3150+
ret.allocaWritesMetaKeys.push_back(
3151+
atoi(line.c_str() + findAw + strlen(allocaRegWriteTag)));
3152+
}
3153+
}
3154+
}
3155+
return ret;
3156+
}
3157+
3158+
TEST_F(PixTest, DebugInstrumentation_VectorAllocaWrite_Structs) {
3159+
const char *source = R"x(
3160+
RaytracingAccelerationStructure Scene : register(t0, space0);
3161+
struct RayPayload
3162+
{
3163+
float4 color;
3164+
};
3165+
RWStructuredBuffer<float> UAV: register(u0);
3166+
[shader("raygeneration")]
3167+
void RaygenInternalName()
3168+
{
3169+
RayDesc ray;
3170+
ray.Origin = float3(UAV[0], UAV[1],UAV[3]);
3171+
ray.Direction = float3(4.4,5.5,6.6);
3172+
ray.TMin = 0.001;
3173+
ray.TMax = 10000.0;
3174+
RayPayload payload = { float4(0, 1, 0, 1) };
3175+
TraceRay(Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, ~0, 0, 1, 0, ray, payload);
3176+
})x";
3177+
3178+
auto compiled = Compile(m_dllSupport, source, L"lib_6_6", {L"-Od"});
3179+
auto output = RunDebugPass(compiled);
3180+
auto disassembly = Disassemble(output.blob);
3181+
auto lines = Split(disassembly, '\n');
3182+
auto metaDataKeyToValue = FindAllocaRelatedMetadata(lines);
3183+
// To validate that the RayDesc and RayPayload instances were fully covered,
3184+
// check that there are alloca writes that cover all of them. RayPayload
3185+
// has four elements, and RayDesc has eight.
3186+
std::array<bool, 4> RayPayloadElementCoverage;
3187+
std::array<bool, 8> RayDescElementCoverage;
3188+
3189+
for (auto const &write : metaDataKeyToValue.allocaWrites) {
3190+
// the whole point of the changes with this test is to separate vector
3191+
// writes into individual elements:
3192+
VERIFY_ARE_EQUAL(1, write.second.size);
3193+
auto findAlloca = metaDataKeyToValue.allocaDefinitions.find(
3194+
write.second.allocaDefMetadataKey);
3195+
if (findAlloca != metaDataKeyToValue.allocaDefinitions.end()) {
3196+
if (findAlloca->second.count == 4) {
3197+
RayPayloadElementCoverage[write.second.offset] = true;
3198+
} else if (findAlloca->second.count == 8) {
3199+
RayDescElementCoverage[write.second.offset] = true;
3200+
}
3201+
}
3202+
}
3203+
// Check that coverage for every element was emitted:
3204+
for (auto const &b : RayPayloadElementCoverage)
3205+
VERIFY_IS_TRUE(b);
3206+
for (auto const &b : RayDescElementCoverage)
3207+
VERIFY_IS_TRUE(b);
3208+
}

tools/clang/unittests/HLSL/PixTestUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ int ExtractMetaInt32Value(std::string const &token) {
5454
return -1;
5555
}
5656
std::map<int, std::pair<int, int>>
57-
MetaDataKeyToRegisterNumber(std::vector<std::string> const &lines) {
57+
FindAllocaRelatedMetadata(std::vector<std::string> const &lines) {
5858
// Find lines of the exemplary form
5959
// "!249 = !{i32 0, i32 20}" (in the case of a DXIL value)
6060
// "!196 = !{i32 1, i32 5, i32 1}" (in the case of a DXIL alloca reg)
@@ -148,7 +148,7 @@ DxilRegisterToNameMap BuildDxilRegisterToNameMap(char const *disassembly) {
148148

149149
auto lines = Tokenize(disassembly, "\n");
150150

151-
auto metaDataKeyToValue = MetaDataKeyToRegisterNumber(lines);
151+
auto metaDataKeyToValue = FindAllocaRelatedMetadata(lines);
152152

153153
for (auto const &line : lines) {
154154
if (line[0] == '!') {

0 commit comments

Comments
 (0)