Skip to content

Commit 9a06f4d

Browse files
author
Greg Roth
authored
Consolidate buffer store translation (#7251)
Consolidate buffer store translation. Added structured and typed buffer support to TranslateStore and used it for all such lowerings. Includes IR and fcgl tests for the same, in addition to recently added load/store tests that exercise this same code.
1 parent 94596e1 commit 9a06f4d

File tree

5 files changed

+2560
-112
lines changed

5 files changed

+2560
-112
lines changed

lib/HLSL/HLOperationLower.cpp

Lines changed: 63 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -4335,18 +4335,15 @@ void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
43354335
}
43364336

43374337
void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
4338-
Value *offset, IRBuilder<> &Builder, hlsl::OP *OP,
4339-
Value *sampIdx = nullptr) {
4338+
Value *Idx, Value *offset, IRBuilder<> &Builder,
4339+
hlsl::OP *OP, Value *sampIdx = nullptr) {
43404340
Type *Ty = val->getType();
4341-
4342-
// This function is no longer used for lowering stores to a
4343-
// structured buffer.
4344-
DXASSERT_NOMSG(RK != DxilResource::Kind::StructuredBuffer);
4345-
43464341
OP::OpCode opcode = OP::OpCode::NumOpCodes;
4342+
bool IsTyped = true;
43474343
switch (RK) {
43484344
case DxilResource::Kind::RawBuffer:
43494345
case DxilResource::Kind::StructuredBuffer:
4346+
IsTyped = false;
43504347
opcode = OP::OpCode::RawBufferStore;
43514348
break;
43524349
case DxilResource::Kind::TypedBuffer:
@@ -4364,10 +4361,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
43644361
break;
43654362
}
43664363

4367-
bool isTyped = opcode == OP::OpCode::TextureStore ||
4368-
opcode == OP::OpCode::TextureStoreSample ||
4369-
RK == DxilResource::Kind::TypedBuffer;
4370-
43714364
Type *i32Ty = Builder.getInt32Ty();
43724365
Type *i64Ty = Builder.getInt64Ty();
43734366
Type *doubleTy = Builder.getDoubleTy();
@@ -4390,7 +4383,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
43904383
alignValue = 4;
43914384
Constant *Alignment = OP->GetI32Const(alignValue);
43924385
bool is64 = EltTy == i64Ty || EltTy == doubleTy;
4393-
if (is64 && isTyped) {
4386+
if (is64 && IsTyped) {
43944387
EltTy = i32Ty;
43954388
}
43964389

@@ -4406,38 +4399,42 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
44064399
storeArgs.emplace_back(opArg); // opcode
44074400
storeArgs.emplace_back(handle); // resource handle
44084401

4409-
unsigned offset0Idx = 0;
4410-
if (RK == DxilResource::Kind::RawBuffer ||
4411-
RK == DxilResource::Kind::TypedBuffer) {
4412-
// Offset 0
4413-
if (offset->getType()->isVectorTy()) {
4414-
Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0);
4415-
storeArgs.emplace_back(scalarOffset); // offset
4402+
unsigned OffsetIdx = 0;
4403+
if (opcode == OP::OpCode::RawBufferStore ||
4404+
opcode == OP::OpCode::BufferStore) {
4405+
// Append Coord0 (Index) value.
4406+
if (Idx->getType()->isVectorTy()) {
4407+
Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0);
4408+
storeArgs.emplace_back(ScalarIdx); // Coord0 (Index).
44164409
} else {
4417-
storeArgs.emplace_back(offset); // offset
4410+
storeArgs.emplace_back(Idx); // Coord0 (Index).
44184411
}
44194412

4420-
// Store offset0 for later use
4421-
offset0Idx = storeArgs.size() - 1;
4413+
// Store OffsetIdx representing the argument that may need to be incremented
4414+
// later to store additional chunks of data.
4415+
// Only structured buffers can use the offset parameter.
4416+
// Others must increment the index.
4417+
if (RK == DxilResource::Kind::StructuredBuffer)
4418+
OffsetIdx = storeArgs.size();
4419+
else
4420+
OffsetIdx = storeArgs.size() - 1;
44224421

4423-
// Offset 1
4424-
storeArgs.emplace_back(undefI);
4422+
// Coord1 (Offset).
4423+
// Only relevant when storing more than 4 elements to structured buffers.
4424+
storeArgs.emplace_back(offset);
44254425
} else {
44264426
// texture store
44274427
unsigned coordSize = DxilResource::GetNumCoords(RK);
44284428

44294429
// Set x first.
4430-
if (offset->getType()->isVectorTy())
4431-
storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0));
4430+
if (Idx->getType()->isVectorTy())
4431+
storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0));
44324432
else
4433-
storeArgs.emplace_back(offset);
4434-
4435-
// Store offset0 for later use
4436-
offset0Idx = storeArgs.size() - 1;
4433+
storeArgs.emplace_back(Idx);
44374434

44384435
for (unsigned i = 1; i < 3; i++) {
44394436
if (i < coordSize)
4440-
storeArgs.emplace_back(Builder.CreateExtractElement(offset, i));
4437+
storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i));
44414438
else
44424439
storeArgs.emplace_back(undefI);
44434440
}
@@ -4464,30 +4461,24 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
44644461
}
44654462

44664463
for (unsigned j = 0; j < storeArgsList.size(); j++) {
4467-
4468-
// For second and subsequent store calls, increment the offset0 (i.e. store
4469-
// index)
4464+
// For second and subsequent store calls, increment the resource-appropriate
4465+
// index or offset parameter.
44704466
if (j > 0) {
4471-
// Greater than four-components store is not allowed for
4472-
// TypedBuffer and Textures. So greater than four elements
4473-
// scenario should only get hit here for RawBuffer.
4474-
DXASSERT_NOMSG(RK == DxilResource::Kind::RawBuffer);
44754467
unsigned EltSize = OP->GetAllocSizeForType(EltTy);
4476-
unsigned newOffset = EltSize * MaxStoreElemCount * j;
4477-
Value *newOffsetVal = ConstantInt::get(Builder.getInt32Ty(), newOffset);
4478-
newOffsetVal =
4479-
Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffsetVal);
4480-
storeArgsList[j][offset0Idx] = newOffsetVal;
4468+
unsigned NewCoord = EltSize * MaxStoreElemCount * j;
4469+
Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord);
4470+
NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal);
4471+
storeArgsList[j][OffsetIdx] = NewCoordVal;
44814472
}
44824473

4483-
// values
4474+
// Set value parameters.
44844475
uint8_t mask = 0;
44854476
if (Ty->isVectorTy()) {
44864477
unsigned vecSize =
44874478
std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) -
44884479
(j * MaxStoreElemCount);
44894480
Value *emptyVal = undefVal;
4490-
if (isTyped) {
4481+
if (IsTyped) {
44914482
mask = DXIL::kCompMask_All;
44924483
emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
44934484
}
@@ -4503,7 +4494,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
45034494
}
45044495

45054496
} else {
4506-
if (isTyped) {
4497+
if (IsTyped) {
45074498
mask = DXIL::kCompMask_All;
45084499
storeArgsList[j].emplace_back(val);
45094500
storeArgsList[j].emplace_back(val);
@@ -4518,7 +4509,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
45184509
}
45194510
}
45204511

4521-
if (is64 && isTyped) {
4512+
if (is64 && IsTyped) {
45224513
unsigned size = 1;
45234514
if (Ty->isVectorTy()) {
45244515
size =
@@ -4576,7 +4567,8 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
45764567

45774568
Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
45784569
Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
4579-
TranslateStore(RK, handle, val, offset, Builder, hlslOP);
4570+
Value *UndefI = UndefValue::get(Builder.getInt32Ty());
4571+
TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP);
45804572

45814573
return nullptr;
45824574
}
@@ -7907,40 +7899,11 @@ Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder,
79077899
void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
79087900
hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
79097901
Value *val, const DataLayout &DL) {
7910-
HLMatrixType MatTy = HLMatrixType::cast(matType);
7911-
Type *EltTy = MatTy.getElementTypeForMem();
7912-
7913-
val = MatTy.emitLoweredRegToMem(val, Builder);
7914-
7915-
unsigned EltSize = DL.getTypeAllocSize(EltTy);
7916-
Constant *Alignment = OP->GetI32Const(EltSize);
7917-
Value *offset = baseOffset;
7918-
if (baseOffset == nullptr)
7919-
offset = OP->GetU32Const(0);
7920-
7921-
unsigned matSize = MatTy.getNumElements();
7922-
Value *undefElt = UndefValue::get(EltTy);
7923-
7924-
unsigned storeSize = matSize;
7925-
if (matSize % 4) {
7926-
storeSize = matSize + 4 - (matSize & 3);
7927-
}
7928-
std::vector<Value *> elts(storeSize, undefElt);
7929-
for (unsigned i = 0; i < matSize; i++)
7930-
elts[i] = Builder.CreateExtractElement(val, i);
7931-
7932-
for (unsigned i = 0; i < matSize; i += 4) {
7933-
uint8_t mask = 0;
7934-
for (unsigned j = 0; j < 4 && (i + j) < matSize; j++) {
7935-
if (elts[i + j] != undefElt)
7936-
mask |= (1 << j);
7937-
}
7938-
GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder,
7939-
{elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask,
7940-
Alignment);
7941-
// Update offset by 4*4bytes.
7942-
offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
7943-
}
7902+
[[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast(matType);
7903+
DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(),
7904+
"helper type should match vectorized matrix");
7905+
TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx,
7906+
baseOffset, Builder, OP);
79447907
}
79457908

79467909
void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK,
@@ -8085,6 +8048,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
80858048

80868049
GEP->eraseFromParent();
80878050
} else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
8051+
// Store elements of matrix in a struct. Needs to be done one scalar at a
8052+
// time even for vectors in the case that matrix orientation spreads the
8053+
// indexed scalars throughout the matrix vector.
80888054
IRBuilder<> stBuilder(stUser);
80898055
Value *Val = stUser->getValueOperand();
80908056
if (Val->getType()->isVectorTy()) {
@@ -8108,6 +8074,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
81088074
LoadInst *ldUser = cast<LoadInst>(subsUser);
81098075
IRBuilder<> ldBuilder(ldUser);
81108076
Value *ldData = UndefValue::get(resultType);
8077+
// Load elements of matrix in a struct. Needs to be done one scalar at a
8078+
// time even for vectors in the case that matrix orientation spreads the
8079+
// indexed scalars throughout the matrix vector.
81118080
if (resultType->isVectorTy()) {
81128081
for (unsigned i = 0; i < resultSize; i++) {
81138082
Value *ResultElt;
@@ -8248,30 +8217,9 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
82488217
LdInst->eraseFromParent();
82498218
} else if (StoreInst *StInst = dyn_cast<StoreInst>(user)) {
82508219
// Store of scalar/vector within a struct or structured raw store.
8251-
Type *Ty = StInst->getValueOperand()->getType();
8252-
Type *pOverloadTy = Ty->getScalarType();
8253-
Value *offset = baseOffset;
8254-
82558220
Value *val = StInst->getValueOperand();
8256-
Value *undefVal = llvm::UndefValue::get(pOverloadTy);
8257-
Value *vals[] = {undefVal, undefVal, undefVal, undefVal};
8258-
uint8_t mask = 0;
8259-
if (Ty->isVectorTy()) {
8260-
unsigned vectorNumElements = Ty->getVectorNumElements();
8261-
DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector");
8262-
assert(vectorNumElements <= 4);
8263-
for (unsigned i = 0; i < vectorNumElements; i++) {
8264-
vals[i] = Builder.CreateExtractElement(val, i);
8265-
mask |= (1 << i);
8266-
}
8267-
} else {
8268-
vals[0] = val;
8269-
mask = DXIL::kCompMask_X;
8270-
}
8271-
Constant *alignment =
8272-
OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
8273-
GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder, vals,
8274-
mask, alignment);
8221+
TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx,
8222+
baseOffset, Builder, OP);
82758223
StInst->eraseFromParent();
82768224
} else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
82778225
// Recurse users
@@ -8418,14 +8366,15 @@ void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper,
84188366
User *user = *(It++);
84198367
Instruction *I = cast<Instruction>(user);
84208368
IRBuilder<> Builder(I);
8369+
Value *UndefI = UndefValue::get(Builder.getInt32Ty());
84218370
if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
84228371
TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP,
84238372
helper.dataLayout);
84248373
} else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
84258374
Value *val = stInst->getValueOperand();
84268375
TranslateStore(RK, handle, val,
8427-
CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
8428-
Builder, hlslOP);
8376+
CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx),
8377+
UndefI, Builder, hlslOP);
84298378
// delete the st
84308379
stInst->eraseFromParent();
84318380
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
@@ -8450,9 +8399,10 @@ void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper,
84508399
// Generate St.
84518400
// Reset insert point, UpdateVectorElt may move SI to different block.
84528401
StBuilder.SetInsertPoint(SI);
8453-
TranslateStore(RK, handle, ldVal,
8454-
CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
8455-
StBuilder, hlslOP);
8402+
TranslateStore(
8403+
RK, handle, ldVal,
8404+
CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI,
8405+
StBuilder, hlslOP);
84568406
SI->eraseFromParent();
84578407
continue;
84588408
}
@@ -8642,9 +8592,10 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
86428592
} else {
86438593
StoreInst *stInst = cast<StoreInst>(*U);
86448594
Value *val = stInst->getValueOperand();
8595+
Value *UndefI = UndefValue::get(Builder.getInt32Ty());
86458596
TranslateStore(RK, handle, val,
8646-
CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
8647-
Builder, hlslOP, mipLevel);
8597+
CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx),
8598+
UndefI, Builder, hlslOP, mipLevel);
86488599
stInst->eraseFromParent();
86498600
}
86508601
Translated = true;

0 commit comments

Comments
 (0)