@@ -4335,18 +4335,15 @@ void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
43354335}
43364336
43374337void TranslateStore (DxilResource::Kind RK, Value *handle, Value *val,
4338- Value *offset, IRBuilder<> &Builder, hlsl::OP *OP ,
4339- Value *sampIdx = nullptr ) {
4338+ Value *Idx, Value * offset, IRBuilder<> &Builder,
4339+ hlsl::OP *OP, Value *sampIdx = nullptr ) {
43404340 Type *Ty = val->getType ();
4341-
4342- // This function is no longer used for lowering stores to a
4343- // structured buffer.
4344- DXASSERT_NOMSG (RK != DxilResource::Kind::StructuredBuffer);
4345-
43464341 OP::OpCode opcode = OP::OpCode::NumOpCodes;
4342+ bool IsTyped = true ;
43474343 switch (RK) {
43484344 case DxilResource::Kind::RawBuffer:
43494345 case DxilResource::Kind::StructuredBuffer:
4346+ IsTyped = false ;
43504347 opcode = OP::OpCode::RawBufferStore;
43514348 break ;
43524349 case DxilResource::Kind::TypedBuffer:
@@ -4364,10 +4361,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
43644361 break ;
43654362 }
43664363
4367- bool isTyped = opcode == OP::OpCode::TextureStore ||
4368- opcode == OP::OpCode::TextureStoreSample ||
4369- RK == DxilResource::Kind::TypedBuffer;
4370-
43714364 Type *i32Ty = Builder.getInt32Ty ();
43724365 Type *i64Ty = Builder.getInt64Ty ();
43734366 Type *doubleTy = Builder.getDoubleTy ();
@@ -4390,7 +4383,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
43904383 alignValue = 4 ;
43914384 Constant *Alignment = OP->GetI32Const (alignValue);
43924385 bool is64 = EltTy == i64Ty || EltTy == doubleTy;
4393- if (is64 && isTyped ) {
4386+ if (is64 && IsTyped ) {
43944387 EltTy = i32Ty;
43954388 }
43964389
@@ -4406,38 +4399,42 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
44064399 storeArgs.emplace_back (opArg); // opcode
44074400 storeArgs.emplace_back (handle); // resource handle
44084401
4409- unsigned offset0Idx = 0 ;
4410- if (RK == DxilResource::Kind::RawBuffer ||
4411- RK == DxilResource::Kind::TypedBuffer ) {
4412- // Offset 0
4413- if (offset ->getType ()->isVectorTy ()) {
4414- Value *scalarOffset = Builder.CreateExtractElement (offset , (uint64_t )0 );
4415- storeArgs.emplace_back (scalarOffset ); // offset
4402+ unsigned OffsetIdx = 0 ;
4403+ if (opcode == OP::OpCode::RawBufferStore ||
4404+ opcode == OP::OpCode::BufferStore ) {
4405+ // Append Coord0 (Index) value.
4406+ if (Idx ->getType ()->isVectorTy ()) {
4407+ Value *ScalarIdx = Builder.CreateExtractElement (Idx , (uint64_t )0 );
4408+ storeArgs.emplace_back (ScalarIdx ); // Coord0 (Index).
44164409 } else {
4417- storeArgs.emplace_back (offset ); // offset
4410+ storeArgs.emplace_back (Idx ); // Coord0 (Index).
44184411 }
44194412
4420- // Store offset0 for later use
4421- offset0Idx = storeArgs.size () - 1 ;
4413+ // Store OffsetIdx representing the argument that may need to be incremented
4414+ // later to store additional chunks of data.
4415+ // Only structured buffers can use the offset parameter.
4416+ // Others must increment the index.
4417+ if (RK == DxilResource::Kind::StructuredBuffer)
4418+ OffsetIdx = storeArgs.size ();
4419+ else
4420+ OffsetIdx = storeArgs.size () - 1 ;
44224421
4423- // Offset 1
4424- storeArgs.emplace_back (undefI);
4422+ // Coord1 (Offset).
4423+ // Only relevant when storing more than 4 elements to structured buffers.
4424+ storeArgs.emplace_back (offset);
44254425 } else {
44264426 // texture store
44274427 unsigned coordSize = DxilResource::GetNumCoords (RK);
44284428
44294429 // Set x first.
4430- if (offset ->getType ()->isVectorTy ())
4431- storeArgs.emplace_back (Builder.CreateExtractElement (offset , (uint64_t )0 ));
4430+ if (Idx ->getType ()->isVectorTy ())
4431+ storeArgs.emplace_back (Builder.CreateExtractElement (Idx , (uint64_t )0 ));
44324432 else
4433- storeArgs.emplace_back (offset);
4434-
4435- // Store offset0 for later use
4436- offset0Idx = storeArgs.size () - 1 ;
4433+ storeArgs.emplace_back (Idx);
44374434
44384435 for (unsigned i = 1 ; i < 3 ; i++) {
44394436 if (i < coordSize)
4440- storeArgs.emplace_back (Builder.CreateExtractElement (offset , i));
4437+ storeArgs.emplace_back (Builder.CreateExtractElement (Idx , i));
44414438 else
44424439 storeArgs.emplace_back (undefI);
44434440 }
@@ -4464,30 +4461,24 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
44644461 }
44654462
44664463 for (unsigned j = 0 ; j < storeArgsList.size (); j++) {
4467-
4468- // For second and subsequent store calls, increment the offset0 (i.e. store
4469- // index)
4464+ // For second and subsequent store calls, increment the resource-appropriate
4465+ // index or offset parameter.
44704466 if (j > 0 ) {
4471- // Greater than four-components store is not allowed for
4472- // TypedBuffer and Textures. So greater than four elements
4473- // scenario should only get hit here for RawBuffer.
4474- DXASSERT_NOMSG (RK == DxilResource::Kind::RawBuffer);
44754467 unsigned EltSize = OP->GetAllocSizeForType (EltTy);
4476- unsigned newOffset = EltSize * MaxStoreElemCount * j;
4477- Value *newOffsetVal = ConstantInt::get (Builder.getInt32Ty (), newOffset);
4478- newOffsetVal =
4479- Builder.CreateAdd (storeArgsList[0 ][offset0Idx], newOffsetVal);
4480- storeArgsList[j][offset0Idx] = newOffsetVal;
4468+ unsigned NewCoord = EltSize * MaxStoreElemCount * j;
4469+ Value *NewCoordVal = ConstantInt::get (Builder.getInt32Ty (), NewCoord);
4470+ NewCoordVal = Builder.CreateAdd (storeArgsList[0 ][OffsetIdx], NewCoordVal);
4471+ storeArgsList[j][OffsetIdx] = NewCoordVal;
44814472 }
44824473
4483- // values
4474+ // Set value parameters.
44844475 uint8_t mask = 0 ;
44854476 if (Ty->isVectorTy ()) {
44864477 unsigned vecSize =
44874478 std::min ((j + 1 ) * MaxStoreElemCount, Ty->getVectorNumElements ()) -
44884479 (j * MaxStoreElemCount);
44894480 Value *emptyVal = undefVal;
4490- if (isTyped ) {
4481+ if (IsTyped ) {
44914482 mask = DXIL::kCompMask_All ;
44924483 emptyVal = Builder.CreateExtractElement (val, (uint64_t )0 );
44934484 }
@@ -4503,7 +4494,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
45034494 }
45044495
45054496 } else {
4506- if (isTyped ) {
4497+ if (IsTyped ) {
45074498 mask = DXIL::kCompMask_All ;
45084499 storeArgsList[j].emplace_back (val);
45094500 storeArgsList[j].emplace_back (val);
@@ -4518,7 +4509,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
45184509 }
45194510 }
45204511
4521- if (is64 && isTyped ) {
4512+ if (is64 && IsTyped ) {
45224513 unsigned size = 1 ;
45234514 if (Ty->isVectorTy ()) {
45244515 size =
@@ -4576,7 +4567,8 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
45764567
45774568 Value *val = CI->getArgOperand (HLOperandIndex::kStoreValOpIdx );
45784569 Value *offset = CI->getArgOperand (HLOperandIndex::kStoreOffsetOpIdx );
4579- TranslateStore (RK, handle, val, offset, Builder, hlslOP);
4570+ Value *UndefI = UndefValue::get (Builder.getInt32Ty ());
4571+ TranslateStore (RK, handle, val, offset, UndefI, Builder, hlslOP);
45804572
45814573 return nullptr ;
45824574}
@@ -7907,40 +7899,11 @@ Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder,
79077899void TranslateStructBufMatSt (Type *matType, IRBuilder<> &Builder, Value *handle,
79087900 hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
79097901 Value *val, const DataLayout &DL) {
7910- HLMatrixType MatTy = HLMatrixType::cast (matType);
7911- Type *EltTy = MatTy.getElementTypeForMem ();
7912-
7913- val = MatTy.emitLoweredRegToMem (val, Builder);
7914-
7915- unsigned EltSize = DL.getTypeAllocSize (EltTy);
7916- Constant *Alignment = OP->GetI32Const (EltSize);
7917- Value *offset = baseOffset;
7918- if (baseOffset == nullptr )
7919- offset = OP->GetU32Const (0 );
7920-
7921- unsigned matSize = MatTy.getNumElements ();
7922- Value *undefElt = UndefValue::get (EltTy);
7923-
7924- unsigned storeSize = matSize;
7925- if (matSize % 4 ) {
7926- storeSize = matSize + 4 - (matSize & 3 );
7927- }
7928- std::vector<Value *> elts (storeSize, undefElt);
7929- for (unsigned i = 0 ; i < matSize; i++)
7930- elts[i] = Builder.CreateExtractElement (val, i);
7931-
7932- for (unsigned i = 0 ; i < matSize; i += 4 ) {
7933- uint8_t mask = 0 ;
7934- for (unsigned j = 0 ; j < 4 && (i + j) < matSize; j++) {
7935- if (elts[i + j] != undefElt)
7936- mask |= (1 << j);
7937- }
7938- GenerateStructBufSt (handle, bufIdx, offset, EltTy, OP, Builder,
7939- {elts[i], elts[i + 1 ], elts[i + 2 ], elts[i + 3 ]}, mask,
7940- Alignment);
7941- // Update offset by 4*4bytes.
7942- offset = Builder.CreateAdd (offset, OP->GetU32Const (4 * EltSize));
7943- }
7902+ [[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast (matType);
7903+ DXASSERT (MatTy.getLoweredVectorType (false /* MemRepr*/ ) == val->getType (),
7904+ " helper type should match vectorized matrix" );
7905+ TranslateStore (DxilResource::Kind::StructuredBuffer, handle, val, bufIdx,
7906+ baseOffset, Builder, OP);
79447907}
79457908
79467909void TranslateStructBufMatLdSt (CallInst *CI, Value *handle, HLResource::Kind RK,
@@ -8085,6 +8048,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
80858048
80868049 GEP->eraseFromParent ();
80878050 } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
8051+ // Store elements of matrix in a struct. Needs to be done one scalar at a
8052+ // time even for vectors in the case that matrix orientation spreads the
8053+ // indexed scalars throughout the matrix vector.
80888054 IRBuilder<> stBuilder (stUser);
80898055 Value *Val = stUser->getValueOperand ();
80908056 if (Val->getType ()->isVectorTy ()) {
@@ -8108,6 +8074,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
81088074 LoadInst *ldUser = cast<LoadInst>(subsUser);
81098075 IRBuilder<> ldBuilder (ldUser);
81108076 Value *ldData = UndefValue::get (resultType);
8077+ // Load elements of matrix in a struct. Needs to be done one scalar at a
8078+ // time even for vectors in the case that matrix orientation spreads the
8079+ // indexed scalars throughout the matrix vector.
81118080 if (resultType->isVectorTy ()) {
81128081 for (unsigned i = 0 ; i < resultSize; i++) {
81138082 Value *ResultElt;
@@ -8248,30 +8217,9 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
82488217 LdInst->eraseFromParent ();
82498218 } else if (StoreInst *StInst = dyn_cast<StoreInst>(user)) {
82508219 // Store of scalar/vector within a struct or structured raw store.
8251- Type *Ty = StInst->getValueOperand ()->getType ();
8252- Type *pOverloadTy = Ty->getScalarType ();
8253- Value *offset = baseOffset;
8254-
82558220 Value *val = StInst->getValueOperand ();
8256- Value *undefVal = llvm::UndefValue::get (pOverloadTy);
8257- Value *vals[] = {undefVal, undefVal, undefVal, undefVal};
8258- uint8_t mask = 0 ;
8259- if (Ty->isVectorTy ()) {
8260- unsigned vectorNumElements = Ty->getVectorNumElements ();
8261- DXASSERT (vectorNumElements <= 4 , " up to 4 elements in vector" );
8262- assert (vectorNumElements <= 4 );
8263- for (unsigned i = 0 ; i < vectorNumElements; i++) {
8264- vals[i] = Builder.CreateExtractElement (val, i);
8265- mask |= (1 << i);
8266- }
8267- } else {
8268- vals[0 ] = val;
8269- mask = DXIL::kCompMask_X ;
8270- }
8271- Constant *alignment =
8272- OP->GetI32Const (DL.getTypeAllocSize (Ty->getScalarType ()));
8273- GenerateStructBufSt (handle, bufIdx, offset, pOverloadTy, OP, Builder, vals,
8274- mask, alignment);
8221+ TranslateStore (DxilResource::Kind::StructuredBuffer, handle, val, bufIdx,
8222+ baseOffset, Builder, OP);
82758223 StInst->eraseFromParent ();
82768224 } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
82778225 // Recurse users
@@ -8418,14 +8366,15 @@ void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper,
84188366 User *user = *(It++);
84198367 Instruction *I = cast<Instruction>(user);
84208368 IRBuilder<> Builder (I);
8369+ Value *UndefI = UndefValue::get (Builder.getInt32Ty ());
84218370 if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
84228371 TranslateTypedBufSubscript (CI, RK, RC, handle, ldInst, Builder, hlslOP,
84238372 helper.dataLayout );
84248373 } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
84258374 Value *val = stInst->getValueOperand ();
84268375 TranslateStore (RK, handle, val,
8427- CI->getArgOperand (HLOperandIndex::kStoreOffsetOpIdx ),
8428- Builder, hlslOP);
8376+ CI->getArgOperand (HLOperandIndex::kSubscriptIndexOpIdx ),
8377+ UndefI, Builder, hlslOP);
84298378 // delete the st
84308379 stInst->eraseFromParent ();
84318380 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
@@ -8450,9 +8399,10 @@ void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper,
84508399 // Generate St.
84518400 // Reset insert point, UpdateVectorElt may move SI to different block.
84528401 StBuilder.SetInsertPoint (SI);
8453- TranslateStore (RK, handle, ldVal,
8454- CI->getArgOperand (HLOperandIndex::kStoreOffsetOpIdx ),
8455- StBuilder, hlslOP);
8402+ TranslateStore (
8403+ RK, handle, ldVal,
8404+ CI->getArgOperand (HLOperandIndex::kSubscriptIndexOpIdx ), UndefI,
8405+ StBuilder, hlslOP);
84568406 SI->eraseFromParent ();
84578407 continue ;
84588408 }
@@ -8642,9 +8592,10 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
86428592 } else {
86438593 StoreInst *stInst = cast<StoreInst>(*U);
86448594 Value *val = stInst->getValueOperand ();
8595+ Value *UndefI = UndefValue::get (Builder.getInt32Ty ());
86458596 TranslateStore (RK, handle, val,
8646- CI->getArgOperand (HLOperandIndex::kStoreOffsetOpIdx ),
8647- Builder, hlslOP, mipLevel);
8597+ CI->getArgOperand (HLOperandIndex::kSubscriptIndexOpIdx ),
8598+ UndefI, Builder, hlslOP, mipLevel);
86488599 stInst->eraseFromParent ();
86498600 }
86508601 Translated = true ;
0 commit comments