1111#include " llvm/Frontend/HLSL/CBuffer.h"
1212#include " llvm/Frontend/HLSL/HLSLResource.h"
1313#include " llvm/IR/IRBuilder.h"
14+ #include " llvm/IR/IntrinsicInst.h"
1415#include " llvm/IR/IntrinsicsDirectX.h"
1516#include " llvm/InitializePasses.h"
1617#include " llvm/Pass.h"
18+ #include " llvm/Support/FormatVariadic.h"
1719#include " llvm/Transforms/Utils/Local.h"
1820
1921#define DEBUG_TYPE " dxil-cbuffer-access"
@@ -54,71 +56,106 @@ struct CBufferRowIntrin {
5456 }
5557 }
5658};
57- } // namespace
5859
59- static size_t getOffsetForCBufferGEP (GEPOperator *GEP, GlobalVariable *Global,
60- const DataLayout &DL) {
61- // Since we should always have a constant offset, we should only ever have a
62- // single GEP of indirection from the Global.
63- assert (GEP->getPointerOperand () == Global &&
64- " Indirect access to resource handle" );
60+ // Helper for creating CBuffer handles and loading data from them
61+ struct CBufferResource {
62+ GlobalVariable *GVHandle;
63+ GlobalVariable *Member;
64+ size_t MemberOffset;
6565
66- APInt ConstantOffset (DL.getIndexTypeSizeInBits (GEP->getType ()), 0 );
67- bool Success = GEP->accumulateConstantOffset (DL, ConstantOffset);
68- (void )Success;
69- assert (Success && " Offsets into cbuffer globals must be constant" );
66+ LoadInst *Handle;
7067
71- if (auto *ATy = dyn_cast<ArrayType>(Global->getValueType ()))
72- ConstantOffset = hlsl::translateCBufArrayOffset (DL, ConstantOffset, ATy);
68+ CBufferResource (GlobalVariable *GVHandle, GlobalVariable *Member,
69+ size_t MemberOffset)
70+ : GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {}
7371
74- return ConstantOffset.getZExtValue ();
75- }
72+ const DataLayout &getDataLayout () { return GVHandle->getDataLayout (); }
73+ Type *getValueType () { return Member->getValueType (); }
74+ iterator_range<ConstantDataSequential::user_iterator> users () {
75+ return Member->users ();
76+ }
77+
78+ // / Get the byte offset of a Pointer-typed Value * `Val` relative to Member.
79+ // / `Val` can either be Member itself, or a GEP of a constant offset from
80+ // / Member
81+ size_t getOffsetForCBufferGEP (Value *Val) {
82+ assert (isa<PointerType>(Val->getType ()) &&
83+ " Expected a pointer-typed value" );
84+
85+ if (Val == Member)
86+ return 0 ;
87+
88+ if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
89+ // Since we should always have a constant offset, we should only ever have
90+ // a single GEP of indirection from the Global.
91+ assert (GEP->getPointerOperand () == Member &&
92+ " Indirect access to resource handle" );
93+
94+ const DataLayout &DL = getDataLayout ();
95+ APInt ConstantOffset (DL.getIndexTypeSizeInBits (GEP->getType ()), 0 );
96+ bool Success = GEP->accumulateConstantOffset (DL, ConstantOffset);
97+ (void )Success;
98+ assert (Success && " Offsets into cbuffer globals must be constant" );
99+
100+ if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType ()))
101+ ConstantOffset =
102+ hlsl::translateCBufArrayOffset (DL, ConstantOffset, ATy);
103+
104+ return ConstantOffset.getZExtValue ();
105+ }
76106
77- // / Replace access via cbuffer global with a load from the cbuffer handle
78- // / itself.
79- static void replaceAccess (LoadInst *LI, GlobalVariable *Global,
80- GlobalVariable *HandleGV, size_t BaseOffset,
81- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
82- const DataLayout &DL = HandleGV->getDataLayout ();
107+ llvm_unreachable (" Expected Val to be a GlobalVariable or GEP" );
108+ }
83109
84- size_t Offset = BaseOffset;
85- if (auto *GEP = dyn_cast<GEPOperator>(LI->getPointerOperand ()))
86- Offset += getOffsetForCBufferGEP (GEP, Global, DL);
87- else if (LI->getPointerOperand () != Global)
88- llvm_unreachable (" Load instruction doesn't reference cbuffer global" );
110+ // / Create a handle for this cbuffer resource using the IRBuilder `Builder`
111+ // / and sets the handle as the current one to use for subsequent calls to
112+ // / `loadValue`
113+ void createAndSetCurrentHandle (IRBuilder<> &Builder) {
114+ Handle = Builder.CreateLoad (GVHandle->getValueType (), GVHandle,
115+ GVHandle->getName ());
116+ }
89117
90- IRBuilder<> Builder (LI);
91- auto *Handle = Builder.CreateLoad (HandleGV->getValueType (), HandleGV,
92- HandleGV->getName ());
93-
94- Type *Ty = LI->getType ();
95- CBufferRowIntrin Intrin (DL, Ty->getScalarType ());
96- // The cbuffer consists of some number of 16-byte rows.
97- unsigned int CurrentRow = Offset / hlsl::CBufferRowSizeInBytes;
98- unsigned int CurrentIndex =
99- (Offset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize ;
100-
101- auto *CBufLoad = Builder.CreateIntrinsic (
102- Intrin.RetTy , Intrin.IID ,
103- {Handle, ConstantInt::get (Builder.getInt32Ty (), CurrentRow)}, nullptr ,
104- LI->getName ());
105- auto *Elt =
106- Builder.CreateExtractValue (CBufLoad, {CurrentIndex++}, LI->getName ());
107-
108- Value *Result = nullptr ;
109- unsigned int Remaining =
110- ((DL.getTypeSizeInBits (Ty) / 8 ) / Intrin.EltSize ) - 1 ;
111- if (Remaining == 0 ) {
112- // We only have a single element, so we're done.
113- Result = Elt;
114-
115- // However, if we loaded a <1 x T>, then we need to adjust the type here.
116- if (auto *VT = dyn_cast<FixedVectorType>(LI->getType ())) {
117- assert (VT->getNumElements () == 1 && " Can't have multiple elements here" );
118- Result = Builder.CreateInsertElement (PoisonValue::get (VT), Result,
119- Builder.getInt32 (0 ));
118+ // / Load a value of type `Ty` at offset `Offset` using the handle from the
119+ // / last call to `createAndSetCurrentHandle`
120+ Value *loadValue (IRBuilder<> &Builder, Type *Ty, size_t Offset,
121+ const Twine &Name = " " ) {
122+ assert (Handle &&
123+ " Expected a handle for this cbuffer global resource to be created "
124+ " before loading a value from it" );
125+ const DataLayout &DL = getDataLayout ();
126+
127+ size_t TargetOffset = MemberOffset + Offset;
128+ CBufferRowIntrin Intrin (DL, Ty->getScalarType ());
129+ // The cbuffer consists of some number of 16-byte rows.
130+ unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes;
131+ unsigned int CurrentIndex =
132+ (TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize ;
133+
134+ auto *CBufLoad = Builder.CreateIntrinsic (
135+ Intrin.RetTy , Intrin.IID ,
136+ {Handle, ConstantInt::get (Builder.getInt32Ty (), CurrentRow)}, nullptr ,
137+ Name + " .load" );
138+ auto *Elt = Builder.CreateExtractValue (CBufLoad, {CurrentIndex++},
139+ Name + " .extract" );
140+
141+ Value *Result = nullptr ;
142+ unsigned int Remaining =
143+ ((DL.getTypeSizeInBits (Ty) / 8 ) / Intrin.EltSize ) - 1 ;
144+
145+ if (Remaining == 0 ) {
146+ // We only have a single element, so we're done.
147+ Result = Elt;
148+
149+ // However, if we loaded a <1 x T>, then we need to adjust the type here.
150+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
151+ assert (VT->getNumElements () == 1 &&
152+ " Can't have multiple elements here" );
153+ Result = Builder.CreateInsertElement (PoisonValue::get (VT), Result,
154+ Builder.getInt32 (0 ), Name);
155+ }
156+ return Result;
120157 }
121- } else {
158+
122159 // Walk each element and extract it, wrapping to new rows as needed.
123160 SmallVector<Value *> Extracts{Elt};
124161 while (Remaining--) {
@@ -128,40 +165,138 @@ static void replaceAccess(LoadInst *LI, GlobalVariable *Global,
128165 CBufLoad = Builder.CreateIntrinsic (
129166 Intrin.RetTy , Intrin.IID ,
130167 {Handle, ConstantInt::get (Builder.getInt32Ty (), ++CurrentRow)},
131- nullptr , LI-> getName () );
168+ nullptr , Name + " .load " );
132169
133170 Extracts.push_back (Builder.CreateExtractValue (CBufLoad, {CurrentIndex++},
134- LI-> getName () ));
171+ Name + " .extract " ));
135172 }
136173
137174 // Finally, we build up the original loaded value.
138175 Result = PoisonValue::get (Ty);
139176 for (int I = 0 , E = Extracts.size (); I < E; ++I)
140177 Result =
141- Builder.CreateInsertElement (Result, Extracts[I], Builder.getInt32 (I));
178+ Builder.CreateInsertElement (Result, Extracts[I], Builder.getInt32 (I),
179+ Name + formatv (" .upto{}" , I));
180+ return Result;
142181 }
182+ };
143183
184+ } // namespace
185+
186+ // / Replace load via cbuffer global with a load from the cbuffer handle itself.
187+ static void replaceLoad (LoadInst *LI, CBufferResource &CBR,
188+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
189+ size_t Offset = CBR.getOffsetForCBufferGEP (LI->getPointerOperand ());
190+ IRBuilder<> Builder (LI);
191+ CBR.createAndSetCurrentHandle (Builder);
192+ Value *Result = CBR.loadValue (Builder, LI->getType (), Offset, LI->getName ());
144193 LI->replaceAllUsesWith (Result);
145194 DeadInsts.push_back (LI);
146195}
147196
148- static void replaceAccessesWithHandle (GlobalVariable *Global,
149- GlobalVariable *HandleGV,
150- size_t BaseOffset) {
197+ // / This function recursively copies N array elements from the cbuffer resource
198+ // / CBR to the MemCpy Destination. Recursion is used to unravel multidimensional
199+ // / arrays into a sequence of scalar/vector extracts and stores.
200+ static void copyArrayElemsForMemCpy (IRBuilder<> &Builder, MemCpyInst *MCI,
201+ CBufferResource &CBR, ArrayType *ArrTy,
202+ size_t ArrOffset, size_t N,
203+ const Twine &Name = " " ) {
204+ const DataLayout &DL = MCI->getDataLayout ();
205+ Type *ElemTy = ArrTy->getElementType ();
206+ size_t ElemTySize = DL.getTypeAllocSize (ElemTy);
207+ for (unsigned I = 0 ; I < N; ++I) {
208+ size_t Offset = ArrOffset + I * ElemTySize;
209+
210+ // Recursively copy nested arrays
211+ if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
212+ copyArrayElemsForMemCpy (Builder, MCI, CBR, ElemArrTy, Offset,
213+ ElemArrTy->getNumElements (), Name);
214+ continue ;
215+ }
216+
217+ // Load CBuffer value and store it in Dest
218+ APInt CBufArrayOffset (
219+ DL.getIndexTypeSizeInBits (MCI->getSource ()->getType ()), Offset);
220+ CBufArrayOffset =
221+ hlsl::translateCBufArrayOffset (DL, CBufArrayOffset, ArrTy);
222+ Value *CBufferVal =
223+ CBR.loadValue (Builder, ElemTy, CBufArrayOffset.getZExtValue (), Name);
224+ Value *GEP =
225+ Builder.CreateInBoundsGEP (Builder.getInt8Ty (), MCI->getDest (),
226+ {Builder.getInt32 (Offset)}, Name + " .dest" );
227+ Builder.CreateStore (CBufferVal, GEP, MCI->isVolatile ());
228+ }
229+ }
230+
231+ // / Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
232+ // / itself. Assumes the cbuffer global is an array, and the length of bytes to
233+ // / copy is divisible by array element allocation size.
234+ // / The memcpy source must also be a direct cbuffer global reference, not a GEP.
235+ static void replaceMemCpy (MemCpyInst *MCI, CBufferResource &CBR) {
236+
237+ ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType ());
238+ assert (ArrTy && " MemCpy lowering is only supported for array types" );
239+
240+ // This assumption vastly simplifies the implementation
241+ if (MCI->getSource () != CBR.Member )
242+ reportFatalUsageError (
243+ " Expected MemCpy source to be a cbuffer global variable" );
244+
245+ ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength ());
246+ uint64_t ByteLength = Length->getZExtValue ();
247+
248+ // If length to copy is zero, no memcpy is needed
249+ if (ByteLength == 0 ) {
250+ MCI->eraseFromParent ();
251+ return ;
252+ }
253+
254+ const DataLayout &DL = CBR.getDataLayout ();
255+
256+ Type *ElemTy = ArrTy->getElementType ();
257+ size_t ElemSize = DL.getTypeAllocSize (ElemTy);
258+ assert (ByteLength % ElemSize == 0 &&
259+ " Length of bytes to MemCpy must be divisible by allocation size of "
260+ " source/destination array elements" );
261+ size_t ElemsToCpy = ByteLength / ElemSize;
262+
263+ IRBuilder<> Builder (MCI);
264+ CBR.createAndSetCurrentHandle (Builder);
265+
266+ copyArrayElemsForMemCpy (Builder, MCI, CBR, ArrTy, 0 , ElemsToCpy,
267+ " memcpy." + MCI->getDest ()->getName () + " ." +
268+ MCI->getSource ()->getName ());
269+
270+ MCI->eraseFromParent ();
271+ }
272+
273+ static void replaceAccessesWithHandle (CBufferResource &CBR) {
151274 SmallVector<WeakTrackingVH> DeadInsts;
152275
153- SmallVector<User *> ToProcess{Global-> users ()};
276+ SmallVector<User *> ToProcess{CBR. users ()};
154277 while (!ToProcess.empty ()) {
155278 User *Cur = ToProcess.pop_back_val ();
156279
157280 // If we have a load instruction, replace the access.
158281 if (auto *LI = dyn_cast<LoadInst>(Cur)) {
159- replaceAccess (LI, Global, HandleGV, BaseOffset, DeadInsts);
282+ replaceLoad (LI, CBR, DeadInsts);
283+ continue ;
284+ }
285+
286+ // If we have a memcpy instruction, replace it with multiple accesses and
287+ // subsequent stores to the destination
288+ if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
289+ replaceMemCpy (MCI, CBR);
160290 continue ;
161291 }
162292
163293 // Otherwise, walk users looking for a load...
164- ToProcess.append (Cur->user_begin (), Cur->user_end ());
294+ if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
295+ ToProcess.append (Cur->user_begin (), Cur->user_end ());
296+ continue ;
297+ }
298+
299+ llvm_unreachable (" Unexpected user of Global" );
165300 }
166301 RecursivelyDeleteTriviallyDeadInstructions (DeadInsts);
167302}
@@ -173,7 +308,8 @@ static bool replaceCBufferAccesses(Module &M) {
173308
174309 for (const hlsl::CBufferMapping &Mapping : *CBufMD)
175310 for (const hlsl::CBufferMember &Member : Mapping.Members ) {
176- replaceAccessesWithHandle (Member.GV , Mapping.Handle , Member.Offset );
311+ CBufferResource CBR (Mapping.Handle , Member.GV , Member.Offset );
312+ replaceAccessesWithHandle (CBR);
177313 Member.GV ->removeFromParent ();
178314 }
179315
0 commit comments