Skip to content

Commit 29843c0

Browse files
committed
wip: Explicit structs in clang codegen
This abandons the `dx.Layout` idea and just uses explicit padding. Note: Reordered fields break stuff, including ones from implicit bindings.
1 parent 030d8cb commit 29843c0

20 files changed

+528
-383
lines changed

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4678,6 +4678,26 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
46784678
emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this),
46794679
ScaledIdx, false, SignedIndices, E->getExprLoc());
46804680
Addr = Address(EltPtr, OrigBaseElemTy, EltAlign);
4681+
} else if (E->getType().getAddressSpace() == LangAS::hlsl_constant) {
4682+
// This is an array inside of a cbuffer.
4683+
Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
4684+
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
4685+
4686+
// Scale the index by the element size rounded up to a multiple of 16 bytes.
4687+
CharUnits RowAlignedSize = getContext()
4688+
.getTypeSizeInChars(E->getType())
4689+
.alignTo(CharUnits::fromQuantity(16));
4690+
4691+
llvm::Value *RowAlignedSizeVal =
4692+
llvm::ConstantInt::get(Idx->getType(), RowAlignedSize.getQuantity());
4693+
llvm::Value *ScaledIdx = Builder.CreateMul(Idx, RowAlignedSizeVal);
4694+
4695+
CharUnits EltAlign =
4696+
getArrayElementAlign(Addr.getAlignment(), Idx, RowAlignedSize);
4697+
llvm::Value *EltPtr =
4698+
emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this),
4699+
ScaledIdx, false, SignedIndices, E->getExprLoc());
4700+
Addr = Address(EltPtr, Addr.getElementType(), EltAlign);
46814701
} else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
46824702
// If this is A[i] where A is an array, the frontend will have decayed the
46834703
// base to be a ArrayToPointerDecay implicit cast. While correct, it is

clang/lib/CodeGen/CGExprAgg.cpp

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919
#include "CodeGenModule.h"
2020
#include "ConstantEmitter.h"
2121
#include "EHScopeStack.h"
22+
#include "HLSLBufferLayoutBuilder.h"
2223
#include "TargetInfo.h"
2324
#include "clang/AST/ASTContext.h"
2425
#include "clang/AST/Attr.h"
2526
#include "clang/AST/DeclCXX.h"
2627
#include "clang/AST/DeclTemplate.h"
2728
#include "clang/AST/StmtVisitor.h"
29+
#include "llvm/ADT/ScopeExit.h"
2830
#include "llvm/IR/Constants.h"
2931
#include "llvm/IR/Function.h"
3032
#include "llvm/IR/GlobalVariable.h"
@@ -2280,6 +2282,127 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
22802282
return AggValueSlot::MayOverlap;
22812283
}
22822284

2285+
namespace {
2286+
class HLSLBufferCopyEmitter {
2287+
CodeGenFunction &CGF;
2288+
Address DestPtr;
2289+
Address SrcPtr;
2290+
llvm::Type *LayoutTy = nullptr;
2291+
2292+
SmallVector<llvm::Value *> CurStoreIndices;
2293+
SmallVector<llvm::Value *> CurLoadIndices;
2294+
2295+
void emitCopyAtIndices(llvm::Type *FieldTy, unsigned StoreIndex,
2296+
unsigned LoadIndex) {
2297+
CurStoreIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, StoreIndex));
2298+
CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, LoadIndex));
2299+
auto RestoreIndices = llvm::make_scope_exit([&]() {
2300+
CurStoreIndices.pop_back();
2301+
CurLoadIndices.pop_back();
2302+
});
2303+
2304+
if (processArray(FieldTy))
2305+
return;
2306+
if (processBufferLayoutArray(FieldTy))
2307+
return;
2308+
if (processStruct(FieldTy))
2309+
return;
2310+
2311+
// We have a scalar or vector element - emit a copy.
2312+
CharUnits Align = CharUnits::fromQuantity(
2313+
CGF.CGM.getDataLayout().getABITypeAlign(FieldTy));
2314+
Address SrcGEP = RawAddress(
2315+
CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(),
2316+
CurLoadIndices, "cbuf.src"),
2317+
FieldTy, Align, SrcPtr.isKnownNonNull());
2318+
Address DestGEP = CGF.Builder.CreateInBoundsGEP(
2319+
DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest");
2320+
llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load");
2321+
CGF.Builder.CreateStore(Load, DestGEP);
2322+
}
2323+
2324+
bool processArray(llvm::Type *FieldTy) {
2325+
auto *AT = dyn_cast<llvm::ArrayType>(FieldTy);
2326+
if (!AT)
2327+
return false;
2328+
2329+
// If we have an array then there isn't any padding
2330+
// between elements. We just need to copy each element over.
2331+
for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I)
2332+
emitCopyAtIndices(AT->getElementType(), I, I);
2333+
return true;
2334+
}
2335+
2336+
bool processBufferLayoutArray(llvm::Type *FieldTy) {
2337+
auto *ST = dyn_cast<llvm::StructType>(FieldTy);
2338+
if (!ST || ST->getNumElements() != 2)
2339+
return false;
2340+
2341+
auto *PaddedEltsTy = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
2342+
if (!PaddedEltsTy)
2343+
return false;
2344+
2345+
auto *PaddedTy = dyn_cast<llvm::StructType>(PaddedEltsTy->getElementType());
2346+
if (!PaddedTy || PaddedTy->getNumElements() != 2)
2347+
return false;
2348+
2349+
if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(
2350+
PaddedTy->getElementType(1)))
2351+
return false;
2352+
2353+
llvm::Type *ElementTy = ST->getElementType(1);
2354+
if (PaddedTy->getElementType(0) != ElementTy)
2355+
return false;
2356+
2357+
// All but the last of the logical array elements are in the padded array.
2358+
unsigned NumElts = PaddedEltsTy->getNumElements() + 1;
2359+
2360+
// Add an extra indirection to the load for the struct and walk the
2361+
// array prefix.
2362+
CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, 0));
2363+
for (unsigned I = 0; I < NumElts - 1; ++I)
2364+
emitCopyAtIndices(ElementTy, I, I);
2365+
CurLoadIndices.pop_back();
2366+
2367+
// Now copy the last element.
2368+
emitCopyAtIndices(ElementTy, NumElts - 1, 1);
2369+
2370+
return true;
2371+
}
2372+
2373+
bool processStruct(llvm::Type *FieldTy) {
2374+
auto *ST = dyn_cast<llvm::StructType>(FieldTy);
2375+
if (!ST)
2376+
return false;
2377+
2378+
unsigned Skipped = 0;
2379+
for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) {
2380+
llvm::Type *ElementTy = ST->getElementType(I);
2381+
if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy))
2382+
++Skipped;
2383+
else
2384+
emitCopyAtIndices(ElementTy, I, I + Skipped);
2385+
}
2386+
return true;
2387+
}
2388+
2389+
public:
2390+
HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr)
2391+
: CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {}
2392+
2393+
bool emitCopy(QualType CType) {
2394+
LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);
2395+
2396+
// If we don't have an aggregate, we can just fall back to normal memcpy.
2397+
if (!LayoutTy->isAggregateType())
2398+
return false;
2399+
2400+
emitCopyAtIndices(LayoutTy, 0, 0);
2401+
return true;
2402+
}
2403+
};
2404+
} // namespace
2405+
22832406
void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
22842407
AggValueSlot::Overlap_t MayOverlap,
22852408
bool isVolatile) {
@@ -2315,6 +2438,10 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
23152438
}
23162439
}
23172440

2441+
if (getLangOpts().HLSL && Ty.getAddressSpace() == LangAS::hlsl_constant)
2442+
if (HLSLBufferCopyEmitter(*this, DestPtr, SrcPtr).emitCopy(Ty))
2443+
return;
2444+
23182445
// Aggregate assignment turns into llvm.memcpy. This is almost valid per
23192446
// C99 6.5.16.1p3, which states "If the value being stored in an object is
23202447
// read from another object that overlaps in anyway the storage of the first

clang/lib/CodeGen/CGHLSLRuntime.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,9 @@ CGHLSLRuntime::convertHLSLSpecificType(const Type *T,
266266
assert(T->isHLSLSpecificType() && "Not an HLSL specific type!");
267267

268268
// Check if the target has a specific translation for this type first.
269-
if (llvm::Type *TargetTy =
269+
if (llvm::Type *LayoutTy =
270270
CGM.getTargetCodeGenInfo().getHLSLType(CGM, T, Packoffsets))
271-
return TargetTy;
271+
return LayoutTy;
272272

273273
llvm_unreachable("Generic handling of HLSL types is not supported.");
274274
}
@@ -285,10 +285,8 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
285285

286286
// get the layout struct from constant buffer target type
287287
llvm::Type *BufType = BufGV->getValueType();
288-
llvm::Type *BufLayoutType =
289-
cast<llvm::TargetExtType>(BufType)->getTypeParameter(0);
290288
llvm::StructType *LayoutStruct = cast<llvm::StructType>(
291-
cast<llvm::TargetExtType>(BufLayoutType)->getTypeParameter(0));
289+
cast<llvm::TargetExtType>(BufType)->getTypeParameter(0));
292290

293291
// Start metadata list associating the buffer global variable with its
294292
// constants
@@ -327,6 +325,9 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
327325
continue;
328326
}
329327

328+
if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt))
329+
++ElemIt;
330+
330331
assert(ElemIt != LayoutStruct->element_end() &&
331332
"number of elements in layout struct does not match");
332333
llvm::Type *LayoutType = *ElemIt++;
@@ -424,12 +425,11 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) {
424425
if (BufDecl->hasValidPackoffset())
425426
fillPackoffsetLayout(BufDecl, Layout);
426427

427-
llvm::TargetExtType *TargetTy =
428-
cast<llvm::TargetExtType>(convertHLSLSpecificType(
429-
ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr));
428+
llvm::Type *LayoutTy = convertHLSLSpecificType(
429+
ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr);
430430
llvm::GlobalVariable *BufGV = new GlobalVariable(
431-
TargetTy, /*isConstant*/ false,
432-
GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy),
431+
LayoutTy, /*isConstant*/ false,
432+
GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy),
433433
llvm::formatv("{0}{1}", BufDecl->getName(),
434434
BufDecl->isCBuffer() ? ".cb" : ".tb"),
435435
GlobalValue::NotThreadLocal);
@@ -462,7 +462,7 @@ void CGHLSLRuntime::addRootSignature(
462462
SignatureDecl->getRootElements(), nullptr, M);
463463
}
464464

465-
llvm::TargetExtType *
465+
llvm::StructType *
466466
CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
467467
const auto Entry = LayoutTypes.find(StructType);
468468
if (Entry != LayoutTypes.end())
@@ -471,7 +471,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
471471
}
472472

473473
void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType,
474-
llvm::TargetExtType *LayoutTy) {
474+
llvm::StructType *LayoutTy) {
475475
assert(getHLSLBufferLayoutType(StructType) == nullptr &&
476476
"layout type for this struct already exist");
477477
LayoutTypes[StructType] = LayoutTy;

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,9 @@ class CGHLSLRuntime {
186186

187187
llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB);
188188

189-
llvm::TargetExtType *
190-
getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
189+
llvm::StructType *getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
191190
void addHLSLBufferLayoutType(const RecordType *LayoutStructTy,
192-
llvm::TargetExtType *LayoutTy);
191+
llvm::StructType *LayoutTy);
193192
void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E);
194193

195194
std::optional<LValue>
@@ -207,7 +206,7 @@ class CGHLSLRuntime {
207206
HLSLResourceBindingAttr *RBA);
208207
llvm::Triple::ArchType getArch();
209208

210-
llvm::DenseMap<const clang::RecordType *, llvm::TargetExtType *> LayoutTypes;
209+
llvm::DenseMap<const clang::RecordType *, llvm::StructType *> LayoutTypes;
211210
};
212211

213212
} // namespace CodeGen

0 commit comments

Comments
 (0)