|
19 | 19 | #include "CodeGenModule.h"
|
20 | 20 | #include "ConstantEmitter.h"
|
21 | 21 | #include "EHScopeStack.h"
|
| 22 | +#include "HLSLBufferLayoutBuilder.h" |
22 | 23 | #include "TargetInfo.h"
|
23 | 24 | #include "clang/AST/ASTContext.h"
|
24 | 25 | #include "clang/AST/Attr.h"
|
25 | 26 | #include "clang/AST/DeclCXX.h"
|
26 | 27 | #include "clang/AST/DeclTemplate.h"
|
27 | 28 | #include "clang/AST/StmtVisitor.h"
|
| 29 | +#include "llvm/ADT/ScopeExit.h" |
28 | 30 | #include "llvm/IR/Constants.h"
|
29 | 31 | #include "llvm/IR/Function.h"
|
30 | 32 | #include "llvm/IR/GlobalVariable.h"
|
@@ -2280,6 +2282,127 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
|
2280 | 2282 | return AggValueSlot::MayOverlap;
|
2281 | 2283 | }
|
2282 | 2284 |
|
| 2285 | +namespace { |
| 2286 | +class HLSLBufferCopyEmitter { |
| 2287 | + CodeGenFunction &CGF; |
| 2288 | + Address DestPtr; |
| 2289 | + Address SrcPtr; |
| 2290 | + llvm::Type *LayoutTy = nullptr; |
| 2291 | + |
| 2292 | + SmallVector<llvm::Value *> CurStoreIndices; |
| 2293 | + SmallVector<llvm::Value *> CurLoadIndices; |
| 2294 | + |
| 2295 | + void emitCopyAtIndices(llvm::Type *FieldTy, unsigned StoreIndex, |
| 2296 | + unsigned LoadIndex) { |
| 2297 | + CurStoreIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, StoreIndex)); |
| 2298 | + CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, LoadIndex)); |
| 2299 | + auto RestoreIndices = llvm::make_scope_exit([&]() { |
| 2300 | + CurStoreIndices.pop_back(); |
| 2301 | + CurLoadIndices.pop_back(); |
| 2302 | + }); |
| 2303 | + |
| 2304 | + if (processArray(FieldTy)) |
| 2305 | + return; |
| 2306 | + if (processBufferLayoutArray(FieldTy)) |
| 2307 | + return; |
| 2308 | + if (processStruct(FieldTy)) |
| 2309 | + return; |
| 2310 | + |
| 2311 | + // We have a scalar or vector element - emit a copy. |
| 2312 | + CharUnits Align = CharUnits::fromQuantity( |
| 2313 | + CGF.CGM.getDataLayout().getABITypeAlign(FieldTy)); |
| 2314 | + Address SrcGEP = RawAddress( |
| 2315 | + CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(), |
| 2316 | + CurLoadIndices, "cbuf.src"), |
| 2317 | + FieldTy, Align, SrcPtr.isKnownNonNull()); |
| 2318 | + Address DestGEP = CGF.Builder.CreateInBoundsGEP( |
| 2319 | + DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest"); |
| 2320 | + llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load"); |
| 2321 | + CGF.Builder.CreateStore(Load, DestGEP); |
| 2322 | + } |
| 2323 | + |
| 2324 | + bool processArray(llvm::Type *FieldTy) { |
| 2325 | + auto *AT = dyn_cast<llvm::ArrayType>(FieldTy); |
| 2326 | + if (!AT) |
| 2327 | + return false; |
| 2328 | + |
| 2329 | + // If we have an array then there isn't any padding |
| 2330 | + // between elements. We just need to copy each element over. |
| 2331 | + for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I) |
| 2332 | + emitCopyAtIndices(AT->getElementType(), I, I); |
| 2333 | + return true; |
| 2334 | + } |
| 2335 | + |
| 2336 | + bool processBufferLayoutArray(llvm::Type *FieldTy) { |
| 2337 | + auto *ST = dyn_cast<llvm::StructType>(FieldTy); |
| 2338 | + if (!ST || ST->getNumElements() != 2) |
| 2339 | + return false; |
| 2340 | + |
| 2341 | + auto *PaddedEltsTy = dyn_cast<llvm::ArrayType>(ST->getElementType(0)); |
| 2342 | + if (!PaddedEltsTy) |
| 2343 | + return false; |
| 2344 | + |
| 2345 | + auto *PaddedTy = dyn_cast<llvm::StructType>(PaddedEltsTy->getElementType()); |
| 2346 | + if (!PaddedTy || PaddedTy->getNumElements() != 2) |
| 2347 | + return false; |
| 2348 | + |
| 2349 | + if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding( |
| 2350 | + PaddedTy->getElementType(1))) |
| 2351 | + return false; |
| 2352 | + |
| 2353 | + llvm::Type *ElementTy = ST->getElementType(1); |
| 2354 | + if (PaddedTy->getElementType(0) != ElementTy) |
| 2355 | + return false; |
| 2356 | + |
| 2357 | + // All but the last of the logical array elements are in the padded array. |
| 2358 | + unsigned NumElts = PaddedEltsTy->getNumElements() + 1; |
| 2359 | + |
| 2360 | + // Add an extra indirection to the load for the struct and walk the |
| 2361 | + // array prefix. |
| 2362 | + CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, 0)); |
| 2363 | + for (unsigned I = 0; I < NumElts - 1; ++I) |
| 2364 | + emitCopyAtIndices(ElementTy, I, I); |
| 2365 | + CurLoadIndices.pop_back(); |
| 2366 | + |
| 2367 | + // Now copy the last element. |
| 2368 | + emitCopyAtIndices(ElementTy, NumElts - 1, 1); |
| 2369 | + |
| 2370 | + return true; |
| 2371 | + } |
| 2372 | + |
| 2373 | + bool processStruct(llvm::Type *FieldTy) { |
| 2374 | + auto *ST = dyn_cast<llvm::StructType>(FieldTy); |
| 2375 | + if (!ST) |
| 2376 | + return false; |
| 2377 | + |
| 2378 | + unsigned Skipped = 0; |
| 2379 | + for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) { |
| 2380 | + llvm::Type *ElementTy = ST->getElementType(I); |
| 2381 | + if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy)) |
| 2382 | + ++Skipped; |
| 2383 | + else |
| 2384 | + emitCopyAtIndices(ElementTy, I, I + Skipped); |
| 2385 | + } |
| 2386 | + return true; |
| 2387 | + } |
| 2388 | + |
| 2389 | +public: |
| 2390 | + HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr) |
| 2391 | + : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {} |
| 2392 | + |
| 2393 | + bool emitCopy(QualType CType) { |
| 2394 | + LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType); |
| 2395 | + |
| 2396 | + // If we don't have an aggregate, we can just fall back to normal memcpy. |
| 2397 | + if (!LayoutTy->isAggregateType()) |
| 2398 | + return false; |
| 2399 | + |
| 2400 | + emitCopyAtIndices(LayoutTy, 0, 0); |
| 2401 | + return true; |
| 2402 | + } |
| 2403 | +}; |
| 2404 | +} // namespace |
| 2405 | + |
2283 | 2406 | void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
|
2284 | 2407 | AggValueSlot::Overlap_t MayOverlap,
|
2285 | 2408 | bool isVolatile) {
|
@@ -2315,6 +2438,10 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
|
2315 | 2438 | }
|
2316 | 2439 | }
|
2317 | 2440 |
|
| 2441 | + if (getLangOpts().HLSL && Ty.getAddressSpace() == LangAS::hlsl_constant) |
| 2442 | + if (HLSLBufferCopyEmitter(*this, DestPtr, SrcPtr).emitCopy(Ty)) |
| 2443 | + return; |
| 2444 | + |
2318 | 2445 | // Aggregate assignment turns into llvm.memcpy. This is almost valid per
|
2319 | 2446 | // C99 6.5.16.1p3, which states "If the value being stored in an object is
|
2320 | 2447 | // read from another object that overlaps in any way the storage of the first
|
|
0 commit comments