Skip to content
Draft
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions clang/include/clang/AST/CharUnits.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ namespace clang {
/// Among other things, this promises that
/// self.alignTo(N) will just return self.
bool isMultipleOf(CharUnits N) const {
return (*this % N) == 0;
return (*this % N) == CharUnits::Zero();
}

// Arithmetic operators.
Expand All @@ -165,8 +165,8 @@ namespace clang {
CharUnits operator% (QuantityType N) const {
return CharUnits(Quantity % N);
}
QuantityType operator% (const CharUnits &Other) const {
return Quantity % Other.Quantity;
CharUnits operator% (const CharUnits &Other) const {
return CharUnits(Quantity % Other.Quantity);
}
CharUnits operator+ (const CharUnits &Other) const {
return CharUnits(Quantity + Other.Quantity);
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/AST/APValue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@ void APValue::printPretty(raw_ostream &Out, const PrintingPolicy &Policy,
if (!O.isZero()) {
if (IsReference)
Out << "*(";
if (S.isZero() || O % S) {
if (S.isZero() || !O.isMultipleOf(S)) {
Out << "(char*)";
S = CharUnits::One();
}
Expand Down
9 changes: 4 additions & 5 deletions clang/lib/AST/RecordLayoutBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2087,9 +2087,8 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
if (InsertExtraPadding) {
CharUnits ASanAlignment = CharUnits::fromQuantity(8);
CharUnits ExtraSizeForAsan = ASanAlignment;
if (FieldSize % ASanAlignment)
ExtraSizeForAsan +=
ASanAlignment - CharUnits::fromQuantity(FieldSize % ASanAlignment);
if (!FieldSize.isMultipleOf(ASanAlignment))
ExtraSizeForAsan += ASanAlignment - (FieldSize % ASanAlignment);
EffectiveFieldSize = FieldSize = FieldSize + ExtraSizeForAsan;
}

Expand Down Expand Up @@ -2119,10 +2118,10 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
if (RD->hasAttr<PackedAttr>() || !MaxFieldAlignment.isZero())
if (FieldAlign < OriginalFieldAlign)
if (D->getType()->isRecordType()) {
// If the offset is a multiple of the alignment of
// If the offset is not a multiple of the alignment of
// the type, raise the warning.
// TODO: Takes no account the alignment of the outer struct
if (FieldOffset % OriginalFieldAlign != 0)
if (!FieldOffset.isMultipleOf(OriginalFieldAlign))
Diag(D->getLocation(), diag::warn_unaligned_access)
<< Context.getCanonicalTagType(RD) << D->getName()
<< D->getType();
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/Basic/Targets/DirectX.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,11 @@ class LLVM_LIBRARY_VISIBILITY DirectXTargetInfo : public TargetInfo {
NoAsmVariants = true;
PlatformMinVersion = Triple.getOSVersion();
PlatformName = llvm::Triple::getOSTypeName(Triple.getOS());
// TODO: We need to align vectors on the element size generally, but for now
// we hard code this for 3-element 32- and 64-bit vectors as a workaround.
// See https://github.com/llvm/llvm-project/issues/123968
resetDataLayout("e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:"
"32-f64:64-n8:16:32:64");
"32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64");
TheCXXABI.set(TargetCXXABI::GenericItanium);
}
bool useFP16ConversionIntrinsics() const override { return false; }
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGAtomic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
CharUnits MaxInlineWidth =
getContext().toCharUnitsFromBits(MaxInlineWidthInBits);
DiagnosticsEngine &Diags = CGM.getDiags();
bool Misaligned = (Ptr.getAlignment() % TInfo.Width) != 0;
bool Misaligned = !Ptr.getAlignment().isMultipleOf(TInfo.Width);
bool Oversized = getContext().toBits(TInfo.Width) > MaxInlineWidthInBits;
if (Misaligned) {
Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
Expand Down
20 changes: 20 additions & 0 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4678,6 +4678,26 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this),
ScaledIdx, false, SignedIndices, E->getExprLoc());
Addr = Address(EltPtr, OrigBaseElemTy, EltAlign);
} else if (E->getType().getAddressSpace() == LangAS::hlsl_constant) {
// This is an array inside of a cbuffer.
Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
auto *Idx = EmitIdxAfterBase(/*Promote*/true);

// ...
CharUnits RowAlignedSize = getContext()
.getTypeSizeInChars(E->getType())
.alignTo(CharUnits::fromQuantity(16));

llvm::Value *RowAlignedSizeVal =
llvm::ConstantInt::get(Idx->getType(), RowAlignedSize.getQuantity());
llvm::Value *ScaledIdx = Builder.CreateMul(Idx, RowAlignedSizeVal);

CharUnits EltAlign =
getArrayElementAlign(Addr.getAlignment(), Idx, RowAlignedSize);
llvm::Value *EltPtr =
emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this),
ScaledIdx, false, SignedIndices, E->getExprLoc());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing an i8 gep does work for SPIR-V. We want to avoid that as much as possible. Can this be turned into a typed GEP with the padded type when needed? I tried writing it myself so I could make a suggestion, but I can't get it right.

See https://discourse.llvm.org/t/type-based-gep-plans/87183/14

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bogner You are still generating this GEP with an i8. Can we change this to a GEP on the array type.

Start with the test at the end. Compile with clang-dxc test_minimal_peeling.hlsl -T cs_6_8 -spirv -fcgl

The access to myArray is:

  %3 = load i32, ptr %index, align 4, !tbaa !4
  %idxprom = zext i32 %3 to i64
  %4 = mul i64 %idxprom, 16
  %arrayidx = getelementptr i8, ptr addrspace(12) @myArray, i64 %4 ; <-- problem for SPIR-V.
  %f = getelementptr inbounds nuw %struct.OrigType, ptr addrspace(12) %arrayidx, i32 0, i32 0
  %5 = load float, ptr addrspace(12) %f, align 1, !tbaa !14

It would be better if it could be

  %3 = load i32, ptr %index, align 4, !tbaa !4
  %idxprom = zext i32 %3 to i64
  %arrayidx = getelementptr [4 x <{ %OrigType, target("spirv.Padding", 12) }>], ptr addrspace(12) @myArray, i64 %idxprom ; <-- Explicit array
  %f = getelementptr inbounds nuw %struct.OrigType, ptr addrspace(12) %arrayidx, i32 0, i32 0
  %5 = load float, ptr addrspace(12) %f, align 1, !tbaa !14

You use the original array size with the padding.

struct OrigType {
  float f;
};

cbuffer MyCBuffer {
  OrigType myArray[4];
  float anotherValue;
};

RWBuffer<float4> output;

[numthreads(1, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID) {
  uint index = DTid.x % 4;
  float v = myArray[index].f;
  float f = anotherValue;
  output[0] = float4(v, f, 0, 0);
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've tried this out in 92bd225 (which still needs test updates). This looks mostly reasonable with the caveat that we do need a bit of a fictional type for this to work (the array with padding on all elements including the last one). Since we don't actually read the padding this is probably fine.

Addr = Address(EltPtr, Addr.getElementType(), EltAlign);
} else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
// If this is A[i] where A is an array, the frontend will have decayed the
// base to be a ArrayToPointerDecay implicit cast. While correct, it is
Expand Down
127 changes: 127 additions & 0 deletions clang/lib/CodeGen/CGExprAgg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "EHScopeStack.h"
#include "HLSLBufferLayoutBuilder.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/StmtVisitor.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
Expand Down Expand Up @@ -2280,6 +2282,127 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
return AggValueSlot::MayOverlap;
}

namespace {
class HLSLBufferCopyEmitter {
CodeGenFunction &CGF;
Address DestPtr;
Address SrcPtr;
llvm::Type *LayoutTy = nullptr;

SmallVector<llvm::Value *> CurStoreIndices;
SmallVector<llvm::Value *> CurLoadIndices;

void emitCopyAtIndices(llvm::Type *FieldTy, unsigned StoreIndex,
unsigned LoadIndex) {
CurStoreIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, StoreIndex));
CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, LoadIndex));
auto RestoreIndices = llvm::make_scope_exit([&]() {
CurStoreIndices.pop_back();
CurLoadIndices.pop_back();
});

if (processArray(FieldTy))
return;
if (processBufferLayoutArray(FieldTy))
return;
if (processStruct(FieldTy))
return;

// We have a scalar or vector element - emit a copy.
CharUnits Align = CharUnits::fromQuantity(
CGF.CGM.getDataLayout().getABITypeAlign(FieldTy));
Address SrcGEP = RawAddress(
CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(),
CurLoadIndices, "cbuf.src"),
FieldTy, Align, SrcPtr.isKnownNonNull());
Address DestGEP = CGF.Builder.CreateInBoundsGEP(
DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest");
llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load");
CGF.Builder.CreateStore(Load, DestGEP);
}

bool processArray(llvm::Type *FieldTy) {
auto *AT = dyn_cast<llvm::ArrayType>(FieldTy);
if (!AT)
return false;

// If we have an array then there isn't any padding
// between elements. We just need to copy each element over.
for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I)
emitCopyAtIndices(AT->getElementType(), I, I);
return true;
}

bool processBufferLayoutArray(llvm::Type *FieldTy) {
auto *ST = dyn_cast<llvm::StructType>(FieldTy);
if (!ST || ST->getNumElements() != 2)
return false;

auto *PaddedEltsTy = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
if (!PaddedEltsTy)
return false;

auto *PaddedTy = dyn_cast<llvm::StructType>(PaddedEltsTy->getElementType());
if (!PaddedTy || PaddedTy->getNumElements() != 2)
return false;

if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(
PaddedTy->getElementType(1)))
return false;

llvm::Type *ElementTy = ST->getElementType(1);
if (PaddedTy->getElementType(0) != ElementTy)
return false;

// All but the last of the logical array elements are in the padded array.
unsigned NumElts = PaddedEltsTy->getNumElements() + 1;

// Add an extra indirection to the load for the struct and walk the
// array prefix.
CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, 0));
for (unsigned I = 0; I < NumElts - 1; ++I)
emitCopyAtIndices(ElementTy, I, I);
CurLoadIndices.pop_back();

// Now copy the last element.
emitCopyAtIndices(ElementTy, NumElts - 1, 1);

return true;
}

bool processStruct(llvm::Type *FieldTy) {
auto *ST = dyn_cast<llvm::StructType>(FieldTy);
if (!ST)
return false;

unsigned Skipped = 0;
for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) {
llvm::Type *ElementTy = ST->getElementType(I);
if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy))
++Skipped;
else
emitCopyAtIndices(ElementTy, I, I + Skipped);
}
return true;
}

public:
HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr)
: CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {}

bool emitCopy(QualType CType) {
LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);

// If we don't have an aggregate, we can just fall back to normal memcpy.
if (!LayoutTy->isAggregateType())
return false;

emitCopyAtIndices(LayoutTy, 0, 0);
return true;
}
};
} // namespace

void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
AggValueSlot::Overlap_t MayOverlap,
bool isVolatile) {
Expand Down Expand Up @@ -2315,6 +2438,10 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
}
}

if (getLangOpts().HLSL && Ty.getAddressSpace() == LangAS::hlsl_constant)
if (HLSLBufferCopyEmitter(*this, DestPtr, SrcPtr).emitCopy(Ty))
return;

// Aggregate assignment turns into llvm.memcpy. This is almost valid per
// C99 6.5.16.1p3, which states "If the value being stored in an object is
// read from another object that overlaps in anyway the storage of the first
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ llvm::Constant *ConstantAggregateBuilder::buildFrom(

// All remaining elements must be the same type.
if (Elems[I]->getType() != CommonType ||
Offset(I) % ElemSize != 0) {
!Offset(I).isMultipleOf(ElemSize)) {
CanEmitArray = false;
break;
}
Expand Down
24 changes: 12 additions & 12 deletions clang/lib/CodeGen/CGHLSLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,9 @@ CGHLSLRuntime::convertHLSLSpecificType(const Type *T,
assert(T->isHLSLSpecificType() && "Not an HLSL specific type!");

// Check if the target has a specific translation for this type first.
if (llvm::Type *TargetTy =
if (llvm::Type *LayoutTy =
CGM.getTargetCodeGenInfo().getHLSLType(CGM, T, Packoffsets))
return TargetTy;
return LayoutTy;

llvm_unreachable("Generic handling of HLSL types is not supported.");
}
Expand All @@ -285,10 +285,8 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,

// get the layout struct from constant buffer target type
llvm::Type *BufType = BufGV->getValueType();
llvm::Type *BufLayoutType =
cast<llvm::TargetExtType>(BufType)->getTypeParameter(0);
llvm::StructType *LayoutStruct = cast<llvm::StructType>(
cast<llvm::TargetExtType>(BufLayoutType)->getTypeParameter(0));
cast<llvm::TargetExtType>(BufType)->getTypeParameter(0));

// Start metadata list associating the buffer global variable with its
// constatns
Expand Down Expand Up @@ -327,6 +325,9 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
continue;
}

if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt))
++ElemIt;

assert(ElemIt != LayoutStruct->element_end() &&
"number of elements in layout struct does not match");
llvm::Type *LayoutType = *ElemIt++;
Expand Down Expand Up @@ -424,12 +425,11 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) {
if (BufDecl->hasValidPackoffset())
fillPackoffsetLayout(BufDecl, Layout);

llvm::TargetExtType *TargetTy =
cast<llvm::TargetExtType>(convertHLSLSpecificType(
ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr));
llvm::Type *LayoutTy = convertHLSLSpecificType(
ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr);
llvm::GlobalVariable *BufGV = new GlobalVariable(
TargetTy, /*isConstant*/ false,
GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy),
LayoutTy, /*isConstant*/ false,
GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy),
llvm::formatv("{0}{1}", BufDecl->getName(),
BufDecl->isCBuffer() ? ".cb" : ".tb"),
GlobalValue::NotThreadLocal);
Expand Down Expand Up @@ -462,7 +462,7 @@ void CGHLSLRuntime::addRootSignature(
SignatureDecl->getRootElements(), nullptr, M);
}

llvm::TargetExtType *
llvm::StructType *
CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
const auto Entry = LayoutTypes.find(StructType);
if (Entry != LayoutTypes.end())
Expand All @@ -471,7 +471,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
}

void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType,
llvm::TargetExtType *LayoutTy) {
llvm::StructType *LayoutTy) {
assert(getHLSLBufferLayoutType(StructType) == nullptr &&
"layout type for this struct already exist");
LayoutTypes[StructType] = LayoutTy;
Expand Down
7 changes: 3 additions & 4 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,9 @@ class CGHLSLRuntime {

llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB);

llvm::TargetExtType *
getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
llvm::StructType *getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
void addHLSLBufferLayoutType(const RecordType *LayoutStructTy,
llvm::TargetExtType *LayoutTy);
llvm::StructType *LayoutTy);
void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E);

std::optional<LValue>
Expand All @@ -207,7 +206,7 @@ class CGHLSLRuntime {
HLSLResourceBindingAttr *RBA);
llvm::Triple::ArchType getArch();

llvm::DenseMap<const clang::RecordType *, llvm::TargetExtType *> LayoutTypes;
llvm::DenseMap<const clang::RecordType *, llvm::StructType *> LayoutTypes;
};

} // namespace CodeGen
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGObjCMac.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5367,7 +5367,7 @@ IvarLayoutBuilder::buildBitmap(CGObjCCommonMac &CGObjC,

// Ignore scan requests that don't start at an even multiple of the
// word size. We can't encode them.
if ((beginOfScan % WordSize) != 0)
if (!beginOfScan.isMultipleOf(WordSize))
continue;

// Ignore scan requests that start before the instance start.
Expand Down
Loading
Loading