Skip to content

Commit 88293bb

Browse files
zoecarver authored and huixie90 committed
[Builtin] Add __builtin_clear_padding
Adds `__builtin_clear_padding` to zero all padding bits of a struct. This builtin should match the behavior of those in NVCC and GCC (and MSVC?). There are some tests in this patch but hopefully we'll also get tests from other compilers (so all builtins can be as similar as possible). I'm planning to add support for unions, bitfields (both as members and members of sub-objects), and booleans as follow up patches. Differential Revision: https://reviews.llvm.org/D87974 overlapping subobjects + opague pointer union, rename, scalar types
1 parent 1e48418 commit 88293bb

File tree

6 files changed

+1178
-0
lines changed

6 files changed

+1178
-0
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,11 @@ def IsWithinLifetime : LangBuiltin<"CXX_LANG"> {
968968
let Spellings = ["__builtin_is_within_lifetime"];
969969
let Attributes = [NoThrow, CustomTypeChecking, Consteval];
970970
let Prototype = "bool(void*)";
971+
972+
def ClearPadding : LangBuiltin<"CXX_LANG"> {
973+
let Spellings = ["__builtin_clear_padding"];
974+
let Attributes = [NoThrow];
975+
let Prototype = "void(void*)";
971976
}
972977

973978
// GCC exception builtins

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@
3636
#include "llvm/IR/MatrixBuilder.h"
3737
#include "llvm/Support/ConvertUTF.h"
3838
#include "llvm/Support/ScopedPrinter.h"
39+
#include "llvm/TargetParser/AArch64TargetParser.h"
40+
#include "llvm/TargetParser/X86TargetParser.h"
41+
#include <algorithm>
3942
#include <optional>
4043
#include <utility>
4144

@@ -2554,6 +2557,205 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
25542557
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
25552558
}
25562559

2560+
template <class T>
2561+
void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
2562+
size_t CurrentStartOffset,
2563+
size_t &RunningOffset, T &&WriteZeroAtOffset,
2564+
bool VisitVirtualBase);
2565+
2566+
template <class T>
2567+
void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
2568+
StructType *ST, size_t CurrentStartOffset,
2569+
size_t &RunningOffset, T &&WriteZeroAtOffset,
2570+
bool VisitVirtualBase) {
2571+
llvm::dbgs() << "clear padding struct: " << ST->getName().data() << '\n';
2572+
const auto &DL = CGF.CGM.getModule().getDataLayout();
2573+
auto *SL = DL.getStructLayout(ST);
2574+
auto *R = dyn_cast<CXXRecordDecl>(Ty->getAsRecordDecl());
2575+
if (!R) {
2576+
llvm::dbgs() << "Not a CXXRecordDecl\n";
2577+
return;
2578+
}
2579+
const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
2580+
if (ASTLayout.hasOwnVFPtr()) {
2581+
llvm::dbgs() << "vtable ptr. Incrementing RunningOffset from "
2582+
<< RunningOffset << " to "
2583+
<< RunningOffset + DL.getPointerSizeInBits() / 8 << '\n';
2584+
RunningOffset += DL.getPointerSizeInBits() / 8;
2585+
}
2586+
std::vector<std::pair<size_t, CXXBaseSpecifier>> Bases;
2587+
Bases.reserve(R->getNumBases());
2588+
// todo get vbases
2589+
for (auto Base : R->bases()) {
2590+
auto *BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
2591+
if (!Base.isVirtual()) {
2592+
auto Offset = static_cast<size_t>(
2593+
ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
2594+
Bases.emplace_back(Offset, Base);
2595+
}
2596+
}
2597+
2598+
auto VisitBases =
2599+
[&](std::vector<std::pair<size_t, CXXBaseSpecifier>> &BasesToVisit) {
2600+
std::sort(
2601+
BasesToVisit.begin(), BasesToVisit.end(),
2602+
[](const auto &P1, const auto &P2) { return P1.first < P2.first; });
2603+
for (const auto &Pair : BasesToVisit) {
2604+
// is it OK to use structured binding in clang? what is the language
2605+
// version?
2606+
auto Offset = Pair.first;
2607+
auto Base = Pair.second;
2608+
2609+
llvm::dbgs() << "visiting base at offset " << Offset << '\n';
2610+
// Recursively zero out base classes.
2611+
auto Index = SL->getElementContainingOffset(Offset);
2612+
Value *Idx = CGF.Builder.getSize(Index);
2613+
llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
2614+
Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
2615+
RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(),
2616+
CurrentStartOffset + Offset,
2617+
RunningOffset, WriteZeroAtOffset, false);
2618+
}
2619+
};
2620+
2621+
VisitBases(Bases);
2622+
2623+
size_t NumFields = std::distance(R->field_begin(), R->field_end());
2624+
std::vector<size_t> FieldOffsets;
2625+
FieldOffsets.reserve(NumFields);
2626+
auto CurrentField = R->field_begin();
2627+
for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
2628+
// Size needs to be in bytes so we can compare it later.
2629+
auto Offset = ASTLayout.getFieldOffset(I) / 8;
2630+
llvm::dbgs() << "visiting field at offset " << Offset << '\n';
2631+
auto Index = SL->getElementContainingOffset(Offset);
2632+
Value *Idx = CGF.Builder.getSize(Index);
2633+
llvm::Type *CurrentFieldType =
2634+
CGF.ConvertTypeForMem(CurrentField->getType());
2635+
Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
2636+
RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(),
2637+
CurrentStartOffset + Offset, RunningOffset,
2638+
WriteZeroAtOffset, true);
2639+
}
2640+
2641+
if (VisitVirtualBase) {
2642+
2643+
std::vector<std::pair<size_t, CXXBaseSpecifier>> VBases;
2644+
VBases.reserve(R->getNumVBases());
2645+
for (auto VBase : R->vbases()) {
2646+
auto *BaseRecord =
2647+
cast<CXXRecordDecl>(VBase.getType()->getAsRecordDecl());
2648+
auto Offset = static_cast<size_t>(
2649+
ASTLayout.getVBaseClassOffset(BaseRecord).getQuantity());
2650+
VBases.emplace_back(Offset, VBase);
2651+
}
2652+
2653+
VisitBases(VBases);
2654+
}
2655+
}
2656+
2657+
template <class T>
2658+
void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr,
2659+
llvm::Type *Type, ConstantArrayType const *AT,
2660+
size_t CurrentStartOffset, size_t &RunningOffset,
2661+
T &&WriteZeroAtOffset) {
2662+
llvm::dbgs() << "clear padding constant array\n";
2663+
for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
2664+
++ArrIndex) {
2665+
2666+
QualType ElementQualType = AT->getElementType();
2667+
2668+
auto *ElementRecord = ElementQualType->getAsRecordDecl();
2669+
if (!ElementRecord) {
2670+
llvm::dbgs() << "null!\n";
2671+
}
2672+
auto ElementAlign =
2673+
ElementRecord
2674+
? CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment()
2675+
: CGF.getContext().getTypeAlignInChars(ElementQualType);
2676+
2677+
Address FieldElementAddr{Ptr, Type, ElementAlign};
2678+
2679+
auto Element = CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
2680+
auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
2681+
auto AllocSize =
2682+
CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
2683+
llvm::dbgs() << "clearing array index! " << ArrIndex << '\n';
2684+
RecursivelyClearPaddingImpl(CGF, Element.getBasePointer(), ElementQualType,
2685+
CurrentStartOffset +
2686+
ArrIndex * AllocSize.getKnownMinValue(),
2687+
RunningOffset, WriteZeroAtOffset, true);
2688+
}
2689+
}
2690+
2691+
template <class T>
2692+
void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
2693+
size_t CurrentStartOffset,
2694+
size_t &RunningOffset, T &&WriteZeroAtOffset,
2695+
bool VisitVirtualBase) {
2696+
2697+
llvm::dbgs() << "clear padding before current [" << RunningOffset << ", "
2698+
<< CurrentStartOffset << ")\n";
2699+
for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
2700+
WriteZeroAtOffset(RunningOffset);
2701+
}
2702+
auto *Type = CGF.ConvertTypeForMem(Ty);
2703+
auto Size = CGF.CGM.getModule()
2704+
.getDataLayout()
2705+
.getTypeSizeInBits(Type)
2706+
.getKnownMinValue() /
2707+
8;
2708+
2709+
if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
2710+
ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset,
2711+
RunningOffset, WriteZeroAtOffset);
2712+
} else if (auto *ST = dyn_cast<StructType>(Type); ST && Ty->isRecordType()) {
2713+
ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset,
2714+
WriteZeroAtOffset, VisitVirtualBase);
2715+
} else if (Ty->isAtomicType()) {
2716+
RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(),
2717+
CurrentStartOffset, RunningOffset,
2718+
WriteZeroAtOffset, true);
2719+
} else {
2720+
llvm::dbgs() << "increment running offset from: " << RunningOffset << " to "
2721+
<< RunningOffset + Size << '\n';
2722+
RunningOffset =
2723+
std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size));
2724+
}
2725+
}
2726+
2727+
/// Entry point for __builtin_clear_padding codegen: zeroes every padding byte
/// of the object of type `Ty` pointed to by `Ptr`, including tail padding.
static void RecursivelyClearPadding(CodeGenFunction &CGF, Value *Ptr,
                                    QualType Ty) {
  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
  // Stores a single zero byte at `Offset` from the object's start. An
  // arbitrary interior byte can only be assumed to be 1-byte aligned.
  auto WriteZeroAtOffset = [&](uint64_t Offset) {
    auto *Index = ConstantInt::get(CGF.IntTy, Offset);
    auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
    CGF.Builder.CreateAlignedStore(Zero, Element, CharUnits::One());
  };

  size_t RunningOffset = 0;
  RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset,
                              /*VisitVirtualBase=*/true);

  // Clear tail padding: everything between the last real object byte and the
  // type's alloc size.
  auto *Type = CGF.ConvertTypeForMem(Ty);
  auto Size = CGF.CGM.getModule()
                  .getDataLayout()
                  .getTypeAllocSize(Type)
                  .getKnownMinValue();
  for (; RunningOffset < Size; ++RunningOffset)
    WriteZeroAtOffset(RunningOffset);
}
2758+
25572759
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
25582760
const CallExpr *E,
25592761
ReturnValueSlot ReturnValue) {
@@ -4766,6 +4968,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
47664968

47674969
return RValue::get(Ptr);
47684970
}
4971+
case Builtin::BI__builtin_clear_padding: {
4972+
const Expr *Op = E->getArg(0);
4973+
Value *Address = EmitScalarExpr(Op);
4974+
auto PointeeTy = Op->getType()->getPointeeType();
4975+
RecursivelyClearPadding(*this, Address, PointeeTy);
4976+
return RValue::get(nullptr);
4977+
}
47694978
case Builtin::BI__sync_fetch_and_add:
47704979
case Builtin::BI__sync_fetch_and_sub:
47714980
case Builtin::BI__sync_fetch_and_or:

clang/lib/Sema/SemaChecking.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2435,7 +2435,37 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
24352435
return BuiltinIsWithinLifetime(*this, TheCall);
24362436
case Builtin::BI__builtin_trivially_relocate:
24372437
return BuiltinTriviallyRelocate(*this, TheCall);
2438+
case Builtin::BI__builtin_clear_padding: {
2439+
const auto numArgs = TheCall->getNumArgs();
2440+
if (numArgs < 1) {
2441+
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_one)
2442+
<< 0 /*function call*/ << "T*" << 0;
2443+
return ExprError();
2444+
}
2445+
if (numArgs > 1) {
2446+
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_many_args_one)
2447+
<< 0 /*function call*/ << "T*" << numArgs << 0;
2448+
return ExprError();
2449+
}
24382450

2451+
const Expr *PtrArg = TheCall->getArg(0);
2452+
const QualType PtrArgType = PtrArg->getType();
2453+
if (!PtrArgType->isPointerType()) {
2454+
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
2455+
<< PtrArgType << "pointer" << 1 << 0 << 3 << 1 << PtrArgType
2456+
<< "pointer";
2457+
return ExprError();
2458+
}
2459+
if (PtrArgType->getPointeeType().isConstQualified()) {
2460+
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
2461+
<< TheCall->getSourceRange() << 5 /*ConstUnknown*/;
2462+
return ExprError();
2463+
}
2464+
if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
2465+
diag::err_typecheck_decl_incomplete_type))
2466+
return ExprError();
2467+
break;
2468+
}
24392469
case Builtin::BI__sync_fetch_and_add:
24402470
case Builtin::BI__sync_fetch_and_add_1:
24412471
case Builtin::BI__sync_fetch_and_add_2:
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
2+
3+
// Over-aligned so that each member leaves padding after it:
// layout is { a, pad, b, pad }.
struct alignas(4) Foo {
  char a;
  alignas(2) char b;
};

// Same shape as Foo; used as a nested member below.
struct alignas(4) Bar {
  char c;
  alignas(2) char d;
};

// Padding comes from three places: the Foo base subobject, the record's own
// member `e` (followed by padding up to Bar's alignment), and the nested Bar.
struct alignas(4) Baz : Foo {
  char e;
  Bar f;
};
17+
18+
// Baz structure, byte by byte (the first "c" was a typo for member `e`):
// "a", PAD_1, "b", PAD_2, "e", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
// %struct.Foo = type { i8, i8, i8, i8 }
// %struct.Bar = type { i8, i8, i8, i8 }

// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*

// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
// CHECK: store i8 0, i8* [[PAD_1]]
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
// CHECK: store i8 0, i8* [[PAD_2]]

// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
// CHECK: store i8 0, i8* [[PAD_3]]
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
// CHECK: store i8 0, i8* [[PAD_4]]
// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
// CHECK: store i8 0, i8* [[PAD_5]]

// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
// CHECK: store i8 0, i8* [[PAD_6]]
// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
// CHECK: store i8 0, i8* [[PAD_7]]
// CHECK: ret void
void testBaz(Baz *baz) {
  __builtin_clear_padding(baz);
}
54+
55+
// A record ending in a flexible array member: only the padding between
// `size` and the over-aligned `buf` should be cleared.
struct UnsizedTail {
  int size;
  alignas(8) char buf[];

  UnsizedTail(int size) : size(size) {}
};

// UnsizedTail structure:
// "size", PAD_1, PAD_2, PAD_3, PAD_4
// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }

// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
// CHECK: store i8 0, i8* [[PAD_1]]
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
// CHECK: store i8 0, i8* [[PAD_2]]
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
// CHECK: store i8 0, i8* [[PAD_3]]
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
// CHECK: store i8 0, i8* [[PAD_4]]
// CHECK: ret void
void testUnsizedTail(UnsizedTail *u) {
  __builtin_clear_padding(u);
}
83+
84+
struct ArrOfStructsWithPadding {
85+
Bar bars[2];
86+
};
87+
88+
// ArrOfStructsWithPadding structure:
89+
// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
90+
// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
91+
92+
// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
93+
// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
94+
// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
95+
// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
96+
// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
97+
// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
98+
// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
99+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
100+
// CHECK: store i8 0, i8* [[PAD_1]]
101+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* %4, i32 3
102+
// CHECK: store i8 0, i8* [[PAD_2]]
103+
// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
104+
// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
105+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
106+
// CHECK: store i8 0, i8* [[PAD_3]]
107+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
108+
// CHECK: store i8 0, i8* [[PAD_4]]
109+
// CHECK: ret void
110+
void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
111+
__builtin_clear_padding(arr);
112+
}

0 commit comments

Comments
 (0)