Skip to content

Commit 222fa83

Browse files
committed
[Clang] Correct __builtin_dynamic_object_size for subobject types
The second argument of __builtin_dynamic_object_size controls whether it returns the size of the whole object or the closest surrounding object. For this struct:

    struct s {
      int foo;
      char bar[2][40];
      int baz;
      int qux;
    };

    int main(int argc, char **argv) {
      struct s f;

    #define report(x) printf(#x ": %zu\n", x)

      argc = 1;
      report(__builtin_dynamic_object_size(f.bar[argc], 0));
      report(__builtin_dynamic_object_size(f.bar[argc], 1));
      return 0;
    }

should return:

    __builtin_dynamic_object_size(f.bar[argc], 0): 48
    __builtin_dynamic_object_size(f.bar[argc], 1): 40

determined by the least significant bit of the TYPE.

The LLVM IR isn't sufficient to determine what could be considered a "sub-object". Instead determine the size / offset info in the front-end and pass that information along with the intrinsic.

This expands the llvm.objectsize intrinsic to add these three new fields:

  - The fifth argument controls which object:
    - If false, return the size of the closest surrounding object.
    - If true, return the size of the whole object from the pointer.
  - The sixth argument: if non-zero and the fifth argument is 'false', the size of the sub-object.
  - The seventh argument: if non-zero and the fifth argument is 'false', the offset of the sub-object.
1 parent 91791c6 commit 222fa83

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1052
-460
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 162 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "clang/AST/Decl.h"
2727
#include "clang/AST/OSLog.h"
2828
#include "clang/AST/OperationKinds.h"
29+
#include "clang/AST/StmtVisitor.h"
2930
#include "clang/Basic/TargetBuiltins.h"
3031
#include "clang/Basic/TargetInfo.h"
3132
#include "clang/Basic/TargetOptions.h"
@@ -1052,11 +1053,144 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
10521053
return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
10531054
}
10541055

1056+
namespace {
1057+
1058+
/// SubobjectFinder - A simple visitor to find the "sub-object" pointed to by a
1059+
/// __builtin_dynamic_object_size call. Information gathered from the
1060+
/// sub-object is used by the back-end to determine the correct size when the
1061+
/// 'TYPE' of the __bdos call has the least significant bit set (i.e. asking
1062+
/// for the sub-object size).
1063+
///
1064+
/// The expectation is that we'll eventually hit one of three expression types:
1065+
///
1066+
/// 1. DeclRefExpr - This is the expression for the base of the structure.
1067+
/// It's exactly what we want to build an access to the \p counted_by
1068+
/// field.
1069+
/// 2. MemberExpr - This is the field in the structure.
1070+
/// 3. CompoundLiteralExpr - This is for people who create something
1071+
/// heretical like (struct foo has a flexible array member):
1072+
///
1073+
/// (struct foo){ 1, 2 }.blah[idx];
1074+
///
1075+
/// All other expressions can be correctly handled with the current code.
1076+
struct SubobjectFinder
1077+
: public ConstStmtVisitor<SubobjectFinder, const Expr *> {
1078+
SubobjectFinder() = default;
1079+
1080+
//===--------------------------------------------------------------------===//
1081+
// Visitor Methods
1082+
//===--------------------------------------------------------------------===//
1083+
1084+
const Expr *VisitStmt(const Stmt *S) { return nullptr; }
1085+
1086+
const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
1087+
const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
1088+
const Expr *VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
1089+
return E;
1090+
}
1091+
1092+
const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
1093+
return Visit(E->getBase());
1094+
}
1095+
const Expr *VisitCastExpr(const CastExpr *E) {
1096+
return Visit(E->getSubExpr());
1097+
}
1098+
const Expr *VisitParenExpr(const ParenExpr *E) {
1099+
return Visit(E->getSubExpr());
1100+
}
1101+
const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
1102+
return Visit(E->getSubExpr());
1103+
}
1104+
const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
1105+
return Visit(E->getSubExpr());
1106+
}
1107+
};
1108+
1109+
} // end anonymous namespace
1110+
1111+
/// getFieldInfo - Gather the size and offset of the field \p VD in \p RD.
1112+
static std::pair<uint64_t, uint64_t> getFieldInfo(CodeGenFunction &CGF,
1113+
const RecordDecl *RD,
1114+
const ValueDecl *VD,
1115+
uint64_t Offset = 0) {
1116+
if (!RD)
1117+
return std::make_pair(0, 0);
1118+
1119+
ASTContext &Ctx = CGF.getContext();
1120+
const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1121+
unsigned FieldNo = 0;
1122+
1123+
for (const Decl *D : RD->decls()) {
1124+
if (const auto *Record = dyn_cast<RecordDecl>(D)) {
1125+
std::pair<uint64_t, uint64_t> Res =
1126+
getFieldInfo(CGF, Record->getDefinition(), VD,
1127+
Offset + Layout.getFieldOffset(FieldNo));
1128+
if (Res.first != 0)
1129+
return Res;
1130+
continue;
1131+
}
1132+
1133+
if (const auto *FD = dyn_cast<FieldDecl>(D); FD && FD == VD) {
1134+
Offset += Layout.getFieldOffset(FieldNo);
1135+
return std::make_pair(Ctx.getTypeSizeInChars(FD->getType()).getQuantity(),
1136+
Ctx.toCharUnitsFromBits(Offset).getQuantity());
1137+
}
1138+
1139+
if (isa<FieldDecl>(D))
1140+
++FieldNo;
1141+
}
1142+
1143+
return std::make_pair(0, 0);
1144+
}
1145+
1146+
/// getSubobjectInfo - Find the sub-object that \p E points to. If it lives
1147+
/// inside a struct, return the "size" and "offset" of that sub-object.
1148+
static std::pair<uint64_t, uint64_t> getSubobjectInfo(CodeGenFunction &CGF,
1149+
const Expr *E) {
1150+
const Expr *Subobject = SubobjectFinder().Visit(E);
1151+
if (!Subobject)
1152+
return std::make_pair(0, 0);
1153+
1154+
const RecordDecl *OuterRD = nullptr;
1155+
const ValueDecl *VD = nullptr;
1156+
1157+
if (const auto *DRE = dyn_cast<DeclRefExpr>(Subobject)) {
1158+
VD = DRE->getDecl();
1159+
QualType Ty = VD->getType();
1160+
if (Ty->isPointerType())
1161+
Ty = Ty->getPointeeType();
1162+
OuterRD = Ty->getAsRecordDecl();
1163+
} else if (const auto *ME = dyn_cast<MemberExpr>(Subobject)) {
1164+
VD = ME->getMemberDecl();
1165+
OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
1166+
} else {
1167+
if (!isa<CompoundLiteralExpr>(Subobject))
1168+
llvm_unreachable("unexpected expression");
1169+
1170+
// We encounter a CompoundLiteralExpr if we have something like this:
1171+
//
1172+
// __builtin_dynamic_object_size(&(struct x){ 1, 2, 3 }, 1)
1173+
//
1174+
// In that case, we want the size of the whole struct. So we don't have to
1175+
// worry about finding a suboject.
1176+
return std::make_pair(0, 0);
1177+
}
1178+
1179+
if (!VD || !OuterRD)
1180+
// The expression is referencing an object that's not in a struct.
1181+
return std::make_pair(0, 0);
1182+
1183+
return getFieldInfo(CGF, OuterRD->getDefinition(), VD);
1184+
}
1185+
10551186
/// Returns a Value corresponding to the size of the given expression.
10561187
/// This Value may be either of the following:
1057-
/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1058-
/// it)
1059-
/// - A call to the @llvm.objectsize intrinsic
1188+
///
1189+
/// - An Argument if E is a param with the pass_object_size attribute on
1190+
/// it,
1191+
/// - An Instruction representing the calculation of the value, when a
1192+
/// flexible array member is involved, or
1193+
/// - A call to the @llvm.objectsize intrinsic.
10601194
///
10611195
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
10621196
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
@@ -1084,18 +1218,31 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
10841218
}
10851219
}
10861220

1221+
// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1222+
// evaluate E for side-effects. In either case, we shouldn't lower to
1223+
// @llvm.objectsize.
1224+
if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1225+
return getDefaultBuiltinObjectSizeResult(Type, ResType);
1226+
1227+
std::pair<Value *, Value *> SubobjectInfo =
1228+
std::make_pair(Builder.getInt64(0), Builder.getInt64(0));
1229+
10871230
if (IsDynamic) {
10881231
// Emit special code for a flexible array member with the "counted_by"
10891232
// attribute.
10901233
if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
10911234
return V;
1092-
}
10931235

1094-
// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1095-
// evaluate E for side-effects. In either case, we shouldn't lower to
1096-
// @llvm.objectsize.
1097-
if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1098-
return getDefaultBuiltinObjectSizeResult(Type, ResType);
1236+
if ((Type & 1) != 0) {
1237+
// The object size is constrained to the sub-object containing the
1238+
// element. If it's in a structure, get the size and offset information
1239+
// for back-end processing.
1240+
std::pair<uint64_t, uint64_t> Info = getSubobjectInfo(*this, E);
1241+
if (Info.first != 0)
1242+
SubobjectInfo = std::make_pair(Builder.getInt64(Info.first),
1243+
Builder.getInt64(Info.second));
1244+
}
1245+
}
10991246

11001247
Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
11011248
assert(Ptr->getType()->isPointerTy() &&
@@ -1109,7 +1256,12 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
11091256
// For GCC compatibility, __builtin_object_size treat NULL as unknown size.
11101257
Value *NullIsUnknown = Builder.getTrue();
11111258
Value *Dynamic = Builder.getInt1(IsDynamic);
1112-
return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1259+
// If the least significant bit is clear, objects are whole variables. If
1260+
// it's set, a closest surrounding subobject is considered the object a
1261+
// pointer points to.
1262+
Value *WholeObj = Builder.getInt1((Type & 1) == 0);
1263+
return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic, WholeObj,
1264+
SubobjectInfo.first, SubobjectInfo.second});
11131265
}
11141266

11151267
namespace {

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -742,13 +742,18 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
742742
// FIXME: If Address Sanitizer is enabled, insert dynamic instrumentation
743743
// to check this.
744744
// FIXME: Get object address space
745-
llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy };
746-
llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys);
745+
llvm::Function *F =
746+
CGM.getIntrinsic(llvm::Intrinsic::objectsize, {IntPtrTy, Int8PtrTy});
747747
llvm::Value *Min = Builder.getFalse();
748748
llvm::Value *NullIsUnknown = Builder.getFalse();
749749
llvm::Value *Dynamic = Builder.getFalse();
750+
llvm::Value *WholeObj = Builder.getTrue();
751+
llvm::Value *SubobjectSize = Builder.getInt64(0);
752+
llvm::Value *SubobjectOffset = Builder.getInt64(0);
750753
llvm::Value *LargeEnough = Builder.CreateICmpUGE(
751-
Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}), Size);
754+
Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic, WholeObj,
755+
SubobjectSize, SubobjectOffset}),
756+
Size);
752757
Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize));
753758
}
754759
}

clang/test/CodeGen/catch-undef-behavior.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
void foo(void) {
3636
union { int i; } u;
3737

38-
// CHECK-COMMON: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64.p0(ptr %[[PTR:.*]], i1 false, i1 false, i1 false)
38+
// CHECK-COMMON: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64.p0(ptr %[[PTR:.*]], i1 false, i1 false, i1 false, i1 true, i64 0, i64 0)
3939
// CHECK-COMMON-NEXT: %[[OK:.*]] = icmp uge i64 %[[SIZE]], 4
4040

4141
// CHECK-UBSAN: br i1 %[[OK]], {{.*}} !prof ![[WEIGHT_MD:.*]], !nosanitize

0 commit comments

Comments
 (0)