Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 146 additions & 10 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
Expand Down Expand Up @@ -1052,11 +1053,130 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
}

namespace {

/// SubobjectFinder - A simple visitor to find the "sub-object" pointed to by a
/// __builtin_dynamic_object_size call. Information gathered from the
/// sub-object is used by the back-end to determine the correct size when the
/// 'TYPE' of the __bdos call has the least significant bit set (i.e. asking
/// for the sub-object size).
///
/// The expectation is that we'll eventually hit one of three expression types:
///
/// 1. DeclRefExpr - This is the expression for the base of the structure.
/// 2. MemberExpr - This is the field in the structure.
/// 3. CompoundLiteralExpr - This is for people who create something
/// heretical like (struct foo has a flexible array member):
///
/// (struct foo){ 1, 2 }.blah[idx];
///
/// All other expressions can be correctly handled with the current code.
struct SubobjectFinder
: public ConstStmtVisitor<SubobjectFinder, const Expr *> {
SubobjectFinder() = default;

//===--------------------------------------------------------------------===//
// Visitor Methods
//===--------------------------------------------------------------------===//

const Expr *VisitStmt(const Stmt *S) { return nullptr; }

const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
const Expr *VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
return E;
}

const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
return Visit(E->getBase());
}
const Expr *VisitCastExpr(const CastExpr *E) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does GCC look through explicit casts? I wonder if this should be restricted to ImplicitCastExprs.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to. See https://godbolt.org/z/4xaY4191o for an example (the &((char *)&var.z.a)[argc] example looks through them.

return Visit(E->getSubExpr());
}
Comment on lines +1093 to +1095
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A derived-to-base cast is a traversal to a subobject in C++, so we should presumably terminate the traversal when we reach one and use the offset and size of the base class as the subobject.

That'd be a pretty big delta from what you have here, but it'd be a good idea to add a FIXME here.

const Expr *VisitParenExpr(const ParenExpr *E) {
return Visit(E->getSubExpr());
}
const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
return Visit(E->getSubExpr());
}
const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
return Visit(E->getSubExpr());
}
Comment on lines +1102 to +1104
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you'll need to be more careful when walking through address-of / dereferences -- the set of things you should step over when traversing a pointer to an object is different from the set of things you should step over when traversing an object lvalue. For example, the bounds to use for *p->member will be computed as the bounds of member, which isn't correct. I think you could address this by either having separate traversals for pointers versus lvalues, or by avoiding (for example) stepping through lvalue-to-rvalue conversions when stepping over CastExprs -- and in fact, the latter seems like a good idea in general, given that a CastExpr could do pretty much anything to the pointer / lvalue. In general, I think it's only really safe to step over casts that are a no-op for address purposes. Bitcasts seem OK, address space conversions seem OK, etc. but a lot of cast kinds are not going to be reasonable to step over.

};

} // end anonymous namespace

/// getFieldInfo - Gather the size of the field \p VD in \p RD.
static uint64_t getFieldInfo(CodeGenFunction &CGF, const RecordDecl *RD,
const ValueDecl *VD) {
if (!RD)
return 0;

ASTContext &Ctx = CGF.getContext();

for (const Decl *D : RD->decls()) {
if (const auto *Record = dyn_cast<RecordDecl>(D)) {
uint64_t Res = getFieldInfo(CGF, Record->getDefinition(), VD);
if (Res != 0)
return Res;
continue;
}

if (const auto *FD = dyn_cast<FieldDecl>(D); FD == VD)
return Ctx.getTypeSizeInChars(FD->getType()).getQuantity();
}

return 0;
}

/// getSubobjectInfo - Find the sub-object that \p E points to. If it lives
/// inside a struct, return the "size" and "offset" of that sub-object.
static uint64_t getSubobjectInfo(CodeGenFunction &CGF, const Expr *E) {
const Expr *Subobject = SubobjectFinder().Visit(E);
if (!Subobject)
return 0;

const RecordDecl *OuterRD = nullptr;
const ValueDecl *VD = nullptr;

if (const auto *DRE = dyn_cast<DeclRefExpr>(Subobject)) {
// We're pointing to the beginning of the struct.
VD = DRE->getDecl();
QualType Ty = VD->getType();
if (Ty->isPointerType())
Ty = Ty->getPointeeType();
OuterRD = Ty->getAsRecordDecl();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm reading this correctly, I think this case is redundant: getFieldInfo only succeeds when VD is a field, but we're not going to have an evaluated DeclRefExpr that names a field. Can we return 0, 0 in this case, like we do for compound literals? I think the only case when we have non-zero values to return is when we've found a FieldDecl.

} else if (const auto *ME = dyn_cast<MemberExpr>(Subobject)) {
VD = ME->getMemberDecl();
OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
} else {
if (!isa<CompoundLiteralExpr>(Subobject))
llvm_unreachable("unexpected expression");

// We encounter a CompoundLiteralExpr if we have something like this:
//
// __builtin_dynamic_object_size(&(struct x){ 1, 2, 3 }, 1)
//
// In that case, we want the size of the whole struct. So we don't have to
// worry about finding a suboject.
return 0;
}

if (!VD || !OuterRD)
// The expression is referencing an object that's not in a struct.
return 0;

return getFieldInfo(CGF, OuterRD->getDefinition(), VD);
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
/// it)
/// - A call to the @llvm.objectsize intrinsic
///
/// - An Argument if E is a param with the pass_object_size attribute on
/// it,
/// - An Instruction representing the calculation of the value, when a
/// flexible array member is involved, or
/// - A call to the @llvm.objectsize intrinsic.
///
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
Expand Down Expand Up @@ -1084,18 +1204,29 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
}
}

// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
// evaluate E for side-effects. In either case, we shouldn't lower to
// @llvm.objectsize.
if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
return getDefaultBuiltinObjectSizeResult(Type, ResType);

Value *SubobjectSize = Builder.getInt64(0);

if (IsDynamic) {
// Emit special code for a flexible array member with the "counted_by"
// attribute.
if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
return V;
}

// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
// evaluate E for side-effects. In either case, we shouldn't lower to
// @llvm.objectsize.
if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
return getDefaultBuiltinObjectSizeResult(Type, ResType);
if ((Type & 1) != 0) {
// The object size is constrained to the sub-object containing the
// element. If it's in a structure, get the size and offset information
// for back-end processing.
uint64_t Info = getSubobjectInfo(*this, E);
if (Info != 0)
SubobjectSize = Builder.getInt64(Info);
}
}

Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
assert(Ptr->getType()->isPointerTy() &&
Expand All @@ -1109,7 +1240,12 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
// For GCC compatibility, __builtin_object_size treat NULL as unknown size.
Value *NullIsUnknown = Builder.getTrue();
Value *Dynamic = Builder.getInt1(IsDynamic);
return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
// If the least significant bit is clear, objects are whole variables. If
// it's set, a closest surrounding subobject is considered the object a
// pointer points to.
Value *WholeObj = Builder.getInt1((Type & 1) == 0);
return Builder.CreateCall(
F, {Ptr, Min, NullIsUnknown, Dynamic, WholeObj, SubobjectSize});
}

namespace {
Expand Down
10 changes: 7 additions & 3 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,13 +742,17 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
// FIXME: If Address Sanitizer is enabled, insert dynamic instrumentation
// to check this.
// FIXME: Get object address space
llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy };
llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys);
llvm::Function *F =
CGM.getIntrinsic(llvm::Intrinsic::objectsize, {IntPtrTy, Int8PtrTy});
llvm::Value *Min = Builder.getFalse();
llvm::Value *NullIsUnknown = Builder.getFalse();
llvm::Value *Dynamic = Builder.getFalse();
llvm::Value *WholeObj = Builder.getTrue();
llvm::Value *SubobjectSize = Builder.getInt64(0);
llvm::Value *LargeEnough = Builder.CreateICmpUGE(
Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}), Size);
Builder.CreateCall(
F, {Ptr, Min, NullIsUnknown, Dynamic, WholeObj, SubobjectSize}),
Size);
Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize));
}
}
Expand Down
Loading