Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions clang/docs/AllocToken.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,35 @@ which encodes the token ID hint in the allocation function name.
This ABI provides a more efficient alternative where
``-falloc-token-max`` is small.

Instrumenting Non-Standard Allocation Functions
-----------------------------------------------

By default, AllocToken only instruments standard library allocation functions.
This simplifies adoption, as a compatible allocator only needs to provide
token-enabled variants for a well-defined set of standard functions.

To extend instrumentation to custom allocation functions, enable broader
coverage with ``-fsanitize-alloc-token-extended``. Such functions must be
marked with the `malloc
<https://clang.llvm.org/docs/AttributeReference.html#malloc>`_ or `alloc_size
<https://clang.llvm.org/docs/AttributeReference.html#alloc-size>`_ attribute
(or both).

For example:

.. code-block:: c

void *custom_malloc(size_t size) __attribute__((malloc));
void *my_malloc(size_t size) __attribute__((alloc_size(1)));

// Original:
ptr1 = custom_malloc(size);
ptr2 = my_malloc(size);

// Instrumented:
ptr1 = __alloc_token_custom_malloc(size, token_id);
ptr2 = __alloc_token_my_malloc(size, token_id);

Disabling Instrumentation
-------------------------

Expand Down
129 changes: 125 additions & 4 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/NSAPI.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
Expand Down Expand Up @@ -1353,6 +1354,115 @@ void CodeGenFunction::EmitAllocToken(llvm::CallBase *CB, QualType AllocType) {
CB->setMetadata(llvm::LLVMContext::MD_alloc_token, MDN);
}

namespace {
/// Return the operand type of a plain `sizeof` expression.
///
/// Parentheses and implicit casts around \p E are stripped first. If what
/// remains is not a `sizeof`, a null QualType is returned.
QualType inferTypeFromSizeofExpr(const Expr *E) {
  const auto *SizeofE =
      dyn_cast<UnaryExprOrTypeTraitExpr>(E->IgnoreParenImpCasts());
  if (!SizeofE || SizeofE->getKind() != UETT_SizeOf)
    return QualType();
  // sizeof(T) names the type directly; sizeof(expr) uses the type of expr.
  return SizeofE->isArgumentType()
             ? SizeofE->getArgumentTypeInfo()->getType()
             : SizeofE->getArgumentExpr()->getType();
}

/// Search an arithmetic expression for a sizeof and return its operand type.
/// Typical allocation-size patterns that are recognized:
///
///   malloc(sizeof(MyType) + padding);   // infers 'MyType'
///   malloc(sizeof(MyType) * 32);        // infers 'MyType'
///   malloc(32 * sizeof(MyType));        // infers 'MyType'
///   malloc(sizeof(MyType) << 1);        // infers 'MyType'
///
/// Operands of +, -, *, /, << and >> are searched recursively, left-hand side
/// before right-hand side, and the first sizeof found wins. This is only a
/// heuristic; e.g. for a struct with a flexible array member:
///
///   malloc(sizeof(HasFlexArray) + sizeof(int) * 32); // infers 'HasFlexArray'
///
/// Returns a null QualType if no sizeof is found.
QualType inferPossibleTypeFromArithSizeofExpr(const Expr *E) {
  const Expr *Stripped = E->IgnoreParenImpCasts();

  // Simplest case: the expression itself is a sizeof.
  QualType Direct = inferTypeFromSizeofExpr(Stripped);
  if (!Direct.isNull())
    return Direct;

  const auto *BinOp = dyn_cast<BinaryOperator>(Stripped);
  if (!BinOp)
    return QualType();

  // Only descend through the arithmetic operators commonly used to compute
  // allocation sizes; anything else ends the search.
  switch (BinOp->getOpcode()) {
  case BO_Add:
  case BO_Sub:
  case BO_Mul:
  case BO_Div:
  case BO_Shl:
  case BO_Shr:
    break;
  default:
    return QualType();
  }

  // Prefer a sizeof found in the left operand over one in the right operand.
  QualType FromLHS = inferPossibleTypeFromArithSizeofExpr(BinOp->getLHS());
  if (!FromLHS.isNull())
    return FromLHS;
  return inferPossibleTypeFromArithSizeofExpr(BinOp->getRHS());
}

/// When \p E refers to a variable, try to infer a type from a sizeof that
/// appears in that variable's initializer. For example:
///
///   size_t my_size = sizeof(MyType);
///   void *x = malloc(my_size);  // infers 'MyType'
///
/// This is a heuristic: only the initializer is inspected, so a later
/// reassignment of the variable is ignored. Returns a null QualType if \p E
/// is not a variable reference, the variable has no initializer, or the
/// initializer contains no recognizable sizeof.
QualType inferPossibleTypeFromVarInitSizeofExpr(const Expr *E) {
  const auto *Ref = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts());
  if (!Ref)
    return QualType();
  const auto *Var = dyn_cast<VarDecl>(Ref->getDecl());
  if (!Var)
    return QualType();
  const Expr *Init = Var->getInit();
  return Init ? inferPossibleTypeFromArithSizeofExpr(Init) : QualType();
}

/// Infer the allocated type from a cast applied to the allocation call's
/// result. For example:
///
///   MyType *x = (MyType *)malloc(4096);  // infers 'MyType'
///
/// \param CallE the allocation call; not inspected by this function.
/// \param CastE the cast to examine, may be null.
/// \returns the pointee type if \p CastE casts to a pointer type, otherwise a
///          null QualType.
QualType inferPossibleTypeFromCastExpr(const CallExpr *CallE,
                                       const CastExpr *CastE) {
  if (!CastE)
    return QualType();
  const QualType CastTy = CastE->getType();
  return CastTy->isPointerType() ? CastTy->getPointeeType() : QualType();
}
} // end anonymous namespace

/// Infer the allocated type for the allocation call \p E and, if successful,
/// attach the alloc-token metadata to \p CB via the QualType overload.
///
/// Inference order:
///   1. Each call argument in order: a sizeof (possibly within arithmetic),
///      then a sizeof in the initializer of a referenced variable.
///   2. The cast currently being emitted (CurCast), if any.
/// Nothing is emitted when no type can be inferred.
void CodeGenFunction::EmitAllocToken(llvm::CallBase *CB, const CallExpr *E) {
  QualType Inferred;
  // The first argument that yields a type wins.
  for (const Expr *CallArg : E->arguments()) {
    Inferred = inferPossibleTypeFromArithSizeofExpr(CallArg);
    if (!Inferred.isNull())
      break;
    Inferred = inferPossibleTypeFromVarInitSizeofExpr(CallArg);
    if (!Inferred.isNull())
      break;
  }
  // No argument helped; fall back to the enclosing cast, if there is one.
  if (Inferred.isNull())
    Inferred = inferPossibleTypeFromCastExpr(E, CurCast);
  // Without an inferred type there is nothing to record.
  if (Inferred.isNull())
    return;
  EmitAllocToken(CB, Inferred);
}

CodeGenFunction::ComplexPairTy CodeGenFunction::
EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre) {
Expand Down Expand Up @@ -5723,6 +5833,9 @@ LValue CodeGenFunction::EmitConditionalOperatorLValue(
/// are permitted with aggregate result, including noop aggregate casts, and
/// cast from scalar to union.
LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
auto RestoreCurCast =
llvm::make_scope_exit([this, Prev = CurCast] { CurCast = Prev; });
CurCast = E;
switch (E->getCastKind()) {
case CK_ToVoid:
case CK_BitCast:
Expand Down Expand Up @@ -6668,16 +6781,24 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &LocalCallOrInvoke,
E == MustTailCall, E->getExprLoc());

// Generate function declaration DISubprogram in order to be used
// in debug info about call sites.
if (CGDebugInfo *DI = getDebugInfo()) {
if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
// Generate function declaration DISubprogram in order to be used
// in debug info about call sites.
if (CGDebugInfo *DI = getDebugInfo()) {
FunctionArgList Args;
QualType ResTy = BuildFunctionArgList(CalleeDecl, Args);
DI->EmitFuncDeclForCallSite(LocalCallOrInvoke,
DI->getFunctionType(CalleeDecl, ResTy, Args),
CalleeDecl);
}
if (CalleeDecl->hasAttr<RestrictAttr>() ||
CalleeDecl->hasAttr<AllocSizeAttr>()) {
// Function has 'malloc' (aka. 'restrict') or 'alloc_size' attribute.
if (SanOpts.has(SanitizerKind::AllocToken)) {
// Set !alloc_token metadata.
EmitAllocToken(LocalCallOrInvoke, E);
}
}
}
if (CallOrInvoke)
*CallOrInvoke = LocalCallOrInvoke;
Expand Down
12 changes: 10 additions & 2 deletions clang/lib/CodeGen/CGExprCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1371,8 +1371,16 @@ RValue CodeGenFunction::EmitBuiltinNewDeleteCall(const FunctionProtoType *Type,

for (auto *Decl : Ctx.getTranslationUnitDecl()->lookup(Name))
if (auto *FD = dyn_cast<FunctionDecl>(Decl))
if (Ctx.hasSameType(FD->getType(), QualType(Type, 0)))
return EmitNewDeleteCall(*this, FD, Type, Args);
if (Ctx.hasSameType(FD->getType(), QualType(Type, 0))) {
RValue RV = EmitNewDeleteCall(*this, FD, Type, Args);
if (auto *CB = dyn_cast_if_present<llvm::CallBase>(RV.getScalarVal())) {
if (SanOpts.has(SanitizerKind::AllocToken)) {
// Set !alloc_token metadata.
EmitAllocToken(CB, TheCall);
}
}
return RV;
}
llvm_unreachable("predeclared global operator new/delete is missing");
}

Expand Down
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/CGExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "clang/Basic/DiagnosticTrap.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/APFixedPoint.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
Expand Down Expand Up @@ -2434,6 +2435,10 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
// have to handle a more broad range of conversions than explicit casts, as they
// handle things like function to ptr-to-function decay etc.
Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
auto RestoreCurCast =
llvm::make_scope_exit([this, Prev = CGF.CurCast] { CGF.CurCast = Prev; });
CGF.CurCast = CE;

Expr *E = CE->getSubExpr();
QualType DestTy = CE->getType();
CastKind Kind = CE->getCastKind();
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,10 @@ class CodeGenFunction : public CodeGenTypeCache {
QualType FnRetTy;
llvm::Function *CurFn = nullptr;

/// If a cast expression is being visited, this holds the current cast's
/// expression.
const CastExpr *CurCast = nullptr;

/// Save Parameter Decl for coroutine.
llvm::SmallVector<const ParmVarDecl *, 4> FnArgs;

Expand Down Expand Up @@ -3350,6 +3354,9 @@ class CodeGenFunction : public CodeGenTypeCache {

/// Emit additional metadata used by the AllocToken instrumentation.
void EmitAllocToken(llvm::CallBase *CB, QualType AllocType);
/// Emit additional metadata used by the AllocToken instrumentation,
/// inferring the type from an allocation call expression.
void EmitAllocToken(llvm::CallBase *CB, const CallExpr *E);

llvm::Value *GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FD,
const FieldDecl *CountDecl);
Expand Down
14 changes: 13 additions & 1 deletion clang/test/CodeGen/alloc-token-lower.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ typedef __typeof(sizeof(int)) size_t;
void *malloc(size_t size);

// CHECK-LABEL: @test_malloc(
// CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 4, i64 0)
// CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 4, i64 2689373973731826898){{.*}} !alloc_token [[META_INT:![0-9]+]]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Use a regex instead of hard-coding the token here and in alloc-token-nonlibcalls.c?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're using a stable hash, so this should always be the same.
Or do you mean to be able to refer to the same token via e.g. [[TOKEN_INT]]?
Though we still want to check the exact value, given it's meant to be stable.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just figured that at this level (Clang test), we don't care so much about the exact number, just that there is a number. LLVM tests can have expectations about the exact hash.

But I don't feel particularly strongly about this.

void *test_malloc() {
return malloc(sizeof(int));
}
Expand All @@ -20,3 +20,15 @@ void *test_malloc() {
void *no_sanitize_malloc(size_t size) __attribute__((no_sanitize("alloc-token"))) {
return malloc(sizeof(int));
}

// By default, we should not be touching malloc-attributed non-libcall
// functions: there might be an arbitrary number of these, and a compatible
// allocator will only implement standard allocation functions.
void *nonstandard_malloc(size_t size) __attribute__((malloc));
// CHECK-LABEL: @test_nonlibcall_malloc(
// CHECK: call{{.*}} ptr @nonstandard_malloc(i64 noundef 4){{.*}} !alloc_token [[META_INT]]
void *test_nonlibcall_malloc() {
return nonstandard_malloc(sizeof(int));
}

// CHECK: [[META_INT]] = !{!"int", i1 false}
23 changes: 23 additions & 0 deletions clang/test/CodeGen/alloc-token-nonlibcalls.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: %clang_cc1 -fsanitize=alloc-token -fsanitize-alloc-token-extended -triple x86_64-linux-gnu -emit-llvm -disable-llvm-passes %s -o - | FileCheck --check-prefixes=CHECK,CHECK-CODEGEN %s
// RUN: %clang_cc1 -fsanitize=alloc-token -fsanitize-alloc-token-extended -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,CHECK-LOWER %s
// RUN: %clang_cc1 -O -fsanitize=alloc-token -fsanitize-alloc-token-extended -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,CHECK-LOWER %s

typedef __typeof(sizeof(int)) size_t;
typedef size_t gfp_t;

void *custom_malloc(size_t size) __attribute__((malloc));
void *__kmalloc(size_t size, gfp_t flags) __attribute__((alloc_size(1)));

void *sink;

// CHECK-LABEL: @test_nonlibcall_alloc(
// CHECK-CODEGEN: call noalias ptr @custom_malloc(i64 noundef 4){{.*}} !alloc_token [[META_INT:![0-9]+]]
// CHECK-CODEGEN: call ptr @__kmalloc(i64 noundef 4, i64 noundef 0){{.*}} !alloc_token [[META_INT]]
// CHECK-LOWER: call{{.*}} noalias ptr @__alloc_token_custom_malloc(i64 noundef 4, i64 2689373973731826898){{.*}} !alloc_token [[META_INT:![0-9]+]]
// CHECK-LOWER: call{{.*}} ptr @__alloc_token___kmalloc(i64 noundef 4, i64 noundef 0, i64 2689373973731826898){{.*}} !alloc_token [[META_INT]]
void test_nonlibcall_alloc() {
sink = custom_malloc(sizeof(int));
sink = __kmalloc(sizeof(int), 0);
}

// CHECK: [[META_INT]] = !{!"int", i1 false}
45 changes: 24 additions & 21 deletions clang/test/CodeGen/alloc-token.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,39 @@

typedef __typeof(sizeof(int)) size_t;

void *aligned_alloc(size_t alignment, size_t size);
void *malloc(size_t size);
void *calloc(size_t num, size_t size);
void *realloc(void *ptr, size_t size);
void *reallocarray(void *ptr, size_t nmemb, size_t size);
void *memalign(size_t alignment, size_t size);
void *valloc(size_t size);
void *pvalloc(size_t size);
void *aligned_alloc(size_t alignment, size_t size) __attribute__((malloc));
void *malloc(size_t size) __attribute__((malloc));
void *calloc(size_t num, size_t size) __attribute__((malloc));
void *realloc(void *ptr, size_t size) __attribute__((malloc));
void *reallocarray(void *ptr, size_t nmemb, size_t size) __attribute__((malloc));
void *memalign(size_t alignment, size_t size) __attribute__((malloc));
void *valloc(size_t size) __attribute__((malloc));
void *pvalloc(size_t size) __attribute__((malloc));
int posix_memalign(void **memptr, size_t alignment, size_t size);

void *sink;

// CHECK-LABEL: define dso_local void @test_malloc_like(
// CHECK: call ptr @malloc(i64 noundef 4)
// CHECK: call ptr @calloc(i64 noundef 3, i64 noundef 4)
// CHECK: call ptr @realloc(ptr noundef {{.*}}, i64 noundef 8)
// CHECK: call ptr @reallocarray(ptr noundef {{.*}}, i64 noundef 5, i64 noundef 8)
// CHECK: call align 128 ptr @aligned_alloc(i64 noundef 128, i64 noundef 1024)
// CHECK: call align 16 ptr @memalign(i64 noundef 16, i64 noundef 256)
// CHECK: call ptr @valloc(i64 noundef 4096)
// CHECK: call ptr @pvalloc(i64 noundef 8192)
// CHECK: call noalias ptr @malloc(i64 noundef 4){{.*}} !alloc_token [[META_INT:![0-9]+]]
// CHECK: call noalias ptr @calloc(i64 noundef 3, i64 noundef 4){{.*}} !alloc_token [[META_INT]]
// CHECK: call noalias ptr @realloc(ptr noundef {{.*}}, i64 noundef 8){{.*}} !alloc_token [[META_LONG:![0-9]+]]
// CHECK: call noalias ptr @reallocarray(ptr noundef {{.*}}, i64 noundef 5, i64 noundef 8), !alloc_token [[META_LONG]]
// CHECK: call noalias align 128 ptr @aligned_alloc(i64 noundef 128, i64 noundef 4){{.*}} !alloc_token [[META_INT]]
// CHECK: call noalias align 16 ptr @memalign(i64 noundef 16, i64 noundef 4){{.*}} !alloc_token [[META_INT]]
// CHECK: call noalias ptr @valloc(i64 noundef 4), !alloc_token [[META_INT]]
// CHECK: call noalias ptr @pvalloc(i64 noundef 4), !alloc_token [[META_INT]]
// CHECK: call i32 @posix_memalign(ptr noundef @sink, i64 noundef 64, i64 noundef 4)
void test_malloc_like() {
sink = malloc(sizeof(int));
sink = calloc(3, sizeof(int));
sink = realloc(sink, sizeof(long));
sink = reallocarray(sink, 5, sizeof(long));
sink = aligned_alloc(128, 1024);
sink = memalign(16, 256);
sink = valloc(4096);
sink = pvalloc(8192);
posix_memalign(&sink, 64, sizeof(int));
sink = aligned_alloc(128, sizeof(int));
sink = memalign(16, sizeof(int));
sink = valloc(sizeof(int));
sink = pvalloc(sizeof(int));
posix_memalign(&sink, 64, sizeof(int)); // FIXME: support posix_memalign
}

// CHECK: [[META_INT]] = !{!"int", i1 false}
// CHECK: [[META_LONG]] = !{!"long", i1 false}
Loading
Loading