Skip to content

Commit fd71440

Browse files
committed
[AllocToken, Clang] Infer type hints from sizeof expressions and casts
For the AllocToken pass to accurately calculate token ID hints, we should attach `!alloc_token` metadata for allocation calls to avoid reverting to LLVM IR-type based hints (which depend on later "uses" and are rather imprecise). Unlike new expressions, untyped allocation calls (like `malloc`, `calloc`, `::operator new(..)`, `__builtin_operator_new`, etc.) have no syntactic type associated with them. For -fsanitize=alloc-token, type hints are sufficient, and we can attempt to infer the type based on common idioms. When encountering allocation calls (with `__attribute__((malloc))` or `__attribute__((alloc_size(..)))`), attach `!alloc_token` by inferring the allocated type from (a) sizeof argument expressions such as `malloc(sizeof(MyType))`, and (b) casts such as `(MyType*)malloc(4096)`. Note that non-standard allocation functions with these attributes are not instrumented by default. Use `-fsanitize-alloc-token-extended` to instrument them as well. Link: https://discourse.llvm.org/t/rfc-a-framework-for-allocator-partitioning-hints/87434 Pull Request: llvm#156841
1 parent ac181d7 commit fd71440

File tree

10 files changed

+318
-73
lines changed

10 files changed

+318
-73
lines changed

clang/docs/AllocToken.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,35 @@ which encodes the token ID hint in the allocation function name.
122122
This ABI provides a more efficient alternative where
123123
``-falloc-token-max`` is small.
124124

125+
Instrumenting Non-Standard Allocation Functions
126+
-----------------------------------------------
127+
128+
By default, AllocToken only instruments standard library allocation functions.
129+
This simplifies adoption, as a compatible allocator only needs to provide
130+
token-enabled variants for a well-defined set of standard functions.
131+
132+
To extend instrumentation to custom allocation functions, enable broader
133+
coverage with ``-fsanitize-alloc-token-extended``. Such functions must be
134+
marked with the `malloc
135+
<https://clang.llvm.org/docs/AttributeReference.html#malloc>`_ or `alloc_size
136+
<https://clang.llvm.org/docs/AttributeReference.html#alloc-size>`_ attributes
137+
(or a combination).
138+
139+
For example:
140+
141+
.. code-block:: c
142+
143+
void *custom_malloc(size_t size) __attribute__((malloc));
144+
void *my_malloc(size_t size) __attribute__((alloc_size(1)));
145+
146+
// Original:
147+
ptr1 = custom_malloc(size);
148+
ptr2 = my_malloc(size);
149+
150+
// Instrumented:
151+
ptr1 = __alloc_token_custom_malloc(size, token_id);
152+
ptr2 = __alloc_token_my_malloc(size, token_id);
153+
125154
Disabling Instrumentation
126155
-------------------------
127156

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 125 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "clang/AST/Attr.h"
3131
#include "clang/AST/DeclObjC.h"
3232
#include "clang/AST/NSAPI.h"
33+
#include "clang/AST/ParentMapContext.h"
3334
#include "clang/AST/StmtVisitor.h"
3435
#include "clang/Basic/Builtins.h"
3536
#include "clang/Basic/CodeGenOptions.h"
@@ -1353,6 +1354,115 @@ void CodeGenFunction::EmitAllocToken(llvm::CallBase *CB, QualType AllocType) {
13531354
CB->setMetadata(llvm::LLVMContext::MD_alloc_token, MDN);
13541355
}
13551356

1357+
namespace {
1358+
/// Infer type from a simple sizeof expression.
1359+
QualType inferTypeFromSizeofExpr(const Expr *E) {
1360+
const Expr *Arg = E->IgnoreParenImpCasts();
1361+
if (const auto *UET = dyn_cast<UnaryExprOrTypeTraitExpr>(Arg)) {
1362+
if (UET->getKind() == UETT_SizeOf) {
1363+
if (UET->isArgumentType())
1364+
return UET->getArgumentTypeInfo()->getType();
1365+
else
1366+
return UET->getArgumentExpr()->getType();
1367+
}
1368+
}
1369+
return QualType();
1370+
}
1371+
1372+
/// Infer type from an arithmetic expression involving a sizeof. For example:
1373+
///
1374+
/// malloc(sizeof(MyType) + padding); // infers 'MyType'
1375+
/// malloc(sizeof(MyType) * 32); // infers 'MyType'
1376+
/// malloc(32 * sizeof(MyType)); // infers 'MyType'
1377+
/// malloc(sizeof(MyType) << 1); // infers 'MyType'
1378+
/// ...
1379+
///
1380+
/// More complex arithmetic expressions are supported, but are a heuristic, e.g.
1381+
/// when considering allocations for structs with flexible array members:
1382+
///
1383+
/// malloc(sizeof(HasFlexArray) + sizeof(int) * 32); // infers 'HasFlexArray'
1384+
///
1385+
QualType inferPossibleTypeFromArithSizeofExpr(const Expr *E) {
1386+
const Expr *Arg = E->IgnoreParenImpCasts();
1387+
// The argument is a lone sizeof expression.
1388+
if (QualType T = inferTypeFromSizeofExpr(Arg); !T.isNull())
1389+
return T;
1390+
if (const auto *BO = dyn_cast<BinaryOperator>(Arg)) {
1391+
// Argument is an arithmetic expression. Cover common arithmetic patterns
1392+
// involving sizeof.
1393+
switch (BO->getOpcode()) {
1394+
case BO_Add:
1395+
case BO_Div:
1396+
case BO_Mul:
1397+
case BO_Shl:
1398+
case BO_Shr:
1399+
case BO_Sub:
1400+
if (QualType T = inferPossibleTypeFromArithSizeofExpr(BO->getLHS());
1401+
!T.isNull())
1402+
return T;
1403+
if (QualType T = inferPossibleTypeFromArithSizeofExpr(BO->getRHS());
1404+
!T.isNull())
1405+
return T;
1406+
break;
1407+
default:
1408+
break;
1409+
}
1410+
}
1411+
return QualType();
1412+
}
1413+
1414+
/// If the expression E is a reference to a variable, infer the type from a
1415+
/// variable's initializer if it contains a sizeof. Beware, this is a heuristic
1416+
/// and ignores whether the variable is later reassigned. For example:
1417+
///
1418+
/// size_t my_size = sizeof(MyType);
1419+
/// void *x = malloc(my_size); // infers 'MyType'
1420+
///
1421+
QualType inferPossibleTypeFromVarInitSizeofExpr(const Expr *E) {
1422+
const Expr *Arg = E->IgnoreParenImpCasts();
1423+
if (const auto *DRE = dyn_cast<DeclRefExpr>(Arg)) {
1424+
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
1425+
if (const Expr *Init = VD->getInit())
1426+
return inferPossibleTypeFromArithSizeofExpr(Init);
1427+
}
1428+
}
1429+
return QualType();
1430+
}
1431+
1432+
/// Deduces the allocated type by checking if the allocation call's result
1433+
/// is immediately used in a cast expression. For example:
1434+
///
1435+
/// MyType *x = (MyType *)malloc(4096); // infers 'MyType'
1436+
///
1437+
QualType inferPossibleTypeFromCastExpr(const CallExpr *CallE,
1438+
const CastExpr *CastE) {
1439+
if (!CastE)
1440+
return QualType();
1441+
QualType PtrType = CastE->getType();
1442+
if (PtrType->isPointerType())
1443+
return PtrType->getPointeeType();
1444+
return QualType();
1445+
}
1446+
} // end anonymous namespace
1447+
1448+
void CodeGenFunction::EmitAllocToken(llvm::CallBase *CB, const CallExpr *E) {
1449+
QualType AllocType;
1450+
// First check arguments.
1451+
for (const Expr *Arg : E->arguments()) {
1452+
AllocType = inferPossibleTypeFromArithSizeofExpr(Arg);
1453+
if (AllocType.isNull())
1454+
AllocType = inferPossibleTypeFromVarInitSizeofExpr(Arg);
1455+
if (!AllocType.isNull())
1456+
break;
1457+
}
1458+
// Then check later casts.
1459+
if (AllocType.isNull())
1460+
AllocType = inferPossibleTypeFromCastExpr(E, CurCast);
1461+
// Emit if we were able to infer the type.
1462+
if (!AllocType.isNull())
1463+
EmitAllocToken(CB, AllocType);
1464+
}
1465+
13561466
CodeGenFunction::ComplexPairTy CodeGenFunction::
13571467
EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
13581468
bool isInc, bool isPre) {
@@ -5723,6 +5833,9 @@ LValue CodeGenFunction::EmitConditionalOperatorLValue(
57235833
/// are permitted with aggregate result, including noop aggregate casts, and
57245834
/// cast from scalar to union.
57255835
LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
5836+
auto RestoreCurCast =
5837+
llvm::make_scope_exit([this, Prev = CurCast] { CurCast = Prev; });
5838+
CurCast = E;
57265839
switch (E->getCastKind()) {
57275840
case CK_ToVoid:
57285841
case CK_BitCast:
@@ -6668,16 +6781,24 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
66686781
RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &LocalCallOrInvoke,
66696782
E == MustTailCall, E->getExprLoc());
66706783

6671-
// Generate function declaration DISuprogram in order to be used
6672-
// in debug info about call sites.
6673-
if (CGDebugInfo *DI = getDebugInfo()) {
6674-
if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
6784+
if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
6785+
// Generate function declaration DISubprogram in order to be used
6786+
// in debug info about call sites.
6787+
if (CGDebugInfo *DI = getDebugInfo()) {
66756788
FunctionArgList Args;
66766789
QualType ResTy = BuildFunctionArgList(CalleeDecl, Args);
66776790
DI->EmitFuncDeclForCallSite(LocalCallOrInvoke,
66786791
DI->getFunctionType(CalleeDecl, ResTy, Args),
66796792
CalleeDecl);
66806793
}
6794+
if (CalleeDecl->hasAttr<RestrictAttr>() ||
6795+
CalleeDecl->hasAttr<AllocSizeAttr>()) {
6796+
// Function has 'malloc' (aka. 'restrict') or 'alloc_size' attribute.
6797+
if (SanOpts.has(SanitizerKind::AllocToken)) {
6798+
// Set !alloc_token metadata.
6799+
EmitAllocToken(LocalCallOrInvoke, E);
6800+
}
6801+
}
66816802
}
66826803
if (CallOrInvoke)
66836804
*CallOrInvoke = LocalCallOrInvoke;

clang/lib/CodeGen/CGExprCXX.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,8 +1371,16 @@ RValue CodeGenFunction::EmitBuiltinNewDeleteCall(const FunctionProtoType *Type,
13711371

13721372
for (auto *Decl : Ctx.getTranslationUnitDecl()->lookup(Name))
13731373
if (auto *FD = dyn_cast<FunctionDecl>(Decl))
1374-
if (Ctx.hasSameType(FD->getType(), QualType(Type, 0)))
1375-
return EmitNewDeleteCall(*this, FD, Type, Args);
1374+
if (Ctx.hasSameType(FD->getType(), QualType(Type, 0))) {
1375+
RValue RV = EmitNewDeleteCall(*this, FD, Type, Args);
1376+
if (auto *CB = dyn_cast_if_present<llvm::CallBase>(RV.getScalarVal())) {
1377+
if (SanOpts.has(SanitizerKind::AllocToken)) {
1378+
// Set !alloc_token metadata.
1379+
EmitAllocToken(CB, TheCall);
1380+
}
1381+
}
1382+
return RV;
1383+
}
13761384
llvm_unreachable("predeclared global operator new/delete is missing");
13771385
}
13781386

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "clang/Basic/DiagnosticTrap.h"
3434
#include "clang/Basic/TargetInfo.h"
3535
#include "llvm/ADT/APFixedPoint.h"
36+
#include "llvm/ADT/ScopeExit.h"
3637
#include "llvm/IR/Argument.h"
3738
#include "llvm/IR/CFG.h"
3839
#include "llvm/IR/Constants.h"
@@ -2434,6 +2435,10 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
24342435
// have to handle a more broad range of conversions than explicit casts, as they
24352436
// handle things like function to ptr-to-function decay etc.
24362437
Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
2438+
auto RestoreCurCast =
2439+
llvm::make_scope_exit([this, Prev = CGF.CurCast] { CGF.CurCast = Prev; });
2440+
CGF.CurCast = CE;
2441+
24372442
Expr *E = CE->getSubExpr();
24382443
QualType DestTy = CE->getType();
24392444
CastKind Kind = CE->getCastKind();

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,10 @@ class CodeGenFunction : public CodeGenTypeCache {
346346
QualType FnRetTy;
347347
llvm::Function *CurFn = nullptr;
348348

349+
/// If a cast expression is being visited, this holds the current cast's
350+
/// expression.
351+
const CastExpr *CurCast = nullptr;
352+
349353
/// Save Parameter Decl for coroutine.
350354
llvm::SmallVector<const ParmVarDecl *, 4> FnArgs;
351355

@@ -3350,6 +3354,9 @@ class CodeGenFunction : public CodeGenTypeCache {
33503354

33513355
/// Emit additional metadata used by the AllocToken instrumentation.
33523356
void EmitAllocToken(llvm::CallBase *CB, QualType AllocType);
3357+
/// Emit additional metadata used by the AllocToken instrumentation,
3358+
/// inferring the type from an allocation call expression.
3359+
void EmitAllocToken(llvm::CallBase *CB, const CallExpr *E);
33533360

33543361
llvm::Value *GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FD,
33553362
const FieldDecl *CountDecl);

clang/test/CodeGen/alloc-token-lower.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ typedef __typeof(sizeof(int)) size_t;
1010
void *malloc(size_t size);
1111

1212
// CHECK-LABEL: @test_malloc(
13-
// CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 4, i64 0)
13+
// CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 4, i64 2689373973731826898){{.*}} !alloc_token [[META_INT:![0-9]+]]
1414
void *test_malloc() {
1515
return malloc(sizeof(int));
1616
}
@@ -20,3 +20,15 @@ void *test_malloc() {
2020
void *no_sanitize_malloc(size_t size) __attribute__((no_sanitize("alloc-token"))) {
2121
return malloc(sizeof(int));
2222
}
23+
24+
// By default, we should not be touching malloc-attributed non-libcall
25+
// functions: there might be an arbitrary number of these, and a compatible
26+
// allocator will only implement standard allocation functions.
27+
void *nonstandard_malloc(size_t size) __attribute__((malloc));
28+
// CHECK-LABEL: @test_nonlibcall_malloc(
29+
// CHECK: call{{.*}} ptr @nonstandard_malloc(i64 noundef 4){{.*}} !alloc_token [[META_INT]]
30+
void *test_nonlibcall_malloc() {
31+
return nonstandard_malloc(sizeof(int));
32+
}
33+
34+
// CHECK: [[META_INT]] = !{!"int", i1 false}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// RUN: %clang_cc1 -fsanitize=alloc-token -fsanitize-alloc-token-extended -triple x86_64-linux-gnu -emit-llvm -disable-llvm-passes %s -o - | FileCheck --check-prefixes=CHECK,CHECK-CODEGEN %s
2+
// RUN: %clang_cc1 -fsanitize=alloc-token -fsanitize-alloc-token-extended -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,CHECK-LOWER %s
3+
// RUN: %clang_cc1 -O -fsanitize=alloc-token -fsanitize-alloc-token-extended -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,CHECK-LOWER %s
4+
5+
typedef __typeof(sizeof(int)) size_t;
6+
typedef size_t gfp_t;
7+
8+
void *custom_malloc(size_t size) __attribute__((malloc));
9+
void *__kmalloc(size_t size, gfp_t flags) __attribute__((alloc_size(1)));
10+
11+
void *sink;
12+
13+
// CHECK-LABEL: @test_nonlibcall_alloc(
14+
// CHECK-CODEGEN: call noalias ptr @custom_malloc(i64 noundef 4){{.*}} !alloc_token [[META_INT:![0-9]+]]
15+
// CHECK-CODEGEN: call ptr @__kmalloc(i64 noundef 4, i64 noundef 0){{.*}} !alloc_token [[META_INT]]
16+
// CHECK-LOWER: call{{.*}} noalias ptr @__alloc_token_custom_malloc(i64 noundef 4, i64 2689373973731826898){{.*}} !alloc_token [[META_INT:![0-9]+]]
17+
// CHECK-LOWER: call{{.*}} ptr @__alloc_token___kmalloc(i64 noundef 4, i64 noundef 0, i64 2689373973731826898){{.*}} !alloc_token [[META_INT]]
18+
void test_nonlibcall_alloc() {
19+
sink = custom_malloc(sizeof(int));
20+
sink = __kmalloc(sizeof(int), 0);
21+
}
22+
23+
// CHECK: [[META_INT]] = !{!"int", i1 false}

clang/test/CodeGen/alloc-token.c

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,36 +2,39 @@
22

33
typedef __typeof(sizeof(int)) size_t;
44

5-
void *aligned_alloc(size_t alignment, size_t size);
6-
void *malloc(size_t size);
7-
void *calloc(size_t num, size_t size);
8-
void *realloc(void *ptr, size_t size);
9-
void *reallocarray(void *ptr, size_t nmemb, size_t size);
10-
void *memalign(size_t alignment, size_t size);
11-
void *valloc(size_t size);
12-
void *pvalloc(size_t size);
5+
void *aligned_alloc(size_t alignment, size_t size) __attribute__((malloc));
6+
void *malloc(size_t size) __attribute__((malloc));
7+
void *calloc(size_t num, size_t size) __attribute__((malloc));
8+
void *realloc(void *ptr, size_t size) __attribute__((malloc));
9+
void *reallocarray(void *ptr, size_t nmemb, size_t size) __attribute__((malloc));
10+
void *memalign(size_t alignment, size_t size) __attribute__((malloc));
11+
void *valloc(size_t size) __attribute__((malloc));
12+
void *pvalloc(size_t size) __attribute__((malloc));
1313
int posix_memalign(void **memptr, size_t alignment, size_t size);
1414

1515
void *sink;
1616

1717
// CHECK-LABEL: define dso_local void @test_malloc_like(
18-
// CHECK: call ptr @malloc(i64 noundef 4)
19-
// CHECK: call ptr @calloc(i64 noundef 3, i64 noundef 4)
20-
// CHECK: call ptr @realloc(ptr noundef {{.*}}, i64 noundef 8)
21-
// CHECK: call ptr @reallocarray(ptr noundef {{.*}}, i64 noundef 5, i64 noundef 8)
22-
// CHECK: call align 128 ptr @aligned_alloc(i64 noundef 128, i64 noundef 1024)
23-
// CHECK: call align 16 ptr @memalign(i64 noundef 16, i64 noundef 256)
24-
// CHECK: call ptr @valloc(i64 noundef 4096)
25-
// CHECK: call ptr @pvalloc(i64 noundef 8192)
18+
// CHECK: call noalias ptr @malloc(i64 noundef 4){{.*}} !alloc_token [[META_INT:![0-9]+]]
19+
// CHECK: call noalias ptr @calloc(i64 noundef 3, i64 noundef 4){{.*}} !alloc_token [[META_INT]]
20+
// CHECK: call noalias ptr @realloc(ptr noundef {{.*}}, i64 noundef 8){{.*}} !alloc_token [[META_LONG:![0-9]+]]
21+
// CHECK: call noalias ptr @reallocarray(ptr noundef {{.*}}, i64 noundef 5, i64 noundef 8), !alloc_token [[META_LONG]]
22+
// CHECK: call noalias align 128 ptr @aligned_alloc(i64 noundef 128, i64 noundef 4){{.*}} !alloc_token [[META_INT]]
23+
// CHECK: call noalias align 16 ptr @memalign(i64 noundef 16, i64 noundef 4){{.*}} !alloc_token [[META_INT]]
24+
// CHECK: call noalias ptr @valloc(i64 noundef 4), !alloc_token [[META_INT]]
25+
// CHECK: call noalias ptr @pvalloc(i64 noundef 4), !alloc_token [[META_INT]]
2626
// CHECK: call i32 @posix_memalign(ptr noundef @sink, i64 noundef 64, i64 noundef 4)
2727
void test_malloc_like() {
2828
sink = malloc(sizeof(int));
2929
sink = calloc(3, sizeof(int));
3030
sink = realloc(sink, sizeof(long));
3131
sink = reallocarray(sink, 5, sizeof(long));
32-
sink = aligned_alloc(128, 1024);
33-
sink = memalign(16, 256);
34-
sink = valloc(4096);
35-
sink = pvalloc(8192);
36-
posix_memalign(&sink, 64, sizeof(int));
32+
sink = aligned_alloc(128, sizeof(int));
33+
sink = memalign(16, sizeof(int));
34+
sink = valloc(sizeof(int));
35+
sink = pvalloc(sizeof(int));
36+
posix_memalign(&sink, 64, sizeof(int)); // FIXME: support posix_memalign
3737
}
38+
39+
// CHECK: [[META_INT]] = !{!"int", i1 false}
40+
// CHECK: [[META_LONG]] = !{!"long", i1 false}

0 commit comments

Comments
 (0)