Skip to content

Commit ac181d7

Browse files
committed
[AllocToken, Clang] Implement TypeHashPointerSplit mode
Implement the TypeHashPointerSplit mode. This mode assigns a token ID based on the hash of the allocated type's name, where the top half of the ID space is reserved for types that contain pointers and the bottom half for types that do not contain pointers. This mode with max tokens of 2 (`-falloc-token-max=2`) may also be valuable for heap hardening strategies that simply separate pointer types from non-pointer types. Make it the new default mode. Link: https://discourse.llvm.org/t/rfc-a-framework-for-allocator-partitioning-hints/87434 Pull Request: llvm#156840
1 parent 50e424d commit ac181d7

File tree

12 files changed

+369
-33
lines changed

12 files changed

+369
-33
lines changed

clang/docs/AllocToken.rst

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,18 @@ Token Assignment Mode
3131

3232
The default mode to calculate tokens is:
3333

34-
* ``typehash``: This mode assigns a token ID based on the hash of the allocated
35-
type's name.
34+
* ``typehashpointersplit``: This mode assigns a token ID based on the hash of
35+
the allocated type's name, where the top half of the ID space is reserved for types
36+
that contain pointers and the bottom half for types that do not contain
37+
pointers.
3638

3739
Other token ID assignment modes are supported, but they may be subject to
3840
change or removal. These may (experimentally) be selected with ``-mllvm
3941
-alloc-token-mode=<mode>``:
4042

43+
* ``typehash``: This mode assigns a token ID based on the hash of the allocated
44+
type's name.
45+
4146
* ``random``: This mode assigns a statically-determined random token ID to each
4247
allocation site.
4348

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1272,20 +1272,84 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound,
12721272
EmitCheck(std::make_pair(Check, CheckKind), CheckHandler, StaticData, Index);
12731273
}
12741274

1275+
static bool
1276+
typeContainsPointer(QualType T,
1277+
llvm::SmallPtrSet<const RecordDecl *, 4> &VisitedRD,
1278+
bool &IncompleteType) {
1279+
QualType CanonicalType = T.getCanonicalType();
1280+
if (CanonicalType->isPointerType())
1281+
return true; // base case
1282+
1283+
// Look through typedef chain to check for special types.
1284+
for (QualType CurrentT = T; const auto *TT = CurrentT->getAs<TypedefType>();
1285+
CurrentT = TT->getDecl()->getUnderlyingType()) {
1286+
const IdentifierInfo *II = TT->getDecl()->getIdentifier();
1287+
// Special Case: Syntactically uintptr_t is not a pointer; semantically,
1288+
// however, very likely used as such. Therefore, classify uintptr_t as a
1289+
// pointer, too.
1290+
if (II && II->isStr("uintptr_t"))
1291+
return true;
1292+
}
1293+
1294+
// The type is an array; check the element type.
1295+
if (const ArrayType *AT = dyn_cast<ArrayType>(CanonicalType))
1296+
return typeContainsPointer(AT->getElementType(), VisitedRD, IncompleteType);
1297+
// The type is a struct, class, or union.
1298+
if (const RecordDecl *RD = CanonicalType->getAsRecordDecl()) {
1299+
if (!RD->isCompleteDefinition()) {
1300+
IncompleteType = true;
1301+
return false;
1302+
}
1303+
if (!VisitedRD.insert(RD).second)
1304+
return false; // already visited
1305+
// Check all fields.
1306+
for (const FieldDecl *Field : RD->fields()) {
1307+
if (typeContainsPointer(Field->getType(), VisitedRD, IncompleteType))
1308+
return true;
1309+
}
1310+
// For C++ classes, also check base classes.
1311+
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
1312+
// Polymorphic types require a vptr.
1313+
if (CXXRD->isDynamicClass())
1314+
return true;
1315+
for (const CXXBaseSpecifier &Base : CXXRD->bases()) {
1316+
if (typeContainsPointer(Base.getType(), VisitedRD, IncompleteType))
1317+
return true;
1318+
}
1319+
}
1320+
}
1321+
return false;
1322+
}
1323+
12751324
void CodeGenFunction::EmitAllocToken(llvm::CallBase *CB, QualType AllocType) {
12761325
assert(SanOpts.has(SanitizerKind::AllocToken) &&
12771326
"Only needed with -fsanitize=alloc-token");
12781327

1328+
llvm::MDBuilder MDB(getLLVMContext());
1329+
1330+
// Get unique type name.
12791331
PrintingPolicy Policy(CGM.getContext().getLangOpts());
12801332
Policy.SuppressTagKeyword = true;
12811333
Policy.FullyQualifiedName = true;
12821334
SmallString<64> TypeName;
12831335
llvm::raw_svector_ostream TypeNameOS(TypeName);
12841336
AllocType.getCanonicalType().print(TypeNameOS, Policy);
1285-
auto *TypeMDS = llvm::MDString::get(CGM.getLLVMContext(), TypeNameOS.str());
1337+
auto *TypeNameMD = MDB.createString(TypeNameOS.str());
1338+
1339+
// Check if QualType contains a pointer. Implements a simple DFS to
1340+
// recursively check if a type contains a pointer type.
1341+
llvm::SmallPtrSet<const RecordDecl *, 4> VisitedRD;
1342+
bool IncompleteType = false;
1343+
const bool ContainsPtr =
1344+
typeContainsPointer(AllocType, VisitedRD, IncompleteType);
1345+
if (!ContainsPtr && IncompleteType)
1346+
return;
1347+
auto *ContainsPtrC = Builder.getInt1(ContainsPtr);
1348+
auto *ContainsPtrMD = MDB.createConstant(ContainsPtrC);
12861349

1287-
// Format: !{<type-name>}
1288-
auto *MDN = llvm::MDNode::get(CGM.getLLVMContext(), {TypeMDS});
1350+
// Format: !{<type-name>, <contains-pointer>}
1351+
auto *MDN =
1352+
llvm::MDNode::get(CGM.getLLVMContext(), {TypeNameMD, ContainsPtrMD});
12891353
CB->setMetadata(llvm::LLVMContext::MD_alloc_token, MDN);
12901354
}
12911355

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
// RUN: %clang_cc1 -fsanitize=alloc-token -triple x86_64-linux-gnu -std=c++20 -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
2+
3+
#include "../Analysis/Inputs/system-header-simulator-cxx.h"
4+
5+
typedef __UINTPTR_TYPE__ uintptr_t;
6+
7+
extern "C" {
8+
void *malloc(size_t size);
9+
}
10+
11+
// CHECK-LABEL: define dso_local noundef ptr @_Z15test_malloc_intv(
12+
// CHECK: call ptr @malloc(i64 noundef 4)
13+
void *test_malloc_int() {
14+
int *a = (int *)malloc(sizeof(int));
15+
*a = 42;
16+
return a;
17+
}
18+
19+
// CHECK-LABEL: define dso_local noundef ptr @_Z15test_malloc_ptrv(
20+
// CHECK: call ptr @malloc(i64 noundef 8)
21+
int **test_malloc_ptr() {
22+
int **a = (int **)malloc(sizeof(int*));
23+
*a = nullptr;
24+
return a;
25+
}
26+
27+
// CHECK-LABEL: define dso_local noundef ptr @_Z12test_new_intv(
28+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 4){{.*}} !alloc_token [[META_INT:![0-9]+]]
29+
int *test_new_int() {
30+
return new int;
31+
}
32+
33+
// CHECK-LABEL: define dso_local noundef ptr @_Z20test_new_ulong_arrayv(
34+
// CHECK: call noalias noundef nonnull ptr @_Znam(i64 noundef 80){{.*}} !alloc_token [[META_ULONG:![0-9]+]]
35+
unsigned long *test_new_ulong_array() {
36+
return new unsigned long[10];
37+
}
38+
39+
// CHECK-LABEL: define dso_local noundef ptr @_Z12test_new_ptrv(
40+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 8){{.*}} !alloc_token [[META_INTPTR:![0-9]+]]
41+
int **test_new_ptr() {
42+
return new int*;
43+
}
44+
45+
// CHECK-LABEL: define dso_local noundef ptr @_Z18test_new_ptr_arrayv(
46+
// CHECK: call noalias noundef nonnull ptr @_Znam(i64 noundef 80){{.*}} !alloc_token [[META_INTPTR]]
47+
int **test_new_ptr_array() {
48+
return new int*[10];
49+
}
50+
51+
struct ContainsPtr {
52+
int a;
53+
char *buf;
54+
};
55+
56+
// CHECK-LABEL: define dso_local noundef ptr @_Z27test_malloc_struct_with_ptrv(
57+
// CHECK: call ptr @malloc(i64 noundef 16)
58+
ContainsPtr *test_malloc_struct_with_ptr() {
59+
ContainsPtr *c = (ContainsPtr *)malloc(sizeof(ContainsPtr));
60+
return c;
61+
}
62+
63+
// CHECK-LABEL: define dso_local noundef ptr @_Z33test_malloc_struct_array_with_ptrv(
64+
// CHECK: call ptr @malloc(i64 noundef 160)
65+
ContainsPtr *test_malloc_struct_array_with_ptr() {
66+
ContainsPtr *c = (ContainsPtr *)malloc(10 * sizeof(ContainsPtr));
67+
return c;
68+
}
69+
70+
// CHECK-LABEL: define dso_local noundef ptr @_Z32test_operatornew_struct_with_ptrv(
71+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 16)
72+
ContainsPtr *test_operatornew_struct_with_ptr() {
73+
ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(sizeof(ContainsPtr));
74+
return c;
75+
}
76+
77+
// CHECK-LABEL: define dso_local noundef ptr @_Z38test_operatornew_struct_array_with_ptrv(
78+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 160)
79+
ContainsPtr *test_operatornew_struct_array_with_ptr() {
80+
ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(10 * sizeof(ContainsPtr));
81+
return c;
82+
}
83+
84+
// CHECK-LABEL: define dso_local noundef ptr @_Z33test_operatornew_struct_with_ptr2v(
85+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 16)
86+
ContainsPtr *test_operatornew_struct_with_ptr2() {
87+
ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(sizeof(*c));
88+
return c;
89+
}
90+
91+
// CHECK-LABEL: define dso_local noundef ptr @_Z39test_operatornew_struct_array_with_ptr2v(
92+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 160)
93+
ContainsPtr *test_operatornew_struct_array_with_ptr2() {
94+
ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(10 * sizeof(*c));
95+
return c;
96+
}
97+
98+
// CHECK-LABEL: define dso_local noundef ptr @_Z24test_new_struct_with_ptrv(
99+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 16){{.*}} !alloc_token [[META_CONTAINSPTR:![0-9]+]]
100+
ContainsPtr *test_new_struct_with_ptr() {
101+
return new ContainsPtr;
102+
}
103+
104+
// CHECK-LABEL: define dso_local noundef ptr @_Z30test_new_struct_array_with_ptrv(
105+
// CHECK: call noalias noundef nonnull ptr @_Znam(i64 noundef 160){{.*}} !alloc_token [[META_CONTAINSPTR]]
106+
ContainsPtr *test_new_struct_array_with_ptr() {
107+
return new ContainsPtr[10];
108+
}
109+
110+
class TestClass {
111+
public:
112+
void Foo();
113+
~TestClass();
114+
int data[16];
115+
};
116+
117+
// CHECK-LABEL: define dso_local noundef ptr @_Z14test_new_classv(
118+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 64){{.*}} !alloc_token [[META_TESTCLASS:![0-9]+]]
119+
TestClass *test_new_class() {
120+
return new TestClass();
121+
}
122+
123+
// CHECK-LABEL: define dso_local noundef ptr @_Z20test_new_class_arrayv(
124+
// CHECK: call noalias noundef nonnull ptr @_Znam(i64 noundef 648){{.*}} !alloc_token [[META_TESTCLASS]]
125+
TestClass *test_new_class_array() {
126+
return new TestClass[10];
127+
}
128+
129+
// Test that we detect that virtual classes have an implicit vtable pointer.
130+
class VirtualTestClass {
131+
public:
132+
virtual void Foo();
133+
virtual ~VirtualTestClass();
134+
int data[16];
135+
};
136+
137+
// CHECK-LABEL: define dso_local noundef ptr @_Z22test_new_virtual_classv(
138+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 72){{.*}} !alloc_token [[META_VIRTUALTESTCLASS:![0-9]+]]
139+
VirtualTestClass *test_new_virtual_class() {
140+
return new VirtualTestClass();
141+
}
142+
143+
// CHECK-LABEL: define dso_local noundef ptr @_Z28test_new_virtual_class_arrayv(
144+
// CHECK: call noalias noundef nonnull ptr @_Znam(i64 noundef 728){{.*}} !alloc_token [[META_VIRTUALTESTCLASS]]
145+
VirtualTestClass *test_new_virtual_class_array() {
146+
return new VirtualTestClass[10];
147+
}
148+
149+
// uintptr_t is treated as a pointer.
150+
struct MyStructUintptr {
151+
int a;
152+
uintptr_t ptr;
153+
};
154+
155+
// CHECK-LABEL: define dso_local noundef ptr @_Z18test_uintptr_isptrv(
156+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 16){{.*}} !alloc_token [[META_MYSTRUCTUINTPTR:![0-9]+]]
157+
MyStructUintptr *test_uintptr_isptr() {
158+
return new MyStructUintptr;
159+
}
160+
161+
using uptr = uintptr_t;
162+
// CHECK-LABEL: define dso_local noundef ptr @_Z19test_uintptr_isptr2v(
163+
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef 8){{.*}} !alloc_token [[META_UINTPTR:![0-9]+]]
164+
uptr *test_uintptr_isptr2() {
165+
return new uptr;
166+
}
167+
168+
// CHECK: [[META_INT]] = !{!"int", i1 false}
169+
// CHECK: [[META_ULONG]] = !{!"unsigned long", i1 false}
170+
// CHECK: [[META_INTPTR]] = !{!"int *", i1 true}
171+
// CHECK: [[META_CONTAINSPTR]] = !{!"ContainsPtr", i1 true}
172+
// CHECK: [[META_TESTCLASS]] = !{!"TestClass", i1 false}
173+
// CHECK: [[META_VIRTUALTESTCLASS]] = !{!"VirtualTestClass", i1 true}
174+
// CHECK: [[META_MYSTRUCTUINTPTR]] = !{!"MyStructUintptr", i1 true}
175+
// CHECK: [[META_UINTPTR]] = !{!"unsigned long", i1 true}

clang/test/CodeGenCXX/alloc-token.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,5 +137,5 @@ TestClass *test_new_class_array() {
137137
return arr;
138138
}
139139

140-
// CHECK: [[META_INT]] = !{!"int"}
141-
// CHECK: [[META_TESTCLASS]] = !{!"TestClass"}
140+
// CHECK: [[META_INT]] = !{!"int", i1 false}
141+
// CHECK: [[META_TESTCLASS]] = !{!"TestClass", i1 true}

llvm/docs/LangRef.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8588,13 +8588,14 @@ functions, and contains richer semantic information about the type of the
85888588
allocation. This information is consumed by the ``alloc-token`` pass to
85898589
instrument such calls with allocation token IDs.
85908590

8591-
The metadata contains a string with the type of an allocation.
8591+
The metadata contains a string with the type of an allocation, and a boolean
8592+
denoting if the type contains a pointer.
85928593

85938594
.. code-block:: none
85948595

85958596
call ptr @malloc(i64 64), !alloc_token !0
85968597

8597-
!0 = !{!"<type-name>"}
8598+
!0 = !{!"<type-name>", i1 <contains-pointer>}
85988599

85998600
Module Flags Metadata
86008601
=====================

llvm/lib/IR/Verifier.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5398,8 +5398,10 @@ void Verifier::visitCapturesMetadata(Instruction &I, const MDNode *Captures) {
53985398

53995399
void Verifier::visitAllocTokenMetadata(Instruction &I, MDNode *MD) {
54005400
Check(isa<CallBase>(I), "!alloc_token should only exist on calls", &I);
5401-
Check(MD->getNumOperands() == 1, "!alloc_token must have 1 operand", MD);
5401+
Check(MD->getNumOperands() == 2, "!alloc_token must have 2 operands", MD);
54025402
Check(isa<MDString>(MD->getOperand(0)), "expected string", MD);
5403+
Check(mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(1)),
5404+
"expected integer constant", MD);
54035405
}
54045406

54055407
/// verifyInstruction - Verify that an instruction is well formed.

0 commit comments

Comments
 (0)