Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions clang/docs/AllocToken.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,18 @@ Token Assignment Mode

The default mode to calculate tokens is:

* *TypeHash* (mode=2): This mode assigns a token ID based on the hash of
the allocated type's name.
* *TypeHashPointerSplit* (mode=3): This mode assigns a token ID based on
the hash of the allocated type's name, where the top half ID-space is
reserved for types that contain pointers and the bottom half for types that
do not contain pointers.

Other token ID assignment modes are supported, but they may be subject to
change or removal. These may (experimentally) be selected with ``-mllvm
-alloc-token-mode=<mode>``:

* *TypeHash* (mode=2): This mode assigns a token ID based on the hash of
the allocated type's name.

* *Random* (mode=1): This mode assigns a statically-determined random token ID
to each allocation site.

Expand Down
67 changes: 64 additions & 3 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1277,14 +1277,75 @@ void CodeGenFunction::EmitAllocTokenHint(llvm::CallBase *CB,
assert(SanOpts.has(SanitizerKind::AllocToken) &&
"Only needed with -fsanitize=alloc-token");

llvm::MDBuilder MDB(getLLVMContext());

// Get unique type name.
PrintingPolicy Policy(CGM.getContext().getLangOpts());
Policy.SuppressTagKeyword = true;
Policy.FullyQualifiedName = true;
std::string TypeName = AllocType.getCanonicalType().getAsString(Policy);
auto *TypeMDS = llvm::MDString::get(CGM.getLLVMContext(), TypeName);
auto *TypeNameMD = MDB.createString(TypeName);

// Check if QualType contains a pointer. Implements a simple DFS to
// recursively check if a type contains a pointer type.
llvm::SmallPtrSet<const RecordDecl *, 4> VisitedRD;
bool IncompleteType = false;
auto TypeContainsPtr = [&](auto &&self, QualType T) -> bool {
QualType CanonicalType = T.getCanonicalType();
if (CanonicalType->isPointerType())
return true; // base case

// Look through typedef chain to check for special types.
for (QualType CurrentT = T; const auto *TT = CurrentT->getAs<TypedefType>();
CurrentT = TT->getDecl()->getUnderlyingType()) {
const IdentifierInfo *II = TT->getDecl()->getIdentifier();
if (!II)
continue;
// Special Case: Syntactically uintptr_t is not a pointer; semantically,
// however, very likely used as such. Therefore, classify uintptr_t as a
// pointer, too.
if (II->isStr("uintptr_t"))
return true;
}

// The type is an array; check the element type.
if (const ArrayType *AT = CanonicalType->getAsArrayTypeUnsafe())
return self(self, AT->getElementType());
// The type is a struct, class, or union.
if (const RecordDecl *RD = CanonicalType->getAsRecordDecl()) {
if (!RD->isCompleteDefinition()) {
IncompleteType = true;
return false;
}
if (!VisitedRD.insert(RD).second)
return false; // already visited
// Check all fields.
for (const FieldDecl *Field : RD->fields()) {
if (self(self, Field->getType()))
return true;
}
// For C++ classes, also check base classes.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
// Polymorphic types require a vptr.
if (CXXRD->isPolymorphic())
return true;
for (const CXXBaseSpecifier &Base : CXXRD->bases()) {
if (self(self, Base.getType()))
return true;
}
}
}
return false;
};
const bool ContainsPtr = TypeContainsPtr(TypeContainsPtr, AllocType);
if (!ContainsPtr && IncompleteType)
return;
auto *ContainsPtrC = Builder.getInt1(ContainsPtr);
auto *ContainsPtrMD = MDB.createConstant(ContainsPtrC);

// Format: !{<type-name>}
auto *MDN = llvm::MDNode::get(CGM.getLLVMContext(), {TypeMDS});
// Format: !{<type-name>, <contains-pointer>}
auto *MDN =
llvm::MDNode::get(CGM.getLLVMContext(), {TypeNameMD, ContainsPtrMD});
CB->setMetadata(llvm::LLVMContext::MD_alloc_token_hint, MDN);
}

Expand Down
177 changes: 177 additions & 0 deletions clang/test/CodeGenCXX/alloc-token-pointer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// Check -fsanitize=alloc-token TypeHashPointerSplit mode with only 2
// tokens so we effectively only test the contains-pointer logic.
//
// RUN: %clang_cc1 -fsanitize=alloc-token -falloc-token-max=2 -triple x86_64-linux-gnu -std=c++20 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -O -fsanitize=alloc-token -falloc-token-max=2 -triple x86_64-linux-gnu -std=c++20 -emit-llvm %s -o - | FileCheck %s

#include "../Analysis/Inputs/system-header-simulator-cxx.h"

typedef __UINTPTR_TYPE__ uintptr_t;

extern "C" {
void *malloc(size_t size);
}

// CHECK-LABEL: @_Z15test_malloc_intv(
void *test_malloc_int() {
// CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 4, i64 0)
int *a = (int *)malloc(sizeof(int));
*a = 42;
return a;
}

// CHECK-LABEL: @_Z15test_malloc_ptrv(
int **test_malloc_ptr() {
// FIXME: This should not be token ID 0!
// CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 8, i64 0)
int **a = (int **)malloc(sizeof(int*));
*a = nullptr;
return a;
}

// CHECK-LABEL: @_Z12test_new_intv(
int *test_new_int() {
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 4, i64 0){{.*}} !alloc_token_hint
return new int;
}

// CHECK-LABEL: @_Z20test_new_ulong_arrayv(
unsigned long *test_new_ulong_array() {
// CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 80, i64 0){{.*}} !alloc_token_hint
return new unsigned long[10];
}

// CHECK-LABEL: @_Z12test_new_ptrv(
int **test_new_ptr() {
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 8, i64 1){{.*}} !alloc_token_hint
return new int*;
}

// CHECK-LABEL: @_Z18test_new_ptr_arrayv(
int **test_new_ptr_array() {
// CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 80, i64 1){{.*}} !alloc_token_hint
return new int*[10];
}

struct ContainsPtr {
int a;
char *buf;
};

// CHECK-LABEL: @_Z27test_malloc_struct_with_ptrv(
ContainsPtr *test_malloc_struct_with_ptr() {
// FIXME: This should not be token ID 0!
// CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 16, i64 0)
ContainsPtr *c = (ContainsPtr *)malloc(sizeof(ContainsPtr));
return c;
}

// CHECK-LABEL: @_Z33test_malloc_struct_array_with_ptrv(
ContainsPtr *test_malloc_struct_array_with_ptr() {
// FIXME: This should not be token ID 0!
// CHECK: call{{.*}} ptr @__alloc_token_malloc(i64 noundef 160, i64 0)
ContainsPtr *c = (ContainsPtr *)malloc(10 * sizeof(ContainsPtr));
return c;
}

// CHECK-LABEL: @_Z32test_operatornew_struct_with_ptrv(
ContainsPtr *test_operatornew_struct_with_ptr() {
// FIXME: This should not be token ID 0!
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 0)
ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(sizeof(ContainsPtr));
return c;
}

// CHECK-LABEL: @_Z38test_operatornew_struct_array_with_ptrv(
ContainsPtr *test_operatornew_struct_array_with_ptr() {
// FIXME: This should not be token ID 0!
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 160, i64 0)
ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(10 * sizeof(ContainsPtr));
return c;
}

// CHECK-LABEL: @_Z33test_operatornew_struct_with_ptr2v(
ContainsPtr *test_operatornew_struct_with_ptr2() {
// FIXME: This should not be token ID 0!
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 0)
ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(sizeof(*c));
return c;
}

// CHECK-LABEL: @_Z39test_operatornew_struct_array_with_ptr2v(
ContainsPtr *test_operatornew_struct_array_with_ptr2() {
// FIXME: This should not be token ID 0!
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 160, i64 0)
ContainsPtr *c = (ContainsPtr *)__builtin_operator_new(10 * sizeof(*c));
return c;
}

// CHECK-LABEL: @_Z24test_new_struct_with_ptrv(
ContainsPtr *test_new_struct_with_ptr() {
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 1){{.*}} !alloc_token_hint
return new ContainsPtr;
}

// CHECK-LABEL: @_Z30test_new_struct_array_with_ptrv(
ContainsPtr *test_new_struct_array_with_ptr() {
// CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 160, i64 1){{.*}} !alloc_token_hint
return new ContainsPtr[10];
}

class TestClass {
public:
void Foo();
~TestClass();
int data[16];
};

// CHECK-LABEL: @_Z14test_new_classv(
TestClass *test_new_class() {
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 64, i64 0){{.*}} !alloc_token_hint
return new TestClass();
}

// CHECK-LABEL: @_Z20test_new_class_arrayv(
TestClass *test_new_class_array() {
// CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 648, i64 0){{.*}} !alloc_token_hint
return new TestClass[10];
}

// Test that we detect that virtual classes have implicit vtable pointer.
class VirtualTestClass {
public:
virtual void Foo();
virtual ~VirtualTestClass();
int data[16];
};

// CHECK-LABEL: @_Z22test_new_virtual_classv(
VirtualTestClass *test_new_virtual_class() {
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 72, i64 1){{.*}} !alloc_token_hint
return new VirtualTestClass();
}

// CHECK-LABEL: @_Z28test_new_virtual_class_arrayv(
VirtualTestClass *test_new_virtual_class_array() {
// CHECK: call {{.*}} ptr @__alloc_token_Znam(i64 noundef 728, i64 1){{.*}} !alloc_token_hint
return new VirtualTestClass[10];
}

// uintptr_t is treated as a pointer.
struct MyStructUintptr {
int a;
uintptr_t ptr;
};

// CHECK-LABEL: @_Z18test_uintptr_isptrv(
MyStructUintptr *test_uintptr_isptr() {
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 16, i64 1)
return new MyStructUintptr;
}

using uptr = uintptr_t;
// CHECK-LABEL: @_Z19test_uintptr_isptr2v(
uptr *test_uintptr_isptr2() {
// CHECK: call {{.*}} ptr @__alloc_token_Znwm(i64 noundef 8, i64 1)
return new uptr;
}
57 changes: 51 additions & 6 deletions llvm/lib/Transforms/Instrumentation/AllocToken.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ enum class TokenMode : unsigned {
/// Token ID based on allocated type hash.
TypeHash = 2,

/// Token ID based on allocated type hash, where the top half ID-space is
/// reserved for types that contain pointers and the bottom half for types
/// that do not contain pointers.
TypeHashPointerSplit = 3,

// Mode count - keep last
ModeCount
};
Expand All @@ -89,7 +94,7 @@ struct ModeParser : public cl::parser<unsigned> {

cl::opt<unsigned, false, ModeParser>
ClMode("alloc-token-mode", cl::desc("Token assignment mode"), cl::Hidden,
cl::init(static_cast<unsigned>(TokenMode::TypeHash)));
cl::init(static_cast<unsigned>(TokenMode::TypeHashPointerSplit)));

cl::opt<std::string> ClFuncPrefix("alloc-token-prefix",
cl::desc("The allocation function prefix"),
Expand Down Expand Up @@ -142,16 +147,23 @@ STATISTIC(NumAllocations, "Allocations found");

/// Returns the !alloc_token_hint metadata if available.
///
/// Expected format is: !{<type-name>}
/// Expected format is: !{<type-name>, <contains-pointer>}
MDNode *getAllocTokenHintMetadata(const CallBase &CB) {
MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token_hint);
if (!Ret)
return nullptr;
assert(Ret->getNumOperands() == 1 && "bad !alloc_token_hint");
assert(Ret->getNumOperands() == 2 && "bad !alloc_token_hint");
assert(isa<MDString>(Ret->getOperand(0)));
assert(isa<ConstantAsMetadata>(Ret->getOperand(1)));
return Ret;
}

bool containsPointer(const MDNode *MD) {
ConstantAsMetadata *C = cast<ConstantAsMetadata>(MD->getOperand(1));
auto *CI = cast<ConstantInt>(C->getValue());
return CI->getValue().getBoolValue();
}

class ModeBase {
public:
explicit ModeBase(uint64_t MaxTokens) : MaxTokens(MaxTokens) {}
Expand Down Expand Up @@ -198,12 +210,20 @@ class TypeHashMode : public ModeBase {
using ModeBase::ModeBase;

uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
const auto [N, H] = getHash(CB, ORE);
return N ? boundedToken(H) : H;
}

protected:
std::pair<MDNode *, uint64_t> getHash(const CallBase &CB,
OptimizationRemarkEmitter &ORE) {
if (MDNode *N = getAllocTokenHintMetadata(CB)) {
MDString *S = cast<MDString>(N->getOperand(0));
return boundedToken(xxHash64(S->getString()));
return {N, xxHash64(S->getString())};
}
// Fallback.
remarkNoHint(CB, ORE);
return ClFallbackToken;
return {nullptr, ClFallbackToken};
}

/// Remark that there was no precise type information.
Expand All @@ -219,6 +239,26 @@ class TypeHashMode : public ModeBase {
}
};

/// Implementation for TokenMode::TypeHashPointerSplit.
class TypeHashPointerSplitMode : public TypeHashMode {
public:
using TypeHashMode::TypeHashMode;

uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
if (MaxTokens == 1)
return 0;
const uint64_t HalfTokens =
(MaxTokens ? MaxTokens : std::numeric_limits<uint64_t>::max()) / 2;
const auto [N, H] = getHash(CB, ORE);
if (!N)
return H; // fallback token
uint64_t Hash = H % HalfTokens; // base hash
if (containsPointer(N))
Hash += HalfTokens;
return Hash;
}
};

// Apply opt overrides.
AllocTokenOptions &&transformOptionsFromCl(AllocTokenOptions &&Opts) {
if (!Opts.MaxTokens.has_value())
Expand All @@ -244,6 +284,9 @@ class AllocToken {
case TokenMode::TypeHash:
Mode.emplace<TypeHashMode>(*Options.MaxTokens);
break;
case TokenMode::TypeHashPointerSplit:
Mode.emplace<TypeHashPointerSplitMode>(*Options.MaxTokens);
break;
case TokenMode::ModeCount:
llvm_unreachable("");
break;
Expand Down Expand Up @@ -281,7 +324,9 @@ class AllocToken {
// Cache for replacement functions.
DenseMap<std::pair<LibFunc, uint64_t>, FunctionCallee> TokenAllocFunctions;
// Selected mode.
std::variant<IncrementMode, RandomMode, TypeHashMode> Mode;
std::variant<IncrementMode, RandomMode, TypeHashMode,
TypeHashPointerSplitMode>
Mode;
};

bool AllocToken::instrumentFunction(Function &F) {
Expand Down
Loading