Skip to content

Commit e0117a3

Browse files
committed
[FuzzMutate] RandomIRBuilder has more source and sink types now.
Source and Sink are required when generating a new instruction. (These terms were defined by the previous author; in LLVM terms they are probably Use and User.) Previously, only instructions in the same block were considered when choosing a source or a sink. In this patch, more source and sink types are considered. For sources, we have SrcFromInstInCurBlock, FunctionArgument, InstInDominator, SrcFromGlobalVariable, and NewConstOrStack. For sinks, we have SinkToInstInCurBlock, PointersInDominator, InstInDominatee, NewStore, and SinkToGlobalVariable. A unit test is included to make sure that a source always dominates the instruction, and that the instruction always dominates its sink. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D139907
1 parent 6128bcd commit e0117a3

File tree

3 files changed

+541
-52
lines changed

3 files changed

+541
-52
lines changed

llvm/include/llvm/FuzzMutate/RandomIRBuilder.h

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,16 @@
1818
#include <random>
1919

2020
namespace llvm {
21+
class AllocaInst;
2122
class BasicBlock;
23+
class Function;
24+
class GlobalVariable;
2225
class Instruction;
2326
class LLVMContext;
2427
class Type;
2528
class Value;
29+
class Module;
30+
2631
namespace fuzzerop {
2732
class SourcePred;
2833
}
@@ -38,6 +43,23 @@ struct RandomIRBuilder {
3843

3944
// TODO: Try to make this a bit less of a random mishmash of functions.
4045

46+
/// Create a stack memory at the head of the function, store \c Init to the
47+
/// memory if provided.
48+
AllocaInst *createStackMemory(Function *F, Type *Ty, Value *Init = nullptr);
49+
/// Find or create a global variable. A newly created variable is initialized
50+
/// with a random constant that satisfies \c Pred. The returned bool reports
51+
/// whether the variable was created (true) or found (false).
52+
std::pair<GlobalVariable *, bool>
53+
findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
54+
fuzzerop::SourcePred Pred);
55+
/// The places findOrCreateSource may take an operand ("source") from. The
/// strategies are tried in a random order until one yields a value.
enum SourceType {
56+
/// An instruction from the candidate list supplied by the caller.
SrcFromInstInCurBlock,
57+
/// An argument of the function that contains the current block.
FunctionArgument,
58+
/// An instruction in a block that dominates the current block.
InstInDominator,
59+
/// A load from a global variable, which is either found or newly created.
SrcFromGlobalVariable,
60+
/// A fresh value produced by newSource.
NewConstOrStack,
61+
/// Number of source types; used as a loop bound, never a valid choice.
EndOfValueSource,
62+
};
4163
/// Find a "source" for some operation, which will be used in one of the
4264
/// operation's operands. This either selects an instruction in \c Insts or
4365
/// returns some new arbitrary Value.
@@ -54,11 +76,22 @@ struct RandomIRBuilder {
5476
Value *newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
5577
ArrayRef<Value *> Srcs, fuzzerop::SourcePred Pred,
5678
bool allowConstant = true);
79+
80+
/// The places connectToSink may attach a value to a user ("sink"). The
/// strategies are tried in a random order until one succeeds.
enum SinkType {
81+
/// TODO: Also consider pointers in function argument.
82+
/// Replace a compatible operand of an instruction in the caller's list.
SinkToInstInCurBlock,
83+
/// Store the value through a pointer-typed instruction of a dominating block.
PointersInDominator,
84+
/// Replace a compatible operand of an instruction in a dominated block.
InstInDominatee,
85+
/// Create a new store of the value via newSink.
NewStore,
86+
/// Store the value to a global variable, which is found or newly created.
SinkToGlobalVariable,
87+
/// Number of sink types; used as a loop bound, never a valid choice.
EndOfValueSink,
88+
};
5789
/// Find a viable user for \c V in \c Insts, which should all be contained in
5890
/// \c BB. This may also create some new instruction in \c BB and use that.
59-
void connectToSink(BasicBlock &BB, ArrayRef<Instruction *> Insts, Value *V);
91+
Instruction *connectToSink(BasicBlock &BB, ArrayRef<Instruction *> Insts,
92+
Value *V);
6093
/// Create a user for \c V in \c BB.
61-
void newSink(BasicBlock &BB, ArrayRef<Instruction *> Insts, Value *V);
94+
Instruction *newSink(BasicBlock &BB, ArrayRef<Instruction *> Insts, Value *V);
6295
Value *findPointer(BasicBlock &BB, ArrayRef<Instruction *> Insts,
6396
ArrayRef<Value *> Srcs, fuzzerop::SourcePred Pred);
6497
Type *chooseType(LLVMContext &Context, ArrayRef<Value *> Srcs,

llvm/lib/FuzzMutate/RandomIRBuilder.cpp

Lines changed: 241 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,89 @@
1313
#include "llvm/IR/BasicBlock.h"
1414
#include "llvm/IR/Constants.h"
1515
#include "llvm/IR/DataLayout.h"
16+
#include "llvm/IR/Dominators.h"
17+
#include "llvm/IR/Function.h"
1618
#include "llvm/IR/Instructions.h"
1719
#include "llvm/IR/IntrinsicInst.h"
20+
#include "llvm/IR/Module.h"
1821

1922
using namespace llvm;
2023
using namespace fuzzerop;
2124

25+
/// Return a vector of Blocks that dominates this block, excluding current
26+
/// block.
27+
static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
28+
std::vector<BasicBlock *> ret;
29+
DominatorTree DT(*BB->getParent());
30+
DomTreeNode *Node = DT[BB]->getIDom();
31+
while (Node && Node->getBlock()) {
32+
ret.push_back(Node->getBlock());
33+
// Get parent block.
34+
Node = Node->getIDom();
35+
}
36+
return ret;
37+
}
38+
39+
/// Return a vector of Blocks that is dominated by this block, excluding current
40+
/// block
41+
static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
42+
DominatorTree DT(*BB->getParent());
43+
std::vector<BasicBlock *> ret;
44+
for (DomTreeNode *Child : DT[BB]->children())
45+
ret.push_back(Child->getBlock());
46+
uint64_t Idx = 0;
47+
while (Idx < ret.size()) {
48+
DomTreeNode *Node = DT[ret[Idx]];
49+
Idx++;
50+
for (DomTreeNode *Child : Node->children())
51+
ret.push_back(Child->getBlock());
52+
}
53+
return ret;
54+
}
55+
56+
/// Create a stack slot of type \p Ty in the entry block of \p F and return it.
///
/// The alloca is placed at the entry block's first insertion point. When
/// \p Init is non-null, a store of \p Init into the new slot is emitted
/// directly after the alloca. If the entry block has no insertion point yet,
/// both instructions are appended to the block instead.
AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
                                               Value *Init) {
  /// TODO: For all Allocas, maybe allocate an array.
  BasicBlock *EntryBB = &F->getEntryBlock();
  // Borrow the module's data layout rather than constructing a copy.
  const DataLayout &DL = F->getParent()->getDataLayout();
  const unsigned AddrSpace = DL.getAllocaAddrSpace();

  auto InsertPt = EntryBB->getFirstInsertionPt();
  if (InsertPt == EntryBB->end()) {
    // Nothing to insert in front of; dereferencing end() would be invalid.
    AllocaInst *Alloca = new AllocaInst(Ty, AddrSpace, "A", EntryBB);
    if (Init)
      new StoreInst(Init, Alloca, EntryBB);
    return Alloca;
  }

  Instruction *InsertBefore = &*InsertPt;
  AllocaInst *Alloca = new AllocaInst(Ty, AddrSpace, "A", InsertBefore);
  // Inserting before the same instruction puts the store right after the
  // alloca.
  if (Init)
    new StoreInst(Init, Alloca, InsertBefore);
  return Alloca;
}
67+
68+
std::pair<GlobalVariable *, bool>
69+
RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
70+
fuzzerop::SourcePred Pred) {
71+
auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
72+
// Can't directly compare GV's type, as it would be a pointer to the actual
73+
// type.
74+
return Pred.matches(Srcs, UndefValue::get(GV->getValueType()));
75+
};
76+
bool DidCreate = false;
77+
SmallVector<GlobalVariable *, 4> GlobalVars;
78+
for (GlobalVariable &GV : M->globals()) {
79+
GlobalVars.push_back(&GV);
80+
}
81+
auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred));
82+
RS.sample(nullptr, 1);
83+
GlobalVariable *GV = RS.getSelection();
84+
if (!GV) {
85+
DidCreate = true;
86+
using LinkageTypes = GlobalVariable::LinkageTypes;
87+
auto TRS = makeSampler<Constant *>(Rand);
88+
TRS.sample(Pred.generate(Srcs, KnownTypes));
89+
Constant *Init = TRS.getSelection();
90+
Type *Ty = Init->getType();
91+
GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
92+
"G", nullptr,
93+
GlobalValue::ThreadLocalMode::NotThreadLocal,
94+
M->getDataLayout().getDefaultGlobalsAddressSpace());
95+
}
96+
return {GV, DidCreate};
97+
}
98+
2299
Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
23100
ArrayRef<Instruction *> Insts) {
24101
return findOrCreateSource(BB, Insts, {}, anyType());
@@ -29,15 +106,83 @@ Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
29106
ArrayRef<Value *> Srcs,
30107
SourcePred Pred,
31108
bool allowConstant) {
32-
auto MatchesPred = [&Srcs, &Pred](Instruction *Inst) {
33-
return Pred.matches(Srcs, Inst);
34-
};
35-
auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
36-
// Also consider choosing no source, meaning we want a new one.
37-
RS.sample(nullptr, /*Weight=*/1);
38-
if (Instruction *Src = RS.getSelection())
39-
return Src;
40-
return newSource(BB, Insts, Srcs, Pred, allowConstant);
109+
auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); };
110+
SmallVector<uint64_t, 8> SrcTys;
111+
for (uint64_t i = 0; i < EndOfValueSource; i++)
112+
SrcTys.push_back(i);
113+
std::shuffle(SrcTys.begin(), SrcTys.end(), Rand);
114+
for (uint64_t SrcTy : SrcTys) {
115+
switch (SrcTy) {
116+
case SrcFromInstInCurBlock: {
117+
auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
118+
if (!RS.isEmpty()) {
119+
return RS.getSelection();
120+
}
121+
break;
122+
}
123+
case FunctionArgument: {
124+
Function *F = BB.getParent();
125+
SmallVector<Argument *, 8> Args;
126+
for (uint64_t i = 0; i < F->arg_size(); i++) {
127+
Args.push_back(F->getArg(i));
128+
}
129+
auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred));
130+
if (!RS.isEmpty()) {
131+
return RS.getSelection();
132+
}
133+
break;
134+
}
135+
case InstInDominator: {
136+
auto Dominators = getDominators(&BB);
137+
std::shuffle(Dominators.begin(), Dominators.end(), Rand);
138+
for (BasicBlock *Dom : Dominators) {
139+
SmallVector<Instruction *, 16> Instructions;
140+
for (Instruction &I : *Dom) {
141+
Instructions.push_back(&I);
142+
}
143+
auto RS =
144+
makeSampler(Rand, make_filter_range(Instructions, MatchesPred));
145+
// Also consider choosing no source, meaning we want a new one.
146+
if (!RS.isEmpty()) {
147+
return RS.getSelection();
148+
}
149+
}
150+
break;
151+
}
152+
case SrcFromGlobalVariable: {
153+
Module *M = BB.getParent()->getParent();
154+
auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
155+
Type *Ty = GV->getValueType();
156+
LoadInst *LoadGV = nullptr;
157+
if (BB.getTerminator()) {
158+
LoadGV = new LoadInst(Ty, GV, "LGV", &*BB.getFirstInsertionPt());
159+
} else {
160+
LoadGV = new LoadInst(Ty, GV, "LGV", &BB);
161+
}
162+
// Because we might be generating new values, we have to check if it
163+
// matches again.
164+
if (DidCreate) {
165+
if (Pred.matches(Srcs, LoadGV)) {
166+
return LoadGV;
167+
}
168+
LoadGV->eraseFromParent();
169+
// If no one is using this GlobalVariable, delete it too.
170+
if (GV->use_empty()) {
171+
GV->eraseFromParent();
172+
}
173+
}
174+
break;
175+
}
176+
case NewConstOrStack: {
177+
return newSource(BB, Insts, Srcs, Pred, allowConstant);
178+
}
179+
default:
180+
case EndOfValueSource: {
181+
llvm_unreachable("EndOfValueSource executed");
182+
}
183+
}
184+
}
185+
llvm_unreachable("Can't find a source");
41186
}
42187

43188
Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
@@ -76,12 +221,7 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
76221
if (!allowConstant && isa<Constant>(newSrc)) {
77222
Type *Ty = newSrc->getType();
78223
Function *F = BB.getParent();
79-
BasicBlock *EntryBB = &F->getEntryBlock();
80-
/// TODO: For all Allocas, maybe allocate an array.
81-
DataLayout DL(BB.getParent()->getParent());
82-
AllocaInst *Alloca = new AllocaInst(Ty, DL.getProgramAddressSpace(), "A",
83-
EntryBB->getTerminator());
84-
new StoreInst(newSrc, Alloca, EntryBB->getTerminator());
224+
AllocaInst *Alloca = createStackMemory(F, Ty, newSrc);
85225
if (BB.getTerminator()) {
86226
newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", BB.getTerminator());
87227
} else {
@@ -119,48 +259,106 @@ static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
119259
if (OperandNo >= 1)
120260
return false;
121261
break;
262+
case Instruction::Call:
263+
case Instruction::Invoke:
264+
case Instruction::CallBr: {
265+
const CallBase *II = cast<CallBase>(I);
266+
const Function *Callee = II->getCalledFunction();
267+
return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg);
268+
}
122269
default:
123270
break;
124271
}
125272
return true;
126273
}
127274

128-
void RandomIRBuilder::connectToSink(BasicBlock &BB,
129-
ArrayRef<Instruction *> Insts, Value *V) {
130-
auto RS = makeSampler<Use *>(Rand);
131-
for (auto &I : Insts) {
132-
if (isa<IntrinsicInst>(I))
133-
// TODO: Replacing operands of intrinsics would be interesting, but
134-
// there's no easy way to verify that a given replacement is valid given
135-
// that intrinsics can impose arbitrary constraints.
136-
continue;
137-
for (Use &U : I->operands())
138-
if (isCompatibleReplacement(I, U, V))
139-
RS.sample(&U, 1);
140-
}
141-
// Also consider choosing no sink, meaning we want a new one.
142-
RS.sample(nullptr, /*Weight=*/1);
143-
144-
if (Use *Sink = RS.getSelection()) {
145-
User *U = Sink->getUser();
146-
unsigned OpNo = Sink->getOperandNo();
147-
U->setOperand(OpNo, V);
148-
return;
275+
/// Attach \p V to a user ("sink") and return that user.
///
/// Every SinkType is tried once, in a random order, until one succeeds:
///  - SinkToInstInCurBlock: replace a compatible operand of an instruction in
///    \p Insts with \p V.
///  - PointersInDominator: store \p V through the first pointer-typed
///    instruction found in a (randomly ordered) dominating block.
///  - InstInDominatee: replace a compatible operand of an instruction in a
///    block dominated by \p BB.
///  - NewStore: defer to newSink.
///  - SinkToGlobalVariable: store \p V to a found or newly created global.
///
/// New stores are inserted before Insts.back(), so \p Insts is expected to be
/// non-empty.
Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
                                            ArrayRef<Instruction *> Insts,
                                            Value *V) {
  SmallVector<uint64_t, 8> SinkTys;
  for (uint64_t i = 0; i < EndOfValueSink; i++)
    SinkTys.push_back(i);
  std::shuffle(SinkTys.begin(), SinkTys.end(), Rand);

  // Sample one operand slot among Instructions that may legally take V,
  // rewrite it to V, and return its user; return nullptr if there is none.
  auto findSinkAndConnect =
      [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
    auto RS = makeSampler<Use *>(Rand);
    for (Instruction *I : Instructions)
      for (Use &U : I->operands())
        if (isCompatibleReplacement(I, U, V))
          RS.sample(&U, 1);
    if (RS.isEmpty())
      return nullptr;
    Use *Sink = RS.getSelection();
    User *U = Sink->getUser();
    U->setOperand(Sink->getOperandNo(), V);
    return cast<Instruction>(U);
  };

  for (uint64_t SinkTy : SinkTys) {
    switch (SinkTy) {
    case SinkToInstInCurBlock:
      if (Instruction *Sink = findSinkAndConnect(Insts))
        return Sink;
      break;
    case PointersInDominator: {
      auto Dominators = getDominators(&BB);
      std::shuffle(Dominators.begin(), Dominators.end(), Rand);
      for (BasicBlock *Dom : Dominators) {
        for (Instruction &I : *Dom) {
          // Only the type test is needed, not the casted pointer type.
          if (isa<PointerType>(I.getType()))
            return new StoreInst(V, &I, Insts.back());
        }
      }
      break;
    }
    case InstInDominatee: {
      auto Dominatees = getDominatees(&BB);
      std::shuffle(Dominatees.begin(), Dominatees.end(), Rand);
      for (BasicBlock *Dominee : Dominatees) {
        std::vector<Instruction *> Instructions;
        for (Instruction &I : *Dominee)
          Instructions.push_back(&I);
        if (Instruction *Sink = findSinkAndConnect(Instructions))
          return Sink;
      }
      break;
    }
    case NewStore:
      /// TODO: allocate a new stack memory.
      return newSink(BB, Insts, V);
    case SinkToGlobalVariable: {
      Module *M = BB.getParent()->getParent();
      // Whether the global was found or created does not matter here.
      GlobalVariable *GV =
          findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType()))
              .first;
      return new StoreInst(V, GV, Insts.back());
    }
    case EndOfValueSink:
    default:
      llvm_unreachable("EndOfValueSink executed");
    }
  }
  llvm_unreachable("Can't find a sink");
}
152348

153-
void RandomIRBuilder::newSink(BasicBlock &BB, ArrayRef<Instruction *> Insts,
154-
Value *V) {
349+
/// Create a store of \p V and return it. The store is inserted before
/// Insts.back().
///
/// The destination is the pointer chosen by findPointer. When that yields
/// nothing, the destination is picked at random between a new stack slot in
/// the parent function (initialized with undef) and an undef pointer.
Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
                                      ArrayRef<Instruction *> Insts, Value *V) {
  Value *Dest = findPointer(BB, Insts, {V}, matchFirstType());
  if (Dest)
    return new StoreInst(V, Dest, Insts.back());

  // No usable pointer: make one up.
  const bool UseStackSlot = uniform(Rand, 0, 1) != 0;
  if (UseStackSlot) {
    Type *ValTy = V->getType();
    Dest = createStackMemory(BB.getParent(), ValTy, UndefValue::get(ValTy));
  } else {
    Dest = UndefValue::get(PointerType::get(V->getType(), 0));
  }
  return new StoreInst(V, Dest, Insts.back());
}
165363

166364
Value *RandomIRBuilder::findPointer(BasicBlock &BB,

0 commit comments

Comments
 (0)