Skip to content

Commit 0775d5b

Browse files
committed
Minor refactoring to address review comments
1 parent bbbd6c9 commit 0775d5b

File tree

2 files changed

+54
-51
lines changed

2 files changed

+54
-51
lines changed

llvm/include/llvm/Analysis/IR2VecAnalysis.h

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ class IR2VecResult;
5353
/// mapping between an entity of the IR (like opcode, type, argument, etc.) and
5454
/// its corresponding embedding.
5555
class IR2VecVocabAnalysis : public AnalysisInfoMixin<IR2VecVocabAnalysis> {
56-
unsigned DIM = 0;
5756
ir2vec::Vocab Vocabulary;
5857
Error readVocabulary();
5958

@@ -67,16 +66,15 @@ class IR2VecVocabAnalysis : public AnalysisInfoMixin<IR2VecVocabAnalysis> {
6766
class IR2VecVocabResult {
6867
ir2vec::Vocab Vocabulary;
6968
bool Valid = false;
70-
unsigned DIM = 0;
7169

7270
public:
7371
IR2VecVocabResult() = default;
74-
IR2VecVocabResult(ir2vec::Vocab &&Vocabulary, unsigned Dim);
72+
IR2VecVocabResult(ir2vec::Vocab &&Vocabulary);
7573

7674
// Helper functions
7775
bool isValid() const { return Valid; }
7876
const ir2vec::Vocab &getVocabulary() const;
79-
unsigned getDimension() const { return DIM; }
77+
unsigned getDimension() const;
8078
bool invalidate(Module &M, const PreservedAnalyses &PA,
8179
ModuleAnalysisManager::Invalidator &Inv);
8280
};
@@ -85,23 +83,22 @@ class IR2VecResult {
8583
SmallMapVector<const Instruction *, ir2vec::Embedding, 128> InstVecMap;
8684
SmallMapVector<const BasicBlock *, ir2vec::Embedding, 16> BBVecMap;
8785
ir2vec::Embedding FuncVector;
88-
unsigned DIM = 0;
8986
bool Valid = false;
9087

9188
public:
9289
IR2VecResult() = default;
9390
IR2VecResult(
94-
SmallMapVector<const Instruction *, ir2vec::Embedding, 128> &&InstMap,
95-
SmallMapVector<const BasicBlock *, ir2vec::Embedding, 16> &&BBMap,
96-
ir2vec::Embedding &&FuncVector, unsigned Dim);
91+
const SmallMapVector<const Instruction *, ir2vec::Embedding, 128>
92+
&&InstMap,
93+
const SmallMapVector<const BasicBlock *, ir2vec::Embedding, 16> &&BBMap,
94+
const ir2vec::Embedding &&FuncVector);
9795
bool isValid() const { return Valid; }
9896

9997
const SmallMapVector<const Instruction *, ir2vec::Embedding, 128> &
10098
getInstVecMap() const;
10199
const SmallMapVector<const BasicBlock *, ir2vec::Embedding, 16> &
102100
getBBVecMap() const;
103101
const ir2vec::Embedding &getFunctionVector() const;
104-
unsigned getDimension() const;
105102
};
106103

107104
/// This analysis provides the IR2Vec embeddings for instructions, basic blocks,

llvm/lib/Analysis/IR2VecAnalysis.cpp

Lines changed: 48 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ STATISTIC(DataMissCounter, "Number of data misses in the vocabulary");
4040
/// IR2VecKind is used to specify the type of embeddings to generate.
4141
// FIXME: Currently we support only Symbolic. Add support for
4242
// Flow-aware in upcoming patches.
43-
enum class IR2VecKind { Symbolic, Flowaware };
43+
enum class IR2VecKind { Symbolic, FlowAware };
4444

4545
static cl::OptionCategory IR2VecAnalysisCategory("IR2Vec Analysis Options");
4646

@@ -49,7 +49,7 @@ cl::opt<IR2VecKind>
4949
cl::desc("Choose type of embeddings to generate:"),
5050
cl::values(clEnumValN(IR2VecKind::Symbolic, "symbolic",
5151
"Generates symbolic embeddings"),
52-
clEnumValN(IR2VecKind::Flowaware, "flowaware",
52+
clEnumValN(IR2VecKind::FlowAware, "flow-aware",
5353
"Generates flow-aware embeddings")),
5454
cl::init(IR2VecKind::Symbolic), cl::cat(IR2VecAnalysisCategory));
5555

@@ -82,19 +82,19 @@ class Embeddings {
8282
/// in the IR instructions to generate the vector representation.
8383
// FIXME: Defaults to the values used in the original algorithm. Can be
8484
// parameterized later.
85-
float WO = 1.0, WT = 0.5, WA = 0.2;
85+
const float OpcWeight = 1.0, TypeWeight = 0.5, ArgWeight = 0.2;
8686

8787
/// Dimension of the vector representation; captured from the input vocabulary
88-
unsigned DIM = 300;
88+
const unsigned Dimension = 300;
8989

9090
// Utility maps - these are used to store the vector representations of
9191
// instructions, basic blocks and functions.
9292
Embedding FuncVector;
9393
SmallMapVector<const BasicBlock *, Embedding, 16> BBVecMap;
9494
SmallMapVector<const Instruction *, Embedding, 128> InstVecMap;
9595

96-
Embeddings(const Function &F, const Vocab &Vocabulary, unsigned DIM)
97-
: F(F), Vocabulary(Vocabulary), DIM(DIM) {}
96+
Embeddings(const Function &F, const Vocab &Vocabulary, unsigned Dimension)
97+
: F(F), Vocabulary(Vocabulary), Dimension(Dimension) {}
9898

9999
/// Lookup vocabulary for a given Key. If the key is not found, it returns a
100100
/// zero vector.
@@ -141,9 +141,9 @@ class Symbolic : public Embeddings {
141141
Embedding computeFunc2Vec();
142142

143143
public:
144-
Symbolic(const Function &F, const Vocab &Vocabulary, unsigned DIM)
145-
: Embeddings(F, Vocabulary, DIM) {
146-
FuncVector = Embedding(DIM, 0);
144+
Symbolic(const Function &F, const Vocab &Vocabulary, unsigned Dimension)
145+
: Embeddings(F, Vocabulary, Dimension) {
146+
FuncVector = Embedding(Dimension, 0);
147147
}
148148
void computeEmbeddings() override;
149149
};
@@ -163,14 +163,15 @@ void addVectors(Embedding &Vec, const Embedding &Vec2) {
163163
// FIXME: Currently lookups are string based. Use numeric Keys
164164
// for efficiency.
165165
Embedding Embeddings::lookupVocab(const std::string &Key) {
166-
Embedding Vec(DIM, 0);
166+
Embedding Vec(Dimension, 0);
167167
// FIXME: Use zero vectors in vocab and assert failure for
168168
// unknown entities rather than silently returning zeroes here.
169-
if (Vocabulary.find(Key) == Vocabulary.end()) {
169+
auto It = Vocabulary.find(Key);
170+
if (It == Vocabulary.end()) {
170171
LLVM_DEBUG(errs() << "cannot find key in map : " << Key << "\n");
171-
DataMissCounter++;
172+
++DataMissCounter;
172173
} else {
173-
Vec = Vocabulary[Key];
174+
Vec = It->second;
174175
}
175176
return Vec;
176177
}
@@ -179,24 +180,27 @@ void Symbolic::computeEmbeddings() {
179180
if (F.isDeclaration())
180181
return;
181182
for (auto &BB : F) {
182-
auto It = BBVecMap.find(&BB);
183-
if (It != BBVecMap.end())
183+
auto Result = BBVecMap.try_emplace(&BB);
184+
if (!Result.second)
184185
continue;
185-
BBVecMap[&BB] = computeBB2Vec(BB);
186-
addVectors(FuncVector, BBVecMap[&BB]);
186+
auto It = Result.first;
187+
It->second = std::move(computeBB2Vec(BB));
188+
addVectors(FuncVector, It->second);
187189
}
188190
}
189191

190192
Embedding Symbolic::computeBB2Vec(const BasicBlock &BB) {
191-
Embedding BBVector(DIM, 0);
193+
Embedding BBVector(Dimension, 0);
192194

193195
for (auto &I : BB) {
194-
Embedding InstVector(DIM, 0);
196+
Embedding InstVector(Dimension, 0);
195197

196198
auto Vec = lookupVocab(I.getOpcodeName());
197-
scaleVector(Vec, WO);
199+
scaleVector(Vec, OpcWeight);
198200
addVectors(InstVector, Vec);
199201

202+
// FIXME: Currently lookups are string based. Use numeric Keys
203+
// for efficiency.
200204
auto Type = I.getType();
201205
if (Type->isVoidTy()) {
202206
Vec = lookupVocab("voidTy");
@@ -225,10 +229,10 @@ Embedding Symbolic::computeBB2Vec(const BasicBlock &BB) {
225229
} else {
226230
Vec = lookupVocab("unknownTy");
227231
}
228-
scaleVector(Vec, WT);
232+
scaleVector(Vec, TypeWeight);
229233
addVectors(InstVector, Vec);
230234

231-
for (auto &Op : I.operands()) {
235+
for (const auto &Op : I.operands()) {
232236
Embedding Vec;
233237
if (isa<Function>(Op)) {
234238
Vec = lookupVocab("function");
@@ -239,7 +243,7 @@ Embedding Symbolic::computeBB2Vec(const BasicBlock &BB) {
239243
} else {
240244
Vec = lookupVocab("variable");
241245
}
242-
scaleVector(Vec, WA);
246+
scaleVector(Vec, ArgWeight);
243247
addVectors(InstVector, Vec);
244248
}
245249
InstVecMap[&I] = InstVector;
@@ -253,14 +257,19 @@ Embedding Symbolic::computeBB2Vec(const BasicBlock &BB) {
253257
// IR2VecVocabResult and IR2VecVocabAnalysis
254258
//===----------------------------------------------------------------------===//
255259

256-
IR2VecVocabResult::IR2VecVocabResult(ir2vec::Vocab &&Vocabulary, unsigned Dim)
257-
: Vocabulary(std::move(Vocabulary)), Valid(true), DIM(Dim) {}
260+
IR2VecVocabResult::IR2VecVocabResult(ir2vec::Vocab &&Vocabulary)
261+
: Vocabulary(std::move(Vocabulary)), Valid(true) {}
258262

259263
const ir2vec::Vocab &IR2VecVocabResult::getVocabulary() const {
260264
assert(Valid);
261265
return Vocabulary;
262266
}
263267

268+
unsigned IR2VecVocabResult::getDimension() const {
269+
assert(Valid);
270+
return Vocabulary.begin()->second.size();
271+
}
272+
264273
// For now, assume vocabulary is stable unless explicitly invalidated.
265274
bool IR2VecVocabResult::invalidate(Module &M, const PreservedAnalyses &PA,
266275
ModuleAnalysisManager::Invalidator &Inv) {
@@ -269,7 +278,7 @@ bool IR2VecVocabResult::invalidate(Module &M, const PreservedAnalyses &PA,
269278
}
270279

271280
// FIXME: Make this optional. We can avoid file reads
272-
// by auto-generating the vocabulary during the build time.
281+
// by auto-generating a default vocabulary during the build time.
273282
Error IR2VecVocabAnalysis::readVocabulary() {
274283
auto BufOrError = MemoryBuffer::getFileOrSTDIN(VocabFile, /*IsText=*/true);
275284
if (!BufOrError) {
@@ -295,7 +304,6 @@ Error IR2VecVocabAnalysis::readVocabulary() {
295304
return Entry.second.size() == Dim;
296305
}) &&
297306
"All vectors in the vocabulary are not of the same dimension");
298-
this->DIM = Dim;
299307
return Error::success();
300308
}
301309

@@ -313,19 +321,19 @@ IR2VecVocabAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
313321
});
314322
return IR2VecVocabResult();
315323
}
316-
return IR2VecVocabResult(std::move(Vocabulary), DIM);
324+
return IR2VecVocabResult(std::move(Vocabulary));
317325
}
318326

319327
//===----------------------------------------------------------------------===//
320328
// IR2VecResult and IR2VecAnalysis
321329
//===----------------------------------------------------------------------===//
322330

323331
IR2VecResult::IR2VecResult(
324-
SmallMapVector<const Instruction *, Embedding, 128> &&InstMap,
325-
SmallMapVector<const BasicBlock *, Embedding, 16> &&BBMap,
326-
Embedding &&FuncVector, unsigned Dim)
332+
const SmallMapVector<const Instruction *, Embedding, 128> &&InstMap,
333+
const SmallMapVector<const BasicBlock *, Embedding, 16> &&BBMap,
334+
const Embedding &&FuncVector)
327335
: InstVecMap(std::move(InstMap)), BBVecMap(std::move(BBMap)),
328-
FuncVector(std::move(FuncVector)), DIM(Dim), Valid(true) {}
336+
FuncVector(std::move(FuncVector)), Valid(true) {}
329337

330338
const SmallMapVector<const Instruction *, Embedding, 128> &
331339
IR2VecResult::getInstVecMap() const {
@@ -341,7 +349,6 @@ const Embedding &IR2VecResult::getFunctionVector() const {
341349
assert(Valid);
342350
return FuncVector;
343351
}
344-
unsigned IR2VecResult::getDimension() const { return DIM; }
345352

346353
IR2VecAnalysis::Result IR2VecAnalysis::run(Function &F,
347354
FunctionAnalysisManager &FAM) {
@@ -365,19 +372,18 @@ IR2VecAnalysis::Result IR2VecAnalysis::run(Function &F,
365372
case IR2VecKind::Symbolic:
366373
Emb = std::make_unique<Symbolic>(F, Vocabulary, Dim);
367374
break;
368-
case IR2VecKind::Flowaware:
375+
case IR2VecKind::FlowAware:
369376
// FIXME: Add support for flow-aware embeddings
370-
llvm_unreachable("Flow-aware embeddings are not supported yet");
371-
break;
372377
default:
373-
llvm_unreachable("Invalid IR2Vec mode");
378+
Ctx->emitError("Invalid IR2Vec mode");
379+
return IR2VecResult();
374380
}
381+
375382
Emb->computeEmbeddings();
376-
auto InstMap = Emb->getInstVecMap();
377-
auto BBMap = Emb->getBBVecMap();
378-
auto FuncVec = Emb->getFunctionVector();
379-
return IR2VecResult(std::move(InstMap), std::move(BBMap), std::move(FuncVec),
380-
Dim);
383+
auto &InstMap = Emb->getInstVecMap();
384+
auto &BBMap = Emb->getBBVecMap();
385+
auto &FuncVec = Emb->getFunctionVector();
386+
return IR2VecResult(std::move(InstMap), std::move(BBMap), std::move(FuncVec));
381387
}
382388

383389
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)