Skip to content

Commit 657e5b5

Browse files
committed
Some minor refactoring
1 parent 9ba9971 commit 657e5b5

File tree

6 files changed

+133
-67
lines changed

6 files changed

+133
-67
lines changed

llvm/include/llvm/Analysis/IR2VecAnalysis.h

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,37 +41,37 @@ class Function;
4141

4242
namespace ir2vec {
4343
using Embedding = std::vector<double>;
44-
// ToDo: Current the keys are strings. This can be changed to
44+
// FIXME: Currently the keys are strings. This can be changed to
4545
// use integers for cheaper lookups.
4646
using Vocab = std::map<std::string, Embedding>;
4747
} // namespace ir2vec
4848

49-
class VocabResult;
49+
class IR2VecVocabResult;
5050
class IR2VecResult;
5151

5252
/// This analysis provides the vocabulary for IR2Vec. The vocabulary provides a
5353
/// mapping between an entity of the IR (like opcode, type, argument, etc.) and
5454
/// its corresponding embedding.
55-
class VocabAnalysis : public AnalysisInfoMixin<VocabAnalysis> {
55+
class IR2VecVocabAnalysis : public AnalysisInfoMixin<IR2VecVocabAnalysis> {
5656
unsigned DIM = 0;
5757
ir2vec::Vocab Vocabulary;
5858
Error readVocabulary();
5959

6060
public:
6161
static AnalysisKey Key;
62-
VocabAnalysis() = default;
63-
using Result = VocabResult;
62+
IR2VecVocabAnalysis() = default;
63+
using Result = IR2VecVocabResult;
6464
Result run(Module &M, ModuleAnalysisManager &MAM);
6565
};
6666

67-
class VocabResult {
67+
class IR2VecVocabResult {
6868
ir2vec::Vocab Vocabulary;
6969
bool Valid = false;
7070
unsigned DIM = 0;
7171

7272
public:
73-
VocabResult() = default;
74-
VocabResult(const ir2vec::Vocab &Vocabulary, unsigned Dim);
73+
IR2VecVocabResult() = default;
74+
IR2VecVocabResult(ir2vec::Vocab &&Vocabulary, unsigned Dim);
7575

7676
// Helper functions
7777
bool isValid() const { return Valid; }
@@ -91,9 +91,9 @@ class IR2VecResult {
9191
public:
9292
IR2VecResult() = default;
9393
IR2VecResult(
94-
SmallMapVector<const Instruction *, ir2vec::Embedding, 128> InstMap,
95-
SmallMapVector<const BasicBlock *, ir2vec::Embedding, 16> BBMap,
96-
const ir2vec::Embedding &FuncVector, unsigned Dim);
94+
SmallMapVector<const Instruction *, ir2vec::Embedding, 128> &&InstMap,
95+
SmallMapVector<const BasicBlock *, ir2vec::Embedding, 16> &&BBMap,
96+
ir2vec::Embedding &&FuncVector, unsigned Dim);
9797
bool isValid() const { return Valid; }
9898

9999
const SmallMapVector<const Instruction *, ir2vec::Embedding, 128> &
@@ -107,9 +107,6 @@ class IR2VecResult {
107107
/// This analysis provides the IR2Vec embeddings for instructions, basic blocks,
108108
/// and functions.
109109
class IR2VecAnalysis : public AnalysisInfoMixin<IR2VecAnalysis> {
110-
bool Avg;
111-
float WO = 1, WT = 0.5, WA = 0.2;
112-
113110
public:
114111
IR2VecAnalysis() = default;
115112
static AnalysisKey Key;

llvm/lib/Analysis/IR2VecAnalysis.cpp

Lines changed: 54 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -31,35 +31,35 @@ using namespace ir2vec;
3131

3232
#define DEBUG_TYPE "ir2vec"
3333

34-
STATISTIC(dataMissCounter, "Number of data misses in the vocabulary");
34+
STATISTIC(DataMissCounter, "Number of data misses in the vocabulary");
3535

3636
/// IR2Vec computes two kinds of embeddings: Symbolic and Flow-aware.
3737
/// Symbolic embeddings capture the "syntactic" and "statistical correlation"
3838
/// of the IR entities. Flow-aware embeddings build on top of symbolic
3939
/// embeddings and additionally capture the flow information in the IR.
4040
/// IR2VecKind is used to specify the type of embeddings to generate.
41-
// ToDo: Currently we support only Symbolic.
42-
// We shall add support for Flow-aware in upcoming patches.
43-
enum IR2VecKind { symbolic, flowaware };
41+
// FIXME: Currently we support only Symbolic. Add support for
42+
// Flow-aware in upcoming patches.
43+
enum class IR2VecKind { Symbolic, Flowaware };
4444

4545
static cl::OptionCategory IR2VecAnalysisCategory("IR2Vec Analysis Options");
4646

4747
cl::opt<IR2VecKind>
4848
IR2VecMode("ir2vec-mode",
4949
cl::desc("Choose type of embeddings to generate:"),
50-
cl::values(clEnumValN(symbolic, "symbolic",
50+
cl::values(clEnumValN(IR2VecKind::Symbolic, "symbolic",
5151
"Generates symbolic embeddings"),
52-
clEnumValN(flowaware, "flowaware",
52+
clEnumValN(IR2VecKind::Flowaware, "flowaware",
5353
"Generates flow-aware embeddings")),
54-
cl::init(symbolic), cl::cat(IR2VecAnalysisCategory));
54+
cl::init(IR2VecKind::Symbolic), cl::cat(IR2VecAnalysisCategory));
5555

56-
// ToDo: Use a default vocab when not specified
56+
// FIXME: Use a default vocab when not specified
5757
static cl::opt<std::string>
5858
VocabFile("ir2vec-vocab-path", cl::Optional,
5959
cl::desc("Path to the vocabulary file for IR2Vec"), cl::init(""),
6060
cl::cat(IR2VecAnalysisCategory));
6161

62-
AnalysisKey VocabAnalysis::Key;
62+
AnalysisKey IR2VecVocabAnalysis::Key;
6363
AnalysisKey IR2VecAnalysis::Key;
6464

6565
//===----------------------------------------------------------------------===//
@@ -80,7 +80,7 @@ class Embeddings {
8080

8181
/// Weights for different entities (like opcode, arguments, types)
8282
/// in the IR instructions to generate the vector representation.
83-
// ToDo: Defaults to the values used in the original algorithm. Can be
83+
// FIXME: Defaults to the values used in the original algorithm. Can be
8484
// parameterized later.
8585
float WO = 1.0, WT = 0.5, WA = 0.2;
8686

@@ -109,9 +109,6 @@ class Embeddings {
109109
/// embeddings being computed.
110110
virtual void computeEmbeddings() = 0;
111111

112-
/// Returns the dimension of the embedding vector.
113-
unsigned getDimension() const { return DIM; }
114-
115112
/// Returns a map containing instructions and the corresponding vector
116113
/// representations for a given module corresponding to the IR2Vec
117114
/// algorithm.
@@ -163,15 +160,15 @@ void addVectors(Embedding &Vec, const Embedding &Vec2) {
163160
std::plus<double>());
164161
}
165162

166-
// ToDo: Currently lookups are string based. Use numeric Keys
163+
// FIXME: Currently lookups are string based. Use numeric Keys
167164
// for efficiency.
168165
Embedding Embeddings::lookupVocab(const std::string &Key) {
169166
Embedding Vec(DIM, 0);
170-
// ToDo: Use zero vectors in vocab and assert failure for
167+
// FIXME: Use zero vectors in vocab and assert failure for
171168
// unknown entities rather than silently returning zeroes here.
172169
if (Vocabulary.find(Key) == Vocabulary.end()) {
173170
LLVM_DEBUG(errs() << "cannot find key in map : " << Key << "\n");
174-
dataMissCounter++;
171+
DataMissCounter++;
175172
} else {
176173
Vec = Vocabulary[Key];
177174
}
@@ -182,16 +179,15 @@ void Symbolic::computeEmbeddings() {
182179
if (F.isDeclaration())
183180
return;
184181
for (auto &BB : F) {
182+
auto It = BBVecMap.find(&BB);
183+
if (It != BBVecMap.end())
184+
continue;
185185
BBVecMap[&BB] = computeBB2Vec(BB);
186186
addVectors(FuncVector, BBVecMap[&BB]);
187187
}
188188
}
189189

190190
Embedding Symbolic::computeBB2Vec(const BasicBlock &BB) {
191-
auto It = BBVecMap.find(&BB);
192-
if (It != BBVecMap.end()) {
193-
return It->second;
194-
}
195191
Embedding BBVector(DIM, 0);
196192

197193
for (auto &I : BB) {
@@ -245,36 +241,36 @@ Embedding Symbolic::computeBB2Vec(const BasicBlock &BB) {
245241
}
246242
scaleVector(Vec, WA);
247243
addVectors(InstVector, Vec);
248-
InstVecMap[&I] = InstVector;
249244
}
245+
InstVecMap[&I] = InstVector;
250246
addVectors(BBVector, InstVector);
251247
}
252248
return BBVector;
253249
}
254250
} // namespace
255251

256252
//===----------------------------------------------------------------------===//
257-
// VocabResult and VocabAnalysis
253+
// IR2VecVocabResult and IR2VecVocabAnalysis
258254
//===----------------------------------------------------------------------===//
259255

260-
VocabResult::VocabResult(const ir2vec::Vocab &Vocabulary, unsigned Dim)
256+
IR2VecVocabResult::IR2VecVocabResult(ir2vec::Vocab &&Vocabulary, unsigned Dim)
261257
: Vocabulary(std::move(Vocabulary)), Valid(true), DIM(Dim) {}
262258

263-
const ir2vec::Vocab &VocabResult::getVocabulary() const {
259+
const ir2vec::Vocab &IR2VecVocabResult::getVocabulary() const {
264260
assert(Valid);
265261
return Vocabulary;
266262
}
267263

268264
// For now, assume vocabulary is stable unless explicitly invalidated.
269-
bool VocabResult::invalidate(Module &M, const PreservedAnalyses &PA,
270-
ModuleAnalysisManager::Invalidator &Inv) {
271-
auto PAC = PA.getChecker<VocabAnalysis>();
265+
bool IR2VecVocabResult::invalidate(Module &M, const PreservedAnalyses &PA,
266+
ModuleAnalysisManager::Invalidator &Inv) {
267+
auto PAC = PA.getChecker<IR2VecVocabAnalysis>();
272268
return !(PAC.preservedWhenStateless());
273269
}
274270

275-
// ToDo: Make this optional. We can avoid file reads
271+
// FIXME: Make this optional. We can avoid file reads
276272
// by auto-generating the vocabulary at build time.
277-
Error VocabAnalysis::readVocabulary() {
273+
Error IR2VecVocabAnalysis::readVocabulary() {
278274
auto BufOrError = MemoryBuffer::getFileOrSTDIN(VocabFile, /*IsText=*/true);
279275
if (!BufOrError) {
280276
return createFileError(VocabFile, BufOrError.getError());
@@ -303,27 +299,31 @@ Error VocabAnalysis::readVocabulary() {
303299
return Error::success();
304300
}
305301

306-
VocabAnalysis::Result VocabAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
302+
IR2VecVocabAnalysis::Result
303+
IR2VecVocabAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
304+
auto Ctx = &M.getContext();
307305
if (VocabFile.empty()) {
308-
// ToDo: Use default vocabulary
309-
errs() << "Error: IR2Vec vocabulary file path not specified.\n";
310-
return VocabResult(); // Return invalid result
306+
// FIXME: Use default vocabulary
307+
Ctx->emitError("IR2Vec vocabulary file path not specified");
308+
return IR2VecVocabResult(); // Return invalid result
311309
}
312-
313-
if (auto Err = readVocabulary())
314-
return VocabResult();
315-
316-
return VocabResult(std::move(Vocabulary), DIM);
310+
if (auto Err = readVocabulary()) {
311+
handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
312+
Ctx->emitError("Error reading vocabulary: " + EI.message());
313+
});
314+
return IR2VecVocabResult();
315+
}
316+
return IR2VecVocabResult(std::move(Vocabulary), DIM);
317317
}
318318

319319
//===----------------------------------------------------------------------===//
320320
// IR2VecResult and IR2VecAnalysis
321321
//===----------------------------------------------------------------------===//
322322

323323
IR2VecResult::IR2VecResult(
324-
const SmallMapVector<const Instruction *, Embedding, 128> InstMap,
325-
const SmallMapVector<const BasicBlock *, Embedding, 16> BBMap,
326-
const Embedding &FuncVector, unsigned Dim)
324+
SmallMapVector<const Instruction *, Embedding, 128> &&InstMap,
325+
SmallMapVector<const BasicBlock *, Embedding, 16> &&BBMap,
326+
Embedding &&FuncVector, unsigned Dim)
327327
: InstVecMap(std::move(InstMap)), BBVecMap(std::move(BBMap)),
328328
FuncVector(std::move(FuncVector)), DIM(Dim), Valid(true) {}
329329

@@ -342,29 +342,31 @@ const Embedding &IR2VecResult::getFunctionVector() const {
342342
return FuncVector;
343343
}
344344
unsigned IR2VecResult::getDimension() const { return DIM; }
345+
345346
IR2VecAnalysis::Result IR2VecAnalysis::run(Function &F,
346347
FunctionAnalysisManager &FAM) {
347348
auto *VocabRes = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
348-
.getCachedResult<VocabAnalysis>(*F.getParent());
349+
.getCachedResult<IR2VecVocabAnalysis>(*F.getParent());
350+
auto Ctx = &F.getContext();
349351
if (!VocabRes->isValid()) {
350-
errs() << "Error: IR2Vec vocabulary is invalid.\n";
352+
Ctx->emitError("IR2Vec vocabulary is invalid");
351353
return IR2VecResult();
352354
}
353355

354356
auto Dim = VocabRes->getDimension();
355357
if (Dim <= 0) {
356-
errs() << "Error: IR2Vec vocabulary dimension is zero.\n";
358+
Ctx->emitError("IR2Vec vocabulary dimension is zero");
357359
return IR2VecResult();
358360
}
359361

360362
auto Vocabulary = VocabRes->getVocabulary();
361363
std::unique_ptr<Embeddings> Emb;
362364
switch (IR2VecMode) {
363-
case IR2VecKind::symbolic:
365+
case IR2VecKind::Symbolic:
364366
Emb = std::make_unique<Symbolic>(F, Vocabulary, Dim);
365367
break;
366-
case flowaware:
367-
// ToDo: Add support for flow-aware embeddings
368+
case IR2VecKind::Flowaware:
369+
// FIXME: Add support for flow-aware embeddings
368370
llvm_unreachable("Flow-aware embeddings are not supported yet");
369371
break;
370372
default:
@@ -391,16 +393,18 @@ void IR2VecPrinterPass::printVector(const Embedding &Vec) const {
391393

392394
PreservedAnalyses IR2VecPrinterPass::run(Module &M,
393395
ModuleAnalysisManager &MAM) {
394-
auto VocabResult = MAM.getResult<VocabAnalysis>(M);
395-
assert(VocabResult.isValid() && "Vocab is invalid");
396+
auto IR2VecVocabResult = MAM.getResult<IR2VecVocabAnalysis>(M);
397+
assert(IR2VecVocabResult.isValid() && "Vocab is invalid");
396398

397399
for (Function &F : M) {
398400
auto &FAM =
399401
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
400402

401403
auto IR2VecRes = FAM.getResult<IR2VecAnalysis>(F);
404+
402405
if (!IR2VecRes.isValid()) {
403-
errs() << "Error: IR2Vec embeddings are invalid.\n";
406+
auto Ctx = &F.getContext();
407+
Ctx->emitError("IR2Vec embeddings are invalid");
404408
return PreservedAnalyses::all();
405409
}
406410

0 commit comments

Comments
 (0)