@@ -40,7 +40,7 @@ STATISTIC(DataMissCounter, "Number of data misses in the vocabulary");
/// Selects the type of embeddings to generate.
// FIXME: Currently we support only Symbolic. Add support for
// Flow-aware in upcoming patches.
enum class IR2VecKind {
  Symbolic, ///< Symbolic embeddings.
  FlowAware ///< Flow-aware embeddings.
};
4444
4545static cl::OptionCategory IR2VecAnalysisCategory (" IR2Vec Analysis Options" );
4646
@@ -49,7 +49,7 @@ cl::opt<IR2VecKind>
4949 cl::desc (" Choose type of embeddings to generate:" ),
5050 cl::values(clEnumValN(IR2VecKind::Symbolic, " symbolic" ,
5151 " Generates symbolic embeddings" ),
52- clEnumValN(IR2VecKind::Flowaware , " flowaware " ,
52+ clEnumValN(IR2VecKind::FlowAware , " flow-aware " ,
5353 " Generates flow-aware embeddings" )),
5454 cl::init(IR2VecKind::Symbolic), cl::cat(IR2VecAnalysisCategory));
5555
@@ -82,19 +82,19 @@ class Embeddings {
8282 // / in the IR instructions to generate the vector representation.
8383 // FIXME: Defaults to the values used in the original algorithm. Can be
8484 // parameterized later.
/// Scale applied to the opcode embedding of an instruction.
const float OpcWeight = 1.0;
/// Scale applied to the result-type embedding of an instruction.
const float TypeWeight = 0.5;
/// Scale applied to the embedding of each operand of an instruction.
const float ArgWeight = 0.2;

/// Dimension of the vector representation; captured from the input vocabulary
const unsigned Dimension = 300;
8989
9090 // Utility maps - these are used to store the vector representations of
9191 // instructions, basic blocks and functions.
9292 Embedding FuncVector;
9393 SmallMapVector<const BasicBlock *, Embedding, 16 > BBVecMap;
9494 SmallMapVector<const Instruction *, Embedding, 128 > InstVecMap;
9595
96- Embeddings (const Function &F, const Vocab &Vocabulary, unsigned DIM )
97- : F(F), Vocabulary(Vocabulary), DIM(DIM ) {}
96+ Embeddings (const Function &F, const Vocab &Vocabulary, unsigned Dimension )
97+ : F(F), Vocabulary(Vocabulary), Dimension(Dimension ) {}
9898
9999 // / Lookup vocabulary for a given Key. If the key is not found, it returns a
100100 // / zero vector.
@@ -141,9 +141,9 @@ class Symbolic : public Embeddings {
141141 Embedding computeFunc2Vec ();
142142
143143public:
144- Symbolic (const Function &F, const Vocab &Vocabulary, unsigned DIM )
145- : Embeddings(F, Vocabulary, DIM ) {
146- FuncVector = Embedding (DIM , 0 );
144+ Symbolic (const Function &F, const Vocab &Vocabulary, unsigned Dimension )
145+ : Embeddings(F, Vocabulary, Dimension ) {
146+ FuncVector = Embedding (Dimension , 0 );
147147 }
148148 void computeEmbeddings () override ;
149149};
@@ -163,14 +163,15 @@ void addVectors(Embedding &Vec, const Embedding &Vec2) {
163163// FIXME: Currently lookups are string based. Use numeric Keys
164164// for efficiency.
165165Embedding Embeddings::lookupVocab (const std::string &Key) {
166- Embedding Vec (DIM , 0 );
166+ Embedding Vec (Dimension , 0 );
167167 // FIXME: Use zero vectors in vocab and assert failure for
168168 // unknown entities rather than silently returning zeroes here.
169- if (Vocabulary.find (Key) == Vocabulary.end ()) {
169+ auto It = Vocabulary.find (Key);
170+ if (It == Vocabulary.end ()) {
170171 LLVM_DEBUG (errs () << " cannot find key in map : " << Key << " \n " );
171- DataMissCounter++ ;
172+ ++DataMissCounter ;
172173 } else {
173- Vec = Vocabulary[Key] ;
174+ Vec = It-> second ;
174175 }
175176 return Vec;
176177}
@@ -179,24 +180,27 @@ void Symbolic::computeEmbeddings() {
179180 if (F.isDeclaration ())
180181 return ;
181182 for (auto &BB : F) {
182- auto It = BBVecMap.find (&BB);
183- if (It != BBVecMap. end () )
183+ auto Result = BBVecMap.try_emplace (&BB);
184+ if (!Result. second )
184185 continue ;
185- BBVecMap[&BB] = computeBB2Vec (BB);
186- addVectors (FuncVector, BBVecMap[&BB]);
186+ auto It = Result.first ;
187+ It->second = std::move (computeBB2Vec (BB));
188+ addVectors (FuncVector, It->second );
187189 }
188190}
189191
190192Embedding Symbolic::computeBB2Vec (const BasicBlock &BB) {
191- Embedding BBVector (DIM , 0 );
193+ Embedding BBVector (Dimension , 0 );
192194
193195 for (auto &I : BB) {
194- Embedding InstVector (DIM , 0 );
196+ Embedding InstVector (Dimension , 0 );
195197
196198 auto Vec = lookupVocab (I.getOpcodeName ());
197- scaleVector (Vec, WO );
199+ scaleVector (Vec, OpcWeight );
198200 addVectors (InstVector, Vec);
199201
202+ // FIXME: Currently lookups are string based. Use numeric Keys
203+ // for efficiency.
200204 auto Type = I.getType ();
201205 if (Type->isVoidTy ()) {
202206 Vec = lookupVocab (" voidTy" );
@@ -225,10 +229,10 @@ Embedding Symbolic::computeBB2Vec(const BasicBlock &BB) {
225229 } else {
226230 Vec = lookupVocab (" unknownTy" );
227231 }
228- scaleVector (Vec, WT );
232+ scaleVector (Vec, TypeWeight );
229233 addVectors (InstVector, Vec);
230234
231- for (auto &Op : I.operands ()) {
235+ for (const auto &Op : I.operands ()) {
232236 Embedding Vec;
233237 if (isa<Function>(Op)) {
234238 Vec = lookupVocab (" function" );
@@ -239,7 +243,7 @@ Embedding Symbolic::computeBB2Vec(const BasicBlock &BB) {
239243 } else {
240244 Vec = lookupVocab (" variable" );
241245 }
242- scaleVector (Vec, WA );
246+ scaleVector (Vec, ArgWeight );
243247 addVectors (InstVector, Vec);
244248 }
245249 InstVecMap[&I] = InstVector;
@@ -253,14 +257,19 @@ Embedding Symbolic::computeBB2Vec(const BasicBlock &BB) {
253257// IR2VecVocabResult and IR2VecVocabAnalysis
254258// ===----------------------------------------------------------------------===//
255259
256- IR2VecVocabResult::IR2VecVocabResult (ir2vec::Vocab &&Vocabulary, unsigned Dim )
257- : Vocabulary(std::move(Vocabulary)), Valid(true ), DIM(Dim) {}
260+ IR2VecVocabResult::IR2VecVocabResult (ir2vec::Vocab &&Vocabulary)
261+ : Vocabulary(std::move(Vocabulary)), Valid(true ) {}
258262
259263const ir2vec::Vocab &IR2VecVocabResult::getVocabulary () const {
260264 assert (Valid);
261265 return Vocabulary;
262266}
263267
268+ unsigned IR2VecVocabResult::getDimension () const {
269+ assert (Valid);
270+ return Vocabulary.begin ()->second .size ();
271+ }
272+
264273// For now, assume vocabulary is stable unless explicitly invalidated.
265274bool IR2VecVocabResult::invalidate (Module &M, const PreservedAnalyses &PA,
266275 ModuleAnalysisManager::Invalidator &Inv) {
@@ -269,7 +278,7 @@ bool IR2VecVocabResult::invalidate(Module &M, const PreservedAnalyses &PA,
269278}
270279
271280// FIXME: Make this optional. We can avoid file reads
272- // by auto-generating the vocabulary during the build time.
281+ // by auto-generating a default vocabulary during the build time.
273282Error IR2VecVocabAnalysis::readVocabulary () {
274283 auto BufOrError = MemoryBuffer::getFileOrSTDIN (VocabFile, /* IsText=*/ true );
275284 if (!BufOrError) {
@@ -295,7 +304,6 @@ Error IR2VecVocabAnalysis::readVocabulary() {
295304 return Entry.second .size () == Dim;
296305 }) &&
297306 " All vectors in the vocabulary are not of the same dimension" );
298- this ->DIM = Dim;
299307 return Error::success ();
300308}
301309
@@ -313,19 +321,19 @@ IR2VecVocabAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
313321 });
314322 return IR2VecVocabResult ();
315323 }
316- return IR2VecVocabResult (std::move (Vocabulary), DIM );
324+ return IR2VecVocabResult (std::move (Vocabulary));
317325}
318326
319327// ==----------------------------------------------------------------------===//
320328// IR2VecResult and IR2VecAnalysis
321329// ===----------------------------------------------------------------------===//
322330
323331IR2VecResult::IR2VecResult (
324- SmallMapVector<const Instruction *, Embedding, 128 > &&InstMap,
325- SmallMapVector<const BasicBlock *, Embedding, 16 > &&BBMap,
326- Embedding &&FuncVector, unsigned Dim )
332+ const SmallMapVector<const Instruction *, Embedding, 128 > &&InstMap,
333+ const SmallMapVector<const BasicBlock *, Embedding, 16 > &&BBMap,
334+ const Embedding &&FuncVector)
327335 : InstVecMap(std::move(InstMap)), BBVecMap(std::move(BBMap)),
328- FuncVector(std::move(FuncVector)), DIM(Dim), Valid(true ) {}
336+ FuncVector(std::move(FuncVector)), Valid(true ) {}
329337
330338const SmallMapVector<const Instruction *, Embedding, 128 > &
331339IR2VecResult::getInstVecMap () const {
@@ -341,7 +349,6 @@ const Embedding &IR2VecResult::getFunctionVector() const {
341349 assert (Valid);
342350 return FuncVector;
343351}
344- unsigned IR2VecResult::getDimension () const { return DIM; }
345352
346353IR2VecAnalysis::Result IR2VecAnalysis::run (Function &F,
347354 FunctionAnalysisManager &FAM) {
@@ -365,19 +372,18 @@ IR2VecAnalysis::Result IR2VecAnalysis::run(Function &F,
365372 case IR2VecKind::Symbolic:
366373 Emb = std::make_unique<Symbolic>(F, Vocabulary, Dim);
367374 break ;
368- case IR2VecKind::Flowaware :
375+ case IR2VecKind::FlowAware :
369376 // FIXME: Add support for flow-aware embeddings
370- llvm_unreachable (" Flow-aware embeddings are not supported yet" );
371- break ;
372377 default :
373- llvm_unreachable (" Invalid IR2Vec mode" );
378+ Ctx->emitError (" Invalid IR2Vec mode" );
379+ return IR2VecResult ();
374380 }
381+
375382 Emb->computeEmbeddings ();
376- auto InstMap = Emb->getInstVecMap ();
377- auto BBMap = Emb->getBBVecMap ();
378- auto FuncVec = Emb->getFunctionVector ();
379- return IR2VecResult (std::move (InstMap), std::move (BBMap), std::move (FuncVec),
380- Dim);
383+ auto &InstMap = Emb->getInstVecMap ();
384+ auto &BBMap = Emb->getBBVecMap ();
385+ auto &FuncVec = Emb->getFunctionVector ();
386+ return IR2VecResult (std::move (InstMap), std::move (BBMap), std::move (FuncVec));
381387}
382388
383389// ==----------------------------------------------------------------------===//
0 commit comments