diff --git a/llvm/docs/CommandGuide/llvm-ir2vec.rst b/llvm/docs/CommandGuide/llvm-ir2vec.rst index 0c9fb6e94b6f3..fc590a6180316 100644 --- a/llvm/docs/CommandGuide/llvm-ir2vec.rst +++ b/llvm/docs/CommandGuide/llvm-ir2vec.rst @@ -13,7 +13,9 @@ DESCRIPTION :program:`llvm-ir2vec` is a standalone command-line tool for IR2Vec. It generates IR2Vec embeddings for LLVM IR and supports triplet generation -for vocabulary training. The tool provides three main subcommands: +for vocabulary training. + +The tool provides three main subcommands: 1. **triplets**: Generates numeric triplets in train2id format for vocabulary training from LLVM IR. @@ -93,7 +95,7 @@ Example Usage: .. code-block:: bash - llvm-ir2vec embeddings --ir2vec-vocab-path=vocab.json --level=func input.bc -o embeddings.txt + llvm-ir2vec embeddings --ir2vec-vocab-path=vocab.json --ir2vec-kind=symbolic --level=func input.bc -o embeddings.txt OPTIONS ------- @@ -129,6 +131,16 @@ Subcommand-specific options: Process only the specified function instead of all functions in the module. +.. option:: --ir2vec-kind= + + Specify the kind of IR2Vec embeddings to generate. Valid values are: + + * ``symbolic`` - Generate symbolic embeddings (default) + * ``flow-aware`` - Generate flow-aware embeddings + + Flow-aware embeddings consider control flow relationships between instructions, + while symbolic embeddings focus on the symbolic representation of instructions. + .. option:: --ir2vec-vocab-path= Specify the path to the vocabulary file (required for embedding generation). diff --git a/llvm/test/tools/llvm-ir2vec/embeddings-flowaware.ll b/llvm/test/tools/llvm-ir2vec/embeddings-flowaware.ll new file mode 100644 index 0000000000000..b2362f83caf4f --- /dev/null +++ b/llvm/test/tools/llvm-ir2vec/embeddings-flowaware.ll @@ -0,0 +1,73 @@ +; RUN: llvm-ir2vec embeddings --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEFAULT +; RUN: llvm-ir2vec embeddings --level=func --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL +; RUN: llvm-ir2vec embeddings --level=func --function=abc --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL-ABC +; RUN: not llvm-ir2vec embeddings --level=func --function=def --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-FUNC-DEF +; RUN: llvm-ir2vec embeddings --level=bb --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL +; RUN: llvm-ir2vec embeddings --level=bb --function=abc_repeat --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL-ABC-REPEAT +; RUN: llvm-ir2vec embeddings --level=inst --function=abc_repeat --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-INST-LEVEL-ABC-REPEAT + +define dso_local noundef float @abc(i32 noundef %a, float noundef %b) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca float, align 4 + store i32 %a, ptr %a.addr, align 4 + store float %b, ptr %b.addr, align 4 + %0 = load i32, ptr %a.addr, align 4 + %1 = load i32, ptr %a.addr, align 4 + %mul = mul nsw i32 %0, %1 + %conv = sitofp i32 %mul to float + %2 = load float, ptr %b.addr, align 4 + %add = fadd float %conv, %2 + ret float %add +} + +define dso_local noundef float @abc_repeat(i32 noundef %a, float noundef %b) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca float, align 4 + store i32 %a, ptr %a.addr, align 4 + store float %b, ptr %b.addr, align 4 + %0 = load i32, ptr %a.addr, align 4 + %1 = load i32, ptr %a.addr, align 4 + %mul = mul nsw i32 %0, %1 + %conv = sitofp i32 %mul to float + %2 = load float, ptr %b.addr, align 4 + %add = fadd float %conv, %2 + ret float %add +} + +; CHECK-DEFAULT: Function: abc +; CHECK-DEFAULT-NEXT: [ 3630.00 3672.00 3714.00 ] +; CHECK-DEFAULT-NEXT: Function: abc_repeat +; CHECK-DEFAULT-NEXT: [ 3630.00 3672.00 3714.00 ] + +; CHECK-FUNC-LEVEL: Function: abc +; CHECK-FUNC-LEVEL-NEXT: [ 3630.00 3672.00 3714.00 ] +; CHECK-FUNC-LEVEL-NEXT: Function: abc_repeat +; CHECK-FUNC-LEVEL-NEXT: [ 3630.00 3672.00 3714.00 ] + +; CHECK-FUNC-LEVEL-ABC: Function: abc +; CHECK-FUNC-LEVEL-NEXT-ABC: [ 3630.00 3672.00 3714.00 ] + +; CHECK-FUNC-DEF: Error: Function 'def' not found + +; CHECK-BB-LEVEL: Function: abc +; CHECK-BB-LEVEL-NEXT: entry: [ 3630.00 3672.00 3714.00 ] +; CHECK-BB-LEVEL-NEXT: Function: abc_repeat +; CHECK-BB-LEVEL-NEXT: entry: [ 3630.00 3672.00 3714.00 ] + +; CHECK-BB-LEVEL-ABC-REPEAT: Function: abc_repeat +; CHECK-BB-LEVEL-ABC-REPEAT-NEXT: entry: [ 3630.00 3672.00 3714.00 ] + +; CHECK-INST-LEVEL-ABC-REPEAT: Function: abc_repeat +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %a.addr = alloca i32, align 4 [ 91.00 92.00 93.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %b.addr = alloca float, align 4 [ 91.00 92.00 93.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: store i32 %a, ptr %a.addr, align 4 [ 188.00 190.00 192.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: store float %b, ptr %b.addr, align 4 [ 188.00 190.00 192.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %0 = load i32, ptr %a.addr, align 4 [ 185.00 187.00 189.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %1 = load i32, ptr %a.addr, align 4 [ 185.00 187.00 189.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %mul = mul nsw i32 %0, %1 [ 419.00 424.00 429.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %conv = sitofp i32 %mul to float [ 549.00 555.00 561.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %2 = load float, ptr %b.addr, align 4 [ 185.00 187.00 189.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %add = fadd float %conv, %2 [ 774.00 783.00 792.00 ] +; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: ret float %add [ 775.00 785.00 795.00 ] diff --git a/llvm/test/tools/llvm-ir2vec/embeddings.ll b/llvm/test/tools/llvm-ir2vec/embeddings-symbolic.ll similarity index 100% rename from llvm/test/tools/llvm-ir2vec/embeddings.ll rename to llvm/test/tools/llvm-ir2vec/embeddings-symbolic.ll diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp index 8e17a4a3ab53d..8f8b4e2f2bda8 100644 --- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp +++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp @@ -25,9 +25,11 @@ /// 3. Embedding Generation (embeddings): /// Generates IR2Vec embeddings using a trained vocabulary. /// Usage: llvm-ir2vec embeddings --ir2vec-vocab-path=vocab.json -/// --level=func input.bc -o embeddings.txt Levels: --level=inst -/// (instructions), --level=bb (basic blocks), --level=func (functions) -/// (See IR2Vec.cpp for more embedding generation options) +/// --ir2vec-kind= --level= input.bc -o embeddings.txt +/// Kind: --ir2vec-kind=symbolic (default), --ir2vec-kind=flow-aware +/// Levels: --level=inst (instructions), --level=bb (basic blocks), +/// --level=func (functions) (See IR2Vec.cpp for more embedding generation +/// options) /// //===----------------------------------------------------------------------===// @@ -243,7 +245,7 @@ class IR2VecTool { // Create embedder for this function assert(Vocab->isValid() && "Vocabulary is not valid"); - auto Emb = Embedder::create(IR2VecKind::Symbolic, F, *Vocab); + auto Emb = Embedder::create(IR2VecEmbeddingKind, F, *Vocab); if (!Emb) { OS << "Error: Failed to create embedder for function " << F.getName() << "\n";