From 4acd12343dbe3572d9ffb9be55b4bccd522b76ca Mon Sep 17 00:00:00 2001 From: Anastasiya Chernikova Date: Wed, 23 Oct 2024 16:14:28 +0300 Subject: [PATCH 1/3] [Exegesis] Implemented strategy for load operation This fix helps to map the memory operand to the destination register. If the instruction is a load, we can self-alias it when the instruction overwrites the whole address register. For that we use the provided scratch memory. --- llvm/tools/llvm-exegesis/lib/CodeTemplate.h | 3 ++ .../lib/SerialSnippetGenerator.cpp | 46 ++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h index 7aca224302a1f..8c32c8d26fc54 100644 --- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h +++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h @@ -132,6 +132,9 @@ struct CodeTemplate { // If the template uses the provided scratch memory, the register in which // the pointer to this memory is passed in to the function. unsigned ScratchSpacePointerInReg = 0; + // Require to pre-store value of a given register (fisrt) + // to scratch memory with given offset (second) + SmallVector, 2> PreinitScratchMemory; #if defined(__GNUC__) && (defined(__clang__) || LLVM_GNUC_PREREQ(8, 0, 0)) // FIXME: GCC7 bug workaround. Drop #if after GCC7 no longer supported. diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp index 25cdf1ce66d44..4cd316c9b606f 100644 --- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp @@ -104,7 +104,51 @@ static void appendCodeTemplates(const LLVMState &State, } case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: { // Select back-to-back memory instruction. - // TODO: Implement me. 
+ + auto &I = Variant.getInstr(); + if (I.Description.mayLoad()) { + // If instruction is load, we can self-alias it in case when instruction + // overrides whole address register. For that we use provided scratch + // memory. + + // TODO: now it is not checked if load writes the whole register. + + auto DefOpIt = find_if(I.Operands, [](Operand const &op) { + return op.isDef() && op.isReg(); + }); + + if (DefOpIt == I.Operands.end()) + return; + + const Operand &DefOp = *DefOpIt; + auto &ET = State.getExegesisTarget(); + auto ScratchMemoryRegister = ET.getScratchMemoryRegister( + State.getTargetMachine().getTargetTriple()); + auto &RegClass = + State.getTargetMachine().getMCRegisterInfo()->getRegClass( + DefOp.getExplicitOperandInfo().RegClass); + + // Register classes of def operand and memory operand must be the same + // to perform aliasing. + if (!RegClass.contains(ScratchMemoryRegister)) + return; + + ET.fillMemoryOperands(Variant, ScratchMemoryRegister, 0); + Variant.getValueFor(DefOp) = MCOperand::createReg(ScratchMemoryRegister); + + CodeTemplate CT; + CT.Execution = ExecutionModeBit; + if (CT.ScratchSpacePointerInReg == 0) + CT.ScratchSpacePointerInReg = ScratchMemoryRegister; + + CT.Info = std::string(ExecutionClassDescription); + CT.Instructions.push_back(std::move(Variant)); + CT.PreinitScratchMemory.emplace_back(ScratchMemoryRegister, + /* Offset */ 0); + CodeTemplates.push_back(std::move(CT)); + } + + // TODO: implement more cases return; } case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: { From 3508c766605bf09249f7281873d2ff6eccbe8cfc Mon Sep 17 00:00:00 2001 From: Anastasiya Chernikova Date: Tue, 24 Dec 2024 16:54:31 +0300 Subject: [PATCH 2/3] Addressed comments and add test --- .../llvm-exegesis/RISCV/latency-by-load.s | 60 +++++++++++++++++++ llvm/tools/llvm-exegesis/lib/CodeTemplate.h | 3 - .../lib/SerialSnippetGenerator.cpp | 15 ++--- 3 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 
llvm/test/tools/llvm-exegesis/RISCV/latency-by-load.s diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-load.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-load.s new file mode 100644 index 0000000000000..ea8075d1eaba2 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-load.s @@ -0,0 +1,60 @@ +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LD |& FileCheck --check-prefix=TEST1 %s + +TEST1: --- +TEST1-NEXT: mode: latency +TEST1-NEXT: key: +TEST1-NEXT: instructions: +TEST1-NEXT: - 'LD X10 X10 i_0x0' + +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LW |& FileCheck --check-prefix=TEST2 %s + +TEST2: --- +TEST2-NEXT: mode: latency +TEST2-NEXT: key: +TEST2-NEXT: instructions: +TEST2-NEXT: - 'LW X10 X10 i_0x0' + +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LH |& FileCheck --check-prefix=TEST3 %s + +TEST3: --- +TEST3-NEXT: mode: latency +TEST3-NEXT: key: +TEST3-NEXT: instructions: +TEST3-NEXT: - 'LH X10 X10 i_0x0' + +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LWU |& FileCheck --check-prefix=TEST4 %s + +TEST4: --- +TEST4-NEXT: mode: latency +TEST4-NEXT: key: +TEST4-NEXT: instructions: +TEST4-NEXT: - 'LWU X10 X10 i_0x0' + +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LBU |& FileCheck --check-prefix=TEST5 %s + +TEST5: --- +TEST5-NEXT: mode: latency +TEST5-NEXT: key: +TEST5-NEXT: instructions: +TEST5-NEXT: - 'LBU X10 X10 i_0x0' + +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LUI |& FileCheck 
--check-prefix=TEST6 %s + +TEST6: LUI: No strategy found to make the execution serial + + +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LB |& FileCheck --check-prefix=TEST7 %s + +TEST7: --- +TEST7-NEXT: mode: latency +TEST7-NEXT: key: +TEST7-NEXT: instructions: +TEST7-NEXT: - 'LB X10 X10 i_0x0' + +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -mattr=+a -opcode-name=LR_W_RL |& FileCheck --check-prefix=TEST8 %s + +TEST8: --- +TEST8-NEXT: mode: latency +TEST8-NEXT: key: +TEST8-NEXT: instructions: +TEST8-NEXT: - 'LR_W_RL X10 X10' diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h index 8c32c8d26fc54..7aca224302a1f 100644 --- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h +++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h @@ -132,9 +132,6 @@ struct CodeTemplate { // If the template uses the provided scratch memory, the register in which // the pointer to this memory is passed in to the function. unsigned ScratchSpacePointerInReg = 0; - // Require to pre-store value of a given register (fisrt) - // to scratch memory with given offset (second) - SmallVector, 2> PreinitScratchMemory; #if defined(__GNUC__) && (defined(__clang__) || LLVM_GNUC_PREREQ(8, 0, 0)) // FIXME: GCC7 bug workaround. Drop #if after GCC7 no longer supported. diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp index 4cd316c9b606f..eeea42b319505 100644 --- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp @@ -113,18 +113,18 @@ static void appendCodeTemplates(const LLVMState &State, // TODO: now it is not checked if load writes the whole register. 
- auto DefOpIt = find_if(I.Operands, [](Operand const &op) { - return op.isDef() && op.isReg(); + auto DefOpIt = find_if(I.Operands, [](Operand const &Op) { + return Op.isDef() && Op.isReg(); }); if (DefOpIt == I.Operands.end()) return; const Operand &DefOp = *DefOpIt; - auto &ET = State.getExegesisTarget(); - auto ScratchMemoryRegister = ET.getScratchMemoryRegister( + const ExegesisTarget &ET = State.getExegesisTarget(); + unsigned ScratchMemoryRegister = ET.getScratchMemoryRegister( State.getTargetMachine().getTargetTriple()); - auto &RegClass = + const llvm::MCRegisterClass &RegClass = State.getTargetMachine().getMCRegisterInfo()->getRegClass( DefOp.getExplicitOperandInfo().RegClass); @@ -138,13 +138,10 @@ static void appendCodeTemplates(const LLVMState &State, CodeTemplate CT; CT.Execution = ExecutionModeBit; - if (CT.ScratchSpacePointerInReg == 0) - CT.ScratchSpacePointerInReg = ScratchMemoryRegister; + CT.ScratchSpacePointerInReg = ScratchMemoryRegister; CT.Info = std::string(ExecutionClassDescription); CT.Instructions.push_back(std::move(Variant)); - CT.PreinitScratchMemory.emplace_back(ScratchMemoryRegister, - /* Offset */ 0); CodeTemplates.push_back(std::move(CT)); } From f76ffe47f667cc32a4cce6d5bd18166dc6c034d2 Mon Sep 17 00:00:00 2001 From: Anastasiya Chernikova Date: Tue, 24 Dec 2024 18:40:05 +0300 Subject: [PATCH 3/3] Fix test --- .../tools/llvm-exegesis/RISCV/latency-by-load.s | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-load.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-load.s index ea8075d1eaba2..63c03516ca5d3 100644 --- a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-load.s +++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-load.s @@ -1,4 +1,4 @@ -# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LD |& FileCheck --check-prefix=TEST1 %s +# RUN: llvm-exegesis 
-mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LD 2>&1 | FileCheck --check-prefix=TEST1 %s TEST1: --- TEST1-NEXT: mode: latency @@ -6,7 +6,7 @@ TEST1-NEXT: key: TEST1-NEXT: instructions: TEST1-NEXT: - 'LD X10 X10 i_0x0' -# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LW |& FileCheck --check-prefix=TEST2 %s +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LW 2>&1 | FileCheck --check-prefix=TEST2 %s TEST2: --- TEST2-NEXT: mode: latency @@ -14,7 +14,7 @@ TEST2-NEXT: key: TEST2-NEXT: instructions: TEST2-NEXT: - 'LW X10 X10 i_0x0' -# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LH |& FileCheck --check-prefix=TEST3 %s +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LH 2>&1 | FileCheck --check-prefix=TEST3 %s TEST3: --- TEST3-NEXT: mode: latency @@ -22,7 +22,7 @@ TEST3-NEXT: key: TEST3-NEXT: instructions: TEST3-NEXT: - 'LH X10 X10 i_0x0' -# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LWU |& FileCheck --check-prefix=TEST4 %s +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LWU 2>&1 | FileCheck --check-prefix=TEST4 %s TEST4: --- TEST4-NEXT: mode: latency @@ -30,7 +30,7 @@ TEST4-NEXT: key: TEST4-NEXT: instructions: TEST4-NEXT: - 'LWU X10 X10 i_0x0' -# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LBU |& FileCheck --check-prefix=TEST5 %s +# RUN: llvm-exegesis -mode=latency 
--benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LBU 2>&1 | FileCheck --check-prefix=TEST5 %s TEST5: --- TEST5-NEXT: mode: latency @@ -38,12 +38,12 @@ TEST5-NEXT: key: TEST5-NEXT: instructions: TEST5-NEXT: - 'LBU X10 X10 i_0x0' -# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LUI |& FileCheck --check-prefix=TEST6 %s +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LUI 2>&1 | FileCheck --check-prefix=TEST6 %s TEST6: LUI: No strategy found to make the execution serial -# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LB |& FileCheck --check-prefix=TEST7 %s +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -opcode-name=LB 2>&1 | FileCheck --check-prefix=TEST7 %s TEST7: --- TEST7-NEXT: mode: latency @@ -51,7 +51,7 @@ TEST7-NEXT: key: TEST7-NEXT: instructions: TEST7-NEXT: - 'LB X10 X10 i_0x0' -# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -mattr=+a -opcode-name=LR_W_RL |& FileCheck --check-prefix=TEST8 %s +# RUN: llvm-exegesis -mode=latency --benchmark-phase=assemble-measured-code -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -mattr=+a -opcode-name=LR_W_RL 2>&1 | FileCheck --check-prefix=TEST8 %s TEST8: --- TEST8-NEXT: mode: latency