Skip to content

Commit b8bb4b4

Browse files
committed
Remove libc substitution
Based on the performance evaluation, the checking overhead of libc substitution is larger than its performance improvement in most cases. It is effective only in specific cases, and it cannot be further optimized through JIT. Therefore, we decided to remove this mechanism.
1 parent 88fa915 commit b8bb4b4

File tree

4 files changed

+9
-379
lines changed

4 files changed

+9
-379
lines changed

src/decode.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,7 @@ enum op_field {
196196
_(fuse2) \
197197
_(fuse3) \
198198
_(fuse4) \
199-
_(fuse5) \
200-
_(fuse6) \
201-
_(fuse7)
199+
_(fuse5)
202200

203201
/* clang-format off */
204202
/* IR (intermediate representation) is exclusively represented by RISC-V

src/emulate.c

Lines changed: 7 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -492,34 +492,8 @@ static bool do_fuse4(riscv_t *rv, rv_insn_t *ir, uint64_t cycle, uint32_t PC)
492492
MUST_TAIL return next->impl(rv, next, cycle, PC);
493493
}
494494

495-
/* memset */
496-
static bool do_fuse5(riscv_t *rv,
497-
const rv_insn_t *ir UNUSED,
498-
uint64_t cycle,
499-
uint32_t PC UNUSED)
500-
{
501-
/* FIXME: specify the correct cycle count for memset routine */
502-
cycle += 2;
503-
rv->io.on_memset(rv);
504-
rv->csr_cycle = cycle;
505-
return true;
506-
}
507-
508-
/* memcpy */
509-
static bool do_fuse6(riscv_t *rv,
510-
const rv_insn_t *ir UNUSED,
511-
uint64_t cycle,
512-
uint32_t PC UNUSED)
513-
{
514-
/* FIXME: specify the correct cycle count for memcpy routine */
515-
cycle += 2;
516-
rv->io.on_memcpy(rv);
517-
rv->csr_cycle = cycle;
518-
return true;
519-
}
520-
521495
/* multiple shift immediate */
522-
static bool do_fuse7(riscv_t *rv,
496+
static bool do_fuse5(riscv_t *rv,
523497
const rv_insn_t *ir,
524498
uint64_t cycle,
525499
uint32_t PC)
@@ -676,50 +650,6 @@ static void block_translate(riscv_t *rv, block_t *block)
676650
remove_next_nth_ir(rv, ir, block, count - 1); \
677651
}
678652

679-
#include "rv32_libc.h"
680-
681-
static bool detect_memset(riscv_t *rv, size_t type)
682-
{
683-
static const struct rv32libc_impl rv32_memset[] = {
684-
{memset0_insn, ARRAYS_SIZE(memset0_insn)},
685-
{memset1_insn, ARRAYS_SIZE(memset1_insn)},
686-
};
687-
assert(type < ARRAYS_SIZE(rv32_memset));
688-
689-
const uint32_t *memset_insn = rv32_memset[type].insn;
690-
const size_t memset_len = rv32_memset[type].len;
691-
692-
uint32_t tmp_pc = rv->PC;
693-
for (uint32_t i = 0; i < memset_len; i++) {
694-
const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
695-
if (unlikely(insn != memset_insn[i]))
696-
return false;
697-
tmp_pc += 4;
698-
}
699-
return true;
700-
}
701-
702-
static bool detect_memcpy(riscv_t *rv, size_t type)
703-
{
704-
static const struct rv32libc_impl rv32_memcpy[] = {
705-
{memcpy0_insn, ARRAYS_SIZE(memcpy0_insn)},
706-
{memcpy1_insn, ARRAYS_SIZE(memcpy1_insn)},
707-
};
708-
assert(type < ARRAYS_SIZE(rv32_memcpy));
709-
710-
const uint32_t *memcpy_insn = rv32_memcpy[type].insn;
711-
const size_t memcpy_len = rv32_memcpy[type].len;
712-
713-
uint32_t tmp_pc = rv->PC;
714-
for (uint32_t i = 0; i < memcpy_len; i++) {
715-
const uint32_t insn = rv->io.mem_ifetch(tmp_pc);
716-
if (unlikely(insn != memcpy_insn[i]))
717-
return false;
718-
tmp_pc += 4;
719-
}
720-
return true;
721-
}
722-
723653
static inline void remove_next_nth_ir(const riscv_t *rv,
724654
rv_insn_t *ir,
725655
block_t *block,
@@ -735,88 +665,6 @@ static inline void remove_next_nth_ir(const riscv_t *rv,
735665
block->n_insn -= n;
736666
}
737667

738-
static bool libc_substitute(riscv_t *rv, block_t *block)
739-
{
740-
rv_insn_t *ir = block->ir_head, *next_ir = NULL;
741-
switch (ir->opcode) {
742-
case rv_insn_addi:
743-
/* Compare the target block with the first basic block of memset and
744-
* memcpy.
745-
* If the two blocks match, extract the instruction sequence starting
746-
* from pc_start of the basic block and compare it with the pre-recorded
747-
* memset/memcpy instruction sequence.
748-
*/
749-
if (IF_imm(ir, 15) && IF_rd(ir, t1) && IF_rs1(ir, zero)) {
750-
next_ir = ir->next;
751-
assert(next_ir);
752-
if (IF_insn(next_ir, addi) && IF_rd(next_ir, a4) &&
753-
IF_rs1(next_ir, a0) && IF_rs2(next_ir, zero)) {
754-
next_ir = next_ir->next;
755-
if (IF_insn(next_ir, bgeu) && IF_imm(next_ir, 60) &&
756-
IF_rs1(next_ir, t1) && IF_rs2(next_ir, a2)) {
757-
if (detect_memset(rv, 0)) {
758-
ir->opcode = rv_insn_fuse5;
759-
ir->impl = dispatch_table[ir->opcode];
760-
remove_next_nth_ir(rv, ir, block, 2);
761-
return true;
762-
}
763-
}
764-
}
765-
} else if (IF_imm(ir, 0) && IF_rd(ir, t1) && IF_rs1(ir, a0)) {
766-
next_ir = ir->next;
767-
assert(next_ir);
768-
if (IF_insn(next_ir, beq) && IF_rs1(next_ir, a2) &&
769-
IF_rs2(next_ir, zero)) {
770-
if (IF_imm(next_ir, 20) && detect_memset(rv, 1)) {
771-
ir->opcode = rv_insn_fuse5;
772-
ir->impl = dispatch_table[ir->opcode];
773-
remove_next_nth_ir(rv, ir, block, 1);
774-
return true;
775-
}
776-
if (IF_imm(next_ir, 28) && detect_memcpy(rv, 1)) {
777-
ir->opcode = rv_insn_fuse6;
778-
ir->impl = dispatch_table[ir->opcode];
779-
remove_next_nth_ir(rv, ir, block, 1);
780-
return true;
781-
};
782-
}
783-
}
784-
break;
785-
case rv_insn_xor:
786-
/* Compare the target block with the first basic block of memcpy, if
787-
* two block is match, we would extract the instruction sequence
788-
* starting from the pc_start of the basic block and then compare
789-
* it with the pre-recorded memcpy instruction sequence.
790-
*/
791-
if (IF_rd(ir, a5) && IF_rs1(ir, a0) && IF_rs2(ir, a1)) {
792-
next_ir = ir->next;
793-
assert(next_ir);
794-
if (IF_insn(next_ir, andi) && IF_imm(next_ir, 3) &&
795-
IF_rd(next_ir, a5) && IF_rs1(next_ir, a5)) {
796-
next_ir = next_ir->next;
797-
if (IF_insn(next_ir, add) && IF_rd(next_ir, a7) &&
798-
IF_rs1(next_ir, a0) && IF_rs2(next_ir, a2)) {
799-
next_ir = next_ir->next;
800-
if (IF_insn(next_ir, bne) && IF_imm(next_ir, 104) &&
801-
IF_rs1(next_ir, a5) && IF_rs2(next_ir, zero)) {
802-
if (detect_memcpy(rv, 0)) {
803-
ir->opcode = rv_insn_fuse6;
804-
ir->impl = dispatch_table[ir->opcode];
805-
remove_next_nth_ir(rv, ir, block, 3);
806-
return true;
807-
}
808-
}
809-
}
810-
}
811-
}
812-
break;
813-
/* TODO: Inject other frequently used function calls from the C standard
814-
* library.
815-
*/
816-
}
817-
return false;
818-
}
819-
820668
/* Check if instructions in a block match a specific pattern. If they do,
821669
* rewrite them as fused instructions.
822670
*
@@ -902,7 +750,7 @@ static void match_pattern(riscv_t *rv, block_t *block)
902750
ir->fuse = malloc(count * sizeof(opcode_fuse_t));
903751
assert(ir->fuse);
904752
memcpy(ir->fuse, ir, sizeof(opcode_fuse_t));
905-
ir->opcode = rv_insn_fuse7;
753+
ir->opcode = rv_insn_fuse5;
906754
ir->imm2 = count;
907755
ir->impl = dispatch_table[ir->opcode];
908756
next_ir = ir->next;
@@ -971,14 +819,13 @@ static block_t *block_find_or_translate(riscv_t *rv)
971819
next = block_alloc(rv);
972820
block_translate(rv, next);
973821

974-
if (!libc_substitute(rv, next)) {
975-
optimize_constant(rv, next);
822+
optimize_constant(rv, next);
976823
#if RV32_HAS(GDBSTUB)
977-
if (likely(!rv->debug_mode))
824+
if (likely(!rv->debug_mode))
978825
#endif
979-
/* macro operation fusion */
980-
match_pattern(rv, next);
981-
}
826+
/* macro operation fusion */
827+
match_pattern(rv, next);
828+
982829
#if !RV32_HAS(JIT)
983830
/* insert the block into block map */
984831
block_insert(&rv->block_map, next);

src/jit.c

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,14 +1491,11 @@ static inline void liveness_calc(block_t *block)
14911491
}
14921492
break;
14931493
case rv_insn_fuse4:
1494-
case rv_insn_fuse7:
1494+
case rv_insn_fuse5:
14951495
for (int i = 0; i < ir->imm2; i++) {
14961496
liveness[ir->fuse[i].rs1] = idx;
14971497
}
14981498
break;
1499-
case rv_insn_fuse5:
1500-
case rv_insn_fuse6:
1501-
break;
15021499
default:
15031500
__UNREACHABLE;
15041501
}
@@ -1778,24 +1775,6 @@ static void do_fuse4(struct jit_state *state, riscv_t *rv, rv_insn_t *ir)
17781775
}
17791776

17801777
static void do_fuse5(struct jit_state *state, riscv_t *rv UNUSED, rv_insn_t *ir)
1781-
{
1782-
store_back(state);
1783-
emit_load_imm(state, temp_reg, ir->pc + 4);
1784-
emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
1785-
emit_call(state, (intptr_t) rv->io.on_memset);
1786-
emit_exit(state);
1787-
}
1788-
1789-
static void do_fuse6(struct jit_state *state, riscv_t *rv UNUSED, rv_insn_t *ir)
1790-
{
1791-
store_back(state);
1792-
emit_load_imm(state, temp_reg, ir->pc + 4);
1793-
emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
1794-
emit_call(state, (intptr_t) rv->io.on_memcpy);
1795-
emit_exit(state);
1796-
}
1797-
1798-
static void do_fuse7(struct jit_state *state, riscv_t *rv UNUSED, rv_insn_t *ir)
17991778
{
18001779
opcode_fuse_t *fuse = ir->fuse;
18011780
for (int i = 0; i < ir->imm2; i++) {

0 commit comments

Comments
 (0)