Skip to content

Commit 8be2b88

Browse files
committed
Avoid generating machine code repeatedly for the same basic block
Originally, every execution path had its own machine code. As a result, even when a basic block had already been translated as part of another execution path, a new copy of its machine code was still generated. To use the code cache space more efficiently, the table that records translated blocks is now global, so every execution path shares the same machine code. This ensures that each basic block has only one copy of machine code, which saves code cache space. For example, for AES the code cache previously stored 1,926,471 bytes (1.9 MB) of machine code; after this change, it stores 182,730 bytes (0.18 MB).
1 parent c328a8a commit 8be2b88

File tree

2 files changed

+34
-26
lines changed

2 files changed

+34
-26
lines changed

src/jit.c

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@
5656
#define JIT_OP_MOD_REG (JIT_CLS_ALU | JIT_SRC_REG | 0x90)
5757

5858
#define STACK_SIZE 512
59-
#define MAX_INSNS 1024
59+
#define MAX_JUMPS 1024
60+
#define MAX_BLOCKS 8192
6061
#define IN_JUMP_THRESHOLD 256
6162
#if defined(__x86_64__)
6263
#define JUMP_LOC jump_loc + 2
@@ -243,8 +244,8 @@ static inline void emit_load_imm(struct jit_state *state, int dst, int64_t imm);
243244

244245
static inline void offset_map_insert(struct jit_state *state, int32_t target_pc)
245246
{
246-
struct offset_map *map_entry = &state->offset_map[state->n_insn++];
247-
assert(state->n_insn < MAX_INSNS);
247+
struct offset_map *map_entry = &state->offset_map[state->n_blocks++];
248+
assert(state->n_blocks < MAX_BLOCKS);
248249
map_entry->pc = target_pc;
249250
map_entry->offset = state->offset;
250251
}
@@ -354,7 +355,7 @@ static inline void emit_jump_target_address(struct jit_state *state,
354355
int32_t target_pc)
355356
{
356357
struct jump *jump = &state->jumps[state->n_jumps++];
357-
assert(state->n_jumps < MAX_INSNS);
358+
assert(state->n_jumps < MAX_JUMPS);
358359
jump->offset_loc = state->offset;
359360
jump->target_pc = target_pc;
360361
emit4(state, 0);
@@ -556,7 +557,7 @@ static inline void emit_jump_target_offset(struct jit_state *state,
556557
uint32_t jump_state_offset)
557558
{
558559
struct jump *jump = &state->jumps[state->n_jumps++];
559-
assert(state->n_jumps < MAX_INSNS);
560+
assert(state->n_jumps < MAX_JUMPS);
560561
jump->offset_loc = jump_loc;
561562
jump->target_offset = jump_state_offset;
562563
}
@@ -934,7 +935,7 @@ static inline void emit_jmp(struct jit_state *state, uint32_t target_pc)
934935
emit_jump_target_address(state, target_pc);
935936
#elif defined(__aarch64__)
936937
struct jump *jump = &state->jumps[state->n_jumps++];
937-
assert(state->n_jumps < MAX_INSNS);
938+
assert(state->n_jumps < MAX_JUMPS);
938939
jump->offset_loc = state->offset;
939940
jump->target_pc = target_pc;
940941
emit_a64(state, UBR_B);
@@ -1512,7 +1513,7 @@ static void resolve_jumps(struct jit_state *state)
15121513
#endif
15131514
else {
15141515
target_loc = jump.offset_loc + sizeof(uint32_t);
1515-
for (int i = 0; i < state->n_insn; i++) {
1516+
for (int i = 0; i < state->n_blocks; i++) {
15161517
if (jump.target_pc == state->offset_map[i].pc) {
15171518
target_loc = state->offset_map[i].offset;
15181519
break;
@@ -1534,27 +1535,26 @@ static void resolve_jumps(struct jit_state *state)
15341535

15351536
static void translate_chained_block(struct jit_state *state,
15361537
riscv_t *rv,
1537-
block_t *block,
1538-
set_t *set)
1538+
block_t *block)
15391539
{
1540-
if (set_has(set, block->pc_start))
1540+
if (set_has(&state->set, block->pc_start))
15411541
return;
15421542

1543-
set_add(set, block->pc_start);
1543+
set_add(&state->set, block->pc_start);
15441544
offset_map_insert(state, block->pc_start);
15451545
translate(state, rv, block);
15461546
rv_insn_t *ir = block->ir_tail;
1547-
if (ir->branch_untaken && !set_has(set, ir->branch_untaken->pc)) {
1547+
if (ir->branch_untaken && !set_has(&state->set, ir->branch_untaken->pc)) {
15481548
block_t *block1 =
15491549
cache_get(rv->block_cache, ir->branch_untaken->pc, false);
15501550
if (block1->translatable)
1551-
translate_chained_block(state, rv, block1, set);
1551+
translate_chained_block(state, rv, block1);
15521552
}
1553-
if (ir->branch_taken && !set_has(set, ir->branch_taken->pc)) {
1553+
if (ir->branch_taken && !set_has(&state->set, ir->branch_taken->pc)) {
15541554
block_t *block1 =
15551555
cache_get(rv->block_cache, ir->branch_taken->pc, false);
15561556
if (block1->translatable)
1557-
translate_chained_block(state, rv, block1, set);
1557+
translate_chained_block(state, rv, block1);
15581558
}
15591559
branch_history_table_t *bt = ir->branch_table;
15601560
if (bt) {
@@ -1566,26 +1566,30 @@ static void translate_chained_block(struct jit_state *state,
15661566
max_idx = i;
15671567
}
15681568
if (bt->PC[max_idx] && bt->times[max_idx] >= IN_JUMP_THRESHOLD &&
1569-
!set_has(set, bt->PC[max_idx])) {
1569+
!set_has(&state->set, bt->PC[max_idx])) {
15701570
block_t *block1 =
15711571
cache_get(rv->block_cache, bt->PC[max_idx], false);
15721572
if (block1 && block1->translatable)
1573-
translate_chained_block(state, rv, block1, set);
1573+
translate_chained_block(state, rv, block1);
15741574
}
15751575
}
15761576
}
15771577

15781578
uint32_t jit_translate(riscv_t *rv, block_t *block)
15791579
{
15801580
struct jit_state *state = rv->jit_state;
1581-
memset(state->offset_map, 0, MAX_INSNS * sizeof(struct offset_map));
1582-
memset(state->jumps, 0, MAX_INSNS * sizeof(struct jump));
1583-
state->n_insn = 0;
1581+
if (set_has(&state->set, block->pc_start)) {
1582+
for (int i = 0; i < state->n_blocks; i++) {
1583+
if (block->pc_start == state->offset_map[i].pc) {
1584+
return state->offset_map[i].offset;
1585+
}
1586+
}
1587+
__UNREACHABLE;
1588+
}
1589+
memset(state->jumps, 0, 1024 * sizeof(struct jump));
15841590
state->n_jumps = 0;
15851591
uint32_t entry_loc = state->offset;
1586-
set_t set;
1587-
set_reset(&set);
1588-
translate_chained_block(&(*state), rv, block, &set);
1592+
translate_chained_block(&(*state), rv, block);
15891593
if (state->offset == state->size) {
15901594
printf("Target buffer too small\n");
15911595
goto out;
@@ -1608,10 +1612,12 @@ struct jit_state *jit_state_init(size_t size)
16081612
#endif
16091613
,
16101614
-1, 0);
1615+
state->n_blocks = 0;
16111616
assert(state->buf != MAP_FAILED);
1617+
set_reset(&state->set);
16121618
prepare_translate(state);
1613-
state->offset_map = calloc(MAX_INSNS, sizeof(struct offset_map));
1614-
state->jumps = calloc(MAX_INSNS, sizeof(struct jump));
1619+
state->offset_map = calloc(MAX_BLOCKS, sizeof(struct offset_map));
1620+
state->jumps = calloc(MAX_JUMPS, sizeof(struct jump));
16151621
return state;
16161622
}
16171623

src/jit.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <stdint.h>
99

1010
#include "riscv_private.h"
11+
#include "utils.h"
1112

1213
struct jump {
1314
uint32_t offset_loc;
@@ -21,6 +22,7 @@ struct offset_map {
2122
};
2223

2324
struct jit_state {
25+
set_t set;
2426
uint8_t *buf;
2527
uint32_t offset;
2628
uint32_t stack_size;
@@ -29,7 +31,7 @@ struct jit_state {
2931
uint32_t exit_loc;
3032
uint32_t retpoline_loc;
3133
struct offset_map *offset_map;
32-
int n_insn;
34+
int n_blocks;
3335
struct jump *jumps;
3436
int n_jumps;
3537
};

0 commit comments

Comments
 (0)