Skip to content

Commit dc13bec

Browse files
authored
Merge pull request #355 from qwe661234/improve_idj
Improve the performance of indirect jump for T1C
2 parents d99884e + 8c2a753 commit dc13bec

File tree

4 files changed

+82
-11
lines changed

4 files changed

+82
-11
lines changed

src/decode.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,9 +278,13 @@ typedef struct {
278278

279279
#define HISTORY_SIZE 16
280280
typedef struct {
281-
uint8_t idx;
282281
uint32_t PC[HISTORY_SIZE];
282+
#if !RV32_HAS(JIT)
283+
uint8_t idx;
283284
struct rv_insn *target[HISTORY_SIZE];
285+
#else
286+
uint32_t times[HISTORY_SIZE];
287+
#endif
284288
} branch_history_table_t;
285289

286290
typedef struct rv_insn {

src/jit.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,6 +1336,26 @@ static void ra_load2_sext(struct jit_state *state,
13361336
}
13371337
}
13381338

1339+
/* Emit a guarded direct jump to the most frequently recorded target of an
 * indirect jump.
 *
 * The branch history table of @ir is scanned for the entry with the largest
 * hit count in times[]. The scan stops at the first entry whose count is
 * zero, and on ties the earliest entry wins because the comparison is
 * strict. If the selected entry holds a non-zero PC, code is emitted that
 * loads that PC into register_map[0], compares it with temp_reg, and jumps
 * to the recorded PC; the conditional jump emitted in between is patched to
 * land right after the emitted jump. Nothing is emitted when the selected
 * PC is zero.
 *
 * @state: JIT state that receives the emitted code
 * @ir: instruction whose branch_table is read; it is dereferenced without a
 *      NULL check
 *
 * NOTE(review): temp_reg presumably holds the computed jump target at this
 * point (the templates run "predict" after filling TMP) - confirm.
 */
void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir)
1340+
{
1341+
int max_idx = 0;
1342+
branch_history_table_t *bt = ir->branch_table;
1343+
for (int i = 0; i < HISTORY_SIZE; i++) {
1344+
if (!bt->times[i]) /* zero count: stop scanning here */
1345+
break;
1346+
if (bt->times[max_idx] < bt->times[i])
1347+
max_idx = i;
1348+
}
1349+
if (bt->PC[max_idx]) {
1350+
emit_load_imm(state, register_map[0], bt->PC[max_idx]);
1351+
emit_cmp32(state, temp_reg, register_map[0]);
1352+
uint32_t jump_loc = state->offset; /* offset of the conditional jump */
1353+
emit_jcc_offset(state, 0x85); /* presumably "jump if not equal" - confirm */
1354+
emit_jmp(state, bt->PC[max_idx]);
1355+
/* JUMP_LOC is defined outside this hunk; presumably derived from jump_loc */
emit_jump_target_offset(state, JUMP_LOC, state->offset);
1356+
}
1357+
}
1358+
13391359
#define GEN(inst, code) \
13401360
static void do_##inst(struct jit_state *state UNUSED, riscv_t *rv UNUSED, \
13411361
rv_insn_t *ir UNUSED) \
@@ -1532,6 +1552,22 @@ static void translate_chained_block(struct jit_state *state,
15321552
if (block1->translatable)
15331553
translate_chained_block(state, rv, block1, set);
15341554
}
1555+
branch_history_table_t *bt = ir->branch_table;
1556+
if (bt) {
1557+
int max_idx = 0;
1558+
for (int i = 0; i < HISTORY_SIZE; i++) {
1559+
if (!bt->times[i])
1560+
break;
1561+
if (bt->times[max_idx] < bt->times[i])
1562+
max_idx = i;
1563+
}
1564+
if (bt->PC[max_idx] && !set_has(set, bt->PC[max_idx])) {
1565+
block_t *block1 =
1566+
cache_get(rv->block_cache, bt->PC[max_idx], false);
1567+
if (block1 && block1->translatable)
1568+
translate_chained_block(state, rv, block1, set);
1569+
}
1570+
}
15351571
}
15361572

15371573
uint32_t jit_translate(riscv_t *rv, block_t *block)

src/rv32_template.c

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,11 @@
107107
* | | store the result into dst. |
108108
* | cond, src; | set condition if (src) |
109109
* | end; | set the end of condition if (src) |
110+
* | predict; | parse the branch table of indirect |
111+
* | | jump and search the jump target with |
112+
* | | maximal frequency. Then compare |
113+
* | | and jump to the target if the |
114+
* | | program counter matches. |
110115
* | break; | In the end of a basic block, we need |
111116
* | | to store all VM register value to rv |
112117
* | | data, because the register allocation |
@@ -197,6 +202,7 @@ RVOP(
197202
* recorded. Additionally, the C code generator can reference the branch history
198203
* table to link the indirect jump targets.
199204
*/
205+
#if !RV32_HAS(JIT)
200206
#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE() \
201207
/* lookup branch history table */ \
202208
for (int i = 0; i < HISTORY_SIZE; i++) { \
@@ -213,7 +219,33 @@ RVOP(
213219
ir->branch_table->idx = (ir->branch_table->idx + 1) % HISTORY_SIZE; \
214220
MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, PC); \
215221
}
216-
222+
#else
223+
/* JIT variant: record how often each indirect-jump target is taken.
 *
 * Expects rv, ir, PC and cycle to be in scope, and declares a local named
 * "block" in the enclosing scope. The block for PC is fetched with
 * cache_get(); if none is returned, the macro does nothing further and
 * execution continues with the statements after the invocation.
 *
 * When a block is returned:
 *   - if PC is already recorded in ir->branch_table, its times[] counter is
 *     incremented and control is passed to the block's first instruction;
 *   - otherwise a slot is chosen: the first slot whose counter is zero, or,
 *     if every counter is non-zero, the slot with the smallest counter (the
 *     earliest one on ties). That slot is overwritten with PC and a counter
 *     of 1, and control is passed to the block's first instruction.
 */
#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE() \
224+
block_t *block = cache_get(rv->block_cache, PC, true); \
225+
if (block) { \
226+
for (int i = 0; i < HISTORY_SIZE; i++) { \
227+
if (ir->branch_table->PC[i] == PC) { \
228+
ir->branch_table->times[i]++; \
229+
MUST_TAIL return block->ir_head->impl(rv, block->ir_head, \
230+
cycle, PC); \
231+
} \
232+
} \
233+
/* update branch history table */ \
234+
int min_idx = 0; \
235+
for (int i = 0; i < HISTORY_SIZE; i++) { \
236+
if (!ir->branch_table->times[i]) { \
237+
min_idx = i; \
238+
break; \
239+
} else if (ir->branch_table->times[min_idx] > \
240+
ir->branch_table->times[i]) { \
241+
min_idx = i; \
242+
} \
243+
} \
244+
ir->branch_table->times[min_idx] = 1; \
245+
ir->branch_table->PC[min_idx] = PC; \
246+
MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, PC); \
247+
}
248+
#endif
217249
/* The indirect jump instruction JALR uses the I-type encoding. The target
218250
* address is obtained by adding the sign-extended 12-bit I-immediate to the
219251
* register rs1, then setting the least-significant bit of the result to zero.
@@ -234,9 +266,7 @@ RVOP(
234266
#if !RV32_HAS(EXT_C)
235267
RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
236268
#endif
237-
#if !RV32_HAS(JIT)
238269
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE();
239-
#endif
240270
rv->csr_cycle = cycle;
241271
rv->PC = PC;
242272
return true;
@@ -250,8 +280,9 @@ RVOP(
250280
mov, VR1, TMP;
251281
alu32imm, 32, 0x81, 0, TMP, imm;
252282
alu32imm, 32, 0x81, 4, TMP, ~1U;
253-
st, S32, TMP, PC;
254283
break;
284+
predict;
285+
st, S32, TMP, PC;
255286
exit;
256287
}))
257288

@@ -2254,18 +2285,17 @@ RVOP(
22542285
cjr,
22552286
{
22562287
PC = rv->X[ir->rs1];
2257-
#if !RV32_HAS(JIT)
22582288
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE();
2259-
#endif
22602289
rv->csr_cycle = cycle;
22612290
rv->PC = PC;
22622291
return true;
22632292
},
22642293
GEN({
22652294
rald, VR0, rs1;
22662295
mov, VR0, TMP;
2267-
st, S32, TMP, PC;
22682296
break;
2297+
predict;
2298+
st, S32, TMP, PC;
22692299
exit;
22702300
}))
22712301

@@ -2307,9 +2337,7 @@ RVOP(
23072337
const int32_t jump_to = rv->X[ir->rs1];
23082338
rv->X[rv_reg_ra] = PC + 2;
23092339
PC = jump_to;
2310-
#if !RV32_HAS(JIT)
23112340
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE();
2312-
#endif
23132341
rv->csr_cycle = cycle;
23142342
rv->PC = PC;
23152343
return true;
@@ -2319,8 +2347,9 @@ RVOP(
23192347
ldimm, VR0, pc, 2;
23202348
rald, VR1, rs1;
23212349
mov, VR1, TMP;
2322-
st, S32, TMP, PC;
23232350
break;
2351+
predict;
2352+
st, S32, TMP, PC;
23242353
exit;
23252354
}))
23262355

tools/gen-jit-template.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ def parse_argv(EXT_LIST, SKIP_LIST):
254254
asm = "store_back(state);"
255255
elif items[0] == "assert":
256256
asm = "assert(NULL);"
257+
elif items[0] == "predict":
258+
asm = "parse_branch_history_table(state, ir);"
257259
output += asm + "\n"
258260
output += "})\n"
259261

0 commit comments

Comments
 (0)