Skip to content

Commit 8c2a753

Browse files
committed
Improve the performance of indirect jump for T1C
Considering that T1C ends with an indirect jump, the interpreter mode now records the targets of the indirect jump and identifies the most frequent jump target for T1C. The T1-generated code for the indirect jump compares the actual target with the selected jump target and executes the jump if the two are equal. With this enhancement, T1C and T2C, which are integrated later, can proceed through an indirect jump if the target PC matches the selected jump target. Based on the performance analysis, benchmarks with a significant number of indirect jumps show improved performance. | Metric | N_in_jmp | Original | Proposed | Speedup | |----------+-----------+----------+----------+---------| |miniz | 2098313| 1.266 s| 1.225 s| +3.35%| |sha512 | 10500727| 1.905 s| 1.861 s| +2.36%| |dhrystone | 20000618| 0.325 s| 0.253 s| +28.46%| |nqueens | 44658722| 1.051 s| 0.79 s| +33.04%| |qsort | 275000250| 1.978 s| 1.517 s| +30.39%|
1 parent ccf8b9b commit 8c2a753

File tree

4 files changed

+82
-11
lines changed

4 files changed

+82
-11
lines changed

src/decode.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,9 +278,13 @@ typedef struct {
278278

279279
#define HISTORY_SIZE 16
280280
typedef struct {
281-
uint8_t idx;
282281
uint32_t PC[HISTORY_SIZE];
282+
#if !RV32_HAS(JIT)
283+
uint8_t idx;
283284
struct rv_insn *target[HISTORY_SIZE];
285+
#else
286+
uint32_t times[HISTORY_SIZE];
287+
#endif
284288
} branch_history_table_t;
285289

286290
typedef struct rv_insn {

src/jit.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,6 +1336,26 @@ static void ra_load2_sext(struct jit_state *state,
13361336
}
13371337
}
13381338

1339+
/* Emit a guarded direct jump to the most frequently recorded target of the
 * indirect jump described by `ir`.
 *
 * Scans ir->branch_table->times[] for the entry with the largest counter,
 * stopping at the first zero counter. If the PC stored for that entry is
 * non-zero, code is emitted that loads that PC into register_map[0], compares
 * it with temp_reg, and jumps to the recorded PC; a conditional branch emitted
 * before the jump skips it when the comparison fails.
 *
 * NOTE(review): ir->branch_table is dereferenced without a NULL check here,
 * whereas translate_chained_block() tests the pointer first -- confirm that
 * every instruction using `predict` always has a branch table.
 */
void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir)
1340+
{
1341+
int max_idx = 0;
1342+
branch_history_table_t *bt = ir->branch_table;
1343+
/* Pick the slot with the highest hit count; a zero counter ends the scan. */
for (int i = 0; i < HISTORY_SIZE; i++) {
1344+
if (!bt->times[i])
1345+
break;
1346+
if (bt->times[max_idx] < bt->times[i])
1347+
max_idx = i;
1348+
}
1349+
/* A PC of 0 is treated as "no target recorded": nothing is emitted. */
if (bt->PC[max_idx]) {
1350+
emit_load_imm(state, register_map[0], bt->PC[max_idx]);
1351+
emit_cmp32(state, temp_reg, register_map[0]);
1352+
/* Remember where the conditional branch starts so it can be fixed up. */
uint32_t jump_loc = state->offset;
1353+
/* 0x85 presumably selects the "not equal" condition (x86 JNE) -- confirm. */
emit_jcc_offset(state, 0x85);
1354+
emit_jmp(state, bt->PC[max_idx]);
1355+
/* Presumably resolves the conditional branch above to the current offset,
 * i.e. just past the emitted jump -- verify against
 * emit_jump_target_offset() and the JUMP_LOC definition. */
emit_jump_target_offset(state, JUMP_LOC, state->offset);
1356+
}
1357+
}
1358+
13391359
#define GEN(inst, code) \
13401360
static void do_##inst(struct jit_state *state UNUSED, riscv_t *rv UNUSED, \
13411361
rv_insn_t *ir UNUSED) \
@@ -1532,6 +1552,22 @@ static void translate_chained_block(struct jit_state *state,
15321552
if (block1->translatable)
15331553
translate_chained_block(state, rv, block1, set);
15341554
}
1555+
branch_history_table_t *bt = ir->branch_table;
1556+
if (bt) {
1557+
int max_idx = 0;
1558+
for (int i = 0; i < HISTORY_SIZE; i++) {
1559+
if (!bt->times[i])
1560+
break;
1561+
if (bt->times[max_idx] < bt->times[i])
1562+
max_idx = i;
1563+
}
1564+
if (bt->PC[max_idx] && !set_has(set, bt->PC[max_idx])) {
1565+
block_t *block1 =
1566+
cache_get(rv->block_cache, bt->PC[max_idx], false);
1567+
if (block1 && block1->translatable)
1568+
translate_chained_block(state, rv, block1, set);
1569+
}
1570+
}
15351571
}
15361572

15371573
uint32_t jit_translate(riscv_t *rv, block_t *block)

src/rv32_template.c

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,11 @@
107107
* | | store the result into dst. |
108108
* | cond, src; | set condition if (src) |
109109
* | end; | set the end of condition if (src) |
110+
* | predict; | parse the branch table of indirect |
111+
* | | jump and search the jump target with |
112+
* | | maximal frequency. Then, compare |
113+
* | | and jump to the target if the |
114+
* | | program counter matches. |
110115
* | break; | In the end of a basic block, we need |
111116
* | | to store all VM register value to rv |
112117
* | | data, because the register allocation |
@@ -197,6 +202,7 @@ RVOP(
197202
* recorded. Additionally, the C code generator can reference the branch history
198203
* table to link the indirect jump targets.
199204
*/
205+
#if !RV32_HAS(JIT)
200206
#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE() \
201207
/* lookup branch history table */ \
202208
for (int i = 0; i < HISTORY_SIZE; i++) { \
@@ -213,7 +219,33 @@ RVOP(
213219
ir->branch_table->idx = (ir->branch_table->idx + 1) % HISTORY_SIZE; \
214220
MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, PC); \
215221
}
216-
222+
#else
223+
/* JIT build: count how often each indirect-jump target is taken.
 *
 * Looks up the block for PC in rv->block_cache. When a block is found:
 *   - if PC already has an entry in ir->branch_table, its counter in times[]
 *     is incremented;
 *   - otherwise the first slot whose counter is zero, or failing that the
 *     slot with the smallest counter, is overwritten with PC and a counter
 *     of 1.
 * In both cases execution then tail-calls the first instruction of the
 * found block. When no block is found, the macro does nothing and control
 * continues with the statements that follow it.
 *
 * Expects `rv`, `ir`, `cycle` and `PC` to be in scope at the expansion site.
 */
#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE() \
224+
block_t *block = cache_get(rv->block_cache, PC, true); \
225+
if (block) { \
226+
for (int i = 0; i < HISTORY_SIZE; i++) { \
227+
if (ir->branch_table->PC[i] == PC) { \
228+
ir->branch_table->times[i]++; \
229+
MUST_TAIL return block->ir_head->impl(rv, block->ir_head, \
230+
cycle, PC); \
231+
} \
232+
} \
233+
/* update branch history table */ \
234+
int min_idx = 0; \
235+
for (int i = 0; i < HISTORY_SIZE; i++) { \
236+
if (!ir->branch_table->times[i]) { \
237+
min_idx = i; \
238+
break; \
239+
} else if (ir->branch_table->times[min_idx] > \
240+
ir->branch_table->times[i]) { \
241+
min_idx = i; \
242+
} \
243+
} \
244+
ir->branch_table->times[min_idx] = 1; \
245+
ir->branch_table->PC[min_idx] = PC; \
246+
MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, PC); \
247+
}
248+
#endif
217249
/* The indirect jump instruction JALR uses the I-type encoding. The target
218250
* address is obtained by adding the sign-extended 12-bit I-immediate to the
219251
* register rs1, then setting the least-significant bit of the result to zero.
@@ -234,9 +266,7 @@ RVOP(
234266
#if !RV32_HAS(EXT_C)
235267
RV_EXC_MISALIGN_HANDLER(pc, insn, false, 0);
236268
#endif
237-
#if !RV32_HAS(JIT)
238269
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE();
239-
#endif
240270
rv->csr_cycle = cycle;
241271
rv->PC = PC;
242272
return true;
@@ -250,8 +280,9 @@ RVOP(
250280
mov, VR1, TMP;
251281
alu32imm, 32, 0x81, 0, TMP, imm;
252282
alu32imm, 32, 0x81, 4, TMP, ~1U;
253-
st, S32, TMP, PC;
254283
break;
284+
predict;
285+
st, S32, TMP, PC;
255286
exit;
256287
}))
257288

@@ -2252,18 +2283,17 @@ RVOP(
22522283
cjr,
22532284
{
22542285
PC = rv->X[ir->rs1];
2255-
#if !RV32_HAS(JIT)
22562286
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE();
2257-
#endif
22582287
rv->csr_cycle = cycle;
22592288
rv->PC = PC;
22602289
return true;
22612290
},
22622291
GEN({
22632292
rald, VR0, rs1;
22642293
mov, VR0, TMP;
2265-
st, S32, TMP, PC;
22662294
break;
2295+
predict;
2296+
st, S32, TMP, PC;
22672297
exit;
22682298
}))
22692299

@@ -2305,9 +2335,7 @@ RVOP(
23052335
const int32_t jump_to = rv->X[ir->rs1];
23062336
rv->X[rv_reg_ra] = PC + 2;
23072337
PC = jump_to;
2308-
#if !RV32_HAS(JIT)
23092338
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE();
2310-
#endif
23112339
rv->csr_cycle = cycle;
23122340
rv->PC = PC;
23132341
return true;
@@ -2317,8 +2345,9 @@ RVOP(
23172345
ldimm, VR0, pc, 2;
23182346
rald, VR1, rs1;
23192347
mov, VR1, TMP;
2320-
st, S32, TMP, PC;
23212348
break;
2349+
predict;
2350+
st, S32, TMP, PC;
23222351
exit;
23232352
}))
23242353

tools/gen-jit-template.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ def parse_argv(EXT_LIST, SKIP_LIST):
254254
asm = "store_back(state);"
255255
elif items[0] == "assert":
256256
asm = "assert(NULL);"
257+
elif items[0] == "predict":
258+
asm = "parse_branch_history_table(state, ir);"
257259
output += asm + "\n"
258260
output += "})\n"
259261

0 commit comments

Comments
 (0)