Skip to content

Commit c935089

Browse files
authored
Lock register to avoid spilling it out by register allocator (#1188)
In one instruction, if one or multiple operands tending to lock some hardware registers in IR phase, like EAX, EDX for DIV, ECX for SHIFT, it leads to two known cases. case 1: allocate VOID `SHRU i250,i249,i3`. if pr_3 was allocated to vr_249 first, incoming allocation of vr_3 leads a spill out of `vr_249` and clear the value of `vr->hreg` of vr_249. When applying allocation result in FOREACH in L732, a NULL will be assigned to. case 2: unexpected spill out `DIV_U i1,i1,i44`. if allocation of vr_44 needs to spill out one hardware register, there is a chance that `hr_4` will be selected. If it happens, codegen will operate EDX and overwrite vr_44 value. The reason of how `hr_4` will be spilled out is a hidden bug that both information of `rc->hreg[]` and `rc->vreg` can be transfered from one block to the next one. It means even there is no vr binds to a hr in current block, the hr may still be thought as a busy one becase of the left infroamtion of previous blocks Workaround for cases: - Add `MOV LOCKED_hr LOCKED_hr` just after the instruction. It prevents case 1 - Add `MOV LOCKED_hr LOCKED_hr` just before the instruction. It prevents case 2
1 parent 8350d98 commit c935089

File tree

8 files changed

+136
-40
lines changed

8 files changed

+136
-40
lines changed

core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6172,30 +6172,32 @@ static const uint8 hreg_info_I64[3][16] = {
61726172
r8, r9, r10, r11, r12, r13, r14, r15 */
61736173
{ 1, 1, 1, 1, 1, 1, 1, 1,
61746174
0, 0, 0, 0, 0, 0, 0, 1 }, /* fixed, rsi is freely used */
6175-
{ 0, 0, 0, 0, 0, 0, 0, 0,
6175+
{ 0, 1, 0, 1, 1, 1, 0, 0,
61766176
1, 1, 1, 1, 0, 0, 0, 0 }, /* caller_saved_native */
6177-
{ 0, 0, 0, 0, 0, 0, 0, 0,
6177+
{ 0, 1, 1, 1, 1, 1, 0, 0,
61786178
1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
61796179
};
61806180

6181+
/* System V AMD64 ABI Calling Conversion. [XYZ]MM0-7 */
61816182
static uint8 hreg_info_F32[3][16] = {
61826183
/* xmm0 ~ xmm15 */
61836184
{ 0, 0, 0, 0, 0, 0, 0, 0,
61846185
1, 1, 1, 1, 1, 1, 1, 1 },
61856186
{ 1, 1, 1, 1, 1, 1, 1, 1,
6186-
1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_native */
6187+
0, 0, 0, 0, 0, 0, 0, 0 }, /* caller_saved_native */
61876188
{ 1, 1, 1, 1, 1, 1, 1, 1,
6188-
1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_jitted */
6189+
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
61896190
};
61906191

6192+
/* System V AMD64 ABI Calling Conversion. [XYZ]MM0-7 */
61916193
static uint8 hreg_info_F64[3][16] = {
61926194
/* xmm0 ~ xmm15 */
61936195
{ 1, 1, 1, 1, 1, 1, 1, 1,
61946196
0, 0, 0, 0, 0, 0, 0, 0 },
61956197
{ 1, 1, 1, 1, 1, 1, 1, 1,
6196-
1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_native */
6198+
0, 0, 0, 0, 0, 0, 0, 0 }, /* caller_saved_native */
61976199
{ 1, 1, 1, 1, 1, 1, 1, 1,
6198-
1, 1, 1, 1, 1, 1, 1, 1 }, /* TODO:caller_saved_jitted */
6200+
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
61996201
};
62006202

62016203
static const JitHardRegInfo hreg_info = {

core/iwasm/fast-jit/fe/jit_emit_conversion.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ jit_compile_op_i32_trunc_f32(JitCompContext *cc, bool sign, bool saturating)
6464
}
6565
*(jit_insn_opndv(insn, 2)) = value;
6666

67+
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
68+
jit_lock_reg_in_insn(cc, insn, nan_ret);
69+
#endif
70+
6771
GEN_INSN(CMP, cc->cmp_reg, nan_ret, NEW_CONST(I32, 1));
6872
if (!jit_emit_exception(cc, EXCE_INVALID_CONVERSION_TO_INTEGER,
6973
JIT_OP_BEQ, cc->cmp_reg, NULL)) {
@@ -133,6 +137,10 @@ jit_compile_op_i32_trunc_f64(JitCompContext *cc, bool sign, bool saturating)
133137
}
134138
*(jit_insn_opndv(insn, 2)) = value;
135139

140+
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
141+
jit_lock_reg_in_insn(cc, insn, nan_ret);
142+
#endif
143+
136144
GEN_INSN(CMP, cc->cmp_reg, nan_ret, NEW_CONST(I32, 1));
137145
if (!jit_emit_exception(cc, EXCE_INVALID_CONVERSION_TO_INTEGER,
138146
JIT_OP_BEQ, cc->cmp_reg, NULL)) {
@@ -270,6 +278,10 @@ jit_compile_op_f32_convert_i64(JitCompContext *cc, bool sign)
270278
goto fail;
271279
}
272280
*(jit_insn_opndv(insn, 2)) = value;
281+
282+
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
283+
jit_lock_reg_in_insn(cc, insn, res);
284+
#endif
273285
}
274286

275287
PUSH_F32(res);
@@ -330,7 +342,10 @@ jit_compile_op_f64_convert_i64(JitCompContext *cc, bool sign)
330342
else {
331343
JitInsn *insn;
332344
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
345+
JitReg xmm0;
346+
333347
res = jit_codegen_get_hreg_by_name("xmm0_f64");
348+
xmm0 = jit_codegen_get_hreg_by_name("xmm0");
334349
#else
335350
res = jit_cc_new_reg_F64(cc);
336351
#endif
@@ -340,6 +355,9 @@ jit_compile_op_f64_convert_i64(JitCompContext *cc, bool sign)
340355
goto fail;
341356
}
342357
*(jit_insn_opndv(insn, 2)) = value;
358+
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
359+
jit_lock_reg_in_insn(cc, insn, xmm0);
360+
#endif
343361
}
344362
PUSH_F64(res);
345363

core/iwasm/fast-jit/fe/jit_emit_function.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ jit_compile_op_call(JitCompContext *cc, uint32 func_idx, bool tail_call)
175175
*(jit_insn_opndv(insn, 4)) = cc->fp_reg;
176176
}
177177

178+
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
179+
jit_lock_reg_in_insn(cc, insn, native_ret);
180+
#endif
181+
178182
/* Check whether there is exception thrown */
179183
GEN_INSN(CMP, cc->cmp_reg, native_ret, NEW_CONST(I32, 0));
180184
if (!jit_emit_exception(cc, EXCE_ALREADY_THROWN, JIT_OP_BEQ,
@@ -353,6 +357,8 @@ jit_compile_op_call_indirect(JitCompContext *cc, uint32 type_idx,
353357
*(jit_insn_opndv(insn, 5)) = NEW_CONST(I32, func_type->param_count);
354358
*(jit_insn_opndv(insn, 6)) = argv;
355359

360+
jit_lock_reg_in_insn(cc, insn, native_ret);
361+
356362
/* Check whether there is exception thrown */
357363
GEN_INSN(CMP, cc->cmp_reg, native_ret, NEW_CONST(I32, 0));
358364
if (!jit_emit_exception(cc, EXCE_ALREADY_THROWN, JIT_OP_BEQ, cc->cmp_reg,

core/iwasm/fast-jit/fe/jit_emit_memory.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,10 @@ jit_compile_op_memory_grow(JitCompContext *cc, uint32 mem_idx)
500500
*(jit_insn_opndv(insn, 3)) = delta;
501501
}
502502

503+
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
504+
jit_lock_reg_in_insn(cc, insn, grow_result);
505+
#endif
506+
503507
/* check if enlarge memory success */
504508
res = jit_cc_new_reg_I32(cc);
505509
GEN_INSN(CMP, cc->cmp_reg, grow_result, NEW_CONST(I32, 0));

core/iwasm/fast-jit/fe/jit_emit_numberic.c

Lines changed: 41 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -510,57 +510,60 @@ compile_int_div_no_check(JitCompContext *cc, IntArithmetic arith_op,
510510
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
511511
case INT_DIV_S:
512512
case INT_DIV_U:
513+
{
514+
JitInsn *insn = NULL;
515+
513516
if (is_i32) {
514517
GEN_INSN(MOV, eax_hreg, left);
515518
if (arith_op == INT_DIV_S)
516-
GEN_INSN(DIV_S, eax_hreg, eax_hreg, right);
519+
insn = GEN_INSN(DIV_S, eax_hreg, eax_hreg, right);
517520
else
518-
GEN_INSN(DIV_U, eax_hreg, eax_hreg, right);
519-
/* Just to indicate that edx is used,
520-
register allocator cannot spill it out */
521-
GEN_INSN(MOV, edx_hreg, edx_hreg);
521+
insn = GEN_INSN(DIV_U, eax_hreg, eax_hreg, right);
522+
522523
res = eax_hreg;
523524
}
524525
else {
525526
GEN_INSN(MOV, rax_hreg, left);
526-
/* Just to indicate that eax is used,
527-
register allocator cannot spill it out */
528-
GEN_INSN(MOV, eax_hreg, eax_hreg);
529527
if (arith_op == INT_DIV_S)
530-
GEN_INSN(DIV_S, rax_hreg, rax_hreg, right);
528+
insn = GEN_INSN(DIV_S, rax_hreg, rax_hreg, right);
531529
else
532-
GEN_INSN(DIV_U, rax_hreg, rax_hreg, right);
533-
/* Just to indicate that edx is used,
534-
register allocator cannot spill it out */
535-
GEN_INSN(MOV, edx_hreg, edx_hreg);
530+
insn = GEN_INSN(DIV_U, rax_hreg, rax_hreg, right);
531+
536532
res = rax_hreg;
537533
}
534+
535+
jit_lock_reg_in_insn(cc, insn, eax_hreg);
536+
jit_lock_reg_in_insn(cc, insn, edx_hreg);
538537
break;
538+
}
539539
case INT_REM_S:
540540
case INT_REM_U:
541+
{
542+
JitInsn *insn = NULL;
543+
541544
if (is_i32) {
542545
GEN_INSN(MOV, eax_hreg, left);
543546
if (arith_op == INT_REM_S)
544-
GEN_INSN(REM_S, edx_hreg, eax_hreg, right);
547+
insn = GEN_INSN(REM_S, edx_hreg, eax_hreg, right);
545548
else
546-
GEN_INSN(REM_U, edx_hreg, eax_hreg, right);
549+
insn = GEN_INSN(REM_U, edx_hreg, eax_hreg, right);
550+
547551
res = edx_hreg;
548552
}
549553
else {
550554
GEN_INSN(MOV, rax_hreg, left);
551-
/* Just to indicate that eax is used,
552-
register allocator cannot spill it out */
553-
GEN_INSN(MOV, eax_hreg, eax_hreg);
554555
if (arith_op == INT_REM_S)
555-
GEN_INSN(REM_S, rdx_hreg, rax_hreg, right);
556+
insn = GEN_INSN(REM_S, rdx_hreg, rax_hreg, right);
556557
else
557-
GEN_INSN(REM_U, rdx_hreg, rax_hreg, right);
558-
/* Just to indicate that edx is used,
559-
register allocator cannot spill it out */
560-
GEN_INSN(MOV, edx_hreg, edx_hreg);
558+
insn = GEN_INSN(REM_U, rdx_hreg, rax_hreg, right);
559+
561560
res = rdx_hreg;
562561
}
562+
563+
jit_lock_reg_in_insn(cc, insn, eax_hreg);
564+
jit_lock_reg_in_insn(cc, insn, edx_hreg);
563565
break;
566+
}
564567
#else
565568
case INT_DIV_S:
566569
GEN_INSN(DIV_S, res, left, right);
@@ -1050,6 +1053,7 @@ compile_int_shl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
10501053
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
10511054
JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
10521055
JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
1056+
JitInsn *insn = NULL;
10531057
#endif
10541058

10551059
right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1063,8 +1067,8 @@ compile_int_shl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
10631067
res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
10641068
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
10651069
GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
1066-
GEN_INSN(SHL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1067-
GEN_INSN(MOV, ecx_hreg, ecx_hreg);
1070+
insn = GEN_INSN(SHL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1071+
jit_lock_reg_in_insn(cc, insn, ecx_hreg);
10681072
#else
10691073
GEN_INSN(SHL, res, left, right);
10701074
#endif
@@ -1080,6 +1084,7 @@ compile_int_shrs(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
10801084
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
10811085
JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
10821086
JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
1087+
JitInsn *insn = NULL;
10831088
#endif
10841089

10851090
right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1093,8 +1098,8 @@ compile_int_shrs(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
10931098
res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
10941099
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
10951100
GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
1096-
GEN_INSN(SHRS, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1097-
GEN_INSN(MOV, ecx_hreg, ecx_hreg);
1101+
insn = GEN_INSN(SHRS, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1102+
jit_lock_reg_in_insn(cc, insn, ecx_hreg);
10981103
#else
10991104
GEN_INSN(SHRS, res, left, right);
11001105
#endif
@@ -1110,6 +1115,7 @@ compile_int_shru(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
11101115
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
11111116
JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
11121117
JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
1118+
JitInsn *insn = NULL;
11131119
#endif
11141120

11151121
right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1123,8 +1129,8 @@ compile_int_shru(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
11231129
res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
11241130
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
11251131
GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
1126-
GEN_INSN(SHRU, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1127-
GEN_INSN(MOV, ecx_hreg, ecx_hreg);
1132+
insn = GEN_INSN(SHRU, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1133+
jit_lock_reg_in_insn(cc, insn, ecx_hreg);
11281134
#else
11291135
GEN_INSN(SHRU, res, left, right);
11301136
#endif
@@ -1171,6 +1177,7 @@ compile_int_rotl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
11711177
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
11721178
JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
11731179
JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
1180+
JitInsn *insn = NULL;
11741181
#endif
11751182

11761183
right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1184,8 +1191,8 @@ compile_int_rotl(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
11841191
res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
11851192
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
11861193
GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
1187-
GEN_INSN(ROTL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1188-
GEN_INSN(MOV, ecx_hreg, ecx_hreg);
1194+
insn = GEN_INSN(ROTL, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1195+
jit_lock_reg_in_insn(cc, insn, ecx_hreg);
11891196
#else
11901197
GEN_INSN(ROTL, res, left, right);
11911198
#endif
@@ -1232,6 +1239,7 @@ compile_int_rotr(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
12321239
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
12331240
JitReg ecx_hreg = jit_codegen_get_hreg_by_name("ecx");
12341241
JitReg rcx_hreg = jit_codegen_get_hreg_by_name("rcx");
1242+
JitInsn *insn = NULL;
12351243
#endif
12361244

12371245
right = compile_int_shift_modulo(cc, right, is_i32);
@@ -1245,8 +1253,8 @@ compile_int_rotr(JitCompContext *cc, JitReg left, JitReg right, bool is_i32)
12451253
res = is_i32 ? jit_cc_new_reg_I32(cc) : jit_cc_new_reg_I64(cc);
12461254
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
12471255
GEN_INSN(MOV, is_i32 ? ecx_hreg : rcx_hreg, right);
1248-
GEN_INSN(ROTR, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1249-
GEN_INSN(MOV, ecx_hreg, ecx_hreg);
1256+
insn = GEN_INSN(ROTR, res, left, is_i32 ? ecx_hreg : rcx_hreg);
1257+
jit_lock_reg_in_insn(cc, insn, ecx_hreg);
12501258
#else
12511259
GEN_INSN(ROTR, res, left, right);
12521260
#endif

core/iwasm/fast-jit/jit_ir.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1558,3 +1558,46 @@ _jit_insn_check_opnd_access_LookupSwitch(const JitInsn *insn)
15581558
unsigned opcode = insn->opcode;
15591559
return (insn_opnd_kind[opcode] == JIT_OPND_KIND_LookupSwitch);
15601560
}
1561+
1562+
bool
1563+
jit_lock_reg_in_insn(JitCompContext *cc, JitInsn *the_insn, JitReg reg_to_lock)
1564+
{
1565+
bool ret = false;
1566+
JitInsn *prevent_spill = NULL;
1567+
JitInsn *indicate_using = NULL;
1568+
1569+
if (!the_insn)
1570+
goto just_return;
1571+
1572+
if (jit_cc_is_hreg_fixed(cc, reg_to_lock)) {
1573+
ret = true;
1574+
goto just_return;
1575+
}
1576+
1577+
/**
1578+
* give the virtual register of the locked hard register a minimum, non-zero
1579+
* distance, * so as to prevent it from being spilled out
1580+
*/
1581+
prevent_spill = jit_insn_new_MOV(reg_to_lock, reg_to_lock);
1582+
if (!prevent_spill)
1583+
goto just_return;
1584+
1585+
jit_insn_insert_before(the_insn, prevent_spill);
1586+
1587+
/**
1588+
* announce the locked hard register is being used, and do necessary spill
1589+
* ASAP
1590+
*/
1591+
indicate_using = jit_insn_new_MOV(reg_to_lock, reg_to_lock);
1592+
if (!indicate_using)
1593+
goto just_return;
1594+
1595+
jit_insn_insert_after(the_insn, indicate_using);
1596+
1597+
ret = true;
1598+
1599+
just_return:
1600+
if (!ret)
1601+
jit_set_last_error(cc, "generate insn failed");
1602+
return ret;
1603+
}

core/iwasm/fast-jit/jit_ir.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,6 +1882,9 @@ jit_cc_push_value(JitCompContext *cc, uint8 type, JitReg value);
18821882
bool
18831883
jit_cc_pop_value(JitCompContext *cc, uint8 type, JitReg *p_value);
18841884

1885+
bool
1886+
jit_lock_reg_in_insn(JitCompContext *cc, JitInsn *the_insn, JitReg reg_to_lock);
1887+
18851888
/**
18861889
* Update the control flow graph after successors of blocks are
18871890
* changed so that the predecessor vector of each block represents the

0 commit comments

Comments
 (0)