Skip to content

Commit ab2e959

Browse files
authored
Fix fast jit issues (#1208)
Move jit spill cache to the end of interp frame to reduce footprint. Fix codegen compare float issue: should not overwrite the source registers. Fix float to int conversion check integer overflow issue. Unify the float compare. Fix get_global issue.
1 parent 5e9f08f commit ab2e959

File tree

9 files changed

+114
-120
lines changed

9 files changed

+114
-120
lines changed

core/config.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,6 @@
102102
#define WASM_ENABLE_FAST_JIT_DUMP 0
103103
#endif
104104

105-
#ifndef FAST_JIT_SPILL_CACHE_SIZE
106-
/* The size of fast jit spill cache in cell num, one cell num
107-
occupies 4 bytes */
108-
#define FAST_JIT_SPILL_CACHE_SIZE 32
109-
#endif
110-
111105
#ifndef WASM_ENABLE_WAMR_COMPILER
112106
#define WASM_ENABLE_WAMR_COMPILER 0
113107
#endif

core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp

Lines changed: 71 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ x86::Gp regs_i64[] = {
8282
x86::r12, x86::r13, x86::r14, x86::r15,
8383
};
8484

85+
#define REG_F32_FREE_IDX 15
86+
#define REG_F64_FREE_IDX 15
87+
8588
x86::Xmm regs_float[] = {
8689
x86::xmm0,
8790
x86::xmm1,
@@ -349,6 +352,8 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
349352

350353
bool fp_cmp = cc->last_cmp_on_fp;
351354

355+
bh_assert(!fp_cmp || (fp_cmp && (op == GES)));
356+
352357
switch (op) {
353358
case EQ:
354359
{
@@ -362,60 +367,52 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
362367
}
363368
case GTS:
364369
{
365-
if (fp_cmp) {
366-
a.ja(imm);
367-
}
368-
else {
369-
a.jg(imm);
370-
}
370+
a.jg(imm);
371371
break;
372372
}
373373
case LES:
374374
{
375-
if (fp_cmp) {
376-
a.jnb(imm);
377-
}
378-
else {
379-
a.jng(imm);
380-
}
375+
a.jng(imm);
381376
break;
382377
}
383378
case GES:
384379
{
385-
if (fp_cmp) {
386-
a.jnb(imm);
387-
}
388-
else {
389-
380+
if (fp_cmp)
381+
a.jae(imm);
382+
else
390383
a.jnl(imm);
391-
}
392384
break;
393385
}
394386
case LTS:
395387
{
396-
if (fp_cmp) {
397-
a.ja(imm);
398-
}
399-
else {
400-
a.jl(imm);
401-
}
388+
a.jl(imm);
402389
break;
403390
}
404391
case GTU:
392+
{
405393
a.ja(imm);
406394
break;
395+
}
407396
case LEU:
397+
{
408398
a.jna(imm);
409399
break;
400+
}
410401
case GEU:
402+
{
411403
a.jnb(imm);
412404
break;
405+
}
413406
case LTU:
407+
{
414408
a.jb(imm);
415409
break;
410+
}
416411
default:
412+
{
417413
bh_assert(0);
418414
break;
415+
}
419416
}
420417

421418
if (r2) {
@@ -761,10 +758,17 @@ static bool
761758
mov_imm_to_m(x86::Assembler &a, x86::Mem &m_dst, Imm imm_src, uint32 bytes_dst)
762759
{
763760
if (bytes_dst == 8) {
764-
/* As there is no instruction `MOV m64, imm64`, we use
765-
two instructions to implement it */
766-
a.mov(regs_i64[REG_I64_FREE_IDX], imm_src);
767-
a.mov(m_dst, regs_i64[REG_I64_FREE_IDX]);
761+
int64 value = imm_src.value();
762+
if (value >= INT32_MIN && value <= INT32_MAX) {
763+
imm_src.setValue((int32)value);
764+
a.mov(m_dst, imm_src);
765+
}
766+
else {
767+
/* There is no instruction `MOV m64, imm64`, we use
768+
two instructions to implement it */
769+
a.mov(regs_i64[REG_I64_FREE_IDX], imm_src);
770+
a.mov(m_dst, regs_i64[REG_I64_FREE_IDX]);
771+
}
768772
}
769773
else
770774
a.mov(m_dst, imm_src);
@@ -4220,17 +4224,8 @@ static bool
42204224
cmp_imm_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, float data1_src,
42214225
int32 reg_no2_src)
42224226
{
4223-
const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
4224-
/* xmm -> m128 */
4225-
x86::Mem cache = x86::xmmword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
4226-
offsetof(WASMExecEnv, jit_cache));
4227-
a.movups(cache, regs_float[reg_no2_src]);
4228-
4229-
/* imm -> gp -> xmm */
4230-
mov_imm_to_r_f32(a, reg_no2_src, data1_src);
4231-
4232-
/* comiss xmm m32 */
4233-
a.comiss(regs_float[reg_no2_src], cache);
4227+
mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data1_src);
4228+
a.comiss(regs_float[REG_F32_FREE_IDX], regs_float[reg_no2_src]);
42344229
return true;
42354230
}
42364231

@@ -4249,15 +4244,8 @@ static bool
42494244
cmp_r_imm_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
42504245
float data2_src)
42514246
{
4252-
const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
4253-
/* imm -> m32 */
4254-
x86::Mem cache = x86::dword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
4255-
offsetof(WASMExecEnv, jit_cache));
4256-
Imm imm(*(uint32 *)&data2_src);
4257-
mov_imm_to_m(a, cache, imm, 4);
4258-
4259-
/* comiss xmm m32 */
4260-
a.comiss(regs_float[reg_no1_src], cache);
4247+
mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data2_src);
4248+
a.comiss(regs_float[reg_no1_src], regs_float[REG_F32_FREE_IDX]);
42614249
return true;
42624250
}
42634251

@@ -4315,17 +4303,8 @@ static bool
43154303
cmp_imm_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, double data1_src,
43164304
int32 reg_no2_src)
43174305
{
4318-
const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
4319-
/* xmm -> m128 */
4320-
x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
4321-
offsetof(WASMExecEnv, jit_cache));
4322-
a.movupd(cache, regs_float[reg_no2_src]);
4323-
4324-
/* imm -> gp -> xmm */
4325-
mov_imm_to_r_f64(a, reg_no2_src, data1_src);
4326-
4327-
/* comiss xmm m64 */
4328-
a.comisd(regs_float[reg_no2_src], cache);
4306+
mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data1_src);
4307+
a.comisd(regs_float[REG_F64_FREE_IDX], regs_float[reg_no2_src]);
43294308
return true;
43304309
}
43314310

@@ -4344,15 +4323,8 @@ static bool
43444323
cmp_r_imm_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
43454324
double data2_src)
43464325
{
4347-
const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
4348-
/* imm -> m64 */
4349-
x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
4350-
offsetof(WASMExecEnv, jit_cache));
4351-
Imm imm(*(uint64 *)&data2_src);
4352-
mov_imm_to_m(a, cache, imm, 8);
4353-
4354-
/* comisd xmm m64 */
4355-
a.comisd(regs_float[reg_no1_src], cache);
4326+
mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data2_src);
4327+
a.comisd(regs_float[reg_no1_src], regs_float[REG_F64_FREE_IDX]);
43564328
return true;
43574329
}
43584330

@@ -5071,13 +5043,19 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
50715043
+ a.code()->sectionById(0)->buffer().size();
50725044
bool fp_cmp = cc->last_cmp_on_fp;
50735045

5046+
bh_assert(!fp_cmp || (fp_cmp && (op == GTS || op == GES)));
5047+
50745048
switch (op) {
50755049
case EQ:
5050+
{
50765051
a.je(target);
50775052
break;
5053+
}
50785054
case NE:
5055+
{
50795056
a.jne(target);
50805057
break;
5058+
}
50815059
case GTS:
50825060
{
50835061
if (fp_cmp) {
@@ -5090,18 +5068,13 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
50905068
}
50915069
case LES:
50925070
{
5093-
if (fp_cmp) {
5094-
a.jnb(target);
5095-
}
5096-
else {
5097-
a.jng(target);
5098-
}
5071+
a.jng(target);
50995072
break;
51005073
}
51015074
case GES:
51025075
{
51035076
if (fp_cmp) {
5104-
a.jnb(target);
5077+
a.jae(target);
51055078
}
51065079
else {
51075080
a.jnl(target);
@@ -5110,29 +5083,34 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
51105083
}
51115084
case LTS:
51125085
{
5113-
if (fp_cmp) {
5114-
a.ja(target);
5115-
}
5116-
else {
5117-
a.jl(target);
5118-
}
5086+
a.jl(target);
51195087
break;
51205088
}
51215089
case GTU:
5090+
{
51225091
a.ja(target);
51235092
break;
5093+
}
51245094
case LEU:
5095+
{
51255096
a.jna(target);
51265097
break;
5098+
}
51275099
case GEU:
5100+
{
51285101
a.jae(target);
51295102
break;
5103+
}
51305104
case LTU:
5105+
{
51315106
a.jb(target);
51325107
break;
5108+
}
51335109
default:
5110+
{
51345111
bh_assert(0);
51355112
break;
5113+
}
51365114
}
51375115

51385116
/* The offset written by asmjit is always 0, we patch it again */
@@ -5174,10 +5152,13 @@ lower_select(JitCompContext *cc, x86::Assembler &a, COND_OP op, JitReg r0,
51745152
CHECK_NCONST(r1);
51755153
CHECK_KIND(r1, JIT_REG_KIND_I32);
51765154

5177-
if (r0 == r3 && r0 != r2) {
5155+
if (r0 == r3 && r0 != r2 && !cc->last_cmp_on_fp) {
51785156
JitReg r_tmp;
51795157

5180-
/* Exchange r2, r3*/
5158+
/* For i32/i64, exchange r2 and r3 to make r0 equal to r2,
5159+
so as to decrease possible execution instructions.
5160+
For f32/f64 comparison, should not change the order as
5161+
the result of comparison with NaN may be different. */
51815162
r_tmp = r2;
51825163
r2 = r3;
51835164
r3 = r_tmp;
@@ -5258,7 +5239,8 @@ lower_branch(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
52585239

52595240
label_dst = jit_reg_no(r1);
52605241
if (label_dst < (int32)jit_cc_label_num(cc) - 1 && is_last_insn
5261-
&& label_is_neighboring(cc, label_src, label_dst)) {
5242+
&& label_is_neighboring(cc, label_src, label_dst)
5243+
&& !cc->last_cmp_on_fp) {
52625244
JitReg r_tmp;
52635245

52645246
r_tmp = r1;
@@ -6555,20 +6537,20 @@ static uint8 hreg_info_F32[3][16] = {
65556537
{ 0, 0, 0, 0, 0, 0, 0, 0,
65566538
1, 1, 1, 1, 1, 1, 1, 1 },
65576539
{ 1, 1, 1, 1, 1, 1, 1, 1,
6558-
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_native */
6540+
1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */
65596541
{ 1, 1, 1, 1, 1, 1, 1, 1,
6560-
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
6542+
1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
65616543
};
65626544

65636545
/* System V AMD64 ABI Calling Convention. [XYZ]MM0-7 */
65646546
static uint8 hreg_info_F64[3][16] = {
65656547
/* xmm0 ~ xmm15 */
65666548
{ 1, 1, 1, 1, 1, 1, 1, 1,
6567-
0, 0, 0, 0, 0, 0, 0, 0 },
6549+
0, 0, 0, 0, 0, 0, 0, 1 },
65686550
{ 1, 1, 1, 1, 1, 1, 1, 1,
6569-
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_native */
6551+
1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */
65706552
{ 1, 1, 1, 1, 1, 1, 1, 1,
6571-
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
6553+
1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
65726554
};
65736555

65746556
static const JitHardRegInfo hreg_info = {

core/iwasm/fast-jit/fe/jit_emit_compare.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ jit_compile_op_compare_float_point(JitCompContext *cc, FloatCond cond,
187187
case FLOAT_LT:
188188
{
189189
GEN_INSN(CMP, cc->cmp_reg, rhs, lhs);
190-
GEN_INSN(SELECTLTS, res, cc->cmp_reg, const_one, const_zero);
190+
GEN_INSN(SELECTGTS, res, cc->cmp_reg, const_one, const_zero);
191191
break;
192192
}
193193
case FLOAT_GT:
@@ -199,7 +199,7 @@ jit_compile_op_compare_float_point(JitCompContext *cc, FloatCond cond,
199199
case FLOAT_LE:
200200
{
201201
GEN_INSN(CMP, cc->cmp_reg, rhs, lhs);
202-
GEN_INSN(SELECTLES, res, cc->cmp_reg, const_one, const_zero);
202+
GEN_INSN(SELECTGES, res, cc->cmp_reg, const_one, const_zero);
203203
break;
204204
}
205205
case FLOAT_GE:

core/iwasm/fast-jit/fe/jit_emit_conversion.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ jit_compile_op_i32_trunc_f32(JitCompContext *cc, bool sign, bool saturating)
6363
}
6464

6565
/* If value is out of integer range, throw exception */
66-
GEN_INSN(CMP, cc->cmp_reg, value, min_valid_float);
67-
if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BLES,
66+
GEN_INSN(CMP, cc->cmp_reg, min_valid_float, value);
67+
if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BGES,
6868
cc->cmp_reg, NULL)) {
6969
goto fail;
7070
}
@@ -123,8 +123,8 @@ jit_compile_op_i32_trunc_f64(JitCompContext *cc, bool sign, bool saturating)
123123
}
124124

125125
/* If value is out of integer range, throw exception */
126-
GEN_INSN(CMP, cc->cmp_reg, value, min_valid_double);
127-
if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BLES,
126+
GEN_INSN(CMP, cc->cmp_reg, min_valid_double, value);
127+
if (!jit_emit_exception(cc, EXCE_INTEGER_OVERFLOW, JIT_OP_BGES,
128128
cc->cmp_reg, NULL)) {
129129
goto fail;
130130
}

0 commit comments

Comments
 (0)