Skip to content

Commit 66cd90d

Browse files
authored
Fix fast jit issues (#1201)
1 parent 9694ad7 commit 66cd90d

File tree

8 files changed

+395
-105
lines changed

8 files changed

+395
-105
lines changed

core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp

Lines changed: 147 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2880,6 +2880,68 @@ alu_imm_imm_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
28802880
return mov_imm_to_r_f32(a, reg_no_dst, data);
28812881
}
28822882

2883+
static bool
2884+
alu_r_m_float(x86::Assembler &a, ALU_OP op, int32 reg_no, x86::Mem &m,
2885+
bool is_f32)
2886+
{
2887+
switch (op) {
2888+
case ADD:
2889+
{
2890+
if (is_f32)
2891+
a.addss(regs_float[reg_no], m);
2892+
else
2893+
a.addsd(regs_float[reg_no], m);
2894+
break;
2895+
}
2896+
case SUB:
2897+
{
2898+
if (is_f32)
2899+
a.subss(regs_float[reg_no], m);
2900+
else
2901+
a.subsd(regs_float[reg_no], m);
2902+
break;
2903+
}
2904+
case MUL:
2905+
{
2906+
if (is_f32)
2907+
a.mulss(regs_float[reg_no], m);
2908+
else
2909+
a.mulsd(regs_float[reg_no], m);
2910+
break;
2911+
}
2912+
case DIV_S:
2913+
{
2914+
if (is_f32)
2915+
a.divss(regs_float[reg_no], m);
2916+
else
2917+
a.divsd(regs_float[reg_no], m);
2918+
break;
2919+
}
2920+
case MAX:
2921+
{
2922+
if (is_f32)
2923+
a.maxss(regs_float[reg_no], m);
2924+
else
2925+
a.maxsd(regs_float[reg_no], m);
2926+
break;
2927+
}
2928+
case MIN:
2929+
{
2930+
if (is_f32)
2931+
a.minss(regs_float[reg_no], m);
2932+
else
2933+
a.minsd(regs_float[reg_no], m);
2934+
break;
2935+
}
2936+
default:
2937+
{
2938+
bh_assert(0);
2939+
return false;
2940+
}
2941+
}
2942+
return true;
2943+
}
2944+
28832945
/**
28842946
* Encode float alu operation of imm and reg, and save result to reg
28852947
*
@@ -2902,10 +2964,9 @@ alu_imm_r_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
29022964
a.movaps(cache, regs_float[reg_no2_src]);
29032965

29042966
/* imm -> gp -> xmm */
2905-
mov_imm_to_r_f32(a, reg_no2_src, data1_src);
2967+
mov_imm_to_r_f32(a, reg_no_dst, data1_src);
29062968

2907-
a.addss(regs_float[reg_no2_src], cache);
2908-
return true;
2969+
return alu_r_m_float(a, op, reg_no_dst, cache, true);
29092970
}
29102971

29112972
/**
@@ -2930,8 +2991,9 @@ alu_r_imm_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
29302991
Imm imm(*(uint32 *)&data2_src);
29312992
mov_imm_to_m(a, cache, imm, 4);
29322993

2933-
a.addss(regs_float[reg_no1_src], cache);
2934-
return true;
2994+
mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
2995+
2996+
return alu_r_m_float(a, op, reg_no_dst, cache, true);
29352997
}
29362998

29372999
/**
@@ -3076,10 +3138,9 @@ alu_imm_r_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
30763138
a.movapd(cache, regs_float[reg_no2_src]);
30773139

30783140
/* imm -> gp -> xmm */
3079-
mov_imm_to_r_f64(a, reg_no2_src, data1_src);
3141+
mov_imm_to_r_f64(a, reg_no_dst, data1_src);
30803142

3081-
a.addsd(regs_float[reg_no2_src], cache);
3082-
return true;
3143+
return alu_r_m_float(a, op, reg_no_dst, cache, false);
30833144
}
30843145

30853146
/**
@@ -3104,8 +3165,9 @@ alu_r_imm_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
31043165
Imm imm(*(uint64 *)&data2_src);
31053166
mov_imm_to_m(a, cache, imm, 8);
31063167

3107-
a.addsd(regs_float[reg_no1_src], cache);
3108-
return true;
3168+
mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
3169+
3170+
return alu_r_m_float(a, op, reg_no_dst, cache, false);
31093171
}
31103172

31113173
/**
@@ -3359,18 +3421,42 @@ bit_r_imm_i64(x86::Assembler &a, BIT_OP op, int32 reg_no, int64 data)
33593421

33603422
switch (op) {
33613423
case OR:
3362-
if (data != 0)
3363-
a.or_(regs_i64[reg_no], imm);
3424+
if (data != 0) {
3425+
if (data >= INT32_MIN && data <= INT32_MAX) {
3426+
imm.setValue((int32)data);
3427+
a.or_(regs_i64[reg_no], imm);
3428+
}
3429+
else {
3430+
a.mov(regs_i64[REG_I64_FREE_IDX], imm);
3431+
a.or_(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX]);
3432+
}
3433+
}
33643434
break;
33653435
case XOR:
33663436
if (data == -1LL)
33673437
a.not_(regs_i64[reg_no]);
3368-
else if (data != 0)
3369-
a.xor_(regs_i64[reg_no], imm);
3438+
else if (data != 0) {
3439+
if (data >= INT32_MIN && data <= INT32_MAX) {
3440+
imm.setValue((int32)data);
3441+
a.xor_(regs_i64[reg_no], imm);
3442+
}
3443+
else {
3444+
a.mov(regs_i64[REG_I64_FREE_IDX], imm);
3445+
a.xor_(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX]);
3446+
}
3447+
}
33703448
break;
33713449
case AND:
3372-
if (data != -1LL)
3373-
a.and_(regs_i64[reg_no], imm);
3450+
if (data != -1LL) {
3451+
if (data >= INT32_MIN && data <= INT32_MAX) {
3452+
imm.setValue((int32)data);
3453+
a.and_(regs_i64[reg_no], imm);
3454+
}
3455+
else {
3456+
a.mov(regs_i64[REG_I64_FREE_IDX], imm);
3457+
a.and_(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX]);
3458+
}
3459+
}
33743460
break;
33753461
default:
33763462
bh_assert(0);
@@ -4958,6 +5044,7 @@ lower_cmp(JitCompContext *cc, x86::Assembler &a, JitReg r0, JitReg r1,
49585044
* Encode detecting the cmp flags in reg, and jmp to the relative address
49595045
* according to the condition opcode
49605046
*
5047+
* @param cc the compiler context
49615048
* @param a the assembler to emit the code
49625049
* @param reg_no the no of register which contains cmp flags of cmp result
49635050
* @param op the condition opcode to jmp
@@ -4966,12 +5053,13 @@ lower_cmp(JitCompContext *cc, x86::Assembler &a, JitReg r0, JitReg r1,
49665053
* @return true if success, false otherwise
49675054
*/
49685055
static bool
4969-
cmp_r_and_jmp_relative(x86::Assembler &a, int32 reg_no, COND_OP op,
4970-
int32 offset)
5056+
cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
5057+
COND_OP op, int32 offset)
49715058
{
49725059
Imm target(INT32_MAX);
49735060
char *stream = (char *)a.code()->sectionById(0)->buffer().data()
49745061
+ a.code()->sectionById(0)->buffer().size();
5062+
bool fp_cmp = cc->last_cmp_on_fp;
49755063

49765064
switch (op) {
49775065
case EQ:
@@ -4981,17 +5069,46 @@ cmp_r_and_jmp_relative(x86::Assembler &a, int32 reg_no, COND_OP op,
49815069
a.jne(target);
49825070
break;
49835071
case GTS:
4984-
a.jg(target);
5072+
{
5073+
if (fp_cmp) {
5074+
a.jnbe(target);
5075+
}
5076+
else {
5077+
a.jg(target);
5078+
}
49855079
break;
5080+
}
49865081
case LES:
4987-
a.jng(target);
5082+
{
5083+
if (fp_cmp) {
5084+
a.jbe(target);
5085+
}
5086+
else {
5087+
a.jng(target);
5088+
}
49885089
break;
5090+
}
49895091
case GES:
4990-
a.jge(target);
5092+
{
5093+
if (fp_cmp) {
5094+
a.jnb(target);
5095+
}
5096+
else {
5097+
5098+
a.jnl(target);
5099+
}
49915100
break;
5101+
}
49925102
case LTS:
4993-
a.jl(target);
5103+
{
5104+
if (fp_cmp) {
5105+
a.jb(target);
5106+
}
5107+
else {
5108+
a.jl(target);
5109+
}
49945110
break;
5111+
}
49955112
case GTU:
49965113
a.ja(target);
49975114
break;
@@ -5074,7 +5191,8 @@ lower_select(JitCompContext *cc, x86::Assembler &a, COND_OP op, JitReg r0,
50745191
}
50755192

50765193
if (r3 && r0 != r3) {
5077-
if (!cmp_r_and_jmp_relative(a, jit_reg_no(r1), op, (int32)size_mov2))
5194+
if (!cmp_r_and_jmp_relative(cc, a, jit_reg_no(r1), op,
5195+
(int32)size_mov2))
50785196
return false;
50795197
a.embedDataArray(TypeId::kInt8, stream_mov2, size_mov2);
50805198
}
@@ -6398,7 +6516,7 @@ static uint8 hreg_info_F32[3][16] = {
63986516
{ 0, 0, 0, 0, 0, 0, 0, 0,
63996517
1, 1, 1, 1, 1, 1, 1, 1 },
64006518
{ 1, 1, 1, 1, 1, 1, 1, 1,
6401-
0, 0, 0, 0, 0, 0, 0, 0 }, /* caller_saved_native */
6519+
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_native */
64026520
{ 1, 1, 1, 1, 1, 1, 1, 1,
64036521
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
64046522
};
@@ -6409,7 +6527,7 @@ static uint8 hreg_info_F64[3][16] = {
64096527
{ 1, 1, 1, 1, 1, 1, 1, 1,
64106528
0, 0, 0, 0, 0, 0, 0, 0 },
64116529
{ 1, 1, 1, 1, 1, 1, 1, 1,
6412-
0, 0, 0, 0, 0, 0, 0, 0 }, /* caller_saved_native */
6530+
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_native */
64136531
{ 1, 1, 1, 1, 1, 1, 1, 1,
64146532
1, 1, 1, 1, 1, 1, 1, 1 }, /* caller_saved_jitted */
64156533
};
@@ -6466,12 +6584,16 @@ jit_codegen_get_hreg_by_name(const char *name)
64666584
return jit_reg_new(JIT_REG_KIND_I32, REG_ECX_IDX);
64676585
else if (strcmp(name, "edx") == 0)
64686586
return jit_reg_new(JIT_REG_KIND_I32, REG_EDX_IDX);
6587+
else if (strcmp(name, "esi") == 0)
6588+
return jit_reg_new(JIT_REG_KIND_I32, REG_ESI_IDX);
64696589
else if (strcmp(name, "rax") == 0)
64706590
return jit_reg_new(JIT_REG_KIND_I64, REG_RAX_IDX);
64716591
else if (strcmp(name, "rcx") == 0)
64726592
return jit_reg_new(JIT_REG_KIND_I64, REG_RCX_IDX);
64736593
else if (strcmp(name, "rdx") == 0)
64746594
return jit_reg_new(JIT_REG_KIND_I64, REG_RDX_IDX);
6595+
else if (strcmp(name, "r9") == 0)
6596+
return jit_reg_new(JIT_REG_KIND_I64, REG_R9_IDX);
64756597
else if (strcmp(name, "xmm0") == 0)
64766598
return jit_reg_new(JIT_REG_KIND_F32, 0);
64776599
else if (strcmp(name, "xmm0_f64") == 0)

0 commit comments

Comments
 (0)