Skip to content

Commit b84f117

Browse files
authored
Avoid modifying the src reg first when the src reg and dst reg are the same (#1220)
1 parent 25b48bc commit b84f117

File tree

2 files changed

+46
-14
lines changed

2 files changed

+46
-14
lines changed

core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3014,7 +3014,6 @@ alu_r_imm_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
30143014
mov_imm_to_m(a, cache, imm, 4);
30153015

30163016
mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
3017-
30183017
return alu_r_m_float(a, op, reg_no_dst, cache, true);
30193018
}
30203019

@@ -3033,40 +3032,52 @@ static bool
30333032
alu_r_r_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
30343033
int32 reg_no1_src, int32 reg_no2_src)
30353034
{
3035+
bool store_result = false;
3036+
3037+
/**
3038+
* - op r0,r0,r1. do nothing since instructions always store results in
3039+
* the first register
3040+
*
3041+
* - op r1,r0,r1. use FREE_REG to cache and replace r0, and then store
3042+
* results in r1
3043+
*
3044+
* - op r0,r1,r2. use r0 to cache and replace r1, and accept the result
3045+
* naturally
3046+
**/
3047+
if (reg_no_dst == reg_no2_src) {
3048+
store_result = true;
3049+
reg_no_dst = REG_F32_FREE_IDX;
3050+
}
3051+
mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
3052+
30363053
switch (op) {
30373054
case ADD:
30383055
{
3039-
mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
30403056
a.addss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
30413057
break;
30423058
}
30433059
case SUB:
30443060
{
3045-
mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
30463061
a.subss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
30473062
break;
30483063
}
30493064
case MUL:
30503065
{
3051-
mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
30523066
a.mulss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
30533067
break;
30543068
}
30553069
case DIV_S:
30563070
{
3057-
mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
30583071
a.divss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
30593072
break;
30603073
}
30613074
case MAX:
30623075
{
3063-
mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
30643076
a.maxss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
30653077
break;
30663078
}
30673079
case MIN:
30683080
{
3069-
mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
30703081
a.minss(regs_float[reg_no_dst], regs_float[reg_no2_src]);
30713082
break;
30723083
}
@@ -3076,6 +3087,10 @@ alu_r_r_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
30763087
return false;
30773088
}
30783089
}
3090+
3091+
if (store_result)
3092+
mov_r_to_r_f32(a, reg_no2_src, REG_F32_FREE_IDX);
3093+
30793094
return true;
30803095
}
30813096

@@ -3188,7 +3203,6 @@ alu_r_imm_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
31883203
mov_imm_to_m(a, cache, imm, 8);
31893204

31903205
mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
3191-
31923206
return alu_r_m_float(a, op, reg_no_dst, cache, false);
31933207
}
31943208

@@ -3207,40 +3221,52 @@ static bool
32073221
alu_r_r_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
32083222
int32 reg_no1_src, int32 reg_no2_src)
32093223
{
3224+
bool store_result = false;
3225+
3226+
/**
3227+
* - op r0,r0,r1. do nothing since instructions always store results in
3228+
* the first register
3229+
*
3230+
* - op r1,r0,r1. use FREE_REG to cache and replace r0, and then store
3231+
* results in r1
3232+
*
3233+
* - op r0,r1,r2. use r0 to cache and replace r1, and accept the result
3234+
* naturally
3235+
**/
3236+
if (reg_no_dst == reg_no2_src) {
3237+
store_result = true;
3238+
reg_no_dst = REG_F64_FREE_IDX;
3239+
}
3240+
mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
3241+
32103242
switch (op) {
32113243
case ADD:
32123244
{
3213-
mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
32143245
a.addsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
32153246
break;
32163247
}
32173248
case SUB:
32183249
{
3219-
mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
32203250
a.subsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
32213251
break;
32223252
}
32233253
case MUL:
32243254
{
3225-
mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
32263255
a.mulsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
32273256
break;
32283257
}
32293258
case DIV_S:
32303259
{
3231-
mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
32323260
a.divsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
32333261
break;
32343262
}
32353263
case MAX:
32363264
{
3237-
mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
32383265
a.maxsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
32393266
break;
32403267
}
32413268
case MIN:
32423269
{
3243-
mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
32443270
a.minsd(regs_float[reg_no_dst], regs_float[reg_no2_src]);
32453271
break;
32463272
}
@@ -3250,6 +3276,10 @@ alu_r_r_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
32503276
return false;
32513277
}
32523278
}
3279+
3280+
if (store_result)
3281+
mov_r_to_r_f64(a, reg_no2_src, REG_F64_FREE_IDX);
3282+
32533283
return true;
32543284
}
32553285

core/iwasm/fast-jit/fe/jit_emit_numberic.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,9 +1379,11 @@ compile_op_float_math(JitCompContext *cc, FloatMath math_op, bool is_f32)
13791379

13801380
switch (math_op) {
13811381
case FLOAT_ABS:
1382+
/* TODO: inline as andps with mask 0x7fffffff (f32) or andpd with mask 0x7fffffffffffffff (f64) */
13821383
func = is_f32 ? (void *)fabsf : (void *)fabs;
13831384
break;
13841385
case FLOAT_NEG:
1386+
/* TODO: inline as xorps with mask 0x80000000 (f32) or xorpd with mask 0x8000000000000000 (f64) */
13851387
func = is_f32 ? (void *)negf : (void *)neg;
13861388
break;
13871389
case FLOAT_CEIL:

0 commit comments

Comments
 (0)