@@ -82,6 +82,9 @@ x86::Gp regs_i64[] = {
8282 x86::r12, x86::r13, x86::r14, x86::r15,
8383};
8484
/* Indexes into regs_float[] of the register that the f32/f64
   compare-with-immediate helpers load their constant into before issuing
   comiss/comisd. Both are 15; presumably that entry is xmm15 -- the tail of
   regs_float[] is not visible from here, TODO confirm. */
85+ #define REG_F32_FREE_IDX 15
86+ #define REG_F64_FREE_IDX 15
87+
8588x86::Xmm regs_float[] = {
8689 x86::xmm0,
8790 x86::xmm1,
@@ -349,6 +352,8 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
349352
350353 bool fp_cmp = cc->last_cmp_on_fp ;
351354
355+ bh_assert (!fp_cmp || (fp_cmp && (op == GES)));
356+
352357 switch (op) {
353358 case EQ:
354359 {
@@ -362,60 +367,52 @@ cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
362367 }
363368 case GTS:
364369 {
365- if (fp_cmp) {
366- a.ja (imm);
367- }
368- else {
369- a.jg (imm);
370- }
370+ a.jg (imm);
371371 break ;
372372 }
373373 case LES:
374374 {
375- if (fp_cmp) {
376- a.jnb (imm);
377- }
378- else {
379- a.jng (imm);
380- }
375+ a.jng (imm);
381376 break ;
382377 }
383378 case GES:
384379 {
385- if (fp_cmp) {
386- a.jnb (imm);
387- }
388- else {
389-
380+ if (fp_cmp)
381+ a.jae (imm);
382+ else
390383 a.jnl (imm);
391- }
392384 break ;
393385 }
394386 case LTS:
395387 {
396- if (fp_cmp) {
397- a.ja (imm);
398- }
399- else {
400- a.jl (imm);
401- }
388+ a.jl (imm);
402389 break ;
403390 }
404391 case GTU:
392+ {
405393 a.ja (imm);
406394 break ;
395+ }
407396 case LEU:
397+ {
408398 a.jna (imm);
409399 break ;
400+ }
410401 case GEU:
402+ {
411403 a.jnb (imm);
412404 break ;
405+ }
413406 case LTU:
407+ {
414408 a.jb (imm);
415409 break ;
410+ }
416411 default :
412+ {
417413 bh_assert (0 );
418414 break ;
415+ }
419416 }
420417
421418 if (r2) {
@@ -761,10 +758,17 @@ static bool
761758mov_imm_to_m (x86::Assembler &a, x86::Mem &m_dst, Imm imm_src, uint32 bytes_dst)
762759{
763760 if (bytes_dst == 8 ) {
764- /* As there is no instruction `MOV m64, imm64`, we use
765- two instructions to implement it */
766- a.mov (regs_i64[REG_I64_FREE_IDX], imm_src);
767- a.mov (m_dst, regs_i64[REG_I64_FREE_IDX]);
761+ int64 value = imm_src.value ();
762+ if (value >= INT32_MIN && value <= INT32_MAX) {
763+ imm_src.setValue ((int32)value);
764+ a.mov (m_dst, imm_src);
765+ }
766+ else {
767+ /* There is no instruction `MOV m64, imm64`, we use
768+ two instructions to implement it */
769+ a.mov (regs_i64[REG_I64_FREE_IDX], imm_src);
770+ a.mov (m_dst, regs_i64[REG_I64_FREE_IDX]);
771+ }
768772 }
769773 else
770774 a.mov (m_dst, imm_src);
@@ -4220,17 +4224,8 @@ static bool
42204224cmp_imm_r_to_r_f32 (x86::Assembler &a, int32 reg_no_dst, float data1_src,
42214225 int32 reg_no2_src)
42224226{
4223- const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info ();
4224- /* xmm -> m128 */
4225- x86::Mem cache = x86::xmmword_ptr (regs_i64[hreg_info->exec_env_hreg_index ],
4226- offsetof (WASMExecEnv, jit_cache));
4227- a.movups (cache, regs_float[reg_no2_src]);
4228-
4229- /* imm -> gp -> xmm */
4230- mov_imm_to_r_f32 (a, reg_no2_src, data1_src);
4231-
4232- /* comiss xmm m32 */
4233- a.comiss (regs_float[reg_no2_src], cache);
 /* Compare an f32 immediate (left operand) with an f32 register (right
    operand): data1_src is passed to mov_imm_to_r_f32 with
    REG_F32_FREE_IDX as the destination index, then comiss is emitted
    with that register first and regs_float[reg_no2_src] second, so the
    source register is only read. reg_no_dst is not referenced in this
    body. Always returns true. */
4227+ mov_imm_to_r_f32 (a, REG_F32_FREE_IDX, data1_src);
4228+ a.comiss (regs_float[REG_F32_FREE_IDX], regs_float[reg_no2_src]);
42344229 return true ;
42354230}
42364231
@@ -4249,15 +4244,8 @@ static bool
42494244cmp_r_imm_to_r_f32 (x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
42504245 float data2_src)
42514246{
4252- const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info ();
4253- /* imm -> m32 */
4254- x86::Mem cache = x86::dword_ptr (regs_i64[hreg_info->exec_env_hreg_index ],
4255- offsetof (WASMExecEnv, jit_cache));
4256- Imm imm (*(uint32 *)&data2_src);
4257- mov_imm_to_m (a, cache, imm, 4 );
4258-
4259- /* comiss xmm m32 */
4260- a.comiss (regs_float[reg_no1_src], cache);
 /* Compare an f32 register (left operand) with an f32 immediate (right
    operand): data2_src is passed to mov_imm_to_r_f32 with
    REG_F32_FREE_IDX as the destination index, then comiss is emitted
    with regs_float[reg_no1_src] first and that register second.
    reg_no_dst is not referenced in this body. Always returns true. */
4247+ mov_imm_to_r_f32 (a, REG_F32_FREE_IDX, data2_src);
4248+ a.comiss (regs_float[reg_no1_src], regs_float[REG_F32_FREE_IDX]);
42614249 return true ;
42624250}
42634251
@@ -4315,17 +4303,8 @@ static bool
43154303cmp_imm_r_to_r_f64 (x86::Assembler &a, int32 reg_no_dst, double data1_src,
43164304 int32 reg_no2_src)
43174305{
4318- const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info ();
4319- /* xmm -> m128 */
4320- x86::Mem cache = x86::qword_ptr (regs_i64[hreg_info->exec_env_hreg_index ],
4321- offsetof (WASMExecEnv, jit_cache));
4322- a.movupd (cache, regs_float[reg_no2_src]);
4323-
4324- /* imm -> gp -> xmm */
4325- mov_imm_to_r_f64 (a, reg_no2_src, data1_src);
4326-
4327- /* comiss xmm m64 */
4328- a.comisd (regs_float[reg_no2_src], cache);
 /* Compare an f64 immediate (left operand) with an f64 register (right
    operand): data1_src is passed to mov_imm_to_r_f64 with
    REG_F64_FREE_IDX as the destination index, then comisd is emitted
    with that register first and regs_float[reg_no2_src] second, so the
    source register is only read. reg_no_dst is not referenced in this
    body. Always returns true. */
4306+ mov_imm_to_r_f64 (a, REG_F64_FREE_IDX, data1_src);
4307+ a.comisd (regs_float[REG_F64_FREE_IDX], regs_float[reg_no2_src]);
43294308 return true ;
43304309}
43314310
@@ -4344,15 +4323,8 @@ static bool
43444323cmp_r_imm_to_r_f64 (x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
43454324 double data2_src)
43464325{
4347- const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info ();
4348- /* imm -> m64 */
4349- x86::Mem cache = x86::qword_ptr (regs_i64[hreg_info->exec_env_hreg_index ],
4350- offsetof (WASMExecEnv, jit_cache));
4351- Imm imm (*(uint64 *)&data2_src);
4352- mov_imm_to_m (a, cache, imm, 8 );
4353-
4354- /* comisd xmm m64 */
4355- a.comisd (regs_float[reg_no1_src], cache);
 /* Compare an f64 register (left operand) with an f64 immediate (right
    operand): data2_src is passed to mov_imm_to_r_f64 with
    REG_F64_FREE_IDX as the destination index, then comisd is emitted
    with regs_float[reg_no1_src] first and that register second.
    reg_no_dst is not referenced in this body. Always returns true. */
4326+ mov_imm_to_r_f64 (a, REG_F64_FREE_IDX, data2_src);
4327+ a.comisd (regs_float[reg_no1_src], regs_float[REG_F64_FREE_IDX]);
43564328 return true ;
43574329}
43584330
@@ -5071,13 +5043,19 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
50715043 + a.code ()->sectionById (0 )->buffer ().size ();
50725044 bool fp_cmp = cc->last_cmp_on_fp ;
50735045
5046+ bh_assert (!fp_cmp || (fp_cmp && (op == GTS || op == GES)));
5047+
50745048 switch (op) {
50755049 case EQ:
5050+ {
50765051 a.je (target);
50775052 break ;
5053+ }
50785054 case NE:
5055+ {
50795056 a.jne (target);
50805057 break ;
5058+ }
50815059 case GTS:
50825060 {
50835061 if (fp_cmp) {
@@ -5090,18 +5068,13 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
50905068 }
50915069 case LES:
50925070 {
5093- if (fp_cmp) {
5094- a.jnb (target);
5095- }
5096- else {
5097- a.jng (target);
5098- }
5071+ a.jng (target);
50995072 break ;
51005073 }
51015074 case GES:
51025075 {
51035076 if (fp_cmp) {
5104- a.jnb (target);
5077+ a.jae (target);
51055078 }
51065079 else {
51075080 a.jnl (target);
@@ -5110,29 +5083,34 @@ cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, int32 reg_no,
51105083 }
51115084 case LTS:
51125085 {
5113- if (fp_cmp) {
5114- a.ja (target);
5115- }
5116- else {
5117- a.jl (target);
5118- }
5086+ a.jl (target);
51195087 break ;
51205088 }
51215089 case GTU:
5090+ {
51225091 a.ja (target);
51235092 break ;
5093+ }
51245094 case LEU:
5095+ {
51255096 a.jna (target);
51265097 break ;
5098+ }
51275099 case GEU:
5100+ {
51285101 a.jae (target);
51295102 break ;
5103+ }
51305104 case LTU:
5105+ {
51315106 a.jb (target);
51325107 break ;
5108+ }
51335109 default :
5110+ {
51345111 bh_assert (0 );
51355112 break ;
5113+ }
51365114 }
51375115
51385116 /* The offset written by asmjit is always 0, we patch it again */
@@ -5174,10 +5152,13 @@ lower_select(JitCompContext *cc, x86::Assembler &a, COND_OP op, JitReg r0,
51745152 CHECK_NCONST (r1);
51755153 CHECK_KIND (r1, JIT_REG_KIND_I32);
51765154
5177- if (r0 == r3 && r0 != r2) {
5155+ if (r0 == r3 && r0 != r2 && !cc-> last_cmp_on_fp ) {
51785156 JitReg r_tmp;
51795157
5180- /* Exchange r2, r3*/
5158+ /* For i32/i64, exchange r2 and r3 to make r0 equal to r2,
5159+ so as to decrease possible execution instructions.
5160+ For f32/f64 comparison, should not change the order as
5161+ the result of comparison with NaN may be different. */
51815162 r_tmp = r2;
51825163 r2 = r3;
51835164 r3 = r_tmp;
@@ -5258,7 +5239,8 @@ lower_branch(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
52585239
52595240 label_dst = jit_reg_no (r1);
52605241 if (label_dst < (int32)jit_cc_label_num (cc) - 1 && is_last_insn
5261- && label_is_neighboring (cc, label_src, label_dst)) {
5242+ && label_is_neighboring (cc, label_src, label_dst)
5243+ && !cc->last_cmp_on_fp ) {
52625244 JitReg r_tmp;
52635245
52645246 r_tmp = r1;
@@ -6555,20 +6537,20 @@ static uint8 hreg_info_F32[3][16] = {
65556537 { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
65566538 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 },
65576539 { 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
6558- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 }, /* caller_saved_native */
6540+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 }, /* caller_saved_native */
65596541 { 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
6560- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 }, /* caller_saved_jitted */
6542+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 }, /* caller_saved_jitted */
65616543};
65626544
65636545/* System V AMD64 ABI Calling Convention. [XYZ]MM0-7 */
65646546static uint8 hreg_info_F64[3 ][16 ] = {
65656547 /* xmm0 ~ xmm15 */
 /* NOTE(review): the first row carries no label here; presumably it is
    the "fixed" (not allocatable) flag per register -- confirm against
    the JitHardRegInfo definition. Column 15 is the index named by
    REG_F64_FREE_IDX; the '+' lines set it to 1 in the first row and to
    0 in both caller_saved rows. */
65666548 { 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
6567- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 },
6549+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 },
65686550 { 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
6569- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 }, /* caller_saved_native */
6551+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 }, /* caller_saved_native */
65706552 { 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
6571- 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 }, /* caller_saved_jitted */
6553+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 }, /* caller_saved_jitted */
65726554};
65736555
65746556static const JitHardRegInfo hreg_info = {
0 commit comments