Skip to content

Commit 7dcba89

Browse files
committed
[DYNAREC] Removed some bloated x87 code, improved test19 (backported from box64)
1 parent 5c5639b commit 7dcba89

16 files changed

+1046
-416
lines changed

src/dynarec/dynarec_arm_0f.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -475,7 +475,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
475475
VMOVD(d1, v0);
476476
}
477477
VCMP_F32(d1*2, s0);
478-
FCOMI(x1, x2, 0, 0, d1*2, s0, 1);
478+
FCOMI(x1, x2);
479479
break;
480480

481481
case 0x31:

src/dynarec/dynarec_arm_660f.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -282,7 +282,7 @@ uintptr_t dynarec660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
282282
v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
283283
GETEX(q0, 0);
284284
VCMP_F64(v0, q0);
285-
FCOMI(x1, x2, 0, 0, v0, q0, 0);
285+
FCOMI(x1, x2);
286286
break;
287287

288288
case 0x38: // SSSE3 opcodes

src/dynarec/dynarec_arm_d8.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
100100
} else {
101101
VCMP_F64(v1, v2);
102102
}
103-
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
103+
FCOM(x1, x2);
104104
break;
105105
case 0xD8:
106106
case 0xD9:
@@ -118,7 +118,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
118118
} else {
119119
VCMP_F64(v1, v2);
120120
}
121-
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
121+
FCOM(x1, x2);
122122
X87_POP_OR_FAIL(dyn, ninst, x3);
123123
break;
124124
case 0xE0:
@@ -275,7 +275,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
275275
VCVT_F64_F32(d1, s0);
276276
VCMP_F64(v1, d1);
277277
}
278-
FCOM(x1, x2, x3, x14, v1, ST_IS_F(0)?s0:d1, ST_IS_F(0));
278+
FCOM(x1, x2);
279279
break;
280280
case 3:
281281
INST_NAME("FCOMP ST0, float[ED]");
@@ -296,7 +296,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
296296
VCVT_F64_F32(d1, s0);
297297
VCMP_F64(v1, d1);
298298
}
299-
FCOM(x1, x2, x3, x14, v1, ST_IS_F(0)?s0:d1, ST_IS_F(0));
299+
FCOM(x1, x2);
300300
X87_POP_OR_FAIL(dyn, ninst, x3);
301301
break;
302302
case 4:

src/dynarec/dynarec_arm_d9.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
126126
} else {
127127
VCMP_F64_0(v1);
128128
}
129-
FCOM(x1, x2, 0, 0, v1, 0, ST_IS_F(0)); // same flags...
129+
FCOM(x1, x2); // same flags...
130130
break;
131131
case 0xE5:
132132
INST_NAME("FXAM");

src/dynarec/dynarec_arm_da.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
128128
} else {
129129
VCMP_F64(v1, v2);
130130
}
131-
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
131+
FCOM(x1, x2);
132132
X87_POP_OR_FAIL(dyn, ninst, x3);
133133
X87_POP_OR_FAIL(dyn, ninst, x3);
134134
break;
@@ -185,7 +185,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
185185
VMOVtoV(s0, ed);
186186
VCVT_F64_S32(d0, s0);
187187
VCMP_F64(v1, d0);
188-
FCOM(x1, x2, x3, x14, v1, d0, 0);
188+
FCOM(x1, x2);
189189
break;
190190
case 3:
191191
INST_NAME("FICOMP ST0, Ed");
@@ -196,7 +196,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
196196
VMOVtoV(s0, ed);
197197
VCVT_F64_S32(d0, s0);
198198
VCMP_F64(v1, d0);
199-
FCOM(x1, x2, x3, x14, v1, d0, 0);
199+
FCOM(x1, x2);
200200
X87_POP_OR_FAIL(dyn, ninst, x3);
201201
break;
202202
case 4:

src/dynarec/dynarec_arm_db.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
149149
} else {
150150
VCMP_F64(v1, v2);
151151
}
152-
FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
152+
FCOMI(x1, x2);
153153
break;
154154
case 0xF0:
155155
case 0xF1:
@@ -168,7 +168,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
168168
} else {
169169
VCMP_F64(v1, v2);
170170
}
171-
FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
171+
FCOMI(x1, x2);
172172
break;
173173

174174
case 0xE0:

src/dynarec/dynarec_arm_dc.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
9696
} else {
9797
VCMP_F64(v1, v2);
9898
}
99-
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
99+
FCOM(x1, x2);
100100
break;
101101
case 0xD8:
102102
case 0xD9:
@@ -114,7 +114,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
114114
} else {
115115
VCMP_F64(v1, v2);
116116
}
117-
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
117+
FCOM(x1, x2);
118118
X87_POP_OR_FAIL(dyn, ninst, x3);
119119
break;
120120
case 0xE0:
@@ -258,7 +258,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
258258
VMOVtoV_D(d1, x2, x3);
259259
}
260260
VCMP_F64(v1, d1);
261-
FCOM(x1, x2, x3, x14, v1, d1, 0);
261+
FCOM(x1, x2);
262262
break;
263263
case 3:
264264
INST_NAME("FCOMP ST0, double[ED]");
@@ -275,7 +275,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
275275
VMOVtoV_D(d1, x2, x3);
276276
}
277277
VCMP_F64(v1, d1);
278-
FCOM(x1, x2, x3, x14, v1, d1, 0);
278+
FCOM(x1, x2);
279279
X87_POP_OR_FAIL(dyn, ninst, x3);
280280
break;
281281
case 4:

src/dynarec/dynarec_arm_dd.c

Lines changed: 49 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
3232
uint8_t ed;
3333
int v1, v2;
3434
int i1, i2, i3;
35+
int j32;
3536

3637
MAYUSE(v2);
3738
MAYUSE(v1);
@@ -111,7 +112,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
111112
} else {
112113
VCMP_F64(v1, v2);
113114
}
114-
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
115+
FCOM(x1, x2);
115116
break;
116117
case 0xE8:
117118
case 0xE9:
@@ -129,7 +130,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
129130
} else {
130131
VCMP_F64(v1, v2);
131132
}
132-
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
133+
FCOM(x1, x2);
133134
X87_POP_OR_FAIL(dyn, ninst, x3);
134135
break;
135136

@@ -178,10 +179,53 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
178179
break;
179180
case 1:
180181
INST_NAME("FISTTP i64, ST0");
181-
x87_forget(dyn, ninst, x1, x2, 0);
182+
parity = getedparity(dyn, ninst, addr, nextop, 3);
182183
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
183-
if(ed!=x1) {MOV_REG(x1, ed);}
184-
CALL(arm_fistt64, -1, 0);
184+
if(ninst
185+
&& dyn->insts[ninst-1].x86.addr
186+
&& *(uint8_t*)dyn->insts[ninst-1].x86.addr==0xDF
187+
&& (((*(uint8_t*)(dyn->insts[ninst-1].x86.addr+1))>>3)&7)==5)
188+
{
189+
if(parity) {
190+
STRD_IMM8(x2, ed, 0); // x2/x3 is 64bits
191+
} else {
192+
STR_IMM9(x2, ed, 0);
193+
STR_IMM9(x3, ed, 4);
194+
}
195+
} else {
196+
v1 = x87_get_st(dyn, ninst, x2, x3, 0, NEON_CACHE_ST_D);
197+
//addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
198+
fpu_get_scratch_double(dyn); // to allocate v0
199+
v2 = fpu_get_scratch_double(dyn);
200+
// get TOP
201+
LDR_IMM9(x14, xEmu, offsetof(x86emu_t, top));
202+
int a = 0 - dyn->n.x87stack;
203+
if(a<0) {
204+
SUB_IMM8(x14, x14, -a);
205+
AND_IMM8(x14, x14, 7); // (emu->top + i)&7
206+
} else if(a>0) {
207+
ADD_IMM8(x14, x14, a);
208+
AND_IMM8(x14, x14, 7); // (emu->top + i)&7
209+
}
210+
ADD_REG_LSL_IMM5(x14, xEmu, x14, 4); // each fpu_ll is 2 int64: ref then ll
211+
MOVW(x2, offsetof(x86emu_t, fpu_ll)); //can be optimized?
212+
ADD_REG_LSL_IMM5(x14, x14, x2, offsetof(fpu_ll_t, sref));
213+
VLDR_64(v2, x14, 0);
214+
VCEQ_32(v2, v2, v1); // compare
215+
VMOVfrV_D(x2, x3, v2);
216+
ANDS_REG_LSL_IMM5(x2, x2, x3, 0); // if NE then values are the same!
217+
B_MARK(cEQ); // do the i64 conversion
218+
// memcpy(ed, &STll(0).ll, sizeof(int64_t));
219+
LDRD_IMM8(x2, x14, offsetof(fpu_ll_t, sq)); // load ll
220+
B_MARK3(c__);
221+
MARK;
222+
MOV32(x2, arm_fist64_3);
223+
VMOV_64(0, v1); // prepare call to arm_fist64_3
224+
CALL_1DR_U64(x2, x2, x3, x14, (1<<x1));
225+
MARK3;
226+
STR_IMM9(x2, ed, 0);
227+
STR_IMM9(x3, ed, 4);
228+
}
185229
X87_POP_OR_FAIL(dyn, ninst, x3);
186230
break;
187231
case 2:

src/dynarec/dynarec_arm_de.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
9494
} else {
9595
VCMP_F64(v1, v2);
9696
}
97-
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
97+
FCOM(x1, x2);
9898
X87_POP_OR_FAIL(dyn, ninst, x3);
9999
break;
100100

@@ -107,7 +107,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
107107
} else {
108108
VCMP_F64(v1, v2);
109109
}
110-
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
110+
FCOM(x1, x2);
111111
X87_POP_OR_FAIL(dyn, ninst, x3);
112112
X87_POP_OR_FAIL(dyn, ninst, x3);
113113
break;

src/dynarec/dynarec_arm_df.c

Lines changed: 4 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
9090
} else {
9191
VCMP_F64(v1, v2);
9292
}
93-
FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
93+
FCOMI(x1, x2);
9494
X87_POP_OR_FAIL(dyn, ninst, x3);
9595
break;
9696
case 0xF0:
@@ -111,7 +111,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
111111
} else {
112112
VCMP_F64(v1, v2);
113113
}
114-
FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
114+
FCOMI(x1, x2);
115115
X87_POP_OR_FAIL(dyn, ninst, x3);
116116
break;
117117

@@ -186,9 +186,9 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
186186
BIC_IMM8(x3, x3, 0b10011111, 0);
187187
VMSR(x3);
188188
if(ST_IS_F(0)) {
189-
VCVTR_S32_F32(s0, v1);
189+
VCVT_S32_F32(s0, v1);
190190
} else {
191-
VCVTR_S32_F64(s0, v1);
191+
VCVT_S32_F64(s0, v1);
192192
}
193193
VMRS(x3); // get the FPSCR reg and test FPU exception (invalid operation only)
194194
VMOVfrV(ed, s0);
@@ -337,12 +337,10 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
337337
STR_IMM9(x3, ed, 4);
338338
}
339339
} else {
340-
#if 1
341340
v1 = x87_get_st(dyn, ninst, x2, x3, 0, NEON_CACHE_ST_D);
342341
//addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
343342
fpu_get_scratch_double(dyn); // to allocate v0
344343
v2 = fpu_get_scratch_double(dyn);
345-
#if 1
346344
// get TOP
347345
LDR_IMM9(x14, xEmu, offsetof(x86emu_t, top));
348346
int a = 0 - dyn->n.x87stack;
@@ -375,74 +373,6 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
375373
MARK3;
376374
STR_IMM9(x2, ed, 0);
377375
STR_IMM9(x3, ed, 4);
378-
#else
379-
v0 = fpu_get_scratch_double(dyn);
380-
s0 = fpu_get_scratch_single(dyn);
381-
// check STll(0).ref==ST(0).q so emu->fpu_ll[emu->top].ref == emu->mmx87[emu->top]
382-
// get TOP
383-
LDR_IMM9(x14, xEmu, offsetof(x86emu_t, top));
384-
int a = 0 - dyn->n.x87stack;
385-
if(a<0) {
386-
SUB_IMM8(x14, x14, -a);
387-
AND_IMM8(x14, x14, 7); // (emu->top + i)&7
388-
} else if(a>0) {
389-
ADD_IMM8(x14, x14, a);
390-
AND_IMM8(x14, x14, 7); // (emu->top + i)&7
391-
}
392-
ADD_REG_LSL_IMM5(x14, xEmu, x14, 4); // each fpu_ll is 2 int64: ref than ll
393-
MOVW(x2, offsetof(x86emu_t, fpu_ll)); //can be optimized?
394-
ADD_REG_LSL_IMM5(x14, x14, x2, 0);
395-
VLDR_64(v2, x14, 0);
396-
VCEQ_32(v2, v2, v1); // compare
397-
VMOVfrV_D(x2, x3, v2);
398-
ANDS_REG_LSL_IMM5(x2, x2, x3, 0); // if NE then values are the same!
399-
B_MARK(cEQ); // do the i64 conversion
400-
// memcpy(ed, &STll(0).ll, sizeof(int64_t));
401-
LDRD_IMM8(x2, x14, 8); // load ll
402-
B_MARK3(c__);
403-
MARK;
404-
VEOR(v0, v0, v0);
405-
MOVW(x2, 0x41F0);
406-
VMOVtoDx_16(v0, 3, x2); // V0 = (1<<32) as double
407-
VMOVfrDx_32(x14, v1, 1); // get high part to extract sign
408-
VABS_F64(v1 ,v1); //ST0 will be poped, so lost...
409-
VDIV_F64(v2, v1, v0); // v2 = abs(ST0)/(1<<32) : so 32 bits high part
410-
MSR_nzcvq_0();
411-
VMRS(x2); // get fpscr
412-
ORR_IMM8(x3, x2, 0b010, 9); // enable exceptions
413-
BIC_IMM8(x3, x3, 0b10011111, 0);
414-
VMSR(x3);
415-
VCVT_U32_F64(s0, v2); // convert high part to U32
416-
VMRS(x3); // get the FPCSR reg and test FPU execption (invalid operation only)
417-
VMSR(x2); // put back fpscr
418-
TSTS_IMM8_ROR(x3, 0b00000001, 0);
419-
B_MARK2(cEQ); // not overflow...
420-
MARKLOCK;
421-
MOV_IMM(x3, 0b10, 1); // 0x80000000
422-
MOVW(x2, 0);
423-
B_MARK3(c__);
424-
MARK2; // continue conversion, it fits an int64!
425-
VCVT_F64_U32(v2, s0); // int part now
426-
VMLS_F64(v1, v2, v0); // compute low part
427-
VMOVfrV(x3, s0); // transfert high path
428-
TSTS_IMM8_ROR(x14, 0b10, 1); // test high part with 0x800000000
429-
B_MARKLOCK(cNE); // int overflow...
430-
VCVT_U32_F64(s0, v1); // convert low part
431-
VMOVfrV(x2, s0); // transfert low part
432-
TSTS_IMM8_ROR(x14, 0b10, 1); // 0x800000000
433-
B_MARK3(cEQ);
434-
RSBS_IMM8(x2, x2, 0); // NEG(i64)
435-
RSC_IMM8(x3, x3, 0);
436-
MARK3;
437-
STR_IMM9(x2, x1, 0);
438-
STR_IMM9(x3, x1, 4);
439-
#endif
440-
#else
441-
MESSAGE(LOG_DUMP, "Need Optimization\n");
442-
x87_forget(dyn, ninst, x2, x3, 0);
443-
if(ed!=x1) {MOV_REG(x1, ed);}
444-
CALL(arm_fistp64, -1, 0);
445-
#endif
446376
}
447377
X87_POP_OR_FAIL(dyn, ninst, x3);
448378
break;

0 commit comments

Comments
 (0)