diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c index 369d1e1fc..a6ee64aed 100644 --- a/src/dynarec/dynarec_native_functions.c +++ b/src/dynarec/dynarec_native_functions.c @@ -720,7 +720,7 @@ static int flagsCacheNeedsTransform(dynarec_native_t* dyn, int ninst) { int jmp = dyn->insts[ninst].x64.jmp_insts; if(jmp<0) return 0; - #ifdef ARM64 + #if defined(ARM64) || defined(LA64) // df_none is now a defered information if(dyn->insts[ninst].f_exit==dyn->insts[jmp].f_entry) return 0; diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index 5897473ae..aa1cc8461 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -44,7 +44,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int int rep = 0; // 0 none, 1=F2 prefix, 2=F3 prefix int need_epilog = 1; // Clean up (because there are multiple passes) - #ifdef ARM64 + #if defined(ARM64) || defined(LA64) dyn->f = status_unk; #else dyn->f.pending = 0; @@ -111,7 +111,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int dyn->last_ip = 0; if(reset_n==-2) { MESSAGE(LOG_DEBUG, "Reset Caches to zero\n"); - #ifdef ARM64 + #if defined(ARM64) || defined(LA64) dyn->f = status_unk; #else dyn->f.dfnone = 0; @@ -128,7 +128,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int } if(dyn->insts[ninst].x64.barrier&BARRIER_FLAGS) { MESSAGE(LOG_DEBUG, "Apply Barrier Flags\n"); - #ifdef ARM64 + #if defined(ARM64) || defined(LA64) dyn->f = status_unk; #else dyn->f.dfnone = 0; @@ -262,7 +262,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int #endif } if(dyn->insts[next].x64.barrier&BARRIER_FLAGS) { - #ifdef ARM64 + #if defined(ARM64) || defined(LA64) dyn->f = status_unk; #else dyn->f.pending = 0; @@ -288,7 +288,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int // we use the 1st predecessor here if((ninst+1)size && !dyn->insts[ninst+1].x64.alive) { // reset fpu value... - #ifdef ARM64 + #if defined(ARM64) || defined(LA64) dyn->f = status_unk; #else dyn->f.dfnone = 0; diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 33549cfee..041609385 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -2679,7 +2679,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xCF: INST_NAME("IRET"); - SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Not a hack, EFLAGS are restored + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); // Not a hack, EFLAGS are restored BARRIER(BARRIER_FLOAT); iret_to_epilog(dyn, ip, ninst, rex.w); *need_epilog = 0; diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index ef34e23d4..ad60acf37 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -417,8 +417,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("UCOMISS Gx, Ex"); } - SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); - SET_DFNONE(); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); nextop = F8; GETGX(d0, 0); GETEXSS(v0, 0, 0); diff --git a/src/dynarec/la64/dynarec_la64_functions.c b/src/dynarec/la64/dynarec_la64_functions.c index d1a6bc116..db884437b 100644 --- a/src/dynarec/la64/dynarec_la64_functions.c +++ b/src/dynarec/la64/dynarec_la64_functions.c @@ -599,17 +599,20 @@ static const char* Ft[] = { "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa static const char* Vt[] = { "vra0", "vra1", "vra2", "vra3", "vra4", "vra5", "vra6", "vra7", "vrt0", "vrt1", "vrt2", "vrt3", "vrt4", "vrt5", "vrt6", "vrt7", "vrt8", "vrt9", "vrt10", "vrt11", "vrt12", "vrt13", "vrt14", "vrt15", "vrs0", "vrs1", "vrs2", "vrs3", "vrs4", "vrs5", "vrs6", "vrs7" }; static const char* XVt[] = { "xvra0", "xvra1", "xvra2", "xvra3", "xvra4", "xvra5", "xvra6", "xvra7", "xvrt0", "xvrt1", "xvrt2", "xvrt3", "xvrt4", "xvrt5", "xvrt6", "xvrt7", "xvrt8", "xvrt9", "xvrt10", "xvrt11", "xvrt12", "xvrt13", "xvrt14", "xvrt15", "xvrs0", "xvrs1", "xvrs2", "xvrs3", "xvrs4", "xvrs5", "xvrs6", "xvrs7" }; +static const char* df_status[] = { "unknown", "set", "none_pending", "none" }; + void printf_x64_instruction(dynarec_native_t* dyn, zydis_dec_t* dec, instruction_x64_t* inst, const char* name); void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex) { if (!dyn->need_dump && !BOX64ENV(dynarec_gdbjit) && !BOX64ENV(dynarec_perf_map)) return; static char buf[4096]; - int length = sprintf(buf, "barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, fuse=%d/%d, sm=%d(%d/%d)", + int length = sprintf(buf, "barrier=%d state=%d/%s(%s->%s), %s=%X/%X, use=%X, need=%X/%X, fuse=%d/%d, sm=%d(%d/%d)", dyn->insts[ninst].x64.barrier, dyn->insts[ninst].x64.state_flags, - dyn->f.pending, - dyn->f.dfnone, + df_status[dyn->f], + df_status[dyn->insts[ninst].f_entry], + df_status[dyn->insts[ninst].f_exit], dyn->insts[ninst].x64.may_set ? "may" : "set", dyn->insts[ninst].x64.set_flags, dyn->insts[ninst].x64.gen_flags, diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index bded34a03..da3cd510c 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -349,6 +349,7 @@ void jump_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst) } TABLE64C(x2, const_epilog); SMEND(); + CHECK_DFNONE(0); BR(x2); } @@ -368,6 +369,7 @@ void jump_to_epilog_fast(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst) } TABLE64C(x2, const_epilog_fast); SMEND(); + CHECK_DFNONE(0); BR(x2); } @@ -407,6 +409,7 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 if (is32bits) ip &= 0xffffffffLL; + CHECK_DFNONE(0); int dest; if (reg) { if (reg != xRIP) { @@ -441,6 +444,7 @@ void ret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex) MAYUSE(dyn); MAYUSE(ninst); MESSAGE(LOG_DUMP, "Ret to epilog\n"); + CHECK_DFNONE(0); POP1z(xRIP); MVz(x1, xRIP); SMEND(); @@ -465,6 +469,7 @@ void retn_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int MAYUSE(dyn); MAYUSE(ninst); MESSAGE(LOG_DUMP, "Retn to epilog\n"); + CHECK_DFNONE(0); POP1z(xRIP); if (n > 0x7ff) { MOV64x(x1, n); @@ -513,7 +518,7 @@ void iret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, int is64bits) AND(xFlags, xFlags, x1); ORI(xFlags, xFlags, 0x2); SPILL_EFLAGS(); - SET_DFNONE(); + FORCE_DFNONE(); // POP RSP if (is64bits) { POP1(x3); // rsp @@ -540,6 +545,7 @@ void iret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, int is64bits) void call_c(dynarec_la64_t* dyn, int ninst, la64_consts_t fnc, int reg, int ret, int saveflags, int savereg, int arg1, int arg2, int arg3, int arg4, int arg5, int arg6) { MAYUSE(fnc); + CHECK_DFNONE(1); if (savereg == 0) savereg = x87pc; if (saveflags) { @@ -608,6 +614,7 @@ void call_c(dynarec_la64_t* dyn, int ninst, la64_consts_t fnc, int reg, int ret, void call_n(dynarec_la64_t* dyn, int ninst, void* fnc, int w) { MAYUSE(fnc); + CHECK_DFNONE(1); fpu_pushcache(dyn, ninst, x3, 1); ST_D(xRSP, xEmu, offsetof(x64emu_t, regs[_SP])); ST_D(xRBP, xEmu, offsetof(x64emu_t, regs[_BP])); @@ -2269,24 +2276,36 @@ static void flagsCacheTransform(dynarec_la64_t* dyn, int ninst, int s1) int jmp = dyn->insts[ninst].x64.jmp_insts; if (jmp < 0) return; - if (dyn->f.dfnone || ((dyn->insts[jmp].f_exit.dfnone && !dyn->insts[jmp].f_entry.dfnone) && !dyn->insts[jmp].x64.use_flags)) // flags are fully known, nothing we can do more + if (dyn->insts[jmp].f_exit == dyn->insts[jmp].f_entry) // flags will be fully known, nothing we can do more return; MESSAGE(LOG_DUMP, "\tFlags fetch ---- ninst=%d -> %d\n", ninst, jmp); - int go = (dyn->insts[jmp].f_entry.dfnone && !dyn->f.dfnone && !dyn->insts[jmp].df_notneeded) ? 1 : 0; - switch (dyn->insts[jmp].f_entry.pending) { - case SF_UNKNOWN: - go = 0; + int go_fetch = 0; + switch (dyn->insts[jmp].f_entry) { + case status_unk: + if (dyn->insts[ninst].f_exit == status_none_pending) { + FORCE_DFNONE(); + } break; - default: - if (go && !(dyn->insts[jmp].x64.need_before & X_PEND) && (dyn->f.pending != SF_UNKNOWN)) { - // just clear df flags - go = 0; - ST_W(xZR, xEmu, offsetof(x64emu_t, df)); + case status_set: + if (dyn->insts[ninst].f_exit == status_none_pending) { + FORCE_DFNONE(); } + if (dyn->insts[ninst].f_exit == status_unk) + go_fetch = 1; + break; + case status_none_pending: + if (dyn->insts[ninst].f_exit != status_none) + go_fetch = 1; + break; + case status_none: + if (dyn->insts[ninst].f_exit == status_none_pending) { + FORCE_DFNONE(); + } else + go_fetch = 1; break; } - if (go) { - if (dyn->f.pending != SF_PENDING) { + if (go_fetch) { + if (dyn->f == status_unk) { LD_WU(s1, xEmu, offsetof(x64emu_t, df)); j64 = (GETMARKF2) - (dyn->native_size); BEQZ(s1, j64); @@ -2294,6 +2313,7 @@ static void flagsCacheTransform(dynarec_la64_t* dyn, int ninst, int s1) CALL_(const_updateflags, -1, 0, 0, 0); MARKF2; } + MESSAGE(LOG_DUMP, "\t---- Flags fetch\n"); } void CacheTransform(dynarec_la64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 0d713ddfb..9eddcadd5 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -831,36 +831,29 @@ LOAD_REG(R15); #define FORCE_DFNONE() ST_W(xZR, xEmu, offsetof(x64emu_t, df)) +#define CHECK_DFNONE(N) \ + do { \ + if (dyn->f == status_none_pending) { \ + FORCE_DFNONE(); \ + if (N) dyn->f = status_none; \ + } \ + } while (0) -#define SET_DFNONE() \ - do { \ - if (!dyn->f.dfnone) { \ - FORCE_DFNONE(); \ - } \ - if (!dyn->insts[ninst].x64.may_set) { \ - dyn->f.dfnone = 1; \ - } \ +#define SET_DFNONE() \ + do { \ + if (!dyn->insts[ninst].x64.may_set && (dyn->f != status_none)) { \ + dyn->f = status_none_pending; \ + } \ } while (0) -#define SET_DF(S, N) \ - if ((N) != d_none) { \ - MOV32w(S, (N)); \ - ST_W(S, xEmu, offsetof(x64emu_t, df)); \ - if (dyn->f.pending == SF_PENDING \ - && dyn->insts[ninst].x64.need_after \ - && !(dyn->insts[ninst].x64.need_after & X_PEND)) { \ - CALL_(const_updateflags, -1, 0, 0, 0); \ - dyn->f.pending = SF_SET; \ - SET_NODF(); \ - } \ - dyn->f.dfnone = 0; \ - } else \ +#define SET_DF(S, N) \ + if ((N) != d_none) { \ + MOV32w(S, (N)); \ + ST_W(S, xEmu, offsetof(x64emu_t, df)); \ + dyn->f = status_set; \ + } else \ SET_DFNONE() -#define SET_NODF() dyn->f.dfnone = 0 -#define SET_DFOK() \ - dyn->f.dfnone = 1 - #define CLEAR_FLAGS_(s) \ MOV64x(s, (1UL << F_AF) | (1UL << F_CF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF)); \ ANDN(xFlags, xFlags, s); @@ -942,18 +935,11 @@ #endif #ifndef READFLAGS -#define READFLAGS(A) \ - if (((A) != X_PEND && dyn->f.pending != SF_SET) \ - && (dyn->f.pending != SF_SET_PENDING)) { \ - if (dyn->f.pending != SF_PENDING) { \ - LD_WU(x3, xEmu, offsetof(x64emu_t, df)); \ - j64 = (GETMARKF) - (dyn->native_size); \ - BEQ(x3, xZR, j64); \ - } \ - CALL_(const_updateflags, -1, 0, 0, 0); \ - MARKF; \ - dyn->f.pending = SF_SET; \ - SET_DFOK(); \ +#define READFLAGS(A) \ + if ((A) != X_PEND \ + && (dyn->f == status_unk)) { \ + CALL_(const_updateflags, -1, 0, 0, 0); \ + dyn->f = status_none; \ } #endif @@ -987,32 +973,28 @@ #define NAT_FLAGS_ENABLE_SIGN() dyn->insts[ninst].nat_flags_sign = 1 #ifndef SETFLAGS -#define SETFLAGS(A, B, FUSION) \ - if (dyn->f.pending != SF_SET \ - && ((B) & SF_SUB) \ - && (dyn->insts[ninst].x64.gen_flags & (~(A)))) \ - READFLAGS(((dyn->insts[ninst].x64.gen_flags & X_PEND) ? X_ALL : dyn->insts[ninst].x64.gen_flags) & (~(A))); \ - if (dyn->insts[ninst].x64.gen_flags) switch (B) { \ - case SF_SUBSET: \ - case SF_SET: dyn->f.pending = SF_SET; break; \ - case SF_SET_DF: \ - dyn->f.pending = SF_SET; \ - dyn->f.dfnone = 1; \ - break; \ - case SF_SET_NODF: \ - dyn->f.pending = SF_SET; \ - dyn->f.dfnone = 0; \ - break; \ - case SF_PENDING: dyn->f.pending = SF_PENDING; break; \ - case SF_SUBSET_PENDING: \ - case SF_SET_PENDING: \ - dyn->f.pending = (dyn->insts[ninst].x64.gen_flags & X_PEND) ? SF_SET_PENDING : SF_SET; \ - break; \ - } \ - else \ - dyn->f.pending = SF_SET; \ - dyn->insts[ninst].nat_flags_nofusion = (FUSION) +#define SETFLAGS(A, B, FUSION) \ + do { \ + if (((B) & SF_SUB) \ + && (dyn->insts[ninst].x64.gen_flags & (~(A)))) \ + READFLAGS(((dyn->insts[ninst].x64.gen_flags & X_PEND) ? X_ALL : dyn->insts[ninst].x64.gen_flags) & (~(A))); \ + if (dyn->insts[ninst].x64.gen_flags) switch (B) { \ + case SF_SET_DF: dyn->f = status_set; break; \ + case SF_SET_NODF: SET_DFNONE(); break; \ + case SF_SUBSET: \ + case SF_SUBSET_PENDING: \ + case SF_SET: \ + case SF_PENDING: \ + case SF_SET_PENDING: \ + SET_DFNONE(); \ + break; \ + } \ + else \ + SET_DFNONE(); \ + dyn->insts[ninst].nat_flags_nofusion = (FUSION); \ + } while (0) #endif + #ifndef JUMP #define JUMP(A, C) #endif diff --git a/src/dynarec/la64/dynarec_la64_pass0.h b/src/dynarec/la64/dynarec_la64_pass0.h index b0ad3f316..75d56900b 100644 --- a/src/dynarec/la64/dynarec_la64_pass0.h +++ b/src/dynarec/la64/dynarec_la64_pass0.h @@ -6,10 +6,13 @@ #define MESSAGE(A, ...) do {} while (0) #define MAYSETFLAGS() dyn->insts[ninst].x64.may_set = 1 -#define READFLAGS(A) \ - dyn->insts[ninst].x64.use_flags = A; \ - dyn->f.dfnone = 1; \ - dyn->f.pending = SF_SET +#define READFLAGS(A) \ + do { \ + dyn->insts[ninst].x64.use_flags = A; \ + if (dyn->f != status_none_pending) dyn->f = status_none; \ + if (!BOX64ENV(dynarec_df) && (A) & X_PEND) dyn->insts[ninst].x64.use_flags = X_ALL; \ + dyn->f = status_none; \ + } while (0) #define READFLAGS_FUSION(A, s1, s2, s3, s4, s5) \ if (BOX64ENV(dynarec_nativeflags) && ninst > 0) { \ @@ -27,12 +30,20 @@ } \ READFLAGS(A); -#define SETFLAGS(A, B, FUSION) \ - dyn->insts[ninst].x64.set_flags = A; \ - dyn->insts[ninst].x64.state_flags = (B) & ~SF_DF; \ - dyn->f.pending = (B) & SF_SET_PENDING; \ - dyn->f.dfnone = ((B) & SF_SET) ? (((B) == SF_SET_NODF) ? 0 : 1) : 0; \ - dyn->insts[ninst].nat_flags_nofusion = (FUSION) +#define SETFLAGS(A, B, FUSION) \ + do { \ + dyn->insts[ninst].x64.set_flags = A; \ + dyn->insts[ninst].x64.state_flags = (B) & ~SF_DF; \ + dyn->f = ((B) & SF_SET) ? (((B) == SF_SET_NODF) ? status_none : status_none_pending) : (((B) & SF_SET_PENDING) ? status_set : status_none_pending); \ + if (!BOX64ENV(dynarec_df)) { \ + dyn->f = status_none; \ + if ((A) == SF_PENDING) { \ + printf_log(LOG_INFO, "Warning, some opcode use SF_PENDING, forcing deferedflags ON\n"); \ + SET_BOX64ENV(dynarec_df, 1); \ + } \ + } \ + dyn->insts[ninst].nat_flags_nofusion = (FUSION); \ + } while (0) #define EMIT(A) dyn->native_size += 4 #define JUMP(A, C) add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C; dyn->insts[ninst].x64.jmp_insts = 0 diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index 76d6efe8b..b24b40660 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -79,9 +79,11 @@ typedef struct lsxcache_s { uint16_t ymm_used; // mask of the ymm regs used in this opcode } lsxcache_t; -typedef struct flagcache_s { - int pending; // is there a pending flags here, or to check? - uint8_t dfnone; // if deferred flags is already set to df_none +typedef enum flagcache_s { + status_unk = 0, // unknown deferred flags status + status_set, // deferred flags set to something (not 0) + status_none_pending, // deferred flags set to 0, but still pending the write to x64emu_t + status_none, // deferred flags set to 0, written to x64emu_t } flagcache_t; typedef struct callret_s callret_t;