Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 42 additions & 31 deletions src/dynarec/la64/dynarec_la64_00.c
Original file line number Diff line number Diff line change
Expand Up @@ -2173,21 +2173,31 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
switch ((nextop >> 3) & 7) {
case 0:
INST_NAME("ROL Eb, Ib");
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
if (u8) {
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
GETEB(x1, 1);
u8 = F8 & 0x1f;
emit_rol8c(dyn, ninst, ed, u8, x4, x5, x6);
EBBACK();
} else {
FAKEED;
F8;
}
break;
case 1:
INST_NAME("ROR Eb, Ib");
MESSAGE(LOG_DUMP, "Need Optimization\n");
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEB(x1, 1);
u8 = F8 & 0x1f;
MOV32w(x2, u8);
CALL_(const_ror8, ed, x3, x1, x2);
EBBACK();
u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
if (u8) {
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
GETEB(x1, 1);
u8 = F8 & 0x1f;
emit_ror8c(dyn, ninst, x1, u8, x4, x5);
EBBACK();
} else {
FAKEED;
F8;
}
break;
case 2:
INST_NAME("RCL Eb, Ib");
Expand Down Expand Up @@ -2328,16 +2338,6 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
break;
case 2:
INST_NAME("RCL Ed, Ib");
// MESSAGE(LOG_DUMP, "Need Optimization\n");
// READFLAGS(X_CF);
// SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
// GETEDW(x4, x1, 0);
// u8 = (F8) & (rex.w ? 0x3f : 0x1f);
// MOV32w(x2, u8);
// CALL_(rex.w ? (const_rcl64) : (const_rcl32), ed, x4, x1, x2);
// WBACK;
// if (!wback && !rex.w) ZEROUP(ed);

u8 = geted_ib(dyn, addr, ninst, nextop) & (0x1f + (rex.w * 0x20));
if (u8) {
READFLAGS(X_CF);
Expand Down Expand Up @@ -2718,7 +2718,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
*ok = 0;
break;
case 0xD0:
case 0xD2: // TODO: Jump if CL is 0
case 0xD2:
nextop = F8;
switch ((nextop >> 3) & 7) {
case 0:
Expand All @@ -2727,37 +2727,48 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
GETEB(x1, 0);
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
emit_rol8c(dyn, ninst, ed, 1, x4, x5, x6);
EBBACK();
break;
} else {
INST_NAME("ROL Eb, CL");
GETEB(x1, 0);
ANDI(x2, xRCX, 0x1f);
if (BOX64DRENV(dynarec_safeflags) > 1) {
READFLAGS(X_OF | X_CF);
}
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
emit_rol8(dyn, ninst, ed, x2, x4, x5, x6);
UFLAG_IF {
ANDI(x2, xRCX, 0x1f);
BEQ_NEXT(x2, xZR);
}
ANDI(x2, xRCX, 7);
emit_rol8(dyn, ninst, ed, x2, x4, x5);
EBBACK();
break;
}
EBBACK();
break;
case 1:
if (opcode == 0xD0) {
INST_NAME("ROR Eb, 1");
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
GETEB(x1, 0);
MOV32w(x2, 1);
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
emit_ror8c(dyn, ninst, ed, 1, x4, x5);
EBBACK();
break;
} else {
INST_NAME("ROR Eb, CL");
GETEB(x1, 0);
ANDI(x2, xRCX, 0x1f);
if (BOX64DRENV(dynarec_safeflags) > 1) {
READFLAGS(X_OF | X_CF);
}
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
UFLAG_IF {
ANDI(x2, xRCX, 0x1f);
BEQ_NEXT(x2, xZR);
}
ANDI(x2, xRCX, 7);
emit_ror8(dyn, ninst, ed, x2, x4, x5);
EBBACK();
break;
}
MESSAGE(LOG_DUMP, "Need Optimization\n");
CALL_(const_ror8, ed, x3, x1, x2);
EBBACK();
break;
case 2:
if (opcode == 0xD0) {
INST_NAME("RCL Eb, 1");
Expand Down
172 changes: 113 additions & 59 deletions src/dynarec/la64/dynarec_la64_emit_shift.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,14 @@ void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
}

SLLI_D(s1, s1, c);
BSTRPICK_D(s1, s1, 15, 0);

IFX (X_PEND) {
ST_H(s1, xEmu, offsetof(x64emu_t, res));
}
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
if (dyn->insts[ninst].nat_flags_fusion) {
BSTRPICK_D(s1, s1, 15, 0);
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}
return;
}

Expand Down Expand Up @@ -596,7 +598,6 @@ void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
{
if (!c) return;
// c != 0
IFX (X_PEND) {
MOV64x(s3, c);
ST_H(s3, xEmu, offsetof(x64emu_t, op2));
Expand Down Expand Up @@ -873,7 +874,6 @@ void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
{
if (!c) return;
// c != 0
IFX (X_PEND) {
MOV64x(s3, c);
ST_H(s3, xEmu, offsetof(x64emu_t, op2));
Expand All @@ -893,11 +893,13 @@ void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
}
IFXA ((X_AF | X_OF), BOX64DRENV(dynarec_safeflags)) X64_SET_EFLAGS(xZR, (X_AF | X_OF));
SRLI_D(s1, s1, c);
BSTRPICK_D(s1, s1, 15, 0);
IFX (X_PEND) {
ST_H(s1, xEmu, offsetof(x64emu_t, res));
}
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
if (dyn->insts[ninst].nat_flags_fusion) {
BSTRPICK_D(s1, s1, 15, 0);
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}
return;
}

Expand Down Expand Up @@ -1123,49 +1125,32 @@ void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}

// emit ROL8 instruction, from s1, s2, store result in s1 using s3 s4 and s5 as scratch
void emit_rol8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
// emit ROL8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
void emit_rol8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
int64_t j64;
BEQ_NEXT(s2, xZR);

ANDI(s5, s2, 0b111);
if (cpuext.lbt) {
IFX (X_CF | X_OF) X64_ROTL_B(s1, s5);
IFXA (X_OF, BOX64DRENV(dynarec_safeflags)) {
SRLI_W(s3, s1, 6);
SRLI_D(s4, s3, 1);
XOR(s3, s3, s4);
SLLI_D(s3, s3, F_OF);
X64_SET_EFLAGS(s3, X_OF);
}

ADDI_D(s3, xZR, 8);
SUB_D(s3, s3, s5);
ROTR_B(s1, s1, s3);

IFX (X_PEND) {
ST_B(s1, xEmu, offsetof(x64emu_t, res));
}
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
return;
}
IFXORNAT (X_ALL) SET_DFNONE();
RESTORE_EFLAGS(s3);

IFX (X_OF) {
SRLI_W(s3, s1, 6);
SRLI_D(s4, s3, 1);
XOR(s3, s3, s4);
BSTRINS_D(xFlags, s3, F_OF, F_OF);
}

SLL_D(s3, s1, s5);
NEG_D(s4, s5);
ADDI_D(s4, s4, 8);
SRL_D(s1, s1, s4);
ADDI_D(s4, xZR, 8);
SUB_D(s4, s4, s2);
SLLI_D(s3, s1, 8);
OR(s1, s3, s1);
SRL_D(s1, s1, s4);
IFX (X_CF) {
BSTRINS_D(xFlags, s1, F_CF, F_CF);
}

IFX (X_CF) BSTRINS_D(xFlags, s1, F_CF, F_CF);
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
if (dyn->insts[ninst].nat_flags_fusion) {
ANDI(s1, s1, 0xFF);
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}
}

// emit ROL8 instruction, from s1, c, store result in s1 using s3 s4 and s5 as scratch
Expand Down Expand Up @@ -1208,39 +1193,44 @@ void emit_rol8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
IFX (X_CF) {
BSTRINS_D(xFlags, s1, F_CF, F_CF);
}
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
if (dyn->insts[ninst].nat_flags_fusion) {
ANDI(s1, s1, 0xFF);
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}
}

// emit ROL16 instruction, from s1, c, store result in s1 using s3 s4 and s5 as scratch
void emit_rol16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
{
int64_t j64;
if (!c) return;
IFXORNAT (X_ALL) SET_DFNONE();
RESTORE_EFLAGS(s3);

IFXA (X_OF, c == 1) {
SRLI_D(s3, s1, 14);
SRLI_D(s4, s3, 1);
XOR(s3, s4, s3);
BSTRINS_D(xFlags, s3, F_OF, F_OF);
}

if (c & 15) {
int rc = 16 - (c & 15);
if (cpuext.lbt)
ROTRI_H(s1, s1, 16 - (c & 15));
ROTRI_H(s1, s1, rc);
else {
SRLI_D(s3, s1, 16 - (c & 15));
SLLI_D(s1, s1, c & 15);
OR(s1, s1, s3);
BSTRPICK_D(s1, s1, 15, 0);
SLLI_D(s3, s1, 16);
OR(s1, s3, s1);
SRLI_D(s1, s1, rc);
}
}

IFX (X_CF | X_OF) {
ANDI(s4, s1, 1 << F_CF);
IFXA (X_OF, c == 1) {
ANDI(s4, s1, 1 << F_CF);
SRLI_D(s3, s1, 15);
XOR(s3, s3, s4);
BSTRINS_D(xFlags, s3, F_OF, F_OF);
}
IFX (X_CF) BSTRINS_D(xFlags, s1, F_CF, F_CF);
}
IFX (X_CF) BSTRINS_D(xFlags, s1, F_CF, F_CF);

IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
if (dyn->insts[ninst].nat_flags_fusion) {
if (!cpuext.lbt) BSTRPICK_D(s1, s1, 15, 0);
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}
}

// emit ROL32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
Expand Down Expand Up @@ -1620,7 +1610,10 @@ void emit_rcl8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
SRLI_D(s1, s1, 9 - c);

IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
if (dyn->insts[ninst].nat_flags_fusion) {
ANDI(s1, s1, 0xFF);
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}
}

// emit RCL16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
Expand Down Expand Up @@ -1720,7 +1713,10 @@ void emit_rcr8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
}

IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
if (dyn->insts[ninst].nat_flags_fusion) {
ANDI(s1, s1, 0xFF);
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}
}

// emit RCR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
Expand Down Expand Up @@ -1786,6 +1782,64 @@ void emit_rcr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}

// emit ROR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
void emit_ror8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
IFXORNAT (X_ALL) SET_DFNONE();
RESTORE_EFLAGS(s3);

IFX (X_OF) {
SRLI_D(s3, s1, 7);
XOR(s3, s1, s3);
BSTRINS_D(xFlags, s3, F_OF, F_OF);
}

SLLI_D(s3, s1, 8);
OR(s1, s1, s3);
SRL_D(s1, s1, s2);

IFX (X_CF) {
SRLI_D(s3, s1, 7);
BSTRINS_D(xFlags, s3, F_CF, F_CF);
}

IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
if (dyn->insts[ninst].nat_flags_fusion) {
ANDI(s1, s1, 0xFF);
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}
}

// emit ROR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
void emit_ror8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
{
if (!c) return;
IFXORNAT (X_ALL) SET_DFNONE();
RESTORE_EFLAGS(s3);

IFX (X_OF) {
SRLI_D(s3, s1, 7);
XOR(s3, s1, s3);
BSTRINS_D(xFlags, s3, F_OF, F_OF);
}

if (c & 7) {
SLLI_D(s3, s1, 8);
OR(s1, s3, s1);
SRLI_D(s1, s1, c & 7);
}
IFX (X_CF) {
SRLI_D(s3, s1, 7);
BSTRINS_D(xFlags, s3, F_CF, F_CF);
}

IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
if (dyn->insts[ninst].nat_flags_fusion) {
ANDI(s1, s1, 0xFF);
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
}
}

// emit ROR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
void emit_ror16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
{
Expand Down
Loading
Loading