Skip to content

Commit 4ed3c02

Browse files
authored
[LA64_DYNAREC] Refined more rotate opcodes flags computation (#3327)
1 parent bebae71 commit 4ed3c02

File tree

3 files changed

+298
-229
lines changed

3 files changed

+298
-229
lines changed

src/dynarec/la64/dynarec_la64_00.c

Lines changed: 42 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -2173,21 +2173,31 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
21732173
switch ((nextop >> 3) & 7) {
21742174
case 0:
21752175
INST_NAME("ROL Eb, Ib");
2176-
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
2176+
u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
2177+
if (u8) {
2178+
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
21772179
GETEB(x1, 1);
21782180
u8 = F8 & 0x1f;
21792181
emit_rol8c(dyn, ninst, ed, u8, x4, x5, x6);
21802182
EBBACK();
2183+
} else {
2184+
FAKEED;
2185+
F8;
2186+
}
21812187
break;
21822188
case 1:
21832189
INST_NAME("ROR Eb, Ib");
2184-
MESSAGE(LOG_DUMP, "Need Optimization\n");
2185-
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
2186-
GETEB(x1, 1);
2187-
u8 = F8 & 0x1f;
2188-
MOV32w(x2, u8);
2189-
CALL_(const_ror8, ed, x3, x1, x2);
2190-
EBBACK();
2190+
u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
2191+
if (u8) {
2192+
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
2193+
GETEB(x1, 1);
2194+
u8 = F8 & 0x1f;
2195+
emit_ror8c(dyn, ninst, x1, u8, x4, x5);
2196+
EBBACK();
2197+
} else {
2198+
FAKEED;
2199+
F8;
2200+
}
21912201
break;
21922202
case 2:
21932203
INST_NAME("RCL Eb, Ib");
@@ -2328,16 +2338,6 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
23282338
break;
23292339
case 2:
23302340
INST_NAME("RCL Ed, Ib");
2331-
// MESSAGE(LOG_DUMP, "Need Optimization\n");
2332-
// READFLAGS(X_CF);
2333-
// SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
2334-
// GETEDW(x4, x1, 0);
2335-
// u8 = (F8) & (rex.w ? 0x3f : 0x1f);
2336-
// MOV32w(x2, u8);
2337-
// CALL_(rex.w ? (const_rcl64) : (const_rcl32), ed, x4, x1, x2);
2338-
// WBACK;
2339-
// if (!wback && !rex.w) ZEROUP(ed);
2340-
23412341
u8 = geted_ib(dyn, addr, ninst, nextop) & (0x1f + (rex.w * 0x20));
23422342
if (u8) {
23432343
READFLAGS(X_CF);
@@ -2718,7 +2718,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
27182718
*ok = 0;
27192719
break;
27202720
case 0xD0:
2721-
case 0xD2: // TODO: Jump if CL is 0
2721+
case 0xD2:
27222722
nextop = F8;
27232723
switch ((nextop >> 3) & 7) {
27242724
case 0:
@@ -2727,37 +2727,48 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
27272727
GETEB(x1, 0);
27282728
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
27292729
emit_rol8c(dyn, ninst, ed, 1, x4, x5, x6);
2730+
EBBACK();
2731+
break;
27302732
} else {
27312733
INST_NAME("ROL Eb, CL");
27322734
GETEB(x1, 0);
2733-
ANDI(x2, xRCX, 0x1f);
27342735
if (BOX64DRENV(dynarec_safeflags) > 1) {
27352736
READFLAGS(X_OF | X_CF);
27362737
}
27372738
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
2738-
emit_rol8(dyn, ninst, ed, x2, x4, x5, x6);
2739+
UFLAG_IF {
2740+
ANDI(x2, xRCX, 0x1f);
2741+
BEQ_NEXT(x2, xZR);
2742+
}
2743+
ANDI(x2, xRCX, 7);
2744+
emit_rol8(dyn, ninst, ed, x2, x4, x5);
2745+
EBBACK();
2746+
break;
27392747
}
2740-
EBBACK();
2741-
break;
27422748
case 1:
27432749
if (opcode == 0xD0) {
27442750
INST_NAME("ROR Eb, 1");
2751+
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
27452752
GETEB(x1, 0);
2746-
MOV32w(x2, 1);
2747-
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
2753+
emit_ror8c(dyn, ninst, ed, 1, x4, x5);
2754+
EBBACK();
2755+
break;
27482756
} else {
27492757
INST_NAME("ROR Eb, CL");
27502758
GETEB(x1, 0);
2751-
ANDI(x2, xRCX, 0x1f);
27522759
if (BOX64DRENV(dynarec_safeflags) > 1) {
27532760
READFLAGS(X_OF | X_CF);
27542761
}
2755-
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
2762+
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
2763+
UFLAG_IF {
2764+
ANDI(x2, xRCX, 0x1f);
2765+
BEQ_NEXT(x2, xZR);
2766+
}
2767+
ANDI(x2, xRCX, 7);
2768+
emit_ror8(dyn, ninst, ed, x2, x4, x5);
2769+
EBBACK();
2770+
break;
27562771
}
2757-
MESSAGE(LOG_DUMP, "Need Optimization\n");
2758-
CALL_(const_ror8, ed, x3, x1, x2);
2759-
EBBACK();
2760-
break;
27612772
case 2:
27622773
if (opcode == 0xD0) {
27632774
INST_NAME("RCL Eb, 1");

src/dynarec/la64/dynarec_la64_emit_shift.c

Lines changed: 113 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -126,12 +126,14 @@ void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
126126
}
127127

128128
SLLI_D(s1, s1, c);
129-
BSTRPICK_D(s1, s1, 15, 0);
130129

131130
IFX (X_PEND) {
132131
ST_H(s1, xEmu, offsetof(x64emu_t, res));
133132
}
134-
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
133+
if (dyn->insts[ninst].nat_flags_fusion) {
134+
BSTRPICK_D(s1, s1, 15, 0);
135+
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
136+
}
135137
return;
136138
}
137139

@@ -596,7 +598,6 @@ void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
596598
void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
597599
{
598600
if (!c) return;
599-
// c != 0
600601
IFX (X_PEND) {
601602
MOV64x(s3, c);
602603
ST_H(s3, xEmu, offsetof(x64emu_t, op2));
@@ -873,7 +874,6 @@ void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
873874
void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
874875
{
875876
if (!c) return;
876-
// c != 0
877877
IFX (X_PEND) {
878878
MOV64x(s3, c);
879879
ST_H(s3, xEmu, offsetof(x64emu_t, op2));
@@ -893,11 +893,13 @@ void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
893893
}
894894
IFXA ((X_AF | X_OF), BOX64DRENV(dynarec_safeflags)) X64_SET_EFLAGS(xZR, (X_AF | X_OF));
895895
SRLI_D(s1, s1, c);
896-
BSTRPICK_D(s1, s1, 15, 0);
897896
IFX (X_PEND) {
898897
ST_H(s1, xEmu, offsetof(x64emu_t, res));
899898
}
900-
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
899+
if (dyn->insts[ninst].nat_flags_fusion) {
900+
BSTRPICK_D(s1, s1, 15, 0);
901+
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
902+
}
901903
return;
902904
}
903905

@@ -1123,49 +1125,32 @@ void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
11231125
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
11241126
}
11251127

1126-
// emit ROL8 instruction, from s1, s2, store result in s1 using s3 s4 and s5 as scratch
1127-
void emit_rol8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
1128+
// emit ROL8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
1129+
void emit_rol8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
11281130
{
1129-
int64_t j64;
1130-
BEQ_NEXT(s2, xZR);
1131-
1132-
ANDI(s5, s2, 0b111);
1133-
if (cpuext.lbt) {
1134-
IFX (X_CF | X_OF) X64_ROTL_B(s1, s5);
1135-
IFXA (X_OF, BOX64DRENV(dynarec_safeflags)) {
1136-
SRLI_W(s3, s1, 6);
1137-
SRLI_D(s4, s3, 1);
1138-
XOR(s3, s3, s4);
1139-
SLLI_D(s3, s3, F_OF);
1140-
X64_SET_EFLAGS(s3, X_OF);
1141-
}
1142-
1143-
ADDI_D(s3, xZR, 8);
1144-
SUB_D(s3, s3, s5);
1145-
ROTR_B(s1, s1, s3);
1146-
1147-
IFX (X_PEND) {
1148-
ST_B(s1, xEmu, offsetof(x64emu_t, res));
1149-
}
1150-
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1151-
return;
1152-
}
1131+
IFXORNAT (X_ALL) SET_DFNONE();
1132+
RESTORE_EFLAGS(s3);
11531133

11541134
IFX (X_OF) {
11551135
SRLI_W(s3, s1, 6);
11561136
SRLI_D(s4, s3, 1);
11571137
XOR(s3, s3, s4);
11581138
BSTRINS_D(xFlags, s3, F_OF, F_OF);
11591139
}
1160-
1161-
SLL_D(s3, s1, s5);
1162-
NEG_D(s4, s5);
1163-
ADDI_D(s4, s4, 8);
1164-
SRL_D(s1, s1, s4);
1140+
ADDI_D(s4, xZR, 8);
1141+
SUB_D(s4, s4, s2);
1142+
SLLI_D(s3, s1, 8);
11651143
OR(s1, s3, s1);
1144+
SRL_D(s1, s1, s4);
1145+
IFX (X_CF) {
1146+
BSTRINS_D(xFlags, s1, F_CF, F_CF);
1147+
}
11661148

1167-
IFX (X_CF) BSTRINS_D(xFlags, s1, F_CF, F_CF);
1168-
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1149+
IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
1150+
if (dyn->insts[ninst].nat_flags_fusion) {
1151+
ANDI(s1, s1, 0xFF);
1152+
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1153+
}
11691154
}
11701155

11711156
// emit ROL8 instruction, from s1, c, store result in s1 using s3 s4 and s5 as scratch
@@ -1208,39 +1193,44 @@ void emit_rol8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
12081193
IFX (X_CF) {
12091194
BSTRINS_D(xFlags, s1, F_CF, F_CF);
12101195
}
1211-
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1196+
if (dyn->insts[ninst].nat_flags_fusion) {
1197+
ANDI(s1, s1, 0xFF);
1198+
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1199+
}
12121200
}
12131201

12141202
// emit ROL16 instruction, from s1, c, store result in s1 using s3 s4 and s5 as scratch
12151203
void emit_rol16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
12161204
{
1217-
int64_t j64;
12181205
if (!c) return;
12191206
IFXORNAT (X_ALL) SET_DFNONE();
1207+
RESTORE_EFLAGS(s3);
1208+
1209+
IFXA (X_OF, c == 1) {
1210+
SRLI_D(s3, s1, 14);
1211+
SRLI_D(s4, s3, 1);
1212+
XOR(s3, s4, s3);
1213+
BSTRINS_D(xFlags, s3, F_OF, F_OF);
1214+
}
1215+
12201216
if (c & 15) {
1217+
int rc = 16 - (c & 15);
12211218
if (cpuext.lbt)
1222-
ROTRI_H(s1, s1, 16 - (c & 15));
1219+
ROTRI_H(s1, s1, rc);
12231220
else {
1224-
SRLI_D(s3, s1, 16 - (c & 15));
1225-
SLLI_D(s1, s1, c & 15);
1226-
OR(s1, s1, s3);
1227-
BSTRPICK_D(s1, s1, 15, 0);
1221+
SLLI_D(s3, s1, 16);
1222+
OR(s1, s3, s1);
1223+
SRLI_D(s1, s1, rc);
12281224
}
12291225
}
12301226

1231-
IFX (X_CF | X_OF) {
1232-
ANDI(s4, s1, 1 << F_CF);
1233-
IFXA (X_OF, c == 1) {
1234-
ANDI(s4, s1, 1 << F_CF);
1235-
SRLI_D(s3, s1, 15);
1236-
XOR(s3, s3, s4);
1237-
BSTRINS_D(xFlags, s3, F_OF, F_OF);
1238-
}
1239-
IFX (X_CF) BSTRINS_D(xFlags, s1, F_CF, F_CF);
1240-
}
1227+
IFX (X_CF) BSTRINS_D(xFlags, s1, F_CF, F_CF);
12411228

12421229
IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
1243-
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1230+
if (dyn->insts[ninst].nat_flags_fusion) {
1231+
if (!cpuext.lbt) BSTRPICK_D(s1, s1, 15, 0);
1232+
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1233+
}
12441234
}
12451235

12461236
// emit ROL32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
@@ -1620,7 +1610,10 @@ void emit_rcl8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
16201610
SRLI_D(s1, s1, 9 - c);
16211611

16221612
IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
1623-
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1613+
if (dyn->insts[ninst].nat_flags_fusion) {
1614+
ANDI(s1, s1, 0xFF);
1615+
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1616+
}
16241617
}
16251618

16261619
// emit RCL16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
@@ -1720,7 +1713,10 @@ void emit_rcr8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
17201713
}
17211714

17221715
IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
1723-
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1716+
if (dyn->insts[ninst].nat_flags_fusion) {
1717+
ANDI(s1, s1, 0xFF);
1718+
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1719+
}
17241720
}
17251721

17261722
// emit RCR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
@@ -1786,6 +1782,64 @@ void emit_rcr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
17861782
if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
17871783
}
17881784

1785+
// emit ROR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
1786+
void emit_ror8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
1787+
{
1788+
IFXORNAT (X_ALL) SET_DFNONE();
1789+
RESTORE_EFLAGS(s3);
1790+
1791+
IFX (X_OF) {
1792+
SRLI_D(s3, s1, 7);
1793+
XOR(s3, s1, s3);
1794+
BSTRINS_D(xFlags, s3, F_OF, F_OF);
1795+
}
1796+
1797+
SLLI_D(s3, s1, 8);
1798+
OR(s1, s1, s3);
1799+
SRL_D(s1, s1, s2);
1800+
1801+
IFX (X_CF) {
1802+
SRLI_D(s3, s1, 7);
1803+
BSTRINS_D(xFlags, s3, F_CF, F_CF);
1804+
}
1805+
1806+
IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
1807+
if (dyn->insts[ninst].nat_flags_fusion) {
1808+
ANDI(s1, s1, 0xFF);
1809+
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1810+
}
1811+
}
1812+
1813+
// emit ROR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
1814+
void emit_ror8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
1815+
{
1816+
if (!c) return;
1817+
IFXORNAT (X_ALL) SET_DFNONE();
1818+
RESTORE_EFLAGS(s3);
1819+
1820+
IFX (X_OF) {
1821+
SRLI_D(s3, s1, 7);
1822+
XOR(s3, s1, s3);
1823+
BSTRINS_D(xFlags, s3, F_OF, F_OF);
1824+
}
1825+
1826+
if (c & 7) {
1827+
SLLI_D(s3, s1, 8);
1828+
OR(s1, s3, s1);
1829+
SRLI_D(s1, s1, c & 7);
1830+
}
1831+
IFX (X_CF) {
1832+
SRLI_D(s3, s1, 7);
1833+
BSTRINS_D(xFlags, s3, F_CF, F_CF);
1834+
}
1835+
1836+
IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
1837+
if (dyn->insts[ninst].nat_flags_fusion) {
1838+
ANDI(s1, s1, 0xFF);
1839+
NAT_FLAGS_OPS(s1, xZR, s3, xZR);
1840+
}
1841+
}
1842+
17891843
// emit ROR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
17901844
void emit_ror16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
17911845
{

0 commit comments

Comments
 (0)