@@ -126,12 +126,14 @@ void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
126126 }
127127
128128 SLLI_D (s1 , s1 , c );
129- BSTRPICK_D (s1 , s1 , 15 , 0 );
130129
131130 IFX (X_PEND ) {
132131 ST_H (s1 , xEmu , offsetof(x64emu_t , res ));
133132 }
134- if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
133+ if (dyn -> insts [ninst ].nat_flags_fusion ) {
134+ BSTRPICK_D (s1 , s1 , 15 , 0 );
135+ NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
136+ }
135137 return ;
136138 }
137139
@@ -596,7 +598,6 @@ void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
596598void emit_shr16c (dynarec_la64_t * dyn , int ninst , int s1 , uint32_t c , int s3 , int s4 , int s5 )
597599{
598600 if (!c ) return ;
599- // c != 0
600601 IFX (X_PEND ) {
601602 MOV64x (s3 , c );
602603 ST_H (s3 , xEmu , offsetof(x64emu_t , op2 ));
@@ -873,7 +874,6 @@ void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
873874void emit_sar16c (dynarec_la64_t * dyn , int ninst , int s1 , uint32_t c , int s3 , int s4 , int s5 )
874875{
875876 if (!c ) return ;
876- // c != 0
877877 IFX (X_PEND ) {
878878 MOV64x (s3 , c );
879879 ST_H (s3 , xEmu , offsetof(x64emu_t , op2 ));
@@ -893,11 +893,13 @@ void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
893893 }
894894 IFXA ((X_AF | X_OF ), BOX64DRENV (dynarec_safeflags )) X64_SET_EFLAGS (xZR , (X_AF | X_OF ));
895895 SRLI_D (s1 , s1 , c );
896- BSTRPICK_D (s1 , s1 , 15 , 0 );
897896 IFX (X_PEND ) {
898897 ST_H (s1 , xEmu , offsetof(x64emu_t , res ));
899898 }
900- if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
899+ if (dyn -> insts [ninst ].nat_flags_fusion ) {
900+ BSTRPICK_D (s1 , s1 , 15 , 0 );
901+ NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
902+ }
901903 return ;
902904 }
903905
@@ -1123,49 +1125,32 @@ void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
11231125 if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
11241126}
11251127
1126- // emit ROL8 instruction, from s1, s2, store result in s1 using s3 s4 and s5 as scratch
1127- void emit_rol8 (dynarec_la64_t * dyn , int ninst , int s1 , int s2 , int s3 , int s4 , int s5 )
1128+ // emit ROL8 instruction: rotate the byte in s1 left by the count in s2, store result in s1 using s3 and s4 as scratch
1129+ void emit_rol8 (dynarec_la64_t * dyn , int ninst , int s1 , int s2 , int s3 , int s4 )
11281130{
1129- int64_t j64 ;
1130- BEQ_NEXT (s2 , xZR );
1131-
1132- ANDI (s5 , s2 , 0b111 );
1133- if (cpuext .lbt ) {
1134- IFX (X_CF | X_OF ) X64_ROTL_B (s1 , s5 );
1135- IFXA (X_OF , BOX64DRENV (dynarec_safeflags )) {
1136- SRLI_W (s3 , s1 , 6 );
1137- SRLI_D (s4 , s3 , 1 );
1138- XOR (s3 , s3 , s4 );
1139- SLLI_D (s3 , s3 , F_OF );
1140- X64_SET_EFLAGS (s3 , X_OF );
1141- }
1142-
1143- ADDI_D (s3 , xZR , 8 );
1144- SUB_D (s3 , s3 , s5 );
1145- ROTR_B (s1 , s1 , s3 );
1146-
1147- IFX (X_PEND ) {
1148- ST_B (s1 , xEmu , offsetof(x64emu_t , res ));
1149- }
1150- if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1151- return ;
1152- }
1131+ IFXORNAT (X_ALL ) SET_DFNONE (); // macros defined outside this view; presumably resets the deferred-flags state when flags or native-flag fusion are wanted
1132+ RESTORE_EFLAGS (s3 ); // defined outside this view; presumably makes xFlags current before it is edited below (paired with SPILL_EFLAGS) — confirm
11531133
11541134 IFX (X_OF ) { // OF requested: computed from the input value, before the rotate
11551135 SRLI_W (s3 , s1 , 6 ); // bit 0 of s3 = input bit 6
11561136 SRLI_D (s4 , s3 , 1 ); // bit 0 of s4 = input bit 7
11571137 XOR (s3 , s3 , s4 ); // bit 0 = bit6 ^ bit7 of the input
11581138 BSTRINS_D (xFlags , s3 , F_OF , F_OF ); // write that single bit into the OF position of xFlags
11591139 }
1160-
1161- SLL_D (s3 , s1 , s5 );
1162- NEG_D (s4 , s5 );
1163- ADDI_D (s4 , s4 , 8 );
1164- SRL_D (s1 , s1 , s4 );
1140+ ADDI_D (s4 , xZR , 8 );
1141+ SUB_D (s4 , s4 , s2 ); // s4 = 8 - s2. NOTE(review): s2 is neither masked nor zero-checked here (the removed version did both) — presumably the caller now passes a count in 0..7 and skips zero; confirm
1142+ SLLI_D (s3 , s1 , 8 );
11651143 OR (s1 , s3 , s1 ); // s1 = byte duplicated in bits 15..8 and 7..0 (assumes input bits above 7 are clear — TODO confirm)
1144+ SRL_D (s1 , s1 , s4 ); // low 8 bits now hold the left-rotated byte; bits above 7 may still hold leftovers
1145+ IFX (X_CF ) {
1146+ BSTRINS_D (xFlags , s1 , F_CF , F_CF ); // CF <- bit 0 of the rotated value
1147+ }
11661148
1167- IFX (X_CF ) BSTRINS_D (xFlags , s1 , F_CF , F_CF );
1168- if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1149+ IFXA (X_ALL , cpuext .lbt ) SPILL_EFLAGS (); // only emitted when flags are wanted and cpuext.lbt is set
1150+ if (dyn -> insts [ninst ].nat_flags_fusion ) {
1151+ ANDI (s1 , s1 , 0xFF ); // drop the leftover bits above 7 before s1 is handed to NAT_FLAGS_OPS
1152+ NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1153+ }
11691154}
11701155
11711156// emit ROL8 instruction, from s1, c, store result in s1 using s3 s4 and s5 as scratch
@@ -1208,39 +1193,44 @@ void emit_rol8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
12081193 IFX (X_CF ) {
12091194 BSTRINS_D (xFlags , s1 , F_CF , F_CF );
12101195 }
1211- if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1196+ if (dyn -> insts [ninst ].nat_flags_fusion ) {
1197+ ANDI (s1 , s1 , 0xFF );
1198+ NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1199+ }
12121200}
12131201
12141202// emit ROL16 instruction: rotate the 16-bit value in s1 left by constant c, store result in s1 using s3 and s4 as scratch (s5 is accepted but not used in this body)
12151203void emit_rol16c (dynarec_la64_t * dyn , int ninst , int s1 , uint32_t c , int s3 , int s4 , int s5 )
12161204{
1217- int64_t j64 ;
12181205 if (!c ) return ; // zero count: nothing is emitted at all
12191206 IFXORNAT (X_ALL ) SET_DFNONE (); // macros defined outside this view; presumably resets the deferred-flags state
1207+ RESTORE_EFLAGS (s3 ); // defined outside this view; presumably makes xFlags current before it is edited below — confirm
1208+
1209+ IFXA (X_OF , c == 1 ) { // OF is only written for a 1-bit rotate, and from the input value (before the rotate)
1210+ SRLI_D (s3 , s1 , 14 ); // bit 0 of s3 = input bit 14
1211+ SRLI_D (s4 , s3 , 1 ); // bit 0 of s4 = input bit 15
1212+ XOR (s3 , s4 , s3 ); // bit 0 = bit14 ^ bit15 of the input
1213+ BSTRINS_D (xFlags , s3 , F_OF , F_OF ); // write that single bit into the OF position of xFlags
1214+ }
1215+
12201216 if (c & 15 ) { // a count that is a multiple of 16 leaves the value untouched
1217+ int rc = 16 - (c & 15 ); // rotate-left by (c & 15) expressed as rotate-right by rc
12211218 if (cpuext .lbt )
1222- ROTRI_H (s1 , s1 , 16 - ( c & 15 ) );
1219+ ROTRI_H (s1 , s1 , rc );
12231220 else {
1224- SRLI_D (s3 , s1 , 16 - (c & 15 ));
1225- SLLI_D (s1 , s1 , c & 15 );
1226- OR (s1 , s1 , s3 );
1227- BSTRPICK_D (s1 , s1 , 15 , 0 );
1221+ SLLI_D (s3 , s1 , 16 );
1222+ OR (s1 , s3 , s1 ); // value duplicated in bits 31..16 and 15..0 (assumes input bits above 15 are clear — TODO confirm)
1223+ SRLI_D (s1 , s1 , rc ); // low 16 bits now hold the rotated value; bits above 15 still hold leftovers
12281224 }
12291225 }
12301226
1231- IFX (X_CF | X_OF ) {
1232- ANDI (s4 , s1 , 1 << F_CF );
1233- IFXA (X_OF , c == 1 ) {
1234- ANDI (s4 , s1 , 1 << F_CF );
1235- SRLI_D (s3 , s1 , 15 );
1236- XOR (s3 , s3 , s4 );
1237- BSTRINS_D (xFlags , s3 , F_OF , F_OF );
1238- }
1239- IFX (X_CF ) BSTRINS_D (xFlags , s1 , F_CF , F_CF );
1240- }
1227+ IFX (X_CF ) BSTRINS_D (xFlags , s1 , F_CF , F_CF ); // CF <- bit 0 of the rotated value
12411228
12421229 IFXA (X_ALL , cpuext .lbt ) SPILL_EFLAGS (); // only emitted when flags are wanted and cpuext.lbt is set
1243- if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1230+ if (dyn -> insts [ninst ].nat_flags_fusion ) {
1231+ if (!cpuext .lbt ) BSTRPICK_D (s1 , s1 , 15 , 0 ); // shift-based path leaves bits above 15 set; trim to 16 bits before NAT_FLAGS_OPS
1232+ NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1233+ }
12441234}
12451235
12461236// emit ROL32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
@@ -1620,7 +1610,10 @@ void emit_rcl8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
16201610 SRLI_D (s1 , s1 , 9 - c );
16211611
16221612 IFXA (X_ALL , cpuext .lbt ) SPILL_EFLAGS ();
1623- if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1613+ if (dyn -> insts [ninst ].nat_flags_fusion ) {
1614+ ANDI (s1 , s1 , 0xFF );
1615+ NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1616+ }
16241617}
16251618
16261619// emit RCL16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
@@ -1720,7 +1713,10 @@ void emit_rcr8c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
17201713 }
17211714
17221715 IFXA (X_ALL , cpuext .lbt ) SPILL_EFLAGS ();
1723- if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1716+ if (dyn -> insts [ninst ].nat_flags_fusion ) {
1717+ ANDI (s1 , s1 , 0xFF );
1718+ NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1719+ }
17241720}
17251721
17261722// emit RCR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
@@ -1786,6 +1782,64 @@ void emit_rcr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
17861782 if (dyn -> insts [ninst ].nat_flags_fusion ) NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
17871783}
17881784
1785+ // emit ROR8 instruction: rotate the byte in s1 right by the count in s2, store result in s1 using s3 as scratch (s4 is accepted but not used in this body)
1786+ void emit_ror8 (dynarec_la64_t * dyn , int ninst , int s1 , int s2 , int s3 , int s4 )
1787+ {
1788+ IFXORNAT (X_ALL ) SET_DFNONE (); // macros defined outside this view; presumably resets the deferred-flags state when flags or native-flag fusion are wanted
1789+ RESTORE_EFLAGS (s3 ); // defined outside this view; presumably makes xFlags current before it is edited below (paired with SPILL_EFLAGS) — confirm
1790+
1791+ IFX (X_OF ) { // OF requested: computed from the input value, before the rotate
1792+ SRLI_D (s3 , s1 , 7 ); // bit 0 of s3 = input bit 7
1793+ XOR (s3 , s1 , s3 ); // bit 0 = bit0 ^ bit7 of the input
1794+ BSTRINS_D (xFlags , s3 , F_OF , F_OF ); // write that single bit into the OF position of xFlags
1795+ }
1796+
1797+ SLLI_D (s3 , s1 , 8 );
1798+ OR (s1 , s1 , s3 ); // s1 = byte duplicated in bits 15..8 and 7..0 (assumes input bits above 7 are clear — TODO confirm)
1799+ SRL_D (s1 , s1 , s2 ); // low 8 bits now hold the right-rotated byte. NOTE(review): s2 is used unmasked and with no zero check — presumably the caller passes a count in 0..7 and skips zero; confirm
1800+
1801+ IFX (X_CF ) {
1802+ SRLI_D (s3 , s1 , 7 ); // bit 0 of s3 = bit 7 of the rotated byte
1803+ BSTRINS_D (xFlags , s3 , F_CF , F_CF ); // CF <- that bit
1804+ }
1805+
1806+ IFXA (X_ALL , cpuext .lbt ) SPILL_EFLAGS (); // only emitted when flags are wanted and cpuext.lbt is set
1807+ if (dyn -> insts [ninst ].nat_flags_fusion ) {
1808+ ANDI (s1 , s1 , 0xFF ); // drop the leftover bits above 7 before s1 is handed to NAT_FLAGS_OPS
1809+ NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1810+ }
1811+ }
1812+
1813+ // emit ROR8 instruction: rotate the byte in s1 right by constant c, store result in s1 using s3 as scratch (s4 is accepted but not used in this body)
1814+ void emit_ror8c (dynarec_la64_t * dyn , int ninst , int s1 , uint32_t c , int s3 , int s4 )
1815+ {
1816+ if (!c ) return ; // zero count: nothing is emitted at all
1817+ IFXORNAT (X_ALL ) SET_DFNONE (); // macros defined outside this view; presumably resets the deferred-flags state
1818+ RESTORE_EFLAGS (s3 ); // defined outside this view; presumably makes xFlags current before it is edited below — confirm
1819+
1820+ IFX (X_OF ) { // NOTE(review): written for any non-zero c, whereas emit_rol16c restricts its OF update to c == 1 — confirm intended
1821+ SRLI_D (s3 , s1 , 7 ); // bit 0 of s3 = input bit 7
1822+ XOR (s3 , s1 , s3 ); // bit 0 = bit0 ^ bit7 of the input (value before the rotate)
1823+ BSTRINS_D (xFlags , s3 , F_OF , F_OF ); // write that single bit into the OF position of xFlags
1824+ }
1825+
1826+ if (c & 7 ) { // a count that is a multiple of 8 leaves the value untouched (flags are still updated)
1827+ SLLI_D (s3 , s1 , 8 );
1828+ OR (s1 , s3 , s1 ); // s1 = byte duplicated in bits 15..8 and 7..0 (assumes input bits above 7 are clear — TODO confirm)
1829+ SRLI_D (s1 , s1 , c & 7 ); // low 8 bits now hold the right-rotated byte; bits above 7 still hold leftovers
1830+ }
1831+ IFX (X_CF ) {
1832+ SRLI_D (s3 , s1 , 7 ); // bit 0 of s3 = bit 7 of the rotated byte
1833+ BSTRINS_D (xFlags , s3 , F_CF , F_CF ); // CF <- that bit
1834+ }
1835+
1836+ IFXA (X_ALL , cpuext .lbt ) SPILL_EFLAGS (); // only emitted when flags are wanted and cpuext.lbt is set
1837+ if (dyn -> insts [ninst ].nat_flags_fusion ) {
1838+ ANDI (s1 , s1 , 0xFF ); // drop the leftover bits above 7 before s1 is handed to NAT_FLAGS_OPS
1839+ NAT_FLAGS_OPS (s1 , xZR , s3 , xZR );
1840+ }
1841+ }
1842+
17891843// emit ROR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
17901844void emit_ror16c (dynarec_la64_t * dyn , int ninst , int s1 , uint32_t c , int s3 , int s4 )
17911845{
0 commit comments