Skip to content

Commit d822d85

Browse files
committed
SPU: Tame PUTLLC16
1 parent 7001940 commit d822d85

File tree

1 file changed

+39
-6
lines changed

1 file changed

+39
-6
lines changed

rpcs3/Emu/Cell/SPUCommonRecompiler.cpp

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4973,6 +4973,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
49734973
u32 lsa_last_pc = SPU_LS_SIZE; // PC of first LSA write
49744974
u32 get_pc = SPU_LS_SIZE; // PC of GETLLAR
49754975
u32 put_pc = SPU_LS_SIZE; // PC of PUTLLC
4976+
u32 rdatomic_pc = SPU_LS_SIZE; // PC of last RdAtomicStat read
49764977
reg_state_t ls{}; // state of LS load/store address register
49774978
reg_state_t ls_offs = reg_state_t::from_value(0); // Added value to ls
49784979
reg_state_t lsa{}; // state of LSA register on GETLLAR
@@ -6333,6 +6334,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
63336334
break;
63346335
}
63356336

6337+
atomic16->rdatomic_pc = pos;
6338+
63366339
const auto it = atomic16_all.find(pos);
63376340

63386341
if (it == atomic16_all.end())
@@ -7273,7 +7276,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
72737276

72747277
for (const auto& [pc_commited, pattern] : atomic16_all)
72757278
{
7276-
if (!pattern.active)
7279+
if (!pattern.active || pattern.lsa_pc >= pattern.rdatomic_pc)
72777280
{
72787281
continue;
72797282
}
@@ -7283,6 +7286,17 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
72837286
continue;
72847287
}
72857288

7289+
std::string pattern_hash;
7290+
{
7291+
sha1_context ctx;
7292+
u8 output[20]{};
7293+
7294+
sha1_starts(&ctx);
7295+
sha1_update(&ctx, reinterpret_cast<const u8*>(result.data.data()) + (pattern.lsa_pc - result.lower_bound), pattern.rdatomic_pc - pattern.lsa_pc);
7296+
sha1_finish(&ctx, output);
7297+
fmt::append(pattern_hash, "%s", fmt::base57(output));
7298+
}
7299+
72867300
union putllc16_or_0_info
72877301
{
72887302
u64 data;
@@ -7373,16 +7387,35 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
73737387
value.reg2 = pattern.reg2;
73747388
}
73757389

7390+
bool allow_pattern = true;
7391+
73767392
if (g_cfg.core.spu_accurate_reservations)
73777393
{
7378-
// Because enabling it is a hack, as it turns out
7379-
// continue;
7394+
// The problem with the PUTLLC16 optimization is that it is, in theory, correct within the bounds of the SPU function.
7395+
// But if the SPU code reuses the observed cache line data, it is not truly atomic.
7396+
// So we may enable it only for known cases where the SPU atomic data is not used after the function exits.
7397+
7398+
// So the two options are:
7399+
7400+
// 1. Atomic 16-byte compare-exchange operation (the rest of the data is not read) -> good for RPCS3 to optimize.
7401+
// 2. Fetch 128 bytes (read them later), modify only 16 bytes. -> Bad for RPCS3 to optimize.
7402+
7403+
// This difference cannot be known at analysis time; it can only be determined by observing the callers.
7404+
static constexpr std::initializer_list<std::string_view> allowed_patterns =
7405+
{
7406+
"620oYSe8uQqq9eTkhWfMqoEXX0us"sv, // CellSpurs JobChain acquire pattern
7407+
};
7408+
7409+
allow_pattern = std::any_of(allowed_patterns.begin(), allowed_patterns.end(), FN(pattern_hash == x));
73807410
}
73817411

7382-
add_pattern(false, inst_attr::putllc16, pattern.put_pc - result.entry_point, value.data);
7412+
if (allow_pattern)
7413+
{
7414+
add_pattern(false, inst_attr::putllc16, pattern.put_pc - result.entry_point, value.data);
7415+
}
73837416

7384-
spu_log.success("PUTLLC16 Pattern Detected! (mem_count=%d, put_pc=0x%x, pc_rel=%d, offset=0x%x, const=%u, two_regs=%d, reg=%u, runtime=%d, 0x%x-%s) (putllc0=%d, putllc16+0=%d, all=%d)"
7385-
, pattern.mem_count, pattern.put_pc, value.type == v_relative, value.off18, value.type == v_const, value.type == v_reg2, value.reg, value.runtime16_select, entry_point, func_hash, +stats.nowrite, ++stats.single, +stats.all);
7417+
spu_log.success("PUTLLC16 Pattern Detected! (mem_count=%d, put_pc=0x%x, pc_rel=%d, offset=0x%x, const=%u, two_regs=%d, reg=%u, runtime=%d, 0x%x-%s, pattern-hash=%s) (putllc0=%d, putllc16+0=%d, all=%d)"
7418+
, pattern.mem_count, pattern.put_pc, value.type == v_relative, value.off18, value.type == v_const, value.type == v_reg2, value.reg, value.runtime16_select, entry_point, func_hash, pattern_hash, +stats.nowrite, ++stats.single, +stats.all);
73867419
}
73877420

73887421
for (const auto& [read_pc, pattern] : rchcnt_loop_all)

0 commit comments

Comments
 (0)