@@ -4973,6 +4973,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
49734973 u32 lsa_last_pc = SPU_LS_SIZE; // PC of first LSA write
49744974 u32 get_pc = SPU_LS_SIZE; // PC of GETLLAR
49754975 u32 put_pc = SPU_LS_SIZE; // PC of PUTLLC
4976+ u32 rdatomic_pc = SPU_LS_SIZE; // PC of last RdAtomicStat read
49764977 reg_state_t ls{}; // state of LS load/store address register
49774978 reg_state_t ls_offs = reg_state_t ::from_value(0 ); // Added value to ls
49784979 reg_state_t lsa{}; // state of LSA register on GETLLAR
@@ -6333,6 +6334,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
63336334 break ;
63346335 }
63356336
6337+ atomic16->rdatomic_pc = pos;
6338+
63366339 const auto it = atomic16_all.find (pos);
63376340
63386341 if (it == atomic16_all.end ())
@@ -7273,7 +7276,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
72737276
72747277 for (const auto & [pc_commited, pattern] : atomic16_all)
72757278 {
7276- if (!pattern.active )
7279+ if (!pattern.active || pattern. lsa_pc >= pattern. rdatomic_pc )
72777280 {
72787281 continue ;
72797282 }
@@ -7283,6 +7286,17 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
72837286 continue ;
72847287 }
72857288
7289+ std::string pattern_hash;
7290+ {
7291+ sha1_context ctx;
7292+ u8 output[20 ]{};
7293+
7294+ sha1_starts (&ctx);
7295+ sha1_update (&ctx, reinterpret_cast <const u8 *>(result.data .data ()) + (pattern.lsa_pc - result.lower_bound ), pattern.rdatomic_pc - pattern.lsa_pc );
7296+ sha1_finish (&ctx, output);
7297+ fmt::append (pattern_hash, " %s" , fmt::base57 (output));
7298+ }
7299+
72867300 union putllc16_or_0_info
72877301 {
72887302 u64 data;
@@ -7373,16 +7387,35 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
73737387 value.reg2 = pattern.reg2 ;
73747388 }
73757389
7390+ bool allow_pattern = true ;
7391+
73767392 if (g_cfg.core .spu_accurate_reservations )
73777393 {
7378- // Because enabling it is a hack, as it turns out
7379- // continue;
7394+ // The problem with the PUTLLC16 optimization is that it is, in theory, correct within the bounds of the SPU function.
7395+ // But if the SPU code reuses the observed cache line data, the operation is not truly atomic.
7396+ // So we may enable it only for known cases where the SPU atomic data is not used after execution leaves the function.
7397+
7398+ // So the two options are:
7399+
7400+ // 1. Atomic compare exchange 16 bytes operation. (rest of data is not read) -> good for RPCS3 to optimize.
7401+ // 2. Fetch 128 bytes (read them later), modify only 16 bytes. -> Bad for RPCS3 to optimize.
7402+
7403+ // This difference cannot be determined at analysis time; it can only be determined by observing the callers.
7404+ static constexpr std::initializer_list<std::string_view> allowed_patterns =
7405+ {
7406+ " 620oYSe8uQqq9eTkhWfMqoEXX0us" sv, // CellSpurs JobChain acquire pattern
7407+ };
7408+
7409+ allow_pattern = std::any_of (allowed_patterns.begin (), allowed_patterns.end (), FN (pattern_hash == x));
73807410 }
73817411
7382- add_pattern (false , inst_attr::putllc16, pattern.put_pc - result.entry_point , value.data );
7412+ if (allow_pattern)
7413+ {
7414+ add_pattern (false , inst_attr::putllc16, pattern.put_pc - result.entry_point , value.data );
7415+ }
73837416
7384- spu_log.success (" PUTLLC16 Pattern Detected! (mem_count=%d, put_pc=0x%x, pc_rel=%d, offset=0x%x, const=%u, two_regs=%d, reg=%u, runtime=%d, 0x%x-%s) (putllc0=%d, putllc16+0=%d, all=%d)"
7385- , pattern.mem_count , pattern.put_pc , value.type == v_relative, value.off18 , value.type == v_const, value.type == v_reg2, value.reg , value.runtime16_select , entry_point, func_hash, +stats.nowrite , ++stats.single , +stats.all );
7417+ spu_log.success (" PUTLLC16 Pattern Detected! (mem_count=%d, put_pc=0x%x, pc_rel=%d, offset=0x%x, const=%u, two_regs=%d, reg=%u, runtime=%d, 0x%x-%s, pattern-hash=%s ) (putllc0=%d, putllc16+0=%d, all=%d)"
7418+ , pattern.mem_count , pattern.put_pc , value.type == v_relative, value.off18 , value.type == v_const, value.type == v_reg2, value.reg , value.runtime16_select , entry_point, func_hash, pattern_hash, +stats.nowrite , ++stats.single , +stats.all );
73867419 }
73877420
73887421 for (const auto & [read_pc, pattern] : rchcnt_loop_all)
0 commit comments