Skip to content

Commit 63f7bcf

Browse files
committed
SPU: Use CMPXCHG16B for atomic store if possible
1 parent 109abea commit 63f7bcf

File tree

1 file changed

+45
-2
lines changed

1 file changed

+45
-2
lines changed

rpcs3/Emu/Cell/SPUThread.cpp

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,33 @@ static FORCE_INLINE void mov_rdata_avx(__m256i* dst, const __m256i* src)
303303
}
304304
#endif
305305

306+
// Check if only a single 16-bytes block has changed
307+
// Returning its position, or -1 if that is not the situation
308+
static inline usz scan16_rdata(const decltype(spu_thread::rdata)& _lhs, const decltype(spu_thread::rdata)& _rhs)
309+
{
310+
const auto lhs = reinterpret_cast<const v128*>(_lhs);
311+
const auto rhs = reinterpret_cast<const v128*>(_rhs);
312+
313+
u32 mask = 0;
314+
315+
for (usz i = 0; i < 8; i += 4)
316+
{
317+
const u32 a = (lhs[i + 0] != rhs[i + 0]) ? 1 : 0;
318+
const u32 b = (lhs[i + 1] != rhs[i + 1]) ? 1 : 0;
319+
const u32 c = (lhs[i + 2] != rhs[i + 2]) ? 1 : 0;
320+
const u32 d = (lhs[i + 3] != rhs[i + 3]) ? 1 : 0;
321+
322+
mask |= ((a << 0) + (b << 1) + (c << 2) + (c << 3)) << i;
323+
}
324+
325+
if (mask && (mask & (mask - 1)) == 0)
326+
{
327+
return std::countr_zero(mask);
328+
}
329+
330+
return umax;
331+
}
332+
306333
#ifdef _MSC_VER
307334
__forceinline
308335
#endif
@@ -3854,6 +3881,11 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
38543881
return false;
38553882
}
38563883

3884+
static const auto cast_as = [](void* ptr, usz pos){ return reinterpret_cast<u128*>(ptr) + pos; };
3885+
static const auto cast_as_const = [](const void* ptr, usz pos){ return reinterpret_cast<const u128*>(ptr) + pos; };
3886+
3887+
const usz diff16_pos = scan16_rdata(to_write, rdata);
3888+
38573889
auto [_oldd, _ok] = res.fetch_op([&](u64& r)
38583890
{
38593891
if ((r & -128) != rtime || (r & 127))
@@ -3975,8 +4007,19 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
39754007

39764008
if (cmp_rdata(rdata, super_data))
39774009
{
3978-
mov_rdata(super_data, to_write);
3979-
return true;
4010+
if (diff16_pos != umax)
4011+
{
4012+
// Do it with CMPXCHG16B if possible, this allows to improve accuracy whenever "RSX Accurate Reservations" is off
4013+
if (atomic_storage<u128>::compare_exchange(*cast_as(super_data, diff16_pos), *cast_as(rdata, diff16_pos), *cast_as_const(to_write, diff16_pos)))
4014+
{
4015+
return true;
4016+
}
4017+
}
4018+
else
4019+
{
4020+
mov_rdata(super_data, to_write);
4021+
return true;
4022+
}
39804023
}
39814024

39824025
return false;

0 commit comments

Comments
 (0)