@@ -425,6 +425,28 @@ extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src)
425425#endif
426426}
427427
428+ static inline u32 cmp16_rdata (const decltype (spu_thread::rdata)& rdata, const decltype(spu_thread::rdata)& to_write)
429+ {
430+ u32 diffcnt = 0 ;
431+ u32 pos = 0 ;
432+
433+ for (u32 i = 0 ; i < 8 ; i++)
434+ {
435+ if (std::memcmp (rdata + (i * 16 ), to_write + (i * 16 ), 16 ))
436+ {
437+ diffcnt++;
438+ pos = i;
439+ }
440+ };
441+
442+ if (diffcnt == 1 )
443+ {
444+ return pos;
445+ }
446+
447+ return -1 ;
448+ }
449+
428450#if defined(_MSC_VER)
429451#define mwaitx_func
430452#define waitpkg_func
@@ -3969,17 +3991,52 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
39693991 auto & super_data = *vm::get_super_ptr<spu_rdata_t >(addr);
39703992 const bool success = [&]()
39713993 {
3972- // Full lock (heavyweight)
3973- // TODO: vm::check_addr
3974- vm::writer_lock lock (addr, range_lock);
3975-
3976- if (cmp_rdata (rdata, super_data))
3994+ if (int pos = cmp16_rdata (rdata, to_write); addr != spurs_addr && pos != -1 )
39773995 {
3978- mov_rdata (super_data, to_write);
3979- return true ;
3996+ auto & bits = *utils::bless<atomic_t <u64 >>(vm::g_reservations + ((addr & 0xff80 ) / 2 + 32 ));
3997+ const auto bits_val = +bits;
3998+
3999+ // Full lock (heavyweight)
4000+ // TODO: vm::check_addr
4001+ const bool halt_ppus = bits_val == umax || bits_val < 0x100000 ;
4002+
4003+ if (vm::writer_lock lock (addr, range_lock, halt_ppus); cmp_rdata (rdata, super_data))
4004+ {
4005+ auto cast_as = [](void * ptr, usz pos){ return reinterpret_cast <u128 *>(ptr) + pos; };
4006+ auto cast_as_const = [](const void * ptr, usz pos){ return reinterpret_cast <const u128 *>(ptr) + pos; };
4007+
4008+ if (halt_ppus)
4009+ {
4010+ *cast_as (super_data, pos) = *cast_as_const (to_write, pos);
4011+ bits.try_inc (u64 {umax});
4012+ return true ;
4013+ }
4014+ else if (atomic_storage<u128 >::compare_exchange (*cast_as (super_data, pos), *cast_as (rdata, pos), *cast_as_const (to_write, pos)))
4015+ {
4016+ bits.try_inc (u64 {umax});
4017+ *cast_as (rdata, pos) = *cast_as_const (to_write, pos);
4018+ ensure (cmp_rdata (rdata, super_data));
4019+ return true ;
4020+ }
4021+ }
4022+
4023+ bits = u64 {umax};
4024+ return false ;
39804025 }
4026+ else
4027+ {
4028+ // Full lock (heavyweight)
4029+ // TODO: vm::check_addr
4030+ vm::writer_lock lock (addr, range_lock);
39814031
3982- return false ;
4032+ if (cmp_rdata (rdata, super_data))
4033+ {
4034+ mov_rdata (super_data, to_write);
4035+ return true ;
4036+ }
4037+
4038+ return false ;
4039+ }
39834040 }();
39844041
39854042 res += success ? 64 : 0 - 64 ;
0 commit comments