@@ -425,6 +425,28 @@ extern void mov_rdata_nt(spu_rdata_t& _dst, const spu_rdata_t& _src)
425425#endif
426426}
427427
428+ static inline u32 cmp16_rdata (const decltype (spu_thread::rdata)& rdata, const decltype(spu_thread::rdata)& to_write)
429+ {
430+ u32 diffcnt = 0 ;
431+ u32 pos = 0 ;
432+
433+ for (u32 i = 0 ; i < 8 ; i++)
434+ {
435+ if (std::memcmp (rdata + (i * 16 ), to_write + (i * 16 ), 16 ))
436+ {
437+ diffcnt++;
438+ pos = i;
439+ }
440+ };
441+
442+ if (diffcnt == 1 )
443+ {
444+ return pos;
445+ }
446+
447+ return -1 ;
448+ }
449+
428450#if defined(_MSC_VER)
429451#define mwaitx_func
430452#define waitpkg_func
@@ -3968,17 +3990,52 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
39683990 auto & super_data = *vm::get_super_ptr<spu_rdata_t >(addr);
39693991 const bool success = [&]()
39703992 {
3971- // Full lock (heavyweight)
3972- // TODO: vm::check_addr
3973- vm::writer_lock lock (addr, range_lock);
3974-
3975- if (cmp_rdata (rdata, super_data))
3993+ if (int pos = cmp16_rdata (rdata, to_write); addr != spurs_addr && pos != -1 )
39763994 {
3977- mov_rdata (super_data, to_write);
3978- return true ;
3995+ auto & bits = *utils::bless<atomic_t <u64 >>(vm::g_reservations + ((addr & 0xff80 ) / 2 + 32 ));
3996+ const auto bits_val = +bits;
3997+
3998+ // Full lock (heavyweight)
3999+ // TODO: vm::check_addr
4000+ const bool halt_ppus = bits_val == umax || bits_val < 0x100000 ;
4001+
4002+ if (vm::writer_lock lock (addr, range_lock, halt_ppus); cmp_rdata (rdata, super_data))
4003+ {
4004+ auto cast_as = [](void * ptr, usz pos){ return reinterpret_cast <u128 *>(ptr) + pos; };
4005+ auto cast_as_const = [](const void * ptr, usz pos){ return reinterpret_cast <const u128 *>(ptr) + pos; };
4006+
4007+ if (halt_ppus)
4008+ {
4009+ *cast_as (super_data, pos) = *cast_as_const (to_write, pos);
4010+ bits.try_inc (u64 {umax});
4011+ return true ;
4012+ }
4013+ else if (atomic_storage<u128 >::compare_exchange (*cast_as (super_data, pos), *cast_as (rdata, pos), *cast_as_const (to_write, pos)))
4014+ {
4015+ bits.try_inc (u64 {umax});
4016+ *cast_as (rdata, pos) = *cast_as_const (to_write, pos);
4017+ ensure (cmp_rdata (rdata, super_data));
4018+ return true ;
4019+ }
4020+ }
4021+
4022+ bits = u64 {umax};
4023+ return false ;
39794024 }
4025+ else
4026+ {
4027+ // Full lock (heavyweight)
4028+ // TODO: vm::check_addr
4029+ vm::writer_lock lock (addr, range_lock);
39804030
3981- return false ;
4031+ if (cmp_rdata (rdata, super_data))
4032+ {
4033+ mov_rdata (super_data, to_write);
4034+ return true ;
4035+ }
4036+
4037+ return false ;
4038+ }
39824039 }();
39834040
39844041 res += success ? 64 : 0 - 64 ;
0 commit comments