@@ -490,6 +490,11 @@ namespace vm
490490
491491namespace spu
492492{
493+ struct raw_spu_atomic_info_t
494+ {
495+ std::array<atomic_t <spu_atomic_op_info_for_group>, 8 > raw_atomic_ops;
496+ };
497+
493498 namespace scheduler
494499 {
495500 std::array<atomic_t <u8 >, 65536 > atomic_instruction_table = {};
@@ -4699,6 +4704,154 @@ u32 evaluate_spin_optimization(std::span<u8> stats, u64 evaluate_time, const cfg
46994704 return busy_waiting_switch;
47004705}
47014706
4707+ inline u8 spu_to_index (const spu_thread* spu) noexcept
4708+ {
4709+ return spu->group ? (spu->lv2_id >> 24 ) : spu->lv2_id ;
4710+ }
4711+
4712+ inline std::array<atomic_t <spu_atomic_op_info_for_group>, 8 >& get_spu_atomic_op_info (const spu_thread* spu) noexcept
4713+ {
4714+ return spu->group ? spu->group ->atomic_ops : g_fxo->get <spu::raw_spu_atomic_info_t >().raw_atomic_ops ;
4715+ }
4716+
4717+ // To be used by GETLLAR
4718+ // Returns none-zero if needs to wait
4719+ int test_and_update_atomic_op_info (std::array<atomic_t <spu_atomic_op_info_for_group>, 8 >& spu_info, u8 index, u32 raddr, u32 getllar_pc)
4720+ {
4721+ auto info = spu_info[index].load ();
4722+
4723+ if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
4724+ {
4725+ if (info.addr % 128 )
4726+ {
4727+ info.addr &= -128 ;
4728+ spu_info[index].release (info);
4729+ return 0 ;
4730+ }
4731+
4732+ // Repeated GETLLAR: disable entry
4733+ }
4734+
4735+ info = {};
4736+
4737+ spu_info[index].release (info);
4738+
4739+ for (usz i = 0 ; i < spu_info.size (); i++)
4740+ {
4741+ info = spu_info[i].load ();
4742+
4743+ if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
4744+ {
4745+ int wait = 0 ;
4746+
4747+ spu_info[i].fetch_op ([&](spu_atomic_op_info_for_group& value)
4748+ {
4749+ wait = 0 ;
4750+
4751+ if (value.addr / 128 == raddr / 128 && value.getllar == getllar_pc)
4752+ {
4753+ if (value.addr % 128 == 0 )
4754+ {
4755+ wait = 2 ;
4756+ return false ;
4757+ }
4758+
4759+ if (value.addr & (1u << index))
4760+ {
4761+ value.addr &= ~(1u << index);
4762+ wait = 1 ;
4763+ return true ;
4764+ }
4765+ }
4766+
4767+ return false ;
4768+ });
4769+
4770+ if (wait)
4771+ {
4772+ return wait;
4773+ }
4774+ }
4775+ }
4776+
4777+ return 0 ;
4778+ }
4779+
4780+ // To be used when PUTLLC finishes to create a temporary barrier until the SPURS loop restarts
4781+ void downgrade_to_temporary_atomic_op_info (std::array<atomic_t <spu_atomic_op_info_for_group>, 8 >& spu_info, u8 index, u32 raddr, u32 getllar_pc)
4782+ {
4783+ auto info = spu_info[index].load ();
4784+
4785+ if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
4786+ {
4787+ info.addr |= 127 ;
4788+ spu_info[index].release (info);
4789+ return ;
4790+ }
4791+
4792+ info = {};
4793+ spu_info[index].release (info);
4794+ }
4795+
4796+ // To be used by PUTLLC initiates
4797+ // Returns none-zero if needs to wait
4798+ int init_atomic_op_info (std::array<atomic_t <spu_atomic_op_info_for_group>, 8 >& spu_info, u8 index, u32 raddr, u32 getllar_pc)
4799+ {
4800+ // Initialiy store locked entry with temporary lock
4801+ spu_atomic_op_info_for_group info{};
4802+ info.addr = raddr | 127 ;
4803+ info.getllar = getllar_pc;
4804+
4805+ spu_info[index].release (info);
4806+
4807+ for (usz i = 0 ; i < spu_info.size (); i++)
4808+ {
4809+ if (i == index)
4810+ {
4811+ continue ;
4812+ }
4813+
4814+ info = spu_info[i].load ();
4815+
4816+ if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
4817+ {
4818+ int wait = 0 ;
4819+
4820+ spu_info[i].fetch_op ([&](spu_atomic_op_info_for_group& value)
4821+ {
4822+ wait = 0 ;
4823+
4824+ if (value.addr / 128 == raddr / 128 && value.getllar == getllar_pc)
4825+ {
4826+ if (value.addr % 128 == 0 )
4827+ {
4828+ wait = 2 ;
4829+ return false ;
4830+ }
4831+
4832+ if (value.addr & (1u << index))
4833+ {
4834+ value.addr &= ~(1u << index);
4835+ wait = 1 ;
4836+ return true ;
4837+ }
4838+ }
4839+
4840+ return false ;
4841+ });
4842+
4843+ return wait;
4844+ }
4845+ }
4846+
4847+ // If exclusive, upgrade to full lock
4848+ info.addr = raddr;
4849+ info.getllar = getllar_pc;
4850+ spu_info[index].store (info);
4851+
4852+ return 0 ;
4853+ }
4854+
47024855bool spu_thread::process_mfc_cmd ()
47034856{
47044857 // Stall infinitely if MFC queue is full
@@ -5015,11 +5168,50 @@ bool spu_thread::process_mfc_cmd()
50155168 last_getllar = pc;
50165169 last_gtsc = perf0.get ();
50175170 }
5171+ else
5172+ {
5173+ last_getllar = pc;
5174+ }
50185175
50195176 last_getllar_addr = addr;
50205177 getllar_spin_count = 0 ;
50215178 getllar_busy_waiting_switch = umax;
50225179
5180+ if (ch_mfc_cmd.eal == spurs_addr)
5181+ {
5182+ u64 timeout = 0 ;
5183+
5184+ while (true )
5185+ {
5186+ const int wait = test_and_update_atomic_op_info (get_spu_atomic_op_info (this ), spu_to_index (this ), ch_mfc_cmd.eal , last_getllar);
5187+
5188+ if (!wait)
5189+ {
5190+ break ;
5191+ }
5192+
5193+ const u64 current = get_system_time ();
5194+
5195+ if (!timeout)
5196+ {
5197+ timeout = current + g_cfg.core .spu_delay_penalty * 1000 ;
5198+ }
5199+ else if (current >= timeout)
5200+ {
5201+ break ;
5202+ }
5203+
5204+ if (wait == 2 )
5205+ {
5206+ std::this_thread::yield ();
5207+ }
5208+ else
5209+ {
5210+ busy_wait (50000 );
5211+ }
5212+ }
5213+ }
5214+
50235215 u64 ntime = 0 ;
50245216 rsx::reservation_lock rsx_lock (addr, 128 );
50255217
@@ -5232,6 +5424,41 @@ bool spu_thread::process_mfc_cmd()
52325424 }
52335425 }
52345426
5427+ if (ch_mfc_cmd.eal == spurs_addr)
5428+ {
5429+ u64 timeout = 0 ;
5430+
5431+ while (true )
5432+ {
5433+ const int wait = init_atomic_op_info (get_spu_atomic_op_info (this ), spu_to_index (this ), raddr, last_getllar);
5434+
5435+ if (!wait)
5436+ {
5437+ break ;
5438+ }
5439+
5440+ const u64 current = get_system_time ();
5441+
5442+ if (!timeout)
5443+ {
5444+ timeout = current + g_cfg.core .spu_delay_penalty * 1000 ;
5445+ }
5446+ else if (current >= timeout)
5447+ {
5448+ break ;
5449+ }
5450+
5451+ if (wait == 2 )
5452+ {
5453+ std::this_thread::yield ();
5454+ }
5455+ else
5456+ {
5457+ busy_wait (50000 );
5458+ }
5459+ }
5460+ }
5461+
52355462 if (do_putllc (ch_mfc_cmd))
52365463 {
52375464 ch_atomic_stat.set_value (MFC_PUTLLC_SUCCESS);
@@ -5299,6 +5526,7 @@ bool spu_thread::process_mfc_cmd()
52995526 std::memcpy (dump.data , _ptr<u8 >(ch_mfc_cmd.lsa & 0x3ff80 ), 128 );
53005527 }
53015528
5529+ downgrade_to_temporary_atomic_op_info (get_spu_atomic_op_info (this ), spu_to_index (this ), raddr, last_getllar);
53025530 static_cast <void >(test_stopped ());
53035531 return true ;
53045532 }
@@ -6180,7 +6408,12 @@ s64 spu_thread::get_ch_value(u32 ch)
61806408
61816409 eventstat_busy_waiting_switch = value ? 1 : 0 ;
61826410 }
6183-
6411+
6412+ if (raddr == spurs_addr)
6413+ {
6414+ downgrade_to_temporary_atomic_op_info (get_spu_atomic_op_info (this ), spu_to_index (this ), raddr, last_getllar);
6415+ }
6416+
61846417 for (bool is_first = true ; !events.count ; events = get_events (mask1 & ~SPU_EVENT_LR, true , true ), is_first = false )
61856418 {
61866419 const auto old = +state;
0 commit comments