Skip to content

Commit cafffb4

Browse files
committed
SPU: SPURS oriented thread waiting
1 parent 16f619d commit cafffb4

File tree

3 files changed

+244
-4
lines changed

3 files changed

+244
-4
lines changed

rpcs3/Emu/Cell/SPUThread.cpp

Lines changed: 234 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,11 @@ namespace vm
490490

491491
namespace spu
492492
{
493+
// Holds per-SPU atomic-operation tracking slots for raw SPUs, which have no
// thread group to carry this state (grouped SPUs use lv2_spu_group::atomic_ops).
// Registered in g_fxo; see get_spu_atomic_op_info().
struct raw_spu_atomic_info_t
{
	// One slot per raw SPU, indexed by spu_to_index()
	std::array<atomic_t<spu_atomic_op_info_for_group>, 8> raw_atomic_ops;
};
497+
493498
namespace scheduler
494499
{
495500
std::array<atomic_t<u8>, 65536> atomic_instruction_table = {};
@@ -4699,6 +4704,154 @@ u32 evaluate_spin_optimization(std::span<u8> stats, u64 evaluate_time, const cfg
46994704
return busy_waiting_switch;
47004705
}
47014706

4707+
// Compute this thread's slot index (0..7) in the atomic-op info table.
// Grouped SPU threads carry the index in the top byte of lv2_id; raw SPUs
// presumably use lv2_id itself as the index — confirm against id allocation.
inline u8 spu_to_index(const spu_thread* spu) noexcept
{
	if (spu->group)
	{
		return spu->lv2_id >> 24;
	}

	return spu->lv2_id;
}
4711+
4712+
// Select the atomic-op info table for this SPU thread: the owning group's
// table when one exists, otherwise the process-wide raw-SPU table in g_fxo.
inline std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& get_spu_atomic_op_info(const spu_thread* spu) noexcept
{
	if (spu->group)
	{
		return spu->group->atomic_ops;
	}

	return g_fxo->get<spu::raw_spu_atomic_info_t>().raw_atomic_ops;
}
4716+
4717+
// To be used by GETLLAR
// Returns non-zero if the caller needs to wait (1: claim bit taken from a
// temporarily-locked entry, 2: another SPU holds a full lock on the line)
int test_and_update_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index, u32 raddr, u32 getllar_pc)
{
	// Entry encoding (see init_atomic_op_info): upper bits of addr select the
	// 128-byte reservation line; the low 7 bits distinguish a full lock
	// (addr % 128 == 0) from a temporary lock (low bits set).
	auto info = spu_info[index].load();

	if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
	{
		if (info.addr % 128)
		{
			// Our own entry matches and is only temporarily locked:
			// upgrade it to a full lock (clear the low 7 bits)
			info.addr &= -128;
			spu_info[index].release(info);
			return 0;
		}

		// Repeated GETLLAR: disable entry
	}

	// Clear our own slot before scanning the others
	info = {};

	spu_info[index].release(info);

	// Scan all slots for another SPU working on the same line from the same
	// GETLLAR site (the classic SPURS contention pattern)
	for (usz i = 0; i < spu_info.size(); i++)
	{
		info = spu_info[i].load();

		if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
		{
			int wait = 0;

			// Re-check under the atomic RMW: the snapshot above may be stale
			spu_info[i].fetch_op([&](spu_atomic_op_info_for_group& value)
			{
				wait = 0;

				if (value.addr / 128 == raddr / 128 && value.getllar == getllar_pc)
				{
					if (value.addr % 128 == 0)
					{
						// Full lock held by another SPU: caller should yield
						wait = 2;
						return false;
					}

					// NOTE(review): index can reach 7, so (1u << index) may test
					// bit 7 of addr, which lies outside the 127 temporary-lock
					// mask — confirm this is intended for 8-thread groups
					if (value.addr & (1u << index))
					{
						// Consume our claim bit once; further calls won't wait on it
						value.addr &= ~(1u << index);
						wait = 1;
						return true;
					}
				}

				return false;
			});

			if (wait)
			{
				return wait;
			}
		}
	}

	return 0;
}
4779+
4780+
// To be used when PUTLLC finishes to create a temporary barrier until the SPURS loop restarts
4781+
void downgrade_to_temporary_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index, u32 raddr, u32 getllar_pc)
4782+
{
4783+
auto info = spu_info[index].load();
4784+
4785+
if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
4786+
{
4787+
info.addr |= 127;
4788+
spu_info[index].release(info);
4789+
return;
4790+
}
4791+
4792+
info = {};
4793+
spu_info[index].release(info);
4794+
}
4795+
4796+
// To be used by PUTLLC initiates
// Returns non-zero if the caller needs to wait (1: claim bit taken from a
// temporarily-locked entry, 2: another SPU holds a full lock on the line)
int init_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index, u32 raddr, u32 getllar_pc)
{
	// Initially store locked entry with temporary lock (low 7 bits set)
	spu_atomic_op_info_for_group info{};
	info.addr = raddr | 127;
	info.getllar = getllar_pc;

	spu_info[index].release(info);

	// Scan the other slots for a competing atomic op on the same line/site
	for (usz i = 0; i < spu_info.size(); i++)
	{
		if (i == index)
		{
			continue;
		}

		info = spu_info[i].load();

		if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
		{
			int wait = 0;

			// Re-check under the atomic RMW: the snapshot above may be stale
			spu_info[i].fetch_op([&](spu_atomic_op_info_for_group& value)
			{
				wait = 0;

				if (value.addr / 128 == raddr / 128 && value.getllar == getllar_pc)
				{
					if (value.addr % 128 == 0)
					{
						// Full lock held by another SPU: caller should yield
						wait = 2;
						return false;
					}

					// NOTE(review): index can reach 7, so (1u << index) may test
					// bit 7 of addr, which lies outside the 127 temporary-lock
					// mask — confirm this is intended for 8-thread groups
					if (value.addr & (1u << index))
					{
						value.addr &= ~(1u << index);
						wait = 1;
						return true;
					}
				}

				return false;
			});

			// NOTE(review): returns here even when wait == 0 (entry changed under
			// the RMW), leaving our own slot temporarily locked rather than
			// upgraded below — confirm this is the intended conservative path
			return wait;
		}
	}

	// If exclusive, upgrade to full lock (clear low bits)
	info.addr = raddr;
	info.getllar = getllar_pc;
	spu_info[index].store(info);

	return 0;
}
4854+
47024855
bool spu_thread::process_mfc_cmd()
47034856
{
47044857
// Stall infinitely if MFC queue is full
@@ -5015,11 +5168,50 @@ bool spu_thread::process_mfc_cmd()
50155168
last_getllar = pc;
50165169
last_gtsc = perf0.get();
50175170
}
5171+
else
5172+
{
5173+
last_getllar = pc;
5174+
}
50185175

50195176
last_getllar_addr = addr;
50205177
getllar_spin_count = 0;
50215178
getllar_busy_waiting_switch = umax;
50225179

5180+
if (ch_mfc_cmd.eal == spurs_addr)
5181+
{
5182+
u64 timeout = 0;
5183+
5184+
while (true)
5185+
{
5186+
const int wait = test_and_update_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), ch_mfc_cmd.eal, last_getllar);
5187+
5188+
if (!wait)
5189+
{
5190+
break;
5191+
}
5192+
5193+
const u64 current = get_system_time();
5194+
5195+
if (!timeout)
5196+
{
5197+
timeout = current + g_cfg.core.spu_delay_penalty * 1000;
5198+
}
5199+
else if (current >= timeout)
5200+
{
5201+
break;
5202+
}
5203+
5204+
if (wait == 2)
5205+
{
5206+
std::this_thread::yield();
5207+
}
5208+
else
5209+
{
5210+
busy_wait(50000);
5211+
}
5212+
}
5213+
}
5214+
50235215
u64 ntime = 0;
50245216
rsx::reservation_lock rsx_lock(addr, 128);
50255217

@@ -5232,6 +5424,41 @@ bool spu_thread::process_mfc_cmd()
52325424
}
52335425
}
52345426

5427+
if (ch_mfc_cmd.eal == spurs_addr)
5428+
{
5429+
u64 timeout = 0;
5430+
5431+
while (true)
5432+
{
5433+
const int wait = init_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), raddr, last_getllar);
5434+
5435+
if (!wait)
5436+
{
5437+
break;
5438+
}
5439+
5440+
const u64 current = get_system_time();
5441+
5442+
if (!timeout)
5443+
{
5444+
timeout = current + g_cfg.core.spu_delay_penalty * 1000;
5445+
}
5446+
else if (current >= timeout)
5447+
{
5448+
break;
5449+
}
5450+
5451+
if (wait == 2)
5452+
{
5453+
std::this_thread::yield();
5454+
}
5455+
else
5456+
{
5457+
busy_wait(50000);
5458+
}
5459+
}
5460+
}
5461+
52355462
if (do_putllc(ch_mfc_cmd))
52365463
{
52375464
ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
@@ -5299,6 +5526,7 @@ bool spu_thread::process_mfc_cmd()
52995526
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
53005527
}
53015528

5529+
downgrade_to_temporary_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), raddr, last_getllar);
53025530
static_cast<void>(test_stopped());
53035531
return true;
53045532
}
@@ -6180,7 +6408,12 @@ s64 spu_thread::get_ch_value(u32 ch)
61806408

61816409
eventstat_busy_waiting_switch = value ? 1 : 0;
61826410
}
6183-
6411+
6412+
if (raddr == spurs_addr)
6413+
{
6414+
downgrade_to_temporary_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), raddr, last_getllar);
6415+
}
6416+
61846417
for (bool is_first = true; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true), is_first = false)
61856418
{
61866419
const auto old = +state;

rpcs3/Emu/Cell/SPUThread.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,12 @@ struct spu_imm_table_t
497497

498498
extern const spu_imm_table_t g_spu_imm;
499499

500+
// Per-SPU record of an in-flight reservation-based atomic op (GETLLAR/PUTLLC),
// used to serialize SPURS-style contention between SPUs in a group.
struct spu_atomic_op_info_for_group
{
	// Reservation address; upper bits select the 128-byte line, while the low
	// 7 bits appear to serve as a lock state/claim mask (0 = full lock,
	// non-zero = temporary lock) — see init/test helpers in SPUThread.cpp
	u32 addr;
	// SPU PC of the GETLLAR that opened the reservation (identifies the loop site)
	u32 getllar;
};
505+
500506
enum FPSCR_EX
501507
{
502508
//Single-precision exceptions

rpcs3/Emu/Cell/lv2/sys_spu.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -297,9 +297,10 @@ struct lv2_spu_group
297297
bool set_terminate = false;
298298

299299
std::array<shared_ptr<named_thread<spu_thread>>, 8> threads; // SPU Threads
300-
std::array<s8, 256> threads_map; // SPU Threads map based number
301-
std::array<std::pair<u32, std::vector<sys_spu_segment>>, 8> imgs; // Entry points, SPU image segments
302-
std::array<std::array<u64, 4>, 8> args; // SPU Thread Arguments
300+
std::array<s8, 256> threads_map{}; // SPU Threads map based number
301+
std::array<std::pair<u32, std::vector<sys_spu_segment>>, 8> imgs{}; // Entry points, SPU image segments
302+
std::array<std::array<u64, 4>, 8> args{}; // SPU Thread Arguments
303+
std::array<atomic_t<spu_atomic_op_info_for_group>, 8> atomic_ops{};
303304

304305
shared_ptr<lv2_event_queue> ep_run; // port for SYS_SPU_THREAD_GROUP_EVENT_RUN events
305306
shared_ptr<lv2_event_queue> ep_exception; // TODO: SYS_SPU_THREAD_GROUP_EVENT_EXCEPTION

0 commit comments

Comments
 (0)