Skip to content

Commit 107d9dc

Browse files
committed
PPU Profiler
1 parent c669a0b commit 107d9dc

File tree

3 files changed

+60
-35
lines changed

3 files changed

+60
-35
lines changed

rpcs3/Emu/CPU/CPUThread.cpp

Lines changed: 54 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ struct cpu_prof
190190
reservation_samples = 0;
191191
}
192192

193-
static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, bool extended_print = false)
193+
static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, u32 type_id, bool extended_print = false)
194194
{
195195
// Print results
196196
std::string results;
@@ -204,11 +204,18 @@ struct cpu_prof
204204
const f64 _frac = count / busy / samples;
205205

206206
// Print only 7 hash characters out of 11 (which covers roughly 48 bits)
207-
fmt::append(results, "\n\t[%s", fmt::base57(be_t<u64>{name}));
208-
results.resize(results.size() - 4);
207+
if (type_id == 2)
208+
{
209+
fmt::append(results, "\n\t[%s", fmt::base57(be_t<u64>{name}));
210+
results.resize(results.size() - 4);
209211

210-
// Print chunk address from lowest 16 bits
211-
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
212+
// Print chunk address from lowest 16 bits
213+
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
214+
}
215+
else
216+
{
217+
fmt::append(results, "\n\t[0x%07x]: %.4f%% (%u)", name, _frac * 100., count);
218+
}
212219

213220
if (results.size() >= (extended_print ? 10000 : 5000))
214221
{
@@ -257,27 +264,37 @@ struct cpu_prof
257264
}
258265

259266
// Print results
260-
const std::string results = format(chart, samples, idle);
267+
const std::string results = format(chart, samples, idle, ptr->id_type());
261268
profiler.notice("Thread \"%s\" [0x%08x]: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):\n%s", ptr->get_name(), ptr->id, samples, get_percent(idle, samples), new_samples, reservation_samples, get_percent(reservation_samples, samples - idle), results);
262269

263270
new_samples = 0;
264271
}
265272

266-
static void print_all(std::unordered_map<shared_ptr<cpu_thread>, sample_info>& threads, sample_info& all_info)
273+
static void print_all(std::unordered_map<shared_ptr<cpu_thread>, sample_info>& threads, sample_info& all_info, u32 type_id)
267274
{
268275
u64 new_samples = 0;
269276

270277
// Print all results and cleanup
271278
for (auto& [ptr, info] : threads)
272279
{
280+
if (ptr->id_type() != type_id)
281+
{
282+
continue;
283+
}
284+
273285
new_samples += info.new_samples;
274286
info.print(ptr);
275287
}
276288

277289
std::multimap<u64, u64, std::greater<u64>> chart;
278290

279-
for (auto& [_, info] : threads)
291+
for (auto& [ptr, info] : threads)
280292
{
293+
if (ptr->id_type() != type_id)
294+
{
295+
continue;
296+
}
297+
281298
// This function collects thread information regardless of 'new_samples' member state
282299
for (auto& [name, count] : info.freq)
283300
{
@@ -301,7 +318,7 @@ struct cpu_prof
301318

302319
if (new_samples < min_print_all_samples && thread_ctrl::state() != thread_state::aborting)
303320
{
304-
profiler.notice("All Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%): Not enough new samples have been collected since the last print.", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle));
321+
profiler.notice("All %s Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%): Not enough new samples have been collected since the last print.", type_id == 1 ? "PPU" : "SPU", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle));
305322
return;
306323
}
307324

@@ -310,12 +327,13 @@ struct cpu_prof
310327
chart.emplace(count, name);
311328
}
312329

313-
const std::string results = format(chart, samples, idle, true);
314-
profiler.notice("All Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):%s", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle), results);
330+
const std::string results = format(chart, samples, idle, type_id, true);
331+
profiler.notice("All %s Threads: %u samples (%.4f%% idle), %u new, %u reservation (%.4f%%):%s", type_id == 1 ? "PPU" : "SPU", samples, get_percent(idle, samples), new_samples, reservation, get_percent(reservation, samples - idle), results);
315332
}
316333
};
317334

318-
sample_info all_threads_info{};
335+
sample_info all_spu_threads_info{};
336+
sample_info all_ppu_threads_info{};
319337

320338
void operator()()
321339
{
@@ -376,8 +394,11 @@ struct cpu_prof
376394
{
377395
if (auto state = +ptr->state; cpu_flag::exit - state)
378396
{
397+
const auto spu = ptr->try_get<spu_thread>();
398+
const auto ppu = ptr->try_get<ppu_thread>();
399+
379400
// Get short function hash
380-
const u64 name = atomic_storage<u64>::load(ptr->block_hash);
401+
const u64 name = ppu ? atomic_storage<u32>::load(ppu->cia) : atomic_storage<u64>::load(ptr->block_hash);
381402

382403
// Append occurrence
383404
info.samples++;
@@ -387,17 +408,17 @@ struct cpu_prof
387408
info.freq[name]++;
388409
info.new_samples++;
389410

390-
if (auto spu = ptr->try_get<spu_thread>())
411+
if (spu)
391412
{
392413
if (spu->raddr)
393414
{
394415
info.reservation_samples++;
395416
}
396-
}
397417

398-
// Append verification time to fixed common name 0000000...chunk-0x3fffc
399-
if (name >> 16 && (name & 0xffff) == 0)
400-
info.freq[0xffff]++;
418+
// Append verification time to fixed common name 0000000...chunk-0x3fffc
419+
if (name >> 16 && (name & 0xffff) == 0)
420+
info.freq[0xffff]++;
421+
}
401422
}
402423
else
403424
{
@@ -420,8 +441,10 @@ struct cpu_prof
420441
{
421442
profiler.success("Flushing profiling results...");
422443

423-
all_threads_info = {};
424-
sample_info::print_all(threads, all_threads_info);
444+
all_ppu_threads_info = {};
445+
all_spu_threads_info = {};
446+
sample_info::print_all(threads, all_ppu_threads_info, 1);
447+
sample_info::print_all(threads, all_spu_threads_info, 2);
425448
}
426449

427450
if (Emu.IsPaused())
@@ -442,7 +465,8 @@ struct cpu_prof
442465
}
443466

444467
// Print all remaining results
445-
sample_info::print_all(threads, all_threads_info);
468+
sample_info::print_all(threads, all_ppu_threads_info, 1);
469+
sample_info::print_all(threads, all_spu_threads_info, 2);
446470
}
447471

448472
static constexpr auto thread_name = "CPU Profiler"sv;
@@ -459,7 +483,7 @@ extern f64 get_cpu_program_usage_percent(u64 hash)
459483
{
460484
u64 total = 0;
461485

462-
for (auto [name, count] : prof->all_threads_info.freq)
486+
for (auto [name, count] : prof->all_spu_threads_info.freq)
463487
{
464488
if ((name & -65536) == hash)
465489
{
@@ -472,7 +496,7 @@ extern f64 get_cpu_program_usage_percent(u64 hash)
472496
return 0;
473497
}
474498

475-
return std::max<f64>(0.0001, static_cast<f64>(total) * 100 / (prof->all_threads_info.samples - prof->all_threads_info.idle));
499+
return std::max<f64>(0.0001, static_cast<f64>(total) * 100 / (prof->all_spu_threads_info.samples - prof->all_spu_threads_info.idle));
476500
}
477501
}
478502

@@ -639,22 +663,17 @@ void cpu_thread::operator()()
639663
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(get_class()));
640664
}
641665

642-
while (!g_fxo->is_init<cpu_profiler>())
643-
{
644-
if (Emu.IsStopped())
645-
{
646-
return;
647-
}
648-
649-
// Can we have a little race, right? First thread is started concurrently with g_fxo->init()
650-
thread_ctrl::wait_for(1000);
651-
}
666+
ensure(g_fxo->is_init<cpu_profiler>());
652667

653668
switch (get_class())
654669
{
655670
case thread_class::ppu:
656671
{
657-
//g_fxo->get<cpu_profiler>().registered.push(id);
672+
if (g_cfg.core.ppu_prof)
673+
{
674+
g_fxo->get<cpu_profiler>().registered.push(id);
675+
}
676+
658677
break;
659678
}
660679
case thread_class::spu:
@@ -1546,7 +1565,7 @@ void cpu_thread::flush_profilers() noexcept
15461565
return;
15471566
}
15481567

1549-
if (g_cfg.core.spu_prof || g_cfg.core.spu_debug)
1568+
if (g_cfg.core.spu_prof || g_cfg.core.spu_debug || g_cfg.core.ppu_prof)
15501569
{
15511570
g_fxo->get<cpu_profiler>().registered.push(0);
15521571
}

rpcs3/Emu/Cell/PPUTranslator.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,11 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
593593
{
594594
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target_last - base), type);
595595
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
596+
597+
if (g_cfg.core.ppu_prof)
598+
{
599+
m_ir->CreateStore(m_ir->getInt32(target_last), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)));
600+
}
596601
}
597602
}
598603
else

rpcs3/Emu/system_config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ struct cfg_root : cfg::node
5757
cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
5858
cfg::_bool spu_cache{ this, "SPU Cache", true };
5959
cfg::_bool spu_prof{ this, "SPU Profiler", false };
60+
cfg::_bool ppu_prof{ this, "PPU Profiler", false };
6061
cfg::uint<0, 16> mfc_transfers_shuffling{ this, "MFC Commands Shuffling Limit", 0 };
6162
cfg::uint<0, 10000> mfc_transfers_timeout{ this, "MFC Commands Timeout", 0, true };
6263
cfg::_bool mfc_shuffling_in_steps{ this, "MFC Commands Shuffling In Steps", false, true };

0 commit comments

Comments
 (0)