Skip to content

Commit be053cb

Browse files
authored
Temporarily Fix Incorrect Kernel Perfetto Trace Duration due to Firmware Timestamp Bug (#134)
* Perfetto duration temp fix setup * Add timestamp change amounts to ROCP Info * Groups kernel dispatch info by agent and queue id before sorting. Midpoint interpolation is then performed on the sorted kernels * Moved dispatch bins into the for-loop * Fix compilation error by using const ref * Modified for review comments * Changed variable names [ROCm/rocprofiler-sdk commit: 6518c54]
1 parent e75ab64 commit be053cb

File tree

1 file changed

+107
-49
lines changed

1 file changed

+107
-49
lines changed

projects/rocprofiler-sdk/source/lib/output/generatePerfetto.cpp

Lines changed: 107 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,11 @@
3232
#include <rocprofiler-sdk/cxx/operators.hpp>
3333
#include <rocprofiler-sdk/cxx/perfetto.hpp>
3434

35+
#include <fmt/core.h>
36+
3537
#include <atomic>
3638
#include <future>
39+
#include <iostream>
3740
#include <map>
3841
#include <thread>
3942
#include <unordered_map>
@@ -505,62 +508,117 @@ write_perfetto(
505508
itr.end_timestamp);
506509
tracing_session->FlushBlocking();
507510
}
508-
509511
for(auto ditr : kernel_dispatch_gen)
510-
for(auto itr : kernel_dispatch_gen.get(ditr))
512+
{
513+
auto generator = kernel_dispatch_gen.get(ditr);
514+
// Group kernels on the same queue and agent. Temporary fix for firmware timestamp bug
515+
// Can be removed once bug is resolved.
516+
auto dispatch_bins = std::unordered_map<
517+
rocprofiler_agent_id_t,
518+
std::unordered_map<
519+
rocprofiler_queue_id_t,
520+
std::vector<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>>>{};
521+
for(auto& itr : generator)
511522
{
512-
const auto& info = itr.dispatch_info;
513-
const kernel_symbol_info* sym = tool_metadata.get_kernel_symbol(info.kernel_id);
514-
515-
CHECK(sym != nullptr);
516-
517-
auto name = std::string_view{sym->kernel_name};
518-
auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id);
523+
const auto& info = itr.dispatch_info;
524+
dispatch_bins[info.agent_id][info.queue_id].emplace_back(&itr);
525+
}
519526

520-
if(demangled.find(name) == demangled.end())
527+
for(const auto& aitr : dispatch_bins)
528+
{
529+
for(auto qitr : aitr.second)
521530
{
522-
demangled.emplace(name, common::cxx_demangle(name));
531+
// Sort kernels on the same queue and agent by timestamp
532+
std::sort(qitr.second.begin(),
533+
qitr.second.end(),
534+
[](const auto* lhs, const auto* rhs) {
535+
return lhs->start_timestamp < rhs->start_timestamp;
536+
});
537+
538+
// Loop over the kernels (qitr.second) and put them into perfetto.
539+
for(auto it = qitr.second.begin(); it != qitr.second.end(); ++it)
540+
{
541+
auto& current = **it;
542+
const auto& info = current.dispatch_info;
543+
const kernel_symbol_info* sym =
544+
tool_metadata.get_kernel_symbol(info.kernel_id);
545+
546+
CHECK(sym != nullptr);
547+
548+
auto name = std::string_view{sym->kernel_name};
549+
auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id);
550+
551+
// Temporary fix until timestamp issues are resolved: Set timestamps to be
552+
// halfway between ending timestamp and starting timestamp of overlapping
553+
// kernel dispatches. Perfetto displays slices incorrectly if overlapping
554+
// slices on the same track are not completely enveloped.
555+
auto next = std::next(it);
556+
if(next != qitr.second.end() &&
557+
(*next)->start_timestamp < (*it)->end_timestamp)
558+
{
559+
auto start = (*next)->start_timestamp;
560+
auto end = std::min((*it)->end_timestamp, (*next)->end_timestamp);
561+
auto mid = start + (end - start) / 2;
562+
// Report changed timestamps to ROCP INFO
563+
ROCP_INFO << fmt::format(
564+
"Kernel ending timestamp increased by {} ns to {} ns with "
565+
"following kernel starting timestamp decreased by {} ns to {} ns "
566+
"due to firmware timestamp error.",
567+
((*it)->end_timestamp - mid),
568+
mid,
569+
(mid - (*next)->start_timestamp),
570+
mid);
571+
(*it)->end_timestamp = mid;
572+
(*next)->start_timestamp = mid;
573+
}
574+
575+
if(demangled.find(name) == demangled.end())
576+
{
577+
demangled.emplace(name, common::cxx_demangle(name));
578+
}
579+
580+
TRACE_EVENT_BEGIN(
581+
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
582+
::perfetto::StaticString(demangled.at(name).c_str()),
583+
track,
584+
current.start_timestamp,
585+
::perfetto::Flow::ProcessScoped(current.correlation_id.internal),
586+
"begin_ns",
587+
current.start_timestamp,
588+
"end_ns",
589+
current.end_timestamp,
590+
"delta_ns",
591+
(current.end_timestamp - current.start_timestamp),
592+
"kind",
593+
current.kind,
594+
"agent",
595+
tool_metadata.get_node_id(info.agent_id),
596+
"corr_id",
597+
current.correlation_id.internal,
598+
"queue",
599+
info.queue_id.handle,
600+
"tid",
601+
current.thread_id,
602+
"kernel_id",
603+
info.kernel_id,
604+
"private_segment_size",
605+
info.private_segment_size,
606+
"group_segment_size",
607+
info.group_segment_size,
608+
"workgroup_size",
609+
info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z,
610+
"grid_size",
611+
info.grid_size.x * info.grid_size.y * info.grid_size.z);
612+
TRACE_EVENT_END(
613+
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
614+
track,
615+
current.end_timestamp);
616+
tracing_session->FlushBlocking();
617+
}
523618
}
524-
525-
TRACE_EVENT_BEGIN(
526-
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
527-
::perfetto::StaticString(demangled.at(name).c_str()),
528-
track,
529-
itr.start_timestamp,
530-
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
531-
"begin_ns",
532-
itr.start_timestamp,
533-
"end_ns",
534-
itr.end_timestamp,
535-
"delta_ns",
536-
(itr.end_timestamp - itr.start_timestamp),
537-
"kind",
538-
itr.kind,
539-
"agent",
540-
agents_map.at(info.agent_id).logical_node_id,
541-
"corr_id",
542-
itr.correlation_id.internal,
543-
"queue",
544-
info.queue_id.handle,
545-
"tid",
546-
itr.thread_id,
547-
"kernel_id",
548-
info.kernel_id,
549-
"private_segment_size",
550-
info.private_segment_size,
551-
"group_segment_size",
552-
info.group_segment_size,
553-
"workgroup_size",
554-
info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z,
555-
"grid_size",
556-
info.grid_size.x * info.grid_size.y * info.grid_size.z);
557-
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
558-
track,
559-
itr.end_timestamp);
560-
tracing_session->FlushBlocking();
561619
}
620+
}
562621
}
563-
564622
// counter tracks
565623
{
566624
// memory copy counter track

0 commit comments

Comments
 (0)