|
32 | 32 | #include <rocprofiler-sdk/cxx/operators.hpp> |
33 | 33 | #include <rocprofiler-sdk/cxx/perfetto.hpp> |
34 | 34 |
|
| 35 | +#include <fmt/core.h> |
| 36 | + |
35 | 37 | #include <atomic> |
36 | 38 | #include <future> |
| 39 | +#include <iostream> |
37 | 40 | #include <map> |
38 | 41 | #include <thread> |
39 | 42 | #include <unordered_map> |
@@ -505,62 +508,117 @@ write_perfetto( |
505 | 508 | itr.end_timestamp); |
506 | 509 | tracing_session->FlushBlocking(); |
507 | 510 | } |
508 | | - |
509 | 511 | for(auto ditr : kernel_dispatch_gen) |
510 | | - for(auto itr : kernel_dispatch_gen.get(ditr)) |
| 512 | + { |
| 513 | + auto generator = kernel_dispatch_gen.get(ditr); |
| 514 | + // Group kernels on the same queue and agent. Temporary fix for firmware timestamp bug |
| 515 | + // Can be removed once bug is resolved. |
| 516 | + auto dispatch_bins = std::unordered_map< |
| 517 | + rocprofiler_agent_id_t, |
| 518 | + std::unordered_map< |
| 519 | + rocprofiler_queue_id_t, |
| 520 | + std::vector<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>>>{}; |
| 521 | + for(auto& itr : generator) |
511 | 522 | { |
512 | | - const auto& info = itr.dispatch_info; |
513 | | - const kernel_symbol_info* sym = tool_metadata.get_kernel_symbol(info.kernel_id); |
514 | | - |
515 | | - CHECK(sym != nullptr); |
516 | | - |
517 | | - auto name = std::string_view{sym->kernel_name}; |
518 | | - auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id); |
| 523 | + const auto& info = itr.dispatch_info; |
| 524 | + dispatch_bins[info.agent_id][info.queue_id].emplace_back(&itr); |
| 525 | + } |
519 | 526 |
|
520 | | - if(demangled.find(name) == demangled.end()) |
| 527 | + for(const auto& aitr : dispatch_bins) |
| 528 | + { |
| 529 | + for(auto qitr : aitr.second) |
521 | 530 | { |
522 | | - demangled.emplace(name, common::cxx_demangle(name)); |
| 531 | + // Sort kernels on the same queue and agent by timestamp |
| 532 | + std::sort(qitr.second.begin(), |
| 533 | + qitr.second.end(), |
| 534 | + [](const auto* lhs, const auto* rhs) { |
| 535 | + return lhs->start_timestamp < rhs->start_timestamp; |
| 536 | + }); |
| 537 | + |
| 538 | + // Loop over the kernels (qitr.second) and put them into perfetto. |
| 539 | + for(auto it = qitr.second.begin(); it != qitr.second.end(); ++it) |
| 540 | + { |
| 541 | + auto& current = **it; |
| 542 | + const auto& info = current.dispatch_info; |
| 543 | + const kernel_symbol_info* sym = |
| 544 | + tool_metadata.get_kernel_symbol(info.kernel_id); |
| 545 | + |
| 546 | + CHECK(sym != nullptr); |
| 547 | + |
| 548 | + auto name = std::string_view{sym->kernel_name}; |
| 549 | + auto& track = agent_queue_tracks.at(info.agent_id).at(info.queue_id); |
| 550 | + |
| 551 | + // Temporary fix until timestamp issues are resolved: Set timestamps to be |
| 552 | + // halfway between ending timestamp and starting timestamp of overlapping |
| 553 | + // kernel dispatches. Perfetto displays slices incorrectly if overlapping |
| 554 | + // slices on the same track are not completely enveloped. |
| 555 | + auto next = std::next(it); |
| 556 | + if(next != qitr.second.end() && |
| 557 | + (*next)->start_timestamp < (*it)->end_timestamp) |
| 558 | + { |
| 559 | + auto start = (*next)->start_timestamp; |
| 560 | + auto end = std::min((*it)->end_timestamp, (*next)->end_timestamp); |
| 561 | + auto mid = start + (end - start) / 2; |
| 562 | + // Report changed timestamps to ROCP INFO |
| 563 | + ROCP_INFO << fmt::format( |
| 564 | + "Kernel ending timestamp increased by {} ns to {} ns with " |
| 565 | + "following kernel starting timestamp decreased by {} ns to {} ns " |
| 566 | + "due to firmware timestamp error.", |
| 567 | + ((*it)->end_timestamp - mid), |
| 568 | + mid, |
| 569 | + (mid - (*next)->start_timestamp), |
| 570 | + mid); |
| 571 | + (*it)->end_timestamp = mid; |
| 572 | + (*next)->start_timestamp = mid; |
| 573 | + } |
| 574 | + |
| 575 | + if(demangled.find(name) == demangled.end()) |
| 576 | + { |
| 577 | + demangled.emplace(name, common::cxx_demangle(name)); |
| 578 | + } |
| 579 | + |
| 580 | + TRACE_EVENT_BEGIN( |
| 581 | + sdk::perfetto_category<sdk::category::kernel_dispatch>::name, |
| 582 | + ::perfetto::StaticString(demangled.at(name).c_str()), |
| 583 | + track, |
| 584 | + current.start_timestamp, |
| 585 | + ::perfetto::Flow::ProcessScoped(current.correlation_id.internal), |
| 586 | + "begin_ns", |
| 587 | + current.start_timestamp, |
| 588 | + "end_ns", |
| 589 | + current.end_timestamp, |
| 590 | + "delta_ns", |
| 591 | + (current.end_timestamp - current.start_timestamp), |
| 592 | + "kind", |
| 593 | + current.kind, |
| 594 | + "agent", |
| 595 | + tool_metadata.get_node_id(info.agent_id), |
| 596 | + "corr_id", |
| 597 | + current.correlation_id.internal, |
| 598 | + "queue", |
| 599 | + info.queue_id.handle, |
| 600 | + "tid", |
| 601 | + current.thread_id, |
| 602 | + "kernel_id", |
| 603 | + info.kernel_id, |
| 604 | + "private_segment_size", |
| 605 | + info.private_segment_size, |
| 606 | + "group_segment_size", |
| 607 | + info.group_segment_size, |
| 608 | + "workgroup_size", |
| 609 | + info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z, |
| 610 | + "grid_size", |
| 611 | + info.grid_size.x * info.grid_size.y * info.grid_size.z); |
| 612 | + TRACE_EVENT_END( |
| 613 | + sdk::perfetto_category<sdk::category::kernel_dispatch>::name, |
| 614 | + track, |
| 615 | + current.end_timestamp); |
| 616 | + tracing_session->FlushBlocking(); |
| 617 | + } |
523 | 618 | } |
524 | | - |
525 | | - TRACE_EVENT_BEGIN( |
526 | | - sdk::perfetto_category<sdk::category::kernel_dispatch>::name, |
527 | | - ::perfetto::StaticString(demangled.at(name).c_str()), |
528 | | - track, |
529 | | - itr.start_timestamp, |
530 | | - ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), |
531 | | - "begin_ns", |
532 | | - itr.start_timestamp, |
533 | | - "end_ns", |
534 | | - itr.end_timestamp, |
535 | | - "delta_ns", |
536 | | - (itr.end_timestamp - itr.start_timestamp), |
537 | | - "kind", |
538 | | - itr.kind, |
539 | | - "agent", |
540 | | - agents_map.at(info.agent_id).logical_node_id, |
541 | | - "corr_id", |
542 | | - itr.correlation_id.internal, |
543 | | - "queue", |
544 | | - info.queue_id.handle, |
545 | | - "tid", |
546 | | - itr.thread_id, |
547 | | - "kernel_id", |
548 | | - info.kernel_id, |
549 | | - "private_segment_size", |
550 | | - info.private_segment_size, |
551 | | - "group_segment_size", |
552 | | - info.group_segment_size, |
553 | | - "workgroup_size", |
554 | | - info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z, |
555 | | - "grid_size", |
556 | | - info.grid_size.x * info.grid_size.y * info.grid_size.z); |
557 | | - TRACE_EVENT_END(sdk::perfetto_category<sdk::category::kernel_dispatch>::name, |
558 | | - track, |
559 | | - itr.end_timestamp); |
560 | | - tracing_session->FlushBlocking(); |
561 | 619 | } |
| 620 | + } |
562 | 621 | } |
563 | | - |
564 | 622 | // counter tracks |
565 | 623 | { |
566 | 624 | // memory copy counter track |
|
0 commit comments