diff --git a/CHANGELOG.md b/CHANGELOG.md index 84b2004b0..47b323d04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -210,6 +210,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - Fixed perfetto counter collection by introducing per dispatch accumulation. - Fixed queue preemption error and HSA_STATUS_ERROR_INVALID_PACKET_FORMAT error for stochastic PC-sampling for MI300X, leading to more stable runs. - Fixed the system hang issue for host-trap PC-sampling on MI300X. +- Fixed kernel trace CSV output generated by rocpd: renamed the segment-size columns to `LDS_Block_Size` and `Scratch_Size` and added `VGPR_Count`, `Accum_VGPR_Count`, and `SGPR_Count` columns. ### Removed diff --git a/source/lib/python/rocpd/source/csv.cpp b/source/lib/python/rocpd/source/csv.cpp index 5be3f9797..ca6823f65 100644 --- a/source/lib/python/rocpd/source/csv.cpp +++ b/source/lib/python/rocpd/source/csv.cpp @@ -75,15 +75,11 @@ CsvManager::CsvManager(rocprofiler::tool::output_config output_cfg) this->csv_configs = { {CsvType::KERNEL_DISPATCH, {"kernel_trace.csv", - "\"Guid\",\"Kind\",\"Agent_Id\",\"Queue_Id\"," - "\"Stream_Id\",\"Thread_Id\",\"Dispatch_Id\"," - "\"Kernel_Id\",\"Kernel_Name\",\"Correlation_Id\"," - "\"Start_Timestamp\",\"End_" - "Timestamp\",\"Private_Segment_Size\",\"Group_" - "Segment_Size\",\"Workgroup_Size_X\"," - "\"Workgroup_Size_Y\",\"Workgroup_Size_Z\",\"Grid_" - "Size_X\",\"Grid_Size_Y\",\"Grid_Size_" - "Z\""}}, + "\"Guid\",\"Kind\",\"Agent_Id\",\"Queue_Id\",\"Stream_Id\",\"Thread_Id\",\"Dispatch_Id\"," + "\"Kernel_Id\",\"Kernel_Name\",\"Correlation_Id\",\"Start_Timestamp\",\"End_Timestamp\"," + "\"LDS_Block_Size\",\"Scratch_Size\",\"VGPR_Count\",\"Accum_VGPR_Count\",\"SGPR_Count\"," + "\"Workgroup_Size_X\",\"Workgroup_Size_Y\",\"Workgroup_Size_Z\"," + "\"Grid_Size_X\",\"Grid_Size_Y\",\"Grid_Size_Z\""}}, {CsvType::MEMORY_COPY, {"memory_copy_trace.csv", "\"Guid\",\"Kind\",\"Direction\",\"Stream_Id\",\"Source_Agent_Id\"," @@ -260,8 +256,11 @@ write_kernel_csv( kernel.stack_id, kernel.start, kernel.end, - kernel.scratch_size, kernel.lds_size, + kernel.scratch_size, 
kernel.vgpr_count, + kernel.accum_vgpr_count, + kernel.sgpr_count, kernel.workgroup_size.x, kernel.workgroup_size.y, kernel.workgroup_size.z, diff --git a/source/lib/python/rocpd/source/types.hpp b/source/lib/python/rocpd/source/types.hpp index 165c68c36..454ff9ec0 100644 --- a/source/lib/python/rocpd/source/types.hpp +++ b/source/lib/python/rocpd/source/types.hpp @@ -286,6 +286,9 @@ struct kernel_dispatch uint64_t stack_id = 0; uint64_t parent_stack_id = 0; uint64_t corr_id = 0; + uint64_t vgpr_count = 0; + uint64_t accum_vgpr_count = 0; + uint64_t sgpr_count = 0; }; struct memory_allocation @@ -709,6 +712,9 @@ load(ArchiveT& ar, rocpd::types::kernel_dispatch& data) load_dim3("grid", data.grid_size); LOAD_DATA_FIELD(lds_size); LOAD_DATA_FIELD(scratch_size); + LOAD_DATA_FIELD(vgpr_count); + LOAD_DATA_FIELD(accum_vgpr_count); + LOAD_DATA_FIELD(sgpr_count); LOAD_DATA_FIELD(static_lds_size); LOAD_DATA_FIELD(static_scratch_size); LOAD_DATA_FIELD(stack_id); diff --git a/source/share/rocprofiler-sdk-rocpd/data_views.sql b/source/share/rocprofiler-sdk-rocpd/data_views.sql index 868542c99..37dea72b6 100644 --- a/source/share/rocprofiler-sdk-rocpd/data_views.sql +++ b/source/share/rocprofiler-sdk-rocpd/data_views.sql @@ -315,6 +315,9 @@ SELECT K.workgroup_size_z AS workgroup_z, K.group_segment_size AS lds_size, K.private_segment_size AS scratch_size, + S.arch_vgpr_count AS vgpr_count, + S.accum_vgpr_count, + S.sgpr_count, S.group_segment_size AS static_lds_size, S.private_segment_size AS static_scratch_size, E.stack_id,