Skip to content

Commit 50b185b

Browse files
authored
rocprofv3: PC Sampling Support (#14)
* Adding tool pc sampling support Fixing merge issue tool support on SDKupdates link amd-comgr Sanitizer failure fix fix format Addressing review comments misc fix Adding dispatch id to the CSV output AddingCHANGELOG [ROCProfV3][PC Sampling] Initial ROCProfV3 PC sampling tests for JSON and CSV formats (#17) ROCProfV3 initial tests for JSON and CSV output. Simple kernels that simplify the verification of samples to instruction decoding has been introduced. removing option to enable pc sampling explicitly Adding documentation no pc-sampling option in tests anymore Addressing review comments Updating docs an option for choosing whether all units must be sampled try ignoring PC sampling tests (#36) * run pc-sampling tests on MI2xx runners * use v_fmac_f32 instead of s_nop 0 in tests * fixing docs
1 parent eedee0f commit 50b185b

36 files changed

+1978
-56
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
109109
- Check to force tools to initialize context id with zero
110110
- Support to specify hardware counters for collection using rocprofv3 as `rocprofv3 --pmc [COUNTER [COUNTER ...]]`
111111
- Memory Allocation Tracing
112+
- PC sampling tool support with CSV and JSON output formats
112113

113114
### Changed
114115

source/bin/rocprofv3.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import sys
55
import argparse
66
import subprocess
7+
import numpy
78

89

910
class dotdict(dict):
@@ -167,6 +168,30 @@ def add_parser_bool_argument(gparser, *args, **kwargs):
167168
help="Collect tracing data for HIP API, HSA API, Marker (ROCTx) API, RCCL API, Memory operations (copies, scratch, and allocations), and Kernel dispatches.",
168169
)
169170

171+
pc_sampling_options = parser.add_argument_group("PC sampling options")
172+
173+
pc_sampling_options.add_argument(
174+
"--pc-sampling-unit",
175+
help="The unit appropriate to the PC sampling type/method; currently only the time unit is supported",
176+
default=None,
177+
type=str.lower,
178+
choices=("instructions", "cycles", "time"),
179+
)
180+
181+
pc_sampling_options.add_argument(
182+
"--pc-sampling-method",
183+
help="Type of PC sampling; currently only the host trap method is supported",
184+
default=None,
185+
type=str.lower,
186+
choices=("stochastic", "host_trap"),
187+
)
188+
189+
pc_sampling_options.add_argument(
190+
"--pc-sampling-interval",
191+
help="Interval at which PC samples are generated",
192+
default=None,
193+
type=numpy.uint64,
194+
)
170195
basic_tracing_options = parser.add_argument_group("Basic tracing options")
171196

172197
# Add the arguments
@@ -904,6 +929,18 @@ def log_config(_env):
904929
if args.log_level in ("info", "trace", "env"):
905930
log_config(app_env)
906931

932+
if args.pc_sampling_unit or args.pc_sampling_method or args.pc_sampling_method:
933+
if not (
934+
args.pc_sampling_unit and args.pc_sampling_method and args.pc_sampling_method
935+
):
936+
fatal_error("All three PC sampling configurations need to be set")
937+
938+
update_env("ROCPROFILER_PC_SAMPLING_BETA_ENABLED", "ON")
939+
update_env("ROCPROF_PC_SAMPLING_UNIT", args.pc_sampling_unit)
940+
update_env("ROCPROF_PC_SAMPLING_METHOD", args.pc_sampling_method)
941+
update_env("ROCPROF_PC_SAMPLING_INTERVAL", args.pc_sampling_interval)
942+
update_env("ROCPROF_ENABLE_PC_SAMPLING", args.pc_sampling)
943+
907944
if use_execv:
908945
# does not return
909946
os.execvpe(app_args[0], app_args, env=app_env)

source/docs/how-to/using-rocprofv3.rst

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,18 @@ Here is the sample of commonly used ``rocprofv3`` command-line options. Some opt
162162
- Perfetto shared memory size hint in KB. default: 64 KB
163163
- Extension
164164

165+
* - ``--pc-sampling-method``
166+
- Type of PC sampling; currently only the host trap method is supported
167+
- PC Sampling Configurations
168+
169+
* - ``--pc-sampling-unit``
170+
- The unit appropriate to the PC sampling type/method; currently only the time unit is supported
171+
- PC Sampling Configurations
172+
173+
* - ``--pc-sampling-interval``
174+
- Frequency at which PC samples are generated
175+
- PC Sampling Configurations
176+
165177
To see the exhaustive list of ``rocprofv3`` options, run:
166178

167179
.. code-block:: bash
@@ -675,6 +687,9 @@ Properties
675687
trace.
676688
- **``preload``** *(array)*: Libraries to prepend to LD_PRELOAD
677689
(usually for sanitizers).
690+
- **``pc_sampling_unit``** *(string)*: pc sampling unit.
691+
- **``pc_sampling_method``** *(string)*: pc sampling method.
692+
- **``pc_sampling_interval``** *(integer)*: pc sampling interval.
678693

679694
.. code-block:: shell
680695
@@ -1039,6 +1054,14 @@ Properties
10391054
- **`id`** *(integer, required)*: Dimension ID.
10401055
- **`instance_size`** *(integer, required)*: Size of the instance.
10411056
- **`name`** *(string, required)*: Name of the dimension.
1057+
- **``pc_sample_instructions``** *(array)*: Array of decoded
1058+
instructions matching sampled PCs from pc_sample_host_trap
1059+
section.
1060+
- **``pc_sample_comments``** *(array)*: Comments matching
1061+
assembly instructions from pc_sample_instructions array. If
1062+
debug symbols are available, comments provide the
1063+
instruction-to-source-line mapping. Otherwise, a comment is an empty
1064+
string.
10421065
- **`code_objects`** *(array, required)*: Code object records.
10431066
- **Items** *(object)*
10441067
- **`size`** *(integer, required)*: Size of the code object.
@@ -1103,6 +1126,37 @@ Properties
11031126
- **`arch_vgpr_count`** *(integer, required)*: Count of VGPRs.
11041127
- **`sgpr_count`** *(integer, required)*: Count of SGPRs.
11051128
- **`lds_block_size_v`** *(integer, required)*: Size of LDS block.
1129+
- **``pc_sample_host_trap``** *(array)*: Host Trap PC Sampling records.
1130+
- **Items** *(object)*
1131+
- **``hw_id``** *(object)*: Describes hardware part on which sampled wave was running.
1132+
- **``chiplet``** *(integer)*: Chiplet index.
1133+
- **``wave_id``** *(integer)*: Wave slot index.
1134+
- **``simd_id``** *(integer)*: SIMD index.
1135+
- **``pipe_id``** *(integer)*: Pipe index.
1136+
- **``cu_or_wgp_id``** *(integer)*: Index of compute unit or workgroup processor.
1137+
- **``shader_array_id``** *(integer)*: Shader array index.
1138+
- **``shader_engine_id``** *(integer)*: Shader engine
1139+
index.
1140+
- **``workgroup_id``** *(integer)*: Workgroup position in the 3D grid.
1141+
- **``vm_id``** *(integer)*: Virtual memory ID.
1142+
- **``queue_id``** *(integer)*: Queue id.
1143+
- **``microengine_id``** *(integer)*: ACE
1144+
(microengine) index.
1145+
- **``pc``** *(object)*: Encapsulates information about
1146+
sampled PC.
1147+
- **``code_object_id``** *(integer)*: Code object id.
1148+
- **``code_object_offset``** *(integer)*: Offset within the object if the latter is known. Otherwise, virtual address of the PC.
1149+
- **``exec_mask``** *(integer)*: Execution mask indicating active SIMD lanes of sampled wave.
1150+
- **``timestamp``** *(integer)*: Timestamp.
1151+
- **``dispatch_id``** *(integer)*: Dispatch id.
1152+
- **``correlation_id``** *(object)*: Correlation ID information.
1153+
- **``internal``** *(integer)*: Internal correlation ID.
1154+
- **``external``** *(integer)*: External correlation ID.
1155+
- **``rocprofiler_dim3_t``** *(object)*: Position of the workgroup in 3D grid.
1156+
- **``x``** *(integer)*: Dimension x.
1157+
- **``y``** *(integer)*: Dimension y.
1158+
- **``z``** *(integer)*: Dimension z.
1159+
- **``wave_in_group``** *(integer)*: Wave position within the workgroup (0-31).
11061160
- **`buffer_records`** *(object, required)*: Buffer record details.
11071161
- **`kernel_dispatch`** *(array)*: Kernel dispatch records.
11081162
- **Items** *(object)*

source/docs/rocprofv3-schema.json

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,14 @@
559559
"required": [
560560
"dimension_ids"
561561
]
562+
},
563+
"pc_sample_instructions": {
564+
"type": "array",
565+
"description": "Array of decoded instructions matching sampled PCs from pc_sample_host_trap section."
566+
},
567+
"pc_sample_comments": {
568+
"type": "array",
569+
"description": "Comments matching assembly instructions from pc_sample_instructions array. If debug symbols are available, comments provide the instruction-to-source-line mapping. Otherwise, a comment is an empty string."
562570
}
563571
}
564572
},
@@ -925,7 +933,129 @@
925933
"lds_block_size_v"
926934
]
927935
}
928-
}
936+
},
937+
"pc_sample_host_trap": {
938+
"type": "array",
939+
"description": "Host Trap PC Sampling records.",
940+
"items": {
941+
"type": "object",
942+
"properties": {
943+
"hw_id": {
944+
"type": "object",
945+
"description" : "Describes hardware part on which sampled wave was running.",
946+
"properties": {
947+
"chiplet":{
948+
"type": "integer",
949+
"description": "Chiplet index."
950+
},
951+
"wave_id":{
952+
"type": "integer",
953+
"description": "Wave slot index."
954+
},
955+
"simd_id":{
956+
"type": "integer",
957+
"description": "SIMD index."
958+
},
959+
"pipe_id":{
960+
"type": "integer",
961+
"description": "Pipe index."
962+
},
963+
"cu_or_wgp_id":{
964+
"type": "integer",
965+
"description": "Index of compute unit or workgroup processor."
966+
},
967+
"shader_array_id":{
968+
"type": "integer",
969+
"description": "Shader array index."
970+
},
971+
"shader_engine_id":{
972+
"type": "integer",
973+
"description": "Shader engine index."
974+
},
975+
"workgroup_id":{
976+
"type": "integer",
977+
"description": "Workgroup position in the 3D grid."
978+
},
979+
"vm_id":{
980+
"type": "integer",
981+
"description": "Virtual memory ID."
982+
},
983+
"queue_id":{
984+
"type": "integer",
985+
"description": "Queue id."
986+
},
987+
"microengine_id":{
988+
"type": "integer",
989+
"description": "ACE (microengine) index."
990+
}
991+
}
992+
},
993+
"pc": {
994+
"type": "object",
995+
"description": "Encapsulates information about sampled PC.",
996+
"properties": {
997+
"code_object_id":{
998+
"type": "integer",
999+
"description": "Code object id."
1000+
},
1001+
"code_object_offset":{
1002+
"type": "integer",
1003+
"description": "Offset within the object if the latter is known. Otherwise, virtual address of the PC."
1004+
}
1005+
}
1006+
},
1007+
"exec_mask":{
1008+
"type": "integer",
1009+
"description": "Execution mask indicating active SIMD lanes of sampled wave."
1010+
},
1011+
"timestamp":{
1012+
"type": "integer",
1013+
"description": "Timestamp."
1014+
},
1015+
"dispatch_id":{
1016+
"type": "integer",
1017+
"description": "Dispatch id."
1018+
},
1019+
"correlation_id": {
1020+
"type": "object",
1021+
"description": "Correlation ID information.",
1022+
"properties": {
1023+
"internal": {
1024+
"type": "integer",
1025+
"description": "Internal correlation ID."
1026+
},
1027+
"external": {
1028+
"type": "integer",
1029+
"description": "External correlation ID."
1030+
}
1031+
}
1032+
},
1033+
"rocprofiler_dim3_t": {
1034+
"type": "object",
1035+
"description": "Position of the workgroup in 3D grid.",
1036+
"properties": {
1037+
"x": {
1038+
"type": "integer",
1039+
"description": "Dimension x."
1040+
},
1041+
"y": {
1042+
"type": "integer",
1043+
"description": "Dimension y."
1044+
},
1045+
"z": {
1046+
"type": "integer",
1047+
"description": "Dimension z."
1048+
}
1049+
}
1050+
},
1051+
"wave_in_group": {
1052+
"type": "integer",
1053+
"description": "Wave position within the workgroup (0-31)."
1054+
}
1055+
}
1056+
}
1057+
}
1058+
9291059
}
9301060
},
9311061
"buffer_records": {

source/docs/rocprofv3_input_schema.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,19 @@
144144
"preload":{
145145
"type": "array",
146146
"description": "Libraries to prepend to LD_PRELOAD (usually for sanitizers)"
147-
}
147+
},
148+
"pc_sampling_unit": {
149+
"type": "string",
150+
"description": "pc sampling unit"
151+
},
152+
"pc_sampling_method": {
153+
"type": "string",
154+
"description": "pc sampling method"
155+
},
156+
"pc_sampling_interval": {
157+
"type": "integer",
158+
"description": "pc sampling interval"
159+
}
148160
}
149161
}
150162
}

source/include/rocprofiler-sdk/cxx/serialization.hpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,44 @@ save(ArchiveT& ar, rocprofiler_agent_cache_t data)
735735
ROCP_SDK_SAVE_DATA_FIELD(latency);
736736
ROCP_SDK_SAVE_DATA_FIELD(type);
737737
}
738+
template <typename ArchiveT>
739+
void
740+
save(ArchiveT& ar, rocprofiler_pc_t data)
741+
{
742+
ROCP_SDK_SAVE_DATA_FIELD(code_object_id);
743+
ROCP_SDK_SAVE_DATA_FIELD(code_object_offset);
744+
}
745+
746+
template <typename ArchiveT>
747+
void
748+
save(ArchiveT& ar, rocprofiler_pc_sampling_hw_id_v0_t data)
749+
{
750+
ROCP_SDK_SAVE_DATA_BITFIELD("chiplet", chiplet);
751+
ROCP_SDK_SAVE_DATA_BITFIELD("wave_id", wave_id);
752+
ROCP_SDK_SAVE_DATA_BITFIELD("simd_id", simd_id);
753+
ROCP_SDK_SAVE_DATA_BITFIELD("pipe_id", pipe_id);
754+
ROCP_SDK_SAVE_DATA_BITFIELD("cu_or_wgp_id", cu_or_wgp_id);
755+
ROCP_SDK_SAVE_DATA_BITFIELD("shader_array_id", shader_array_id);
756+
ROCP_SDK_SAVE_DATA_BITFIELD("shader_engine_id", shader_engine_id);
757+
ROCP_SDK_SAVE_DATA_BITFIELD("workgroup_id", workgroup_id);  // key has no trailing space, like the sibling keys
758+
ROCP_SDK_SAVE_DATA_BITFIELD("vm_id", vm_id);
759+
ROCP_SDK_SAVE_DATA_BITFIELD("queue_id", queue_id);
760+
ROCP_SDK_SAVE_DATA_BITFIELD("microengine_id", microengine_id);
761+
}
762+
763+
template <typename ArchiveT>
764+
void
765+
save(ArchiveT& ar, rocprofiler_pc_sampling_record_host_trap_v0_t data)
766+
{
767+
ROCP_SDK_SAVE_DATA_FIELD(hw_id);
768+
ROCP_SDK_SAVE_DATA_FIELD(pc);
769+
ROCP_SDK_SAVE_DATA_FIELD(exec_mask);
770+
ROCP_SDK_SAVE_DATA_FIELD(timestamp);
771+
ROCP_SDK_SAVE_DATA_FIELD(dispatch_id);
772+
ROCP_SDK_SAVE_DATA_VALUE("corr_id", correlation_id);
773+
ROCP_SDK_SAVE_DATA_VALUE("wrkgrp_id", workgroup_id);
774+
ROCP_SDK_SAVE_DATA_BITFIELD("wave_in_grp", wave_in_group);
775+
}
738776

739777
template <typename ArchiveT>
740778
void

source/lib/output/buffered_output.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include "counter_info.hpp"
2626
#include "generator.hpp"
27+
#include "pc_sample_transform.hpp"
2728
#include "statistics.hpp"
2829
#include "tmp_file_buffer.hpp"
2930

@@ -159,5 +160,8 @@ using memory_allocation_buffered_output_t =
159160
using counter_records_buffered_output_t =
160161
::rocprofiler::tool::buffered_output<rocprofiler::tool::serialized_counter_record_t,
161162
domain_type::COUNTER_VALUES>;
163+
using pc_sampling_host_trap_buffered_output_t =
164+
buffered_output<rocprofiler::tool::rocprofiler_tool_pc_sampling_host_trap_record_t,
165+
domain_type::PC_SAMPLING_HOST_TRAP>;
162166
} // namespace tool
163167
} // namespace rocprofiler

0 commit comments

Comments
 (0)