Skip to content

Commit 0d2307e

Browse files
committed
Add Duration_NS field to kernel trace CSV output
Fixes: SWDEV-561821 This change adds the missing Duration_NS field to kernel trace CSV output, addressing a regression from rocprof to rocprofv3. Problem: rocprofv3 --kernel-trace only provides timestamps for each kernel invocation, unlike the CSV output from rocprof --stats, which contained durations. This is critical for applications like HemeLB where the same hotspot kernel is called twice per iteration with different durations. Solution: Added a Duration_NS field (calculated as end - start) to all trace CSV outputs. Changes: 1. Python (rocpd/csv.py): - Added (end - start) AS Duration_NS to 6 trace functions: * write_kernel_csv * write_memory_copy_csv * write_memory_allocation_csv * write_counters_csv * write_scratch_memory_csv * write_region_csv - Ensured the column name uses the consistent "Duration_NS" spelling. 2. C++ (generateCSV.cpp): - Added "Duration_NS" to the kernel trace CSV header, immediately after "End_Timestamp" - Added (record.end_timestamp - record.start_timestamp) as the value for the Duration_NS column in kernel trace rows. 3. C++ (csv.hpp): - Updated kernel_trace_with_stream_csv_encoder from csv_encoder<22> to csv_encoder<23> to account for the new Duration_NS column. 4. Tests (rocprofv3 integration): - Added a new test under tests/rocprofv3/kernel-duration-ns: * rocprofv3-test-kernel-duration-ns-execute - Runs `rocprofv3 --kernel-trace --output-format csv` on the simple-transpose test application to generate out_kernel_trace.csv. * rocprofv3-test-kernel-duration-ns-validation - Runs validate.py with --csv-input=<out_kernel_trace.csv> and: - Asserts that Start_Timestamp, End_Timestamp, and Duration_NS columns exist in the header. - Verifies that Duration_NS == End_Timestamp - Start_Timestamp for up to the first five non-empty data rows.
Testing: - Environment: MI300X, ROCm 6.4.1, Docker container - Test application: HPCTrainingExamples/daxpy_5 - Verified the Duration_NS field appears in column 12 - Verified Duration_NS = End_Timestamp - Start_Timestamp - Sample values: 80360 ns, 63895 ns, 54680 ns - New tests: * ctest -R "kernel-duration-ns" -V - rocprofv3-test-kernel-duration-ns-execute - rocprofv3-test-kernel-duration-ns-validation * Both tests pass locally. Impact: - Restores functionality from legacy rocprof tool - Enables efficient per-invocation kernel duration analysis - No existing columns are removed or renamed; Duration_NS is inserted after End_Timestamp, so the columns that follow it shift one position to the right - Backward compatible for consumers that look up columns by header name
1 parent 9cac2e4 commit 0d2307e

File tree

8 files changed

+222
-30
lines changed

8 files changed

+222
-30
lines changed

projects/rocprofiler-sdk/source/lib/output/csv.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ using list_derived_metrics_csv_encoder = csv_encoder<5>;
109109
using scratch_memory_encoder = csv_encoder<9>;
110110
using stats_csv_encoder = csv_encoder<8>;
111111
using pc_sampling_host_trap_csv_encoder = csv_encoder<6>;
112-
using kernel_trace_with_stream_csv_encoder = csv_encoder<22>;
112+
using kernel_trace_with_stream_csv_encoder = csv_encoder<23>;
113113
using memory_copy_with_stream_csv_encoder = csv_encoder<8>;
114114
using pc_sampling_stochastic_csv_encoder = csv_encoder<10>;
115115
} // namespace csv

projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -260,31 +260,16 @@ generate_csv(const output_config&
260260

261261
if(cfg.stats && stats)
262262
write_stats(get_stats_output_file(cfg, domain_type::KERNEL_DISPATCH), stats.entries);
263-
auto ofs = tool::csv_output_file{cfg,
264-
domain_type::KERNEL_DISPATCH,
265-
tool::csv::kernel_trace_with_stream_csv_encoder{},
266-
{"Kind",
267-
"Agent_Id",
268-
"Queue_Id",
269-
"Stream_Id",
270-
"Thread_Id",
271-
"Dispatch_Id",
272-
"Kernel_Id",
273-
"Kernel_Name",
274-
"Correlation_Id",
275-
"Start_Timestamp",
276-
"End_Timestamp",
277-
"LDS_Block_Size",
278-
"Scratch_Size",
279-
"VGPR_Count",
280-
"Accum_VGPR_Count",
281-
"SGPR_Count",
282-
"Workgroup_Size_X",
283-
"Workgroup_Size_Y",
284-
"Workgroup_Size_Z",
285-
"Grid_Size_X",
286-
"Grid_Size_Y",
287-
"Grid_Size_Z"}};
263+
auto ofs = tool::csv_output_file{
264+
cfg,
265+
domain_type::KERNEL_DISPATCH,
266+
tool::csv::kernel_trace_with_stream_csv_encoder{},
267+
{"Kind", "Agent_Id", "Queue_Id", "Stream_Id",
268+
"Thread_Id", "Dispatch_Id", "Kernel_Id", "Kernel_Name",
269+
"Correlation_Id", "Start_Timestamp", "End_Timestamp", "Duration_NS",
270+
"LDS_Block_Size", "Scratch_Size", "VGPR_Count", "Accum_VGPR_Count",
271+
"SGPR_Count", "Workgroup_Size_X", "Workgroup_Size_Y", "Workgroup_Size_Z",
272+
"Grid_Size_X", "Grid_Size_Y", "Grid_Size_Z"}};
288273

289274
for(auto ditr : data)
290275
{
@@ -313,6 +298,7 @@ generate_csv(const output_config&
313298
record.correlation_id.internal,
314299
record.start_timestamp,
315300
record.end_timestamp,
301+
(record.end_timestamp - record.start_timestamp),
316302
lds_block_size_v,
317303
record.dispatch_info.private_segment_size,
318304
kernel_info->arch_vgpr_count,

projects/rocprofiler-sdk/source/lib/python/rocpd/csv.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ def write_kernel_csv(importData, config) -> None:
236236
"stack_id AS Correlation_Id",
237237
"start AS Start_Timestamp",
238238
"end AS End_Timestamp",
239+
"(end - start) AS Duration_NS",
239240
"lds_size AS Lds_Block_Size",
240241
"scratch_size",
241242
"vgpr_count",
@@ -280,7 +281,8 @@ def write_memory_copy_csv(importData, config) -> None:
280281
{dst_agent_id} AS Destination_Agent_Id,
281282
stack_id AS Correlation_Id,
282283
start AS Start_Timestamp,
283-
end AS End_Timestamp
284+
end AS End_Timestamp,
285+
(end - start) AS Duration_NS
284286
FROM "memory_copies"
285287
ORDER BY
286288
guid ASC, start ASC, end DESC
@@ -310,7 +312,8 @@ def write_memory_allocation_csv(importData, config) -> None:
310312
'0x' || printf('%016X', address) AS Address,
311313
stack_id AS Correlation_Id,
312314
start AS Start_Timestamp,
313-
end AS End_Timestamp
315+
end AS End_Timestamp,
316+
(end - start) AS Duration_NS
314317
FROM "memory_allocations"
315318
ORDER BY
316319
guid ASC, start ASC, end DESC
@@ -343,6 +346,7 @@ def write_counters_csv(importData, config) -> None:
343346
"value AS Counter_Value",
344347
"start AS Start_Timestamp",
345348
"end AS End_Timestamp",
349+
"(end - start) AS Duration_NS",
346350
]
347351

348352
aliased_headers = []
@@ -375,7 +379,8 @@ def write_scratch_memory_csv(importData, config) -> None:
375379
tid AS Thread_Id,
376380
alloc_flags,
377381
start AS Start_Timestamp,
378-
end AS End_Timestamp
382+
end AS End_Timestamp,
383+
(end - start) AS Duration_NS
379384
FROM "scratch_memory"
380385
ORDER BY
381386
guid ASC, start ASC, end DESC
@@ -394,7 +399,8 @@ def write_region_csv(importData, config) -> None:
394399
tid AS Thread_Id,
395400
stack_id AS Correlation_Id,
396401
start AS Start_Timestamp,
397-
end AS End_Timestamp
402+
end AS End_Timestamp,
403+
(end - start) AS Duration_NS
398404
FROM "regions"
399405
ORDER BY
400406
guid ASC, start ASC, end DESC

projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ add_subdirectory(conversion-script)
4848
add_subdirectory(python-bindings)
4949
add_subdirectory(rocpd)
5050
add_subdirectory(rocpd-kernel-rename)
51+
add_subdirectory(kernel-duration-ns)
5152
add_subdirectory(attachment)
5253
add_subdirectory(rocpd-scratch)
5354
add_subdirectory(att-consecutive-kernels)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#
2+
# rocprofv3 tool tests for kernel duration ns
3+
#
4+
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
5+
6+
project(
7+
rocprofiler-sdk-tests-rocprofv3-kernel-duration-ns
8+
LANGUAGES CXX
9+
VERSION 0.0.0)
10+
11+
find_package(rocprofiler-sdk REQUIRED)
12+
find_package(Python3 REQUIRED)
13+
14+
set(rocprofv3-env
15+
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}"
16+
"PYTHONPATH=${rocprofiler-sdk_LIB_DIR}/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}/site-packages"
17+
)
18+
19+
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY conftest.py validate.py)
20+
21+
add_test(
22+
NAME rocprofv3-test-kernel-duration-ns-execute
23+
COMMAND
24+
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -d
25+
${CMAKE_CURRENT_BINARY_DIR}/kernel-duration-ns -o out --output-format csv
26+
--kernel-trace -- $<TARGET_FILE:simple-transpose>)
27+
28+
set_tests_properties(
29+
rocprofv3-test-kernel-duration-ns-execute
30+
PROPERTIES TIMEOUT
31+
120
32+
LABELS
33+
"integration-tests;kernel-duration-ns"
34+
ENVIRONMENT
35+
"${rocprofv3-env}"
36+
FAIL_REGULAR_EXPRESSION
37+
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
38+
FIXTURES_SETUP
39+
rocprofv3-test-kernel-duration-ns-run)
40+
41+
# Validation step: checks the kernel trace CSV written by the -execute test.
# NOTE(review): this invokes validate.py directly as an argparse script rather
# than through pytest, so the conftest.py fixtures and pytest.ini configured
# above do not appear to be exercised by this test -- confirm they are needed.
add_test(
42+
NAME rocprofv3-test-kernel-duration-ns-validation
43+
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --csv-input
44+
${CMAKE_CURRENT_BINARY_DIR}/kernel-duration-ns/out_kernel_trace.csv)
45+
46+
set_tests_properties(
47+
rocprofv3-test-kernel-duration-ns-validation
48+
PROPERTIES TIMEOUT
49+
120
50+
LABELS
51+
"integration-tests;kernel-duration-ns"
52+
ENVIRONMENT
53+
"${rocprofv3-env}"
54+
DEPENDS
55+
"rocprofv3-test-kernel-duration-ns-execute"
56+
FAIL_REGULAR_EXPRESSION
57+
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
58+
FIXTURES_REQUIRED
59+
rocprofv3-test-kernel-duration-ns-run)
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env python3
2+
3+
# MIT License
4+
#
5+
# Copyright (c) 2024-2025 Advanced Micro Devices,
6+
# Inc. All rights reserved.
7+
#
8+
# Permission is hereby granted, free of charge, to any person obtaining a copy
9+
# of this software and associated documentation files (the "Software"), to deal
10+
# in the Software without restriction, including without limitation the rights
11+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12+
# copies of the Software, and to permit persons to whom the Software is
13+
# furnished to do so, subject to the following conditions:
14+
#
15+
# The above copyright notice and this permission notice shall be included in
16+
# all copies or substantial portions of the Software.
17+
#
18+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24+
# THE SOFTWARE.
25+
26+
import csv
27+
import os
28+
from pathlib import Path
29+
30+
import pytest
31+
32+
33+
def pytest_addoption(parser):
34+
"""Register command-line options for this test module."""
# The value is read back by the csv_path fixture through
# request.config.getoption("--csv-input").
35+
parser.addoption(
36+
"--csv-input",
37+
action="store",
38+
help="Path to kernel trace CSV file to validate.",
39+
)
40+
41+
42+
@pytest.fixture
43+
def csv_path(request) -> Path:
44+
"""Return the path to the kernel trace CSV file passed via --csv-input."""
# Raises RuntimeError when --csv-input was not supplied (or is empty) and
# FileNotFoundError when the supplied path is not an existing regular file.
45+
filename = request.config.getoption("--csv-input")
46+
if not filename:
47+
raise RuntimeError("--csv-input option is required for this test")
48+
path = Path(filename)
49+
if not path.is_file():
50+
raise FileNotFoundError(f"{path} does not exist")
51+
return path
52+
53+
54+
@pytest.fixture
55+
def csv_rows(csv_path: Path):
56+
"""Return all data rows of the kernel trace CSV as a list of dicts keyed by header name; raises RuntimeError if there are none."""
57+
rows = []
58+
with csv_path.open("r", newline="") as inp:
59+
reader = csv.DictReader(inp)
60+
for row in reader:
61+
rows.append(row)
62+
if not rows:
63+
raise RuntimeError(f"No data rows found in {csv_path}")
64+
return rows
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[pytest]
2+
addopts = --durations=20 -rA -s -vv
3+
testpaths = validate.py
4+
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import csv
5+
from pathlib import Path
6+
7+
8+
def parse_args():
"""Parse the command line; the required --csv-input value is converted to a pathlib.Path."""
9+
parser = argparse.ArgumentParser()
10+
parser.add_argument(
11+
"--csv-input",
12+
type=Path,
13+
required=True,
14+
help="Path to the generated kernel_trace CSV",
15+
)
16+
return parser.parse_args()
17+
18+
19+
def main():
"""Check that Duration_NS equals End_Timestamp - Start_Timestamp in the CSV given by --csv-input.

Raises FileNotFoundError if the CSV file does not exist, and RuntimeError if
the header row or a required column is missing, if a checked row's
Duration_NS differs from End_Timestamp - Start_Timestamp, or if no data row
was checked. Prints an "[OK]" summary line on success.
"""
20+
args = parse_args()
21+
csv_path: Path = args.csv_input
22+
23+
if not csv_path.is_file():
24+
raise FileNotFoundError(f"CSV file not found: {csv_path}")
25+
26+
with csv_path.open("r", newline="") as f:
27+
reader = csv.reader(f)
28+
header = next(reader, None)
29+
if header is None:
30+
raise RuntimeError("CSV has no header row")
31+
32+
# Build a mapping from column name to index
33+
col_idx = {name: i for i, name in enumerate(header)}
34+
35+
required_cols = ["Start_Timestamp", "End_Timestamp", "Duration_NS"]
36+
missing = [c for c in required_cols if c not in col_idx]
37+
if missing:
38+
raise RuntimeError(f"Missing required columns: {missing}")
39+
40+
s_idx = col_idx["Start_Timestamp"]
41+
e_idx = col_idx["End_Timestamp"]
42+
d_idx = col_idx["Duration_NS"]
43+
44+
# Only check the first five non-empty rows (the loop breaks once
# checked_rows reaches 5) to avoid processing very large files
45+
checked_rows = 0
46+
for row in reader:
47+
if not row:
48+
continue
49+
50+
start = int(row[s_idx])
51+
end = int(row[e_idx])
52+
duration = int(row[d_idx])
53+
calc = end - start
54+
55+
if duration != calc:
56+
raise RuntimeError(
57+
f"Duration_NS mismatch: got {duration}, expected {calc} "
58+
f"(start={start}, end={end})"
59+
)
60+
61+
checked_rows += 1
62+
if checked_rows >= 5:
63+
break
64+
65+
if checked_rows == 0:
66+
raise RuntimeError("No data rows found to validate Duration_NS")
67+
68+
print(f"[OK] Validated Duration_NS for {checked_rows} row(s) in {csv_path}")
69+
70+
71+
if __name__ == "__main__":
72+
main()

0 commit comments

Comments
 (0)