|
2 | 2 |
|
3 | 3 | # MIT License |
4 | 4 | # |
5 | | -# Copyright (c) 2024-2025 Advanced Micro Devices, |
6 | | -# Inc. All rights reserved. |
| 5 | +# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. |
7 | 6 | # |
8 | 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | 8 | # of this software and associated documentation files (the "Software"), to deal |
|
23 | 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
24 | 23 | # THE SOFTWARE. |
25 | 24 |
|
| 25 | + |
26 | 26 | import os |
27 | 27 | import sys |
28 | 28 | import csv |
29 | | -import json |
30 | 29 | import subprocess |
31 | 30 | import pytest |
32 | 31 |
|
def node_exists(name, data, min_len=1):
    """Assert that *name* is present in *data*, non-None, and (if sized) at least *min_len* long."""
    assert name in data, f"missing key: {name}"
    value = data[name]
    assert value is not None, f"key is None: {name}"
    # Length check only applies to sized containers; scalars pass through.
    if hasattr(value, "__len__"):
        assert len(value) >= min_len, f"key '{name}' too small"
38 | 32 |
|
def run_rocpd_convert(db_path, out_dir):
    """Convert a rocpd database to CSV format via `python -m rocpd convert`.

    Creates *out_dir* if needed and asserts that the converter exits cleanly,
    including the captured stdout/stderr in the failure message.
    """
    os.makedirs(out_dir, exist_ok=True)
    cmd = [
        sys.executable, "-m", "rocpd", "convert",
        "-i", db_path,
        "--output-format", "csv",
        "-d", out_dir,
    ]
    # capture_output=True is equivalent to stdout=PIPE, stderr=PIPE
    res = subprocess.run(cmd, capture_output=True, text=True)
    assert res.returncode == 0, f"rocpd convert failed\ncmd={' '.join(cmd)}\nstdout={res.stdout}\nstderr={res.stderr}"
44 | 39 |
|
| 40 | + |
def find_kernel_trace_csv(out_dir):
    """Return the path of the first file in *out_dir* ending in ``kernel_trace.csv``."""
    matches = [entry for entry in os.listdir(out_dir) if entry.endswith("kernel_trace.csv")]
    if matches:
        return os.path.join(out_dir, matches[0])
    assert False, f"kernel trace CSV not found in {out_dir}"
50 | 47 |
|
| 48 | + |
def load_csv_rows(path):
    """Read *path* as a header-keyed CSV and return a non-empty list of row dicts."""
    assert os.path.isfile(path), f"missing CSV: {path}"
    with open(path, newline="") as handle:
        rows = [row for row in csv.DictReader(handle)]
    assert len(rows) > 0, f"empty CSV: {path}"
    return rows
58 | 57 |
|
| 58 | + |
def extract_json_kernel_records(json_root):
    """Return the first non-empty kernel-dispatch record list from the tool JSON.

    The tool node may be wrapped in a single-element list; the dispatch records
    may live under any of several buffer_records keys depending on tool version.
    """
    assert "rocprofiler-sdk-tool" in json_root, "missing rocprofiler-sdk-tool in JSON"
    tool = json_root["rocprofiler-sdk-tool"]
    # Some tool versions emit the node wrapped in a list.
    if isinstance(tool, list) and len(tool) > 0:
        tool = tool[0]
    assert "buffer_records" in tool, "missing buffer_records in JSON"
    buffer_records = tool["buffer_records"]

    for candidate in ("kernel_dispatch", "kernel_trace", "kernel_dispatch_trace"):
        if candidate in buffer_records:
            records = buffer_records[candidate]
            if isinstance(records, list) and len(records) > 0:
                return records
    assert False, f"no kernel dispatch records found in JSON buffer_records keys={list(buffer_records.keys())}"
70 | 72 |
|
def build_json_duration_map(records):
    """Build a map of str(dispatch_id) -> (start, end, duration) from JSON records.

    Falls back to the correlation id (its ``internal`` field when it is a dict)
    whenever a record carries no dispatch_info.dispatch_id. Timestamps are
    validated to be positive and correctly ordered.
    """
    duration_map = {}
    for record in records:
        # Primary key: the dispatch id inside dispatch_info.
        info = record.get("dispatch_info", {})
        key = info.get("dispatch_id") if isinstance(info, dict) else None

        # Fallback: correlation id when no dispatch id is present.
        if key is None:
            corr = record.get("correlation_id", {})
            key = corr.get("internal", 0) if isinstance(corr, dict) else corr

        start = record.get("start_timestamp")
        end = record.get("end_timestamp")
        assert start is not None and end is not None, f"missing timestamps in JSON record: {record}"

        start, end = int(start), int(end)
        assert start > 0 and end > 0, f"invalid timestamps start={start} end={end}"
        assert end >= start, f"end before start: start={start} end={end}"

        duration_map[str(key)] = (start, end, end - start)

    assert len(duration_map) > 0, "no kernel records extracted from JSON"
    return duration_map
91 | 105 |
|
| 106 | + |
def _format_timestamp_mismatch_report(mismatches, total_count):
    """Render a table of the first 10 timestamp mismatches plus a detailed summary."""
    lines = [
        "",
        "TIMESTAMP MISMATCHES DETECTED",
        f"{'Dispatch':<10} {'Start Diff':<12} {'End Diff':<12} {'Dur Diff':<12}",
        "=" * 50,
    ]
    for m in mismatches[:10]:  # cap the table at 10 rows to keep output readable
        lines.append(
            f"{m['dispatch_id']:<10} {m['start_diff']:<12} {m['end_diff']:<12} {m['dur_diff']:<12}"
        )
    if len(mismatches) > 10:
        lines.append(f"... and {len(mismatches) - 10} more mismatches")

    first = mismatches[0]
    return "\n".join(lines) + "\n\n" + (
        f"Timestamp mismatch detected for dispatch {first['dispatch_id']}:\n"
        f"  CSV: start={first['csv_start']}, end={first['csv_end']}, duration={first['csv_dur']}\n"
        f"  JSON: start={first['json_start']}, end={first['json_end']}, duration={first['json_dur']}\n"
        f"  Diff: start={first['start_diff']}, end={first['end_diff']}, duration={first['dur_diff']}\n"
        f"Total mismatches: {len(mismatches)}/{total_count}\n"
        f"NOTE: Since JSON and rocpd come from the same execution, timestamps should be identical."
    )


def test_rocpd_kernel_trace_duration(json_data, db_path, tmp_path):
    """
    Test that rocpd CSV output contains Duration column and values match JSON.

    Test strategy:
    1. Generate JSON and rocpd output from SAME execution (using ROCPROF_OUTPUT_FORMAT env var)
    2. Use rocpd to convert database to CSV
    3. Compare CSV Duration with JSON-derived duration

    Since JSON and rocpd come from the same execution, timestamps should be IDENTICAL.
    We expect ZERO tolerance for differences.

    Validates:
    - Duration column exists in CSV
    - Duration values EXACTLY match between JSON and CSV (zero tolerance)
    - Duration correctly calculated as End - Start
    - Start and End timestamps also match exactly
    """
    # Convert rocpd DB to CSV
    out_dir = tmp_path / "rocpd_csv"
    run_rocpd_convert(db_path, str(out_dir))
    csv_path = find_kernel_trace_csv(str(out_dir))
    csv_rows = load_csv_rows(csv_path)

    # Verify Duration column exists
    assert "Duration" in csv_rows[0], f"missing 'Duration' column; columns={list(csv_rows[0].keys())}"

    # Extract JSON data
    json_records = extract_json_kernel_records(json_data)
    json_map = build_json_duration_map(json_records)

    # Track statistics
    matched_count = 0
    total_count = len(csv_rows)
    mismatches = []

    for csv_row in csv_rows:
        # Get CSV values
        csv_start = int(csv_row["Start_Timestamp"])
        csv_end = int(csv_row["End_Timestamp"])
        csv_dur = int(csv_row["Duration"])

        # Validate CSV internal consistency
        assert csv_start > 0 and csv_end > 0, f"invalid CSV timestamps: start={csv_start} end={csv_end}"
        assert csv_end >= csv_start, f"CSV end before start: {csv_end} < {csv_start}"
        assert csv_dur >= 0, f"negative CSV duration: {csv_dur}"
        assert csv_dur == (csv_end - csv_start), f"CSV duration mismatch: {csv_dur} != {csv_end - csv_start}"

        # Match with JSON and require EXACT match (zero tolerance)
        dispatch_id = csv_row.get("Dispatch_Id") or csv_row.get("Correlation_Id")
        if dispatch_id and str(dispatch_id) in json_map:
            matched_count += 1
            json_start, json_end, json_dur = json_map[str(dispatch_id)]

            # Check for exact match on all three values
            start_diff = csv_start - json_start
            end_diff = csv_end - json_end
            dur_diff = csv_dur - json_dur

            if start_diff != 0 or end_diff != 0 or dur_diff != 0:
                mismatches.append({
                    'dispatch_id': dispatch_id,
                    'csv_start': csv_start,
                    'json_start': json_start,
                    'start_diff': start_diff,
                    'csv_end': csv_end,
                    'json_end': json_end,
                    'end_diff': end_diff,
                    'csv_dur': csv_dur,
                    'json_dur': json_dur,
                    'dur_diff': dur_diff
                })

    # Fail with a detailed report if any mismatch was found
    if mismatches:
        assert False, _format_timestamp_mismatch_report(mismatches, total_count)

    # Ensure we matched all records
    # (fixed: message had a pointless f-string prefix with no placeholders)
    assert matched_count > 0, "No CSV rows matched with JSON records"
    assert matched_count == total_count, f"Only {matched_count}/{total_count} CSV rows matched JSON"
| 212 | + |
128 | 213 |
|
129 | 214 | if __name__ == "__main__": |
130 | 215 | rc = pytest.main(["-x", __file__] + sys.argv[1:]) |
|
0 commit comments