tests: update kernel-duration validate to use rocpd Python API instead of CLI

ihhethan · ihhethan · commit 5ba26a8880f4 · 2026-03-12T10:18:25.000-05:00
diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/kernel-duration-ns/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/kernel-duration-ns/validate.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-
 # MIT License
 #
 # Copyright (c) 2024-2025 Advanced Micro Devices, Inc.
@@ -22,193 +21,217 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
-
-import os
 import sys
-import csv
-import subprocess
 import pytest
 
 
-def run_rocpd_convert(db_path, out_dir):
-    """Convert rocpd database to CSV format."""
-    os.makedirs(out_dir, exist_ok=True)
-    cmd = [sys.executable, "-m", "rocpd", "convert", "-i", db_path, "--output-format", "csv", "-d", out_dir]
-    res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-    assert res.returncode == 0, f"rocpd convert failed\ncmd={' '.join(cmd)}\nstdout={res.stdout}\nstderr={res.stderr}"
-
-
-def find_kernel_trace_csv(out_dir):
-    """Locate kernel_trace CSV file in output directory."""
-    for fn in os.listdir(out_dir):
-        if fn.endswith("kernel_trace.csv"):
-            return os.path.join(out_dir, fn)
-    assert False, f"kernel trace CSV not found in {out_dir}"
-
-
-def load_csv_rows(path):
-    """Load CSV file and return rows as list of dicts."""
-    assert os.path.isfile(path), f"missing CSV: {path}"
-    with open(path, newline="") as f:
-        reader = csv.DictReader(f)
-        rows = list(reader)
-    assert len(rows) > 0, f"empty CSV: {path}"
-    return rows
-
-
 def extract_json_kernel_records(json_root):
     """Extract kernel dispatch records from JSON output."""
     assert "rocprofiler-sdk-tool" in json_root, "missing rocprofiler-sdk-tool in JSON"
     tool = json_root["rocprofiler-sdk-tool"]
     if isinstance(tool, list) and len(tool) > 0:
         tool = tool[0]
+
     assert "buffer_records" in tool, "missing buffer_records in JSON"
     br = tool["buffer_records"]
-    
+
     for key in ("kernel_dispatch", "kernel_trace", "kernel_dispatch_trace"):
         if key in br and isinstance(br[key], list) and len(br[key]) > 0:
             return br[key]
+
     assert False, f"no kernel dispatch records found in JSON buffer_records keys={list(br.keys())}"
 
 
+def _as_int(val, *, field="value"):  # returns int(val); AssertionError names `field` on failure
+    assert val is not None, f"missing {field}"
+    try:
+        return int(val)
+    except (TypeError, ValueError, OverflowError) as e:  # int() errors for bad type/text/inf
+        raise AssertionError(f"failed to parse int for {field}: {val!r} ({e})") from e
+
+
+def _extract_dispatch_id_from_json_record(r):
+    """Return the integer key used to match JSON kernel record `r` against DB rows.
+
+    Prefers r["dispatch_info"]["dispatch_id"]; otherwise r["correlation_id"]["internal"], or
+    r["correlation_id"] itself when it is not a dict. AssertionError (via _as_int) if unusable.
+    """
+    dispatch_info = r.get("dispatch_info", {})
+    dispatch_id = None
+    if isinstance(dispatch_info, dict):
+        dispatch_id = dispatch_info.get("dispatch_id", None)
+
+    if dispatch_id is None:  # no usable dispatch_info.dispatch_id: fall back to the correlation id
+        corr_id = r.get("correlation_id", {})
+        if isinstance(corr_id, dict):
+            dispatch_id = corr_id.get("internal", None)
+        else:
+            dispatch_id = corr_id
+
+    return _as_int(dispatch_id, field="dispatch_id/correlation_id")
+
+
 def build_json_duration_map(records):
-    """Build map of dispatch_id -> (start, end, duration) from JSON records."""
+    """
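+    Build map from JSON kernel records (a repeated dispatch_id keeps the last record seen):
+        key(int dispatch_id) -> (start, end, duration), where duration = end - start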
+    """
     m = {}
     for r in records:
-        # Extract dispatch ID
-        dispatch_info = r.get("dispatch_info", {})
-        dispatch_id = dispatch_info.get("dispatch_id") if isinstance(dispatch_info, dict) else None
-        
-        # Fallback to correlation_id if no dispatch_id
-        if dispatch_id is None:
-            corr_id = r.get("correlation_id", {})
-            if isinstance(corr_id, dict):
-                dispatch_id = corr_id.get("internal", 0)
-            else:
-                dispatch_id = corr_id
-        
-        # Extract timestamps
-        start = r.get("start_timestamp")
-        end = r.get("end_timestamp")
-        assert start is not None and end is not None, f"missing timestamps in JSON record: {r}"
-        
-        start = int(start)
-        end = int(end)
+        did = _extract_dispatch_id_from_json_record(r)
+
+        start = _as_int(r.get("start_timestamp"), field="start_timestamp")
+        end = _as_int(r.get("end_timestamp"), field="end_timestamp")
+
         assert start > 0 and end > 0, f"invalid timestamps start={start} end={end}"
         assert end >= start, f"end before start: start={start} end={end}"
-        
-        duration = end - start
-        m[str(dispatch_id)] = (start, end, duration)
-    
+
+        m[did] = (start, end, end - start)
+
     assert len(m) > 0, "no kernel records extracted from JSON"
     return m
 
 
-def test_rocpd_kernel_trace_duration(json_data, db_path, tmp_path):
+def load_kernel_rows_via_rocpd(db_path):
+    """Query the rocpd DB at `db_path` via the rocpd Python API (same "kernels" source as
+    rocpd/csv.py::write_kernel_csv()). Returns a non-empty list[dict] keyed by the SELECT
+    aliases; Duration is computed in the query as (end - start), not read from a stored column.
+    """
+    try:
+        import rocpd
+    except Exception as e:  # any import-time failure is reported as an AssertionError
+        raise AssertionError(
+            f"failed to import rocpd python module. Ensure PYTHONPATH is set for rocprofiler-sdk build tree. ({e})"
+        ) from e
+
+    # RocpdImportData can take a list of inputs, so the single db_path is wrapped in a list
+    data = rocpd.connect([db_path])
+
+    # Minimal columns required for strict consistency checks
+    # NOTE: rocpd/csv.py::write_kernel_csv selects from "kernels"
+    query = """
+        SELECT
+            dispatch_id AS Dispatch_Id,
+            stack_id   AS Correlation_Id,
+            start      AS Start_Timestamp,
+            end        AS End_Timestamp,
+            (end - start) AS Duration
+        FROM "kernels"
+        ORDER BY
+            guid ASC, start ASC, end DESC
+    """
+
+    cur = rocpd.execute(data, query)
+    cols = [d[0] for d in cur.description]  # first item of each description entry is the column name
+    rows = [dict(zip(cols, r)) for r in cur.fetchall()]
+
+    assert len(rows) > 0, f"no rows returned from kernels table in db: {db_path}"
+    return rows
+
+
+def test_rocpd_kernel_trace_duration(json_data, db_path):
     """
-    Test that rocpd CSV output contains Duration column and values match JSON.
-    
-    Test strategy:
-    1. Generate JSON and rocpd output from SAME execution (using ROCPROF_OUTPUT_FORMAT env var)
-    2. Use rocpd to convert database to CSV
-    3. Compare CSV Duration with JSON-derived duration
-    
-    Since JSON and rocpd come from the same execution, timestamps should be IDENTICAL.
-    We expect ZERO tolerance for differences.
-    
-    Validates:
-    - Duration column exists in CSV
-    - Duration values EXACTLY match between JSON and CSV (zero tolerance)
-    - Duration correctly calculated as End - Start
-    - Start and End timestamps also match exactly
+    Test that kernel start/end/duration read from the rocpd DB exactly match the JSON-derived values.
+
+    Strategy:
+      - JSON and rocpd DB are generated from the SAME rocprofv3 execution (ROCPROF_OUTPUT_FORMAT=json,rocpd)
+      - Read kernel records from JSON
+      - Read kernel rows from rocpd DB using rocpd Python API (no CSV I/O)
+      - Enforce:
+          * DB Duration == End - Start (sanity check only: the query computes Duration as end - start)
+          * DB Start/End/Duration EXACTLY match JSON for each dispatch_id (zero tolerance)
+          * Every kernel row in the DB matches a JSON record (JSON-only records are not checked)
     """
-    # Convert rocpd DB to CSV
-    out_dir = tmp_path / "rocpd_csv"
-    run_rocpd_convert(db_path, str(out_dir))
-    csv_path = find_kernel_trace_csv(str(out_dir))
-    csv_rows = load_csv_rows(csv_path)
-    
-    # Verify Duration column exists
-    assert "Duration" in csv_rows[0], f"missing 'Duration' column; columns={list(csv_rows[0].keys())}"
-    
-    # Extract JSON data
+    # Load DB rows via rocpd Python API
+    db_rows = load_kernel_rows_via_rocpd(db_path)
+
+    # Build JSON dispatch_id -> (start,end,dur)
     json_records = extract_json_kernel_records(json_data)
     json_map = build_json_duration_map(json_records)
-    
-    # Track statistics
+
+    total_count = len(db_rows)
     matched_count = 0
-    total_count = len(csv_rows)
     mismatches = []
-    
-    for csv_row in csv_rows:
-        # Get CSV values
-        csv_start = int(csv_row["Start_Timestamp"])
-        csv_end = int(csv_row["End_Timestamp"])
-        csv_dur = int(csv_row["Duration"])
-        
-        # Validate CSV internal consistency
-        assert csv_start > 0 and csv_end > 0, f"invalid CSV timestamps: start={csv_start} end={csv_end}"
-        assert csv_end >= csv_start, f"CSV end before start: {csv_end} < {csv_start}"
-        assert csv_dur >= 0, f"negative CSV duration: {csv_dur}"
-        assert csv_dur == (csv_end - csv_start), f"CSV duration mismatch: {csv_dur} != {csv_end - csv_start}"
-        
-        # Match with JSON and require EXACT match (zero tolerance)
-        dispatch_id = csv_row.get("Dispatch_Id") or csv_row.get("Correlation_Id")
-        if dispatch_id and str(dispatch_id) in json_map:
-            matched_count += 1
-            json_start, json_end, json_dur = json_map[str(dispatch_id)]
-            
-            # Check for exact match on all three values
-            start_diff = csv_start - json_start
-            end_diff = csv_end - json_end
-            dur_diff = csv_dur - json_dur
-            
-            if start_diff != 0 or end_diff != 0 or dur_diff != 0:
-                mismatches.append({
-                    'dispatch_id': dispatch_id,
-                    'csv_start': csv_start,
-                    'json_start': json_start,
-                    'start_diff': start_diff,
-                    'csv_end': csv_end,
-                    'json_end': json_end,
-                    'end_diff': end_diff,
-                    'csv_dur': csv_dur,
-                    'json_dur': json_dur,
-                    'dur_diff': dur_diff
-                })
-    
-    # Report any mismatches
+    missing_in_json = []
+
+    for row in db_rows:
+        did = _as_int(row.get("Dispatch_Id"), field="Dispatch_Id")
+        start = _as_int(row.get("Start_Timestamp"), field="Start_Timestamp")
+        end = _as_int(row.get("End_Timestamp"), field="End_Timestamp")
+        dur = _as_int(row.get("Duration"), field="Duration")
+
+        # DB internal consistency
+        assert start > 0 and end > 0, f"invalid DB timestamps: start={start} end={end} dispatch_id={did}"
+        assert end >= start, f"DB end before start: start={start} end={end} dispatch_id={did}"
+        assert dur >= 0, f"negative DB duration: duration={dur} dispatch_id={did}"
+        assert dur == (end - start), (
+            f"DB duration mismatch: duration={dur} != end-start={end - start} dispatch_id={did}"
+        )
+
+        if did not in json_map:
+            missing_in_json.append(did)
+            continue
+
+        matched_count += 1
+        j_start, j_end, j_dur = json_map[did]
+
+        sd = start - j_start
+        ed = end - j_end
+        dd = dur - j_dur
+
+        if sd != 0 or ed != 0 or dd != 0:
+            mismatches.append(
+                {
+                    "dispatch_id": did,
+                    "db_start": start,
+                    "json_start": j_start,
+                    "start_diff": sd,
+                    "db_end": end,
+                    "json_end": j_end,
+                    "end_diff": ed,
+                    "db_dur": dur,
+                    "json_dur": j_dur,
+                    "dur_diff": dd,
+                }
+            )
+
+    # Hard failures with actionable context
+    if missing_in_json:
+        sample = missing_in_json[:10]
+        raise AssertionError(
+            "Some DB kernel rows had dispatch_id not present in JSON records. "
+            "Since JSON and rocpd come from the same execution, dispatch IDs should align.\n"
+            f"Missing count: {len(missing_in_json)}/{total_count}\n"
+            f"Sample missing dispatch_ids: {sample}"
+        )
+
     if mismatches:
-        error_lines = [
+        lines = [
             "",
-            "TIMESTAMP MISMATCHES DETECTED",
-            f"{'Dispatch':<10} {'Start Diff':<12} {'End Diff':<12} {'Dur Diff':<12}",
-            "=" * 50
+            "TIMESTAMP/DURATION MISMATCHES DETECTED (zero tolerance)",
+            f"{'Dispatch':<12} {'StartDiff':<12} {'EndDiff':<12} {'DurDiff':<12}",
+            "=" * 56,
         ]
-        
-        for m in mismatches[:10]:  # Show first 10
-            error_lines.append(
-                f"{m['dispatch_id']:<10} {m['start_diff']:<12} {m['end_diff']:<12} {m['dur_diff']:<12}"
+        for m in mismatches[:10]:
+            lines.append(
+                f"{m['dispatch_id']:<12} {m['start_diff']:<12} {m['end_diff']:<12} {m['dur_diff']:<12}"
             )
-        
         if len(mismatches) > 10:
-            error_lines.append(f"... and {len(mismatches) - 10} more mismatches")
-        
-        # Fail the test with detailed error
+            lines.append(f"... and {len(mismatches) - 10} more mismatches")
+
         first = mismatches[0]
-        error_msg = "\n".join(error_lines) + "\n\n" + (
-            f"Timestamp mismatch detected for dispatch {first['dispatch_id']}:\n"
-            f"  CSV:  start={first['csv_start']}, end={first['csv_end']}, duration={first['csv_dur']}\n"
+        detail = (
+            f"\n\nExample mismatch for dispatch {first['dispatch_id']}:\n"
+            f"  DB:   start={first['db_start']}, end={first['db_end']}, duration={first['db_dur']}\n"
             f"  JSON: start={first['json_start']}, end={first['json_end']}, duration={first['json_dur']}\n"
             f"  Diff: start={first['start_diff']}, end={first['end_diff']}, duration={first['dur_diff']}\n"
             f"Total mismatches: {len(mismatches)}/{total_count}\n"
-            f"NOTE: Since JSON and rocpd come from the same execution, timestamps should be identical."
+            "NOTE: Since JSON and rocpd come from the same execution, these should be identical."
         )
-        assert False, error_msg
-    
-    # Ensure we matched all records
-    assert matched_count > 0, f"No CSV rows matched with JSON records"
-    assert matched_count == total_count, f"Only {matched_count}/{total_count} CSV rows matched JSON"
+        raise AssertionError("\n".join(lines) + detail)
+
+    assert matched_count > 0, "No DB rows matched JSON records"
+    assert matched_count == total_count, f"Only {matched_count}/{total_count} DB rows matched JSON"
 
 
 if __name__ == "__main__":