From 111c7d8ce6b1fd5dfa93cdb2c7e44f49721cd2a9 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 19:23:40 +0000
Subject: [PATCH 01/25] Add TuningRecord tracking for compile and benchmark stages

---
 .../sharktuner/candidate_tuning_records.py    | 35 +++++++++++++++++++
 sharktuner/sharktuner/libtuner.py             | 34 ++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 sharktuner/sharktuner/candidate_tuning_records.py

diff --git a/sharktuner/sharktuner/candidate_tuning_records.py b/sharktuner/sharktuner/candidate_tuning_records.py
new file mode 100644
index 00000000000..436c39bd65f
--- /dev/null
+++ b/sharktuner/sharktuner/candidate_tuning_records.py
@@ -0,0 +1,35 @@
+from typing import Optional
+from dataclasses import dataclass
+
+from . import common, candidate_tuning_records
+
+
+@dataclass
+class TuningRecord:
+    gen_id: int
+    candidate_id: int
+    knob: Optional[common.KnobAssignment] = None
+    to_compile: bool = False
+    compile_status: bool = False
+    to_benchmark: bool = False
+    benchmark_device_id: Optional[str] = None
+    benchmark_queue_position: Optional[int] = None
+    benchmark_status: bool = False
+    baseline_benchmark_time_us: Optional[float] = None
+    benchmark_time_us: Optional[float] = None
+    benchmark_speedup: Optional[float] = None
+    benchmark_rank_order: Optional[int] = None
+
+def init_tuning_records(knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]) -> list[TuningRecord]:
+    tuning_records: list[TuningRecord] = []
+    tuning_records.append(TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True))
+
+    for can_idx, gen_idx in enumerate(sorted_order, start=1):
+        tr = TuningRecord(
+            gen_id=gen_idx,
+            candidate_id=can_idx,
+            knob=knobs[gen_idx],
+        )
+        tuning_records.append(tr)
+
+    return tuning_records
\ No newline at end of file
diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py
index 111b24809ec..b682dc1848f 100644
--- a/sharktuner/sharktuner/libtuner.py
+++ b/sharktuner/sharktuner/libtuner.py
@@ -49,6 +49,7 @@
     dispatch_constraints,
     dispatch_parser,
     candidate_ordering,
+    candidate_tuning_records,
 )
 
 
@@ -125,6 +126,7 @@ def __init__(self, tuner_context: common.TunerContext):
         self.tuner_context = tuner_context
         self.candidate_trackers: list[CandidateTracker] = []
         self.target_info: Optional[iree_gpu.TargetInfo] = None
+        self.tuning_records: list[candidate_tuning_records.TuningRecord] = []
 
     @abstractmethod
     def get_iree_compile_flags(self) -> list[str]:
@@ -845,6 +847,10 @@ def generate_candidate_specs(
         # Total number of configs = candidates generated + baseline.
         assert len(config_specs) == len(solutions) + 1
 
+        tuning_client.tuning_records = candidate_tuning_records.init_tuning_records(
+            knobs, sorted_order
+        )
+
         knob_assignments = [dispatch_tuner.get_knob_assignment(s) for s in solutions]
         logging.debug("candidate_gen.py ends")
         handle_error(
@@ -1193,6 +1199,7 @@ def compile(
     # Set the source and output file paths for compilation of each candidate.
     path_config.compiled_dir.mkdir(parents=True, exist_ok=True)
     for i in candidates:
+        tuning_client.tuning_records[i].to_compile = True
         vmfb_file_name = path_config.get_candidate_vmfb_filename(
             tuning_client.candidate_trackers[i].candidate_id
         )
@@ -1231,6 +1238,7 @@ def compile(
     # Remove duplicate vmfbs from the candidate list.
     compiled_candidate_hashes = []
     for candidate_id in compiled_candidates:
+        tuning_client.tuning_records[candidate_id].compile_status = True
         candidate_vmfb = tuning_client.candidate_trackers[
             candidate_id
         ].compiled_vmfb_path
@@ -1268,6 +1276,7 @@ def benchmark(
 
     # Benchmarking baselines on each involved device.
     baseline_tracker = tuning_client.candidate_trackers[0]
+    tuning_client.tuning_records[0].to_benchmark = True
     first_baseline_result, subprocess_timeout_reference = benchmark_baseline(
         devices=args.devices,
         tuning_client=tuning_client,
@@ -1275,14 +1284,19 @@ def benchmark(
     )
     baseline_handler = BaselineResultHandler()
     baseline_handler.add_run(first_baseline_result)
+    tuning_client.tuning_records[0].benchmark_status = True
     if not baseline_handler.is_valid():
         logging.warning("Baseline run failed.")
+        tuning_client.tuning_records[0].benchmark_status = False
 
     if tuning_client.is_auto_iree_benchmark_timeout():
         logging.info(
             f"Smart candidate benchmark timeout is set to {subprocess_timeout_reference:.2f}s"
         )
     candidate_indices = [i for i in compiled_candidates if i != 0]
+    for i, idx in enumerate(candidate_indices, start=1):
+        tuning_client.tuning_records[idx].benchmark_queue_position = i
+        tuning_client.tuning_records[idx].to_benchmark = True
 
     candidate_results = benchmark_candidates(
         candidate_indices=candidate_indices,
@@ -1292,6 +1306,17 @@ def benchmark(
         benchmark_time=benchmark_time,  # Only candidate benchmark has time limit.
     )
 
+    for res in candidate_results:
+        tuning_client.tuning_records[
+            res.candidate_id
+        ].benchmark_device_id = res.device_id
+        if res.time == math.inf:
+            continue
+        tuning_client.tuning_records[res.candidate_id].benchmark_status = True
+        tuning_client.tuning_records[res.candidate_id].benchmark_time_us = round(
+            res.time, 2
+        )
+
     second_baseline_result, _ = benchmark_baseline(
         devices=args.devices,
         tuning_client=tuning_client,
@@ -1315,6 +1340,15 @@ def benchmark(
         candidate_results,
         prune_slow_candidates=tuning_client.should_prune_slower_candidates(),
     )
+    if all_candidates_with_speedup:
+        for i, handler_res in enumerate(all_candidates_with_speedup, start=1):
+            benchmark_res, speedup = handler_res
+            cid, _, device_id = benchmark_res
+            bas = baseline_handler.get_average_result_us(device_id)
+            tuning_client.tuning_records[cid].baseline_benchmark_time_us = round(bas, 2)
+            tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5)
+            tuning_client.tuning_records[cid].benchmark_rank_order = i
+
     top_candidates_with_speedup = (
         all_candidates_with_speedup[:num_candidates]
         if num_candidates

From ab94d4e87427fc8845833c44a9b8a418c4f15042 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 19:58:09 +0000
Subject: [PATCH 02/25] Add CSV export of tuning records

---
 sharktuner/dispatch_tuner/dispatch_tuner.py   |  8 +++
 .../sharktuner/candidate_tuning_records.py    | 52 +++++++++++++++++--
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py
index 03a29a97bb2..0282e408c6b 100644
--- a/sharktuner/dispatch_tuner/dispatch_tuner.py
+++ b/sharktuner/dispatch_tuner/dispatch_tuner.py
@@ -159,3 +159,11 @@ def main() -> None:
             print(path_config.run_log.resolve())
         print("Check the summary in:")
         print(summary_log_file.resolve())
+
+        output_csv_name = (
+            f"tuning_{args.dispatch_file.stem.removesuffix('_benchmark')}.csv"
+        )
+        csv_path = libtuner.candidate_tuning_records.export_record_to_csv(
+            dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name
+        )
+        print(f"Wrote tuning records CSV: {csv_path}")
diff --git a/sharktuner/sharktuner/candidate_tuning_records.py b/sharktuner/sharktuner/candidate_tuning_records.py
index 436c39bd65f..a6ae9909034 100644
--- a/sharktuner/sharktuner/candidate_tuning_records.py
+++ b/sharktuner/sharktuner/candidate_tuning_records.py
@@ -1,5 +1,8 @@
+import os
+import csv
 from typing import Optional
 from dataclasses import dataclass
+from pathlib import Path
 
 from . import common, candidate_tuning_records
 
@@ -20,9 +23,14 @@ class TuningRecord:
     benchmark_speedup: Optional[float] = None
     benchmark_rank_order: Optional[int] = None
 
-def init_tuning_records(knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]) -> list[TuningRecord]:
+
+def init_tuning_records(
+    knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]
+) -> list[TuningRecord]:
     tuning_records: list[TuningRecord] = []
-    tuning_records.append(TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True))
+    tuning_records.append(
+        TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True)
+    )
 
     for can_idx, gen_idx in enumerate(sorted_order, start=1):
         tr = TuningRecord(
@@ -32,4 +40,42 @@ def init_tuning_records(knobs: list[Optional[common.KnobAssignment]], sorted_ord
         )
         tuning_records.append(tr)
 
-    return tuning_records
\ No newline at end of file
+    return tuning_records
+
+
+def export_record_to_csv(
+    objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv"
+) -> Path:
+    if not objects:
+        return None
+
+    rows = []
+    headers = []
+
+    for obj in objects:
+        row = {}
+        for k, v in vars(obj).items():
+            if hasattr(v, "__dict__"):
+                nested = vars(v)
+                if nested:  # only if it has attrs
+                    for nk, nv in nested.items():
+                        key = f"{k}.{nk}"
+                        row[key] = nv
+                        if key not in headers:
+                            headers.append(key)
+                else:
+                    # skip empty nested object entirely
+                    continue
+            else:
+                row[k] = v
+                if k not in headers:
+                    headers.append(k)
+        rows.append(row)
+
+    path = os.path.join(dest_dir, filename)
+    with open(path, "w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=headers)
+        writer.writeheader()
+        writer.writerows(rows)
+
+    return path

From 669251275492303578b8f9cc42a432a8589181a8 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 20:15:39 +0000
Subject: [PATCH 03/25] Move code to candidate_ordering

---
 sharktuner/sharktuner/candidate_ordering.py   |  83 ++++++++-
 .../sharktuner/candidate_tuning_records.py    |  81 ---------
 sharktuner/sharktuner/libtuner.py             |   5 +-
 sharktuner/tests/candidate_ordering_test.py   | 157 +++++++++++-------
 4 files changed, 180 insertions(+), 146 deletions(-)
 delete mode 100644 sharktuner/sharktuner/candidate_tuning_records.py

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index f61db9f021c..a247379216a 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -1,7 +1,12 @@
-from enum import Enum
-from typing import Optional, Callable
 import random
 import logging
+import os
+import csv
+from typing import Optional
+from dataclasses import dataclass
+from pathlib import Path
+from enum import Enum
+from typing import Optional, Callable
 
 from iree.compiler.dialects import iree_gpu  # type: ignore
 
@@ -104,3 +109,77 @@ def reorder_assignments(
             return indices
         case _:
             assert False
+
+
+@dataclass
+class TuningRecord:
+    gen_id: int
+    candidate_id: int
+    knob: Optional[common.KnobAssignment] = None
+    to_compile: bool = False
+    compile_status: bool = False
+    to_benchmark: bool = False
+    benchmark_device_id: Optional[str] = None
+    benchmark_queue_position: Optional[int] = None
+    benchmark_status: bool = False
+    baseline_benchmark_time_us: Optional[float] = None
+    benchmark_time_us: Optional[float] = None
+    benchmark_speedup: Optional[float] = None
+    benchmark_rank_order: Optional[int] = None
+
+
+def init_tuning_records(
+    knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]
+) -> list[TuningRecord]:
+    tuning_records: list[TuningRecord] = []
+    tuning_records.append(
+        TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True)
+    )
+
+    for can_idx, gen_idx in enumerate(sorted_order, start=1):
+        tr = TuningRecord(
+            gen_id=gen_idx,
+            candidate_id=can_idx,
+            knob=knobs[gen_idx],
+        )
+        tuning_records.append(tr)
+
+    return tuning_records
+
+
+def export_record_to_csv(
+    objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv"
+) -> Path:
+    if not objects:
+        return None
+
+    rows = []
+    headers = []
+
+    for obj in objects:
+        row = {}
+        for k, v in vars(obj).items():
+            if hasattr(v, "__dict__"):
+                nested = vars(v)
+                if nested:  # only if it has attrs
+                    for nk, nv in nested.items():
+                        key = f"{k}.{nk}"
+                        row[key] = nv
+                        if key not in headers:
+                            headers.append(key)
+                else:
+                    # skip empty nested object entirely
+                    continue
+            else:
+                row[k] = v
+                if k not in headers:
+                    headers.append(k)
+        rows.append(row)
+
+    path = os.path.join(dest_dir, filename)
+    with open(path, "w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=headers)
+        writer.writeheader()
+        writer.writerows(rows)
+
+    return path
diff --git a/sharktuner/sharktuner/candidate_tuning_records.py b/sharktuner/sharktuner/candidate_tuning_records.py
deleted file mode 100644
index a6ae9909034..00000000000
--- a/sharktuner/sharktuner/candidate_tuning_records.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import os
-import csv
-from typing import Optional
-from dataclasses import dataclass
-from pathlib import Path
-
-from . import common, candidate_tuning_records
-
-
-@dataclass
-class TuningRecord:
-    gen_id: int
-    candidate_id: int
-    knob: Optional[common.KnobAssignment] = None
-    to_compile: bool = False
-    compile_status: bool = False
-    to_benchmark: bool = False
-    benchmark_device_id: Optional[str] = None
-    benchmark_queue_position: Optional[int] = None
-    benchmark_status: bool = False
-    baseline_benchmark_time_us: Optional[float] = None
-    benchmark_time_us: Optional[float] = None
-    benchmark_speedup: Optional[float] = None
-    benchmark_rank_order: Optional[int] = None
-
-
-def init_tuning_records(
-    knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]
-) -> list[TuningRecord]:
-    tuning_records: list[TuningRecord] = []
-    tuning_records.append(
-        TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True)
-    )
-
-    for can_idx, gen_idx in enumerate(sorted_order, start=1):
-        tr = TuningRecord(
-            gen_id=gen_idx,
-            candidate_id=can_idx,
-            knob=knobs[gen_idx],
-        )
-        tuning_records.append(tr)
-
-    return tuning_records
-
-
-def export_record_to_csv(
-    objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv"
-) -> Path:
-    if not objects:
-        return None
-
-    rows = []
-    headers = []
-
-    for obj in objects:
-        row = {}
-        for k, v in vars(obj).items():
-            if hasattr(v, "__dict__"):
-                nested = vars(v)
-                if nested:  # only if it has attrs
-                    for nk, nv in nested.items():
-                        key = f"{k}.{nk}"
-                        row[key] = nv
-                        if key not in headers:
-                            headers.append(key)
-                else:
-                    # skip empty nested object entirely
-                    continue
-            else:
-                row[k] = v
-                if k not in headers:
-                    headers.append(k)
-        rows.append(row)
-
-    path = os.path.join(dest_dir, filename)
-    with open(path, "w", newline="", encoding="utf-8") as f:
-        writer = csv.DictWriter(f, fieldnames=headers)
-        writer.writeheader()
-        writer.writerows(rows)
-
-    return path
diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py
index b682dc1848f..1e346131bed 100644
--- a/sharktuner/sharktuner/libtuner.py
+++ b/sharktuner/sharktuner/libtuner.py
@@ -49,7 +49,6 @@
     dispatch_constraints,
     dispatch_parser,
     candidate_ordering,
-    candidate_tuning_records,
 )
 
 
@@ -126,7 +125,7 @@ def __init__(self, tuner_context: common.TunerContext):
         self.tuner_context = tuner_context
         self.candidate_trackers: list[CandidateTracker] = []
         self.target_info: Optional[iree_gpu.TargetInfo] = None
-        self.tuning_records: list[candidate_tuning_records.TuningRecord] = []
+        self.tuning_records: list[candidate_ordering.TuningRecord] = []
 
     @abstractmethod
     def get_iree_compile_flags(self) -> list[str]:
@@ -847,7 +846,7 @@ def generate_candidate_specs(
         # Total number of configs = candidates generated + baseline.
         assert len(config_specs) == len(solutions) + 1
 
-        tuning_client.tuning_records = candidate_tuning_records.init_tuning_records(
+        tuning_client.tuning_records = candidate_ordering.init_tuning_records(
             knobs, sorted_order
         )
 
diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index ab0bafd9ec1..ff13c4385e5 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -6,6 +6,7 @@
 
 import math
 import pytest
+from typing import Optional
 
 from iree.compiler import ir  # type: ignore
 from iree.compiler.dialects import iree_gpu  # type: ignore
@@ -13,60 +14,63 @@
 from sharktuner import candidate_ordering, common
 
 
-knob_1 = common.LLVMGPUVectorDistributeContractionKnobs(
-    M=2048,
-    N=10240,
-    K=1280,
-    tile_m=128,
-    tile_n=64,
-    tile_k=64,
-    wg_x=64,
-    wg_y=2,
-    wg_z=1,
-    subgroup_m_cnt=2,
-    subgroup_n_cnt=1,
-    intrinsic_mn=32,
-    intrinsic_k=8,
-    subgroup_m=0,
-    subgroup_n=0,
-    subgroup_k=0,
-)
-knob_2 = common.LLVMGPUVectorDistributeContractionKnobs(
-    M=2048,
-    N=10240,
-    K=1280,
-    tile_m=64,
-    tile_n=320,
-    tile_k=80,
-    wg_x=320,
-    wg_y=1,
-    wg_z=1,
-    subgroup_m_cnt=1,
-    subgroup_n_cnt=5,
-    intrinsic_mn=16,
-    intrinsic_k=16,
-    subgroup_m=0,
-    subgroup_n=0,
-    subgroup_k=0,
-)
-knob_3 = common.LLVMGPUVectorDistributeContractionKnobs(
-    M=2048,
-    N=10240,
-    K=1280,
-    tile_m=64,
-    tile_n=256,
-    tile_k=16,
-    wg_x=256,
-    wg_y=2,
-    wg_z=1,
-    subgroup_m_cnt=2,
-    subgroup_n_cnt=4,
-    intrinsic_mn=16,
-    intrinsic_k=16,
-    subgroup_m=0,
-    subgroup_n=0,
-    subgroup_k=0,
-)
+@pytest.fixture
+def sample_knobs() -> list[Optional[common.KnobAssignment]]:
+    knob_1 = common.LLVMGPUVectorDistributeContractionKnobs(
+        M=2048,
+        N=10240,
+        K=1280,
+        tile_m=128,
+        tile_n=64,
+        tile_k=64,
+        wg_x=64,
+        wg_y=2,
+        wg_z=1,
+        subgroup_m_cnt=2,
+        subgroup_n_cnt=1,
+        intrinsic_mn=32,
+        intrinsic_k=8,
+        subgroup_m=0,
+        subgroup_n=0,
+        subgroup_k=0,
+    )
+    knob_2 = common.LLVMGPUVectorDistributeContractionKnobs(
+        M=2048,
+        N=10240,
+        K=1280,
+        tile_m=64,
+        tile_n=320,
+        tile_k=80,
+        wg_x=320,
+        wg_y=1,
+        wg_z=1,
+        subgroup_m_cnt=1,
+        subgroup_n_cnt=5,
+        intrinsic_mn=16,
+        intrinsic_k=16,
+        subgroup_m=0,
+        subgroup_n=0,
+        subgroup_k=0,
+    )
+    knob_3 = common.LLVMGPUVectorDistributeContractionKnobs(
+        M=2048,
+        N=10240,
+        K=1280,
+        tile_m=64,
+        tile_n=256,
+        tile_k=16,
+        wg_x=256,
+        wg_y=2,
+        wg_z=1,
+        subgroup_m_cnt=2,
+        subgroup_n_cnt=4,
+        intrinsic_mn=16,
+        intrinsic_k=16,
+        subgroup_m=0,
+        subgroup_n=0,
+        subgroup_k=0,
+    )
+    return [knob_1, knob_2, knob_3]
 
 
 @pytest.fixture
@@ -100,14 +104,15 @@ def test_math_expression() -> None:
     assert math.isclose(ai, expected, rel_tol=1e-9)
 
 
-def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None:
-    knobs: list[common.KnobAssignment | None] = [knob_1, knob_2, knob_3]
-
+def test_reorder_assignments(
+    target_info: iree_gpu.TargetInfo,
+    sample_knobs: list[Optional[common.KnobAssignment]],
+) -> None:
     expected_order = [0, 1, 2]
     assert (
         candidate_ordering.reorder_assignments(
             target_info=target_info,
-            knobs=knobs,
+            knobs=sample_knobs,
             strategy=candidate_ordering.CandidateOrderKind.no_sort,
         )
         == expected_order
@@ -117,7 +122,7 @@ def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None:
     assert (
         candidate_ordering.reorder_assignments(
             target_info=target_info,
-            knobs=knobs,
+            knobs=sample_knobs,
             strategy=candidate_ordering.CandidateOrderKind.heuristic,
         )
         == expected_order
@@ -126,7 +131,7 @@ def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None:
     expected_order = [0, 2, 1]
     assert (
         candidate_ordering.reorder_assignments(
-            knobs=knobs,
+            knobs=sample_knobs,
             strategy=candidate_ordering.CandidateOrderKind.heuristic,
             key_fn=lambda knob: knob.tile_n,
         )
@@ -152,3 +157,35 @@ def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None:
         )
         == []
     )
+
+
+def test_init_tuning_records(
+    sample_knobs: list[Optional[common.KnobAssignment]],
+) -> None:
+    sorted_order = [2, 0, 1]
+    tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order)
+    expected: list[candidate_ordering.TuningRecord] = []
+
+    expected: list[candidate_ordering.TuningRecord] = [
+        candidate_ordering.TuningRecord(
+            gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True
+        )
+    ]
+    tr1 = candidate_ordering.TuningRecord(
+        gen_id=2,
+        candidate_id=1,
+        knob=sample_knobs[2],
+    )
+    tr2 = candidate_ordering.TuningRecord(
+        gen_id=0,
+        candidate_id=2,
+        knob=sample_knobs[0],
+    )
+    tr3 = candidate_ordering.TuningRecord(
+        gen_id=1,
+        candidate_id=3,
+        knob=sample_knobs[1],
+    )
+    expected += [tr1, tr2, tr3]
+
+    assert tuning_records == expected

From ecea646380fac6cd5a144739344c8e7b8f42d79b Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 20:31:24 +0000
Subject: [PATCH 04/25] Fix export_record_to_csv return type and guard missing baseline result

---
 sharktuner/sharktuner/candidate_ordering.py | 4 ++--
 sharktuner/sharktuner/libtuner.py           | 6 ++++--
 sharktuner/tests/candidate_ordering_test.py | 3 +--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index a247379216a..8558cd25166 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -149,7 +149,7 @@ def init_tuning_records(
 
 def export_record_to_csv(
     objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv"
-) -> Path:
+) -> Optional[Path]:
     if not objects:
         return None
 
@@ -176,7 +176,7 @@ def export_record_to_csv(
                     headers.append(k)
         rows.append(row)
 
-    path = os.path.join(dest_dir, filename)
+    path = Path(os.path.join(dest_dir, filename))
     with open(path, "w", newline="", encoding="utf-8") as f:
         writer = csv.DictWriter(f, fieldnames=headers)
         writer.writeheader()
diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py
index 1e346131bed..6c38048d7f2 100644
--- a/sharktuner/sharktuner/libtuner.py
+++ b/sharktuner/sharktuner/libtuner.py
@@ -1343,8 +1343,10 @@ def benchmark(
         for i, handler_res in enumerate(all_candidates_with_speedup, start=1):
             benchmark_res, speedup = handler_res
             cid, _, device_id = benchmark_res
-            bas = baseline_handler.get_average_result_us(device_id)
-            tuning_client.tuning_records[cid].baseline_benchmark_time_us = round(bas, 2)
+            baseline_res = baseline_handler.get_average_result_us(device_id)
+            tuning_client.tuning_records[cid].baseline_benchmark_time_us = (
+                round(baseline_res, 2) if baseline_res else None
+            )
             tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5)
             tuning_client.tuning_records[cid].benchmark_rank_order = i
 
diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index ff13c4385e5..8fa9d662cd3 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -138,7 +138,7 @@ def test_reorder_assignments(
         == expected_order
     )
 
-    knobs = [None, None, None]
+    knobs: list[Optional[common.KnobAssignment]] = [None, None, None]
     assert (
         candidate_ordering.reorder_assignments(
             target_info=target_info,
@@ -164,7 +164,6 @@ def test_init_tuning_records(
 ) -> None:
     sorted_order = [2, 0, 1]
     tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order)
-    expected: list[candidate_ordering.TuningRecord] = []
 
     expected: list[candidate_ordering.TuningRecord] = [
         candidate_ordering.TuningRecord(

From 1f38ece9392802b742a184a2da1a85fc478ae045 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 20:47:19 +0000
Subject: [PATCH 05/25] Fix lint

---
 sharktuner/sharktuner/candidate_ordering.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 8558cd25166..aeec7ec9c04 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -148,27 +148,35 @@ def init_tuning_records(
 
 
 def export_record_to_csv(
-    objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv"
+    tuning_records: list[TuningRecord], dest_dir: Path, filename: str = "export.csv"
 ) -> Optional[Path]:
-    if not objects:
+    """
+    Exports a list of `TuningRecord` objects to a CSV file.
+
+    - Each record becomes one CSV row.
+    - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) are written as individual columns.
+    - Nested object (i.e., `knob`) is flattened using dot notation: knob.tile_m, knob.intrinsic_mn
+
+    """
+    if not tuning_records:
         return None
 
     rows = []
     headers = []
 
-    for obj in objects:
+    for tuning_record in tuning_records:
         row = {}
-        for k, v in vars(obj).items():
+        for k, v in vars(tuning_record).items():
             if hasattr(v, "__dict__"):
                 nested = vars(v)
-                if nested:  # only if it has attrs
+                if nested:  # Only if it has attrs.
                     for nk, nv in nested.items():
                         key = f"{k}.{nk}"
                         row[key] = nv
                         if key not in headers:
                             headers.append(key)
                 else:
-                    # skip empty nested object entirely
+                    # Skip empty nested object entirely.
                     continue
             else:
                 row[k] = v

From 988325d09eee63a87ab3ddf302b150bbf73119c4 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 20:49:37 +0000
Subject: [PATCH 06/25] Call export_record_to_csv via candidate_ordering in dispatch_tuner

---
 sharktuner/dispatch_tuner/dispatch_tuner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py
index 0282e408c6b..0b9de3f6232 100644
--- a/sharktuner/dispatch_tuner/dispatch_tuner.py
+++ b/sharktuner/dispatch_tuner/dispatch_tuner.py
@@ -163,7 +163,7 @@ def main() -> None:
         output_csv_name = (
             f"tuning_{args.dispatch_file.stem.removesuffix('_benchmark')}.csv"
         )
-        csv_path = libtuner.candidate_tuning_records.export_record_to_csv(
+        csv_path = libtuner.candidate_ordering.export_record_to_csv(
             dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name
         )
         print(f"Wrote tuning records CSV: {csv_path}")

From 5ed14c349ddf7eb0b6bdd8cc7eccad9b1e6712ce Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 20:50:57 +0000
Subject: [PATCH 07/25] Keep full dispatch file stem in tuning CSV name

---
 sharktuner/dispatch_tuner/dispatch_tuner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py
index 0b9de3f6232..9bb22146920 100644
--- a/sharktuner/dispatch_tuner/dispatch_tuner.py
+++ b/sharktuner/dispatch_tuner/dispatch_tuner.py
@@ -161,7 +161,7 @@ def main() -> None:
         print(summary_log_file.resolve())
 
         output_csv_name = (
-            f"tuning_{args.dispatch_file.stem.removesuffix('_benchmark')}.csv"
+            f"tuning_{args.dispatch_file.stem}.csv"
         )
         csv_path = libtuner.candidate_ordering.export_record_to_csv(
             dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name

From 414ad27046c7f96d63cb11975bfcc4d466d04fa3 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 20:52:05 +0000
Subject: [PATCH 08/25] Remove redundant comments in export_record_to_csv

---
 sharktuner/sharktuner/candidate_ordering.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index aeec7ec9c04..1f6935a486d 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -169,14 +169,13 @@ def export_record_to_csv(
         for k, v in vars(tuning_record).items():
             if hasattr(v, "__dict__"):
                 nested = vars(v)
-                if nested:  # Only if it has attrs.
+                if nested:
                     for nk, nv in nested.items():
                         key = f"{k}.{nk}"
                         row[key] = nv
                         if key not in headers:
                             headers.append(key)
                 else:
-                    # Skip empty nested object entirely.
                     continue
             else:
                 row[k] = v

From c1440f772f815cb28bf10352ded2caf7fe2798db Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 20:56:49 +0000
Subject: [PATCH 09/25] Add comments

---
 sharktuner/dispatch_tuner/dispatch_tuner.py | 4 +---
 sharktuner/sharktuner/candidate_ordering.py | 4 ++--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py
index 9bb22146920..6b24d586176 100644
--- a/sharktuner/dispatch_tuner/dispatch_tuner.py
+++ b/sharktuner/dispatch_tuner/dispatch_tuner.py
@@ -160,9 +160,7 @@ def main() -> None:
         print("Check the summary in:")
         print(summary_log_file.resolve())
 
-        output_csv_name = (
-            f"tuning_{args.dispatch_file.stem}.csv"
-        )
+        output_csv_name = f"tuning_{args.dispatch_file.stem}.csv"
         csv_path = libtuner.candidate_ordering.export_record_to_csv(
             dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name
         )
diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 1f6935a486d..6e875e19f68 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -113,8 +113,8 @@ def reorder_assignments(
 
 @dataclass
 class TuningRecord:
-    gen_id: int
-    candidate_id: int
+    gen_id: int  # Original index from candidate generation.
+    candidate_id: int  # Index in candidate_trackers after sorting.
     knob: Optional[common.KnobAssignment] = None
     to_compile: bool = False
     compile_status: bool = False

From 66cf70f432bc89578ce10e8aa43d30452b0c4b46 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Wed, 12 Nov 2025 23:23:02 +0000
Subject: [PATCH 10/25] Add test for flatten_records

---
 sharktuner/dispatch_tuner/dispatch_tuner.py |   5 +-
 sharktuner/sharktuner/candidate_ordering.py |  30 ++--
 sharktuner/tests/candidate_ordering_test.py | 161 ++++++++++++++++++--
 3 files changed, 171 insertions(+), 25 deletions(-)

diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py
index 6b24d586176..a2a344d35fb 100644
--- a/sharktuner/dispatch_tuner/dispatch_tuner.py
+++ b/sharktuner/dispatch_tuner/dispatch_tuner.py
@@ -161,7 +161,8 @@ def main() -> None:
         print(summary_log_file.resolve())
 
         output_csv_name = f"tuning_{args.dispatch_file.stem}.csv"
-        csv_path = libtuner.candidate_ordering.export_record_to_csv(
-            dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name
+        csv_path = Path(path_config.base_dir / output_csv_name)
+        libtuner.candidate_ordering.export_record_to_csv(
+            dispatch_tuner.tuning_records, csv_path
         )
         print(f"Wrote tuning records CSV: {csv_path}")
diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 6e875e19f68..2fd4c33b673 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -2,7 +2,7 @@
 import logging
 import os
 import csv
-from typing import Optional
+from typing import Optional, Any
 from dataclasses import dataclass
 from pathlib import Path
 from enum import Enum
@@ -147,20 +147,16 @@ def init_tuning_records(
     return tuning_records
 
 
-def export_record_to_csv(
-    tuning_records: list[TuningRecord], dest_dir: Path, filename: str = "export.csv"
-) -> Optional[Path]:
+def flatten_records(
+    tuning_records: list[TuningRecord],
+) -> tuple[list[str], list[dict[str, Any]]]:
     """
-    Exports a list of `TuningRecord` objects to a CSV file.
+    Flatten a list of `TuningRecord` objects to CSV headers and rows
 
     - Each record becomes one CSV row.
     - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) are written as individual columns.
     - Nested object (i.e., `knob`) is flattened using dot notation: knob.tile_m, knob.intrinsic_mn
-
     """
-    if not tuning_records:
-        return None
-
     rows = []
     headers = []
 
@@ -179,14 +175,20 @@ def export_record_to_csv(
                     continue
             else:
                 row[k] = v
-                if k not in headers:
+                if k not in headers and k != "knob":
                     headers.append(k)
         rows.append(row)
 
-    path = Path(os.path.join(dest_dir, filename))
-    with open(path, "w", newline="", encoding="utf-8") as f:
+    return headers, rows
+
+
+def export_record_to_csv(tuning_records: list[TuningRecord], dest_file: Path) -> None:
+    if not tuning_records:
+        return None
+
+    headers, rows = flatten_records(tuning_records)
+
+    with open(dest_file, "w", newline="", encoding="utf-8") as f:
         writer = csv.DictWriter(f, fieldnames=headers)
         writer.writeheader()
         writer.writerows(rows)
-
-    return path
diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index 8fa9d662cd3..065dbe57427 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -162,14 +162,12 @@ def test_reorder_assignments(
 def test_init_tuning_records(
     sample_knobs: list[Optional[common.KnobAssignment]],
 ) -> None:
-    sorted_order = [2, 0, 1]
-    tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order)
-
-    expected: list[candidate_ordering.TuningRecord] = [
-        candidate_ordering.TuningRecord(
-            gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True
-        )
-    ]
+    tr0 = candidate_ordering.TuningRecord(
+        gen_id=0,
+        candidate_id=0,
+        to_compile=True,
+        to_benchmark=True,
+    )
     tr1 = candidate_ordering.TuningRecord(
         gen_id=2,
         candidate_id=1,
@@ -185,6 +183,151 @@ def test_init_tuning_records(
         candidate_id=3,
         knob=sample_knobs[1],
     )
-    expected += [tr1, tr2, tr3]
+    sorted_order = [2, 0, 1]
+    tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order)
+
+    expected = [tr0, tr1, tr2, tr3]
 
     assert tuning_records == expected
+
+
+def test_flatten_records(
+    sample_knobs: list[Optional[common.KnobAssignment]],
+):
+    tr0 = candidate_ordering.TuningRecord(
+        gen_id=0,
+        candidate_id=0,
+        to_compile=True,
+        to_benchmark=True,
+    )
+    tr1 = candidate_ordering.TuningRecord(
+        gen_id=2,
+        candidate_id=1,
+        knob=sample_knobs[2],
+        to_compile=True,
+        benchmark_device_id="hip://2",
+        benchmark_queue_position=1,
+        baseline_benchmark_time_us=123.4,
+        benchmark_speedup=1.5,
+    )
+    tr2 = candidate_ordering.TuningRecord(
+        gen_id=1,
+        candidate_id=2,
+        knob=sample_knobs[1],
+        to_benchmark=True,
+        benchmark_time_us=153.56,
+    )
+    sample_tuning_records = [tr0, tr1, tr2]
+
+    headers, rows = candidate_ordering.flatten_records(sample_tuning_records)
+
+    expected_headers = [
+        "gen_id",
+        "candidate_id",
+        "to_compile",
+        "compile_status",
+        "to_benchmark",
+        "benchmark_device_id",
+        "benchmark_queue_position",
+        "benchmark_status",
+        "baseline_benchmark_time_us",
+        "benchmark_time_us",
+        "benchmark_speedup",
+        "benchmark_rank_order",
+        "knob.M",
+        "knob.N",
+        "knob.K",
+        "knob.tile_m",
+        "knob.tile_n",
+        "knob.tile_k",
+        "knob.wg_x",
+        "knob.wg_y",
+        "knob.wg_z",
+        "knob.subgroup_m_cnt",
+        "knob.subgroup_n_cnt",
+        "knob.intrinsic_mn",
+        "knob.intrinsic_k",
+        "knob.subgroup_m",
+        "knob.subgroup_n",
+        "knob.subgroup_k",
+    ]
+    assert headers == expected_headers
+
+    expected_rows = [
+        {
+            "baseline_benchmark_time_us": None,
+            "benchmark_device_id": None,
+            "benchmark_queue_position": None,
+            "benchmark_rank_order": None,
+            "benchmark_speedup": None,
+            "benchmark_status": False,
+            "benchmark_time_us": None,
+            "candidate_id": 0,
+            "compile_status": False,
+            "gen_id": 0,
+            "knob": None,
+            "to_benchmark": True,
+            "to_compile": True,
+        },
+        {
+            "baseline_benchmark_time_us": 123.4,
+            "benchmark_device_id": "hip://2",
+            "benchmark_queue_position": 1,
+            "benchmark_rank_order": None,
+            "benchmark_speedup": 1.5,
+            "benchmark_status": False,
+            "benchmark_time_us": None,
+            "candidate_id": 1,
+            "compile_status": False,
+            "gen_id": 2,
+            "knob.K": 1280,
+            "knob.M": 2048,
+            "knob.N": 10240,
+            "knob.intrinsic_k": 16,
+            "knob.intrinsic_mn": 16,
+            "knob.subgroup_k": 0,
+            "knob.subgroup_m": 0,
+            "knob.subgroup_m_cnt": 2,
+            "knob.subgroup_n": 0,
+            "knob.subgroup_n_cnt": 4,
+            "knob.tile_k": 16,
+            "knob.tile_m": 64,
+            "knob.tile_n": 256,
+            "knob.wg_x": 256,
+            "knob.wg_y": 2,
+            "knob.wg_z": 1,
+            "to_benchmark": False,
+            "to_compile": True,
+        },
+        {
+            "baseline_benchmark_time_us": None,
+            "benchmark_device_id": None,
+            "benchmark_queue_position": None,
+            "benchmark_rank_order": None,
+            "benchmark_speedup": None,
+            "benchmark_status": False,
+            "benchmark_time_us": 153.56,
+            "candidate_id": 2,
+            "compile_status": False,
+            "gen_id": 1,
+            "knob.K": 1280,
+            "knob.M": 2048,
+            "knob.N": 10240,
+            "knob.intrinsic_k": 16,
+            "knob.intrinsic_mn": 16,
+            "knob.subgroup_k": 0,
+            "knob.subgroup_m": 0,
+            "knob.subgroup_m_cnt": 1,
+            "knob.subgroup_n": 0,
+            "knob.subgroup_n_cnt": 5,
+            "knob.tile_k": 80,
+            "knob.tile_m": 64,
+            "knob.tile_n": 320,
+            "knob.wg_x": 320,
+            "knob.wg_y": 1,
+            "knob.wg_z": 1,
+            "to_benchmark": True,
+            "to_compile": False,
+        },
+    ]
+    assert rows == expected_rows

From 39026fbe6308f57ddd49273e97db4617ac4b8732 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 18:20:33 +0000
Subject: [PATCH 11/25] Fix flatten function

---
 sharktuner/sharktuner/candidate_ordering.py | 41 +++++++++++++--------
 sharktuner/tests/candidate_ordering_test.py |  1 -
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 2fd4c33b673..20698336255 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -151,34 +151,43 @@ def flatten_records(
     tuning_records: list[TuningRecord],
 ) -> tuple[list[str], list[dict[str, Any]]]:
     """
-    Flatten a list of `TuningRecord` objects to CSV headers and rows
+    Flatten a list of `TuningRecord` objects into CSV headers and rows.
 
     - Each record becomes one CSV row.
-    - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) are written as individual columns.
-    - Nested object (i.e., `knob`) is flattened using dot notation: knob.tile_m, knob.intrinsic_mn
+    - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) appear as individual columns.
+    - Nested objects (e.g., `knob`) are flattened into columns like `knob.M`, `knob.tile_m`.
+
+    The original top-level attribute (e.g., 'knob') is removed once nesting is flattened.
     """
     rows = []
     headers = []
+    unneeded_headers = []
 
     for tuning_record in tuning_records:
         row = {}
-        for k, v in vars(tuning_record).items():
-            if hasattr(v, "__dict__"):
-                nested = vars(v)
-                if nested:
-                    for nk, nv in nested.items():
-                        key = f"{k}.{nk}"
-                        row[key] = nv
-                        if key not in headers:
-                            headers.append(key)
-                else:
+        for attr, val in vars(tuning_record).items():
+            if hasattr(val, "__dict__"):
+                nested = vars(val)
+                if not nested:
                     continue
+                unneeded_headers.append(attr)
+                for sub_attr, sub_val in nested.items():
+                    key = f"{attr}.{sub_attr}"
+                    row[key] = sub_val
+                    if key not in headers:
+                        headers.append(key)
             else:
-                row[k] = v
-                if k not in headers and k != "knob":
-                    headers.append(k)
+                row[attr] = val
+                if attr not in headers:
+                    headers.append(attr)
         rows.append(row)
 
+    # Remove top-level attributes (e.g., 'knob') that were replaced by flattened nested fields.
+    headers = [h for h in headers if h not in unneeded_headers]
+    for row in rows:
+        for unneeded in unneeded_headers:
+            row.pop(unneeded, None)
+
     return headers, rows
 
 
diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index 065dbe57427..38157e209af 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -265,7 +265,6 @@ def test_flatten_records(
             "candidate_id": 0,
             "compile_status": False,
             "gen_id": 0,
-            "knob": None,
             "to_benchmark": True,
             "to_compile": True,
         },

From dc2510bdaf3fffb2004c474304ff488b61ecde25 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 18:21:49 +0000
Subject: [PATCH 12/25] Remove unneeded import

---
 sharktuner/sharktuner/candidate_ordering.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 20698336255..49669fb1745 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -1,6 +1,5 @@
 import random
 import logging
-import os
 import csv
 from typing import Optional, Any
 from dataclasses import dataclass

From c5eb9be8b6383c12b779d5e2a5fbd25b09b6b6ec Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 21:31:39 +0000
Subject: [PATCH 13/25] Remove baseline from tuning records

---
 sharktuner/sharktuner/candidate_ordering.py |  30 +----
 sharktuner/sharktuner/libtuner.py           |   3 -
 sharktuner/tests/candidate_ordering_test.py | 126 ++++++++------------
 3 files changed, 55 insertions(+), 104 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 49669fb1745..cc50c017588 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -131,9 +131,6 @@ def init_tuning_records(
     knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]
 ) -> list[TuningRecord]:
     tuning_records: list[TuningRecord] = []
-    tuning_records.append(
-        TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True)
-    )
 
     for can_idx, gen_idx in enumerate(sorted_order, start=1):
         tr = TuningRecord(
@@ -155,37 +152,20 @@ def flatten_records(
     - Each record becomes one CSV row.
     - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) appear as individual columns.
     - Nested objects (e.g., `knob`) are flattened into columns like `knob.M`, `knob.tile_m`.
-
-    The original top-level attribute (e.g., 'knob') is removed once nesting is flattened.
     """
     rows = []
-    headers = []
-    unneeded_headers = []
-
     for tuning_record in tuning_records:
         row = {}
         for attr, val in vars(tuning_record).items():
-            if hasattr(val, "__dict__"):
-                nested = vars(val)
-                if not nested:
-                    continue
-                unneeded_headers.append(attr)
-                for sub_attr, sub_val in nested.items():
-                    key = f"{attr}.{sub_attr}"
-                    row[key] = sub_val
-                    if key not in headers:
-                        headers.append(key)
+            if isinstance(val, common.KnobAssignment):
+                knob_dict = val.get_knobs()
+                for k, v in knob_dict.items():
+                    row[f"{attr}_{k}"] = v
             else:
                 row[attr] = val
-                if attr not in headers:
-                    headers.append(attr)
         rows.append(row)
 
-    # Remove top-level attributes (e.g., 'knob') that were replaced by flattened nested fields.
-    headers = [h for h in headers if h not in unneeded_headers]
-    for row in rows:
-        for unneeded in unneeded_headers:
-            row.pop(unneeded, None)
+    headers = list(row.keys())
 
     return headers, rows
 
diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py
index 6c38048d7f2..bf7c0e45690 100644
--- a/sharktuner/sharktuner/libtuner.py
+++ b/sharktuner/sharktuner/libtuner.py
@@ -1275,7 +1275,6 @@ def benchmark(
 
     # Benchmarking baselines on each involved device.
     baseline_tracker = tuning_client.candidate_trackers[0]
-    tuning_client.tuning_records[0].to_benchmark = True
     first_baseline_result, subprocess_timeout_reference = benchmark_baseline(
         devices=args.devices,
         tuning_client=tuning_client,
@@ -1283,10 +1282,8 @@ def benchmark(
     )
     baseline_handler = BaselineResultHandler()
     baseline_handler.add_run(first_baseline_result)
-    tuning_client.tuning_records[0].benchmark_status = True
     if not baseline_handler.is_valid():
         logging.warning("Baseline run failed.")
-        tuning_client.tuning_records[0].benchmark_status = False
 
     if tuning_client.is_auto_iree_benchmark_timeout():
         logging.info(
diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index 38157e209af..4e5b505407b 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -162,12 +162,6 @@ def test_reorder_assignments(
 def test_init_tuning_records(
     sample_knobs: list[Optional[common.KnobAssignment]],
 ) -> None:
-    tr0 = candidate_ordering.TuningRecord(
-        gen_id=0,
-        candidate_id=0,
-        to_compile=True,
-        to_benchmark=True,
-    )
     tr1 = candidate_ordering.TuningRecord(
         gen_id=2,
         candidate_id=1,
@@ -186,7 +180,7 @@ def test_init_tuning_records(
     sorted_order = [2, 0, 1]
     tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order)
 
-    expected = [tr0, tr1, tr2, tr3]
+    expected = [tr1, tr2, tr3]
 
     assert tuning_records == expected
 
@@ -194,12 +188,6 @@ def test_init_tuning_records(
 def test_flatten_records(
     sample_knobs: list[Optional[common.KnobAssignment]],
 ):
-    tr0 = candidate_ordering.TuningRecord(
-        gen_id=0,
-        candidate_id=0,
-        to_compile=True,
-        to_benchmark=True,
-    )
     tr1 = candidate_ordering.TuningRecord(
         gen_id=2,
         candidate_id=1,
@@ -217,13 +205,29 @@ def test_flatten_records(
         to_benchmark=True,
         benchmark_time_us=153.56,
     )
-    sample_tuning_records = [tr0, tr1, tr2]
+    sample_tuning_records = [tr1, tr2]
 
     headers, rows = candidate_ordering.flatten_records(sample_tuning_records)
 
     expected_headers = [
         "gen_id",
         "candidate_id",
+        "knob_M",
+        "knob_N",
+        "knob_K",
+        "knob_tile_m",
+        "knob_tile_n",
+        "knob_tile_k",
+        "knob_wg_x",
+        "knob_wg_y",
+        "knob_wg_z",
+        "knob_subgroup_m_cnt",
+        "knob_subgroup_n_cnt",
+        "knob_intrinsic_mn",
+        "knob_intrinsic_k",
+        "knob_subgroup_m",
+        "knob_subgroup_n",
+        "knob_subgroup_k",
         "to_compile",
         "compile_status",
         "to_benchmark",
@@ -234,40 +238,10 @@ def test_flatten_records(
         "benchmark_time_us",
         "benchmark_speedup",
         "benchmark_rank_order",
-        "knob.M",
-        "knob.N",
-        "knob.K",
-        "knob.tile_m",
-        "knob.tile_n",
-        "knob.tile_k",
-        "knob.wg_x",
-        "knob.wg_y",
-        "knob.wg_z",
-        "knob.subgroup_m_cnt",
-        "knob.subgroup_n_cnt",
-        "knob.intrinsic_mn",
-        "knob.intrinsic_k",
-        "knob.subgroup_m",
-        "knob.subgroup_n",
-        "knob.subgroup_k",
     ]
     assert headers == expected_headers
 
     expected_rows = [
-        {
-            "baseline_benchmark_time_us": None,
-            "benchmark_device_id": None,
-            "benchmark_queue_position": None,
-            "benchmark_rank_order": None,
-            "benchmark_speedup": None,
-            "benchmark_status": False,
-            "benchmark_time_us": None,
-            "candidate_id": 0,
-            "compile_status": False,
-            "gen_id": 0,
-            "to_benchmark": True,
-            "to_compile": True,
-        },
         {
             "baseline_benchmark_time_us": 123.4,
             "benchmark_device_id": "hip://2",
@@ -279,22 +253,22 @@ def test_flatten_records(
             "candidate_id": 1,
             "compile_status": False,
             "gen_id": 2,
-            "knob.K": 1280,
-            "knob.M": 2048,
-            "knob.N": 10240,
-            "knob.intrinsic_k": 16,
-            "knob.intrinsic_mn": 16,
-            "knob.subgroup_k": 0,
-            "knob.subgroup_m": 0,
-            "knob.subgroup_m_cnt": 2,
-            "knob.subgroup_n": 0,
-            "knob.subgroup_n_cnt": 4,
-            "knob.tile_k": 16,
-            "knob.tile_m": 64,
-            "knob.tile_n": 256,
-            "knob.wg_x": 256,
-            "knob.wg_y": 2,
-            "knob.wg_z": 1,
+            "knob_K": 1280,
+            "knob_M": 2048,
+            "knob_N": 10240,
+            "knob_intrinsic_k": 16,
+            "knob_intrinsic_mn": 16,
+            "knob_subgroup_k": 0,
+            "knob_subgroup_m": 0,
+            "knob_subgroup_m_cnt": 2,
+            "knob_subgroup_n": 0,
+            "knob_subgroup_n_cnt": 4,
+            "knob_tile_k": 16,
+            "knob_tile_m": 64,
+            "knob_tile_n": 256,
+            "knob_wg_x": 256,
+            "knob_wg_y": 2,
+            "knob_wg_z": 1,
             "to_benchmark": False,
             "to_compile": True,
         },
@@ -309,22 +283,22 @@ def test_flatten_records(
             "candidate_id": 2,
             "compile_status": False,
             "gen_id": 1,
-            "knob.K": 1280,
-            "knob.M": 2048,
-            "knob.N": 10240,
-            "knob.intrinsic_k": 16,
-            "knob.intrinsic_mn": 16,
-            "knob.subgroup_k": 0,
-            "knob.subgroup_m": 0,
-            "knob.subgroup_m_cnt": 1,
-            "knob.subgroup_n": 0,
-            "knob.subgroup_n_cnt": 5,
-            "knob.tile_k": 80,
-            "knob.tile_m": 64,
-            "knob.tile_n": 320,
-            "knob.wg_x": 320,
-            "knob.wg_y": 1,
-            "knob.wg_z": 1,
+            "knob_K": 1280,
+            "knob_M": 2048,
+            "knob_N": 10240,
+            "knob_intrinsic_k": 16,
+            "knob_intrinsic_mn": 16,
+            "knob_subgroup_k": 0,
+            "knob_subgroup_m": 0,
+            "knob_subgroup_m_cnt": 1,
+            "knob_subgroup_n": 0,
+            "knob_subgroup_n_cnt": 5,
+            "knob_tile_k": 80,
+            "knob_tile_m": 64,
+            "knob_tile_n": 320,
+            "knob_wg_x": 320,
+            "knob_wg_y": 1,
+            "knob_wg_z": 1,
             "to_benchmark": True,
             "to_compile": False,
         },

From b9aea37882ad00e789a1573255b77b6036a5c44c Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 21:56:29 +0000
Subject: [PATCH 14/25] Rename output csv filename

---
 sharktuner/dispatch_tuner/dispatch_tuner.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py
index a2a344d35fb..ac159da392e 100644
--- a/sharktuner/dispatch_tuner/dispatch_tuner.py
+++ b/sharktuner/dispatch_tuner/dispatch_tuner.py
@@ -160,8 +160,9 @@ def main() -> None:
         print("Check the summary in:")
         print(summary_log_file.resolve())
 
-        output_csv_name = f"tuning_{args.dispatch_file.stem}.csv"
-        csv_path = Path(path_config.base_dir / output_csv_name)
+        output_csv_name = f"{args.dispatch_file.stem}_candidate_analysis.csv"
+        csv_path = Path(path_config.base_dir) / output_csv_name
+
         libtuner.candidate_ordering.export_record_to_csv(
             dispatch_tuner.tuning_records, csv_path
         )

From 98128a26d68a559bcf095d3d34f3457d5a68c128 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 22:12:21 +0000
Subject: [PATCH 15/25] Add docstring to TuningRecord

---
 sharktuner/sharktuner/candidate_ordering.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index cc50c017588..781eb84c2ae 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -112,6 +112,12 @@ def reorder_assignments(
 
 @dataclass
 class TuningRecord:
+    """
+    Records a candidate's knob configuration and tuning results. Used for analyzing the
+    candidate search space and evaluating the effectiveness of heuristics used for
+    candidate ordering.
+    """
+
     gen_id: int  # Original index from candidate generation.
     candidate_id: int  # Index in candidate_trackers after sorting.
     knob: Optional[common.KnobAssignment] = None

From 0c3ec6506a2b9b51ce6afd6a22672f5090d52e35 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 22:17:38 +0000
Subject: [PATCH 16/25] Rename var

---
 sharktuner/sharktuner/candidate_ordering.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 781eb84c2ae..66b681c4dc4 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -138,11 +138,11 @@ def init_tuning_records(
 ) -> list[TuningRecord]:
     tuning_records: list[TuningRecord] = []
 
-    for can_idx, gen_idx in enumerate(sorted_order, start=1):
+    for sorted_position, original_gen_index in enumerate(sorted_order, start=1):
         tr = TuningRecord(
-            gen_id=gen_idx,
-            candidate_id=can_idx,
-            knob=knobs[gen_idx],
+            gen_id=original_gen_index,
+            candidate_id=sorted_position,
+            knob=knobs[original_gen_index],
         )
         tuning_records.append(tr)
 

From 51be60fd65087974ab30aa21cc656a48fb756d2d Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 22:18:03 +0000
Subject: [PATCH 17/25] Assert tuning records list is non-empty

---
 sharktuner/sharktuner/candidate_ordering.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 66b681c4dc4..12e896c9555 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -177,8 +177,7 @@ def flatten_records(
 
 
 def export_record_to_csv(tuning_records: list[TuningRecord], dest_file: Path) -> None:
-    if not tuning_records:
-        return None
+    assert tuning_records
 
     headers, rows = flatten_records(tuning_records)
 

From 68d543cda6ce5eeb33a51e49ea5af4e7cca9b2a7 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 22:20:36 +0000
Subject: [PATCH 18/25] Store candidate id and result time in local variables

---
 sharktuner/sharktuner/libtuner.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py
index bf7c0e45690..5334a444646 100644
--- a/sharktuner/sharktuner/libtuner.py
+++ b/sharktuner/sharktuner/libtuner.py
@@ -1303,15 +1303,13 @@ def benchmark(
     )
 
     for res in candidate_results:
-        tuning_client.tuning_records[
-            res.candidate_id
-        ].benchmark_device_id = res.device_id
-        if res.time == math.inf:
+        c_id = res.candidate_id
+        res_time = res.time
+        tuning_client.tuning_records[c_id].benchmark_device_id = res.device_id
+        if res_time == math.inf:
             continue
-        tuning_client.tuning_records[res.candidate_id].benchmark_status = True
-        tuning_client.tuning_records[res.candidate_id].benchmark_time_us = round(
-            res.time, 2
-        )
+        tuning_client.tuning_records[c_id].benchmark_status = True
+        tuning_client.tuning_records[c_id].benchmark_time_us = round(res_time, 2)
 
     second_baseline_result, _ = benchmark_baseline(
         devices=args.devices,

From 03033e1683a1255a24ba07d662c6487d30bad47d Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 22:22:21 +0000
Subject: [PATCH 19/25] Inline expected list in tuning records test

---
 sharktuner/tests/candidate_ordering_test.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index 4e5b505407b..e2dc4456e7d 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -180,9 +180,7 @@ def test_init_tuning_records(
     sorted_order = [2, 0, 1]
     tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order)
 
-    expected = [tr1, tr2, tr3]
-
-    assert tuning_records == expected
+    assert tuning_records == [tr1, tr2, tr3]
 
 
 def test_flatten_records(

From 6393140bd89be0b2e020e8957f3085ba3bf5d693 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Thu, 13 Nov 2025 22:26:34 +0000
Subject: [PATCH 20/25] Rename func

---
 sharktuner/sharktuner/candidate_ordering.py | 2 +-
 sharktuner/sharktuner/libtuner.py           | 4 ++--
 sharktuner/tests/candidate_ordering_test.py | 6 ++++--
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 12e896c9555..ee1ea43cdcb 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -133,7 +133,7 @@ class TuningRecord:
     benchmark_rank_order: Optional[int] = None
 
 
-def init_tuning_records(
+def build_tuning_records_from_order(
     knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]
 ) -> list[TuningRecord]:
     tuning_records: list[TuningRecord] = []
diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py
index 5334a444646..a769247b265 100644
--- a/sharktuner/sharktuner/libtuner.py
+++ b/sharktuner/sharktuner/libtuner.py
@@ -846,8 +846,8 @@ def generate_candidate_specs(
         # Total number of configs = candidates generated + baseline.
         assert len(config_specs) == len(solutions) + 1
 
-        tuning_client.tuning_records = candidate_ordering.init_tuning_records(
-            knobs, sorted_order
+        tuning_client.tuning_records = (
+            candidate_ordering.build_tuning_records_from_order(knobs, sorted_order)
         )
 
         knob_assignments = [dispatch_tuner.get_knob_assignment(s) for s in solutions]
diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index e2dc4456e7d..59d464f0adb 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -159,7 +159,7 @@ def test_reorder_assignments(
     )
 
 
-def test_init_tuning_records(
+def test_build_tuning_records_from_order(
     sample_knobs: list[Optional[common.KnobAssignment]],
 ) -> None:
     tr1 = candidate_ordering.TuningRecord(
@@ -178,7 +178,9 @@ def test_init_tuning_records(
         knob=sample_knobs[1],
     )
     sorted_order = [2, 0, 1]
-    tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order)
+    tuning_records = candidate_ordering.build_tuning_records_from_order(
+        sample_knobs, sorted_order
+    )
 
     assert tuning_records == [tr1, tr2, tr3]
 

From bf9a64a4e562c7e92d0762896198e2b577261503 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Mon, 17 Nov 2025 20:11:18 +0000
Subject: [PATCH 21/25] Reflow TuningRecord docstring and add baseline comment

---
 sharktuner/sharktuner/candidate_ordering.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index ee1ea43cdcb..a0e5fdcd91d 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -113,9 +113,10 @@ def reorder_assignments(
 @dataclass
 class TuningRecord:
     """
-    Records a candidate's knob configuration and tuning results. Used for analyzing the
-    candidate search space and evaluating the effectiveness of heuristics used for
-    candidate ordering.
+    Records a candidate's knob configuration and tuning results.
+
+    Used to analyze the candidate search space and to evaluate the
+    effectiveness of candidate ordering heuristics.
     """
 
     gen_id: int  # Original index from candidate generation.
@@ -137,7 +138,7 @@ def build_tuning_records_from_order(
     knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]
 ) -> list[TuningRecord]:
     tuning_records: list[TuningRecord] = []
-
+    # candidate_id = 0 is the baseline and is not included in tuning_records.
     for sorted_position, original_gen_index in enumerate(sorted_order, start=1):
         tr = TuningRecord(
             gen_id=original_gen_index,

From 4beab33faa486d57a76e85b1d2b9abcc00b151cb Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Mon, 17 Nov 2025 21:42:01 +0000
Subject: [PATCH 22/25] Assert only non-default fields in test_flatten_records

---
 sharktuner/tests/candidate_ordering_test.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index 59d464f0adb..0eb4690d0d6 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -241,17 +241,13 @@ def test_flatten_records(
     ]
     assert headers == expected_headers
 
-    expected_rows = [
+    expected_key_rows = [
         {
             "baseline_benchmark_time_us": 123.4,
             "benchmark_device_id": "hip://2",
             "benchmark_queue_position": 1,
-            "benchmark_rank_order": None,
             "benchmark_speedup": 1.5,
-            "benchmark_status": False,
-            "benchmark_time_us": None,
             "candidate_id": 1,
-            "compile_status": False,
             "gen_id": 2,
             "knob_K": 1280,
             "knob_M": 2048,
@@ -269,19 +265,11 @@ def test_flatten_records(
             "knob_wg_x": 256,
             "knob_wg_y": 2,
             "knob_wg_z": 1,
-            "to_benchmark": False,
             "to_compile": True,
         },
         {
-            "baseline_benchmark_time_us": None,
-            "benchmark_device_id": None,
-            "benchmark_queue_position": None,
-            "benchmark_rank_order": None,
-            "benchmark_speedup": None,
-            "benchmark_status": False,
             "benchmark_time_us": 153.56,
             "candidate_id": 2,
-            "compile_status": False,
             "gen_id": 1,
             "knob_K": 1280,
             "knob_M": 2048,
@@ -300,7 +288,9 @@ def test_flatten_records(
             "knob_wg_y": 1,
             "knob_wg_z": 1,
             "to_benchmark": True,
-            "to_compile": False,
         },
     ]
-    assert rows == expected_rows
+
+    for expected_key_row, actual_row in zip(expected_key_rows, rows):
+        for attr, val in expected_key_row.items():
+            assert actual_row[attr] == val

From 1fcb9549d43cc5768d5dd9b7b8fb36172504ac2b Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Mon, 17 Nov 2025 22:08:35 +0000
Subject: [PATCH 23/25] Annotate expected_key_rows to fix mypy error

---
 sharktuner/tests/candidate_ordering_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index 0eb4690d0d6..a7c136d17a8 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -241,7 +241,7 @@ def test_flatten_records(
     ]
     assert headers == expected_headers
 
-    expected_key_rows = [
+    expected_key_rows: list[dict] = [
         {
             "baseline_benchmark_time_us": 123.4,
             "benchmark_device_id": "hip://2",

From f34cc44b9bdd7768ca2cd5ec05dd2b1cac77e69b Mon Sep 17 00:00:00 2001
From: RattataKing <46631728+RattataKing@users.noreply.github.com>
Date: Tue, 18 Nov 2025 11:27:52 -0500
Subject: [PATCH 24/25] Fix comments

Co-authored-by: Jakub Kuderski <kubakuderski@gmail.com>
---
 sharktuner/sharktuner/candidate_ordering.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index a0e5fdcd91d..849ef935cb7 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -120,7 +120,7 @@ class TuningRecord:
     """
 
     gen_id: int  # Original index from candidate generation.
-    candidate_id: int  # Index in candidate_trackers after sorting.
+    candidate_id: int  # Index in candidate_trackers after reordering.
     knob: Optional[common.KnobAssignment] = None
     to_compile: bool = False
     compile_status: bool = False

From 7533307a88716ef6368a3ec95d5ec30f06c22a60 Mon Sep 17 00:00:00 2001
From: Amily Wu <amilywu2@amd.com>
Date: Tue, 18 Nov 2025 18:11:49 +0000
Subject: [PATCH 25/25] Return only rows from flatten_records; unnest benchmark rank loop

---
 sharktuner/sharktuner/candidate_ordering.py |  9 +++---
 sharktuner/sharktuner/libtuner.py           | 21 +++++++------
 sharktuner/tests/candidate_ordering_test.py | 34 +--------------------
 3 files changed, 16 insertions(+), 48 deletions(-)

diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index 849ef935cb7..7a20020df45 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -152,7 +152,7 @@ def build_tuning_records_from_order(
 
 def flatten_records(
     tuning_records: list[TuningRecord],
-) -> tuple[list[str], list[dict[str, Any]]]:
+) -> list[dict[str, Any]]:
     """
     Flatten a list of `TuningRecord` objects into CSV headers and rows.
 
@@ -172,15 +172,14 @@ def flatten_records(
                 row[attr] = val
         rows.append(row)
 
-    headers = list(row.keys())
-
-    return headers, rows
+    return rows
 
 
 def export_record_to_csv(tuning_records: list[TuningRecord], dest_file: Path) -> None:
     assert tuning_records
 
-    headers, rows = flatten_records(tuning_records)
+    rows = flatten_records(tuning_records)
+    headers = list(rows[0].keys())
 
     with open(dest_file, "w", newline="", encoding="utf-8") as f:
         writer = csv.DictWriter(f, fieldnames=headers)
diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py
index a769247b265..6b1a3ddd6a6 100644
--- a/sharktuner/sharktuner/libtuner.py
+++ b/sharktuner/sharktuner/libtuner.py
@@ -1334,16 +1334,17 @@ def benchmark(
         candidate_results,
         prune_slow_candidates=tuning_client.should_prune_slower_candidates(),
     )
-    if all_candidates_with_speedup:
-        for i, handler_res in enumerate(all_candidates_with_speedup, start=1):
-            benchmark_res, speedup = handler_res
-            cid, _, device_id = benchmark_res
-            baseline_res = baseline_handler.get_average_result_us(device_id)
-            tuning_client.tuning_records[cid].baseline_benchmark_time_us = (
-                round(baseline_res, 2) if baseline_res else None
-            )
-            tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5)
-            tuning_client.tuning_records[cid].benchmark_rank_order = i
+
+    # Best candidate gets rank 1.
+    for i, handler_res in enumerate(all_candidates_with_speedup, start=1):
+        benchmark_res, speedup = handler_res
+        cid, _, device_id = benchmark_res
+        baseline_res = baseline_handler.get_average_result_us(device_id)
+        tuning_client.tuning_records[cid].baseline_benchmark_time_us = (
+            round(baseline_res, 2) if baseline_res else None
+        )
+        tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5)
+        tuning_client.tuning_records[cid].benchmark_rank_order = i
 
     top_candidates_with_speedup = (
         all_candidates_with_speedup[:num_candidates]
diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index a7c136d17a8..fc9654b315f 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -207,39 +207,7 @@ def test_flatten_records(
     )
     sample_tuning_records = [tr1, tr2]
 
-    headers, rows = candidate_ordering.flatten_records(sample_tuning_records)
-
-    expected_headers = [
-        "gen_id",
-        "candidate_id",
-        "knob_M",
-        "knob_N",
-        "knob_K",
-        "knob_tile_m",
-        "knob_tile_n",
-        "knob_tile_k",
-        "knob_wg_x",
-        "knob_wg_y",
-        "knob_wg_z",
-        "knob_subgroup_m_cnt",
-        "knob_subgroup_n_cnt",
-        "knob_intrinsic_mn",
-        "knob_intrinsic_k",
-        "knob_subgroup_m",
-        "knob_subgroup_n",
-        "knob_subgroup_k",
-        "to_compile",
-        "compile_status",
-        "to_benchmark",
-        "benchmark_device_id",
-        "benchmark_queue_position",
-        "benchmark_status",
-        "baseline_benchmark_time_us",
-        "benchmark_time_us",
-        "benchmark_speedup",
-        "benchmark_rank_order",
-    ]
-    assert headers == expected_headers
+    rows = candidate_ordering.flatten_records(sample_tuning_records)
 
     expected_key_rows: list[dict] = [
         {