diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py
index 03a29a97bb2..ac159da392e 100644
--- a/sharktuner/dispatch_tuner/dispatch_tuner.py
+++ b/sharktuner/dispatch_tuner/dispatch_tuner.py
@@ -159,3 +159,11 @@ def main() -> None:
     print(path_config.run_log.resolve())
     print("Check the summary in:")
     print(summary_log_file.resolve())
+
+    output_csv_name = f"{args.dispatch_file.stem}_candidate_analysis.csv"
+    csv_path = Path(path_config.base_dir) / output_csv_name
+
+    libtuner.candidate_ordering.export_record_to_csv(
+        dispatch_tuner.tuning_records, csv_path
+    )
+    print(f"Wrote tuning records CSV: {csv_path}")
diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py
index f61db9f021c..7a20020df45 100644
--- a/sharktuner/sharktuner/candidate_ordering.py
+++ b/sharktuner/sharktuner/candidate_ordering.py
@@ -1,7 +1,10 @@
-from enum import Enum
-from typing import Optional, Callable
 import random
 import logging
+import csv
+from typing import Any, Callable, Optional
+from dataclasses import dataclass
+from pathlib import Path
+from enum import Enum

 from iree.compiler.dialects import iree_gpu  # type: ignore

@@ -104,3 +107,80 @@ def reorder_assignments(
             return indices
         case _:
             assert False
+
+
+@dataclass
+class TuningRecord:
+    """
+    Records a candidate's knob configuration and tuning results.
+
+    Used to analyze the candidate search space and to evaluate the
+    effectiveness of candidate ordering heuristics.
+    """
+
+    gen_id: int  # Original index from candidate generation.
+    candidate_id: int  # Index in candidate_trackers after reordering.
+    knob: Optional[common.KnobAssignment] = None
+    to_compile: bool = False
+    compile_status: bool = False
+    to_benchmark: bool = False
+    benchmark_device_id: Optional[str] = None
+    benchmark_queue_position: Optional[int] = None
+    benchmark_status: bool = False
+    baseline_benchmark_time_us: Optional[float] = None
+    benchmark_time_us: Optional[float] = None
+    benchmark_speedup: Optional[float] = None
+    benchmark_rank_order: Optional[int] = None
+
+
+def build_tuning_records_from_order(
+    knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]
+) -> list[TuningRecord]:
+    tuning_records: list[TuningRecord] = []
+    # candidate_id = 0 is the baseline and is not included in tuning_records.
+    for sorted_position, original_gen_index in enumerate(sorted_order, start=1):
+        tr = TuningRecord(
+            gen_id=original_gen_index,
+            candidate_id=sorted_position,
+            knob=knobs[original_gen_index],
+        )
+        tuning_records.append(tr)
+
+    return tuning_records
+
+
+def flatten_records(
+    tuning_records: list[TuningRecord],
+) -> list[dict[str, Any]]:
+    """
+    Flatten a list of `TuningRecord` objects into CSV rows.
+
+    - Each record becomes one CSV row.
+    - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) appear as individual columns.
+    - Nested objects (e.g., `knob`) are flattened into columns like `knob_M`, `knob_tile_m`.
+    """
+    rows = []
+    for tuning_record in tuning_records:
+        row = {}
+        for attr, val in vars(tuning_record).items():
+            if isinstance(val, common.KnobAssignment):
+                knob_dict = val.get_knobs()
+                for k, v in knob_dict.items():
+                    row[f"{attr}_{k}"] = v
+            else:
+                row[attr] = val
+        rows.append(row)
+
+    return rows
+
+
+def export_record_to_csv(tuning_records: list[TuningRecord], dest_file: Path) -> None:
+    assert tuning_records
+
+    rows = flatten_records(tuning_records)
+    headers = list(rows[0].keys())
+
+    with open(dest_file, "w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=headers)
+        writer.writeheader()
+        writer.writerows(rows)
diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py
index 111b24809ec..6b1a3ddd6a6 100644
--- a/sharktuner/sharktuner/libtuner.py
+++ b/sharktuner/sharktuner/libtuner.py
@@ -125,6 +125,7 @@ def __init__(self, tuner_context: common.TunerContext):
         self.tuner_context = tuner_context
         self.candidate_trackers: list[CandidateTracker] = []
         self.target_info: Optional[iree_gpu.TargetInfo] = None
+        self.tuning_records: list[candidate_ordering.TuningRecord] = []

     @abstractmethod
     def get_iree_compile_flags(self) -> list[str]:
@@ -845,6 +846,10 @@ def generate_candidate_specs(
     # Total number of configs = candidates generated + baseline.
     assert len(config_specs) == len(solutions) + 1

+    tuning_client.tuning_records = (
+        candidate_ordering.build_tuning_records_from_order(knobs, sorted_order)
+    )
+
     knob_assignments = [dispatch_tuner.get_knob_assignment(s) for s in solutions]
     logging.debug("candidate_gen.py ends")
     handle_error(
@@ -1193,6 +1198,7 @@ def compile(
     # Set the source and output file paths for compilation of each candidate.
     path_config.compiled_dir.mkdir(parents=True, exist_ok=True)
     for i in candidates:
+        tuning_client.tuning_records[i].to_compile = True
         vmfb_file_name = path_config.get_candidate_vmfb_filename(
             tuning_client.candidate_trackers[i].candidate_id
         )
@@ -1231,6 +1237,7 @@ def compile(
     # Remove duplicate vmfbs from the candidate list.
     compiled_candidate_hashes = []
     for candidate_id in compiled_candidates:
+        tuning_client.tuning_records[candidate_id].compile_status = True
         candidate_vmfb = tuning_client.candidate_trackers[
             candidate_id
         ].compiled_vmfb_path
@@ -1283,6 +1290,9 @@ def benchmark(
         f"Smart candidate benchmark timeout is set to {subprocess_timeout_reference:.2f}s"
     )
     candidate_indices = [i for i in compiled_candidates if i != 0]
+    for i, idx in enumerate(candidate_indices, start=1):
+        tuning_client.tuning_records[idx].benchmark_queue_position = i
+        tuning_client.tuning_records[idx].to_benchmark = True

     candidate_results = benchmark_candidates(
         candidate_indices=candidate_indices,
@@ -1292,6 +1302,15 @@ def benchmark(
         benchmark_time=benchmark_time,  # Only candidate benchmark has time limit.
     )

+    for res in candidate_results:
+        c_id = res.candidate_id
+        res_time = res.time
+        tuning_client.tuning_records[c_id].benchmark_device_id = res.device_id
+        if res_time == math.inf:
+            continue
+        tuning_client.tuning_records[c_id].benchmark_status = True
+        tuning_client.tuning_records[c_id].benchmark_time_us = round(res_time, 2)
+
     second_baseline_result, _ = benchmark_baseline(
         devices=args.devices,
         tuning_client=tuning_client,
@@ -1315,6 +1334,18 @@ def benchmark(
         candidate_results,
         prune_slow_candidates=tuning_client.should_prune_slower_candidates(),
     )
+
+    # Best candidate gets rank 1.
+    for i, handler_res in enumerate(all_candidates_with_speedup, start=1):
+        benchmark_res, speedup = handler_res
+        cid, _, device_id = benchmark_res
+        baseline_res = baseline_handler.get_average_result_us(device_id)
+        tuning_client.tuning_records[cid].baseline_benchmark_time_us = (
+            round(baseline_res, 2) if baseline_res else None
+        )
+        tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5)
+        tuning_client.tuning_records[cid].benchmark_rank_order = i
+
     top_candidates_with_speedup = (
         all_candidates_with_speedup[:num_candidates]
         if num_candidates
diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py
index ab0bafd9ec1..fc9654b315f 100644
--- a/sharktuner/tests/candidate_ordering_test.py
+++ b/sharktuner/tests/candidate_ordering_test.py
@@ -6,6 +6,7 @@

 import math
 import pytest
+from typing import Optional

 from iree.compiler import ir  # type: ignore
 from iree.compiler.dialects import iree_gpu  # type: ignore
@@ -13,60 +14,63 @@
 from sharktuner import candidate_ordering, common


-knob_1 = common.LLVMGPUVectorDistributeContractionKnobs(
-    M=2048,
-    N=10240,
-    K=1280,
-    tile_m=128,
-    tile_n=64,
-    tile_k=64,
-    wg_x=64,
-    wg_y=2,
-    wg_z=1,
-    subgroup_m_cnt=2,
-    subgroup_n_cnt=1,
-    intrinsic_mn=32,
-    intrinsic_k=8,
-    subgroup_m=0,
-    subgroup_n=0,
-    subgroup_k=0,
-)
-knob_2 = common.LLVMGPUVectorDistributeContractionKnobs(
-    M=2048,
-    N=10240,
-    K=1280,
-    tile_m=64,
-    tile_n=320,
-    tile_k=80,
-    wg_x=320,
-    wg_y=1,
-    wg_z=1,
-    subgroup_m_cnt=1,
-    subgroup_n_cnt=5,
-    intrinsic_mn=16,
-    intrinsic_k=16,
-    subgroup_m=0,
-    subgroup_n=0,
-    subgroup_k=0,
-)
-knob_3 = common.LLVMGPUVectorDistributeContractionKnobs(
-    M=2048,
-    N=10240,
-    K=1280,
-    tile_m=64,
-    tile_n=256,
-    tile_k=16,
-    wg_x=256,
-    wg_y=2,
-    wg_z=1,
-    subgroup_m_cnt=2,
-    subgroup_n_cnt=4,
-    intrinsic_mn=16,
-    intrinsic_k=16,
-    subgroup_m=0,
-    subgroup_n=0,
-    subgroup_k=0,
-)
+@pytest.fixture
+def sample_knobs() -> list[Optional[common.KnobAssignment]]:
+    knob_1 = common.LLVMGPUVectorDistributeContractionKnobs(
+        M=2048,
+        N=10240,
+        K=1280,
+        tile_m=128,
+        tile_n=64,
+        tile_k=64,
+        wg_x=64,
+        wg_y=2,
+        wg_z=1,
+        subgroup_m_cnt=2,
+        subgroup_n_cnt=1,
+        intrinsic_mn=32,
+        intrinsic_k=8,
+        subgroup_m=0,
+        subgroup_n=0,
+        subgroup_k=0,
+    )
+    knob_2 = common.LLVMGPUVectorDistributeContractionKnobs(
+        M=2048,
+        N=10240,
+        K=1280,
+        tile_m=64,
+        tile_n=320,
+        tile_k=80,
+        wg_x=320,
+        wg_y=1,
+        wg_z=1,
+        subgroup_m_cnt=1,
+        subgroup_n_cnt=5,
+        intrinsic_mn=16,
+        intrinsic_k=16,
+        subgroup_m=0,
+        subgroup_n=0,
+        subgroup_k=0,
+    )
+    knob_3 = common.LLVMGPUVectorDistributeContractionKnobs(
+        M=2048,
+        N=10240,
+        K=1280,
+        tile_m=64,
+        tile_n=256,
+        tile_k=16,
+        wg_x=256,
+        wg_y=2,
+        wg_z=1,
+        subgroup_m_cnt=2,
+        subgroup_n_cnt=4,
+        intrinsic_mn=16,
+        intrinsic_k=16,
+        subgroup_m=0,
+        subgroup_n=0,
+        subgroup_k=0,
+    )
+    return [knob_1, knob_2, knob_3]


 @pytest.fixture
@@ -100,14 +104,15 @@ def test_math_expression() -> None:
     assert math.isclose(ai, expected, rel_tol=1e-9)


-def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None:
-    knobs: list[common.KnobAssignment | None] = [knob_1, knob_2, knob_3]
-
+def test_reorder_assignments(
+    target_info: iree_gpu.TargetInfo,
+    sample_knobs: list[Optional[common.KnobAssignment]],
+) -> None:
     expected_order = [0, 1, 2]
     assert (
         candidate_ordering.reorder_assignments(
             target_info=target_info,
-            knobs=knobs,
+            knobs=sample_knobs,
             strategy=candidate_ordering.CandidateOrderKind.no_sort,
         )
         == expected_order
@@ -117,7 +122,7 @@ def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None:
     assert (
         candidate_ordering.reorder_assignments(
             target_info=target_info,
-            knobs=knobs,
+            knobs=sample_knobs,
             strategy=candidate_ordering.CandidateOrderKind.heuristic,
         )
         == expected_order
@@ -126,14 +131,14 @@ def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None:
     expected_order = [0, 2, 1]
     assert (
         candidate_ordering.reorder_assignments(
-            knobs=knobs,
+            knobs=sample_knobs,
             strategy=candidate_ordering.CandidateOrderKind.heuristic,
             key_fn=lambda knob: knob.tile_n,
         )
         == expected_order
     )

-    knobs = [None, None, None]
+    knobs: list[Optional[common.KnobAssignment]] = [None, None, None]
     assert (
         candidate_ordering.reorder_assignments(
             target_info=target_info,
@@ -152,3 +157,108 @@ def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None:
         )
         == []
     )
+
+
+def test_build_tuning_records_from_order(
+    sample_knobs: list[Optional[common.KnobAssignment]],
+) -> None:
+    tr1 = candidate_ordering.TuningRecord(
+        gen_id=2,
+        candidate_id=1,
+        knob=sample_knobs[2],
+    )
+    tr2 = candidate_ordering.TuningRecord(
+        gen_id=0,
+        candidate_id=2,
+        knob=sample_knobs[0],
+    )
+    tr3 = candidate_ordering.TuningRecord(
+        gen_id=1,
+        candidate_id=3,
+        knob=sample_knobs[1],
+    )
+    sorted_order = [2, 0, 1]
+    tuning_records = candidate_ordering.build_tuning_records_from_order(
+        sample_knobs, sorted_order
+    )
+
+    assert tuning_records == [tr1, tr2, tr3]
+
+
+def test_flatten_records(
+    sample_knobs: list[Optional[common.KnobAssignment]],
+) -> None:
+    tr1 = candidate_ordering.TuningRecord(
+        gen_id=2,
+        candidate_id=1,
+        knob=sample_knobs[2],
+        to_compile=True,
+        benchmark_device_id="hip://2",
+        benchmark_queue_position=1,
+        baseline_benchmark_time_us=123.4,
+        benchmark_speedup=1.5,
+    )
+    tr2 = candidate_ordering.TuningRecord(
+        gen_id=1,
+        candidate_id=2,
+        knob=sample_knobs[1],
+        to_benchmark=True,
+        benchmark_time_us=153.56,
+    )
+    sample_tuning_records = [tr1, tr2]
+
+    rows = candidate_ordering.flatten_records(sample_tuning_records)
+
+    expected_key_rows: list[dict] = [
+        {
+            "baseline_benchmark_time_us": 123.4,
+            "benchmark_device_id": "hip://2",
+            "benchmark_queue_position": 1,
+            "benchmark_speedup": 1.5,
+            "candidate_id": 1,
+            "gen_id": 2,
+            "knob_K": 1280,
+            "knob_M": 2048,
+            "knob_N": 10240,
+            "knob_intrinsic_k": 16,
+            "knob_intrinsic_mn": 16,
+            "knob_subgroup_k": 0,
+            "knob_subgroup_m": 0,
+            "knob_subgroup_m_cnt": 2,
+            "knob_subgroup_n": 0,
+            "knob_subgroup_n_cnt": 4,
+            "knob_tile_k": 16,
+            "knob_tile_m": 64,
+            "knob_tile_n": 256,
+            "knob_wg_x": 256,
+            "knob_wg_y": 2,
+            "knob_wg_z": 1,
+            "to_compile": True,
+        },
+        {
+            "benchmark_time_us": 153.56,
+            "candidate_id": 2,
+            "gen_id": 1,
+            "knob_K": 1280,
+            "knob_M": 2048,
+            "knob_N": 10240,
+            "knob_intrinsic_k": 16,
+            "knob_intrinsic_mn": 16,
+            "knob_subgroup_k": 0,
+            "knob_subgroup_m": 0,
+            "knob_subgroup_m_cnt": 1,
+            "knob_subgroup_n": 0,
+            "knob_subgroup_n_cnt": 5,
+            "knob_tile_k": 80,
+            "knob_tile_m": 64,
+            "knob_tile_n": 320,
+            "knob_wg_x": 320,
+            "knob_wg_y": 1,
+            "knob_wg_z": 1,
+            "to_benchmark": True,
+        },
+    ]

+    for expected_key_row, actual_row in zip(expected_key_rows, rows):
+        for attr, val in expected_key_row.items():
+            assert actual_row[attr] == val
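
For reference, and not part of the patch: a minimal sketch of how the pieces added above fit together. The knob values are copied from the sample_knobs test fixture, the hand-set record fields stand in for what libtuner fills during a real run, and the output file name is hypothetical.

from pathlib import Path

from sharktuner import candidate_ordering, common

# One knob assignment, copied verbatim from the sample_knobs fixture.
knob = common.LLVMGPUVectorDistributeContractionKnobs(
    M=2048, N=10240, K=1280,
    tile_m=128, tile_n=64, tile_k=64,
    wg_x=64, wg_y=2, wg_z=1,
    subgroup_m_cnt=2, subgroup_n_cnt=1,
    intrinsic_mn=32, intrinsic_k=8,
    subgroup_m=0, subgroup_n=0, subgroup_k=0,
)

# sorted_order holds generation indices in benchmark-queue order; with a
# single candidate the order is trivial. candidate_id starts at 1 because
# candidate 0 is the baseline.
records = candidate_ordering.build_tuning_records_from_order(
    knobs=[knob], sorted_order=[0]
)

# In a real run, compile() and benchmark() in libtuner populate these
# fields; set them by hand here just to produce a filled-in CSV row.
records[0].to_compile = True
records[0].compile_status = True
records[0].benchmark_time_us = 153.56

# Writes one row with flat columns: gen_id, candidate_id, ...,
# plus knob_M, knob_tile_m, and the other flattened knob fields.
candidate_ordering.export_record_to_csv(records, Path("candidate_analysis.csv"))

dispatch_tuner.py wires this same call into main(), writing {dispatch_file.stem}_candidate_analysis.csv under path_config.base_dir at the end of a tuning run.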