From 111c7d8ce6b1fd5dfa93cdb2c7e44f49721cd2a9 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 19:23:40 +0000 Subject: [PATCH 01/25] Add logging --- .../sharktuner/candidate_tuning_records.py | 35 +++++++++++++++++++ sharktuner/sharktuner/libtuner.py | 34 ++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 sharktuner/sharktuner/candidate_tuning_records.py diff --git a/sharktuner/sharktuner/candidate_tuning_records.py b/sharktuner/sharktuner/candidate_tuning_records.py new file mode 100644 index 00000000000..436c39bd65f --- /dev/null +++ b/sharktuner/sharktuner/candidate_tuning_records.py @@ -0,0 +1,35 @@ +from typing import Optional +from dataclasses import dataclass + +from . import common, candidate_tuning_records + + +@dataclass +class TuningRecord: + gen_id: int + candidate_id: int + knob: Optional[common.KnobAssignment] = None + to_compile: bool = False + compile_status: bool = False + to_benchmark: bool = False + benchmark_device_id: Optional[str] = None + benchmark_queue_position: Optional[int] = None + benchmark_status: bool = False + baseline_benchmark_time_us: Optional[float] = None + benchmark_time_us: Optional[float] = None + benchmark_speedup: Optional[float] = None + benchmark_rank_order: Optional[int] = None + +def init_tuning_records(knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]) -> list[TuningRecord]: + tuning_records: list[TuningRecord] = [] + tuning_records.append(TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True)) + + for can_idx, gen_idx in enumerate(sorted_order, start=1): + tr = TuningRecord( + gen_id=gen_idx, + candidate_id=can_idx, + knob=knobs[gen_idx], + ) + tuning_records.append(tr) + + return tuning_records \ No newline at end of file diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py index 111b24809ec..b682dc1848f 100644 --- a/sharktuner/sharktuner/libtuner.py +++ b/sharktuner/sharktuner/libtuner.py @@ -49,6 +49,7 @@ dispatch_constraints, dispatch_parser, candidate_ordering, + candidate_tuning_records, ) @@ -125,6 +126,7 @@ def __init__(self, tuner_context: common.TunerContext): self.tuner_context = tuner_context self.candidate_trackers: list[CandidateTracker] = [] self.target_info: Optional[iree_gpu.TargetInfo] = None + self.tuning_records: list[candidate_tuning_records.TuningRecord] = [] @abstractmethod def get_iree_compile_flags(self) -> list[str]: @@ -845,6 +847,10 @@ def generate_candidate_specs( # Total number of configs = candidates generated + baseline. assert len(config_specs) == len(solutions) + 1 + tuning_client.tuning_records = candidate_tuning_records.init_tuning_records( + knobs, sorted_order + ) + knob_assignments = [dispatch_tuner.get_knob_assignment(s) for s in solutions] logging.debug("candidate_gen.py ends") handle_error( @@ -1193,6 +1199,7 @@ def compile( # Set the source and output file paths for compilation of each candidate. path_config.compiled_dir.mkdir(parents=True, exist_ok=True) for i in candidates: + tuning_client.tuning_records[i].to_compile = True vmfb_file_name = path_config.get_candidate_vmfb_filename( tuning_client.candidate_trackers[i].candidate_id ) @@ -1231,6 +1238,7 @@ def compile( # Remove duplicate vmfbs from the candidate list. compiled_candidate_hashes = [] for candidate_id in compiled_candidates: + tuning_client.tuning_records[candidate_id].compile_status = True candidate_vmfb = tuning_client.candidate_trackers[ candidate_id ].compiled_vmfb_path @@ -1268,6 +1276,7 @@ def benchmark( # Benchmarking baselines on each involved device. baseline_tracker = tuning_client.candidate_trackers[0] + tuning_client.tuning_records[0].to_benchmark = True first_baseline_result, subprocess_timeout_reference = benchmark_baseline( devices=args.devices, tuning_client=tuning_client, @@ -1275,14 +1284,19 @@ def benchmark( ) baseline_handler = BaselineResultHandler() baseline_handler.add_run(first_baseline_result) + tuning_client.tuning_records[0].benchmark_status = True if not baseline_handler.is_valid(): logging.warning("Baseline run failed.") + tuning_client.tuning_records[0].benchmark_status = False if tuning_client.is_auto_iree_benchmark_timeout(): logging.info( f"Smart candidate benchmark timeout is set to {subprocess_timeout_reference:.2f}s" ) candidate_indices = [i for i in compiled_candidates if i != 0] + for i, idx in enumerate(candidate_indices, start=1): + tuning_client.tuning_records[idx].benchmark_queue_position = i + tuning_client.tuning_records[idx].to_benchmark = True candidate_results = benchmark_candidates( candidate_indices=candidate_indices, @@ -1292,6 +1306,17 @@ def benchmark( benchmark_time=benchmark_time, # Only candidate benchmark has time limit. ) + for res in candidate_results: + tuning_client.tuning_records[ + res.candidate_id + ].benchmark_device_id = res.device_id + if res.time == math.inf: + continue + tuning_client.tuning_records[res.candidate_id].benchmark_status = True + tuning_client.tuning_records[res.candidate_id].benchmark_time_us = round( + res.time, 2 + ) + second_baseline_result, _ = benchmark_baseline( devices=args.devices, tuning_client=tuning_client, @@ -1315,6 +1340,15 @@ def benchmark( candidate_results, prune_slow_candidates=tuning_client.should_prune_slower_candidates(), ) + if all_candidates_with_speedup: + for i, handler_res in enumerate(all_candidates_with_speedup, start=1): + benchmark_res, speedup = handler_res + cid, _, device_id = benchmark_res + bas = baseline_handler.get_average_result_us(device_id) + tuning_client.tuning_records[cid].baseline_benchmark_time_us = round(bas, 2) + tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5) + tuning_client.tuning_records[cid].benchmark_rank_order = i + top_candidates_with_speedup = ( all_candidates_with_speedup[:num_candidates] if num_candidates From ab94d4e87427fc8845833c44a9b8a418c4f15042 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 19:58:09 +0000 Subject: [PATCH 02/25] Add dump --- sharktuner/dispatch_tuner/dispatch_tuner.py | 8 +++ .../sharktuner/candidate_tuning_records.py | 52 +++++++++++++++++-- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py index 03a29a97bb2..0282e408c6b 100644 --- a/sharktuner/dispatch_tuner/dispatch_tuner.py +++ b/sharktuner/dispatch_tuner/dispatch_tuner.py @@ -159,3 +159,11 @@ def main() -> None: print(path_config.run_log.resolve()) print("Check the summary in:") print(summary_log_file.resolve()) + + output_csv_name = ( + f"tuning_{args.dispatch_file.stem.removesuffix('_benchmark')}.csv" + ) + csv_path = libtuner.candidate_tuning_records.export_record_to_csv( + dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name + ) + print(f"Wrote tuning records CSV: {csv_path}") diff --git a/sharktuner/sharktuner/candidate_tuning_records.py b/sharktuner/sharktuner/candidate_tuning_records.py index 436c39bd65f..a6ae9909034 100644 --- a/sharktuner/sharktuner/candidate_tuning_records.py +++ b/sharktuner/sharktuner/candidate_tuning_records.py @@ -1,5 +1,8 @@ +import os +import csv from typing import Optional from dataclasses import dataclass +from pathlib import Path from . import common, candidate_tuning_records @@ -20,9 +23,14 @@ class TuningRecord: benchmark_speedup: Optional[float] = None benchmark_rank_order: Optional[int] = None -def init_tuning_records(knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]) -> list[TuningRecord]: + +def init_tuning_records( + knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int] +) -> list[TuningRecord]: tuning_records: list[TuningRecord] = [] - tuning_records.append(TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True)) + tuning_records.append( + TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True) + ) for can_idx, gen_idx in enumerate(sorted_order, start=1): tr = TuningRecord( @@ -32,4 +40,42 @@ def init_tuning_records(knobs: list[Optional[common.KnobAssignment]], sorted_ord ) tuning_records.append(tr) - return tuning_records \ No newline at end of file + return tuning_records + + +def export_record_to_csv( + objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv" +) -> Path: + if not objects: + return None + + rows = [] + headers = [] + + for obj in objects: + row = {} + for k, v in vars(obj).items(): + if hasattr(v, "__dict__"): + nested = vars(v) + if nested: # only if it has attrs + for nk, nv in nested.items(): + key = f"{k}.{nk}" + row[key] = nv + if key not in headers: + headers.append(key) + else: + # skip empty nested object entirely + continue + else: + row[k] = v + if k not in headers: + headers.append(k) + rows.append(row) + + path = os.path.join(dest_dir, filename) + with open(path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=headers) + writer.writeheader() + writer.writerows(rows) + + return path From 669251275492303578b8f9cc42a432a8589181a8 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 20:15:39 +0000 Subject: [PATCH 03/25] Move code to candidate_ordering --- sharktuner/sharktuner/candidate_ordering.py | 83 ++++++++- .../sharktuner/candidate_tuning_records.py | 81 --------- sharktuner/sharktuner/libtuner.py | 5 +- sharktuner/tests/candidate_ordering_test.py | 157 +++++++++++------- 4 files changed, 180 insertions(+), 146 deletions(-) delete mode 100644 sharktuner/sharktuner/candidate_tuning_records.py diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index f61db9f021c..a247379216a 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -1,7 +1,12 @@ -from enum import Enum -from typing import Optional, Callable import random import logging +import os +import csv +from typing import Optional +from dataclasses import dataclass +from pathlib import Path +from enum import Enum +from typing import Optional, Callable from iree.compiler.dialects import iree_gpu # type: ignore @@ -104,3 +109,77 @@ def reorder_assignments( return indices case _: assert False + + +@dataclass +class TuningRecord: + gen_id: int + candidate_id: int + knob: Optional[common.KnobAssignment] = None + to_compile: bool = False + compile_status: bool = False + to_benchmark: bool = False + benchmark_device_id: Optional[str] = None + benchmark_queue_position: Optional[int] = None + benchmark_status: bool = False + baseline_benchmark_time_us: Optional[float] = None + benchmark_time_us: Optional[float] = None + benchmark_speedup: Optional[float] = None + benchmark_rank_order: Optional[int] = None + + +def init_tuning_records( + knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int] +) -> list[TuningRecord]: + tuning_records: list[TuningRecord] = [] + tuning_records.append( + TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True) + ) + + for can_idx, gen_idx in enumerate(sorted_order, start=1): + tr = TuningRecord( + gen_id=gen_idx, + candidate_id=can_idx, + knob=knobs[gen_idx], + ) + tuning_records.append(tr) + + return tuning_records + + +def export_record_to_csv( + objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv" +) -> Path: + if not objects: + return None + + rows = [] + headers = [] + + for obj in objects: + row = {} + for k, v in vars(obj).items(): + if hasattr(v, "__dict__"): + nested = vars(v) + if nested: # only if it has attrs + for nk, nv in nested.items(): + key = f"{k}.{nk}" + row[key] = nv + if key not in headers: + headers.append(key) + else: + # skip empty nested object entirely + continue + else: + row[k] = v + if k not in headers: + headers.append(k) + rows.append(row) + + path = os.path.join(dest_dir, filename) + with open(path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=headers) + writer.writeheader() + writer.writerows(rows) + + return path diff --git a/sharktuner/sharktuner/candidate_tuning_records.py b/sharktuner/sharktuner/candidate_tuning_records.py deleted file mode 100644 index a6ae9909034..00000000000 --- a/sharktuner/sharktuner/candidate_tuning_records.py +++ /dev/null @@ -1,81 +0,0 @@ -import os -import csv -from typing import Optional -from dataclasses import dataclass -from pathlib import Path - -from . import common, candidate_tuning_records - - -@dataclass -class TuningRecord: - gen_id: int - candidate_id: int - knob: Optional[common.KnobAssignment] = None - to_compile: bool = False - compile_status: bool = False - to_benchmark: bool = False - benchmark_device_id: Optional[str] = None - benchmark_queue_position: Optional[int] = None - benchmark_status: bool = False - baseline_benchmark_time_us: Optional[float] = None - benchmark_time_us: Optional[float] = None - benchmark_speedup: Optional[float] = None - benchmark_rank_order: Optional[int] = None - - -def init_tuning_records( - knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int] -) -> list[TuningRecord]: - tuning_records: list[TuningRecord] = [] - tuning_records.append( - TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True) - ) - - for can_idx, gen_idx in enumerate(sorted_order, start=1): - tr = TuningRecord( - gen_id=gen_idx, - candidate_id=can_idx, - knob=knobs[gen_idx], - ) - tuning_records.append(tr) - - return tuning_records - - -def export_record_to_csv( - objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv" -) -> Path: - if not objects: - return None - - rows = [] - headers = [] - - for obj in objects: - row = {} - for k, v in vars(obj).items(): - if hasattr(v, "__dict__"): - nested = vars(v) - if nested: # only if it has attrs - for nk, nv in nested.items(): - key = f"{k}.{nk}" - row[key] = nv - if key not in headers: - headers.append(key) - else: - # skip empty nested object entirely - continue - else: - row[k] = v - if k not in headers: - headers.append(k) - rows.append(row) - - path = os.path.join(dest_dir, filename) - with open(path, "w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=headers) - writer.writeheader() - writer.writerows(rows) - - return path diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py index b682dc1848f..1e346131bed 100644 --- a/sharktuner/sharktuner/libtuner.py +++ b/sharktuner/sharktuner/libtuner.py @@ -49,7 +49,6 @@ dispatch_constraints, dispatch_parser, candidate_ordering, - candidate_tuning_records, ) @@ -126,7 +125,7 @@ def __init__(self, tuner_context: common.TunerContext): self.tuner_context = tuner_context self.candidate_trackers: list[CandidateTracker] = [] self.target_info: Optional[iree_gpu.TargetInfo] = None - self.tuning_records: list[candidate_tuning_records.TuningRecord] = [] + self.tuning_records: list[candidate_ordering.TuningRecord] = [] @abstractmethod def get_iree_compile_flags(self) -> list[str]: @@ -847,7 +846,7 @@ def generate_candidate_specs( # Total number of configs = candidates generated + baseline. assert len(config_specs) == len(solutions) + 1 - tuning_client.tuning_records = candidate_tuning_records.init_tuning_records( + tuning_client.tuning_records = candidate_ordering.init_tuning_records( knobs, sorted_order ) diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index ab0bafd9ec1..ff13c4385e5 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -6,6 +6,7 @@ import math import pytest +from typing import Optional from iree.compiler import ir # type: ignore from iree.compiler.dialects import iree_gpu # type: ignore @@ -13,60 +14,63 @@ from sharktuner import candidate_ordering, common -knob_1 = common.LLVMGPUVectorDistributeContractionKnobs( - M=2048, - N=10240, - K=1280, - tile_m=128, - tile_n=64, - tile_k=64, - wg_x=64, - wg_y=2, - wg_z=1, - subgroup_m_cnt=2, - subgroup_n_cnt=1, - intrinsic_mn=32, - intrinsic_k=8, - subgroup_m=0, - subgroup_n=0, - subgroup_k=0, -) -knob_2 = common.LLVMGPUVectorDistributeContractionKnobs( - M=2048, - N=10240, - K=1280, - tile_m=64, - tile_n=320, - tile_k=80, - wg_x=320, - wg_y=1, - wg_z=1, - subgroup_m_cnt=1, - subgroup_n_cnt=5, - intrinsic_mn=16, - intrinsic_k=16, - subgroup_m=0, - subgroup_n=0, - subgroup_k=0, -) -knob_3 = common.LLVMGPUVectorDistributeContractionKnobs( - M=2048, - N=10240, - K=1280, - tile_m=64, - tile_n=256, - tile_k=16, - wg_x=256, - wg_y=2, - wg_z=1, - subgroup_m_cnt=2, - subgroup_n_cnt=4, - intrinsic_mn=16, - intrinsic_k=16, - subgroup_m=0, - subgroup_n=0, - subgroup_k=0, -) +@pytest.fixture +def sample_knobs() -> list[Optional[common.KnobAssignment]]: + knob_1 = common.LLVMGPUVectorDistributeContractionKnobs( + M=2048, + N=10240, + K=1280, + tile_m=128, + tile_n=64, + tile_k=64, + wg_x=64, + wg_y=2, + wg_z=1, + subgroup_m_cnt=2, + subgroup_n_cnt=1, + intrinsic_mn=32, + intrinsic_k=8, + subgroup_m=0, + subgroup_n=0, + subgroup_k=0, + ) + knob_2 = common.LLVMGPUVectorDistributeContractionKnobs( + M=2048, + N=10240, + K=1280, + tile_m=64, + tile_n=320, + tile_k=80, + wg_x=320, + wg_y=1, + wg_z=1, + subgroup_m_cnt=1, + subgroup_n_cnt=5, + intrinsic_mn=16, + intrinsic_k=16, + subgroup_m=0, + subgroup_n=0, + subgroup_k=0, + ) + knob_3 = common.LLVMGPUVectorDistributeContractionKnobs( + M=2048, + N=10240, + K=1280, + tile_m=64, + tile_n=256, + tile_k=16, + wg_x=256, + wg_y=2, + wg_z=1, + subgroup_m_cnt=2, + subgroup_n_cnt=4, + intrinsic_mn=16, + intrinsic_k=16, + subgroup_m=0, + subgroup_n=0, + subgroup_k=0, + ) + return [knob_1, knob_2, knob_3] @pytest.fixture @@ -100,14 +104,15 @@ def test_math_expression() -> None: assert math.isclose(ai, expected, rel_tol=1e-9) -def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None: - knobs: list[common.KnobAssignment | None] = [knob_1, knob_2, knob_3] - +def test_reorder_assignments( + target_info: iree_gpu.TargetInfo, + sample_knobs: list[Optional[common.KnobAssignment]], +) -> None: expected_order = [0, 1, 2] assert ( candidate_ordering.reorder_assignments( target_info=target_info, - knobs=knobs, + knobs=sample_knobs, strategy=candidate_ordering.CandidateOrderKind.no_sort, ) == expected_order @@ -117,7 +122,7 @@ def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None: assert ( candidate_ordering.reorder_assignments( target_info=target_info, - knobs=knobs, + knobs=sample_knobs, strategy=candidate_ordering.CandidateOrderKind.heuristic, ) == expected_order @@ -126,7 +131,7 @@ def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None: expected_order = [0, 2, 1] assert ( candidate_ordering.reorder_assignments( - knobs=knobs, + knobs=sample_knobs, strategy=candidate_ordering.CandidateOrderKind.heuristic, key_fn=lambda knob: knob.tile_n, ) @@ -152,3 +157,35 @@ def test_reorder_assignments(target_info: iree_gpu.TargetInfo) -> None: ) == [] ) + + +def test_init_tuning_records( + sample_knobs: list[Optional[common.KnobAssignment]], +) -> None: + sorted_order = [2, 0, 1] + tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order) + expected: list[candidate_ordering.TuningRecord] = [] + + expected: list[candidate_ordering.TuningRecord] = [ + candidate_ordering.TuningRecord( + gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True + ) + ] + tr1 = candidate_ordering.TuningRecord( + gen_id=2, + candidate_id=1, + knob=sample_knobs[2], + ) + tr2 = candidate_ordering.TuningRecord( + gen_id=0, + candidate_id=2, + knob=sample_knobs[0], + ) + tr3 = candidate_ordering.TuningRecord( + gen_id=1, + candidate_id=3, + knob=sample_knobs[1], + ) + expected += [tr1, tr2, tr3] + + assert tuning_records == expected From ecea646380fac6cd5a144739344c8e7b8f42d79b Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 20:31:24 +0000 Subject: [PATCH 04/25] Small fix --- sharktuner/sharktuner/candidate_ordering.py | 4 ++-- sharktuner/sharktuner/libtuner.py | 6 ++++-- sharktuner/tests/candidate_ordering_test.py | 3 +-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index a247379216a..8558cd25166 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -149,7 +149,7 @@ def init_tuning_records( def export_record_to_csv( objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv" -) -> Path: +) -> Optional[Path]: if not objects: return None @@ -176,7 +176,7 @@ def export_record_to_csv( headers.append(k) rows.append(row) - path = os.path.join(dest_dir, filename) + path = Path(os.path.join(dest_dir, filename)) with open(path, "w", newline="", encoding="utf-8") as f: writer = csv.DictWriter(f, fieldnames=headers) writer.writeheader() diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py index 1e346131bed..6c38048d7f2 100644 --- a/sharktuner/sharktuner/libtuner.py +++ b/sharktuner/sharktuner/libtuner.py @@ -1343,8 +1343,10 @@ def benchmark( for i, handler_res in enumerate(all_candidates_with_speedup, start=1): benchmark_res, speedup = handler_res cid, _, device_id = benchmark_res - bas = baseline_handler.get_average_result_us(device_id) - tuning_client.tuning_records[cid].baseline_benchmark_time_us = round(bas, 2) + baseline_res = baseline_handler.get_average_result_us(device_id) + tuning_client.tuning_records[cid].baseline_benchmark_time_us = ( + round(baseline_res, 2) if baseline_res else None + ) tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5) tuning_client.tuning_records[cid].benchmark_rank_order = i diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index ff13c4385e5..8fa9d662cd3 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -138,7 +138,7 @@ def test_reorder_assignments( == expected_order ) - knobs = [None, None, None] + knobs: list[Optional[common.KnobAssignment]] = [None, None, None] assert ( candidate_ordering.reorder_assignments( target_info=target_info, @@ -164,7 +164,6 @@ def test_init_tuning_records( ) -> None: sorted_order = [2, 0, 1] tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order) - expected: list[candidate_ordering.TuningRecord] = [] expected: list[candidate_ordering.TuningRecord] = [ candidate_ordering.TuningRecord( From 1f38ece9392802b742a184a2da1a85fc478ae045 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 20:47:19 +0000 Subject: [PATCH 05/25] Fix lint --- sharktuner/sharktuner/candidate_ordering.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 8558cd25166..aeec7ec9c04 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -148,27 +148,35 @@ def init_tuning_records( def export_record_to_csv( - objects: list[TuningRecord], dest_dir: Path, filename: str = "export.csv" + tuning_records: list[TuningRecord], dest_dir: Path, filename: str = "export.csv" ) -> Optional[Path]: - if not objects: + """ + Exports a list of `TuningRecord` objects to a CSV file. + + - Each record becomes one CSV row. + - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) are written as individual columns. + - Nested object (i.e., `knob`) is flattened using dot notation: knob.tile_m, knob.intrinsic_mn + + """ + if not tuning_records: return None rows = [] headers = [] - for obj in objects: + for tuning_record in tuning_records: row = {} - for k, v in vars(obj).items(): + for k, v in vars(tuning_record).items(): if hasattr(v, "__dict__"): nested = vars(v) - if nested: # only if it has attrs + if nested: # Only if it has attrs. for nk, nv in nested.items(): key = f"{k}.{nk}" row[key] = nv if key not in headers: headers.append(key) else: - # skip empty nested object entirely + # Skip empty nested object entirely. continue else: row[k] = v From 988325d09eee63a87ab3ddf302b150bbf73119c4 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 20:49:37 +0000 Subject: [PATCH 06/25] Small fix --- sharktuner/dispatch_tuner/dispatch_tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py index 0282e408c6b..0b9de3f6232 100644 --- a/sharktuner/dispatch_tuner/dispatch_tuner.py +++ b/sharktuner/dispatch_tuner/dispatch_tuner.py @@ -163,7 +163,7 @@ def main() -> None: output_csv_name = ( f"tuning_{args.dispatch_file.stem.removesuffix('_benchmark')}.csv" ) - csv_path = libtuner.candidate_tuning_records.export_record_to_csv( + csv_path = libtuner.candidate_ordering.export_record_to_csv( dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name ) print(f"Wrote tuning records CSV: {csv_path}") From 5ed14c349ddf7eb0b6bdd8cc7eccad9b1e6712ce Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 20:50:57 +0000 Subject: [PATCH 07/25] Small fix --- sharktuner/dispatch_tuner/dispatch_tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py index 0b9de3f6232..9bb22146920 100644 --- a/sharktuner/dispatch_tuner/dispatch_tuner.py +++ b/sharktuner/dispatch_tuner/dispatch_tuner.py @@ -161,7 +161,7 @@ def main() -> None: print(summary_log_file.resolve()) output_csv_name = ( - f"tuning_{args.dispatch_file.stem.removesuffix('_benchmark')}.csv" + f"tuning_{args.dispatch_file.stem}.csv" ) csv_path = libtuner.candidate_ordering.export_record_to_csv( dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name From 414ad27046c7f96d63cb11975bfcc4d466d04fa3 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 20:52:05 +0000 Subject: [PATCH 08/25] Small fix --- sharktuner/sharktuner/candidate_ordering.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index aeec7ec9c04..1f6935a486d 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -169,14 +169,13 @@ def export_record_to_csv( for k, v in vars(tuning_record).items(): if hasattr(v, "__dict__"): nested = vars(v) - if nested: # Only if it has attrs. + if nested: for nk, nv in nested.items(): key = f"{k}.{nk}" row[key] = nv if key not in headers: headers.append(key) else: - # Skip empty nested object entirely. continue else: row[k] = v From c1440f772f815cb28bf10352ded2caf7fe2798db Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 20:56:49 +0000 Subject: [PATCH 09/25] Add comments --- sharktuner/dispatch_tuner/dispatch_tuner.py | 4 +--- sharktuner/sharktuner/candidate_ordering.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py index 9bb22146920..6b24d586176 100644 --- a/sharktuner/dispatch_tuner/dispatch_tuner.py +++ b/sharktuner/dispatch_tuner/dispatch_tuner.py @@ -160,9 +160,7 @@ def main() -> None: print("Check the summary in:") print(summary_log_file.resolve()) - output_csv_name = ( - f"tuning_{args.dispatch_file.stem}.csv" - ) + output_csv_name = f"tuning_{args.dispatch_file.stem}.csv" csv_path = libtuner.candidate_ordering.export_record_to_csv( dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name ) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 1f6935a486d..6e875e19f68 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -113,8 +113,8 @@ def reorder_assignments( @dataclass class TuningRecord: - gen_id: int - candidate_id: int + gen_id: int # Original index from candidate generation. + candidate_id: int # Index in candidate_trackers after sorting. knob: Optional[common.KnobAssignment] = None to_compile: bool = False compile_status: bool = False From 66cf70f432bc89578ce10e8aa43d30452b0c4b46 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Wed, 12 Nov 2025 23:23:02 +0000 Subject: [PATCH 10/25] Add test for flatten_class --- sharktuner/dispatch_tuner/dispatch_tuner.py | 5 +- sharktuner/sharktuner/candidate_ordering.py | 30 ++-- sharktuner/tests/candidate_ordering_test.py | 161 ++++++++++++++++++-- 3 files changed, 171 insertions(+), 25 deletions(-) diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py index 6b24d586176..a2a344d35fb 100644 --- a/sharktuner/dispatch_tuner/dispatch_tuner.py +++ b/sharktuner/dispatch_tuner/dispatch_tuner.py @@ -161,7 +161,8 @@ def main() -> None: print(summary_log_file.resolve()) output_csv_name = f"tuning_{args.dispatch_file.stem}.csv" - csv_path = libtuner.candidate_ordering.export_record_to_csv( - dispatch_tuner.tuning_records, path_config.base_dir, output_csv_name + csv_path = Path(path_config.base_dir / output_csv_name) + libtuner.candidate_ordering.export_record_to_csv( + dispatch_tuner.tuning_records, csv_path ) print(f"Wrote tuning records CSV: {csv_path}") diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 6e875e19f68..2fd4c33b673 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -2,7 +2,7 @@ import logging import os import csv -from typing import Optional +from typing import Optional, Any from dataclasses import dataclass from pathlib import Path from enum import Enum @@ -147,20 +147,16 @@ def init_tuning_records( return tuning_records -def export_record_to_csv( - tuning_records: list[TuningRecord], dest_dir: Path, filename: str = "export.csv" -) -> Optional[Path]: +def flatten_records( + tuning_records: list[TuningRecord], +) -> tuple[list[str], list[dict[str, Any]]]: """ - Exports a list of `TuningRecord` objects to a CSV file. + Flatten a list of `TuningRecord` objects to CSV headers and rows - Each record becomes one CSV row. - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) are written as individual columns. - Nested object (i.e., `knob`) is flattened using dot notation: knob.tile_m, knob.intrinsic_mn - """ - if not tuning_records: - return None - rows = [] headers = [] @@ -179,14 +175,20 @@ def export_record_to_csv( continue else: row[k] = v - if k not in headers: + if k not in headers and k != "knob": headers.append(k) rows.append(row) - path = Path(os.path.join(dest_dir, filename)) - with open(path, "w", newline="", encoding="utf-8") as f: + return headers, rows + + +def export_record_to_csv(tuning_records: list[TuningRecord], dest_file: Path) -> None: + if not tuning_records: + return None + + headers, rows = flatten_records(tuning_records) + + with open(dest_file, "w", newline="", encoding="utf-8") as f: writer = csv.DictWriter(f, fieldnames=headers) writer.writeheader() writer.writerows(rows) - - return path diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index 8fa9d662cd3..065dbe57427 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -162,14 +162,12 @@ def test_reorder_assignments( def test_init_tuning_records( sample_knobs: list[Optional[common.KnobAssignment]], ) -> None: - sorted_order = [2, 0, 1] - tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order) - - expected: list[candidate_ordering.TuningRecord] = [ - candidate_ordering.TuningRecord( - gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True - ) - ] + tr0 = candidate_ordering.TuningRecord( + gen_id=0, + candidate_id=0, + to_compile=True, + to_benchmark=True, + ) tr1 = candidate_ordering.TuningRecord( gen_id=2, candidate_id=1, @@ -185,6 +183,151 @@ def test_init_tuning_records( candidate_id=3, knob=sample_knobs[1], ) - expected += [tr1, tr2, tr3] + sorted_order = [2, 0, 1] + tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order) + + expected = [tr0, tr1, tr2, tr3] assert tuning_records == expected + + +def test_flatten_records( + sample_knobs: list[Optional[common.KnobAssignment]], +): + tr0 = candidate_ordering.TuningRecord( + gen_id=0, + candidate_id=0, + to_compile=True, + to_benchmark=True, + ) + tr1 = candidate_ordering.TuningRecord( + gen_id=2, + candidate_id=1, + knob=sample_knobs[2], + to_compile=True, + benchmark_device_id="hip://2", + benchmark_queue_position=1, + baseline_benchmark_time_us=123.4, + benchmark_speedup=1.5, + ) + tr2 = candidate_ordering.TuningRecord( + gen_id=1, + candidate_id=2, + knob=sample_knobs[1], + to_benchmark=True, + benchmark_time_us=153.56, + ) + sample_tuning_records = [tr0, tr1, tr2] + + headers, rows = candidate_ordering.flatten_records(sample_tuning_records) + + expected_headers = [ + "gen_id", + "candidate_id", + "to_compile", + "compile_status", + "to_benchmark", + "benchmark_device_id", + "benchmark_queue_position", + "benchmark_status", + "baseline_benchmark_time_us", + "benchmark_time_us", + "benchmark_speedup", + "benchmark_rank_order", + "knob.M", + "knob.N", + "knob.K", + "knob.tile_m", + "knob.tile_n", + "knob.tile_k", + "knob.wg_x", + "knob.wg_y", + "knob.wg_z", + "knob.subgroup_m_cnt", + "knob.subgroup_n_cnt", + "knob.intrinsic_mn", + "knob.intrinsic_k", + "knob.subgroup_m", + "knob.subgroup_n", + "knob.subgroup_k", + ] + assert headers == expected_headers + + expected_rows = [ + { + "baseline_benchmark_time_us": None, + "benchmark_device_id": None, + "benchmark_queue_position": None, + "benchmark_rank_order": None, + "benchmark_speedup": None, + "benchmark_status": False, + "benchmark_time_us": None, + "candidate_id": 0, + "compile_status": False, + "gen_id": 0, + "knob": None, + "to_benchmark": True, + "to_compile": True, + }, + { + "baseline_benchmark_time_us": 123.4, + "benchmark_device_id": "hip://2", + "benchmark_queue_position": 1, + "benchmark_rank_order": None, + "benchmark_speedup": 1.5, + "benchmark_status": False, + "benchmark_time_us": None, + "candidate_id": 1, + "compile_status": False, + "gen_id": 2, + "knob.K": 1280, + "knob.M": 2048, + "knob.N": 10240, + "knob.intrinsic_k": 16, + "knob.intrinsic_mn": 16, + "knob.subgroup_k": 0, + "knob.subgroup_m": 0, + "knob.subgroup_m_cnt": 2, + "knob.subgroup_n": 0, + "knob.subgroup_n_cnt": 4, + "knob.tile_k": 16, + "knob.tile_m": 64, + "knob.tile_n": 256, + "knob.wg_x": 256, + "knob.wg_y": 2, + "knob.wg_z": 1, + "to_benchmark": False, + "to_compile": True, + }, + { + "baseline_benchmark_time_us": None, + "benchmark_device_id": None, + "benchmark_queue_position": None, + "benchmark_rank_order": None, + "benchmark_speedup": None, + "benchmark_status": False, + "benchmark_time_us": 153.56, + "candidate_id": 2, + "compile_status": False, + "gen_id": 1, + "knob.K": 1280, + "knob.M": 2048, + "knob.N": 10240, + "knob.intrinsic_k": 16, + "knob.intrinsic_mn": 16, + "knob.subgroup_k": 0, + "knob.subgroup_m": 0, + "knob.subgroup_m_cnt": 1, + "knob.subgroup_n": 0, + "knob.subgroup_n_cnt": 5, + "knob.tile_k": 80, + "knob.tile_m": 64, + "knob.tile_n": 320, + "knob.wg_x": 320, + "knob.wg_y": 1, + "knob.wg_z": 1, + "to_benchmark": True, + "to_compile": False, + }, + ] + assert rows == expected_rows From 39026fbe6308f57ddd49273e97db4617ac4b8732 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 18:20:33 +0000 Subject: [PATCH 11/25] Fix flatten function --- sharktuner/sharktuner/candidate_ordering.py | 41 +++++++++++++-------- sharktuner/tests/candidate_ordering_test.py | 1 - 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 2fd4c33b673..20698336255 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -151,34 +151,43 @@ def flatten_records( tuning_records: list[TuningRecord], ) -> tuple[list[str], list[dict[str, Any]]]: """ - Flatten a list of `TuningRecord` objects to CSV headers and rows + Flatten a list of `TuningRecord` objects into CSV headers and rows. - Each record becomes one CSV row. - - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) are written as individual columns. - - Nested object (i.e., `knob`) is flattened using dot notation: knob.tile_m, knob.intrinsic_mn + - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) appear as individual columns. + - Nested objects (e.g., `knob`) are flattened into columns like `knob.M`, `knob.tile_m`. + + The original top-level attribute (e.g., 'knob') is removed once nesting is flattened. """ rows = [] headers = [] + unneeded_headers = [] for tuning_record in tuning_records: row = {} - for k, v in vars(tuning_record).items(): - if hasattr(v, "__dict__"): - nested = vars(v) - if nested: - for nk, nv in nested.items(): - key = f"{k}.{nk}" - row[key] = nv - if key not in headers: - headers.append(key) - else: + for attr, val in vars(tuning_record).items(): + if hasattr(val, "__dict__"): + nested = vars(val) + if not nested: continue + unneeded_headers.append(attr) + for sub_attr, sub_val in nested.items(): + key = f"{attr}.{sub_attr}" + row[key] = sub_val + if key not in headers: + headers.append(key) else: - row[k] = v - if k not in headers and k != "knob": - headers.append(k) + row[attr] = val + if attr not in headers: + headers.append(attr) rows.append(row) + # Remove top-level attributes (e.g., 'knob') that were replaced by flattened nested fields. + headers = [h for h in headers if h not in unneeded_headers] + for row in rows: + for unneeded in unneeded_headers: + row.pop(unneeded, None) + return headers, rows diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index 065dbe57427..38157e209af 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -265,7 +265,6 @@ def test_flatten_records( "candidate_id": 0, "compile_status": False, "gen_id": 0, - "knob": None, "to_benchmark": True, "to_compile": True, }, From dc2510bdaf3fffb2004c474304ff488b61ecde25 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 18:21:49 +0000 Subject: [PATCH 12/25] Remove unneeded import --- sharktuner/sharktuner/candidate_ordering.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 20698336255..49669fb1745 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -1,6 +1,5 @@ import random import logging -import os import csv from typing import Optional, Any from dataclasses import dataclass From c5eb9be8b6383c12b779d5e2a5fbd25b09b6b6ec Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 21:31:39 +0000 Subject: [PATCH 13/25] Remove baseline from tuning records --- sharktuner/sharktuner/candidate_ordering.py | 30 +---- sharktuner/sharktuner/libtuner.py | 3 - sharktuner/tests/candidate_ordering_test.py | 126 ++++++++------------ 3 files changed, 55 insertions(+), 104 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 49669fb1745..cc50c017588 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -131,9 +131,6 @@ def init_tuning_records( knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int] ) -> list[TuningRecord]: tuning_records: list[TuningRecord] = [] - tuning_records.append( - TuningRecord(gen_id=0, candidate_id=0, to_compile=True, to_benchmark=True) - ) for can_idx, gen_idx in enumerate(sorted_order, start=1): tr = TuningRecord( @@ -155,37 +152,20 @@ def flatten_records( - Each record becomes one CSV row. - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) appear as individual columns. - Nested objects (e.g., `knob`) are flattened into columns like `knob.M`, `knob.tile_m`. - - The original top-level attribute (e.g., 'knob') is removed once nesting is flattened. """ rows = [] - headers = [] - unneeded_headers = [] - for tuning_record in tuning_records: row = {} for attr, val in vars(tuning_record).items(): - if hasattr(val, "__dict__"): - nested = vars(val) - if not nested: - continue - unneeded_headers.append(attr) - for sub_attr, sub_val in nested.items(): - key = f"{attr}.{sub_attr}" - row[key] = sub_val - if key not in headers: - headers.append(key) + if isinstance(val, common.KnobAssignment): + knob_dict = val.get_knobs() + for k, v in knob_dict.items(): + row[f"{attr}_{k}"] = v else: row[attr] = val - if attr not in headers: - headers.append(attr) rows.append(row) - # Remove top-level attributes (e.g., 'knob') that were replaced by flattened nested fields. - headers = [h for h in headers if h not in unneeded_headers] - for row in rows: - for unneeded in unneeded_headers: - row.pop(unneeded, None) + headers = list(row.keys()) return headers, rows diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py index 6c38048d7f2..bf7c0e45690 100644 --- a/sharktuner/sharktuner/libtuner.py +++ b/sharktuner/sharktuner/libtuner.py @@ -1275,7 +1275,6 @@ def benchmark( # Benchmarking baselines on each involved device. baseline_tracker = tuning_client.candidate_trackers[0] - tuning_client.tuning_records[0].to_benchmark = True first_baseline_result, subprocess_timeout_reference = benchmark_baseline( devices=args.devices, tuning_client=tuning_client, @@ -1283,10 +1282,8 @@ def benchmark( ) baseline_handler = BaselineResultHandler() baseline_handler.add_run(first_baseline_result) - tuning_client.tuning_records[0].benchmark_status = True if not baseline_handler.is_valid(): logging.warning("Baseline run failed.") - tuning_client.tuning_records[0].benchmark_status = False if tuning_client.is_auto_iree_benchmark_timeout(): logging.info( diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index 38157e209af..4e5b505407b 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -162,12 +162,6 @@ def test_reorder_assignments( def test_init_tuning_records( sample_knobs: list[Optional[common.KnobAssignment]], ) -> None: - tr0 = candidate_ordering.TuningRecord( - gen_id=0, - candidate_id=0, - to_compile=True, - to_benchmark=True, - ) tr1 = candidate_ordering.TuningRecord( gen_id=2, candidate_id=1, @@ -186,7 +180,7 @@ def test_init_tuning_records( sorted_order = [2, 0, 1] tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order) - expected = [tr0, tr1, tr2, tr3] + expected = [tr1, tr2, tr3] assert tuning_records == expected @@ -194,12 +188,6 @@ def test_init_tuning_records( def test_flatten_records( sample_knobs: list[Optional[common.KnobAssignment]], ): - tr0 = candidate_ordering.TuningRecord( - gen_id=0, - candidate_id=0, - to_compile=True, - to_benchmark=True, - ) tr1 = candidate_ordering.TuningRecord( gen_id=2, candidate_id=1, @@ -217,13 +205,29 @@ def test_flatten_records( to_benchmark=True, benchmark_time_us=153.56, ) - sample_tuning_records = [tr0, tr1, tr2] + sample_tuning_records = [tr1, tr2] headers, rows = candidate_ordering.flatten_records(sample_tuning_records) expected_headers = [ "gen_id", "candidate_id", + "knob_M", + "knob_N", + "knob_K", + "knob_tile_m", + "knob_tile_n", + "knob_tile_k", + "knob_wg_x", + "knob_wg_y", + "knob_wg_z", + "knob_subgroup_m_cnt", + "knob_subgroup_n_cnt", + "knob_intrinsic_mn", + "knob_intrinsic_k", + "knob_subgroup_m", + "knob_subgroup_n", + "knob_subgroup_k", "to_compile", "compile_status", "to_benchmark", @@ -234,40 +238,10 @@ def test_flatten_records( "benchmark_time_us", "benchmark_speedup", "benchmark_rank_order", - "knob.M", - "knob.N", - "knob.K", - "knob.tile_m", - "knob.tile_n", - "knob.tile_k", - "knob.wg_x", - "knob.wg_y", - "knob.wg_z", - "knob.subgroup_m_cnt", - "knob.subgroup_n_cnt", - "knob.intrinsic_mn", - "knob.intrinsic_k", - "knob.subgroup_m", - "knob.subgroup_n", - "knob.subgroup_k", ] assert headers == expected_headers expected_rows = [ - { - "baseline_benchmark_time_us": None, - "benchmark_device_id": None, - "benchmark_queue_position": None, - "benchmark_rank_order": None, - "benchmark_speedup": None, - "benchmark_status": False, - "benchmark_time_us": None, - "candidate_id": 0, - "compile_status": False, - "gen_id": 0, - "to_benchmark": True, - "to_compile": True, - }, { "baseline_benchmark_time_us": 123.4, "benchmark_device_id": "hip://2", @@ -279,22 +253,22 @@ def test_flatten_records( "candidate_id": 1, "compile_status": False, "gen_id": 2, - "knob.K": 1280, - "knob.M": 2048, - "knob.N": 10240, - "knob.intrinsic_k": 16, - "knob.intrinsic_mn": 16, - "knob.subgroup_k": 0, - "knob.subgroup_m": 0, - "knob.subgroup_m_cnt": 2, - "knob.subgroup_n": 0, - "knob.subgroup_n_cnt": 4, - "knob.tile_k": 16, - "knob.tile_m": 64, - "knob.tile_n": 256, - "knob.wg_x": 256, - "knob.wg_y": 2, - "knob.wg_z": 1, + "knob_K": 1280, + "knob_M": 2048, + "knob_N": 10240, + "knob_intrinsic_k": 16, + "knob_intrinsic_mn": 16, + "knob_subgroup_k": 0, + "knob_subgroup_m": 0, + "knob_subgroup_m_cnt": 2, + "knob_subgroup_n": 0, + "knob_subgroup_n_cnt": 4, + "knob_tile_k": 16, + "knob_tile_m": 64, + "knob_tile_n": 256, + "knob_wg_x": 256, + "knob_wg_y": 2, + "knob_wg_z": 1, "to_benchmark": False, "to_compile": True, }, @@ -309,22 +283,22 @@ def test_flatten_records( "candidate_id": 2, "compile_status": False, "gen_id": 1, - "knob.K": 1280, - "knob.M": 2048, - "knob.N": 10240, - "knob.intrinsic_k": 16, - "knob.intrinsic_mn": 16, - "knob.subgroup_k": 0, - "knob.subgroup_m": 0, - "knob.subgroup_m_cnt": 1, - "knob.subgroup_n": 0, - "knob.subgroup_n_cnt": 5, - "knob.tile_k": 80, - "knob.tile_m": 64, - "knob.tile_n": 320, - "knob.wg_x": 320, - "knob.wg_y": 1, - "knob.wg_z": 1, + "knob_K": 1280, + "knob_M": 2048, + "knob_N": 10240, + "knob_intrinsic_k": 16, + "knob_intrinsic_mn": 16, + "knob_subgroup_k": 0, + "knob_subgroup_m": 0, + "knob_subgroup_m_cnt": 1, + "knob_subgroup_n": 0, + "knob_subgroup_n_cnt": 5, + "knob_tile_k": 80, + "knob_tile_m": 64, + "knob_tile_n": 320, + "knob_wg_x": 320, + "knob_wg_y": 1, + "knob_wg_z": 1, "to_benchmark": True, "to_compile": False, }, From b9aea37882ad00e789a1573255b77b6036a5c44c Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 21:56:29 +0000 Subject: [PATCH 14/25] Rename output csv filename --- sharktuner/dispatch_tuner/dispatch_tuner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sharktuner/dispatch_tuner/dispatch_tuner.py b/sharktuner/dispatch_tuner/dispatch_tuner.py index a2a344d35fb..ac159da392e 100644 --- a/sharktuner/dispatch_tuner/dispatch_tuner.py +++ b/sharktuner/dispatch_tuner/dispatch_tuner.py @@ -160,8 +160,9 @@ def main() -> None: print("Check the summary in:") print(summary_log_file.resolve()) - output_csv_name = f"tuning_{args.dispatch_file.stem}.csv" - csv_path = Path(path_config.base_dir / output_csv_name) + output_csv_name = f"{args.dispatch_file.stem}_candidate_analysis.csv" + csv_path = Path(path_config.base_dir) / output_csv_name + libtuner.candidate_ordering.export_record_to_csv( dispatch_tuner.tuning_records, csv_path ) From 98128a26d68a559bcf095d3d34f3457d5a68c128 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 22:12:21 +0000 Subject: [PATCH 15/25] Add docstr --- sharktuner/sharktuner/candidate_ordering.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index cc50c017588..781eb84c2ae 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -112,6 +112,12 @@ def reorder_assignments( @dataclass class TuningRecord: + """ + Records a candidate's knob configuration and tuning results. Used for analyzing the + candidate search space and evaluating the effectiveness of heuristics used for + candidate ordering. + """ + gen_id: int # Original index from candidate generation. candidate_id: int # Index in candidate_trackers after sorting. knob: Optional[common.KnobAssignment] = None From 0c3ec6506a2b9b51ce6afd6a22672f5090d52e35 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 22:17:38 +0000 Subject: [PATCH 16/25] Rename var --- sharktuner/sharktuner/candidate_ordering.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 781eb84c2ae..66b681c4dc4 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -138,11 +138,11 @@ def init_tuning_records( ) -> list[TuningRecord]: tuning_records: list[TuningRecord] = [] - for can_idx, gen_idx in enumerate(sorted_order, start=1): + for sorted_position, original_gen_index in enumerate(sorted_order, start=1): tr = TuningRecord( - gen_id=gen_idx, - candidate_id=can_idx, - knob=knobs[gen_idx], + gen_id=original_gen_index, + candidate_id=sorted_position, + knob=knobs[original_gen_index], ) tuning_records.append(tr) From 51be60fd65087974ab30aa21cc656a48fb756d2d Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 22:18:03 +0000 Subject: [PATCH 17/25] Assert empty list --- sharktuner/sharktuner/candidate_ordering.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 66b681c4dc4..12e896c9555 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -177,8 +177,7 @@ def flatten_records( def export_record_to_csv(tuning_records: list[TuningRecord], dest_file: Path) -> None: - if not tuning_records: - return None + assert tuning_records headers, rows = flatten_records(tuning_records) From 68d543cda6ce5eeb33a51e49ea5af4e7cca9b2a7 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 22:20:36 +0000 Subject: [PATCH 18/25] Store var val --- sharktuner/sharktuner/libtuner.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py index bf7c0e45690..5334a444646 100644 --- a/sharktuner/sharktuner/libtuner.py +++ b/sharktuner/sharktuner/libtuner.py @@ -1303,15 +1303,13 @@ def benchmark( ) for res in candidate_results: - tuning_client.tuning_records[ - res.candidate_id - ].benchmark_device_id = res.device_id - if res.time == math.inf: + c_id = res.candidate_id + res_time = res.time + tuning_client.tuning_records[c_id].benchmark_device_id = res.device_id + if res_time == math.inf: continue - tuning_client.tuning_records[res.candidate_id].benchmark_status = True - tuning_client.tuning_records[res.candidate_id].benchmark_time_us = round( - res.time, 2 - ) + tuning_client.tuning_records[c_id].benchmark_status = True + tuning_client.tuning_records[c_id].benchmark_time_us = round(res_time, 2) second_baseline_result, _ = benchmark_baseline( devices=args.devices, From 03033e1683a1255a24ba07d662c6487d30bad47d Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 22:22:21 +0000 Subject: [PATCH 19/25] Small fix --- sharktuner/tests/candidate_ordering_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index 4e5b505407b..e2dc4456e7d 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -180,9 +180,7 @@ def test_init_tuning_records( sorted_order = [2, 0, 1] tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order) - expected = [tr1, tr2, tr3] - - assert tuning_records == expected + assert tuning_records == [tr1, tr2, tr3] def test_flatten_records( From 6393140bd89be0b2e020e8957f3085ba3bf5d693 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Thu, 13 Nov 2025 22:26:34 +0000 Subject: [PATCH 20/25] Rename func --- sharktuner/sharktuner/candidate_ordering.py | 2 +- sharktuner/sharktuner/libtuner.py | 4 ++-- sharktuner/tests/candidate_ordering_test.py | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 12e896c9555..ee1ea43cdcb 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -133,7 +133,7 @@ class TuningRecord: benchmark_rank_order: Optional[int] = None -def init_tuning_records( +def build_tuning_records_from_order( knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int] ) -> list[TuningRecord]: tuning_records: list[TuningRecord] = [] diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py index 5334a444646..a769247b265 100644 --- a/sharktuner/sharktuner/libtuner.py +++ b/sharktuner/sharktuner/libtuner.py @@ -846,8 +846,8 @@ def generate_candidate_specs( # Total number of configs = candidates generated + baseline. assert len(config_specs) == len(solutions) + 1 - tuning_client.tuning_records = candidate_ordering.init_tuning_records( - knobs, sorted_order + tuning_client.tuning_records = ( + candidate_ordering.build_tuning_records_from_order(knobs, sorted_order) ) knob_assignments = [dispatch_tuner.get_knob_assignment(s) for s in solutions] diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index e2dc4456e7d..59d464f0adb 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -159,7 +159,7 @@ def test_reorder_assignments( ) -def test_init_tuning_records( +def test_build_tuning_records_from_order( sample_knobs: list[Optional[common.KnobAssignment]], ) -> None: tr1 = candidate_ordering.TuningRecord( @@ -178,7 +178,9 @@ def test_init_tuning_records( knob=sample_knobs[1], ) sorted_order = [2, 0, 1] - tuning_records = candidate_ordering.init_tuning_records(sample_knobs, sorted_order) + tuning_records = candidate_ordering.build_tuning_records_from_order( + sample_knobs, sorted_order + ) assert tuning_records == [tr1, tr2, tr3] From bf9a64a4e562c7e92d0762896198e2b577261503 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Mon, 17 Nov 2025 20:11:18 +0000 Subject: [PATCH 21/25] Add comments --- sharktuner/sharktuner/candidate_ordering.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index ee1ea43cdcb..a0e5fdcd91d 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -113,9 +113,10 @@ def reorder_assignments( @dataclass class TuningRecord: """ - Records a candidate's knob configuration and tuning results. Used for analyzing the - candidate search space and evaluating the effectiveness of heuristics used for - candidate ordering. + Records a candidate's knob configuration and tuning results. + + Used to analyze the candidate search space and to evaluate the + effectiveness of candidate ordering heuristics. """ gen_id: int # Original index from candidate generation. @@ -137,7 +138,7 @@ def build_tuning_records_from_order( knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int] ) -> list[TuningRecord]: tuning_records: list[TuningRecord] = [] - + # candidate_id = 0 is the baseline and is not included in tuning_records. for sorted_position, original_gen_index in enumerate(sorted_order, start=1): tr = TuningRecord( gen_id=original_gen_index, From 4beab33faa486d57a76e85b1d2b9abcc00b151cb Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Mon, 17 Nov 2025 21:42:01 +0000 Subject: [PATCH 22/25] Remove redundant test part --- sharktuner/tests/candidate_ordering_test.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index 59d464f0adb..0eb4690d0d6 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -241,17 +241,13 @@ def test_flatten_records( ] assert headers == expected_headers - expected_rows = [ + expected_key_rows = [ { "baseline_benchmark_time_us": 123.4, "benchmark_device_id": "hip://2", "benchmark_queue_position": 1, - "benchmark_rank_order": None, "benchmark_speedup": 1.5, - "benchmark_status": False, - "benchmark_time_us": None, "candidate_id": 1, - "compile_status": False, "gen_id": 2, "knob_K": 1280, "knob_M": 2048, @@ -269,19 +265,11 @@ def test_flatten_records( "knob_wg_x": 256, "knob_wg_y": 2, "knob_wg_z": 1, - "to_benchmark": False, "to_compile": True, }, { - "baseline_benchmark_time_us": None, - "benchmark_device_id": None, - "benchmark_queue_position": None, - "benchmark_rank_order": None, - "benchmark_speedup": None, - "benchmark_status": False, "benchmark_time_us": 153.56, "candidate_id": 2, - "compile_status": False, "gen_id": 1, "knob_K": 1280, "knob_M": 2048, @@ -300,7 +288,9 @@ def test_flatten_records( "knob_wg_y": 1, "knob_wg_z": 1, "to_benchmark": True, - "to_compile": False, }, ] - assert rows == expected_rows + + for expected_key_row, actual_row in zip(expected_key_rows, rows): + for attr, val in expected_key_row.items(): + assert actual_row[attr] == val From 1fcb9549d43cc5768d5dd9b7b8fb36172504ac2b Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Mon, 17 Nov 2025 22:08:35 +0000 Subject: [PATCH 23/25] Fix mypy --- sharktuner/tests/candidate_ordering_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index 0eb4690d0d6..a7c136d17a8 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -241,7 +241,7 @@ def test_flatten_records( ] assert headers == expected_headers - expected_key_rows = [ + expected_key_rows: list[dict] = [ { "baseline_benchmark_time_us": 123.4, "benchmark_device_id": "hip://2", From f34cc44b9bdd7768ca2cd5ec05dd2b1cac77e69b Mon Sep 17 00:00:00 2001 From: RattataKing <46631728+RattataKing@users.noreply.github.com> Date: Tue, 18 Nov 2025 11:27:52 -0500 Subject: [PATCH 24/25] Fix comments Co-authored-by: Jakub Kuderski --- sharktuner/sharktuner/candidate_ordering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index a0e5fdcd91d..849ef935cb7 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -120,7 +120,7 @@ class TuningRecord: """ gen_id: int # Original index from candidate generation. - candidate_id: int # Index in candidate_trackers after sorting. + candidate_id: int # Index in candidate_trackers after reordering. knob: Optional[common.KnobAssignment] = None to_compile: bool = False compile_status: bool = False From 7533307a88716ef6368a3ec95d5ec30f06c22a60 Mon Sep 17 00:00:00 2001 From: Amily Wu Date: Tue, 18 Nov 2025 18:11:49 +0000 Subject: [PATCH 25/25] Fix code --- sharktuner/sharktuner/candidate_ordering.py | 9 +++--- sharktuner/sharktuner/libtuner.py | 21 +++++++------ sharktuner/tests/candidate_ordering_test.py | 34 +-------------------- 3 files changed, 16 insertions(+), 48 deletions(-) diff --git a/sharktuner/sharktuner/candidate_ordering.py b/sharktuner/sharktuner/candidate_ordering.py index 849ef935cb7..7a20020df45 100644 --- a/sharktuner/sharktuner/candidate_ordering.py +++ b/sharktuner/sharktuner/candidate_ordering.py @@ -152,7 +152,7 @@ def build_tuning_records_from_order( def flatten_records( tuning_records: list[TuningRecord], -) -> tuple[list[str], list[dict[str, Any]]]: +) -> list[dict[str, Any]]: """ Flatten a list of `TuningRecord` objects into CSV headers and rows. @@ -172,15 +172,14 @@ def flatten_records( row[attr] = val rows.append(row) - headers = list(row.keys()) - - return headers, rows + return rows def export_record_to_csv(tuning_records: list[TuningRecord], dest_file: Path) -> None: assert tuning_records - headers, rows = flatten_records(tuning_records) + rows = flatten_records(tuning_records) + headers = list(rows[0].keys()) with open(dest_file, "w", newline="", encoding="utf-8") as f: writer = csv.DictWriter(f, fieldnames=headers) diff --git a/sharktuner/sharktuner/libtuner.py b/sharktuner/sharktuner/libtuner.py index a769247b265..6b1a3ddd6a6 100644 --- a/sharktuner/sharktuner/libtuner.py +++ b/sharktuner/sharktuner/libtuner.py @@ -1334,16 +1334,17 @@ def benchmark( candidate_results, prune_slow_candidates=tuning_client.should_prune_slower_candidates(), ) - if all_candidates_with_speedup: - for i, handler_res in enumerate(all_candidates_with_speedup, start=1): - benchmark_res, speedup = handler_res - cid, _, device_id = benchmark_res - baseline_res = baseline_handler.get_average_result_us(device_id) - tuning_client.tuning_records[cid].baseline_benchmark_time_us = ( - round(baseline_res, 2) if baseline_res else None - ) - tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5) - tuning_client.tuning_records[cid].benchmark_rank_order = i + + # Best candidate gets rank 1. + for i, handler_res in enumerate(all_candidates_with_speedup, start=1): + benchmark_res, speedup = handler_res + cid, _, device_id = benchmark_res + baseline_res = baseline_handler.get_average_result_us(device_id) + tuning_client.tuning_records[cid].baseline_benchmark_time_us = ( + round(baseline_res, 2) if baseline_res else None + ) + tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5) + tuning_client.tuning_records[cid].benchmark_rank_order = i top_candidates_with_speedup = ( all_candidates_with_speedup[:num_candidates] diff --git a/sharktuner/tests/candidate_ordering_test.py b/sharktuner/tests/candidate_ordering_test.py index a7c136d17a8..fc9654b315f 100644 --- a/sharktuner/tests/candidate_ordering_test.py +++ b/sharktuner/tests/candidate_ordering_test.py @@ -207,39 +207,7 @@ def test_flatten_records( ) sample_tuning_records = [tr1, tr2] - headers, rows = candidate_ordering.flatten_records(sample_tuning_records) - - expected_headers = [ - "gen_id", - "candidate_id", - "knob_M", - "knob_N", - "knob_K", - "knob_tile_m", - "knob_tile_n", - "knob_tile_k", - "knob_wg_x", - "knob_wg_y", - "knob_wg_z", - "knob_subgroup_m_cnt", - "knob_subgroup_n_cnt", - "knob_intrinsic_mn", - "knob_intrinsic_k", - "knob_subgroup_m", - "knob_subgroup_n", - "knob_subgroup_k", - "to_compile", - "compile_status", - "to_benchmark", - "benchmark_device_id", - "benchmark_queue_position", - "benchmark_status", - "baseline_benchmark_time_us", - "benchmark_time_us", - "benchmark_speedup", - "benchmark_rank_order", - ] - assert headers == expected_headers + rows = candidate_ordering.flatten_records(sample_tuning_records) expected_key_rows: list[dict] = [ {