8 changes: 8 additions & 0 deletions sharktuner/dispatch_tuner/dispatch_tuner.py
@@ -159,3 +159,11 @@ def main() -> None:
print(path_config.run_log.resolve())
print("Check the summary in:")
print(summary_log_file.resolve())

output_csv_name = f"{args.dispatch_file.stem}_candidate_analysis.csv"
csv_path = Path(path_config.base_dir) / output_csv_name

libtuner.candidate_ordering.export_record_to_csv(
dispatch_tuner.tuning_records, csv_path
)
print(f"Wrote tuning records CSV: {csv_path}")
85 changes: 83 additions & 2 deletions sharktuner/sharktuner/candidate_ordering.py
@@ -1,7 +1,11 @@
from enum import Enum
from typing import Optional, Callable
import random
import logging
import csv
from typing import Optional, Any
from dataclasses import dataclass
from pathlib import Path
from enum import Enum
from typing import Optional, Callable

from iree.compiler.dialects import iree_gpu # type: ignore

@@ -104,3 +108,80 @@ def reorder_assignments(
return indices
case _:
assert False


@dataclass
class TuningRecord:
"""
Records a candidate's knob configuration and tuning results.

Used to analyze the candidate search space and to evaluate the
effectiveness of candidate ordering heuristics.
"""

gen_id: int # Original index from candidate generation.
candidate_id: int # Index in candidate_trackers after reordering.
knob: Optional[common.KnobAssignment] = None
to_compile: bool = False
compile_status: bool = False
to_benchmark: bool = False
benchmark_device_id: Optional[str] = None
benchmark_queue_position: Optional[int] = None
benchmark_status: bool = False
baseline_benchmark_time_us: Optional[float] = None
benchmark_time_us: Optional[float] = None
benchmark_speedup: Optional[float] = None
benchmark_rank_order: Optional[int] = None


def build_tuning_records_from_order(
knobs: list[Optional[common.KnobAssignment]], sorted_order: list[int]
) -> list[TuningRecord]:
tuning_records: list[TuningRecord] = []
# candidate_id = 0 is the baseline and is not included in tuning_records.
for sorted_position, original_gen_index in enumerate(sorted_order, start=1):
tr = TuningRecord(
gen_id=original_gen_index,
candidate_id=sorted_position,
knob=knobs[original_gen_index],
)
tuning_records.append(tr)

return tuning_records


def flatten_records(
tuning_records: list[TuningRecord],
) -> list[dict[str, Any]]:
"""
    Flatten a list of `TuningRecord` objects into CSV rows.

    - Each record becomes one CSV row (a dict keyed by column name).
    - Top-level attributes (e.g., `gen_id`, `benchmark_time_us`) appear as individual columns.
    - Nested `KnobAssignment` objects (the `knob` field) are flattened into columns like `knob_M`, `knob_tile_m`.
"""
rows = []
for tuning_record in tuning_records:
row = {}
for attr, val in vars(tuning_record).items():
if isinstance(val, common.KnobAssignment):
knob_dict = val.get_knobs()
for k, v in knob_dict.items():
row[f"{attr}_{k}"] = v
else:
row[attr] = val
rows.append(row)

return rows


def export_record_to_csv(tuning_records: list[TuningRecord], dest_file: Path) -> None:
assert tuning_records

rows = flatten_records(tuning_records)
headers = list(rows[0].keys())

with open(dest_file, "w", newline="", encoding="utf-8") as f:
writer = csv.DictWriter(f, fieldnames=headers)
writer.writeheader()
writer.writerows(rows)
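As a quick end-to-end illustration of the new helpers (a sketch, not part of the diff; it assumes the module is importable as `sharktuner.candidate_ordering` and leaves the knob assignments as `None`, since constructing a real `common.KnobAssignment` is out of scope here):

from pathlib import Path
from sharktuner import candidate_ordering  # assumed import path

# knobs is indexed by original generation index; index 0 is the baseline.
knobs = [None, None, None, None]
# Original generation indices in the order produced by the reordering heuristic.
sorted_order = [3, 1, 2]

records = candidate_ordering.build_tuning_records_from_order(knobs, sorted_order)
# records[0] == TuningRecord(gen_id=3, candidate_id=1, knob=None, ...)

candidate_ordering.export_record_to_csv(records, Path("candidate_analysis.csv"))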
31 changes: 31 additions & 0 deletions sharktuner/sharktuner/libtuner.py
@@ -125,6 +125,7 @@ def __init__(self, tuner_context: common.TunerContext):
self.tuner_context = tuner_context
self.candidate_trackers: list[CandidateTracker] = []
self.target_info: Optional[iree_gpu.TargetInfo] = None
self.tuning_records: list[candidate_ordering.TuningRecord] = []

@abstractmethod
def get_iree_compile_flags(self) -> list[str]:
@@ -845,6 +846,10 @@ def generate_candidate_specs(
# Total number of configs = candidates generated + baseline.
assert len(config_specs) == len(solutions) + 1

tuning_client.tuning_records = (
candidate_ordering.build_tuning_records_from_order(knobs, sorted_order)
)

knob_assignments = [dispatch_tuner.get_knob_assignment(s) for s in solutions]
logging.debug("candidate_gen.py ends")
handle_error(
@@ -1193,6 +1198,7 @@ def compile(
# Set the source and output file paths for compilation of each candidate.
path_config.compiled_dir.mkdir(parents=True, exist_ok=True)
for i in candidates:
tuning_client.tuning_records[i].to_compile = True
vmfb_file_name = path_config.get_candidate_vmfb_filename(
tuning_client.candidate_trackers[i].candidate_id
)
@@ -1231,6 +1237,7 @@ def compile(
# Remove duplicate vmfbs from the candidate list.
compiled_candidate_hashes = []
for candidate_id in compiled_candidates:
tuning_client.tuning_records[candidate_id].compile_status = True
candidate_vmfb = tuning_client.candidate_trackers[
candidate_id
].compiled_vmfb_path
@@ -1283,6 +1290,9 @@ def benchmark(
f"Smart candidate benchmark timeout is set to {subprocess_timeout_reference:.2f}s"
)
candidate_indices = [i for i in compiled_candidates if i != 0]
for i, idx in enumerate(candidate_indices, start=1):
tuning_client.tuning_records[idx].benchmark_queue_position = i
tuning_client.tuning_records[idx].to_benchmark = True

candidate_results = benchmark_candidates(
candidate_indices=candidate_indices,
@@ -1292,6 +1302,15 @@
benchmark_time=benchmark_time, # Only candidate benchmark has time limit.
)

for res in candidate_results:
c_id = res.candidate_id
res_time = res.time
tuning_client.tuning_records[c_id].benchmark_device_id = res.device_id
if res_time == math.inf:
continue
tuning_client.tuning_records[c_id].benchmark_status = True
tuning_client.tuning_records[c_id].benchmark_time_us = round(res_time, 2)

second_baseline_result, _ = benchmark_baseline(
devices=args.devices,
tuning_client=tuning_client,
@@ -1315,6 +1334,18 @@
candidate_results,
prune_slow_candidates=tuning_client.should_prune_slower_candidates(),
)

# Best candidate gets rank 1.
for i, handler_res in enumerate(all_candidates_with_speedup, start=1):
benchmark_res, speedup = handler_res
cid, _, device_id = benchmark_res
baseline_res = baseline_handler.get_average_result_us(device_id)
tuning_client.tuning_records[cid].baseline_benchmark_time_us = (
round(baseline_res, 2) if baseline_res else None
)
tuning_client.tuning_records[cid].benchmark_speedup = round(speedup, 5)
tuning_client.tuning_records[cid].benchmark_rank_order = i

top_candidates_with_speedup = (
all_candidates_with_speedup[:num_candidates]
if num_candidates
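A rough post-run analysis sketch, outside the diff: once the CSV is written, the records can be read back with the standard csv module. Column names follow the TuningRecord fields above (knob fields, when present, are flattened into knob_<name> columns); the file name below is hypothetical.

import csv

with open("conv_dispatch_0_candidate_analysis.csv", newline="", encoding="utf-8") as f:
    rows = list(csv.DictReader(f))

# Keep candidates that completed benchmarking (rank cells are empty otherwise)
# and list the top entries; rank 1 is the best-performing candidate.
ranked = [r for r in rows if r["benchmark_rank_order"]]
ranked.sort(key=lambda r: int(r["benchmark_rank_order"]))
for r in ranked[:5]:
    print(r["candidate_id"], r["benchmark_time_us"], r["benchmark_speedup"])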