Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions examples/configs/beam_search.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,19 @@
# Usage:
# python examples/run_opt_manager.py \
# --kernel-dir examples/optimize_01_matvec \
# --config examples/configs/beam_search.yaml
# --strategy beam_search

strategy: beam_search
num_workers: 4
max_rounds: 10
strategy_config:
num_top_kernels: 2
num_bottlenecks: 2
openai_model: gpt-5
openai_model: claude-opus-4.5
high_reasoning_effort: true

# Worker configuration
benchmark_warmup: 25
benchmark_repeat: 100
divergence_threshold: 50.0
target_platform: cuda
gpu_name: "NVIDIA H100 NVL 94GB"
10 changes: 8 additions & 2 deletions examples/configs/greedy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,18 @@
# Usage:
# python examples/run_opt_manager.py \
# --kernel-dir examples/optimize_01_matvec \
# --config examples/configs/greedy.yaml
# --strategy greedy

strategy: greedy
num_workers: 1
max_rounds: 20
strategy_config:
max_no_improvement: 5
openai_model: gpt-5
high_reasoning_effort: true

# Worker configuration
benchmark_warmup: 25
benchmark_repeat: 100
divergence_threshold: 50.0
target_platform: cuda
gpu_name: "NVIDIA H100 NVL 94GB"
24 changes: 24 additions & 0 deletions examples/configs/noop.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# No-op platform config — dry-run without GPU hardware.
#
# The pipeline still walks through every stage, but the verification,
# benchmarking, profiling, and worker optimisation steps are skipped.
# The initial kernel is returned unchanged.
#
# Usage:
# python examples/run_opt_manager.py \
# --kernel-dir examples/optimize_01_matvec \
# --strategy noop

strategy: greedy
num_workers: 1
strategy_config:
max_no_improvement: 1

# Worker configuration
benchmark_warmup: 25
benchmark_repeat: 100
divergence_threshold: 50.0
target_platform: cuda
gpu_name: "NVIDIA H100 NVL 94GB"

platform: noop
8 changes: 7 additions & 1 deletion examples/configs/nvidia.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,19 @@

strategy: beam_search
num_workers: 4
max_rounds: 10
strategy_config:
num_top_kernels: 2
num_bottlenecks: 2
openai_model: gpt-5
high_reasoning_effort: true

# Worker configuration
benchmark_warmup: 25
benchmark_repeat: 100
divergence_threshold: 50.0
target_platform: cuda
gpu_name: "NVIDIA H100 NVL 94GB"

platform:
# Manager-level components
verifier: nvidia
Expand Down
193 changes: 41 additions & 152 deletions examples/run_opt_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
using different search strategies:
- beam_search: Maintain top-N kernels, explore M bottlenecks each
- greedy: Simple single-best optimization with early termination
- noop: Dry-run without GPU hardware (returns initial kernel unchanged)

The OptimizationManager orchestrates parallel workers and persists optimization
history to a JSON database for analysis and resumption.
Expand All @@ -35,107 +36,36 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
load_dotenv()

# Hardcoded config directory relative to this script.
_CONFIGS_DIR = Path(__file__).resolve().parent / "configs"

def run_beam_search_optimization(
kernel_code: str,
problem_file: Path,
test_code: str,
log_dir: Path,
max_rounds: int = 5,
) -> dict:
"""
Run optimization using beam search strategy.

Beam search maintains top-N kernels and explores M bottleneck directions
for each, giving N×M parallel workers per round.

Args:
kernel_code: Initial kernel source code
problem_file: Path to problem.py
test_code: Test code for verification
log_dir: Directory for logs and artifacts
max_rounds: Maximum optimization rounds

Returns:
Optimization result dict
"""
print("\n" + "=" * 80)
print("BEAM SEARCH OPTIMIZATION")
print("=" * 80)
# Available strategy names; each one is loaded from _CONFIGS_DIR / "<name>.yaml".
_STRATEGIES = ["beam_search", "greedy", "noop", "nvidia"]

manager = OptimizationManager(
strategy="beam_search",
num_workers=4, # 2 top kernels × 2 bottlenecks
max_rounds=max_rounds,
log_dir=log_dir / "beam_search",
database_path=log_dir / "beam_search" / "program_db.json",
strategy_config={
"num_top_kernels": 2, # Keep top 2 kernels in beam
"num_bottlenecks": 2, # Explore 2 bottleneck directions each
},
openai_model="gpt-5",
high_reasoning_effort=True,
# Worker configuration
benchmark_warmup=25,
benchmark_repeat=100,
divergence_threshold=50.0,
target_platform="cuda",
gpu_name="NVIDIA H100 NVL 94GB",
)

return manager.run_optimization(
initial_kernel=kernel_code,
problem_file=problem_file,
test_code=test_code,
max_rounds=max_rounds,
)


def run_greedy_optimization(
def _run_strategy(
strategy: str,
kernel_code: str,
problem_file: Path,
test_code: str,
log_dir: Path,
max_rounds: int = 10,
max_rounds: int | None = None,
) -> dict:
"""
Run optimization using greedy strategy.

Greedy strategy always optimizes from the current best kernel
with a single worker. Terminates early if no improvement for
several consecutive rounds.

Args:
kernel_code: Initial kernel source code
problem_file: Path to problem.py
test_code: Test code for verification
log_dir: Directory for logs and artifacts
max_rounds: Maximum optimization rounds
"""Run a single strategy using its config file."""
config_path = _CONFIGS_DIR / f"{strategy}.yaml"
if not config_path.exists():
print(f"ERROR: config not found: {config_path}")
sys.exit(1)

Returns:
Optimization result dict
"""
print("\n" + "=" * 80)
print("GREEDY OPTIMIZATION")
print(f"{strategy.upper()} OPTIMIZATION")
print("=" * 80)
print(f"Config: {config_path}")

manager = OptimizationManager(
strategy="greedy",
num_workers=1, # Single worker
max_rounds=max_rounds,
log_dir=log_dir / "greedy",
database_path=log_dir / "greedy" / "program_db.json",
strategy_config={
"max_no_improvement": 5, # Early stop after 5 rounds without improvement
},
openai_model="gpt-5",
high_reasoning_effort=True,
# Worker configuration
benchmark_warmup=25,
benchmark_repeat=100,
divergence_threshold=50.0,
target_platform="cuda",
gpu_name="NVIDIA H100 NVL 94GB",
config=str(config_path),
log_dir=log_dir / strategy,
database_path=log_dir / strategy / "program_db.json",
)

return manager.run_optimization(
Expand Down Expand Up @@ -181,16 +111,9 @@ def main():
parser = argparse.ArgumentParser(
description="Optimize Triton kernels using different search strategies"
)
parser.add_argument(
"--config",
type=Path,
default=None,
help="Path to YAML config file (e.g. examples/configs/beam_search.yaml). "
"Overrides --strategy when provided.",
)
parser.add_argument(
"--strategy",
choices=["beam_search", "greedy", "all"],
choices=_STRATEGIES + ["all"],
default="beam_search",
help="Optimization strategy to use (default: beam_search)",
)
Expand Down Expand Up @@ -239,71 +162,37 @@ def main():
print(f"Log directory: {log_dir}")

# Run selected strategy
if args.config:
# ── Config-driven construction ──────────────────────────
print(f"\nUsing config: {args.config}")
manager = OptimizationManager(
config=str(args.config),
log_dir=log_dir / "configured",
database_path=log_dir / "configured" / "program_db.json",
)
result = manager.run_optimization(
initial_kernel=kernel_code,
problem_file=problem_file,
test_code=test_code,
)
print_result(result, "CONFIGURED", kernel_dir)

elif args.strategy == "beam_search":
result = run_beam_search_optimization(
kernel_code,
problem_file,
test_code,
log_dir,
args.max_rounds,
)
print_result(result, "BEAM_SEARCH", kernel_dir)

elif args.strategy == "greedy":
result = run_greedy_optimization(
kernel_code,
problem_file,
test_code,
log_dir,
args.max_rounds,
)
print_result(result, "GREEDY", kernel_dir)

elif args.strategy == "all":
# Run all strategies and compare
if args.strategy == "all":
results = {}

results["beam_search"] = run_beam_search_optimization(
kernel_code,
problem_file,
test_code,
log_dir,
args.max_rounds,
)
print_result(results["beam_search"], "BEAM_SEARCH", kernel_dir)

results["greedy"] = run_greedy_optimization(
kernel_code,
problem_file,
test_code,
log_dir,
args.max_rounds,
)
print_result(results["greedy"], "GREEDY", kernel_dir)
for strategy in _STRATEGIES:
results[strategy] = _run_strategy(
strategy,
kernel_code,
problem_file,
test_code,
log_dir,
max_rounds=args.max_rounds,
)
print_result(results[strategy], strategy.upper(), kernel_dir)

# Compare results
print("\n" + "=" * 80)
print("STRATEGY COMPARISON")
print("=" * 80)
for name, result in results.items():
status = "" if result["success"] else ""
status = "+" if result["success"] else "-"
time_str = f"{result['best_time_ms']:.4f}ms" if result["success"] else "N/A"
print(f" {status} {name:15} - Best: {time_str}")
else:
result = _run_strategy(
args.strategy,
kernel_code,
problem_file,
test_code,
log_dir,
max_rounds=args.max_rounds,
)
print_result(result, args.strategy.upper(), kernel_dir)


if __name__ == "__main__":
Expand Down
Loading