|
6 | 6 |
|
7 | 7 | import itertools
|
8 | 8 | import os
|
| 9 | +import sys |
9 | 10 |
|
10 | 11 | from dataclasses import dataclass
|
11 | 12 | from datetime import datetime
|
|
19 | 20 | import pandas as pd
|
20 | 21 | import seaborn as sns
|
21 | 22 | import torch
|
| 23 | +from tabulate import tabulate |
22 | 24 |
|
23 | 25 | try:
|
24 | 26 | from accelerators.utils.torch_profiler import profiler_or_nullcontext
|
@@ -401,6 +403,16 @@ def collect_kernels_to_profile(kernels: Optional[List[str]]) -> List[QuantizeOpB
|
401 | 403 | return [op for op in quantize_ops if op.name in kernels]
|
402 | 404 |
|
403 | 405 |
|
def print_kernels(kernels: Optional[List[str]]) -> None:
    """Print a table of every registered quantize op and its backend support.

    Each row holds the op's ``name`` plus "Yes"/"No" flags derived from its
    ``cuda`` and ``hip`` attributes. Rows are sorted and rendered to stdout
    with ``tabulate`` using the ``orgtbl`` format.

    Args:
        kernels: Currently unused; every op returned by
            ``get_quantize_ops()`` is listed regardless of this value. The
            parameter is kept so existing callers continue to work.

    Returns:
        None. The table is written to stdout. (The previous annotation
        claimed ``List[QuantizeOpBase]``, but nothing was ever returned.)
    """
    # Sorting the row tuples orders the table by op name first.
    rows = sorted(
        (op.name, "Yes" if op.cuda else "No", "Yes" if op.hip else "No")
        for op in get_quantize_ops()
    )
    print(tabulate(rows, headers=["Name", "CUDA", "ROCm"], tablefmt="orgtbl"))
| 414 | + |
| 415 | + |
404 | 416 | @click.command()
|
405 | 417 | @click.option(
|
406 | 418 | "--output-dir",
|
@@ -542,12 +554,13 @@ def invoke_main(
|
542 | 554 | if enable_amd_env_vars:
|
543 | 555 | set_amd_env_vars()
|
544 | 556 | # If kernel filter is provided, parse it. Else, benchmark all kernels.
|
545 |
| - quantize_ops = collect_kernels_to_profile( |
546 |
| - kernels.strip().split(",") if kernels else None |
547 |
| - ) |
| 557 | + all_kernels = kernels.strip().split(",") if kernels else None |
| 558 | + quantize_ops = collect_kernels_to_profile(all_kernels) |
548 | 559 |
|
549 | 560 | if len(quantize_ops) == 0:
|
550 |
| - raise Exception("No valid kernels to benchmark.") |
| 561 | + print("No valid kernels to benchmark. Available kernels:") |
| 562 | + print_kernels(all_kernels) |
| 563 | + sys.exit(1) |
551 | 564 |
|
552 | 565 | if num_iters < 1:
|
553 | 566 | print("Warning: Number of iterations must be at least 1.")
|
|
0 commit comments