gemmbench: Generate benchmarks from supplied dtypes instead of filtering (#76)

andfau-amd · web-flow · commit af062d3889c5 · 2025-04-24T12:05:48.000+02:00
Prior to this commit, gemmbench's problems.py hardcoded a set of
datatypes for each benchmark, generating all possibilities ahead of
time. The --dtypes and --raw_accumulators command-line arguments were
then used to filter the resulting set of benchmarks.

This commit refactors problems.py to have less redundancy and to not
hardcode any datatypes. Now only the shapes are hardcoded, and the
--dtypes and --raw_accumulators command-line arguments control which
datatypes are used in the result. One consequence of this is that now we
get the same set of shapes no matter which datatype is requested, which
provides more thorough testing for types like i8 that previously only
had a very small number of hardcoded shapes.

To avoid an explosion in the number of tests, the default set of
datatypes is changed to just f16. CI is changed to run f16 and i8 (in
separate job steps for better visibility); note that bf16 CI coverage is
removed.

Also, the padded LLaMA shapes are removed.
diff --git a/.github/workflows/run_bench.yml b/.github/workflows/run_bench.yml
@@ -46,15 +46,20 @@ jobs:
           source bench_venv/bin/activate
           python -m iree_kernel_benchmark.attentionbench
 
-      - name: TK GEMM
+      - name: TK GEMM FP16
         run: |
           source bench_venv/bin/activate
-          python -m iree_kernel_benchmark.gemmbench --tk
+          python -m iree_kernel_benchmark.gemmbench --tk --dtypes f16
 
-      - name: GEMM
+      - name: GEMM FP16
         run: |
           source bench_venv/bin/activate
-          python -m iree_kernel_benchmark.gemmbench
+          python -m iree_kernel_benchmark.gemmbench --dtypes f16
+
+      - name: GEMM I8
+        run: |
+          source bench_venv/bin/activate
+          python -m iree_kernel_benchmark.gemmbench --dtypes i8
 
       - name: Roofline Plots
         run: |
diff --git a/README.md b/README.md
@@ -60,12 +60,16 @@ python -m iree_kernel_benchmark.convbench --tk
 python -m iree_kernel_benchmark.gemmbench
 ```
 
+This will only generate FP16 benchmarks. You may want to specify a different set of types with `--dtypes`, e.g. `--dtypes i8 bf16`.
+
 ### TK GEMM Benchmarking
 
 ```
 python -m iree_kernel_benchmark.gemmbench --tk
 ```
 
+Same remark about types applies.
+
 ### Attention Benchmarking
 
 ```
diff --git a/iree_kernel_benchmark/gemmbench/__main__.py b/iree_kernel_benchmark/gemmbench/__main__.py
@@ -62,13 +62,18 @@ def compile_gemm(
         "--dtypes",
         nargs="+",
         default=[],
-        help="List of data types to benchmark. Defaults to all supported types.",
+        help="List of data types to generate benchmarks for. Defaults to f16. Other options include f32, bf16, i8.",
+    )
+    parser.add_argument(
+        "--raw_accumulators",
+        action="store_true",
+        help="If true, generate benchmark matmuls returning the raw accumulator type with no truncation. If false (default), generate benchmark matmuls where results are truncated and cast to the input element type.",
     )
     parser.add_argument(
         "--variants",
         nargs="+",
         default=[],
-        help="List of matmul variants to benchmark. Default to all variants: NN, NT, TN, and TT.",
+        help="List of matmul variants to filter benchmarks by. Default to all variants: NN, NT, TN, and TT.",
     )
     parser.add_argument(
         "--tag_regex",
@@ -102,15 +107,10 @@ def compile_gemm(
         default=None,
         help="Directory to which executable files will be dumped.",
     )
-    parser.add_argument(
-        "--raw_accumulators",
-        action="store_true",
-        help="If true, benchmark matmuls returning the raw accumulator type with no truncation. If false (default), the results are truncated and cast to the input element type.",
-    )
 
     args = parser.parse_args()
     # Handle default values here, since list args are not compatible with defaulted lists.
-    requested_dtypes = ["f16", "bf16", "i8"] if not args.dtypes else list(args.dtypes)
+    requested_dtypes = ["f16"] if not args.dtypes else list(args.dtypes)
     requested_variants = (
         ["NN", "NT", "TN", "TT"] if not args.variants else list(args.variants)
     )
@@ -129,14 +129,18 @@ def compile_gemm(
         sys.exit()
 
     tk = args.tk
-    configs = get_tk_gemm_configs() if tk else get_gemm_configs()
+    configs = []
+    for dtype in requested_dtypes:
+        configs += (
+            get_tk_gemm_configs(dtype, args.raw_accumulators)
+            if tk
+            else get_gemm_configs(dtype, args.raw_accumulators)
+        )
     configs = get_matching_configs(
         configs,
-        requested_dtypes,
         requested_variants,
         args.tag_regex,
         args.config_regex,
-        args.raw_accumulators,
     )
     print(f"Generated {len(configs)} gemm configs.")
 
diff --git a/iree_kernel_benchmark/gemmbench/problems.py b/iree_kernel_benchmark/gemmbench/problems.py