|
| 1 | +# MLIR Microkernel Template Guide |
| 2 | + |
| 3 | +## Data-Tiled Kernels |
| 4 | + |
| 5 | +### `pingpong_dt_medium_f4E2M1FN` |
| 6 | +```bash |
| 7 | +python mlir_ukernel_gen.py iree_uk_amdgpu_dt_scaled_matmul_f4E2M1FN.mlir.in \ |
| 8 | + -D INTRINSICS_M=4 INTRINSICS_N=8 INTRINSICS_K=2 SUBGROUPS_M=2 SUBGROUPS_N=2 ARCH=gfx950 \ |
| 9 | + -o iree_uk_amdgpu_dt_scaled_matmul_f4E2M1FN.mlir |
| 10 | +``` |
| 11 | + |
| 12 | +### `pingpong_dt_large_f16` |
| 13 | +```bash |
| 14 | +python mlir_ukernel_gen.py iree_uk_amdgpu_dt_matmul_large.mlir.in \ |
| 15 | + -D ELEM_TYPE=f16 INTRINSIC=MFMA_F32_16x16x16_F16 \ |
| 16 | + INTRINSICS_M=8 INTRINSICS_N=4 INTRINSICS_K=1 SUBGROUPS_M=2 SUBGROUPS_N=4 ARCH=gfx942 \ |
| 17 | + SIZE_MIN_0=512 SIZE_DIV_0=64 SIZE_MIN_1=32832 SIZE_DIV_1=64 SIZE_MIN_2=512 SIZE_DIV_2=64 \ |
| 18 | + -o iree_uk_amdgpu_dt_matmul_f16.mlir |
| 19 | +``` |
| 20 | + |
| 21 | +### `pingpong_dt_large_f8E4M3FNUZ` |
| 22 | +```bash |
| 23 | +python mlir_ukernel_gen.py iree_uk_amdgpu_dt_matmul_large.mlir.in \ |
| 24 | + -D ELEM_TYPE=f8E4M3FNUZ INTRINSIC=MFMA_F32_16x16x32_F8E4M3FNUZ \ |
| 25 | + INTRINSICS_M=4 INTRINSICS_N=8 INTRINSICS_K=1 SUBGROUPS_M=2 SUBGROUPS_N=2 ARCH=gfx942 \ |
| 26 | + SIZE_MIN_0=64 SIZE_MIN_1=2048 SIZE_MAX_1=8192 \ |
| 27 | + -o iree_uk_amdgpu_dt_matmul_f8E4M3FNUZ_large.mlir |
| 28 | +``` |
| 29 | + |
| 30 | +### `pingpong_dt_medium_f8E4M3FNUZ` |
| 31 | +```bash |
| 32 | +python mlir_ukernel_gen.py iree_uk_amdgpu_dt_matmul_medium.mlir.in \ |
| 33 | + -D ELEM_TYPE=f8E4M3FNUZ INTRINSIC=MFMA_F32_16x16x32_F8E4M3FNUZ \ |
| 34 | + INTRINSICS_M=8 INTRINSICS_N=2 INTRINSICS_K=2 SUBGROUPS_M=1 SUBGROUPS_N=8 ARCH=gfx942 \ |
| 35 | + SIZE_MIN_0=32 \ |
| 36 | + -o iree_uk_amdgpu_dt_matmul_f8E4M3FNUZ_medium.mlir |
| 37 | +``` |
| 38 | + |
| 39 | +### `pingpong_dt_large_f8E4M3FN` |
| 40 | +```bash |
| 41 | +python mlir_ukernel_gen.py iree_uk_amdgpu_dt_matmul_large.mlir.in \ |
| 42 | + -D ELEM_TYPE=f8E4M3FN INTRINSIC=MFMA_F32_16x16x32_F8E4M3FN \ |
| 43 | + INTRINSICS_M=4 INTRINSICS_N=8 INTRINSICS_K=1 SUBGROUPS_M=2 SUBGROUPS_N=2 ARCH=gfx950 \ |
| 44 | + SIZE_MIN_0=64 SIZE_MIN_1=2048 SIZE_MAX_1=8192 \ |
| 45 | + -o iree_uk_amdgpu_dt_matmul_f8E4M3FN_large.mlir |
| 46 | +``` |
| 47 | + |
| 48 | +### `pingpong_dt_medium_f8E4M3FN` |
| 49 | +```bash |
| 50 | +python mlir_ukernel_gen.py iree_uk_amdgpu_dt_matmul_medium.mlir.in \ |
| 51 | + -D ELEM_TYPE=f8E4M3FN INTRINSIC=MFMA_F32_16x16x32_F8E4M3FN \ |
| 52 | + INTRINSICS_M=8 INTRINSICS_N=2 INTRINSICS_K=2 SUBGROUPS_M=1 SUBGROUPS_N=8 ARCH=gfx950 \ |
| 53 | + SIZE_MIN_0=32 \ |
| 54 | + -o iree_uk_amdgpu_dt_matmul_f8E4M3FN_medium.mlir |
| 55 | +``` |
| 56 | + |
| 57 | +--- |
| 58 | + |
| 59 | +## Non-Data-Tiled Kernels |
| 60 | + |
| 61 | +### `pingpong_large_f16` |
| 62 | +```bash |
| 63 | +python mlir_ukernel_gen.py iree_uk_amdgpu_matmul_large.mlir.in \ |
| 64 | + -D ELEM_TYPE=f16 INTRINSIC=MFMA_F32_16x16x16_F16 \ |
| 65 | + INTRINSICS_M=8 INTRINSICS_N=4 INTRINSICS_K=4 \ |
| 66 | + SUBGROUPS_M=2 SUBGROUPS_N=4 ARCH=gfx942 \ |
| 67 | + -o pingpong_large_f16.mlir |
| 68 | +``` |
| 69 | + |
| 70 | +### `pingpong_medium_f16_expanded` |
| 71 | +```bash |
| 72 | +python mlir_ukernel_gen.py iree_uk_amdgpu_matmul_medium.mlir.in \ |
| 73 | + -D ELEM_TYPE=f16 INTRINSIC=MFMA_F32_16x16x16_F16 \ |
| 74 | + INTRINSICS_M=4 INTRINSICS_N=4 INTRINSICS_K=8 \ |
| 75 | + SUBGROUPS_M=2 SUBGROUPS_N=4 ARCH=gfx942 \ |
| 76 | + -o pingpong_medium_f16_expanded.mlir |
| 77 | +``` |
| 78 | + |
| 79 | +### `pingpong_large_bf16` |
| 80 | +```bash |
| 81 | +python mlir_ukernel_gen.py iree_uk_amdgpu_matmul_large.mlir.in \ |
| 82 | + -D ELEM_TYPE=bf16 INTRINSIC=MFMA_F32_16x16x16_BF16 \ |
| 83 | + INTRINSICS_M=8 INTRINSICS_N=4 INTRINSICS_K=4 \ |
| 84 | + SUBGROUPS_M=2 SUBGROUPS_N=4 ARCH=gfx942 \ |
| 85 | + -o pingpong_large_bf16.mlir |
| 86 | +``` |
| 87 | + |
| 88 | +### `pingpong_medium_bf16_expanded` |
| 89 | +```bash |
| 90 | +python mlir_ukernel_gen.py iree_uk_amdgpu_matmul_medium.mlir.in \ |
| 91 | + -D ELEM_TYPE=bf16 INTRINSIC=MFMA_F32_16x16x16_BF16 \ |
| 92 | + INTRINSICS_M=4 INTRINSICS_N=4 INTRINSICS_K=8 \ |
| 93 | + SUBGROUPS_M=2 SUBGROUPS_N=4 ARCH=gfx942 \ |
| 94 | + -o pingpong_medium_bf16_expanded.mlir |
| 95 | +``` |
| 96 | + |
| 97 | +### `pingpong_medium_f8E4M3FNUZ_expanded` |
| 98 | +```bash |
| 99 | +python mlir_ukernel_gen.py iree_uk_amdgpu_matmul_medium.mlir.in \ |
| 100 | + -D ELEM_TYPE=f8E4M3FNUZ INTRINSIC=MFMA_F32_16x16x32_F8E4M3FNUZ \ |
| 101 | + INTRINSICS_M=4 INTRINSICS_N=4 INTRINSICS_K=16 \ |
| 102 | + SUBGROUPS_M=2 SUBGROUPS_N=4 ARCH=gfx942 \ |
| 103 | + -o pingpong_medium_f8E4M3FNUZ_expanded.mlir |
| 104 | +``` |
| 105 | + |
| 106 | +### `pingpong_large_f8E4M3FNUZ_expanded` |
| 107 | +```bash |
| 108 | +python mlir_ukernel_gen.py iree_uk_amdgpu_matmul_large.mlir.in \ |
| 109 | + -D ELEM_TYPE=f8E4M3FNUZ INTRINSIC=MFMA_F32_16x16x32_F8E4M3FNUZ \ |
| 110 | + INTRINSICS_M=8 INTRINSICS_N=4 INTRINSICS_K=8 \ |
| 111 | + SUBGROUPS_M=2 SUBGROUPS_N=4 ARCH=gfx942 \ |
| 112 | + -o pingpong_large_f8E4M3FNUZ_expanded.mlir |
| 113 | +``` |
| 114 | + |
| 115 | +### `pingpong_medium_f8E4M3FN_expanded` |
| 116 | +```bash |
| 117 | +python mlir_ukernel_gen.py iree_uk_amdgpu_matmul_medium.mlir.in \ |
| 118 | + -D ELEM_TYPE=f8E4M3FN INTRINSIC=MFMA_F32_16x16x32_F8E4M3FN \ |
| 119 | + INTRINSICS_M=4 INTRINSICS_N=4 INTRINSICS_K=16 \ |
| 120 | + SUBGROUPS_M=2 SUBGROUPS_N=4 ARCH=gfx950 \ |
| 121 | + -o pingpong_medium_f8E4M3FN_expanded.mlir |
| 122 | +``` |
| 123 | + |
| 124 | +### `pingpong_large_f8E4M3FN_expanded` |
| 125 | +```bash |
| 126 | +python mlir_ukernel_gen.py iree_uk_amdgpu_matmul_large.mlir.in \ |
| 127 | + -D ELEM_TYPE=f8E4M3FN INTRINSIC=MFMA_F32_16x16x32_F8E4M3FN \ |
| 128 | + INTRINSICS_M=8 INTRINSICS_N=4 INTRINSICS_K=8 \ |
| 129 | + SUBGROUPS_M=2 SUBGROUPS_N=4 ARCH=gfx950 \ |
| 130 | + -o pingpong_large_f8E4M3FN_expanded.mlir |
| 131 | +``` |
0 commit comments