Commit e5bad4d

Obtain new Tier1 tuning problems from MIGraphX (#1873)
---------

Signed-off-by: Djordje Antic <[email protected]>
Co-authored-by: Djordje Antic <[email protected]>
1 parent 4f6e123 commit e5bad4d

File tree

4 files changed: +1396 −942 lines

Lines changed: 28 additions & 21 deletions
@@ -1,11 +1,11 @@
-# bert_base_cased_1_fp16_gpu.onnx
+# bert_base_cased_1_fp16_gpu.onnx and distilgpt2_1_fp16_gpu.onnx
 -t f16 -transQ false -transK true -transV false -transO false -g 12 -seq_len_q 384 -seq_len_k 384 -head_dim_qk 64 -head_dim_v 64
 
-# bert_large_uncased_1_fp16_gpu.onnx
+# bert_large_uncased_1_fp16_gpu.onnx and bert_large_mlperf
 -t f16 -transQ false -transK true -transV false -transO false -g 16 -seq_len_q 384 -seq_len_k 384 -head_dim_qk 64 -head_dim_v 64
 
-# distilgpt2_1_fp16_gpu.onnx
--t f16 -transQ false -transK true -transV false -transO false -g 12 -seq_len_q 384 -seq_len_k 384 -head_dim_qk 64 -head_dim_v 64
+# distilgpt2_1
+-t f32 -transQ false -transK true -transV false -transO false -g 12 -seq_len_q 384 -seq_len_k 384 -head_dim_qk 64 -head_dim_v 64
 
 # stable-diffusion-2-onnx-unet
 -t f16 -transQ false -transK true -transV false -transO false -g 10 -seq_len_q 4096 -seq_len_k 4096 -head_dim_qk 64 -head_dim_v 64
@@ -18,39 +18,46 @@
 
 # stable-diffusion-2-onnx vae_decoder
 -t f16 -transQ false -transK false -transV false -transO false -g 1 -seq_len_q 4096 -seq_len_k 4096 -head_dim_qk 512 -head_dim_v 512
+-t f16 -transQ false -transK true -transV false -transO false -g 1 -seq_len_q 4096 -seq_len_k 4096 -head_dim_qk 512 -head_dim_v 512
 
-# bert_large_mlperf.onnx
--t f16 -transQ false -transK true -transV false -transO false -g 16 -seq_len_q 384 -seq_len_k 384 -head_dim_qk 64 -head_dim_v 64
-
-# qwen1.5-7b fp16
+# qwen1.5-7b fp16, llama3_8b, mistral-7b
 -t f16 -transQ false -transK false -transV false -transO false -g 32 -seq_len_q 256 -seq_len_k 256 -head_dim_qk 128 -head_dim_v 128
+-t f16 -transQ false -transK true -transV false -transO false -g 32 -seq_len_q 256 -seq_len_k 256 -head_dim_qk 128 -head_dim_v 128
 
 # phi3
 -t f16 -transQ false -transK false -transV false -transO false -g 32 -seq_len_q 256 -seq_len_k 256 -head_dim_qk 96 -head_dim_v 96
 
-# llama3_8b model.onnx
--t f16 -transQ false -transK false -transV false -transO false -g 32 -seq_len_q 256 -seq_len_k 256 -head_dim_qk 128 -head_dim_v 128
+# phi3_3_8b
+-t f16 -transQ false -transK true -transV false -transO false -g 32 -seq_len_q 256 -seq_len_k 256 -head_dim_qk 96 -head_dim_v 96
 
 # whisper-large encoder_model.onnx
 -t f32 -transQ false -transK true -transV false -transO false -g 20 -seq_len_q 1500 -seq_len_k 1500 -head_dim_qk 64 -head_dim_v 64
 
-# mistral-7b
--t f16 -transQ false -transK false -transV false -transO false -g 32 -seq_len_q 256 -seq_len_k 256 -head_dim_qk 128 -head_dim_v 128
-
-# Flux
+# Flux and sd3.5 text_encoder
 -t f16 -transQ false -transK false -transV false -transO false -g 12 -seq_len_q 77 -seq_len_k 77 -head_dim_qk 64 -head_dim_v 64
 
-# sd3 text_encoder_3
--t f16 -transQ false -transK true -transV false -transO false -g 64 -seq_len_q 77 -seq_len_k 77 -head_dim_qk 64 -head_dim_v 64
+# flux_text_encoder
+-t f16 -transQ false -transK true -transV false -transO false -g 12 -seq_len_q 77 -seq_len_k 77 -head_dim_qk 64 -head_dim_v 64
 
-# sd3.5 text_encoder
--t f16 -transQ false -transK false -transV false -transO false -g 12 -seq_len_q 77 -seq_len_k 77 -head_dim_qk 64 -head_dim_v 64
+# sd3 and sd3.5 text_encoder_3
+-t f16 -transQ false -transK true -transV false -transO false -g 64 -seq_len_q 77 -seq_len_k 77 -head_dim_qk 64 -head_dim_v 64
 
 # sd3.5 text_encoder_2
 -t f16 -transQ false -transK false -transV false -transO false -g 20 -seq_len_q 77 -seq_len_k 77 -head_dim_qk 64 -head_dim_v 64
-
-# sd3.5 text_encoder_3
--t f16 -transQ false -transK true -transV false -transO false -g 64 -seq_len_q 77 -seq_len_k 77 -head_dim_qk 64 -head_dim_v 64
+-t f16 -transQ false -transK true -transV false -transO false -g 20 -seq_len_q 77 -seq_len_k 77 -head_dim_qk 64 -head_dim_v 64
 
 # sd3_medium_vae_encoder
 -t f32 -transQ false -transK false -transV false -transO false -g 2 -seq_len_q 64 -seq_len_k 64 -head_dim_qk 512 -head_dim_v 512
+-t f32 -transQ false -transK true -transV false -transO false -g 1 -seq_len_q 64 -seq_len_k 64 -head_dim_qk 512 -head_dim_v 512
+
+# llama2-7b-chat-hf-awq-int4-asym-gs128-onnx_prefill
+-t f16 -transQ false -transK true -transV false -transO false -causal true -return_lse false -g 32 -seq_len_q 4096 -seq_len_k 4096 -head_dim_qk 128 -head_dim_v 128
+
+# llama-2-7b-chat-hf-awq-int4-asym-gs128-onnx_decode
+-t f16 -transQ false -transK true -transV false -transO false -causal false -return_lse false -g 32 -seq_len_q 1 -seq_len_k 4096 -head_dim_qk 128 -head_dim_v 128
+
+# llama3_8b_kv_cache_prefill
+-t f32 -transQ false -transK true -transV false -transO false -causal true -return_lse false -g 32 -seq_len_q 4096 -seq_len_k 4096 -head_dim_qk 128 -head_dim_v 128
+
+# mistral_8b_kv_cache_decode
+-t f32 -transQ false -transK true -transV false -transO false -g 32 -seq_len_q 1 -seq_len_k 4097 -head_dim_qk 128 -head_dim_v 128
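Each line in this file appears to describe one attention tuning problem: `-t` gives the element type, `-g` is presumably the number of attention heads, `-seq_len_q`/`-seq_len_k` the query and key sequence lengths, `-head_dim_qk`/`-head_dim_v` the head dimensions, the `-trans*` flags seem to mark operands stored with their last two dimensions swapped, and the new `-causal`/`-seq_len_q 1` entries cover KV-cache prefill and decode shapes. The following is a minimal, hypothetical Python sketch of how such a line could map to tensor shapes; the parsing and the layout convention are assumptions for illustration, not the tuning driver's actual CLI.

```python
def parse_problem(line: str) -> dict:
    """Parse one problem line ("-t f16 -transQ false ... -head_dim_v 64")
    into a flag -> value dict. Illustrative only; the real driver's
    argument handling may differ."""
    toks = line.split()
    out = {}
    for flag, val in zip(toks[::2], toks[1::2]):
        key = flag.lstrip("-")
        if val in ("true", "false"):
            out[key] = val == "true"          # boolean flags
        elif val.isdigit():
            out[key] = int(val)               # sizes and counts
        else:
            out[key] = val                    # dtype token: f16 / f32
    return out

def qkv_shapes(p: dict):
    """Assumed layout: g heads, [seq, head_dim] per operand, where a
    -trans* flag means that operand's last two dims are stored swapped."""
    def maybe_t(shape, trans):
        return (shape[0], shape[2], shape[1]) if trans else shape
    q = maybe_t((p["g"], p["seq_len_q"], p["head_dim_qk"]), p["transQ"])
    k = maybe_t((p["g"], p["seq_len_k"], p["head_dim_qk"]), p["transK"])
    v = maybe_t((p["g"], p["seq_len_k"], p["head_dim_v"]), p["transV"])
    return q, k, v

# Encoder-style problem (the bert_base line above):
p = parse_problem("-t f16 -transQ false -transK true -transV false "
                  "-transO false -g 12 -seq_len_q 384 -seq_len_k 384 "
                  "-head_dim_qk 64 -head_dim_v 64")
print(qkv_shapes(p))  # ((12, 384, 64), (12, 64, 384), (12, 384, 64))

# KV-cache decode problem (mistral_8b_kv_cache_decode above): a single
# query token attends over the full 4097-entry cache.
d = parse_problem("-t f32 -transQ false -transK true -transV false "
                  "-transO false -g 32 -seq_len_q 1 -seq_len_k 4097 "
                  "-head_dim_qk 128 -head_dim_v 128")
print(qkv_shapes(d))  # ((32, 1, 128), (32, 128, 4097), (32, 4097, 128))
```

One plausible reading of the pattern in this diff is that `-transK true` corresponds to K already being stored as Kᵀ, the layout the Q·Kᵀ product consumes, which would explain why most of the added problems flip only that flag.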
