Skip to content

Commit 1a17f0f

Browse files
authored
re-tune fp8 mixtral8x22B (ROCm#304)
1 parent 97fd542 commit 1a17f0f

8 files changed

+777
-121
lines changed
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
{
2+
"1": {
3+
"BLOCK_SIZE_M": 16,
4+
"BLOCK_SIZE_N": 64,
5+
"BLOCK_SIZE_K": 256,
6+
"GROUP_SIZE_M": 1,
7+
"num_warps": 4,
8+
"num_stages": 0,
9+
"waves_per_eu": 0
10+
},
11+
"2": {
12+
"BLOCK_SIZE_M": 16,
13+
"BLOCK_SIZE_N": 16,
14+
"BLOCK_SIZE_K": 256,
15+
"GROUP_SIZE_M": 1,
16+
"num_warps": 4,
17+
"num_stages": 0,
18+
"waves_per_eu": 0
19+
},
20+
"4": {
21+
"BLOCK_SIZE_M": 16,
22+
"BLOCK_SIZE_N": 32,
23+
"BLOCK_SIZE_K": 256,
24+
"GROUP_SIZE_M": 1,
25+
"num_warps": 2,
26+
"num_stages": 0,
27+
"waves_per_eu": 0
28+
},
29+
"8": {
30+
"BLOCK_SIZE_M": 16,
31+
"BLOCK_SIZE_N": 64,
32+
"BLOCK_SIZE_K": 256,
33+
"GROUP_SIZE_M": 1,
34+
"num_warps": 4,
35+
"num_stages": 0,
36+
"waves_per_eu": 0
37+
},
38+
"16": {
39+
"BLOCK_SIZE_M": 16,
40+
"BLOCK_SIZE_N": 64,
41+
"BLOCK_SIZE_K": 256,
42+
"GROUP_SIZE_M": 1,
43+
"num_warps": 4,
44+
"num_stages": 0,
45+
"waves_per_eu": 0
46+
},
47+
"24": {
48+
"BLOCK_SIZE_M": 16,
49+
"BLOCK_SIZE_N": 64,
50+
"BLOCK_SIZE_K": 256,
51+
"GROUP_SIZE_M": 1,
52+
"num_warps": 4,
53+
"num_stages": 0,
54+
"waves_per_eu": 0
55+
},
56+
"32": {
57+
"BLOCK_SIZE_M": 16,
58+
"BLOCK_SIZE_N": 64,
59+
"BLOCK_SIZE_K": 256,
60+
"GROUP_SIZE_M": 4,
61+
"num_warps": 4,
62+
"num_stages": 0,
63+
"waves_per_eu": 0
64+
},
65+
"48": {
66+
"BLOCK_SIZE_M": 16,
67+
"BLOCK_SIZE_N": 64,
68+
"BLOCK_SIZE_K": 256,
69+
"GROUP_SIZE_M": 1,
70+
"num_warps": 4,
71+
"num_stages": 0,
72+
"waves_per_eu": 0
73+
},
74+
"64": {
75+
"BLOCK_SIZE_M": 32,
76+
"BLOCK_SIZE_N": 128,
77+
"BLOCK_SIZE_K": 128,
78+
"GROUP_SIZE_M": 4,
79+
"num_warps": 4,
80+
"num_stages": 0,
81+
"waves_per_eu": 0
82+
},
83+
"96": {
84+
"BLOCK_SIZE_M": 32,
85+
"BLOCK_SIZE_N": 128,
86+
"BLOCK_SIZE_K": 128,
87+
"GROUP_SIZE_M": 1,
88+
"num_warps": 4,
89+
"num_stages": 0,
90+
"waves_per_eu": 0
91+
},
92+
"128": {
93+
"BLOCK_SIZE_M": 64,
94+
"BLOCK_SIZE_N": 128,
95+
"BLOCK_SIZE_K": 128,
96+
"GROUP_SIZE_M": 4,
97+
"num_warps": 8,
98+
"num_stages": 0,
99+
"waves_per_eu": 0
100+
},
101+
"256": {
102+
"BLOCK_SIZE_M": 64,
103+
"BLOCK_SIZE_N": 128,
104+
"BLOCK_SIZE_K": 128,
105+
"GROUP_SIZE_M": 1,
106+
"num_warps": 8,
107+
"num_stages": 0,
108+
"waves_per_eu": 0
109+
},
110+
"512": {
111+
"BLOCK_SIZE_M": 64,
112+
"BLOCK_SIZE_N": 128,
113+
"BLOCK_SIZE_K": 128,
114+
"GROUP_SIZE_M": 1,
115+
"num_warps": 8,
116+
"num_stages": 0,
117+
"waves_per_eu": 0
118+
},
119+
"1024": {
120+
"BLOCK_SIZE_M": 128,
121+
"BLOCK_SIZE_N": 256,
122+
"BLOCK_SIZE_K": 128,
123+
"GROUP_SIZE_M": 1,
124+
"num_warps": 8,
125+
"num_stages": 0,
126+
"waves_per_eu": 0
127+
},
128+
"1536": {
129+
"BLOCK_SIZE_M": 128,
130+
"BLOCK_SIZE_N": 256,
131+
"BLOCK_SIZE_K": 128,
132+
"GROUP_SIZE_M": 1,
133+
"num_warps": 8,
134+
"num_stages": 0,
135+
"waves_per_eu": 0
136+
},
137+
"2048": {
138+
"BLOCK_SIZE_M": 128,
139+
"BLOCK_SIZE_N": 256,
140+
"BLOCK_SIZE_K": 128,
141+
"GROUP_SIZE_M": 1,
142+
"num_warps": 8,
143+
"num_stages": 0,
144+
"waves_per_eu": 0
145+
},
146+
"3072": {
147+
"BLOCK_SIZE_M": 128,
148+
"BLOCK_SIZE_N": 256,
149+
"BLOCK_SIZE_K": 128,
150+
"GROUP_SIZE_M": 1,
151+
"num_warps": 8,
152+
"num_stages": 0,
153+
"waves_per_eu": 0
154+
},
155+
"4096": {
156+
"BLOCK_SIZE_M": 256,
157+
"BLOCK_SIZE_N": 256,
158+
"BLOCK_SIZE_K": 64,
159+
"GROUP_SIZE_M": 1,
160+
"num_warps": 8,
161+
"num_stages": 0,
162+
"waves_per_eu": 0
163+
}
164+
}

vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8.json

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"1": {
33
"BLOCK_SIZE_M": 16,
4-
"BLOCK_SIZE_N": 16,
4+
"BLOCK_SIZE_N": 64,
55
"BLOCK_SIZE_K": 256,
66
"GROUP_SIZE_M": 1,
7-
"num_warps": 1,
7+
"num_warps": 4,
88
"num_stages": 0,
99
"waves_per_eu": 0
1010
},
@@ -13,16 +13,16 @@
1313
"BLOCK_SIZE_N": 16,
1414
"BLOCK_SIZE_K": 256,
1515
"GROUP_SIZE_M": 1,
16-
"num_warps": 1,
16+
"num_warps": 4,
1717
"num_stages": 0,
1818
"waves_per_eu": 0
1919
},
2020
"4": {
2121
"BLOCK_SIZE_M": 16,
22-
"BLOCK_SIZE_N": 64,
22+
"BLOCK_SIZE_N": 32,
2323
"BLOCK_SIZE_K": 256,
2424
"GROUP_SIZE_M": 1,
25-
"num_warps": 4,
25+
"num_warps": 2,
2626
"num_stages": 0,
2727
"waves_per_eu": 0
2828
},
@@ -49,7 +49,7 @@
4949
"BLOCK_SIZE_N": 64,
5050
"BLOCK_SIZE_K": 256,
5151
"GROUP_SIZE_M": 1,
52-
"num_warps": 2,
52+
"num_warps": 4,
5353
"num_stages": 0,
5454
"waves_per_eu": 0
5555
},
@@ -73,61 +73,61 @@
7373
},
7474
"64": {
7575
"BLOCK_SIZE_M": 32,
76-
"BLOCK_SIZE_N": 64,
76+
"BLOCK_SIZE_N": 128,
7777
"BLOCK_SIZE_K": 128,
78-
"GROUP_SIZE_M": 1,
79-
"num_warps": 2,
78+
"GROUP_SIZE_M": 4,
79+
"num_warps": 4,
8080
"num_stages": 0,
8181
"waves_per_eu": 0
8282
},
8383
"96": {
8484
"BLOCK_SIZE_M": 32,
85-
"BLOCK_SIZE_N": 64,
85+
"BLOCK_SIZE_N": 128,
8686
"BLOCK_SIZE_K": 128,
8787
"GROUP_SIZE_M": 1,
88-
"num_warps": 2,
88+
"num_warps": 4,
8989
"num_stages": 0,
9090
"waves_per_eu": 0
9191
},
9292
"128": {
9393
"BLOCK_SIZE_M": 64,
94-
"BLOCK_SIZE_N": 64,
94+
"BLOCK_SIZE_N": 128,
9595
"BLOCK_SIZE_K": 128,
9696
"GROUP_SIZE_M": 4,
97-
"num_warps": 4,
97+
"num_warps": 8,
9898
"num_stages": 0,
9999
"waves_per_eu": 0
100100
},
101101
"256": {
102-
"BLOCK_SIZE_M": 128,
102+
"BLOCK_SIZE_M": 64,
103103
"BLOCK_SIZE_N": 128,
104104
"BLOCK_SIZE_K": 128,
105-
"GROUP_SIZE_M": 4,
106-
"num_warps": 2,
105+
"GROUP_SIZE_M": 1,
106+
"num_warps": 8,
107107
"num_stages": 0,
108108
"waves_per_eu": 0
109109
},
110110
"512": {
111-
"BLOCK_SIZE_M": 128,
111+
"BLOCK_SIZE_M": 64,
112112
"BLOCK_SIZE_N": 128,
113113
"BLOCK_SIZE_K": 128,
114114
"GROUP_SIZE_M": 1,
115-
"num_warps": 2,
115+
"num_warps": 8,
116116
"num_stages": 0,
117117
"waves_per_eu": 0
118118
},
119119
"1024": {
120120
"BLOCK_SIZE_M": 128,
121-
"BLOCK_SIZE_N": 128,
121+
"BLOCK_SIZE_N": 256,
122122
"BLOCK_SIZE_K": 128,
123123
"GROUP_SIZE_M": 1,
124124
"num_warps": 8,
125125
"num_stages": 0,
126126
"waves_per_eu": 0
127127
},
128128
"1536": {
129-
"BLOCK_SIZE_M": 256,
130-
"BLOCK_SIZE_N": 128,
129+
"BLOCK_SIZE_M": 128,
130+
"BLOCK_SIZE_N": 256,
131131
"BLOCK_SIZE_K": 128,
132132
"GROUP_SIZE_M": 1,
133133
"num_warps": 8,
@@ -144,8 +144,8 @@
144144
"waves_per_eu": 0
145145
},
146146
"3072": {
147-
"BLOCK_SIZE_M": 256,
148-
"BLOCK_SIZE_N": 128,
147+
"BLOCK_SIZE_M": 128,
148+
"BLOCK_SIZE_N": 256,
149149
"BLOCK_SIZE_K": 128,
150150
"GROUP_SIZE_M": 1,
151151
"num_warps": 8,
@@ -155,9 +155,9 @@
155155
"4096": {
156156
"BLOCK_SIZE_M": 256,
157157
"BLOCK_SIZE_N": 256,
158-
"BLOCK_SIZE_K": 128,
158+
"BLOCK_SIZE_K": 64,
159159
"GROUP_SIZE_M": 1,
160-
"num_warps": 4,
160+
"num_warps": 8,
161161
"num_stages": 0,
162162
"waves_per_eu": 0
163163
}

0 commit comments

Comments
 (0)