File tree Expand file tree Collapse file tree 7 files changed +72
-1473
lines changed
vllm/model_executor/layers/fused_moe/configs Expand file tree Collapse file tree 7 files changed +72
-1473
lines changed Original file line number Diff line number Diff line change 11{
22 "1" : {
33 "BLOCK_SIZE_M" : 16 ,
4- "BLOCK_SIZE_N" : 16 ,
5- "BLOCK_SIZE_K" : 128 ,
4+ "BLOCK_SIZE_N" : 128 ,
5+ "BLOCK_SIZE_K" : 256 ,
66 "GROUP_SIZE_M" : 1 ,
7- "num_warps" : 1 ,
7+ "num_warps" : 8 ,
88 "num_stages" : 2 ,
9- "waves_per_eu" : 0 ,
10- "matrix_instr_nonkdim" : 16 ,
11- "kpack" : 2
9+ "waves_per_eu" : 0
1210 },
1311 "2" : {
1412 "BLOCK_SIZE_M" : 16 ,
15- "BLOCK_SIZE_N" : 64 ,
16- "BLOCK_SIZE_K" : 128 ,
13+ "BLOCK_SIZE_N" : 128 ,
14+ "BLOCK_SIZE_K" : 256 ,
1715 "GROUP_SIZE_M" : 1 ,
18- "num_warps" : 4 ,
16+ "num_warps" : 8 ,
1917 "num_stages" : 2 ,
20- "waves_per_eu" : 0 ,
21- "matrix_instr_nonkdim" : 16 ,
22- "kpack" : 2
18+ "waves_per_eu" : 0
2319 },
2420 "4" : {
2521 "BLOCK_SIZE_M" : 16 ,
26- "BLOCK_SIZE_N" : 64 ,
27- "BLOCK_SIZE_K" : 128 ,
22+ "BLOCK_SIZE_N" : 128 ,
23+ "BLOCK_SIZE_K" : 256 ,
2824 "GROUP_SIZE_M" : 1 ,
29- "num_warps" : 4 ,
25+ "num_warps" : 8 ,
3026 "num_stages" : 2 ,
31- "waves_per_eu" : 0 ,
32- "matrix_instr_nonkdim" : 16 ,
33- "kpack" : 2
27+ "waves_per_eu" : 0
3428 },
3529 "8" : {
3630 "BLOCK_SIZE_M" : 16 ,
37- "BLOCK_SIZE_N" : 256 ,
38- "BLOCK_SIZE_K" : 64 ,
31+ "BLOCK_SIZE_N" : 128 ,
32+ "BLOCK_SIZE_K" : 128 ,
3933 "GROUP_SIZE_M" : 1 ,
4034 "num_warps" : 8 ,
4135 "num_stages" : 2 ,
42- "waves_per_eu" : 0 ,
43- "matrix_instr_nonkdim" : 16 ,
44- "kpack" : 1
36+ "waves_per_eu" : 0
4537 },
4638 "16" : {
4739 "BLOCK_SIZE_M" : 16 ,
48- "BLOCK_SIZE_N" : 64 ,
40+ "BLOCK_SIZE_N" : 128 ,
4941 "BLOCK_SIZE_K" : 128 ,
5042 "GROUP_SIZE_M" : 1 ,
5143 "num_warps" : 2 ,
5244 "num_stages" : 2 ,
53- "waves_per_eu" : 0 ,
54- "matrix_instr_nonkdim" : 16 ,
55- "kpack" : 2
45+ "waves_per_eu" : 0
5646 },
5747 "24" : {
5848 "BLOCK_SIZE_M" : 16 ,
59- "BLOCK_SIZE_N" : 64 ,
49+ "BLOCK_SIZE_N" : 128 ,
6050 "BLOCK_SIZE_K" : 128 ,
6151 "GROUP_SIZE_M" : 1 ,
62- "num_warps" : 4 ,
52+ "num_warps" : 2 ,
6353 "num_stages" : 2 ,
64- "waves_per_eu" : 0 ,
65- "matrix_instr_nonkdim" : 16 ,
66- "kpack" : 2
54+ "waves_per_eu" : 0
6755 },
6856 "32" : {
6957 "BLOCK_SIZE_M" : 16 ,
70- "BLOCK_SIZE_N" : 64 ,
58+ "BLOCK_SIZE_N" : 128 ,
7159 "BLOCK_SIZE_K" : 128 ,
7260 "GROUP_SIZE_M" : 4 ,
73- "num_warps" : 4 ,
61+ "num_warps" : 2 ,
7462 "num_stages" : 2 ,
75- "waves_per_eu" : 0 ,
76- "matrix_instr_nonkdim" : 16 ,
77- "kpack" : 2
63+ "waves_per_eu" : 0
7864 },
7965 "48" : {
8066 "BLOCK_SIZE_M" : 16 ,
81- "BLOCK_SIZE_N" : 64 ,
67+ "BLOCK_SIZE_N" : 128 ,
8268 "BLOCK_SIZE_K" : 128 ,
8369 "GROUP_SIZE_M" : 4 ,
84- "num_warps" : 4 ,
70+ "num_warps" : 2 ,
8571 "num_stages" : 2 ,
86- "waves_per_eu" : 0 ,
87- "matrix_instr_nonkdim" : 16 ,
88- "kpack" : 2
72+ "waves_per_eu" : 0
8973 },
9074 "64" : {
91- "BLOCK_SIZE_M" : 32 ,
92- "BLOCK_SIZE_N" : 64 ,
75+ "BLOCK_SIZE_M" : 16 ,
76+ "BLOCK_SIZE_N" : 128 ,
9377 "BLOCK_SIZE_K" : 128 ,
94- "GROUP_SIZE_M" : 4 ,
95- "num_warps" : 8 ,
78+ "GROUP_SIZE_M" : 1 ,
79+ "num_warps" : 2 ,
9680 "num_stages" : 2 ,
97- "waves_per_eu" : 0 ,
98- "matrix_instr_nonkdim" : 16 ,
99- "kpack" : 2
81+ "waves_per_eu" : 0
10082 },
10183 "96" : {
102- "BLOCK_SIZE_M" : 32 ,
103- "BLOCK_SIZE_N" : 64 ,
84+ "BLOCK_SIZE_M" : 16 ,
85+ "BLOCK_SIZE_N" : 128 ,
10486 "BLOCK_SIZE_K" : 128 ,
105- "GROUP_SIZE_M" : 1 ,
87+ "GROUP_SIZE_M" : 8 ,
10688 "num_warps" : 8 ,
10789 "num_stages" : 2 ,
108- "waves_per_eu" : 0 ,
109- "matrix_instr_nonkdim" : 16 ,
110- "kpack" : 2
90+ "waves_per_eu" : 0
11191 },
11292 "128" : {
113- "BLOCK_SIZE_M" : 64 ,
114- "BLOCK_SIZE_N" : 64 ,
93+ "BLOCK_SIZE_M" : 16 ,
94+ "BLOCK_SIZE_N" : 128 ,
11595 "BLOCK_SIZE_K" : 128 ,
116- "GROUP_SIZE_M" : 1 ,
117- "num_warps" : 8 ,
96+ "GROUP_SIZE_M" : 4 ,
97+ "num_warps" : 4 ,
11898 "num_stages" : 2 ,
119- "waves_per_eu" : 0 ,
120- "matrix_instr_nonkdim" : 16 ,
121- "kpack" : 2
99+ "waves_per_eu" : 0
122100 },
123101 "256" : {
124- "BLOCK_SIZE_M" : 64 ,
102+ "BLOCK_SIZE_M" : 16 ,
125103 "BLOCK_SIZE_N" : 128 ,
126104 "BLOCK_SIZE_K" : 128 ,
127- "GROUP_SIZE_M" : 1 ,
128- "num_warps" : 8 ,
105+ "GROUP_SIZE_M" : 8 ,
106+ "num_warps" : 4 ,
129107 "num_stages" : 2 ,
130- "waves_per_eu" : 0 ,
131- "matrix_instr_nonkdim" : 16 ,
132- "kpack" : 2
108+ "waves_per_eu" : 0
133109 },
134110 "512" : {
135- "BLOCK_SIZE_M" : 128 ,
111+ "BLOCK_SIZE_M" : 32 ,
136112 "BLOCK_SIZE_N" : 128 ,
137113 "BLOCK_SIZE_K" : 128 ,
138- "GROUP_SIZE_M" : 1 ,
139- "num_warps" : 8 ,
114+ "GROUP_SIZE_M" : 8 ,
115+ "num_warps" : 4 ,
140116 "num_stages" : 2 ,
141- "waves_per_eu" : 0 ,
142- "matrix_instr_nonkdim" : 32 ,
143- "kpack" : 2
117+ "waves_per_eu" : 0
144118 },
145119 "1024" : {
146- "BLOCK_SIZE_M" : 128 ,
120+ "BLOCK_SIZE_M" : 64 ,
147121 "BLOCK_SIZE_N" : 128 ,
148- "BLOCK_SIZE_K" : 64 ,
149- "GROUP_SIZE_M" : 4 ,
150- "num_warps" : 4 ,
122+ "BLOCK_SIZE_K" : 128 ,
123+ "GROUP_SIZE_M" : 8 ,
124+ "num_warps" : 2 ,
151125 "num_stages" : 2 ,
152- "waves_per_eu" : 0 ,
153- "matrix_instr_nonkdim" : 16 ,
154- "kpack" : 2
126+ "waves_per_eu" : 0
155127 },
156128 "1536" : {
157- "BLOCK_SIZE_M" : 128 ,
129+ "BLOCK_SIZE_M" : 64 ,
158130 "BLOCK_SIZE_N" : 128 ,
159- "BLOCK_SIZE_K" : 64 ,
131+ "BLOCK_SIZE_K" : 128 ,
160132 "GROUP_SIZE_M" : 4 ,
161- "num_warps" : 8 ,
133+ "num_warps" : 2 ,
162134 "num_stages" : 2 ,
163- "waves_per_eu" : 0 ,
164- "matrix_instr_nonkdim" : 16 ,
165- "kpack" : 2
135+ "waves_per_eu" : 0
166136 },
167137 "2048" : {
168138 "BLOCK_SIZE_M" : 128 ,
169- "BLOCK_SIZE_N" : 128 ,
170- "BLOCK_SIZE_K" : 64 ,
171- "GROUP_SIZE_M" : 1 ,
172- "num_warps" : 8 ,
139+ "BLOCK_SIZE_N" : 256 ,
140+ "BLOCK_SIZE_K" : 128 ,
141+ "GROUP_SIZE_M" : 8 ,
142+ "num_warps" : 4 ,
173143 "num_stages" : 2 ,
174- "waves_per_eu" : 0 ,
175- "matrix_instr_nonkdim" : 16 ,
176- "kpack" : 2
144+ "waves_per_eu" : 0
177145 },
178146 "3072" : {
179147 "BLOCK_SIZE_M" : 128 ,
180- "BLOCK_SIZE_N" : 128 ,
181- "BLOCK_SIZE_K" : 64 ,
182- "GROUP_SIZE_M" : 1 ,
183- "num_warps" : 8 ,
148+ "BLOCK_SIZE_N" : 256 ,
149+ "BLOCK_SIZE_K" : 128 ,
150+ "GROUP_SIZE_M" : 8 ,
151+ "num_warps" : 4 ,
184152 "num_stages" : 2 ,
185- "waves_per_eu" : 0 ,
186- "matrix_instr_nonkdim" : 16 ,
187- "kpack" : 2
153+ "waves_per_eu" : 0
188154 },
189155 "4096" : {
190156 "BLOCK_SIZE_M" : 128 ,
191- "BLOCK_SIZE_N" : 128 ,
192- "BLOCK_SIZE_K" : 64 ,
193- "GROUP_SIZE_M" : 1 ,
194- "num_warps" : 8 ,
157+ "BLOCK_SIZE_N" : 256 ,
158+ "BLOCK_SIZE_K" : 128 ,
159+ "GROUP_SIZE_M" : 4 ,
160+ "num_warps" : 4 ,
195161 "num_stages" : 2 ,
196- "waves_per_eu" : 0 ,
197- "matrix_instr_nonkdim" : 16 ,
198- "kpack" : 2
162+ "waves_per_eu" : 0
199163 }
200164}
You can’t perform that action at this time.
0 commit comments