File tree Expand file tree Collapse file tree 8 files changed +777
-121
lines changed
vllm/model_executor/layers/fused_moe/configs Expand file tree Collapse file tree 8 files changed +777
-121
lines changed Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_SIZE_M" : 16 ,
4+ "BLOCK_SIZE_N" : 64 ,
5+ "BLOCK_SIZE_K" : 256 ,
6+ "GROUP_SIZE_M" : 1 ,
7+ "num_warps" : 4 ,
8+ "num_stages" : 0 ,
9+ "waves_per_eu" : 0
10+ },
11+ "2" : {
12+ "BLOCK_SIZE_M" : 16 ,
13+ "BLOCK_SIZE_N" : 16 ,
14+ "BLOCK_SIZE_K" : 256 ,
15+ "GROUP_SIZE_M" : 1 ,
16+ "num_warps" : 4 ,
17+ "num_stages" : 0 ,
18+ "waves_per_eu" : 0
19+ },
20+ "4" : {
21+ "BLOCK_SIZE_M" : 16 ,
22+ "BLOCK_SIZE_N" : 32 ,
23+ "BLOCK_SIZE_K" : 256 ,
24+ "GROUP_SIZE_M" : 1 ,
25+ "num_warps" : 2 ,
26+ "num_stages" : 0 ,
27+ "waves_per_eu" : 0
28+ },
29+ "8" : {
30+ "BLOCK_SIZE_M" : 16 ,
31+ "BLOCK_SIZE_N" : 64 ,
32+ "BLOCK_SIZE_K" : 256 ,
33+ "GROUP_SIZE_M" : 1 ,
34+ "num_warps" : 4 ,
35+ "num_stages" : 0 ,
36+ "waves_per_eu" : 0
37+ },
38+ "16" : {
39+ "BLOCK_SIZE_M" : 16 ,
40+ "BLOCK_SIZE_N" : 64 ,
41+ "BLOCK_SIZE_K" : 256 ,
42+ "GROUP_SIZE_M" : 1 ,
43+ "num_warps" : 4 ,
44+ "num_stages" : 0 ,
45+ "waves_per_eu" : 0
46+ },
47+ "24" : {
48+ "BLOCK_SIZE_M" : 16 ,
49+ "BLOCK_SIZE_N" : 64 ,
50+ "BLOCK_SIZE_K" : 256 ,
51+ "GROUP_SIZE_M" : 1 ,
52+ "num_warps" : 4 ,
53+ "num_stages" : 0 ,
54+ "waves_per_eu" : 0
55+ },
56+ "32" : {
57+ "BLOCK_SIZE_M" : 16 ,
58+ "BLOCK_SIZE_N" : 64 ,
59+ "BLOCK_SIZE_K" : 256 ,
60+ "GROUP_SIZE_M" : 4 ,
61+ "num_warps" : 4 ,
62+ "num_stages" : 0 ,
63+ "waves_per_eu" : 0
64+ },
65+ "48" : {
66+ "BLOCK_SIZE_M" : 16 ,
67+ "BLOCK_SIZE_N" : 64 ,
68+ "BLOCK_SIZE_K" : 256 ,
69+ "GROUP_SIZE_M" : 1 ,
70+ "num_warps" : 4 ,
71+ "num_stages" : 0 ,
72+ "waves_per_eu" : 0
73+ },
74+ "64" : {
75+ "BLOCK_SIZE_M" : 32 ,
76+ "BLOCK_SIZE_N" : 128 ,
77+ "BLOCK_SIZE_K" : 128 ,
78+ "GROUP_SIZE_M" : 4 ,
79+ "num_warps" : 4 ,
80+ "num_stages" : 0 ,
81+ "waves_per_eu" : 0
82+ },
83+ "96" : {
84+ "BLOCK_SIZE_M" : 32 ,
85+ "BLOCK_SIZE_N" : 128 ,
86+ "BLOCK_SIZE_K" : 128 ,
87+ "GROUP_SIZE_M" : 1 ,
88+ "num_warps" : 4 ,
89+ "num_stages" : 0 ,
90+ "waves_per_eu" : 0
91+ },
92+ "128" : {
93+ "BLOCK_SIZE_M" : 64 ,
94+ "BLOCK_SIZE_N" : 128 ,
95+ "BLOCK_SIZE_K" : 128 ,
96+ "GROUP_SIZE_M" : 4 ,
97+ "num_warps" : 8 ,
98+ "num_stages" : 0 ,
99+ "waves_per_eu" : 0
100+ },
101+ "256" : {
102+ "BLOCK_SIZE_M" : 64 ,
103+ "BLOCK_SIZE_N" : 128 ,
104+ "BLOCK_SIZE_K" : 128 ,
105+ "GROUP_SIZE_M" : 1 ,
106+ "num_warps" : 8 ,
107+ "num_stages" : 0 ,
108+ "waves_per_eu" : 0
109+ },
110+ "512" : {
111+ "BLOCK_SIZE_M" : 64 ,
112+ "BLOCK_SIZE_N" : 128 ,
113+ "BLOCK_SIZE_K" : 128 ,
114+ "GROUP_SIZE_M" : 1 ,
115+ "num_warps" : 8 ,
116+ "num_stages" : 0 ,
117+ "waves_per_eu" : 0
118+ },
119+ "1024" : {
120+ "BLOCK_SIZE_M" : 128 ,
121+ "BLOCK_SIZE_N" : 256 ,
122+ "BLOCK_SIZE_K" : 128 ,
123+ "GROUP_SIZE_M" : 1 ,
124+ "num_warps" : 8 ,
125+ "num_stages" : 0 ,
126+ "waves_per_eu" : 0
127+ },
128+ "1536" : {
129+ "BLOCK_SIZE_M" : 128 ,
130+ "BLOCK_SIZE_N" : 256 ,
131+ "BLOCK_SIZE_K" : 128 ,
132+ "GROUP_SIZE_M" : 1 ,
133+ "num_warps" : 8 ,
134+ "num_stages" : 0 ,
135+ "waves_per_eu" : 0
136+ },
137+ "2048" : {
138+ "BLOCK_SIZE_M" : 128 ,
139+ "BLOCK_SIZE_N" : 256 ,
140+ "BLOCK_SIZE_K" : 128 ,
141+ "GROUP_SIZE_M" : 1 ,
142+ "num_warps" : 8 ,
143+ "num_stages" : 0 ,
144+ "waves_per_eu" : 0
145+ },
146+ "3072" : {
147+ "BLOCK_SIZE_M" : 128 ,
148+ "BLOCK_SIZE_N" : 256 ,
149+ "BLOCK_SIZE_K" : 128 ,
150+ "GROUP_SIZE_M" : 1 ,
151+ "num_warps" : 8 ,
152+ "num_stages" : 0 ,
153+ "waves_per_eu" : 0
154+ },
155+ "4096" : {
156+ "BLOCK_SIZE_M" : 256 ,
157+ "BLOCK_SIZE_N" : 256 ,
158+ "BLOCK_SIZE_K" : 64 ,
159+ "GROUP_SIZE_M" : 1 ,
160+ "num_warps" : 8 ,
161+ "num_stages" : 0 ,
162+ "waves_per_eu" : 0
163+ }
164+ }
Original file line number Diff line number Diff line change 11{
22 "1" : {
33 "BLOCK_SIZE_M" : 16 ,
4- "BLOCK_SIZE_N" : 16 ,
4+ "BLOCK_SIZE_N" : 64 ,
55 "BLOCK_SIZE_K" : 256 ,
66 "GROUP_SIZE_M" : 1 ,
7- "num_warps" : 1 ,
7+ "num_warps" : 4 ,
88 "num_stages" : 0 ,
99 "waves_per_eu" : 0
1010 },
1313 "BLOCK_SIZE_N" : 16 ,
1414 "BLOCK_SIZE_K" : 256 ,
1515 "GROUP_SIZE_M" : 1 ,
16- "num_warps" : 1 ,
16+ "num_warps" : 4 ,
1717 "num_stages" : 0 ,
1818 "waves_per_eu" : 0
1919 },
2020 "4" : {
2121 "BLOCK_SIZE_M" : 16 ,
22- "BLOCK_SIZE_N" : 64 ,
22+ "BLOCK_SIZE_N" : 32 ,
2323 "BLOCK_SIZE_K" : 256 ,
2424 "GROUP_SIZE_M" : 1 ,
25- "num_warps" : 4 ,
25+ "num_warps" : 2 ,
2626 "num_stages" : 0 ,
2727 "waves_per_eu" : 0
2828 },
4949 "BLOCK_SIZE_N" : 64 ,
5050 "BLOCK_SIZE_K" : 256 ,
5151 "GROUP_SIZE_M" : 1 ,
52- "num_warps" : 2 ,
52+ "num_warps" : 4 ,
5353 "num_stages" : 0 ,
5454 "waves_per_eu" : 0
5555 },
7373 },
7474 "64" : {
7575 "BLOCK_SIZE_M" : 32 ,
76- "BLOCK_SIZE_N" : 64 ,
76+ "BLOCK_SIZE_N" : 128 ,
7777 "BLOCK_SIZE_K" : 128 ,
78- "GROUP_SIZE_M" : 1 ,
79- "num_warps" : 2 ,
78+ "GROUP_SIZE_M" : 4 ,
79+ "num_warps" : 4 ,
8080 "num_stages" : 0 ,
8181 "waves_per_eu" : 0
8282 },
8383 "96" : {
8484 "BLOCK_SIZE_M" : 32 ,
85- "BLOCK_SIZE_N" : 64 ,
85+ "BLOCK_SIZE_N" : 128 ,
8686 "BLOCK_SIZE_K" : 128 ,
8787 "GROUP_SIZE_M" : 1 ,
88- "num_warps" : 2 ,
88+ "num_warps" : 4 ,
8989 "num_stages" : 0 ,
9090 "waves_per_eu" : 0
9191 },
9292 "128" : {
9393 "BLOCK_SIZE_M" : 64 ,
94- "BLOCK_SIZE_N" : 64 ,
94+ "BLOCK_SIZE_N" : 128 ,
9595 "BLOCK_SIZE_K" : 128 ,
9696 "GROUP_SIZE_M" : 4 ,
97- "num_warps" : 4 ,
97+ "num_warps" : 8 ,
9898 "num_stages" : 0 ,
9999 "waves_per_eu" : 0
100100 },
101101 "256" : {
102- "BLOCK_SIZE_M" : 128 ,
102+ "BLOCK_SIZE_M" : 64 ,
103103 "BLOCK_SIZE_N" : 128 ,
104104 "BLOCK_SIZE_K" : 128 ,
105- "GROUP_SIZE_M" : 4 ,
106- "num_warps" : 2 ,
105+ "GROUP_SIZE_M" : 1 ,
106+ "num_warps" : 8 ,
107107 "num_stages" : 0 ,
108108 "waves_per_eu" : 0
109109 },
110110 "512" : {
111- "BLOCK_SIZE_M" : 128 ,
111+ "BLOCK_SIZE_M" : 64 ,
112112 "BLOCK_SIZE_N" : 128 ,
113113 "BLOCK_SIZE_K" : 128 ,
114114 "GROUP_SIZE_M" : 1 ,
115- "num_warps" : 2 ,
115+ "num_warps" : 8 ,
116116 "num_stages" : 0 ,
117117 "waves_per_eu" : 0
118118 },
119119 "1024" : {
120120 "BLOCK_SIZE_M" : 128 ,
121- "BLOCK_SIZE_N" : 128 ,
121+ "BLOCK_SIZE_N" : 256 ,
122122 "BLOCK_SIZE_K" : 128 ,
123123 "GROUP_SIZE_M" : 1 ,
124124 "num_warps" : 8 ,
125125 "num_stages" : 0 ,
126126 "waves_per_eu" : 0
127127 },
128128 "1536" : {
129- "BLOCK_SIZE_M" : 256 ,
130- "BLOCK_SIZE_N" : 128 ,
129+ "BLOCK_SIZE_M" : 128 ,
130+ "BLOCK_SIZE_N" : 256 ,
131131 "BLOCK_SIZE_K" : 128 ,
132132 "GROUP_SIZE_M" : 1 ,
133133 "num_warps" : 8 ,
144144 "waves_per_eu" : 0
145145 },
146146 "3072" : {
147- "BLOCK_SIZE_M" : 256 ,
148- "BLOCK_SIZE_N" : 128 ,
147+ "BLOCK_SIZE_M" : 128 ,
148+ "BLOCK_SIZE_N" : 256 ,
149149 "BLOCK_SIZE_K" : 128 ,
150150 "GROUP_SIZE_M" : 1 ,
151151 "num_warps" : 8 ,
155155 "4096" : {
156156 "BLOCK_SIZE_M" : 256 ,
157157 "BLOCK_SIZE_N" : 256 ,
158- "BLOCK_SIZE_K" : 128 ,
158+ "BLOCK_SIZE_K" : 64 ,
159159 "GROUP_SIZE_M" : 1 ,
160- "num_warps" : 4 ,
160+ "num_warps" : 8 ,
161161 "num_stages" : 0 ,
162162 "waves_per_eu" : 0
163163 }
You can’t perform that action at this time.
0 commit comments