File tree Expand file tree Collapse file tree 4 files changed +172
-428
lines changed
vllm/model_executor/layers/fused_moe/configs Expand file tree Collapse file tree 4 files changed +172
-428
lines changed Original file line number Diff line number Diff line change 1
1
{
2
2
"1" : {
3
3
"BLOCK_SIZE_M" : 64 ,
4
- "BLOCK_SIZE_N" : 128 ,
5
- "BLOCK_SIZE_K" : 256 ,
6
- "GROUP_SIZE_M" : 64 ,
4
+ "BLOCK_SIZE_N" : 256 ,
5
+ "BLOCK_SIZE_K" : 64 ,
6
+ "GROUP_SIZE_M" : 1 ,
7
7
"num_warps" : 4 ,
8
- "num_stages" : 4
8
+ "num_stages" : 3
9
9
},
10
10
"2" : {
11
- "BLOCK_SIZE_M" : 64 ,
12
- "BLOCK_SIZE_N" : 64 ,
11
+ "BLOCK_SIZE_M" : 32 ,
12
+ "BLOCK_SIZE_N" : 256 ,
13
13
"BLOCK_SIZE_K" : 256 ,
14
- "GROUP_SIZE_M" : 1 ,
14
+ "GROUP_SIZE_M" : 16 ,
15
15
"num_warps" : 4 ,
16
- "num_stages" : 5
16
+ "num_stages" : 4
17
17
},
18
18
"4" : {
19
- "BLOCK_SIZE_M" : 64 ,
20
- "BLOCK_SIZE_N" : 64 ,
21
- "BLOCK_SIZE_K" : 256 ,
22
- "GROUP_SIZE_M" : 1 ,
23
- "num_warps" : 4 ,
24
- "num_stages" : 5
25
- },
26
- "8" : {
27
- "BLOCK_SIZE_M" : 64 ,
19
+ "BLOCK_SIZE_M" : 32 ,
28
20
"BLOCK_SIZE_N" : 256 ,
29
- "BLOCK_SIZE_K" : 128 ,
30
- "GROUP_SIZE_M" : 32 ,
21
+ "BLOCK_SIZE_K" : 64 ,
22
+ "GROUP_SIZE_M" : 16 ,
31
23
"num_warps" : 4 ,
32
24
"num_stages" : 4
33
25
},
34
- "16 " : {
35
- "BLOCK_SIZE_M" : 64 ,
26
+ "8 " : {
27
+ "BLOCK_SIZE_M" : 128 ,
36
28
"BLOCK_SIZE_N" : 128 ,
37
- "BLOCK_SIZE_K" : 128 ,
38
- "GROUP_SIZE_M" : 32 ,
29
+ "BLOCK_SIZE_K" : 256 ,
30
+ "GROUP_SIZE_M" : 8 ,
39
31
"num_warps" : 4 ,
40
- "num_stages" : 5
32
+ "num_stages" : 3
41
33
},
42
- "24 " : {
43
- "BLOCK_SIZE_M" : 64 ,
34
+ "16 " : {
35
+ "BLOCK_SIZE_M" : 128 ,
44
36
"BLOCK_SIZE_N" : 64 ,
45
- "BLOCK_SIZE_K" : 256 ,
46
- "GROUP_SIZE_M" : 1 ,
47
- "num_warps" : 4 ,
37
+ "BLOCK_SIZE_K" : 64 ,
38
+ "GROUP_SIZE_M" : 8 ,
39
+ "num_warps" : 8 ,
48
40
"num_stages" : 3
49
41
},
50
42
"32" : {
51
43
"BLOCK_SIZE_M" : 64 ,
52
- "BLOCK_SIZE_N" : 128 ,
53
- "BLOCK_SIZE_K" : 256 ,
54
- "GROUP_SIZE_M" : 1 ,
55
- "num_warps" : 4 ,
56
- "num_stages" : 4
57
- },
58
- "48" : {
59
- "BLOCK_SIZE_M" : 64 ,
60
- "BLOCK_SIZE_N" : 128 ,
44
+ "BLOCK_SIZE_N" : 256 ,
61
45
"BLOCK_SIZE_K" : 256 ,
62
46
"GROUP_SIZE_M" : 1 ,
63
- "num_warps" : 4 ,
64
- "num_stages" : 4
47
+ "num_warps" : 8 ,
48
+ "num_stages" : 3
65
49
},
66
50
"64" : {
67
- "BLOCK_SIZE_M" : 64 ,
68
- "BLOCK_SIZE_N" : 128 ,
69
- "BLOCK_SIZE_K" : 256 ,
70
- "GROUP_SIZE_M" : 1 ,
71
- "num_warps" : 4 ,
72
- "num_stages" : 4
73
- },
74
- "96" : {
75
- "BLOCK_SIZE_M" : 64 ,
51
+ "BLOCK_SIZE_M" : 128 ,
76
52
"BLOCK_SIZE_N" : 128 ,
77
53
"BLOCK_SIZE_K" : 256 ,
78
- "GROUP_SIZE_M" : 1 ,
79
- "num_warps" : 4 ,
54
+ "GROUP_SIZE_M" : 16 ,
55
+ "num_warps" : 8 ,
80
56
"num_stages" : 4
81
57
},
82
58
"128" : {
83
- "BLOCK_SIZE_M" : 64 ,
84
- "BLOCK_SIZE_N" : 128 ,
85
- "BLOCK_SIZE_K" : 256 ,
86
- "GROUP_SIZE_M" : 1 ,
87
- "num_warps" : 4 ,
88
- "num_stages" : 4
89
- },
90
- "256" : {
91
- "BLOCK_SIZE_M" : 64 ,
59
+ "BLOCK_SIZE_M" : 16 ,
92
60
"BLOCK_SIZE_N" : 128 ,
93
61
"BLOCK_SIZE_K" : 128 ,
94
- "GROUP_SIZE_M" : 64 ,
95
- "num_warps" : 4 ,
96
- "num_stages" : 3
97
- },
98
- "512" : {
99
- "BLOCK_SIZE_M" : 128 ,
100
- "BLOCK_SIZE_N" : 256 ,
101
- "BLOCK_SIZE_K" : 128 ,
102
- "GROUP_SIZE_M" : 64 ,
103
- "num_warps" : 8 ,
104
- "num_stages" : 4
105
- },
106
- "1024" : {
107
- "BLOCK_SIZE_M" : 128 ,
108
- "BLOCK_SIZE_N" : 256 ,
109
- "BLOCK_SIZE_K" : 128 ,
110
- "GROUP_SIZE_M" : 32 ,
111
- "num_warps" : 8 ,
112
- "num_stages" : 4
113
- },
114
- "1536" : {
115
- "BLOCK_SIZE_M" : 128 ,
116
- "BLOCK_SIZE_N" : 256 ,
117
- "BLOCK_SIZE_K" : 128 ,
118
- "GROUP_SIZE_M" : 64 ,
62
+ "GROUP_SIZE_M" : 16 ,
119
63
"num_warps" : 8 ,
120
- "num_stages" : 4
64
+ "num_stages" : 2
121
65
},
122
- "2048" : {
123
- "BLOCK_SIZE_M" : 128 ,
124
- "BLOCK_SIZE_N" : 256 ,
125
- "BLOCK_SIZE_K" : 128 ,
126
- "GROUP_SIZE_M" : 64 ,
127
- "num_warps" : 8 ,
128
- "num_stages" : 4
129
- },
130
- "3072" : {
131
- "BLOCK_SIZE_M" : 128 ,
132
- "BLOCK_SIZE_N" : 256 ,
66
+ "256" : {
67
+ "BLOCK_SIZE_M" : 32 ,
68
+ "BLOCK_SIZE_N" : 64 ,
133
69
"BLOCK_SIZE_K" : 128 ,
134
- "GROUP_SIZE_M" : 32 ,
70
+ "GROUP_SIZE_M" : 8 ,
135
71
"num_warps" : 8 ,
136
- "num_stages" : 4
72
+ "num_stages" : 2
137
73
},
138
- "4096 " : {
74
+ "512 " : {
139
75
"BLOCK_SIZE_M" : 128 ,
140
- "BLOCK_SIZE_N" : 256 ,
141
- "BLOCK_SIZE_K" : 128 ,
76
+ "BLOCK_SIZE_N" : 64 ,
77
+ "BLOCK_SIZE_K" : 64 ,
142
78
"GROUP_SIZE_M" : 16 ,
143
- "num_warps" : 8 ,
144
- "num_stages" : 4
79
+ "num_warps" : 4 ,
80
+ "num_stages" : 2
145
81
}
146
82
}
Original file line number Diff line number Diff line change 1
1
{
2
2
"1" : {
3
3
"BLOCK_SIZE_M" : 64 ,
4
- "BLOCK_SIZE_N" : 128 ,
5
- "BLOCK_SIZE_K" : 256 ,
6
- "GROUP_SIZE_M" : 64 ,
7
- "num_warps" : 4 ,
8
- "num_stages" : 4
4
+ "BLOCK_SIZE_N" : 256 ,
5
+ "BLOCK_SIZE_K" : 64 ,
6
+ "GROUP_SIZE_M" : 1 ,
7
+ "num_warps" : 8 ,
8
+ "num_stages" : 3
9
9
},
10
10
"2" : {
11
- "BLOCK_SIZE_M" : 64 ,
11
+ "BLOCK_SIZE_M" : 16 ,
12
12
"BLOCK_SIZE_N" : 64 ,
13
- "BLOCK_SIZE_K" : 256 ,
14
- "GROUP_SIZE_M" : 1 ,
15
- "num_warps" : 4 ,
16
- "num_stages" : 5
13
+ "BLOCK_SIZE_K" : 64 ,
14
+ "GROUP_SIZE_M" : 16 ,
15
+ "num_warps" : 8 ,
16
+ "num_stages" : 3
17
17
},
18
18
"4" : {
19
- "BLOCK_SIZE_M" : 64 ,
19
+ "BLOCK_SIZE_M" : 32 ,
20
20
"BLOCK_SIZE_N" : 64 ,
21
- "BLOCK_SIZE_K" : 256 ,
22
- "GROUP_SIZE_M" : 1 ,
23
- "num_warps" : 4 ,
24
- "num_stages" : 5
21
+ "BLOCK_SIZE_K" : 64 ,
22
+ "GROUP_SIZE_M" : 16 ,
23
+ "num_warps" : 8 ,
24
+ "num_stages" : 4
25
25
},
26
26
"8" : {
27
- "BLOCK_SIZE_M" : 64 ,
27
+ "BLOCK_SIZE_M" : 16 ,
28
28
"BLOCK_SIZE_N" : 256 ,
29
29
"BLOCK_SIZE_K" : 128 ,
30
- "GROUP_SIZE_M" : 32 ,
31
- "num_warps" : 4 ,
32
- "num_stages" : 4
33
- },
34
- "16" : {
35
- "BLOCK_SIZE_M" : 64 ,
36
- "BLOCK_SIZE_N" : 128 ,
37
- "BLOCK_SIZE_K" : 128 ,
38
- "GROUP_SIZE_M" : 32 ,
39
- "num_warps" : 4 ,
40
- "num_stages" : 5
41
- },
42
- "24" : {
43
- "BLOCK_SIZE_M" : 64 ,
44
- "BLOCK_SIZE_N" : 64 ,
45
- "BLOCK_SIZE_K" : 256 ,
46
- "GROUP_SIZE_M" : 1 ,
47
- "num_warps" : 4 ,
48
- "num_stages" : 3
49
- },
50
- "32" : {
51
- "BLOCK_SIZE_M" : 64 ,
52
- "BLOCK_SIZE_N" : 128 ,
53
- "BLOCK_SIZE_K" : 256 ,
54
- "GROUP_SIZE_M" : 1 ,
55
- "num_warps" : 4 ,
56
- "num_stages" : 4
57
- },
58
- "48" : {
59
- "BLOCK_SIZE_M" : 64 ,
60
- "BLOCK_SIZE_N" : 128 ,
61
- "BLOCK_SIZE_K" : 256 ,
62
- "GROUP_SIZE_M" : 1 ,
63
- "num_warps" : 4 ,
64
- "num_stages" : 4
65
- },
66
- "64" : {
67
- "BLOCK_SIZE_M" : 64 ,
68
- "BLOCK_SIZE_N" : 128 ,
69
- "BLOCK_SIZE_K" : 256 ,
70
- "GROUP_SIZE_M" : 1 ,
71
- "num_warps" : 4 ,
72
- "num_stages" : 4
73
- },
74
- "96" : {
75
- "BLOCK_SIZE_M" : 64 ,
76
- "BLOCK_SIZE_N" : 128 ,
77
- "BLOCK_SIZE_K" : 256 ,
78
- "GROUP_SIZE_M" : 1 ,
79
- "num_warps" : 4 ,
80
- "num_stages" : 4
81
- },
82
- "128" : {
83
- "BLOCK_SIZE_M" : 64 ,
84
- "BLOCK_SIZE_N" : 128 ,
85
- "BLOCK_SIZE_K" : 256 ,
86
30
"GROUP_SIZE_M" : 1 ,
87
- "num_warps" : 4 ,
31
+ "num_warps" : 8 ,
88
32
"num_stages" : 4
89
33
},
90
- "256 " : {
91
- "BLOCK_SIZE_M" : 64 ,
34
+ "16 " : {
35
+ "BLOCK_SIZE_M" : 16 ,
92
36
"BLOCK_SIZE_N" : 128 ,
93
37
"BLOCK_SIZE_K" : 128 ,
94
- "GROUP_SIZE_M" : 64 ,
95
- "num_warps" : 4 ,
96
- "num_stages" : 3
97
- },
98
- "512" : {
99
- "BLOCK_SIZE_M" : 128 ,
100
- "BLOCK_SIZE_N" : 256 ,
101
- "BLOCK_SIZE_K" : 128 ,
102
- "GROUP_SIZE_M" : 64 ,
38
+ "GROUP_SIZE_M" : 8 ,
103
39
"num_warps" : 8 ,
104
- "num_stages" : 4
40
+ "num_stages" : 2
105
41
},
106
- "1024 " : {
107
- "BLOCK_SIZE_M" : 128 ,
108
- "BLOCK_SIZE_N" : 256 ,
42
+ "32 " : {
43
+ "BLOCK_SIZE_M" : 16 ,
44
+ "BLOCK_SIZE_N" : 64 ,
109
45
"BLOCK_SIZE_K" : 128 ,
110
- "GROUP_SIZE_M" : 32 ,
46
+ "GROUP_SIZE_M" : 16 ,
111
47
"num_warps" : 8 ,
112
- "num_stages" : 4
48
+ "num_stages" : 2
113
49
},
114
- "1536 " : {
115
- "BLOCK_SIZE_M" : 128 ,
50
+ "64 " : {
51
+ "BLOCK_SIZE_M" : 32 ,
116
52
"BLOCK_SIZE_N" : 256 ,
117
53
"BLOCK_SIZE_K" : 128 ,
118
- "GROUP_SIZE_M" : 64 ,
54
+ "GROUP_SIZE_M" : 16 ,
119
55
"num_warps" : 8 ,
120
56
"num_stages" : 4
121
57
},
122
- "2048 " : {
123
- "BLOCK_SIZE_M" : 128 ,
58
+ "128 " : {
59
+ "BLOCK_SIZE_M" : 16 ,
124
60
"BLOCK_SIZE_N" : 256 ,
125
- "BLOCK_SIZE_K" : 128 ,
126
- "GROUP_SIZE_M" : 64 ,
61
+ "BLOCK_SIZE_K" : 64 ,
62
+ "GROUP_SIZE_M" : 8 ,
127
63
"num_warps" : 8 ,
128
- "num_stages" : 4
64
+ "num_stages" : 3
129
65
},
130
- "3072 " : {
131
- "BLOCK_SIZE_M" : 128 ,
132
- "BLOCK_SIZE_N" : 256 ,
66
+ "256 " : {
67
+ "BLOCK_SIZE_M" : 16 ,
68
+ "BLOCK_SIZE_N" : 64 ,
133
69
"BLOCK_SIZE_K" : 128 ,
134
- "GROUP_SIZE_M" : 32 ,
135
- "num_warps" : 8 ,
136
- "num_stages" : 4
70
+ "GROUP_SIZE_M" : 8 ,
71
+ "num_warps" : 4 ,
72
+ "num_stages" : 2
137
73
},
138
- "4096 " : {
139
- "BLOCK_SIZE_M" : 128 ,
74
+ "512 " : {
75
+ "BLOCK_SIZE_M" : 32 ,
140
76
"BLOCK_SIZE_N" : 256 ,
141
- "BLOCK_SIZE_K" : 128 ,
77
+ "BLOCK_SIZE_K" : 64 ,
142
78
"GROUP_SIZE_M" : 16 ,
143
79
"num_warps" : 8 ,
144
- "num_stages" : 4
80
+ "num_stages" : 2
145
81
}
146
82
}
You can’t perform that action at this time.
0 commit comments