Skip to content

Commit b1e0620

Browse files
authored
Merge pull request #6 from EmbeddedLLM/add_deepseekv3_tuned_config
format code; add deepseekv3 tuning config
2 parents c5b0cf9 + 68825ee commit b1e0620

File tree

26 files changed

+4115
-26
lines changed

26 files changed

+4115
-26
lines changed
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
{
2+
"1": {
3+
"BLOCK_SIZE_M": 16,
4+
"BLOCK_SIZE_N": 64,
5+
"BLOCK_SIZE_K": 256,
6+
"GROUP_SIZE_M": 1,
7+
"num_warps": 4,
8+
"num_stages": 2,
9+
"waves_per_eu": 0
10+
},
11+
"2": {
12+
"BLOCK_SIZE_M": 32,
13+
"BLOCK_SIZE_N": 128,
14+
"BLOCK_SIZE_K": 128,
15+
"GROUP_SIZE_M": 1,
16+
"num_warps": 4,
17+
"num_stages": 2,
18+
"waves_per_eu": 0
19+
},
20+
"4": {
21+
"BLOCK_SIZE_M": 16,
22+
"BLOCK_SIZE_N": 64,
23+
"BLOCK_SIZE_K": 128,
24+
"GROUP_SIZE_M": 1,
25+
"num_warps": 2,
26+
"num_stages": 2,
27+
"waves_per_eu": 0
28+
},
29+
"8": {
30+
"BLOCK_SIZE_M": 16,
31+
"BLOCK_SIZE_N": 64,
32+
"BLOCK_SIZE_K": 128,
33+
"GROUP_SIZE_M": 1,
34+
"num_warps": 2,
35+
"num_stages": 2,
36+
"waves_per_eu": 0
37+
},
38+
"16": {
39+
"BLOCK_SIZE_M": 16,
40+
"BLOCK_SIZE_N": 64,
41+
"BLOCK_SIZE_K": 128,
42+
"GROUP_SIZE_M": 1,
43+
"num_warps": 4,
44+
"num_stages": 2,
45+
"waves_per_eu": 0
46+
},
47+
"24": {
48+
"BLOCK_SIZE_M": 16,
49+
"BLOCK_SIZE_N": 64,
50+
"BLOCK_SIZE_K": 128,
51+
"GROUP_SIZE_M": 1,
52+
"num_warps": 4,
53+
"num_stages": 2,
54+
"waves_per_eu": 0
55+
},
56+
"32": {
57+
"BLOCK_SIZE_M": 16,
58+
"BLOCK_SIZE_N": 64,
59+
"BLOCK_SIZE_K": 128,
60+
"GROUP_SIZE_M": 1,
61+
"num_warps": 4,
62+
"num_stages": 2,
63+
"waves_per_eu": 0
64+
},
65+
"48": {
66+
"BLOCK_SIZE_M": 16,
67+
"BLOCK_SIZE_N": 64,
68+
"BLOCK_SIZE_K": 128,
69+
"GROUP_SIZE_M": 1,
70+
"num_warps": 4,
71+
"num_stages": 2,
72+
"waves_per_eu": 0
73+
},
74+
"64": {
75+
"BLOCK_SIZE_M": 16,
76+
"BLOCK_SIZE_N": 64,
77+
"BLOCK_SIZE_K": 128,
78+
"GROUP_SIZE_M": 1,
79+
"num_warps": 4,
80+
"num_stages": 2,
81+
"waves_per_eu": 0
82+
},
83+
"96": {
84+
"BLOCK_SIZE_M": 16,
85+
"BLOCK_SIZE_N": 64,
86+
"BLOCK_SIZE_K": 128,
87+
"GROUP_SIZE_M": 1,
88+
"num_warps": 4,
89+
"num_stages": 2,
90+
"waves_per_eu": 0
91+
},
92+
"128": {
93+
"BLOCK_SIZE_M": 16,
94+
"BLOCK_SIZE_N": 64,
95+
"BLOCK_SIZE_K": 128,
96+
"GROUP_SIZE_M": 1,
97+
"num_warps": 4,
98+
"num_stages": 2,
99+
"waves_per_eu": 0
100+
},
101+
"256": {
102+
"BLOCK_SIZE_M": 16,
103+
"BLOCK_SIZE_N": 64,
104+
"BLOCK_SIZE_K": 128,
105+
"GROUP_SIZE_M": 1,
106+
"num_warps": 4,
107+
"num_stages": 2,
108+
"waves_per_eu": 0
109+
},
110+
"512": {
111+
"BLOCK_SIZE_M": 32,
112+
"BLOCK_SIZE_N": 128,
113+
"BLOCK_SIZE_K": 128,
114+
"GROUP_SIZE_M": 1,
115+
"num_warps": 4,
116+
"num_stages": 2,
117+
"waves_per_eu": 0
118+
},
119+
"1024": {
120+
"BLOCK_SIZE_M": 64,
121+
"BLOCK_SIZE_N": 128,
122+
"BLOCK_SIZE_K": 64,
123+
"GROUP_SIZE_M": 4,
124+
"num_warps": 4,
125+
"num_stages": 2,
126+
"waves_per_eu": 0
127+
},
128+
"1536": {
129+
"BLOCK_SIZE_M": 64,
130+
"BLOCK_SIZE_N": 128,
131+
"BLOCK_SIZE_K": 64,
132+
"GROUP_SIZE_M": 4,
133+
"num_warps": 4,
134+
"num_stages": 2,
135+
"waves_per_eu": 0
136+
},
137+
"2048": {
138+
"BLOCK_SIZE_M": 64,
139+
"BLOCK_SIZE_N": 128,
140+
"BLOCK_SIZE_K": 64,
141+
"GROUP_SIZE_M": 1,
142+
"num_warps": 4,
143+
"num_stages": 2,
144+
"waves_per_eu": 0
145+
},
146+
"3072": {
147+
"BLOCK_SIZE_M": 128,
148+
"BLOCK_SIZE_N": 128,
149+
"BLOCK_SIZE_K": 64,
150+
"GROUP_SIZE_M": 4,
151+
"num_warps": 8,
152+
"num_stages": 2,
153+
"waves_per_eu": 0
154+
},
155+
"4096": {
156+
"BLOCK_SIZE_M": 64,
157+
"BLOCK_SIZE_N": 256,
158+
"BLOCK_SIZE_K": 64,
159+
"GROUP_SIZE_M": 1,
160+
"num_warps": 4,
161+
"num_stages": 2,
162+
"waves_per_eu": 0
163+
}
164+
}

vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=MI308X.json

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"1": {
33
"BLOCK_SIZE_M": 16,
4-
"BLOCK_SIZE_N": 64,
4+
"BLOCK_SIZE_N": 32,
55
"BLOCK_SIZE_K": 128,
66
"GROUP_SIZE_M": 1,
77
"num_warps": 2,
@@ -19,11 +19,11 @@
1919
"num_stages": 2,
2020
"waves_per_eu": 0,
2121
"matrix_instr_nonkdim": 16,
22-
"kpack": 2
22+
"kpack": 1
2323
},
2424
"4": {
2525
"BLOCK_SIZE_M": 16,
26-
"BLOCK_SIZE_N": 64,
26+
"BLOCK_SIZE_N": 32,
2727
"BLOCK_SIZE_K": 128,
2828
"GROUP_SIZE_M": 1,
2929
"num_warps": 2,
@@ -34,7 +34,7 @@
3434
},
3535
"8": {
3636
"BLOCK_SIZE_M": 16,
37-
"BLOCK_SIZE_N": 64,
37+
"BLOCK_SIZE_N": 32,
3838
"BLOCK_SIZE_K": 128,
3939
"GROUP_SIZE_M": 1,
4040
"num_warps": 2,
@@ -45,7 +45,7 @@
4545
},
4646
"16": {
4747
"BLOCK_SIZE_M": 16,
48-
"BLOCK_SIZE_N": 64,
48+
"BLOCK_SIZE_N": 32,
4949
"BLOCK_SIZE_K": 128,
5050
"GROUP_SIZE_M": 1,
5151
"num_warps": 2,
@@ -56,7 +56,7 @@
5656
},
5757
"24": {
5858
"BLOCK_SIZE_M": 16,
59-
"BLOCK_SIZE_N": 64,
59+
"BLOCK_SIZE_N": 32,
6060
"BLOCK_SIZE_K": 128,
6161
"GROUP_SIZE_M": 1,
6262
"num_warps": 2,
@@ -78,7 +78,7 @@
7878
},
7979
"48": {
8080
"BLOCK_SIZE_M": 16,
81-
"BLOCK_SIZE_N": 64,
81+
"BLOCK_SIZE_N": 32,
8282
"BLOCK_SIZE_K": 128,
8383
"GROUP_SIZE_M": 1,
8484
"num_warps": 2,
@@ -89,8 +89,8 @@
8989
},
9090
"64": {
9191
"BLOCK_SIZE_M": 32,
92-
"BLOCK_SIZE_N": 32,
93-
"BLOCK_SIZE_K": 256,
92+
"BLOCK_SIZE_N": 64,
93+
"BLOCK_SIZE_K": 128,
9494
"GROUP_SIZE_M": 4,
9595
"num_warps": 4,
9696
"num_stages": 2,
@@ -100,8 +100,8 @@
100100
},
101101
"96": {
102102
"BLOCK_SIZE_M": 32,
103-
"BLOCK_SIZE_N": 32,
104-
"BLOCK_SIZE_K": 256,
103+
"BLOCK_SIZE_N": 64,
104+
"BLOCK_SIZE_K": 128,
105105
"GROUP_SIZE_M": 4,
106106
"num_warps": 4,
107107
"num_stages": 2,
@@ -123,19 +123,8 @@
123123
"256": {
124124
"BLOCK_SIZE_M": 64,
125125
"BLOCK_SIZE_N": 64,
126-
"BLOCK_SIZE_K": 64,
127-
"GROUP_SIZE_M": 4,
128-
"num_warps": 4,
129-
"num_stages": 2,
130-
"waves_per_eu": 0,
131-
"matrix_instr_nonkdim": 32,
132-
"kpack": 2
133-
},
134-
"256": {
135-
"BLOCK_SIZE_M": 128,
136-
"BLOCK_SIZE_N": 128,
137-
"BLOCK_SIZE_K": 64,
138-
"GROUP_SIZE_M": 4,
126+
"BLOCK_SIZE_K": 128,
127+
"GROUP_SIZE_M": 1,
139128
"num_warps": 8,
140129
"num_stages": 2,
141130
"waves_per_eu": 0,
@@ -151,13 +140,13 @@
151140
"num_stages": 2,
152141
"waves_per_eu": 0,
153142
"matrix_instr_nonkdim": 16,
154-
"kpack": 2
143+
"kpack": 1
155144
},
156145
"1024": {
157146
"BLOCK_SIZE_M": 128,
158147
"BLOCK_SIZE_N": 128,
159148
"BLOCK_SIZE_K": 64,
160-
"GROUP_SIZE_M": 1,
149+
"GROUP_SIZE_M": 4,
161150
"num_warps": 8,
162151
"num_stages": 2,
163152
"waves_per_eu": 0,

0 commit comments

Comments
 (0)