Skip to content

Commit bdb01a3

Browse files
[Hardware][AMD][Model] Triton MoE tuning configs for GLM-4.6 for MI300X (#27323)
Signed-off-by: minatoaquaMK2 <[email protected]>
1 parent 5b3c35a commit bdb01a3

File tree

1 file changed

+201
-0
lines changed

1 file changed

+201
-0
lines changed
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
{
2+
"triton_version": "3.4.0",
3+
"1": {
4+
"BLOCK_SIZE_M": 16,
5+
"BLOCK_SIZE_N": 16,
6+
"BLOCK_SIZE_K": 256,
7+
"GROUP_SIZE_M": 1,
8+
"num_warps": 4,
9+
"num_stages": 2,
10+
"waves_per_eu": 0,
11+
"matrix_instr_nonkdim": 16,
12+
"kpack": 2
13+
},
14+
"2": {
15+
"BLOCK_SIZE_M": 16,
16+
"BLOCK_SIZE_N": 64,
17+
"BLOCK_SIZE_K": 128,
18+
"GROUP_SIZE_M": 1,
19+
"num_warps": 4,
20+
"num_stages": 2,
21+
"waves_per_eu": 0,
22+
"matrix_instr_nonkdim": 16,
23+
"kpack": 2
24+
},
25+
"4": {
26+
"BLOCK_SIZE_M": 16,
27+
"BLOCK_SIZE_N": 64,
28+
"BLOCK_SIZE_K": 128,
29+
"GROUP_SIZE_M": 1,
30+
"num_warps": 4,
31+
"num_stages": 2,
32+
"waves_per_eu": 0,
33+
"matrix_instr_nonkdim": 16,
34+
"kpack": 2
35+
},
36+
"8": {
37+
"BLOCK_SIZE_M": 16,
38+
"BLOCK_SIZE_N": 64,
39+
"BLOCK_SIZE_K": 128,
40+
"GROUP_SIZE_M": 1,
41+
"num_warps": 4,
42+
"num_stages": 2,
43+
"waves_per_eu": 0,
44+
"matrix_instr_nonkdim": 16,
45+
"kpack": 1
46+
},
47+
"16": {
48+
"BLOCK_SIZE_M": 16,
49+
"BLOCK_SIZE_N": 64,
50+
"BLOCK_SIZE_K": 64,
51+
"GROUP_SIZE_M": 4,
52+
"num_warps": 4,
53+
"num_stages": 2,
54+
"waves_per_eu": 0,
55+
"matrix_instr_nonkdim": 16,
56+
"kpack": 2
57+
},
58+
"24": {
59+
"BLOCK_SIZE_M": 16,
60+
"BLOCK_SIZE_N": 32,
61+
"BLOCK_SIZE_K": 64,
62+
"GROUP_SIZE_M": 1,
63+
"num_warps": 1,
64+
"num_stages": 2,
65+
"waves_per_eu": 0,
66+
"matrix_instr_nonkdim": 16,
67+
"kpack": 2
68+
},
69+
"32": {
70+
"BLOCK_SIZE_M": 16,
71+
"BLOCK_SIZE_N": 64,
72+
"BLOCK_SIZE_K": 128,
73+
"GROUP_SIZE_M": 4,
74+
"num_warps": 2,
75+
"num_stages": 2,
76+
"waves_per_eu": 0,
77+
"matrix_instr_nonkdim": 16,
78+
"kpack": 2
79+
},
80+
"48": {
81+
"BLOCK_SIZE_M": 16,
82+
"BLOCK_SIZE_N": 64,
83+
"BLOCK_SIZE_K": 64,
84+
"GROUP_SIZE_M": 8,
85+
"num_warps": 1,
86+
"num_stages": 2,
87+
"waves_per_eu": 0,
88+
"matrix_instr_nonkdim": 16,
89+
"kpack": 2
90+
},
91+
"64": {
92+
"BLOCK_SIZE_M": 16,
93+
"BLOCK_SIZE_N": 128,
94+
"BLOCK_SIZE_K": 64,
95+
"GROUP_SIZE_M": 4,
96+
"num_warps": 8,
97+
"num_stages": 2,
98+
"waves_per_eu": 0,
99+
"matrix_instr_nonkdim": 16,
100+
"kpack": 2
101+
},
102+
"96": {
103+
"BLOCK_SIZE_M": 16,
104+
"BLOCK_SIZE_N": 128,
105+
"BLOCK_SIZE_K": 64,
106+
"GROUP_SIZE_M": 32,
107+
"num_warps": 8,
108+
"num_stages": 2,
109+
"waves_per_eu": 0,
110+
"matrix_instr_nonkdim": 16,
111+
"kpack": 1
112+
},
113+
"128": {
114+
"BLOCK_SIZE_M": 16,
115+
"BLOCK_SIZE_N": 128,
116+
"BLOCK_SIZE_K": 64,
117+
"GROUP_SIZE_M": 16,
118+
"num_warps": 8,
119+
"num_stages": 2,
120+
"waves_per_eu": 0,
121+
"matrix_instr_nonkdim": 16,
122+
"kpack": 2
123+
},
124+
"256": {
125+
"BLOCK_SIZE_M": 16,
126+
"BLOCK_SIZE_N": 128,
127+
"BLOCK_SIZE_K": 64,
128+
"GROUP_SIZE_M": 32,
129+
"num_warps": 8,
130+
"num_stages": 2,
131+
"waves_per_eu": 0,
132+
"matrix_instr_nonkdim": 16,
133+
"kpack": 2
134+
},
135+
"512": {
136+
"BLOCK_SIZE_M": 32,
137+
"BLOCK_SIZE_N": 128,
138+
"BLOCK_SIZE_K": 64,
139+
"GROUP_SIZE_M": 32,
140+
"num_warps": 4,
141+
"num_stages": 2,
142+
"waves_per_eu": 0,
143+
"matrix_instr_nonkdim": 16,
144+
"kpack": 1
145+
},
146+
"1024": {
147+
"BLOCK_SIZE_M": 64,
148+
"BLOCK_SIZE_N": 128,
149+
"BLOCK_SIZE_K": 64,
150+
"GROUP_SIZE_M": 4,
151+
"num_warps": 8,
152+
"num_stages": 2,
153+
"waves_per_eu": 0,
154+
"matrix_instr_nonkdim": 16,
155+
"kpack": 2
156+
},
157+
"1536": {
158+
"BLOCK_SIZE_M": 128,
159+
"BLOCK_SIZE_N": 128,
160+
"BLOCK_SIZE_K": 64,
161+
"GROUP_SIZE_M": 16,
162+
"num_warps": 4,
163+
"num_stages": 2,
164+
"waves_per_eu": 0,
165+
"matrix_instr_nonkdim": 16,
166+
"kpack": 2
167+
},
168+
"2048": {
169+
"BLOCK_SIZE_M": 128,
170+
"BLOCK_SIZE_N": 128,
171+
"BLOCK_SIZE_K": 64,
172+
"GROUP_SIZE_M": 4,
173+
"num_warps": 8,
174+
"num_stages": 2,
175+
"waves_per_eu": 0,
176+
"matrix_instr_nonkdim": 16,
177+
"kpack": 1
178+
},
179+
"3072": {
180+
"BLOCK_SIZE_M": 256,
181+
"BLOCK_SIZE_N": 128,
182+
"BLOCK_SIZE_K": 64,
183+
"GROUP_SIZE_M": 32,
184+
"num_warps": 8,
185+
"num_stages": 2,
186+
"waves_per_eu": 0,
187+
"matrix_instr_nonkdim": 16,
188+
"kpack": 2
189+
},
190+
"4096": {
191+
"BLOCK_SIZE_M": 256,
192+
"BLOCK_SIZE_N": 128,
193+
"BLOCK_SIZE_K": 64,
194+
"GROUP_SIZE_M": 16,
195+
"num_warps": 8,
196+
"num_stages": 2,
197+
"waves_per_eu": 0,
198+
"matrix_instr_nonkdim": 16,
199+
"kpack": 2
200+
}
201+
}

0 commit comments

Comments
 (0)