Skip to content

Commit 6688429

Browse files
committed
add deepseek-r1 tp8 configs
1 parent e7f58a5 commit 6688429

6 files changed

+636
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
{
2+
"1024": {
3+
"BLOCK_SIZE_K": 128,
4+
"BLOCK_SIZE_M": 16,
5+
"BLOCK_SIZE_N": 128,
6+
"GROUP_SIZE_M": 1,
7+
"NUM_STAGE": 2,
8+
"NUM_WARPS": 4
9+
},
10+
"128": {
11+
"BLOCK_SIZE_K": 32,
12+
"BLOCK_SIZE_M": 16,
13+
"BLOCK_SIZE_N": 128,
14+
"GROUP_SIZE_M": 1,
15+
"NUM_STAGE": 3,
16+
"NUM_WARPS": 2
17+
},
18+
"131072": {
19+
"BLOCK_SIZE_K": 128,
20+
"BLOCK_SIZE_M": 128,
21+
"BLOCK_SIZE_N": 64,
22+
"GROUP_SIZE_M": 8,
23+
"NUM_STAGE": 2,
24+
"NUM_WARPS": 4
25+
},
26+
"16": {
27+
"BLOCK_SIZE_K": 32,
28+
"BLOCK_SIZE_M": 16,
29+
"BLOCK_SIZE_N": 128,
30+
"GROUP_SIZE_M": 1,
31+
"NUM_STAGE": 3,
32+
"NUM_WARPS": 2
33+
},
34+
"16384": {
35+
"BLOCK_SIZE_K": 128,
36+
"BLOCK_SIZE_M": 128,
37+
"BLOCK_SIZE_N": 64,
38+
"GROUP_SIZE_M": 2,
39+
"NUM_STAGE": 2,
40+
"NUM_WARPS": 4
41+
},
42+
"2048": {
43+
"BLOCK_SIZE_K": 128,
44+
"BLOCK_SIZE_M": 16,
45+
"BLOCK_SIZE_N": 128,
46+
"GROUP_SIZE_M": 1,
47+
"NUM_STAGE": 2,
48+
"NUM_WARPS": 4
49+
},
50+
"256": {
51+
"BLOCK_SIZE_K": 64,
52+
"BLOCK_SIZE_M": 16,
53+
"BLOCK_SIZE_N": 128,
54+
"GROUP_SIZE_M": 8,
55+
"NUM_STAGE": 3,
56+
"NUM_WARPS": 2
57+
},
58+
"32": {
59+
"BLOCK_SIZE_K": 32,
60+
"BLOCK_SIZE_M": 16,
61+
"BLOCK_SIZE_N": 128,
62+
"GROUP_SIZE_M": 1,
63+
"NUM_STAGE": 3,
64+
"NUM_WARPS": 2
65+
},
66+
"32768": {
67+
"BLOCK_SIZE_K": 128,
68+
"BLOCK_SIZE_M": 128,
69+
"BLOCK_SIZE_N": 64,
70+
"GROUP_SIZE_M": 2,
71+
"NUM_STAGE": 2,
72+
"NUM_WARPS": 4
73+
},
74+
"4096": {
75+
"BLOCK_SIZE_K": 128,
76+
"BLOCK_SIZE_M": 64,
77+
"BLOCK_SIZE_N": 128,
78+
"GROUP_SIZE_M": 2,
79+
"NUM_STAGE": 3,
80+
"NUM_WARPS": 4
81+
},
82+
"512": {
83+
"BLOCK_SIZE_K": 128,
84+
"BLOCK_SIZE_M": 16,
85+
"BLOCK_SIZE_N": 128,
86+
"GROUP_SIZE_M": 8,
87+
"NUM_STAGE": 2,
88+
"NUM_WARPS": 4
89+
},
90+
"64": {
91+
"BLOCK_SIZE_K": 32,
92+
"BLOCK_SIZE_M": 16,
93+
"BLOCK_SIZE_N": 128,
94+
"GROUP_SIZE_M": 1,
95+
"NUM_STAGE": 3,
96+
"NUM_WARPS": 2
97+
},
98+
"65536": {
99+
"BLOCK_SIZE_K": 128,
100+
"BLOCK_SIZE_M": 128,
101+
"BLOCK_SIZE_N": 64,
102+
"GROUP_SIZE_M": 2,
103+
"NUM_STAGE": 2,
104+
"NUM_WARPS": 4
105+
},
106+
"8": {
107+
"BLOCK_SIZE_K": 32,
108+
"BLOCK_SIZE_M": 16,
109+
"BLOCK_SIZE_N": 128,
110+
"GROUP_SIZE_M": 8,
111+
"NUM_STAGE": 3,
112+
"NUM_WARPS": 2
113+
},
114+
"8192": {
115+
"BLOCK_SIZE_K": 128,
116+
"BLOCK_SIZE_M": 128,
117+
"BLOCK_SIZE_N": 64,
118+
"GROUP_SIZE_M": 2,
119+
"NUM_STAGE": 2,
120+
"NUM_WARPS": 4
121+
}
122+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
{
2+
"1024": {
3+
"BLOCK_SIZE_K": 128,
4+
"BLOCK_SIZE_M": 16,
5+
"BLOCK_SIZE_N": 64,
6+
"GROUP_SIZE_M": 2,
7+
"NUM_STAGE": 5,
8+
"NUM_WARPS": 4
9+
},
10+
"128": {
11+
"BLOCK_SIZE_K": 128,
12+
"BLOCK_SIZE_M": 16,
13+
"BLOCK_SIZE_N": 64,
14+
"GROUP_SIZE_M": 2,
15+
"NUM_STAGE": 5,
16+
"NUM_WARPS": 4
17+
},
18+
"131072": {
19+
"BLOCK_SIZE_K": 128,
20+
"BLOCK_SIZE_M": 128,
21+
"BLOCK_SIZE_N": 64,
22+
"GROUP_SIZE_M": 4,
23+
"NUM_STAGE": 4,
24+
"NUM_WARPS": 4
25+
},
26+
"16": {
27+
"BLOCK_SIZE_K": 128,
28+
"BLOCK_SIZE_M": 16,
29+
"BLOCK_SIZE_N": 64,
30+
"GROUP_SIZE_M": 8,
31+
"NUM_STAGE": 4,
32+
"NUM_WARPS": 4
33+
},
34+
"16384": {
35+
"BLOCK_SIZE_K": 128,
36+
"BLOCK_SIZE_M": 128,
37+
"BLOCK_SIZE_N": 64,
38+
"GROUP_SIZE_M": 1,
39+
"NUM_STAGE": 3,
40+
"NUM_WARPS": 4
41+
},
42+
"2048": {
43+
"BLOCK_SIZE_K": 128,
44+
"BLOCK_SIZE_M": 32,
45+
"BLOCK_SIZE_N": 128,
46+
"GROUP_SIZE_M": 4,
47+
"NUM_STAGE": 4,
48+
"NUM_WARPS": 4
49+
},
50+
"256": {
51+
"BLOCK_SIZE_K": 128,
52+
"BLOCK_SIZE_M": 32,
53+
"BLOCK_SIZE_N": 64,
54+
"GROUP_SIZE_M": 2,
55+
"NUM_STAGE": 4,
56+
"NUM_WARPS": 4
57+
},
58+
"32": {
59+
"BLOCK_SIZE_K": 128,
60+
"BLOCK_SIZE_M": 16,
61+
"BLOCK_SIZE_N": 64,
62+
"GROUP_SIZE_M": 1,
63+
"NUM_STAGE": 5,
64+
"NUM_WARPS": 4
65+
},
66+
"32768": {
67+
"BLOCK_SIZE_K": 128,
68+
"BLOCK_SIZE_M": 128,
69+
"BLOCK_SIZE_N": 64,
70+
"GROUP_SIZE_M": 1,
71+
"NUM_STAGE": 3,
72+
"NUM_WARPS": 4
73+
},
74+
"4096": {
75+
"BLOCK_SIZE_K": 128,
76+
"BLOCK_SIZE_M": 128,
77+
"BLOCK_SIZE_N": 64,
78+
"GROUP_SIZE_M": 2,
79+
"NUM_STAGE": 3,
80+
"NUM_WARPS": 4
81+
},
82+
"512": {
83+
"BLOCK_SIZE_K": 128,
84+
"BLOCK_SIZE_M": 32,
85+
"BLOCK_SIZE_N": 128,
86+
"GROUP_SIZE_M": 2,
87+
"NUM_STAGE": 5,
88+
"NUM_WARPS": 4
89+
},
90+
"64": {
91+
"BLOCK_SIZE_K": 128,
92+
"BLOCK_SIZE_M": 16,
93+
"BLOCK_SIZE_N": 64,
94+
"GROUP_SIZE_M": 1,
95+
"NUM_STAGE": 5,
96+
"NUM_WARPS": 4
97+
},
98+
"65536": {
99+
"BLOCK_SIZE_K": 128,
100+
"BLOCK_SIZE_M": 128,
101+
"BLOCK_SIZE_N": 64,
102+
"GROUP_SIZE_M": 4,
103+
"NUM_STAGE": 4,
104+
"NUM_WARPS": 4
105+
},
106+
"8": {
107+
"BLOCK_SIZE_K": 128,
108+
"BLOCK_SIZE_M": 16,
109+
"BLOCK_SIZE_N": 64,
110+
"GROUP_SIZE_M": 2,
111+
"NUM_STAGE": 4,
112+
"NUM_WARPS": 4
113+
},
114+
"8192": {
115+
"BLOCK_SIZE_K": 128,
116+
"BLOCK_SIZE_M": 128,
117+
"BLOCK_SIZE_N": 64,
118+
"GROUP_SIZE_M": 2,
119+
"NUM_STAGE": 4,
120+
"NUM_WARPS": 4
121+
}
122+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
{
2+
"1": {
3+
"BLOCK_DIM": 512,
4+
"BLOCK_M": 16,
5+
"NUM_STAGE": 1,
6+
"num_warps": 8
7+
},
8+
"1024": {
9+
"BLOCK_DIM": 256,
10+
"BLOCK_M": 8,
11+
"NUM_STAGE": 1,
12+
"num_warps": 1
13+
},
14+
"128": {
15+
"BLOCK_DIM": 1024,
16+
"BLOCK_M": 1,
17+
"NUM_STAGE": 4,
18+
"num_warps": 2
19+
},
20+
"16": {
21+
"BLOCK_DIM": 512,
22+
"BLOCK_M": 1,
23+
"NUM_STAGE": 1,
24+
"num_warps": 8
25+
},
26+
"16384": {
27+
"BLOCK_DIM": 1024,
28+
"BLOCK_M": 1,
29+
"NUM_STAGE": 4,
30+
"num_warps": 4
31+
},
32+
"2": {
33+
"BLOCK_DIM": 256,
34+
"BLOCK_M": 1,
35+
"NUM_STAGE": 4,
36+
"num_warps": 8
37+
},
38+
"2048": {
39+
"BLOCK_DIM": 256,
40+
"BLOCK_M": 1,
41+
"NUM_STAGE": 1,
42+
"num_warps": 1
43+
},
44+
"256": {
45+
"BLOCK_DIM": 1024,
46+
"BLOCK_M": 1,
47+
"NUM_STAGE": 4,
48+
"num_warps": 2
49+
},
50+
"32": {
51+
"BLOCK_DIM": 512,
52+
"BLOCK_M": 1,
53+
"NUM_STAGE": 1,
54+
"num_warps": 8
55+
},
56+
"4": {
57+
"BLOCK_DIM": 128,
58+
"BLOCK_M": 1,
59+
"NUM_STAGE": 2,
60+
"num_warps": 4
61+
},
62+
"4096": {
63+
"BLOCK_DIM": 256,
64+
"BLOCK_M": 1,
65+
"NUM_STAGE": 1,
66+
"num_warps": 1
67+
},
68+
"512": {
69+
"BLOCK_DIM": 256,
70+
"BLOCK_M": 4,
71+
"NUM_STAGE": 1,
72+
"num_warps": 1
73+
},
74+
"64": {
75+
"BLOCK_DIM": 512,
76+
"BLOCK_M": 1,
77+
"NUM_STAGE": 1,
78+
"num_warps": 8
79+
},
80+
"8": {
81+
"BLOCK_DIM": 512,
82+
"BLOCK_M": 1,
83+
"NUM_STAGE": 1,
84+
"num_warps": 8
85+
},
86+
"8192": {
87+
"BLOCK_DIM": 1024,
88+
"BLOCK_M": 1,
89+
"NUM_STAGE": 4,
90+
"num_warps": 4
91+
}
92+
}

0 commit comments

Comments
 (0)