File tree Expand file tree Collapse file tree 6 files changed +636
-0
lines changed
lightllm/common/triton_utils/all_kernel_configs/triton_3.3.1/NVIDIA H200 Expand file tree Collapse file tree 6 files changed +636
-0
lines changed Original file line number Diff line number Diff line change 1+ {
2+ "1024" : {
3+ "BLOCK_SIZE_K" : 128 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 128 ,
6+ "GROUP_SIZE_M" : 1 ,
7+ "NUM_STAGE" : 2 ,
8+ "NUM_WARPS" : 4
9+ },
10+ "128" : {
11+ "BLOCK_SIZE_K" : 32 ,
12+ "BLOCK_SIZE_M" : 16 ,
13+ "BLOCK_SIZE_N" : 128 ,
14+ "GROUP_SIZE_M" : 1 ,
15+ "NUM_STAGE" : 3 ,
16+ "NUM_WARPS" : 2
17+ },
18+ "131072" : {
19+ "BLOCK_SIZE_K" : 128 ,
20+ "BLOCK_SIZE_M" : 128 ,
21+ "BLOCK_SIZE_N" : 64 ,
22+ "GROUP_SIZE_M" : 8 ,
23+ "NUM_STAGE" : 2 ,
24+ "NUM_WARPS" : 4
25+ },
26+ "16" : {
27+ "BLOCK_SIZE_K" : 32 ,
28+ "BLOCK_SIZE_M" : 16 ,
29+ "BLOCK_SIZE_N" : 128 ,
30+ "GROUP_SIZE_M" : 1 ,
31+ "NUM_STAGE" : 3 ,
32+ "NUM_WARPS" : 2
33+ },
34+ "16384" : {
35+ "BLOCK_SIZE_K" : 128 ,
36+ "BLOCK_SIZE_M" : 128 ,
37+ "BLOCK_SIZE_N" : 64 ,
38+ "GROUP_SIZE_M" : 2 ,
39+ "NUM_STAGE" : 2 ,
40+ "NUM_WARPS" : 4
41+ },
42+ "2048" : {
43+ "BLOCK_SIZE_K" : 128 ,
44+ "BLOCK_SIZE_M" : 16 ,
45+ "BLOCK_SIZE_N" : 128 ,
46+ "GROUP_SIZE_M" : 1 ,
47+ "NUM_STAGE" : 2 ,
48+ "NUM_WARPS" : 4
49+ },
50+ "256" : {
51+ "BLOCK_SIZE_K" : 64 ,
52+ "BLOCK_SIZE_M" : 16 ,
53+ "BLOCK_SIZE_N" : 128 ,
54+ "GROUP_SIZE_M" : 8 ,
55+ "NUM_STAGE" : 3 ,
56+ "NUM_WARPS" : 2
57+ },
58+ "32" : {
59+ "BLOCK_SIZE_K" : 32 ,
60+ "BLOCK_SIZE_M" : 16 ,
61+ "BLOCK_SIZE_N" : 128 ,
62+ "GROUP_SIZE_M" : 1 ,
63+ "NUM_STAGE" : 3 ,
64+ "NUM_WARPS" : 2
65+ },
66+ "32768" : {
67+ "BLOCK_SIZE_K" : 128 ,
68+ "BLOCK_SIZE_M" : 128 ,
69+ "BLOCK_SIZE_N" : 64 ,
70+ "GROUP_SIZE_M" : 2 ,
71+ "NUM_STAGE" : 2 ,
72+ "NUM_WARPS" : 4
73+ },
74+ "4096" : {
75+ "BLOCK_SIZE_K" : 128 ,
76+ "BLOCK_SIZE_M" : 64 ,
77+ "BLOCK_SIZE_N" : 128 ,
78+ "GROUP_SIZE_M" : 2 ,
79+ "NUM_STAGE" : 3 ,
80+ "NUM_WARPS" : 4
81+ },
82+ "512" : {
83+ "BLOCK_SIZE_K" : 128 ,
84+ "BLOCK_SIZE_M" : 16 ,
85+ "BLOCK_SIZE_N" : 128 ,
86+ "GROUP_SIZE_M" : 8 ,
87+ "NUM_STAGE" : 2 ,
88+ "NUM_WARPS" : 4
89+ },
90+ "64" : {
91+ "BLOCK_SIZE_K" : 32 ,
92+ "BLOCK_SIZE_M" : 16 ,
93+ "BLOCK_SIZE_N" : 128 ,
94+ "GROUP_SIZE_M" : 1 ,
95+ "NUM_STAGE" : 3 ,
96+ "NUM_WARPS" : 2
97+ },
98+ "65536" : {
99+ "BLOCK_SIZE_K" : 128 ,
100+ "BLOCK_SIZE_M" : 128 ,
101+ "BLOCK_SIZE_N" : 64 ,
102+ "GROUP_SIZE_M" : 2 ,
103+ "NUM_STAGE" : 2 ,
104+ "NUM_WARPS" : 4
105+ },
106+ "8" : {
107+ "BLOCK_SIZE_K" : 32 ,
108+ "BLOCK_SIZE_M" : 16 ,
109+ "BLOCK_SIZE_N" : 128 ,
110+ "GROUP_SIZE_M" : 8 ,
111+ "NUM_STAGE" : 3 ,
112+ "NUM_WARPS" : 2
113+ },
114+ "8192" : {
115+ "BLOCK_SIZE_K" : 128 ,
116+ "BLOCK_SIZE_M" : 128 ,
117+ "BLOCK_SIZE_N" : 64 ,
118+ "GROUP_SIZE_M" : 2 ,
119+ "NUM_STAGE" : 2 ,
120+ "NUM_WARPS" : 4
121+ }
122+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1024" : {
3+ "BLOCK_SIZE_K" : 128 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 64 ,
6+ "GROUP_SIZE_M" : 2 ,
7+ "NUM_STAGE" : 5 ,
8+ "NUM_WARPS" : 4
9+ },
10+ "128" : {
11+ "BLOCK_SIZE_K" : 128 ,
12+ "BLOCK_SIZE_M" : 16 ,
13+ "BLOCK_SIZE_N" : 64 ,
14+ "GROUP_SIZE_M" : 2 ,
15+ "NUM_STAGE" : 5 ,
16+ "NUM_WARPS" : 4
17+ },
18+ "131072" : {
19+ "BLOCK_SIZE_K" : 128 ,
20+ "BLOCK_SIZE_M" : 128 ,
21+ "BLOCK_SIZE_N" : 64 ,
22+ "GROUP_SIZE_M" : 4 ,
23+ "NUM_STAGE" : 4 ,
24+ "NUM_WARPS" : 4
25+ },
26+ "16" : {
27+ "BLOCK_SIZE_K" : 128 ,
28+ "BLOCK_SIZE_M" : 16 ,
29+ "BLOCK_SIZE_N" : 64 ,
30+ "GROUP_SIZE_M" : 8 ,
31+ "NUM_STAGE" : 4 ,
32+ "NUM_WARPS" : 4
33+ },
34+ "16384" : {
35+ "BLOCK_SIZE_K" : 128 ,
36+ "BLOCK_SIZE_M" : 128 ,
37+ "BLOCK_SIZE_N" : 64 ,
38+ "GROUP_SIZE_M" : 1 ,
39+ "NUM_STAGE" : 3 ,
40+ "NUM_WARPS" : 4
41+ },
42+ "2048" : {
43+ "BLOCK_SIZE_K" : 128 ,
44+ "BLOCK_SIZE_M" : 32 ,
45+ "BLOCK_SIZE_N" : 128 ,
46+ "GROUP_SIZE_M" : 4 ,
47+ "NUM_STAGE" : 4 ,
48+ "NUM_WARPS" : 4
49+ },
50+ "256" : {
51+ "BLOCK_SIZE_K" : 128 ,
52+ "BLOCK_SIZE_M" : 32 ,
53+ "BLOCK_SIZE_N" : 64 ,
54+ "GROUP_SIZE_M" : 2 ,
55+ "NUM_STAGE" : 4 ,
56+ "NUM_WARPS" : 4
57+ },
58+ "32" : {
59+ "BLOCK_SIZE_K" : 128 ,
60+ "BLOCK_SIZE_M" : 16 ,
61+ "BLOCK_SIZE_N" : 64 ,
62+ "GROUP_SIZE_M" : 1 ,
63+ "NUM_STAGE" : 5 ,
64+ "NUM_WARPS" : 4
65+ },
66+ "32768" : {
67+ "BLOCK_SIZE_K" : 128 ,
68+ "BLOCK_SIZE_M" : 128 ,
69+ "BLOCK_SIZE_N" : 64 ,
70+ "GROUP_SIZE_M" : 1 ,
71+ "NUM_STAGE" : 3 ,
72+ "NUM_WARPS" : 4
73+ },
74+ "4096" : {
75+ "BLOCK_SIZE_K" : 128 ,
76+ "BLOCK_SIZE_M" : 128 ,
77+ "BLOCK_SIZE_N" : 64 ,
78+ "GROUP_SIZE_M" : 2 ,
79+ "NUM_STAGE" : 3 ,
80+ "NUM_WARPS" : 4
81+ },
82+ "512" : {
83+ "BLOCK_SIZE_K" : 128 ,
84+ "BLOCK_SIZE_M" : 32 ,
85+ "BLOCK_SIZE_N" : 128 ,
86+ "GROUP_SIZE_M" : 2 ,
87+ "NUM_STAGE" : 5 ,
88+ "NUM_WARPS" : 4
89+ },
90+ "64" : {
91+ "BLOCK_SIZE_K" : 128 ,
92+ "BLOCK_SIZE_M" : 16 ,
93+ "BLOCK_SIZE_N" : 64 ,
94+ "GROUP_SIZE_M" : 1 ,
95+ "NUM_STAGE" : 5 ,
96+ "NUM_WARPS" : 4
97+ },
98+ "65536" : {
99+ "BLOCK_SIZE_K" : 128 ,
100+ "BLOCK_SIZE_M" : 128 ,
101+ "BLOCK_SIZE_N" : 64 ,
102+ "GROUP_SIZE_M" : 4 ,
103+ "NUM_STAGE" : 4 ,
104+ "NUM_WARPS" : 4
105+ },
106+ "8" : {
107+ "BLOCK_SIZE_K" : 128 ,
108+ "BLOCK_SIZE_M" : 16 ,
109+ "BLOCK_SIZE_N" : 64 ,
110+ "GROUP_SIZE_M" : 2 ,
111+ "NUM_STAGE" : 4 ,
112+ "NUM_WARPS" : 4
113+ },
114+ "8192" : {
115+ "BLOCK_SIZE_K" : 128 ,
116+ "BLOCK_SIZE_M" : 128 ,
117+ "BLOCK_SIZE_N" : 64 ,
118+ "GROUP_SIZE_M" : 2 ,
119+ "NUM_STAGE" : 4 ,
120+ "NUM_WARPS" : 4
121+ }
122+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_DIM" : 512 ,
4+ "BLOCK_M" : 16 ,
5+ "NUM_STAGE" : 1 ,
6+ "num_warps" : 8
7+ },
8+ "1024" : {
9+ "BLOCK_DIM" : 256 ,
10+ "BLOCK_M" : 8 ,
11+ "NUM_STAGE" : 1 ,
12+ "num_warps" : 1
13+ },
14+ "128" : {
15+ "BLOCK_DIM" : 1024 ,
16+ "BLOCK_M" : 1 ,
17+ "NUM_STAGE" : 4 ,
18+ "num_warps" : 2
19+ },
20+ "16" : {
21+ "BLOCK_DIM" : 512 ,
22+ "BLOCK_M" : 1 ,
23+ "NUM_STAGE" : 1 ,
24+ "num_warps" : 8
25+ },
26+ "16384" : {
27+ "BLOCK_DIM" : 1024 ,
28+ "BLOCK_M" : 1 ,
29+ "NUM_STAGE" : 4 ,
30+ "num_warps" : 4
31+ },
32+ "2" : {
33+ "BLOCK_DIM" : 256 ,
34+ "BLOCK_M" : 1 ,
35+ "NUM_STAGE" : 4 ,
36+ "num_warps" : 8
37+ },
38+ "2048" : {
39+ "BLOCK_DIM" : 256 ,
40+ "BLOCK_M" : 1 ,
41+ "NUM_STAGE" : 1 ,
42+ "num_warps" : 1
43+ },
44+ "256" : {
45+ "BLOCK_DIM" : 1024 ,
46+ "BLOCK_M" : 1 ,
47+ "NUM_STAGE" : 4 ,
48+ "num_warps" : 2
49+ },
50+ "32" : {
51+ "BLOCK_DIM" : 512 ,
52+ "BLOCK_M" : 1 ,
53+ "NUM_STAGE" : 1 ,
54+ "num_warps" : 8
55+ },
56+ "4" : {
57+ "BLOCK_DIM" : 128 ,
58+ "BLOCK_M" : 1 ,
59+ "NUM_STAGE" : 2 ,
60+ "num_warps" : 4
61+ },
62+ "4096" : {
63+ "BLOCK_DIM" : 256 ,
64+ "BLOCK_M" : 1 ,
65+ "NUM_STAGE" : 1 ,
66+ "num_warps" : 1
67+ },
68+ "512" : {
69+ "BLOCK_DIM" : 256 ,
70+ "BLOCK_M" : 4 ,
71+ "NUM_STAGE" : 1 ,
72+ "num_warps" : 1
73+ },
74+ "64" : {
75+ "BLOCK_DIM" : 512 ,
76+ "BLOCK_M" : 1 ,
77+ "NUM_STAGE" : 1 ,
78+ "num_warps" : 8
79+ },
80+ "8" : {
81+ "BLOCK_DIM" : 512 ,
82+ "BLOCK_M" : 1 ,
83+ "NUM_STAGE" : 1 ,
84+ "num_warps" : 8
85+ },
86+ "8192" : {
87+ "BLOCK_DIM" : 1024 ,
88+ "BLOCK_M" : 1 ,
89+ "NUM_STAGE" : 4 ,
90+ "num_warps" : 4
91+ }
92+ }
You can’t perform that action at this time.
0 commit comments