
Commit b9cc80d

Revert "refactor"
This reverts commit 1030efb.
1 parent: 1030efb

16 files changed: +1365 −48 lines

lightllm/common/basemodel/basemodel.py

Lines changed: 4 additions & 8 deletions
@@ -81,10 +81,6 @@ def __init__(self, kvargs):
 
         self._init_datatype()
         self._init_config()
-
-        if os.environ.get("LIGHTLLM_TRITON_AUTOTUNE", "0") == "1":
-            self.layers_num = self.autotune_layers()
-
         self._verify_must()
         self._verify_params()
         self._init_quant()
@@ -748,6 +744,8 @@ def _autotune_warmup(self):
 
         warmup_lengths.sort(reverse=True)
 
+        layer_num_bak = self.layers_num
+        self.layers_num = self.autotune_layers()
         for input_len in warmup_lengths:
             try:
                 logger.info(f"autotune warmup for length {input_len}")
@@ -779,16 +777,14 @@ def _autotune_warmup(self):
                 del model_output
                 self.req_manager.free_all()
                 self.mem_manager.free_all()
-                torch.cuda.empty_cache()
                 logger.info(f"autotune warmup for length {input_len} ok")
             except Exception as e:
                 logger.warning(f"autotune warmup for length {input_len} failed: {str(e)}")
                 self.req_manager.free_all()
                 self.mem_manager.free_all()
-                torch.cuda.empty_cache()
+        self.layers_num = layer_num_bak
         torch.distributed.barrier()
-        logger.info("autotune warmup done, exit!")
-        exit(0)
+        os.environ["LIGHTLLM_TRITON_AUTOTUNE"] = "0"
 
     @final
     @torch.no_grad()
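Taken together, the three hunks change when autotuning touches the model: `layers_num` is no longer shrunk permanently at construction time. Instead the warmup backs it up, tunes on the reduced depth, restores it after the loop, and clears the env flag so the process keeps serving rather than calling `exit(0)`. A minimal sketch of the pattern, with a simplified stand-in class (only `layers_num`, `autotune_layers`, and the `LIGHTLLM_TRITON_AUTOTUNE` flag come from the diff; everything else is illustrative):

```python
import os

class TinyModel:
    """Illustrative stand-in for BaseModel; not lightllm's real class."""

    def __init__(self):
        self.layers_num = 32  # full depth, set from the model config

    def autotune_layers(self):
        # Assumption: a couple of layers suffice for tuning, since every
        # layer launches the same kernels with the same shapes.
        return 2

    def _autotune_warmup(self):
        layer_num_bak = self.layers_num           # back up the real depth
        self.layers_num = self.autotune_layers()  # tune on fewer layers
        # ... run warmup forward passes over the sorted lengths here,
        # catching and logging per-length failures as the diff does ...
        self.layers_num = layer_num_bak           # restore the full depth
        # Clear the flag instead of exit(0), so this same process can keep
        # serving with the freshly tuned configs already in memory.
        os.environ["LIGHTLLM_TRITON_AUTOTUNE"] = "0"
```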

lightllm/common/fused_moe/grouped_fused_moe.py

Lines changed: 3 additions & 3 deletions
@@ -478,7 +478,7 @@ def get_grouped_matmul_static_key(
                 "BLOCK_SIZE_N": bn,
                 "BLOCK_SIZE_K": bk,
                 "GROUP_SIZE_M": gm,
-                "num_warps": nw,
+                "NUM_WARPS": nw,
                 "NUM_STAGE": ns,
             }
             for ns in [1, 2, 3, 4, 5]
@@ -493,7 +493,7 @@ def get_grouped_matmul_static_key(
         "BLOCK_SIZE_N": 64,
         "BLOCK_SIZE_K": 32,
         "GROUP_SIZE_M": 8,
-        "num_warps": 4,
+        "NUM_WARPS": 4,
         "NUM_STAGE": 1,
     },
     static_key_func=get_grouped_matmul_static_key,
@@ -550,7 +550,7 @@ def grouped_matmul(
     BLOCK_SIZE_N = run_config["BLOCK_SIZE_N"]
     BLOCK_SIZE_K = run_config["BLOCK_SIZE_K"]
     GROUP_SIZE_M = run_config["GROUP_SIZE_M"]
-    num_warps = run_config["num_warps"]
+    num_warps = run_config["NUM_WARPS"]
     num_stages = run_config["NUM_STAGE"]
 
     if block_size_k != 0:
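The only change in this file is the case of one config key, presumably so the in-memory run_config matches the saved autotune tables added below, which spell the key NUM_WARPS. A trivial illustration of why the spelling must agree end to end (values taken from the default config above):

```python
# The key travels from the tuning search space through the saved JSON to
# the consumer in grouped_matmul, so all three must agree on its case.
run_config = {
    "BLOCK_SIZE_N": 64,
    "BLOCK_SIZE_K": 32,
    "GROUP_SIZE_M": 8,
    "NUM_WARPS": 4,
    "NUM_STAGE": 1,
}
num_warps = run_config["NUM_WARPS"]  # works after this commit
# run_config["num_warps"]            # KeyError against the new tables
```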
New file, 122 added lines — a saved autotune table using the same NUM_WARPS / NUM_STAGE schema as above:
{
  "1024":   {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 64,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 2, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "128":    {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 2, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "131072": {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 2, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "16":     {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 2, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "16384":  {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "2048":   {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 64,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 2, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "256":    {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 2},
  "32":     {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 2, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "32768":  {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "4096":   {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 64,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 1, "NUM_STAGE": 4, "NUM_WARPS": 4},
  "512":    {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 64,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 1, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "64":     {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 2, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "65536":  {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 2, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "8":      {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 2, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "8192":   {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 64,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 8, "NUM_STAGE": 4, "NUM_WARPS": 4}
}
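The top-level keys are power-of-two sizes (8 through 131072), each mapping to one Triton launch config. A hedged sketch of how such a table might be loaded and queried; the bucket rule here (smallest tuned key that covers the request, else the largest) is an assumption for illustration, not necessarily lightllm's exact lookup:

```python
import json

def load_table(path: str) -> dict:
    # JSON object keys are strings ("8", "512", ...); index by int so the
    # buckets compare numerically.
    with open(path) as f:
        return {int(k): v for k, v in json.load(f).items()}

def pick_config(table: dict, size: int) -> dict:
    # Smallest tuned bucket that covers `size`; fall back to the largest.
    for k in sorted(table):
        if size <= k:
            return table[k]
    return table[max(table)]

# e.g. pick_config(load_table("tuned.json"), 300) returns the "512" entry:
# {"BLOCK_SIZE_K": 32, "BLOCK_SIZE_M": 64, ..., "NUM_WARPS": 4}
```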
New file, 122 added lines — a second saved autotune table with the same schema but different tuned values:
{
  "1024":   {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 8, "NUM_STAGE": 2, "NUM_WARPS": 4},
  "128":    {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 1, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "131072": {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64,  "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "16":     {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 1, "NUM_STAGE": 3, "NUM_WARPS": 2},
  "16384":  {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64,  "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "2048":   {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 8, "NUM_STAGE": 2, "NUM_WARPS": 4},
  "256":    {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 32,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 2},
  "32":     {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 2},
  "32768":  {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64,  "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "4096":   {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 4, "NUM_STAGE": 2, "NUM_WARPS": 4},
  "512":    {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 1, "NUM_STAGE": 2, "NUM_WARPS": 4},
  "64":     {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 1, "NUM_STAGE": 3, "NUM_WARPS": 2},
  "65536":  {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64,  "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "8":      {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 16,  "BLOCK_SIZE_N": 128, "GROUP_SIZE_M": 1, "NUM_STAGE": 3, "NUM_WARPS": 4},
  "8192":   {"BLOCK_SIZE_K": 64, "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64,  "GROUP_SIZE_M": 4, "NUM_STAGE": 3, "NUM_WARPS": 4}
}
