
Commit 93d878c

fix enable dp.
1 parent c686225 commit 93d878c

16 files changed: 25 additions & 45 deletions


lightllm/common/basemodel/layer_infer/template/transformer_layer_infer_template.py

Lines changed: 0 additions & 1 deletion
@@ -22,7 +22,6 @@ def __init__(self, layer_num, network_config, mode):
         self.tp_o_head_num_ = -1
         self.head_dim_ = -1
         self.embed_dim_ = -1
-        self.enable_dp = os.getenv("ENABLE_DP", "0").upper() in ["ON", "TRUE", "1"]
         return

     def _att_norm(self, input, infer_state: InferStateInfo, layer_weight) -> torch.Tensor:
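For reference, the line deleted here (and in several files below) uses the same boolean env-flag idiom: read the variable, upper-case it, and treat "ON", "TRUE", or "1" as enabled. A minimal sketch of that idiom on its own; the env_flag helper name is illustrative, not a lightllm function:

    import os

    def env_flag(name: str, default: str = "0") -> bool:
        # "ON", "TRUE", and "1" (any case) count as enabled; everything else is disabled.
        return os.getenv(name, default).upper() in ["ON", "TRUE", "1"]

    enable_dp = env_flag("ENABLE_DP")  # the per-class flag this commit removes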

lightllm/models/deepseek2/infer_struct.py

Lines changed: 0 additions & 1 deletion
@@ -9,7 +9,6 @@ class Deepseek2InferStateInfo(LlamaInferStateInfo):
     def __init__(self):
         super().__init__()
         self.kv_starts = None
-        self.enable_dp = os.getenv("ENABLE_DP", "0").upper() in ["ON", "TRUE", "1"]

     def init_some_extra_state(self, model, input_ids: torch.Tensor):
         super().init_some_extra_state(model, input_ids)

lightllm/models/deepseek2/layer_infer/transformer_layer_infer.py

Lines changed: 2 additions & 4 deletions
@@ -66,7 +66,6 @@ def __init__(self, layer_num, network_config, mode=[]):
         self.softmax_scale = self.softmax_scale * mscale * mscale
         self.enable_cc_method = not os.getenv("DISABLE_CC_METHOD", "False").upper() in ["ON", "TRUE", "1"]
         super().__init__(layer_num, network_config, mode)
-        self.enable_dp = os.getenv("ENABLE_DP", "0").upper() in ["ON", "TRUE", "1"]
         self.num_heads = network_config["num_attention_heads"]
         self.num_kv_heads = network_config["num_key_value_heads"]
         return
@@ -78,9 +77,8 @@ def _bind_func(self):

     def _bind_ffn(self):
         if self.is_moe:
-            if self.enable_dp:
-                moe_mode = os.environ.get("MOE_MODE", "TP")
-                if moe_mode == "EP":
+            moe_mode = os.environ.get("MOE_MODE", "TP")
+            if moe_mode == "EP":
                 self._ffn = partial(Deepseek2TransformerLayerInfer._moe_ffn_edp, self)
             else:
                 self._ffn = partial(Deepseek2TransformerLayerInfer._moe_ffn, self)
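The _bind_ffn hunk keeps the MOE_MODE dispatch but drops the enable_dp guard around it: the FFN implementation is chosen once at construction time and bound with functools.partial. A small standalone sketch of that late-binding pattern, with an invented MoeLayer class and toy method bodies standing in for the real kernels:

    import os
    from functools import partial

    class MoeLayer:
        def _moe_ffn(self, x):
            return f"tensor-parallel ffn({x})"

        def _moe_ffn_edp(self, x):
            return f"expert-parallel ffn({x})"

        def _bind_ffn(self):
            # Pick the implementation once, based on MOE_MODE, instead of branching on every call.
            moe_mode = os.environ.get("MOE_MODE", "TP")
            if moe_mode == "EP":
                self._ffn = partial(MoeLayer._moe_ffn_edp, self)
            else:
                self._ffn = partial(MoeLayer._moe_ffn, self)

    layer = MoeLayer()
    layer._bind_ffn()
    print(layer._ffn("hidden_states"))  # dispatches to whichever variant was bound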

lightllm/models/deepseek2/layer_weights/transformer_layer_weight.py

Lines changed: 0 additions & 1 deletion
@@ -19,7 +19,6 @@

 class Deepseek2TransformerLayerWeight(TransformerLayerWeight):
     def __init__(self, layer_num, data_type, network_config, mode=[], quant_cfg=None):
-        self.enable_dp = os.getenv("ENABLE_DP", "0").upper() in ["ON", "TRUE", "1"]
         self.enable_cc_method = not os.getenv("DISABLE_CC_METHOD", "False").upper() in ["ON", "TRUE", "1"]
         super().__init__(layer_num, data_type, network_config, mode, quant_cfg)
         return

lightllm/models/deepseek2/model.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@
 class FlashInferStateExtraInfo:
     def __init__(self, model):
         num_heads = model.config["num_attention_heads"]
-        self.tp_q_head_num = num_heads if enable_env_vars("ENABLE_DP") else num_heads // get_dp_world_size()
+        self.tp_q_head_num = num_heads // get_dp_world_size()
         self.qk_nope_head_dim = model.qk_nope_head_dim
         self.qk_rope_head_dim = model.qk_rope_head_dim
         self.kv_lora_rank = model.kv_lora_rank
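After this change each rank's query-head count is always derived from the data-parallel world size rather than from the ENABLE_DP flag. A quick worked example of the division; the numbers are made up, not taken from a real config:

    # Illustrative values only.
    num_heads = 128       # model.config["num_attention_heads"]
    dp_world_size = 8     # what get_dp_world_size() would return

    tp_q_head_num = num_heads // dp_world_size
    assert tp_q_head_num == 16  # each rank owns 16 query heads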

lightllm/models/llama/layer_infer/post_layer_infer.py

Lines changed: 0 additions & 1 deletion
@@ -22,7 +22,6 @@ def __init__(self, network_config, mode):
         self.eps_ = network_config["rms_norm_eps"]
         self.vocab_size_ = network_config["vocab_size"]
         self.embed_dim_ = network_config["n_embed"]
-        self.enable_dp = os.getenv("ENABLE_DP", "0").upper() in ["ON", "TRUE", "1"]
         return

     def _norm(self, input, infer_state, layer_weight: LlamaPreAndPostLayerWeight) -> torch.Tensor:

lightllm/models/llama/layer_infer/pre_layer_infer.py

Lines changed: 2 additions & 6 deletions
@@ -16,12 +16,8 @@ class LlamaPreLayerInfer(PreLayerInferTpl):

     def __init__(self, network_config, mode):
         super().__init__(network_config, mode)
-        self.enable_dp = os.getenv("ENABLE_DP", "0").upper() in ["ON", "TRUE", "1"]
-        if not self.enable_dp:
-            tp_vob_ids = np.linspace(0, network_config["vocab_size"], self.tp_world_size_ + 1, dtype=np.int64)
-            self.vob_start_id_, self.vob_end_id_ = int(tp_vob_ids[self.tp_rank_]), int(tp_vob_ids[self.tp_rank_ + 1])
-        else:
-            self.vob_start_id_, self.vob_end_id_ = 0, network_config["vocab_size"]
+        tp_vob_ids = np.linspace(0, network_config["vocab_size"], self.tp_world_size_ + 1, dtype=np.int64)
+        self.vob_start_id_, self.vob_end_id_ = int(tp_vob_ids[self.tp_rank_]), int(tp_vob_ids[self.tp_rank_ + 1])

         return
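
The surviving branch always partitions the vocabulary across tensor-parallel ranks with np.linspace, giving each rank a contiguous [vob_start_id_, vob_end_id_) range. A self-contained sketch of that partitioning, with made-up sizes:

    import numpy as np

    vocab_size = 32000     # illustrative
    tp_world_size = 4      # illustrative
    tp_rank = 1            # illustrative

    # tp_world_size + 1 evenly spaced boundaries over [0, vocab_size].
    tp_vob_ids = np.linspace(0, vocab_size, tp_world_size + 1, dtype=np.int64)
    vob_start_id, vob_end_id = int(tp_vob_ids[tp_rank]), int(tp_vob_ids[tp_rank + 1])
    print(vob_start_id, vob_end_id)  # 8000 16000 -> rank 1 owns token ids [8000, 16000)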

lightllm/models/llama/layer_weights/pre_and_post_layer_weight.py

Lines changed: 2 additions & 9 deletions
@@ -7,7 +7,6 @@
 class LlamaPreAndPostLayerWeight(PreAndPostLayerWeight):
     def __init__(self, data_type, network_config, mode):
         super().__init__(data_type, network_config, mode)
-        self.enable_dp = os.getenv("ENABLE_DP", "0").upper() in ["ON", "TRUE", "1"]
         return

     def load_hf_weights(self, weights):
@@ -16,18 +15,12 @@ def load_hf_weights(self, weights):
         split_start = split_indexes[self.tp_rank_]
         split_end = split_indexes[self.tp_rank_ + 1]
         if "model.embed_tokens.weight" in weights:
-            if self.enable_dp:
-                self.wte_weight_ = self._cuda(weights["model.embed_tokens.weight"])
-            else:
-                self.wte_weight_ = self._cuda(weights["model.embed_tokens.weight"][split_start:split_end, :])
+            self.wte_weight_ = self._cuda(weights["model.embed_tokens.weight"][split_start:split_end, :])
         tie_word_embeddings = self.network_config_.get("tie_word_embeddings", False)
         if tie_word_embeddings:
             self.lm_head_weight_ = self.wte_weight_
         if "lm_head.weight" in weights:
-            if self.enable_dp:
-                self.lm_head_weight_ = self._cuda(weights["lm_head.weight"])
-            else:
-                self.lm_head_weight_ = self._cuda(weights["lm_head.weight"][split_start:split_end, :])
+            self.lm_head_weight_ = self._cuda(weights["lm_head.weight"][split_start:split_end, :])
         if "model.norm.weight" in weights:
             self.final_norm_weight_ = self._cuda(weights["model.norm.weight"])
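
With the enable_dp branches removed, the embedding and lm_head weights are always row-sliced by the same split indexes, so each rank only materializes its share of the vocabulary rows. A rough sketch of that slicing on a dummy tensor; the shapes, ranks, and the final device move are illustrative (lightllm uses its own _cuda helper):

    import numpy as np
    import torch

    vocab_size, hidden = 32000, 4096   # illustrative shapes
    tp_world_size, tp_rank = 4, 1      # illustrative parallel setup

    split_indexes = np.linspace(0, vocab_size, tp_world_size + 1, dtype=np.int64)
    split_start, split_end = split_indexes[tp_rank], split_indexes[tp_rank + 1]

    embed_tokens = torch.zeros(vocab_size, hidden)        # stands in for the HF checkpoint weight
    wte_shard = embed_tokens[split_start:split_end, :]    # this rank's 8000 rows
    # the shard would then be moved to the GPU, e.g. wte_shard.cuda()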

lightllm/server/api_start.py

Lines changed: 1 addition & 0 deletions
@@ -87,6 +87,7 @@ def normal_or_p_d_start(args):

     # These modes cannot be enabled at the same time.
     assert [
+        args.disable_chunked_prefill,
         args.diverse_mode,
         args.token_healing_mode,
         args.use_reward_model,
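The assertion is truncated here by the page, but the pattern it extends is a standard mutual-exclusivity check: gather the mode flags into a list and require that at most one of them is set. A minimal sketch of that pattern; the helper and its exact condition are illustrative, not copied from api_start.py:

    # Illustrative mutual-exclusivity check over mode flags.
    def check_modes_exclusive(*flags: bool) -> None:
        assert sum(bool(f) for f in flags) <= 1, "these modes cannot be enabled at the same time"

    check_modes_exclusive(False, True, False)    # ok
    # check_modes_exclusive(True, True, False)   # would raise AssertionError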

lightllm/server/httpserver/manager.py

Lines changed: 9 additions & 9 deletions
@@ -523,7 +523,7 @@ async def abort(self, group_req_id: int):
         return

     async def recycle_resource_loop(self):
-        # pre_time_mark = time.time()
+        pre_time_mark = time.time()

         while True:

@@ -547,14 +547,14 @@ async def recycle_resource_loop(self):
                 await self._release_multimodal_resources(req_status.group_req_objs.multimodal_params)

             # Keep this key log for now; it helps locate problems during the refactor.
-            # if time.time() - pre_time_mark > 20:
-            #     pre_time_mark = time.time()
-            #     for req_status in self.req_id_to_out_inf.values():
-            #         logger.info(
-            #             f"left req id {req_status.group_req_objs.group_req_id}"
-            #             f"can release {req_status.group_req_objs.shm_req_objs[0].can_released_mark} "
-            #             f"refcount {req_status.group_req_objs.shm_req_objs[0].ref_count}"
-            #         )
+            if time.time() - pre_time_mark > 120:
+                pre_time_mark = time.time()
+                for req_status in self.req_id_to_out_inf.values():
+                    logger.info(
+                        f"left req id {req_status.group_req_objs.group_req_id}"
+                        f"can release {req_status.group_req_objs.shm_req_objs[0].can_released_mark} "
+                        f"refcount {req_status.group_req_objs.shm_req_objs[0].ref_count}"
+                    )
         return

     async def handle_loop(self):
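The second hunk re-enables the diagnostic log and widens its interval from 20 to 120 seconds, using a simple time-mark throttle: remember when the last report was emitted and only log again once the interval has passed. A stripped-down sketch of that throttling pattern; the request bookkeeping is simplified to a plain dict:

    import time
    import logging

    logger = logging.getLogger(__name__)

    LOG_INTERVAL_S = 120            # interval chosen in this commit
    pre_time_mark = time.time()     # time of the last report

    def maybe_report(pending: dict) -> None:
        # Emit at most one status sweep per LOG_INTERVAL_S seconds.
        global pre_time_mark
        if time.time() - pre_time_mark > LOG_INTERVAL_S:
            pre_time_mark = time.time()
            for req_id, state in pending.items():
                logger.info(f"left req id {req_id} state {state}")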
