
Commit 2f62a86

Author: root
Merge branch 'dp_balancer' of https://github.com/ModelTC/lightllm into dp_balancer
2 parents: d038f96 + 69c9bb8

3 files changed: 3 additions & 6 deletions

lightllm/server/api_cli.py

Lines changed: 1 addition & 1 deletion
@@ -119,7 +119,7 @@ def make_argument_parser() -> argparse.ArgumentParser:
         help="tool call parser type",
     )
     parser.add_argument(
-        "--running_max_req_size", type=int, default=2048, help="the max size for forward requests in the same time"
+        "--running_max_req_size", type=int, default=1000, help="the max size for forward requests in the same time"
     )
     parser.add_argument("--nnodes", type=int, default=1, help="the number of nodes")
     parser.add_argument("--node_rank", type=int, default=0, help="the rank of the current node")

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 2 additions & 4 deletions
@@ -446,11 +446,9 @@ def _get_classed_reqs(
         else:
             token_num = req_obj.prefill_need_token_num(is_chuncked_prefill=not self.disable_chunked_prefill)
             if prefill_tokens + token_num > self.batch_max_tokens:
-                # skip and wait for the next prefill pass, to avoid OOM
-                prefill_tokens = 0
-                break
-            prefill_tokens += token_num
+                continue
             if token_num <= can_alloc_token_num:
+                prefill_tokens += token_num
                 prefill_reqs.append(req_obj)
                 can_alloc_token_num -= token_num
             else:
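What this hunk changes: before, the first request whose prefill cost overflowed self.batch_max_tokens zeroed prefill_tokens and broke out of the loop, stalling every request behind it until the next pass; after, that one request is skipped with continue, and prefill_tokens is incremented only for requests that are actually admitted. A standalone sketch of the new behavior — the loop and names are simplified from _get_classed_reqs, and the token costs below are invented for illustration:

def select_prefill_reqs(reqs, batch_max_tokens, can_alloc_token_num):
    # Simplified model of the post-commit admission loop.
    prefill_tokens = 0
    prefill_reqs = []
    for name, token_num in reqs:  # (request id, prefill token cost)
        if prefill_tokens + token_num > batch_max_tokens:
            continue  # skip only this request; keep scanning the rest
        if token_num <= can_alloc_token_num:
            prefill_tokens += token_num  # now counted only for admitted requests
            prefill_reqs.append(name)
            can_alloc_token_num -= token_num
    return prefill_reqs

reqs = [("a", 300), ("b", 900), ("c", 200)]
print(select_prefill_reqs(reqs, batch_max_tokens=1000, can_alloc_token_num=1000))
# -> ['a', 'c']; the pre-commit break would have admitted only ['a']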

lightllm/server/router/req_queue/chunked_prefill/impl.py

Lines changed: 0 additions & 1 deletion
@@ -69,7 +69,6 @@ def generate_new_batch(self, current_batch: Batch):
         new_batch_first_router_need_tokens = (
             0 if current_batch is None else current_batch.get_batch_decode_need_tokens()[self.dp_index]
         )
-        print(f"new_batch_first_router_need_tokens: {new_batch_first_router_need_tokens}")

         self._init_cache_list(current_batch, is_busy)
         can_run_list = []
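This hunk only removes a debug print left over from development. If the value is still useful when diagnosing the dp balancer, a leveled logger call is the usual quieter replacement — a sketch assuming a plain stdlib logger, not necessarily lightllm's own logging helper:

import logging

logging.basicConfig(level=logging.DEBUG)  # demo only; real code configures logging once
logger = logging.getLogger(__name__)

new_batch_first_router_need_tokens = 0  # placeholder; comes from the current batch in impl.py

# Emitted only when the log level is DEBUG, instead of always printing.
logger.debug("new_batch_first_router_need_tokens: %s", new_batch_first_router_need_tokens)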
