Merged
2 changes: 1 addition & 1 deletion lightllm/common/req_manager.py
```diff
@@ -155,7 +155,7 @@ def init_req_sampling_params(self, req):
         else:
             self.req_to_out_token_id_counter[req.req_idx].fill_(0)
         if req.sampling_param.shm_param.input_penalty and req.need_out_token_id_statistics:
-            prompt_ids = torch.from_numpy(req.shm_req.get_prompt_ids()).pin_memory().cuda(non_blocking=True)
+            prompt_ids = torch.from_numpy(req.shm_req.get_prompt_ids_numpy()).pin_memory().cuda(non_blocking=True)
             token_id_counter(
                 prompt_ids=prompt_ids, out_token_id_counter=self.req_to_out_token_id_counter[req.req_idx]
             )
```
3 changes: 3 additions & 0 deletions lightllm/server/core/objs/req.py
```diff
@@ -188,6 +188,9 @@ def link_logprobs_shm_array(self):
     def get_prompt_ids(self):
         return self.shm_prompt_ids.arr[: self.input_len].tolist()
 
+    def get_prompt_ids_numpy(self):
+        return self.shm_prompt_ids.arr[: self.input_len]
+
     def to_router_rpc_obj(self):
         if hasattr(self, "multimodal_params"):
             return (
```
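Taken together, these two hunks explain the new accessor: `torch.from_numpy` accepts only a numpy `ndarray` (and shares its buffer zero-copy), while `get_prompt_ids()` converts the shared-memory slice into a Python list via `.tolist()`. A minimal standalone sketch of the difference (not code from the PR; `prompt_arr` and `input_len` are stand-ins for `shm_prompt_ids.arr` and the request's input length):

```python
import numpy as np
import torch

# Stand-in for the shared-memory prompt-id array (shm_prompt_ids.arr).
prompt_arr = np.array([101, 2023, 2003, 102], dtype=np.int64)
input_len = 4

# get_prompt_ids_numpy()-style: hand the ndarray slice straight to torch.
# torch.from_numpy shares memory with the source array, so no copy is made.
ids = torch.from_numpy(prompt_arr[:input_len])

# get_prompt_ids()-style: .tolist() produces a plain Python list,
# which torch.from_numpy rejects outright.
try:
    torch.from_numpy(prompt_arr[:input_len].tolist())
except TypeError as err:
    print(f"from_numpy rejects lists: {err}")
```

The call site in `req_manager.py` then pins the host tensor and uploads it with `.pin_memory().cuda(non_blocking=True)`, so the only host-side copy is the one into pinned memory.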
11 changes: 5 additions & 6 deletions lightllm/server/router/req_queue/chunked_prefill/beam_impl.py
```diff
@@ -116,15 +116,14 @@ def generate_new_batch(self, current_batch: Batch):
             if ok_insert:
                 can_run_list.extend(cur_group_reqs)
 
+        new_batch = None
         if len(can_run_list) != 0:
             new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node)
-            for req in abort_req_list:
-                self.router.shm_req_manager.put_back_req_obj(req)
-            self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
-            return new_batch
-        else:
-            return None
+        for req in abort_req_list:
+            self.router.shm_req_manager.put_back_req_obj(req)
+        self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
+        return new_batch
 
     def _add_to_group(self, cur_group_reqs, req: Req):
         if len(cur_group_reqs) == 0:
```

Review comment from a Contributor on lines 120 to 121 (medium):

Consider using a more concise way to initialize `new_batch` based on whether `can_run_list` is populated. Using the boolean value of the list directly can improve readability.

```python
new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node) if can_run_list else None
```
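For illustration, here is how the tail of `generate_new_batch` could read with the reviewer's ternary applied. This is a sketch of a fragment inside the method, not code from the PR; it assumes the surrounding `self`, `Batch`, `uuid`, `can_run_list`, `abort_req_list`, and `aborted_count` from the diff above:

```python
# Truthiness of the list replaces the explicit len(can_run_list) != 0 check.
new_batch = (
    Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node)
    if can_run_list
    else None
)
# As in the merged version, cleanup now runs whether or not a batch was
# formed: before this PR, aborted requests were only put back and the
# waiting list only trimmed when can_run_list was non-empty.
for req in abort_req_list:
    self.router.shm_req_manager.put_back_req_obj(req)
self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
return new_batch
```

The same note applies to the two files below, which carry the identical refactor.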
12 changes: 5 additions & 7 deletions lightllm/server/router/req_queue/chunked_prefill/impl.py
```diff
@@ -91,15 +91,13 @@ def generate_new_batch(self, current_batch: Batch):
                 can_run_list.append(req)
             else:
                 break
-
+        new_batch = None
         if len(can_run_list) != 0:
             new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node)
-            for req in abort_req_list:
-                self.router.shm_req_manager.put_back_req_obj(req)
-            self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
-            return new_batch
-        else:
-            return None
+        for req in abort_req_list:
+            self.router.shm_req_manager.put_back_req_obj(req)
+        self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
+        return new_batch
 
     def _calcu_batch_token_load_batch_not_none(self, current_batch: Batch):
         is_busy = self.is_busy()
```

Review comment from a Contributor on lines 95 to 96 (medium):

Consider using a more concise way to initialize `new_batch` based on whether `can_run_list` is populated. Using the boolean value of the list directly can improve readability.

```python
new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node) if can_run_list else None
```
```diff
@@ -48,15 +48,13 @@ def generate_new_batch(self, current_batch: Batch):
                 can_run_list.append(req)
             else:
                 break
-
+        new_batch = None
         if len(can_run_list) != 0:
             new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node)
-            for req in abort_req_list:
-                self.router.shm_req_manager.put_back_req_obj(req)
-            self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
-            return new_batch
-        else:
-            return None
+        for req in abort_req_list:
+            self.router.shm_req_manager.put_back_req_obj(req)
+        self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
+        return new_batch
 
     def _calcu_batch_token_load_batch_not_none(self, current_batch: Batch):
         is_busy = self.is_busy()
```

Review comment from a Contributor on lines 52 to 53 (medium):

Consider using a more concise way to initialize `new_batch` based on whether `can_run_list` is populated. Using the boolean value of the list directly can improve readability.

```python
new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size_in_node=self.dp_size_in_node) if can_run_list else None
```