
Commit f579157

fix
1 parent 19fd574 commit f579157

3 files changed: 6 additions & 8 deletions

3 files changed

+6
-8
lines changed

lightllm/common/basemodel/basemodel.py

Lines changed: 2 additions & 1 deletion
@@ -842,7 +842,8 @@ def _init_padded_req(self):
         )
         b_seq_len = torch.ones(batch_size, dtype=torch.int32, device="cuda")
         b_ready_cache_len = torch.zeros(batch_size, dtype=torch.int32, device="cuda")
-        b_prefill_start_loc = F.pad(torch.cumsum(b_seq_len, dim=0), (1, 0), value=0)[:-1]
+        b_q_seq_len = b_seq_len - b_ready_cache_len
+        b_prefill_start_loc = b_q_seq_len.cumsum(dim=0, dtype=torch.int32) - b_q_seq_len
         total_token_num = prefill_input_len * batch_size
         b_mtp_index = torch.zeros(batch_size, dtype=torch.int32, device="cuda")
         model_input = ModelInput(
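Both expressions compute an exclusive prefix sum, giving each request's start offset in the packed token buffer; the new form drops the pad-and-slice and stays in int32. Since b_ready_cache_len is all zeros in _init_padded_req, b_q_seq_len equals b_seq_len here and the values are unchanged in this file; the rewrite aligns this fake-batch path with the real prefill paths below. A minimal standalone sketch (not part of the commit) checking the identity:

import torch
import torch.nn.functional as F

# Standalone check: the removed pad-and-slice and the added cumsum-minus form
# both yield the exclusive prefix sum of the per-request lengths.
x = torch.tensor([3, 1, 4, 2], dtype=torch.int32)

old_style = F.pad(torch.cumsum(x, dim=0), (1, 0), value=0)[:-1]  # removed form
new_style = x.cumsum(dim=0, dtype=torch.int32) - x               # added form

assert torch.equal(old_style, new_style)
print(new_style.tolist())  # [0, 3, 4, 8]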

lightllm/server/router/model_infer/mode_backend/generic_padded_pre_process.py

Lines changed: 2 additions & 4 deletions
@@ -36,7 +36,6 @@ def padded_prepare_prefill_inputs(
     b_ready_cache_len = []
     b_mtp_index = []
     b_prefill_has_output = []
-    b_prefill_start_loc = [0]

     for req in req_objs:

@@ -57,7 +56,6 @@ def padded_prepare_prefill_inputs(
         prefix_total_token_num += req.cur_kv_len
         b_ready_cache_len.append(req.cur_kv_len)
         b_mtp_index.append(0)
-        b_prefill_start_loc.append(b_prefill_start_loc[-1] + input_token_len)

     # padding fake req for prefill
     for _ in range(padded_req_num):
@@ -68,7 +66,6 @@ def padded_prepare_prefill_inputs(
         b_mtp_index.append(0)
         b_prefill_has_output.append(False)
         b_ready_cache_len.append(0)
-        b_prefill_start_loc.append(b_prefill_start_loc[-1] + 1)
         total_token_num += 1
         prefix_total_token_num += 0

@@ -83,7 +80,8 @@ def padded_prepare_prefill_inputs(
     b_seq_len = torch.tensor(b_seq_len, dtype=torch.int32, device="cpu")
     b_mtp_index = torch.tensor(b_mtp_index, dtype=torch.int32, device="cpu")
     b_ready_cache_len = torch.tensor(b_ready_cache_len, dtype=torch.int32, device="cpu")
-    b_prefill_start_loc = torch.tensor(b_prefill_start_loc, dtype=torch.int32, device="cpu")
+    b_q_seq_len = torch.tensor(b_q_seq_len, dtype=torch.int32, device="cpu")
+    b_prefill_start_loc = b_q_seq_len.cumsum(dim=0, dtype=torch.int32) - b_q_seq_len

     # prepare tokens for the dynamic prompt cache
     g_infer_state_lock.acquire()
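Note what the vectorized form also cleans up: the removed list was seeded with [0] and appended once per request (including one +1 per padded fake request), so the resulting tensor held batch_size + 1 entries ending in a running total, while the new tensor has exactly one start offset per request. A sketch of the padded bookkeeping, assuming (it is not shown in this hunk) that b_q_seq_len collects input_token_len per real request and 1 per fake request:

import torch

# Hypothetical numbers; the diff does not show where b_q_seq_len is filled,
# so "1 query token per fake request" is an assumption here.
real_q_lens = [5, 3]       # input_token_len of the real requests
padded_req_num = 2         # fake requests padding the batch

b_q_seq_len = torch.tensor(real_q_lens + [1] * padded_req_num, dtype=torch.int32)
b_prefill_start_loc = b_q_seq_len.cumsum(dim=0, dtype=torch.int32) - b_q_seq_len

# Removed bookkeeping, reconstructed from the deleted lines:
old = [0]
for n in real_q_lens + [1] * padded_req_num:
    old.append(old[-1] + n)

# The list carried a trailing cumulative total; the tensor form is its
# exclusive-prefix-sum part.
assert b_prefill_start_loc.tolist() == old[:-1]  # [0, 5, 8, 9]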

lightllm/server/router/model_infer/mode_backend/generic_pre_process.py

Lines changed: 2 additions & 3 deletions
@@ -20,7 +20,6 @@ def prepare_prefill_inputs(
     b_ready_cache_len = []
     b_mtp_index = []
     b_prefill_has_output = []
-    b_prefill_start_loc = [0]

     for req in req_objs:
         run_reqs.append(req)
@@ -41,7 +40,6 @@ def prepare_prefill_inputs(

         b_seq_len.append(seq_len)
         b_q_seq_len.append(input_token_len)
-        b_prefill_start_loc.append(b_prefill_start_loc[-1] + input_token_len)
         input_ids.append(input_id)
         total_token_num += seq_len
         prefix_total_token_num += req.cur_kv_len
@@ -59,7 +57,8 @@ def prepare_prefill_inputs(
     b_seq_len = torch.tensor(b_seq_len, dtype=torch.int32, device="cpu")
     b_mtp_index = torch.tensor(b_mtp_index, dtype=torch.int32, device="cpu")
     b_ready_cache_len = torch.tensor(b_ready_cache_len, dtype=torch.int32, device="cpu")
-    b_prefill_start_loc = torch.tensor(b_prefill_start_loc, dtype=torch.int32, device="cpu")
+    b_q_seq_len = torch.tensor(b_q_seq_len, dtype=torch.int32, device="cpu")
+    b_prefill_start_loc = b_q_seq_len.cumsum(dim=0, dtype=torch.int32) - b_q_seq_len

     # prepare tokens for the dynamic prompt cache
     g_infer_state_lock.acquire()
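The underlying reason start offsets are derived from b_q_seq_len rather than b_seq_len: on a prompt-cache hit only the uncached query tokens (b_q_seq_len = b_seq_len - b_ready_cache_len, per the basemodel.py hunk above) are packed into the prefill buffer, so offsets computed in full-sequence space would point past the packed data. An illustrative sketch with invented lengths (not from the commit):

import torch

# Invented example: request 0 has 4 of its 6 tokens already in the prompt
# cache, so only 2 query tokens enter the packed prefill buffer.
b_seq_len = torch.tensor([6, 3], dtype=torch.int32)
b_ready_cache_len = torch.tensor([4, 0], dtype=torch.int32)
b_q_seq_len = b_seq_len - b_ready_cache_len  # [2, 3] query tokens

start_loc = b_q_seq_len.cumsum(dim=0, dtype=torch.int32) - b_q_seq_len
print(start_loc.tolist())  # [0, 2]: request 1 starts at offset 2 in the buffer

# A seq_len-based cumsum would put request 1 at offset 6, past the 5 packed tokens.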
