Skip to content

Commit b2bb37d

Browse files
authored
[Fix] when prompt token ids is numpy (#3944)
1 parent c6e2a37 commit b2bb37d

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

fastdeploy/cache_manager/prefix_cache_manager.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -467,7 +467,11 @@ def update_cache_blocks(self, task, block_size, num_computed_tokens):
467 467
block_tables = task.block_tables
468 468

469 469
last_node, num_cached_tokens = self.cache_info[req_id]
470-
input_ids = task.prompt_token_ids + task.output_token_ids
470+
if isinstance(task.prompt_token_ids, np.ndarray):
471+
prompt_token_ids = task.prompt_token_ids.tolist()
472+
else:
473+
prompt_token_ids = task.prompt_token_ids
474+
input_ids = prompt_token_ids + task.output_token_ids
471 475
can_cache_computed_tokens = num_computed_tokens - num_computed_tokens % block_size
472 476
left_input_ids = input_ids[num_cached_tokens:can_cache_computed_tokens]
473 477
gpu_extra_block_ids = block_tables[num_cached_tokens // block_size :]
@@ -517,7 +521,11 @@ def request_match_blocks(self, task, block_size, *args):
517 521
hit_info["gpu_cache_blocks"] = 0
518 522
hit_info["cpu_cache_blocks"] = 0
519 523
self.metrics.req_count += 1
520-
input_ids = task.prompt_token_ids + task.output_token_ids
524+
if isinstance(task.prompt_token_ids, np.ndarray):
525+
prompt_token_ids = task.prompt_token_ids.tolist()
526+
else:
527+
prompt_token_ids = task.prompt_token_ids
528+
input_ids = prompt_token_ids + task.output_token_ids
521 529
req_id = task.request_id
522 530
logger.info(f"request_match_blocks: start to allocate blocks for req_id {req_id}")
523 531
input_token_num = len(input_ids)

0 commit comments

Comments
 (0)