Skip to content

Commit 68e1cee

Browse files
committed
fix cudagraph
1 parent b539632 commit 68e1cee

File tree

2 files changed: +4 lines, -2 lines

lightllm/models/qwen3next/mem_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def __init__(
6868
self.ssm_state_shape = ssm_state_shape
6969

7070
assert linear_attn_cache_size is not None
71+
self.HOLD_BUFFER_INDEX = linear_attn_cache_size
7172
self.conv_state_mem_manager = LayerCacheMemoryManager(
7273
linear_attn_cache_size, conv_state_dtype, conv_state_shape, self.linear_attn_layer_num, "conv_state"
7374
)

lightllm/models/qwen3next/req_manager.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ def __init__(self, max_request_num, max_sequence_length, mem_manager: Qwen3NextM
1111
super().__init__(max_request_num, max_sequence_length, mem_manager)
1212
self.EMPTY_BUFFER_INDEX = -1
1313
self.req_to_buffer_indexes = torch.zeros((self.max_request_num + 1), dtype=torch.int32, device="cuda")
14-
self.req_to_buffer_indexes[:] = self.EMPTY_BUFFER_INDEX
14+
self.req_to_buffer_indexes[:-1] = self.EMPTY_BUFFER_INDEX
15+
self.req_to_buffer_indexes[-1] = self.mem_manager.HOLD_BUFFER_INDEX
1516

1617
@override
1718
def free(self, free_req_indexes: List[int], free_token_index):
@@ -20,7 +21,7 @@ def free(self, free_req_indexes: List[int], free_token_index):
2021

2122
@override
2223
def free_all(self):
23-
self.req_to_buffer_indexes[:] = self.EMPTY_BUFFER_INDEX
24+
self.req_to_buffer_indexes[:-1] = self.EMPTY_BUFFER_INDEX
2425
super().free_all()
2526
return
2627

0 commit comments

Comments (0)