We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1a7a631, commit 8e25066. Copy full SHA for 8e25066
lightllm/common/basemodel/prefill_cuda_graph.py
@@ -31,7 +31,7 @@ def __init__(self, decode_cuda_graph: CudaGraph):
31
32
graph_handle_token_nums = []
33
for i in range(2048):
34
- token_num = int(2 ** i)
+ token_num = int(2 ** (2 * i))
35
if token_num < self.max_handle_token_num:
36
graph_handle_token_nums.append(token_num)
37
else:
0 commit comments