@@ -14,15 +14,17 @@ class QueueItem(ctypes.Structure):
1414 ("special" , ctypes .c_bool ),
1515 ("count_output_tokens" , ctypes .c_int ),
1616 ("src_index" , ctypes .c_int ), # 在源token队列的索引位置
17+ ("force_stop" , ctypes .c_bool ), # 强制停止所有推理并让客户端返回
1718 ]
1819
# Initialise the item's fields: data_len to 0, src_index and
# count_output_tokens to -1, and both boolean flags to False.
1920 def __init__ (self ):
2021 self .data_len = 0
2122 self .src_index = - 1
2223 self .special = False
2324 self .count_output_tokens = - 1
25+ self .force_stop = False # field added by this change; starts out cleared
2426
25- def set (self , token_str : str , src_index : int , special : bool , count_output_tokens : int ):
27+ def set (self , token_str : str , src_index : int , special : bool , count_output_tokens : int , force_stop : bool ):
2628 str_bytes = token_str .encode ("utf-8" )
2729 assert (
2830 len (str_bytes ) <= LIGHTLLM_TOKEN_MAX_BYTES
@@ -32,6 +34,7 @@ def set(self, token_str: str, src_index: int, special: bool, count_output_tokens
3234 self .src_index = src_index
3335 self .special = special
3436 self .count_output_tokens = count_output_tokens
37+ self .force_stop = force_stop
3538 return
3639
3740 def get (self ):
@@ -40,6 +43,7 @@ def get(self):
4043 self .src_index ,
4144 self .special ,
4245 self .count_output_tokens ,
46+ self .force_stop ,
4347 )
4448
4549
@@ -62,13 +66,13 @@ def is_empty(self):
# Return True when advancing tail by one slot, modulo
# LIGHTLLM_OUT_TOKEN_QUEUE_SIZE, would land on head.
6266 def is_full (self ):
6367 return (self .tail + 1 ) % LIGHTLLM_OUT_TOKEN_QUEUE_SIZE == self .head
6468
# Store one entry in the slot at tail, then advance tail by one modulo
# LIGHTLLM_OUT_TOKEN_QUEUE_SIZE. Raises Exception("Queue is full") when
# is_full() reports True.
# NOTE(review): force_stop is introduced as a required positional parameter,
# so a caller that still passes four arguments fails with TypeError. Confirm
# every call site is updated in the same change, or consider giving it a
# default of False.
65- def push (self , token_str : str , src_index : int , special : bool , count_output_tokens : int ):
69+ def push (self , token_str : str , src_index : int , special : bool , count_output_tokens : int , force_stop : bool ):
6670 if self .is_full ():
6771 raise Exception ("Queue is full" )
6872
6973 # Fill the slot at the current tail
7074 item : QueueItem = self .items [self .tail ]
71- item .set (token_str , src_index , special , count_output_tokens )
75+ item .set (token_str , src_index , special , count_output_tokens , force_stop )
7276
7377 # Advance the tail, wrapping around at the queue size
7478 self .tail = (self .tail + 1 ) % LIGHTLLM_OUT_TOKEN_QUEUE_SIZE
@@ -85,7 +89,7 @@ def pop(self) -> Tuple[str, int, bool, int]:
8589 self .head = (self .head + 1 ) % LIGHTLLM_OUT_TOKEN_QUEUE_SIZE
8690 return result
8791
88- def peek (self ) -> Tuple [str , int , bool , int ]:
92+ def peek (self ) -> Tuple [str , int , bool , int , bool ]:
8993 if self .is_empty ():
9094 raise Exception ("Queue is empty" )
9195
0 commit comments