3333from lightllm .utils .statics_utils import MovingAverage
3434from lightllm .utils .config_utils import get_vocab_size
3535from lightllm .utils .envs_utils import get_unique_server_name
36+ from lightllm .utils .error_utils import NixlPrefillNodeStopGenToken
3637from rpyc .utils .classic import obtain
3738
3839logger = init_logger (__name__ )
@@ -280,10 +281,17 @@ async def generate(
280281
281282 # 记录请求到达的相关信息
282283 await self ._log_req_header (request_headers , group_request_id )
283- # 监控
284-
284+ # encode
285285 prompt_ids = await self ._encode (prompt , multimodal_params , sampling_params )
286286
287+ prompt_tokens = len (prompt_ids )
288+ # 监控
289+ if group_request_id > 0 :
290+ self .metric_client .counter_inc ("lightllm_request_count" )
291+ self .metric_client .histogram_observe ("lightllm_request_input_length" , prompt_tokens )
292+ self .metric_client .histogram_observe ("lightllm_request_max_new_tokens" , sampling_params .max_new_tokens )
293+ prompt_ids = await self ._check_and_repair_length (prompt_ids , sampling_params )
294+
287295 if nixl_pd_upload_websocket is not None and not is_health_req and self .pd_mode .is_NP ():
288296 # 在 nixl pd 模式下的 p 节点, 为了更好的兼容多模态的推理流程,np 节点需要先上报其 encode 好的 prompt ids 信息,然后
289297 # 再等待 pd_master 传输下来的对应的进行 decode 节点的decode信息,然后再执行后续的流程
@@ -302,13 +310,10 @@ async def generate(
302310 decode_node_info : NIXLDecodeNodeInfo = nixl_pd_event .decode_node_info
303311 sampling_params .nixl_params .set (pickle .dumps (decode_node_info ))
304312
305- prompt_tokens = len (prompt_ids )
306- # 监控
307- if group_request_id > 0 :
308- self .metric_client .counter_inc ("lightllm_request_count" )
309- self .metric_client .histogram_observe ("lightllm_request_input_length" , prompt_tokens )
310- self .metric_client .histogram_observe ("lightllm_request_max_new_tokens" , sampling_params .max_new_tokens )
311- prompt_ids = await self ._check_and_repair_length (prompt_ids , sampling_params )
313+ if decode_node_info .ready_kv_len == len (prompt_ids ) - 1 :
314+ # 如果 decode 节点的 ready_kv_len 和 prefill encode 的 len(prompt ids) -1 相等,说明不需要进行 prefill
315+ # 直接 raise NixlPrefillNodeStopGenToken
316+ raise NixlPrefillNodeStopGenToken (group_request_id = group_request_id )
312317
313318 # 申请资源并存储
314319 alloced_req_indexes = []
0 commit comments