     PytorchEngineConfig,
 )
 from transformers import PreTrainedTokenizerBase
-from typing import Any, Dict, AsyncGenerator
+from typing import Any, Dict, AsyncGenerator, List, Optional
 from lmdeploy.archs import get_task
 from gpt_server.model_handler.reasoning_parser import ReasoningParserManager
 from lmdeploy.serve.async_engine import get_names_from_model
@@ -60,6 +60,39 @@ def is_messages_with_tool(messages: list): |
     return flag


+from lmdeploy.logger import RequestLogger
+
+
+class CustomRequestLogger(RequestLogger):
+    def log_prompt(self, session_id: int, prompt: str) -> None:
+        # Deliberately a no-op: the prompt is logged in log_inputs instead.
+        if not isinstance(prompt, str):
+            # Prompt may be a GPT4V message with base64 images;
+            # logging might be impractical due to length
+            return
+
+    def log_inputs(
+        self,
+        session_id: int,
+        prompt: Optional[str],
+        prompt_token_ids: Optional[List[int]],
+        gen_config: GenerationConfig,
+        adapter_name: str,
+    ) -> None:
+        max_log_len = self.max_log_len
+        input_tokens = len(prompt_token_ids) if prompt_token_ids else 0  # may be None
+        if max_log_len is not None:
+            if prompt is not None:
+                prompt = prompt[:max_log_len]
+
+            if prompt_token_ids is not None:
+                prompt_token_ids = prompt_token_ids[:max_log_len]
+
+        logger.info(
+            f"session_id={session_id} adapter_name={adapter_name} input_tokens={input_tokens} gen_config={gen_config}"
+        )
+        logger.info(f"prompt:\n{prompt}")
+
 class LMDeployBackend(ModelBackend):
     def __init__(self, model_path, tokenizer: PreTrainedTokenizerBase) -> None:
         model_config = get_model_config()
@@ -95,6 +128,8 @@ def __init__(self, model_path, tokenizer: PreTrainedTokenizerBase) -> None: |
         self.chat_template_name = chat_template_name
         self.tokenizer = self.async_engine.tokenizer
         self.reasoning_parser_cache = {}
+        # Custom request logger; max_log_len=None logs the full prompt
+        self.async_engine.request_logger = CustomRequestLogger(max_log_len=None)

     async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
         prompt = params.get("prompt", "")
@@ -141,7 +176,6 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator: |
         messages = params["messages"]
         if isinstance(messages, str):
             logger.info("Using prompt mode")
-            logger.info(prompt)
         else:
             logger.info("Using messages mode")
         results_generator = self.async_engine.generate(
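For reference, a minimal sketch of what the new logger emits once wired in. This is my illustration, not part of the diff: it assumes lmdeploy is installed, that `CustomRequestLogger` is importable from this module, and the session/prompt values are made up.

```python
# Illustrative only: exercise CustomRequestLogger.log_inputs directly.
from lmdeploy import GenerationConfig

# max_log_len=32 truncates the logged prompt to its first 32 characters;
# max_log_len=None (as LMDeployBackend uses above) logs the full prompt.
req_logger = CustomRequestLogger(max_log_len=32)
req_logger.log_inputs(
    session_id=1,
    prompt="hello " * 100,              # long prompt -> only 32 chars reach the log
    prompt_token_ids=list(range(500)),  # only the count (500) is logged
    gen_config=GenerationConfig(max_new_tokens=128),
    adapter_name="default",
)
```

Note that `prompt_token_ids` is truncated but never printed by this override, so only the token count shows up in the log; the truncation appears to be kept for parity with the upstream `RequestLogger.log_inputs`.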