shell-nlp
diff --git a/‎gpt_server/model_backend/lmdeploy_backend.py‎
Lines changed: 4 additions & 1 deletion b/‎gpt_server/model_backend/lmdeploy_backend.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎gpt_server/model_handler/react/v1/chatglm_react.py‎
Lines changed: 58 additions & 0 deletions b/‎gpt_server/model_handler/react/v1/chatglm_react.py‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎gpt_server/model_handler/react/v1/prompt.py‎
Lines changed: 93 additions & 0 deletions b/‎gpt_server/model_handler/react/v1/prompt.py‎
Lines changed: 93 additions & 0 deletions
diff --git a/‎gpt_server/model_handler/react/v1/prompts/__init__.py‎ b/‎gpt_server/model_handler/react/v1/prompts/__init__.py‎
diff --git a/‎gpt_server/model_handler/react/v1/prompts/qwen_prompt.py‎
Lines changed: 50 additions & 0 deletions b/‎gpt_server/model_handler/react/v1/prompts/qwen_prompt.py‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎gpt_server/model_handler/react/v1/qwen_react.py‎
Lines changed: 92 additions & 0 deletions b/‎gpt_server/model_handler/react/v1/qwen_react.py‎
Lines changed: 92 additions & 0 deletions
diff --git a/‎gpt_server/model_handler/react/v1/schema.py‎
Lines changed: 5 additions & 5 deletions b/‎gpt_server/model_handler/react/v1/schema.py‎
Lines changed: 5 additions & 5 deletions
@@ -45,6 +45,7 @@ def __init__(self, model_path) -> None:
         enable_prefix_caching = bool(os.getenv("enable_prefix_caching", False))
         max_model_len = os.getenv("max_model_len", None)
         gpu_memory_utilization = float(os.getenv("gpu_memory_utilization", 0.8))
+        kv_cache_quant_policy = int(os.getenv("kv_cache_quant_policy", 0))
         dtype = os.getenv("dtype", "auto")
         logger.info(f"后端 {backend}")
         if backend == "pytorch":
@@ -54,6 +55,7 @@ def __init__(self, model_path) -> None:
                 session_len=int(max_model_len) if max_model_len else None,
                 enable_prefix_caching=enable_prefix_caching,
                 cache_max_entry_count=gpu_memory_utilization,
+                quant_policy=kv_cache_quant_policy,
             )
         if backend == "turbomind":
             backend_config = TurbomindEngineConfig(
@@ -62,6 +64,7 @@ def __init__(self, model_path) -> None:
                 session_len=int(max_model_len) if max_model_len else None,
                 dtype=dtype,
                 cache_max_entry_count=gpu_memory_utilization,
+                quant_policy=kv_cache_quant_policy,  # 默认为：0
             )
         pipeline_type, pipeline_class = get_task(model_path)
         logger.info(f"模型架构：{pipeline_type}")
@@ -118,7 +121,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
                 # Abort the request if the client disconnects.
                 await self.async_engine.stop_session(session_id=request_id)
             text_outputs += request_output.response
-            
+
             usage = {
                 "prompt_tokens": request_output.input_token_len,
                 "completion_tokens": request_output.generate_token_len,
 
@@ -0,0 +1,58 @@
+from typing import Any, Dict, List, Tuple, Union, Optional
+import json
+import uuid
+
+from gpt_server.model_handler.react.v0.prompt import (
+    GLM4_TOOL_PROMPT,
+    TOOL_SUFFIX_PROMPT,
+)
+
+
+def glm4_tool_formatter(
+    tools: List[Dict[str, Any]], tool_choice_info: Optional[dict] = None
+) -> str:
+    """Render the GLM-4 tool-calling system prompt for ``tools``.
+
+    Every entry of ``tools`` is indexed by its ``"function"`` key.  That
+    function spec is dumped as indented JSON under a ``## <name>`` heading and
+    followed by ``TOOL_SUFFIX_PROMPT``.  ``tool_choice_info`` is accepted but
+    never read by this formatter.
+
+    Returns:
+        ``GLM4_TOOL_PROMPT`` filled in with the rendered tool sections and the
+        comma-separated tool names, with surrounding whitespace stripped.
+    """
+    sections = []
+    names = []
+    for entry in tools:
+        spec = entry["function"]
+        name = spec["name"]
+        spec_json = json.dumps(spec, ensure_ascii=False, indent=4)
+        sections.append(f"## {name}\n\n{spec_json}\n{TOOL_SUFFIX_PROMPT}\n\n")
+        names.append(name)
+
+    # The tool text always starts with a newline, even when no tool is given.
+    rendered_tools = "\n" + "".join(sections)
+    prompt = GLM4_TOOL_PROMPT.format(
+        tool_text=rendered_tools, tool_names=", ".join(names)
+    )
+    return prompt.strip()
+
+
+def glm4_tool_extractor(content: str) -> Union[str, List[Dict[str, Any]]]:
+    """Extract a ReAct-style tool call from a model reply.
+
+    The last ``Action:`` marker and the last ``Action Input:`` marker are
+    located in ``content``.  The text between them is the tool name and the
+    text after ``Action Input:`` is the tool input, which must parse as JSON.
+
+    Args:
+        content: Raw text produced by the model.
+
+    Returns:
+        A one-element list holding a tool-call dict (``index``, ``id``,
+        ``function`` with ``name`` and the raw JSON ``arguments`` string) when
+        a well-formed call is found; otherwise ``content`` unchanged.
+    """
+    action_marker = "Action:"
+    input_marker = "Action Input:"
+    i = content.rfind(action_marker)
+    j = content.rfind(input_marker)
+    # rfind yields -1 for a missing marker; slicing with that offset would
+    # treat unrelated text as the tool name/input, so treat the reply as plain
+    # text instead.  The same applies when the markers are out of order.
+    if i == -1 or j == -1 or j < i:
+        return content
+
+    tool_name = content[i + len(action_marker) : j].strip().strip(".")
+    tool_input = content[j + len(input_marker) :].strip()
+    try:
+        # Validation only: the arguments are passed on as the original string.
+        json.loads(tool_input)
+    except json.JSONDecodeError:
+        return content
+
+    return [
+        {
+            "index": 0,
+            "id": "call_{}".format(uuid.uuid4().hex),
+            "function": {"name": tool_name, "arguments": tool_input},
+        }
+    ]
+
+
+if __name__ == "__main__":
+    import json
+
+    # Manual smoke test: build two sample tool specs from a single-quoted
+    # literal, print the rendered prompt, then run the extractor on a reply.
+    raw_tools = """[{'type': 'function', 'function': {'name': 'track', 'description': '追踪指定股票的实时价格', 'parameters': {'type': 'object', 'properties': {'symbol': {'description': '需要追踪的股票代码', 'type': 'integer'}}, 'required': ['symbol']}}}, {'type': 'function', 'function': {'name': 'text-to-speech', 'description': '将文本转换为语音', 'parameters': {'type': 'object', 'properties': {'text': {'description': '需要转换成语音的文本', 'type': 'string'}, 'voice': {'description': '要使用的语音类型（男声、女声等', 'default': '男声', 'type': 'string'}, 'speed': {'description': '语音的速度（快、中等、慢等', 'default': '中等', 'type': 'string'}}, 'required': ['text']}}}]"""
+    # Swap the quote style so the literal becomes valid JSON.
+    demo_tools = json.loads(raw_tools.replace("'", '"'))
+
+    print(glm4_tool_formatter(tools=demo_tools))
+    print()
+    # This sample reply carries no "Action:" / "Action Input:" markers.
+    sample_reply = 'multiply\n{"first_int": 8, "second_int": 9}'
+    print(glm4_tool_extractor(sample_reply))
@@ -0,0 +1,93 @@
+# Sentence (in Chinese) stating that, when calling the tools above, the value
+# of "Action Input" must be expressed as JSON.
+TOOL_SUFFIX_PROMPT = (
+    "在调用上述工具时，Action Input的值必须使用 Json 格式来表示调用的参数。"
+)
+
+# Suffix (in Chinese) stating that the tools above must be called.
+TOOL_CHOICE_SUFFIX_PROMPT = "\n注意: 上述工具必须被调用！"
+# Default English ReAct system prompt.
+# Placeholders filled via str.format: {tool_text} and {tool_names}.
+TOOL_SYSTEM_PROMPT = """Answer the following questions as best you can. You have access to the following tools:
+
+{tool_text}
+
+Use the following format:
+
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action: the action to take, should be one of [{tool_names}]
+Action Input: the input to the action
+Observation: the result of the action
+... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+
+Begin!
+
+Question:"""
+# Chinese-language ReAct system prompt using the same
+# Thought / Action / Action Input / Observation / Final Answer labels.
+# Placeholders: {tool_text} and {tool_names}.
+TOOL_SYSTEM_PROMPT_CN = """尽可能回答用户问题，你有权使用以下工具：
+
+{tool_text}
+
+如果使用工具请遵循以下格式回复：
+
+Thought: 思考你当前步骤需要解决什么问题，是否需要使用工具
+Action: 工具名称，你的工具必须从 [{tool_names}] 选择
+Action Input: 工具输入参数, Action Input的值必须使用 Json 格式来表示调用的参数。
+Observation: 调用工具后的结果
+... (Thought/Action/Action Input/Observation 可以重复零次或多次)
+Thought: 我现在知道了最终答案
+Final Answer: 原始输入问题的最终答案
+
+开始!"""
+
+# Chinese-language prompt that tells the model it must use the listed tool(s).
+# Placeholders: {tool_text} and {tool_names} ({tool_names} appears twice).
+# NOTE(review): the identifier is spelled with a lowercase "l" ("TOOl_");
+# check importers before renaming.
+TOOl_CHOICE_SYSTEM_PROMPT_CN = """你是一个工具的执行助手，提供的工具可能是用于将用户的输入格式化为符合工具描述的json模式或者是其它功能。你需要自己判断，你必须强制使用以下工具:
+
+{tool_text}
+
+遵循以下格式：
+
+Thought: 我必须强制执行 {tool_names} 工具 
+Action: 工具名称必须是 {tool_names}
+Action Input: 工具输入参数, Action Input的值必须使用 Json 格式来表示调用的参数。
+Observation: 调用工具后的结果
+Thought: 我现在知道了最终答案
+Final Answer: 原始输入问题的最终答案
+
+开始!"""
+# English prompt that tells the model it must use the listed tool(s).
+# Placeholders: {tool_text} and {tool_names} ({tool_names} appears twice).
+# NOTE(review): the identifier is spelled with a lowercase "l" ("TOOl_");
+# check importers before renaming.
+TOOl_CHOICE_SYSTEM_PROMPT = """You must use the following tools:
+
+{tool_text}
+
+Use the following format:
+
+Question: the input question you must answer
+Thought: I have to execute tool {tool_names}
+Action: the action to take, should be one of [{tool_names}]
+Action Input: the input to the action
+Observation: the result of the action
+... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+
+Begin!
+
+Question:"""
+
+# Unused alternative wording: "Your task is to provide appropriate replies and
+# support for the user's questions and requirements."
+#
+# GLM-4 tool prompt: a Chinese preamble and "available tools" heading followed
+# by the English ReAct format.  Placeholders: {tool_text} and {tool_names}.
+# The literal opens with exactly three quotes so the prompt text does not
+# begin with a stray '"' character.
+GLM4_TOOL_PROMPT = """你可以使用以下工具提供适当的答复和支持。
+
+# 可用工具
+{tool_text}
+Use the following format:
+
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action: the action to take, should be one of [{tool_names}]
+Action Input: the input to the action
+Observation: the result of the action
+... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+
+Begin!
+
+Question:
+"""
@@ -0,0 +1,50 @@
+# Sentence (in Chinese) stating that, when calling the tools above, the value
+# of "action_input" must be expressed as JSON.
+TOOL_SUFFIX_PROMPT = (
+    "在调用上述工具时，action_input的值必须使用 Json 格式来表示调用的参数。"
+)
+
+# Suffix (in Chinese, under a "## 注意" heading) stating that the tools above
+# must be called.
+TOOL_CHOICE_SUFFIX_PROMPT = "\n\n## 注意: \n上述工具必须被调用!"
+# Default Chinese system prompt asking for JSON-formatted replies.
+# Placeholders filled via str.format: {tool_text} and {tool_names}; the
+# doubled braces are literal "{" / "}" in the rendered text.
+# The reply is either {"thought", "reason": {"action", "action_input"}} or
+# {"thought", "reason": {"final_answer"}}.
+
+TOOL_SYSTEM_PROMPT_CN = """# 工具
+## 你拥有如下工具：
+
+{tool_text}
+
+## 如果使用工具，你可以回复零次、一次或多次以下json格式内容，以调用工具,调用工具后,Observation 表示调用工具后的结果,json格式如下:
+{{
+    "thought":"你应该时刻思考自己该做什么",
+    "reason":{{
+        "action":"工具名称，必须是 [{tool_names}] 之一",
+        "action_input":"工具输入, 值必须使用 json 格式"
+    }}
+}}
+或
+{{
+    "thought":"你应该时刻思考自己该做什么",
+    "reason":{{
+        "final_answer":"根据工具结果进行回复，如果工具返回值存在图片url,需将图片用![](url)渲染出来"
+    }}
+}}
+"""
+
+# Chinese system prompt for the forced tool-choice case: it tells the model it
+# must use the listed tool(s) and reply in the JSON shape shown.
+# Placeholders: {tool_text} and {tool_names}; doubled braces are literal.
+# The tool-list heading is a well-formed level-3 markdown heading ("### ")
+# nested under "## 工具".
+# NOTE(review): the identifier is spelled with a lowercase "l" ("TOOl_"); it
+# is kept because other modules import it under this name.
+TOOl_CHOICE_SYSTEM_PROMPT_CN = """# 提供的工具是用于将用户的输入或回复格式化为符合工具描述的json模式,你必须强制使用以下工具:
+## 工具
+### 你拥有如下工具：
+
+{tool_text}
+
+### 你可以在回复中插入零次、一次或多次以下json格式内容，以调用工具,调用工具后,Observation 表示调用工具后的结果,json格式如下:
+{{
+    "thought":"你应该时刻思考自己该做什么",
+    "reason":{{
+        "action":"工具名称，必须是 [{tool_names}] 之一",
+        "action_input":"工具输入, 值必须使用 json 格式"
+    }}
+}}
+或
+{{
+    "thought":"你应该时刻思考自己该做什么",
+    "reason":{{
+        "final_answer":"根据工具结果进行回复，如果工具返回值存在图片url,需将图片用![](url)渲染出来"
+    }}
+}}"""
@@ -0,0 +1,92 @@
+from loguru import logger
+from typing import Any, Dict, List, Tuple, Union, Optional
+import json
+import uuid
+
+from gpt_server.model_handler.react.v1.prompts.qwen_prompt import (
+    TOOL_SYSTEM_PROMPT_CN,
+    TOOl_CHOICE_SYSTEM_PROMPT_CN,
+    TOOL_CHOICE_SUFFIX_PROMPT,
+    TOOL_SUFFIX_PROMPT,
+)
+
+
+def qwen_tool_formatter(
+    tools: List[Dict[str, Any]], tool_choice_info: Optional[dict] = None
+) -> str:
+    """Render the Qwen tool-calling system prompt for ``tools``.
+
+    Args:
+        tools: Tool specs; each entry is indexed by its ``"function"`` key,
+            which must carry ``"name"`` and may carry ``"description"`` and
+            ``"parameters"``.
+        tool_choice_info: When truthy, its ``"tool_choice_idx"`` entry selects
+            the single tool to keep, the forced-choice system prompt is used
+            and ``TOOL_CHOICE_SUFFIX_PROMPT`` is appended to the tool text.
+
+    Returns:
+        The selected system prompt with ``{tool_text}`` and ``{tool_names}``
+        filled in.
+    """
+    logger.info(f"tool_choice_info: {tool_choice_info}")
+    tool_system_prompt = TOOL_SYSTEM_PROMPT_CN
+    tool_choice_suffix_prompt = ""
+    if tool_choice_info:
+        tool_choice_suffix_prompt = TOOL_CHOICE_SUFFIX_PROMPT
+        tools = [tools[tool_choice_info["tool_choice_idx"]]]
+        logger.info(f"tools 已被替换为tool_choic: {tools}")
+        tool_system_prompt = TOOl_CHOICE_SYSTEM_PROMPT_CN
+
+    tool_names = []
+    param_text_list = []
+    for tool in tools:
+        function = tool["function"]
+        tool_name = function["name"]
+        # Fall back to empty values so a spec without these keys does not
+        # abort the whole prompt with a KeyError.
+        description = function.get("description", "")
+        parameters = function.get("parameters", {})
+        # Only the per-tool header is interpolated; the suffix constants are
+        # appended verbatim and never run through str.format.
+        param_text_list.append(
+            f"### {tool_name}\n\n{tool_name}: {description} 输入参数： {parameters} \n"
+            + TOOL_SUFFIX_PROMPT
+            + tool_choice_suffix_prompt
+        )
+        tool_names.append(tool_name)
+
+    tool_text = "\n\n".join(param_text_list).strip()
+    return tool_system_prompt.format(
+        tool_text=tool_text,
+        tool_names=", ".join(tool_names),
+    )
+
+
+def qwen_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]:
+    """Turn a JSON model reply into either the parsed reply or a tool call.
+
+    ``content`` is parsed with ``json.loads`` and its ``"reason"`` object is
+    inspected.  A truthy ``"final_answer"`` means the whole parsed object is
+    returned as-is; otherwise ``"action"`` / ``"action_input"`` are wrapped
+    into a one-element list of tool-call dicts.
+
+    NOTE(review): the actual return values are a dict or a list of dicts,
+    which does not match the declared annotation; confirm against callers.
+
+    Raises:
+        json.JSONDecodeError: ``content`` is not valid JSON.
+        KeyError: ``"reason"``, or ``"action"`` / ``"action_input"`` on the
+            tool branch, is missing.
+    """
+    parsed = json.loads(content)
+    reason = parsed["reason"]
+
+    # Final answer: hand back the parsed reply untouched.
+    if reason.get("final_answer", None):
+        return parsed
+
+    # Tool invocation.
+    name = reason["action"]
+    arguments = reason["action_input"]
+    call = {
+        "index": 0,
+        "id": "call_{}".format(uuid.uuid4().hex),
+        "function": {"name": name, "arguments": arguments},
+    }
+    return [call]
+
+
+if __name__ == "__main__":
+    import json
+
+    # Manual smoke test: build two sample tool specs from a single-quoted
+    # literal, print the rendered prompt, then run the extractor on a reply.
+    raw_tools = """[{'type': 'function', 'function': {'name': 'track', 'description': '追踪指定股票的实时价格', 'parameters': {'type': 'object', 'properties': {'symbol': {'description': '需要追踪的股票代码', 'type': 'integer'}}, 'required': ['symbol']}}}, {'type': 'function', 'function': {'name': 'text-to-speech', 'description': '将文本转换为语音', 'parameters': {'type': 'object', 'properties': {'text': {'description': '需要转换成语音的文本', 'type': 'string'}, 'voice': {'description': '要使用的语音类型（男声、女声等', 'default': '男声', 'type': 'string'}, 'speed': {'description': '语音的速度（快、中等、慢等', 'default': '中等', 'type': 'string'}}, 'required': ['text']}}}]"""
+    # Swap the quote style so the literal becomes valid JSON.
+    demo_tools = json.loads(raw_tools.replace("'", '"'))
+    print(qwen_tool_formatter(tools=demo_tools))
+
+    # Sample reply on the tool-call branch (no "final_answer" key).
+    sample_reply = """{
+    "thought":"你应该时刻思考自己该做什么",
+    "reason":{
+        "action":"track",
+        "action_input":{"a":"1"}
+    }
+}"""
+    extracted = qwen_tool_extractor(sample_reply)
+    print("\n\n")
+    print(extracted)
@@ -3,14 +3,14 @@
 
 
 class Action(BaseModel):
-    Action: str = Field(description="工具名称，必须是 [{tool_names}] 之一")
-    Action_Input: str = Field(description="工具输入, 值必须使用 json 格式")
+    # Tool-call variant of a step's "reason"; fields are lowercase snake_case.
+    # action: tool name (description says it must be one of [{tool_names}]).
+    action: str = Field(description="工具名称，必须是 [{tool_names}] 之一")
+    # action_input: tool input (description says the value must be JSON).
+    action_input: str = Field(description="工具输入, 值必须使用 json 格式")
 
 
 class Answer(BaseModel):
-    Final_Answer: str = Field(description="问题的最终回答")
+    # Final-answer variant of a step's "reason".
+    # final_answer: the final answer to the question (per the description).
+    final_answer: str = Field(description="问题的最终回答")
 
 
 class React(BaseModel):
-    Thought: str = Field(description="你应该时刻思考自己该做什么")
-    Reason: Union[Action, Answer]
+    # One step: a free-text thought plus either a tool call or a final answer.
+    # thought: description reads "you should always think about what to do".
+    thought: str = Field(description="你应该时刻思考自己该做什么")
+    # reason: either an Action (tool call) or an Answer (final answer).
+    reason: Union[Action, Answer]