feat: add correct paper citations

jihe520 · jihe520 · commit 621402525cf0 · 2025-05-14T08:44:53.000+08:00
diff --git a/backend/.env.dev.example b/backend/.env.dev.example
@@ -13,6 +13,9 @@ MAX_RETRIES=5
 # E2B_API_KEY=
 SERVER_HOST=http://localhost:8000
 
+# Email for OpenAlex (https://openalex.org/) API access; required for paper search
+OPENALEX_EMAIL=
+
 LOG_LEVEL=DEBUG
 DEBUG=true
 # 确保安装 Redis
diff --git a/backend/app/core/agents.py b/backend/app/core/agents.py
@@ -7,7 +7,7 @@
     CODER_PROMPT,
     MODELER_PROMPT,
 )
-from app.core.functions import tools
+from app.core.functions import coder_tools, writer_tools
 from app.models.model import CoderToWriter
 from app.models.user_output import UserOutput
 from app.utils.enums import CompTemplate, FormatOutPut
@@ -17,6 +17,7 @@
 from app.utils.redis_manager import redis_manager
 from app.schemas.response import SystemMessage
 from app.tools.base_interpreter import BaseCodeInterpreter
+from app.tools.openalex_scholar import OpenAlexScholar
 
 
 class Agent:
@@ -26,7 +27,7 @@ def __init__(
         model: LLM,
         max_chat_turns: int = 30,  # 单个agent最大对话轮次
         user_output: UserOutput = None,
-        max_memory: int = 20,  # 最大记忆轮次
+        max_memory: int = 25,  # 最大记忆轮次
     ) -> None:
         self.task_id = task_id
         self.model = model
@@ -85,7 +86,7 @@ def clear_memory(self):
         self.chat_history = self.chat_history[:2] + self.chat_history[-5:]
 
 
-class ModelerAgent(Agent):  # 继承自Agent类而不是BaseModel
+class ModelerAgent(Agent):  # 继承自Agent类
     def __init__(
         self,
         model: LLM,
@@ -168,7 +169,7 @@ async def run(self, prompt: str, subtask_title: str) -> CoderToWriter:
             logger.info(f"当前对话轮次: {self.current_chat_turns}")
             response = await self.model.chat(
                 history=self.chat_history,
-                tools=tools,
+                tools=coder_tools,
                 tool_choice="auto",
                 agent_name=self.__class__.__name__,
             )
@@ -274,7 +275,7 @@ async def run(self, prompt: str, subtask_title: str) -> CoderToWriter:
 
                     completion_response = await self.model.chat(
                         history=self.chat_history,
-                        tools=tools,
+                        tools=coder_tools,
                         tool_choice="auto",
                         agent_name=self.__class__.__name__,
                     )
@@ -318,10 +319,12 @@ def __init__(
         comp_template: CompTemplate = CompTemplate,
         format_output: FormatOutPut = FormatOutPut.Markdown,
         user_output: UserOutput = None,
+        scholar: OpenAlexScholar = None,
     ) -> None:
         super().__init__(task_id, model, max_chat_turns, user_output)
         self.format_out_put = format_output
         self.comp_template = comp_template
+        self.scholar = scholar
         self.system_prompt = get_writer_prompt(format_output)
         self.available_images: list[str] = []
 
@@ -347,28 +350,86 @@ async def run(
             image_prompt = f"\n可用的图片链接列表：\n{image_list}\n请在写作时适当引用这些图片链接。"
             prompt = prompt + image_prompt
 
-        try:
-            logger.info(f"{self.__class__.__name__}:开始:执行对话")
-            self.current_chat_turns = 0  # 重置对话轮次计数器
+        logger.info(f"{self.__class__.__name__}:开始:执行对话")
+        self.current_chat_turns += 1  # increment the chat-turn counter
 
-            # 更新对话历史
-            self.append_chat_history({"role": "system", "content": self.system_prompt})
-            self.append_chat_history({"role": "user", "content": prompt})
+        # 更新对话历史
+        self.append_chat_history({"role": "system", "content": self.system_prompt})
+        self.append_chat_history({"role": "user", "content": prompt})
 
-            # 获取历史消息用于本次对话
-            response = await self.model.chat(
-                history=self.chat_history,
-                agent_name=self.__class__.__name__,
-                sub_title=sub_title,
-            )
+        # 获取历史消息用于本次对话
+        response = await self.model.chat(
+            history=self.chat_history,
+            tools=writer_tools,
+            tool_choice="auto",
+            agent_name=self.__class__.__name__,
+            sub_title=sub_title,
+        )
+
+        if (
+            hasattr(response.choices[0].message, "tool_calls")
+            and response.choices[0].message.tool_calls
+        ):
+            logger.info("检测到工具调用")
+            tool_call = response.choices[0].message.tool_calls[0]
+            tool_id = tool_call.id
+            tool_call.function.name
+            if tool_call.function.name == "search_papers":
+                logger.info("调用工具: search_papers")
+                await redis_manager.publish_message(
+                    self.task_id,
+                    SystemMessage(content=f"写作手调用{tool_call.function.name}工具"),
+                )
+
+                query = json.loads(tool_call.function.arguments)["query"]
+
+                full_content = response.choices[0].message.content
+                # 更新对话历史 - 添加助手的响应
+                self.append_chat_history(
+                    {
+                        "role": "assistant",
+                        "content": full_content,
+                        "tool_calls": [
+                            {
+                                "id": tool_id,
+                                "type": "function",
+                                "function": {
+                                    "name": "search_papers",
+                                    "arguments": json.dumps({"query": query}),
+                                },
+                            }
+                        ],
+                    }
+                )
+
+                try:
+                    papers = self.scholar.search_papers(query)
+                except Exception as e:
+                    logger.error(f"搜索文献失败: {str(e)}")
+                    return f"搜索文献失败: {str(e)}"
+                # TODO: pass to frontend
+                self.scholar.print_papers(papers)
+                self.append_chat_history(
+                    {
+                        "role": "tool",
+                        "content": papers,
+                        "tool_call_id": tool_id,
+                        "name": "search_papers",
+                    }
+                )
+                next_response = await self.model.chat(
+                    history=self.chat_history,
+                    tools=writer_tools,
+                    tool_choice="auto",
+                    agent_name=self.__class__.__name__,
+                    sub_title=sub_title,
+                )
+                response_content = next_response.choices[0].message.content
+        else:
             response_content = response.choices[0].message.content
-            self.chat_history.append({"role": "assistant", "content": response_content})
-            logger.info(f"{self.__class__.__name__}:完成:执行对话")
-            return response_content
-        except Exception as e:
-            error_msg = f"执行过程中遇到错误: {str(e)}"
-            logger.error(f"Agent执行失败: {str(e)}")
-            return error_msg
+        self.chat_history.append({"role": "assistant", "content": response_content})
+        logger.info(f"{self.__class__.__name__}:完成:执行对话")
+        return response_content
 
     async def summarize(self) -> str:
         """
diff --git a/backend/app/core/functions.py b/backend/app/core/functions.py
@@ -1,7 +1,4 @@
-from typing import List, Dict, Any
-from semanticscholar import SemanticScholar, PaginatedResults
-
-tools = [
+coder_tools = [
     {
         "type": "function",
         "function": {
@@ -33,19 +30,22 @@
 # TODO: get_cites
 
 
-def search_papers(query: str) -> List[Dict[str, Any]]:
-    """Search for papers using a query string."""
-    sch = SemanticScholar()
-    results: PaginatedResults = sch.search_paper(query, limit=10)
-    return [
-        {
-            "title": paper.title,
-            "abstract": paper.abstract,
-            "authorsName": [author.name for author in paper.authors],
-            "citations": [citation.title for citation in paper.citations],
-        }
-        for paper in results
-    ]
-
-
 ## writeragent tools
+writer_tools = [  # tool schemas passed as tools= to the writer agent's chat calls
+    {
+        "type": "function",
+        "function": {
+            "name": "search_papers",
+            "description": "Search for papers using a query string.",
+            "strict": True,  # schema keywords below belong inside "parameters"
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {"type": "string", "description": "The query string"}
+                },
+                "required": ["query"],
+                "additionalProperties": False,
+            },
+        },
+    },
+]
diff --git a/backend/app/core/prompts.py b/backend/app/core/prompts.py
@@ -25,7 +25,7 @@
 4. The working directory is already set up, and any uploaded files are already in the current directory
 5. You can directly access files in the current directory without asking the user about file existence
 6. For data analysis tasks, if you see Excel files (.xlsx), use pandas to read them directly
-7. try to visualize the data , process and  results using seaborn and matplotlibs
+7. try to visualize the data , process and  results using seaborn first and then matplotlibs,be nature sci style.
 
 For example:
 # Correct:
@@ -60,7 +60,7 @@
 11. 保存的图片名称需要语义化，方便用户理解
 12. 在生成代码时，对于包含单引号的字符串，请使用双引号包裹，避免使用转义字符
 13. **你尽量在较少的对话轮次内完成任务。减少反复思考的次数**
-14. 在求解问题和建立模型过程中，进行充分可视化
+14. 在求解问题和建立模型**过程中**，进行充分可视化
 
 
 Important:
@@ -89,6 +89,7 @@ def get_writer_prompt(
         4. 严格按照参考用户输入的格式模板以及**正确的编号顺序**
         5. 不需要询问用户 
         6. 当提到图片时，请使用提供的图片列表中的文件名
+        7. when you write,check if you need to use tools search_papers to cite.if you need, markdown Footnote e.g.[^1]
         """
 
 
diff --git a/backend/app/core/workflow.py b/backend/app/core/workflow.py
@@ -1,8 +1,8 @@
 from app.core.agents import WriterAgent, CoderAgent
 from app.core.llm import LLM, simple_chat
-from app.models.model import CoderToWriter
 from app.schemas.request import Problem
 from app.schemas.response import SystemMessage
+from app.tools.openalex_scholar import OpenAlexScholar
 from app.utils.log_util import logger
 from app.utils.common_utils import create_work_dir, get_config_template
 from app.models.user_output import UserOutput
@@ -66,6 +66,10 @@ async def execute(self, problem: Problem):
             timeout=3000,
         )
 
+        # OpenAlex literature-search client; passed as scholar= to an agent below
+
+        scholar = OpenAlexScholar(email=settings.OPENALEX_EMAIL)  # email is read from settings
+
         await redis_manager.publish_message(
             self.task_id,
             SystemMessage(content="创建完成"),
@@ -91,6 +95,7 @@ async def execute(self, problem: Problem):
             model=llm_model,
             comp_template=problem.comp_template,
             format_output=problem.format_output,
+            scholar=scholar,
         )
 
         ################################################ solution steps
diff --git a/backend/app/tools/openalex_scholar.py b/backend/app/tools/openalex_scholar.py
@@ -70,6 +70,8 @@ def search_papers(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
         # 添加邮箱参数到请求URL
         if self.email:
             params["mailto"] = self.email
+        else:
+            raise ValueError("配置OpenAlex邮箱获取访问文献权利")
 
         # 设置请求头，包含User-Agent和邮箱信息
         headers = {
@@ -146,6 +148,21 @@ def search_papers(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
 
         return papers
 
+    def print_papers(self, papers: List[Dict[str, Any]]) -> None:
+        """Print each paper's title, abstract, authors, counts and citation."""
+        for paper in papers:
+            print("\n" + "=" * 80)
+            print(f"标题: {paper['title']}\n\n摘要: {paper['abstract']}")
+            print("\n作者:")
+            for author in paper["authors"]:  # nested: authors belong to this paper
+                print(f"- {author['name']}")
+                if author["institution"]:
+                    print(f"  所属机构: {author['institution']}")
+            print(f"\n引用次数: {paper['citations_count']}")
+            print(f"发表年份: {paper['publication_year']}")
+            print(f"\n引用格式:\n{paper['citation_format']}")
+            print("=" * 80)
+
     def _format_citation(self, work: Dict[str, Any]) -> str:
         """Format citation in a readable format."""
         # 获取所有作者
@@ -176,26 +193,3 @@ def _format_citation(self, work: Dict[str, Any]) -> str:
             citation += f" DOI: {doi}"
 
         return citation
-
-
-if __name__ == "__main__":
-    # Example usage
-    scholar = OpenAlexScholar(email="xxx@xxx.com")  # 请替换为您的真实邮箱
-    try:
-        papers = scholar.search_papers("machine learning")
-        for paper in papers:
-            print("\n" + "=" * 80)
-            print(f"标题: {paper['title']}")
-            print(f"\n摘要: {paper['abstract']}")
-            print("\n作者:")
-            for author in paper["authors"]:
-                print(f"- {author['name']}")
-                if author["institution"]:
-                    print(f"  所属机构: {author['institution']}")
-            print(f"\n引用次数: {paper['citations_count']}")
-            print(f"发表年份: {paper['publication_year']}")
-            print(f"\n引用格式:\n{paper['citation_format']}")
-            print("=" * 80)
-    except Exception as e:
-        print(f"发生错误: {e}")
-        print("请检查您的网络连接或API访问权限。")