Add RAG (agentscope-ai#26)

zhilingluo · XGenerationLab · web-flow · commit 992ea34b16d1 · 2025-09-02T10:16:37.000+08:00
## Description
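Adds a `RAGService` base class and a `LangChainRAGService` implementation backed by Milvus, wires an optional `rag_service` into `ContextComposer` and `ContextManager`, adds a `langchain_rag` optional dependency group, and documents and tests the new service.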

**Related Issue:** Fixes #[issue_number] or Relates to #[issue_number]

**Security Considerations:** [If applicable, especially for sandbox
changes]

## Type of Change
- [ ] Bug fix
- [x] New feature
- [ ] Breaking change
- [x] Documentation
- [ ] Refactoring

## Component(s) Affected
- [x] Engine
- [ ] Sandbox
- [x] Documentation
- [x] Tests
- [ ] CI/CD

## Checklist
- [x] Pre-commit hooks pass
- [x] Tests pass locally
- [x] Documentation updated (if needed)
- [x] Ready for review

## Testing
[How to test these changes]

## Additional Notes
[Optional: any other context]

---------

Co-authored-by: Bruce <godot.lzl@alibaba-inc.com>
diff --git a/cookbook/en/context_manager.md b/cookbook/en/context_manager.md
@@ -59,6 +59,20 @@ The `MemoryService` contains the following methods:
 
 Like `SessionHistoryService`, prefer using a concrete implementation such as `InMemoryMemoryService`. For details, see {ref}`here <memory-service>`
 
+### RAGService
+
+The `RAGService` is a base class that provides retrieval-augmented generation (RAG) capabilities.
+When asked by an end-user, the agent may need to retrieve relevant information from the knowledge base.
+The knowledge base can be a database or a collection of documents.
+The `RAGService` contains the following methods:
+- `retrieve`: retrieve relevant information from the knowledge base
+
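+The `LangChainRAGService` is a concrete implementation of `RAGService` that uses LangChain to retrieve relevant information from Milvus.
+It can be initialized with either of the following arguments:
+- `uri`: the Milvus URI, either a local file (`./xxx.db`) or a remote URL (`http://localhost:19530`). When given, the existing store at that URI is used.
+- `docs`: the documents to be indexed. Used only when `uri` is not given; they are indexed into the default local file `milvus_demo.db`.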
+
+
 ## Life-cycle of a context manager
 The context manager can be initialized by two ways:
 
diff --git a/cookbook/zh/context_manager.md b/cookbook/zh/context_manager.md
@@ -55,6 +55,14 @@ kernelspec:
 
 与 `SessionHistoryService`一样，优先使用具体实现，如`InMemoryMemoryService`。详细信息请参见{ref}`这里 <memory-service-zh>`
 
+### RAGService
+`RAGService` 是一个基类，用于提供检索增强生成（RAG）功能。当最终用户提出请求时，代理可能需要从知识库中检索相关信息。知识库可以是数据库或文档集合。`RAGService` 包含以下方法：
+- `retrieve`：从知识库中检索相关信息。
+
+`LangChainRAGService` 是 `RAGService` 的具体实现，它使用 LangChain 从 Milvus 中检索相关信息。可以通过以下方式初始化：
+- `uri`：Milvus 的 URI，可以是本地文件（例如 `./xxx.db`）或远程 URL（例如 `http://localhost:19530`）。指定后将直接使用该 URI 上已有的向量库。
+- `docs`：要索引的文档。仅在未指定 `uri` 时使用，文档会被索引到默认的本地文件 `milvus_demo.db` 中。
+
 ## 上下文管理器的生命周期
 
 上下文管理器可以通过两种方式初始化：
diff --git a/pyproject.toml b/pyproject.toml
@@ -66,3 +66,9 @@ autogen = [
     "autogen-agentchat>=0.7.4",
     "autogen-ext[openai]>=0.7.4",
 ]
+
+langchain_rag=[
+    "langchain>= 0.3.25",
+    "pymilvus>=2.6.0",
+    "langchain_milvus"
+]
diff --git a/src/agentscope_runtime/engine/services/context_manager.py b/src/agentscope_runtime/engine/services/context_manager.py
@@ -4,12 +4,19 @@
 
 from .manager import ServiceManager
 from .memory_service import MemoryService, InMemoryMemoryService
+from .rag_service import RAGService
 from .session_history_service import (
     SessionHistoryService,
     Session,
     InMemorySessionHistoryService,
 )
-from ..schemas.agent_schemas import Message
+from ..schemas.agent_schemas import (
+    Message,
+    MessageType,
+    Role,
+    TextContent,
+    ContentType,
+)
 
 
 class ContextComposer:
@@ -19,6 +26,7 @@ async def compose(
         session: Session,  # session
         memory_service: MemoryService = None,
         session_history_service: SessionHistoryService = None,
+        rag_service: RAGService = None,
     ):
         # session
         if session_history_service:
@@ -42,6 +50,18 @@ async def compose(
             )
             session.messages = memories + session.messages
 
+        # rag
+        if rag_service:
+            query = await rag_service.get_query_text(request_input[-1])
+            docs = await rag_service.retrieve(query=query, k=5)
+            cooked_doc = "\n".join(docs)
+            message = Message(
+                type=MessageType.MESSAGE,
+                role=Role.SYSTEM,
+                content=[TextContent(type=ContentType.TEXT, text=cooked_doc)],
+            )
+            session.messages.append(message)
+
 
 class ContextManager(ServiceManager):
     """
@@ -53,10 +73,12 @@ def __init__(
         context_composer_cls=ContextComposer,
         session_history_service: SessionHistoryService = None,
         memory_service: MemoryService = None,
+        rag_service: RAGService = None,
     ):
         self._context_composer_cls = context_composer_cls
         self._session_history_service = session_history_service
         self._memory_service = memory_service
+        self._rag_service = rag_service
         super().__init__()
 
     def _register_default_services(self):
@@ -68,6 +90,7 @@ def _register_default_services(self):
 
         self.register_service("session", self._session_history_service)
         self.register_service("memory", self._memory_service)
+        self.register_service("rag", self._rag_service)
 
     async def compose_context(
         self,
@@ -77,6 +100,7 @@ async def compose_context(
         await self._context_composer_cls.compose(
             memory_service=self._memory_service,
             session_history_service=self._session_history_service,
+            rag_service=self._rag_service,
             session=session,
             request_input=request_input,
         )
@@ -119,10 +143,12 @@ async def append(self, session: Session, event_output: List[Message]):
 async def create_context_manager(
     memory_service: MemoryService = None,
     session_history_service: SessionHistoryService = None,
+    rag_service: RAGService = None,
 ):
     manager = ContextManager(
         memory_service=memory_service,
         session_history_service=session_history_service,
+        rag_service=rag_service,
     )
 
     async with manager:
diff --git a/src/agentscope_runtime/engine/services/rag_service.py b/src/agentscope_runtime/engine/services/rag_service.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+from typing import Optional
+
+from langchain_community.embeddings import DashScopeEmbeddings
+from langchain_milvus import Milvus
+
+from .base import ServiceWithLifecycleManager
+from ..schemas.agent_schemas import Message, MessageType
+
+
+class RAGService(ServiceWithLifecycleManager):
+    """
+    RAG Service
+    """
+
+    async def get_query_text(self, message: Message) -> str:
+        """
+        Gets the query text from the messages.
+
+        Args:
+            message: A list of messages.
+
+        Returns:
+            The query text.
+        """
+        if message:
+            if message.type == MessageType.MESSAGE:
+                for content in message.content:
+                    if content.type == "text":
+                        return content.text
+        return ""
+
+    async def retrieve(self, query: str, k: int = 1) -> list[str]:
+        raise NotImplementedError
+
+
+# Milvus URI used when LangChainRAGService is constructed without ``uri``.
+DEFAULT_URI = "milvus_demo.db"
+
+
+class LangChainRAGService(RAGService):
+    """
+    RAG Service using LangChain
+    """
+
+    def __init__(
+        self,
+        uri: Optional[str] = None,
+        docs: Optional[list[str]] = None,
+    ):
+        self.embeddings = DashScopeEmbeddings()
+        self.vectorstore = None
+
+        if uri:
+            self.uri = uri
+            self.from_db()
+        elif docs:
+            self.uri = DEFAULT_URI
+            self.from_docs(docs)
+        else:
+            docs = []
+            self.uri = DEFAULT_URI
+            self.from_docs(docs)
+
+    def from_docs(self, docs=None):
+        if docs is None:
+            docs = []
+
+        self.vectorstore = Milvus.from_documents(
+            documents=docs,
+            embedding=self.embeddings,
+            connection_args={
+                "uri": self.uri,
+            },
+            drop_old=False,
+        )
+
+    def from_db(self):
+        self.vectorstore = Milvus(
+            embedding_function=self.embeddings,
+            connection_args={"uri": self.uri},
+            index_params={"index_type": "FLAT", "metric_type": "L2"},
+        )
+
+    async def retrieve(self, query: str, k: int = 1) -> list[str]:
+        if self.vectorstore is None:
+            raise ValueError(
+                "Vector store not initialized. Call build_index first.",
+            )
+        docs = self.vectorstore.similarity_search(query, k=k)
+        return [doc.page_content for doc in docs]
+
+    async def start(self) -> None:
+        """Starts the service."""
+
+    async def stop(self) -> None:
+        """Stops the service."""
+
+    async def health(self) -> bool:
+        """Checks the health of the service."""
+        return True
diff --git a/tests/unit/assets/milvus_demo.db b/tests/unit/assets/milvus_demo.db
diff --git a/tests/unit/test_rag_service.py b/tests/unit/test_rag_service.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+import os
+
+import pytest
+from dotenv import load_dotenv
+
+from agentscope_runtime.engine import Runner
+from agentscope_runtime.engine.agents.llm_agent import LLMAgent
+from agentscope_runtime.engine.llms import QwenLLM
+from agentscope_runtime.engine.schemas.agent_schemas import (
+    MessageType,
+    AgentRequest,
+    RunStatus,
+)
+from agentscope_runtime.engine.services.context_manager import (
+    create_context_manager,
+)
+from agentscope_runtime.engine.services.rag_service import LangChainRAGService
+
+if os.path.exists("../../.env"):
+    load_dotenv("../../.env")
+
+
+def load_docs():
+    import bs4
+    from langchain_community.document_loaders import WebBaseLoader
+    from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+    loader = WebBaseLoader(
+        web_paths=(
+            "https://lilianweng.github.io/posts/2023-06-23-agent/",
+            "https://lilianweng.github.io/posts/2023-03-15-prompt"
+            "-engineering/",
+        ),
+        bs_kwargs={
+            "parse_only": bs4.SoupStrainer(
+                class_=("post-content", "post-title", "post-header"),
+            ),
+        },
+    )
+    documents = loader.load()
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=2000,
+        chunk_overlap=200,
+    )
+
+    docs = text_splitter.split_documents(documents)
+    return docs
+
+
+@pytest.mark.asyncio
+async def test_from_docs():
+    docs = load_docs()
+    rag_service = LangChainRAGService(docs=docs)
+
+    ret_docs = await rag_service.retrieve(
+        "What is self-reflection of an AI Agent?",
+    )
+    assert len(ret_docs) == 1
+    assert ret_docs[0].startswith("Self-Reflection")
+
+
+@pytest.mark.asyncio
+async def test_from_db():
+    rag_service = LangChainRAGService(uri="./assets/milvus_demo.db")
+    ret_docs = await rag_service.retrieve(
+        "What is self-reflection of an AI Agent?",
+    )
+    assert len(ret_docs) == 1
+    assert ret_docs[0].startswith("Self-Reflection")
+
+
+@pytest.mark.asyncio
+async def test_rag():
+    rag_service = LangChainRAGService(uri="./assets/milvus_demo.db")
+    USER_ID = "user2"
+    SESSION_ID = "session1"
+    query = "What is self-reflection of an AI Agent?"
+
+    llm_agent = LLMAgent(
+        model=QwenLLM(),
+        name="llm_agent",
+        description="A simple LLM agent",
+    )
+
+    async with create_context_manager(
+        rag_service=rag_service,
+    ) as context_manager:
+        runner = Runner(
+            agent=llm_agent,
+            context_manager=context_manager,
+            environment_manager=None,
+        )
+
+        all_result = ""
+        # print("\n")
+        request = AgentRequest(
+            input=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": query,
+                        },
+                    ],
+                },
+            ],
+            session_id=SESSION_ID,
+        )
+
+        async for message in runner.stream_query(
+            user_id=USER_ID,
+            request=request,
+        ):
+            if (
+                message.object == "message"
+                and MessageType.MESSAGE == message.type
+                and RunStatus.Completed == message.status
+            ):
+                all_result = message.content[0].text
+        print(all_result)