Commit f2c65e0

akotyla and micpst authored
docs: Update README (#704)
Co-authored-by: Michal Pstrag <[email protected]>
1 parent 511631c commit f2c65e0

File tree

2 files changed (README.md, docs/index.md): +151 -59 lines changed

README.md

Lines changed: 76 additions & 30 deletions
@@ -34,6 +34,12 @@
 - **Connect to any data source** – Use prebuilt connectors for S3, GCS, Azure, or implement your own.
 - **Scale ingestion** – Process large datasets quickly with [Ray-based parallel processing](https://ragbits.deepsense.ai/how-to/document_search/distributed_ingestion/#how-to-ingest-documents-in-a-distributed-fashion).

+### 🤖 Build Multi-Agent Workflows with Ease
+
+- **Multi-agent coordination** – Create teams of specialized agents with role-based collaboration using [A2A protocol](https://ragbits.deepsense.ai/tutorials/agents) for interoperability.
+- **Real-time data integration** – Leverage [Model Context Protocol (MCP)](https://ragbits.deepsense.ai/how-to/agents/provide_mcp_tools) for live web access, database queries, and API integrations.
+- **Conversation state management** – Maintain context across interactions with [automatic history tracking](https://ragbits.deepsense.ai/how-to/agents/define_and_use_agents/#conversation-history).
+
 ### 🚀 Deploy & Monitor with Confidence

 - **Real-time observability** – Track performance with [OpenTelemetry](https://ragbits.deepsense.ai/how-to/project/use_tracing/#opentelemetry-trace-handler) and [CLI insights](https://ragbits.deepsense.ai/how-to/project/use_tracing/#cli-trace-handler).
@@ -165,55 +171,95 @@ if __name__ == "__main__":
     asyncio.run(run())
 ```

+### Agentic RAG
+
+To build an agentic RAG pipeline:
+
+```python
+import asyncio
+from ragbits.agents import Agent
+from ragbits.core.embeddings import LiteLLMEmbedder
+from ragbits.core.llms import LiteLLM
+from ragbits.core.vector_stores import InMemoryVectorStore
+from ragbits.document_search import DocumentSearch
+
+embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
+vector_store = InMemoryVectorStore(embedder=embedder)
+document_search = DocumentSearch(vector_store=vector_store)
+
+llm = LiteLLM(model_name="gpt-4.1-nano")
+agent = Agent(llm=llm, tools=[document_search.search])
+
+async def main() -> None:
+    await document_search.ingest("web://https://arxiv.org/pdf/1706.03762")
+    response = await agent.run("What are the key findings presented in this paper?")
+    print(response.content)
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
 ### Chat UI

-To expose your RAG application through Ragbits UI:
+To expose your GenAI application through Ragbits API:

 ```python
-from collections.abc import AsyncGenerator, Iterable
-from pydantic import BaseModel
+from collections.abc import AsyncGenerator
+from ragbits.agents import Agent, ToolCallResult
 from ragbits.chat.api import RagbitsAPI
 from ragbits.chat.interface import ChatInterface
-from ragbits.chat.interface.types import ChatContext, ChatResponse
+from ragbits.chat.interface.types import ChatContext, ChatResponse, LiveUpdateType
 from ragbits.core.embeddings import LiteLLMEmbedder
-from ragbits.core.llms import LiteLLM
-from ragbits.core.prompt import Prompt, ChatFormat
+from ragbits.core.llms import LiteLLM, ToolCall
+from ragbits.core.prompt import ChatFormat
 from ragbits.core.vector_stores import InMemoryVectorStore
 from ragbits.document_search import DocumentSearch
-from ragbits.document_search.documents.element import Element

-class QuestionAnswerPromptInput(BaseModel):
-    question: str
-    context: Iterable[Element]
+embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
+vector_store = InMemoryVectorStore(embedder=embedder)
+document_search = DocumentSearch(vector_store=vector_store)

-class QuestionAnswerPrompt(Prompt[QuestionAnswerPromptInput, str]):
-    system_prompt = """
-    You are a question answering agent. Answer the question that will be provided using context.
-    If in the given context there is not enough information refuse to answer.
-    """
-    user_prompt = """
-    Question: {{ question }}
-    Context: {% for chunk in context %}{{ chunk.text_representation }}{%- endfor %}
-    """
+llm = LiteLLM(model_name="gpt-4.1-nano")
+agent = Agent(llm=llm, tools=[document_search.search])

 class MyChat(ChatInterface):
     async def setup(self) -> None:
-        self.llm = LiteLLM(model_name="gpt-4.1-nano")
-        self.embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
-        self.vector_store = InMemoryVectorStore(embedder=self.embedder)
-        self.document_search = DocumentSearch(vector_store=self.vector_store)
-        await self.document_search.ingest("web://https://arxiv.org/pdf/1706.03762")
+        await document_search.ingest("web://https://arxiv.org/pdf/1706.03762")

     async def chat(
         self,
         message: str,
         history: ChatFormat | None = None,
         context: ChatContext | None = None,
-    ) -> AsyncGenerator[ChatResponse, None]:
-        chunks = await self.document_search.search(message)
-        prompt = QuestionAnswerPrompt(QuestionAnswerPromptInput(question=message, context=chunks))
-        async for text in self.llm.generate_streaming(prompt):
-            yield self.create_text_response(text)
+    ) -> AsyncGenerator[ChatResponse]:
+        async for result in agent.run_streaming(message):
+            match result:
+                case str():
+                    yield self.create_live_update(
+                        update_id="1",
+                        type=LiveUpdateType.START,
+                        label="Answering...",
+                    )
+                    yield self.create_text_response(result)
+                case ToolCall():
+                    yield self.create_live_update(
+                        update_id="2",
+                        type=LiveUpdateType.START,
+                        label="Searching...",
+                    )
+                case ToolCallResult():
+                    yield self.create_live_update(
+                        update_id="2",
+                        type=LiveUpdateType.FINISH,
+                        label="Search",
+                        description=f"Found {len(result.result)} relevant chunks.",
+                    )
+
+        yield self.create_live_update(
+            update_id="1",
+            type=LiveUpdateType.FINISH,
+            label="Answer",
+        )

 if __name__ == "__main__":
     api = RagbitsAPI(MyChat)
@@ -232,7 +278,7 @@ Explore `create-ragbits-app` repo [here](https://github.com/deepsense-ai/create-

 ## Documentation

-- [Quickstart](https://ragbits.deepsense.ai/quickstart/quickstart1_prompts/) - Get started with Ragbits in a few minutes
+- [Tutorials](https://ragbits.deepsense.ai/tutorials/intro) - Get started with Ragbits in a few minutes
 - [How-to](https://ragbits.deepsense.ai/how-to/prompts/use_prompting) - Learn how to use Ragbits in your projects
 - [CLI](https://ragbits.deepsense.ai/cli/main) - Learn how to run Ragbits in your terminal
 - [API reference](https://ragbits.deepsense.ai/api_reference/core/prompt) - Explore the underlying Ragbits API
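
A note on the new `chat` method: `agent.run_streaming` yields plain `str` chunks for answer text plus `ToolCall` and `ToolCallResult` events for tool activity, and the `match` statement simply dispatches on those types. Below is a minimal sketch (not part of this commit) of consuming the same stream without the Chat UI, using only the API surface the diff itself exercises; the query string and printed labels are illustrative:

```python
import asyncio

from ragbits.agents import Agent, ToolCallResult
from ragbits.core.embeddings import LiteLLMEmbedder
from ragbits.core.llms import LiteLLM, ToolCall
from ragbits.core.vector_stores import InMemoryVectorStore
from ragbits.document_search import DocumentSearch

# Same wiring as the Agentic RAG example above.
embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
vector_store = InMemoryVectorStore(embedder=embedder)
document_search = DocumentSearch(vector_store=vector_store)
agent = Agent(llm=LiteLLM(model_name="gpt-4.1-nano"), tools=[document_search.search])


async def main() -> None:
    await document_search.ingest("web://https://arxiv.org/pdf/1706.03762")

    answer: list[str] = []
    async for event in agent.run_streaming("What problem does this paper address?"):
        match event:
            case str():
                answer.append(event)        # streamed answer text
            case ToolCall():
                print("tool call started")  # the UI renders this as "Searching..."
            case ToolCallResult():
                print("tool call finished")
    print("".join(answer))


if __name__ == "__main__":
    asyncio.run(main())
```

This is the same stream the Chat UI consumes; `MyChat.chat` only adds the live-update bookkeeping around it.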

docs/index.md

Lines changed: 75 additions & 29 deletions
@@ -54,6 +54,12 @@ hide:
 - **Connect to any data source** – Use prebuilt connectors for S3, GCS, Azure, or implement your own.
 - **Scale ingestion** – Process large datasets quickly with [Ray-based parallel processing](https://ragbits.deepsense.ai/how-to/document_search/distributed_ingestion/#how-to-ingest-documents-in-a-distributed-fashion).

+### 🤖 Build Multi-Agent Workflows with Ease
+
+- **Multi-agent coordination** – Create teams of specialized agents with role-based collaboration using [A2A protocol](https://ragbits.deepsense.ai/tutorials/agents/) for interoperability.
+- **Real-time data integration** – Leverage [Model Context Protocol (MCP)](https://ragbits.deepsense.ai/how-to/provide_mcp_tools/) for live web access, database queries, and API integrations.
+- **Conversation state management** – Maintain context across interactions with [automatic history tracking](https://ragbits.deepsense.ai/how-to/agents/define_and_use_agents/#conversation-history/).
+
 ### 🚀 Deploy & Monitor with Confidence

 - **Real-time observability** – Track performance with [OpenTelemetry](https://ragbits.deepsense.ai/how-to/project/use_tracing/#opentelemetry-trace-handler) and [CLI insights](https://ragbits.deepsense.ai/how-to/project/use_tracing/#cli-trace-handler).
@@ -194,55 +200,95 @@ if __name__ == "__main__":
     asyncio.run(run())
 ```

+### Agentic RAG
+
+To build an agentic RAG pipeline:
+
+```python
+import asyncio
+from ragbits.agents import Agent
+from ragbits.core.embeddings import LiteLLMEmbedder
+from ragbits.core.llms import LiteLLM
+from ragbits.core.vector_stores import InMemoryVectorStore
+from ragbits.document_search import DocumentSearch
+
+embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
+vector_store = InMemoryVectorStore(embedder=embedder)
+document_search = DocumentSearch(vector_store=vector_store)
+
+llm = LiteLLM(model_name="gpt-4.1-nano")
+agent = Agent(llm=llm, tools=[document_search.search])
+
+async def main() -> None:
+    await document_search.ingest("web://https://arxiv.org/pdf/1706.03762")
+    response = await agent.run("What are the key findings presented in this paper?")
+    print(response.content)
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
 ### Chat UI

-To expose your RAG application through Ragbits UI:
+To expose your GenAI application through Ragbits API:

 ```python
-from collections.abc import AsyncGenerator, Iterable
-from pydantic import BaseModel
+from collections.abc import AsyncGenerator
+from ragbits.agents import Agent, ToolCallResult
 from ragbits.chat.api import RagbitsAPI
 from ragbits.chat.interface import ChatInterface
-from ragbits.chat.interface.types import ChatContext, ChatResponse
+from ragbits.chat.interface.types import ChatContext, ChatResponse, LiveUpdateType
 from ragbits.core.embeddings import LiteLLMEmbedder
-from ragbits.core.llms import LiteLLM
-from ragbits.core.prompt import Prompt, ChatFormat
+from ragbits.core.llms import LiteLLM, ToolCall
+from ragbits.core.prompt import ChatFormat
 from ragbits.core.vector_stores import InMemoryVectorStore
 from ragbits.document_search import DocumentSearch
-from ragbits.document_search.documents.element import Element

-class QuestionAnswerPromptInput(BaseModel):
-    question: str
-    context: Iterable[Element]
+embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
+vector_store = InMemoryVectorStore(embedder=embedder)
+document_search = DocumentSearch(vector_store=vector_store)

-class QuestionAnswerPrompt(Prompt[QuestionAnswerPromptInput, str]):
-    system_prompt = """
-    You are a question answering agent. Answer the question that will be provided using context.
-    If in the given context there is not enough information refuse to answer.
-    """
-    user_prompt = """
-    Question: {{ question }}
-    Context: {% for chunk in context %}{{ chunk.text_representation }}{%- endfor %}
-    """
+llm = LiteLLM(model_name="gpt-4.1-nano")
+agent = Agent(llm=llm, tools=[document_search.search])

 class MyChat(ChatInterface):
     async def setup(self) -> None:
-        self.llm = LiteLLM(model_name="gpt-4.1-nano")
-        self.embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
-        self.vector_store = InMemoryVectorStore(embedder=self.embedder)
-        self.document_search = DocumentSearch(vector_store=self.vector_store)
-        await self.document_search.ingest("web://https://arxiv.org/pdf/1706.03762")
+        await document_search.ingest("web://https://arxiv.org/pdf/1706.03762")

     async def chat(
         self,
         message: str,
         history: ChatFormat | None = None,
         context: ChatContext | None = None,
-    ) -> AsyncGenerator[ChatResponse, None]:
-        chunks = await self.document_search.search(message)
-        prompt = QuestionAnswerPrompt(QuestionAnswerPromptInput(question=message, context=chunks))
-        async for text in self.llm.generate_streaming(prompt):
-            yield self.create_text_response(text)
+    ) -> AsyncGenerator[ChatResponse]:
+        async for result in agent.run_streaming(message):
+            match result:
+                case str():
+                    yield self.create_live_update(
+                        update_id="1",
+                        type=LiveUpdateType.START,
+                        label="Answering...",
+                    )
+                    yield self.create_text_response(result)
+                case ToolCall():
+                    yield self.create_live_update(
+                        update_id="2",
+                        type=LiveUpdateType.START,
+                        label="Searching...",
+                    )
+                case ToolCallResult():
+                    yield self.create_live_update(
+                        update_id="2",
+                        type=LiveUpdateType.FINISH,
+                        label="Search",
+                        description=f"Found {len(result.result)} relevant chunks.",
+                    )
+
+        yield self.create_live_update(
+            update_id="1",
+            type=LiveUpdateType.FINISH,
+            label="Answer",
+        )

 if __name__ == "__main__":
     api = RagbitsAPI(MyChat)
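
One detail both files rely on: the agent's only tool is `DocumentSearch.search`, an ordinary async method, and its return value is also what the FINISH live update's `len(result.result)` count comes from. A minimal sketch (not part of this commit) of calling it directly, under the same assumptions as the examples above; the query and printed count are illustrative:

```python
import asyncio

from ragbits.core.embeddings import LiteLLMEmbedder
from ragbits.core.vector_stores import InMemoryVectorStore
from ragbits.document_search import DocumentSearch

embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
vector_store = InMemoryVectorStore(embedder=embedder)
document_search = DocumentSearch(vector_store=vector_store)


async def main() -> None:
    # Same web:// source URI scheme used in the new examples.
    await document_search.ingest("web://https://arxiv.org/pdf/1706.03762")

    # The same call the agent makes when it invokes the search tool.
    results = await document_search.search("What is multi-head attention?")
    print(f"Found {len(results)} relevant chunks.")


if __name__ == "__main__":
    asyncio.run(main())
```

Both updated snippets share this module-level wiring between `setup` and `chat`, which is why `MyChat.setup` now only needs to ingest.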
