diff --git a/llama-index-integrations/memory/llama-index-memory-zep/.gitignore b/llama-index-integrations/memory/llama-index-memory-zep/.gitignore new file mode 100644 index 0000000000..990c18de22 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-zep/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/memory/llama-index-memory-zep/BUILD b/llama-index-integrations/memory/llama-index-memory-zep/BUILD new file mode 100644 index 0000000000..0896ca890d --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-zep/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/memory/llama-index-memory-zep/README.md b/llama-index-integrations/memory/llama-index-memory-zep/README.md new file mode 100644 index 0000000000..3ecb995a91 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-zep/README.md @@ -0,0 +1,218 @@ +`Note`: Before running or testing the code in this notebook, ensure that you have set up the `Zep server`. 
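+
+For example, a self-hosted Zep Community Edition server can be started with Docker. This is a sketch only — the exact commands and the default port depend on your Zep version, so check the [Zep docs](https://docs.getzep.com/) for your deployment:
+
+```bash
+# Assumes Docker with the Compose plugin is installed (commands may differ per Zep release).
+git clone https://github.com/getzep/zep.git
+cd zep
+docker compose up -d  # by default the API listens on http://localhost:8000
+```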
+
+# 🧠 Zep Memory Integration with LlamaIndex Agents
+
+This guide demonstrates how to use [Zep Memory](https://docs.getzep.com/) with various agent types from [LlamaIndex](https://github.com/jerryjliu/llama_index), including:
+
+- `SimpleChatEngine`
+- `ReActAgent`
+- `FunctionCallingAgent`
+- `AgentWorkflow`
+
+Both **synchronous** and **asynchronous** memory clients are supported and demonstrated.
+
+---
+
+## šŸ“¦ Install Dependencies
+
+```bash
+pip install llama-index-memory-zep
+pip install llama-index zep-python openai
+```
+
+---
+
+## šŸ” Environment Setup
+
+```python
+import os
+
+os.environ["OPENAI_API_KEY"] = "sk-..."  # Replace with your actual OpenAI key
+```
+
+---
+
+## šŸ“š Import Required Packages
+
+```python
+import uuid
+from zep_python.client import Zep, AsyncZep
+from llama_index.memory.zep import ZepMemory
+from llama_index.llms.openai import OpenAI
+```
+
+---
+
+## šŸ” Initialize Clients and IDs
+
+```python
+zep_client = Zep(api_key="mysupersecretkey", base_url="http://localhost:8000")
+azep_client = AsyncZep(
+    api_key="mysupersecretkey", base_url="http://localhost:8000"
+)
+
+user_id = uuid.uuid4().hex
+session_id = uuid.uuid4().hex
+
+# Register the user
+zep_client.user.add(user_id=user_id)
+
+# Start a memory session
+zep_client.memory.add_session(session_id=session_id, user_id=user_id)
+```
+
+---
+
+## 🧠 Initialize Zep Memory
+
+```python
+memory = ZepMemory.from_defaults(
+    zep_client=zep_client, session_id=session_id, user_id=user_id
+)
+amemory = ZepMemory.from_defaults(
+    zep_client=azep_client, session_id=session_id, user_id=user_id
+)
+```
+
+---
+
+## šŸ¤– LLM Setup
+
+```python
+llm = OpenAI(model="gpt-4o-mini")
+```
+
+---
+
+## šŸ’¬ SimpleChatEngine
+
+### āœ… Sync Example
+
+```python
+from llama_index.core.chat_engine.simple import SimpleChatEngine
+
+agent = SimpleChatEngine.from_defaults(llm=llm, memory=memory)
+
+print(agent.chat("Hi, my name is Younis"))
+print(agent.chat("What was my name?"))
+```
+
+### šŸŒ€ Async Example
+
+Use the async `achat` API together with the async-backed memory (run inside an async context, e.g. a notebook):
+
+```python
+agent = SimpleChatEngine.from_defaults(llm=llm, memory=amemory)
+
+print(await agent.achat("Hi, my name is Younis"))
+print(await agent.achat("What was my name?"))
+```
+
+---
+
+## šŸ” ReActAgent
+
+### āœ… Sync Example
+
+```python
+from llama_index.core.agent import ReActAgent
+
+agent = ReActAgent.from_tools(tools=[], llm=llm, memory=memory, verbose=True)
+
+agent.chat("What's the capital of France?")
+agent.chat("What was my previous question?")
+```
+
+### šŸŒ€ Async Example
+
+```python
+agent = ReActAgent.from_tools(tools=[], llm=llm, memory=amemory, verbose=True)
+
+await agent.achat("What's the capital of France?")
+await agent.achat("What was my previous question?")
+```
+
+---
+
+## āš™ļø FunctionCallingAgent
+
+### āœ… Sync Example
+
+```python
+from llama_index.core.agent import FunctionCallingAgent
+
+agent = FunctionCallingAgent.from_tools(
+    [], llm=llm, memory=memory, verbose=True
+)
+
+agent.chat("Hi, my name is Younis")
+agent.chat("What was my name?")
+```
+
+### šŸŒ€ Async Example
+
+```python
+agent = FunctionCallingAgent.from_tools(
+    [], llm=llm, memory=amemory, verbose=True
+)
+
+await agent.achat("Hi, my name is Younis")
+await agent.achat("What was my name?")
+```
+
+---
+
+## 🧩 AgentWorkflow
+
+```python
+from llama_index.core.agent.workflow import (
+    AgentWorkflow,
+    AgentStream,
+    FunctionAgent,
+)
+
+research_agent = FunctionAgent(
+    name="ResearchAgent",
+    description="Responsible for synthesizing structured responses.",
+    system_prompt="""
+    You are the ResearchAgent. Your task is to compile and synthesize information based on context.
+    Be systematic, transparent, and clear in your responses.
+    """,
+    llm=llm,
+    tools=[],
+    verbose=True,
+)
+
+agent_workflow = AgentWorkflow(
+    agents=[research_agent],
+    root_agent=research_agent.name,
+    initial_state={"answer_content": ""},
+)
+
+# Run with sync memory (inside an async context, e.g. a notebook)
+handler = agent_workflow.run(
+    user_msg="Explain the heuristic function in detail.", memory=memory
+)
+
+# Stream the response
+current_agent = None
+async for event in handler.stream_events():
+    if (
+        hasattr(event, "current_agent_name")
+        and event.current_agent_name != current_agent
+    ):
+        current_agent = event.current_agent_name
+        print(f"\n{'='*50}\nšŸ¤– Agent: {current_agent}\n{'='*50}\n")
+    if isinstance(event, AgentStream):
+        print(event.delta, end="", flush=True)
+```
+
+### šŸŒ€ Async Memory?
+
+Pass `memory=amemory` instead of `memory=memory`; the workflow already streams asynchronously, so nothing else changes.
+
+---
+
+## āœ… Final Notes
+
+- This example assumes a Zep server running locally at `http://localhost:8000`
+- All memory-aware agents retain previous conversation turns across calls
+- Use this setup as a base for tool-enhanced agents, longer workflows, or other integrations
diff --git a/llama-index-integrations/memory/llama-index-memory-zep/llama_index/memory/zep/BUILD b/llama-index-integrations/memory/llama-index-memory-zep/llama_index/memory/zep/BUILD
new file mode 100644
index 0000000000..db46e8d6c9
--- /dev/null
+++ b/llama-index-integrations/memory/llama-index-memory-zep/llama_index/memory/zep/BUILD
@@ -0,0 +1 @@
+python_sources()
diff --git a/llama-index-integrations/memory/llama-index-memory-zep/llama_index/memory/zep/__init__.py b/llama-index-integrations/memory/llama-index-memory-zep/llama_index/memory/zep/__init__.py
new file mode 100644
index 0000000000..6cef3f4caf
--- /dev/null
+++ b/llama-index-integrations/memory/llama-index-memory-zep/llama_index/memory/zep/__init__.py
@@ -0,0 +1,3 @@
+from llama_index.memory.zep.base import ZepMemory
+
+__all__ = ["ZepMemory"]
diff --git a/llama-index-integrations/memory/llama-index-memory-zep/llama_index/memory/zep/base.py b/llama-index-integrations/memory/llama-index-memory-zep/llama_index/memory/zep/base.py
new file mode 100644
index 0000000000..a8d6a892c5
--- /dev/null
+++ b/llama-index-integrations/memory/llama-index-memory-zep/llama_index/memory/zep/base.py
@@ -0,0 +1,435 @@
+from typing import List, Optional, Dict, Any
+import inspect
+import uuid
+from llama_index.core.memory.types import BaseMemory
+from llama_index.core.memory.chat_memory_buffer import ChatMemoryBuffer
+from llama_index.core.base.llms.types import ChatMessage, MessageRole
+from llama_index.core.bridge.pydantic import Field, PrivateAttr
+from zep_python.types import Message
+
+
+class ZepMemory(BaseMemory):
+    """Zep Memory for LlamaIndex."""
+
+    session_id: str = Field(description="Zep session ID")
+    user_id: Optional[str] = Field(
+        default=None, description="User ID for user-specific context"
+    )
+    memory_key: str = Field(default="chat_history", description="Memory key for prompt")
+    max_message_length: int = Field(
+        default=2500, description="Maximum character length for messages"
+    )
+
+    # Private attributes
+    _client: Any = PrivateAttr(default=None)
+    _primary_memory: BaseMemory = PrivateAttr(default=None)
+
+    def __init__(
+        self,
+        session_id: str,
+        zep_client,
+        user_id: Optional[str] = None,
+        memory_key: str = "chat_history",
+        max_message_length: int = 2500,
+    ):
+        """Initialize with Zep client and session."""
+        super().__init__(
+            session_id=session_id,
+            user_id=user_id,
+            memory_key=memory_key,
+            max_message_length=max_message_length,
+        )
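+        # Mirror the Zep session in a local chat buffer for fast reads.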
+        self._client = zep_client
+        self._primary_memory = ChatMemoryBuffer.from_defaults()
+        self._sync_from_zep()
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "ZepMemory"
+
+    @classmethod
+    def from_defaults(
+        cls,
+        zep_client=None,
+        session_id=None,
+        user_id: Optional[str] = None,
+    ):
+        """Create a ZepMemory, generating a session ID when none is given."""
+        if zep_client is None:
+            raise ValueError("zep_client is required")
+        if session_id is None:
+            session_id = str(uuid.uuid4())
+        return cls(
+            zep_client=zep_client,
+            session_id=session_id,
+            user_id=user_id,
+            memory_key="chat_history",
+            max_message_length=2500,
+        )
+
+    def _convert_to_zep_message(self, message: ChatMessage) -> Message:
+        """Convert a LlamaIndex ChatMessage into a Zep Message."""
+        role_map = {
+            MessageRole.USER: "user",
+            MessageRole.ASSISTANT: "assistant",
+            MessageRole.SYSTEM: "system",
+            MessageRole.TOOL: "tool",
+        }
+        role = role_map.get(message.role, "user")
+        content = message.content if message.content is not None else ""
+        if len(content) > self.max_message_length:
+            content = content[: self.max_message_length]
+        return Message(
+            role=role,
+            content=content,
+            role_type=role,
+            metadata=message.additional_kwargs or {},
+        )
+
+    def _sync_from_zep(self) -> None:
+        """Synchronously retrieve memory from Zep and update local memory."""
+        if self._client is None:
+            return
+        try:
+            zep_memory = self._client.memory.get(session_id=self.session_id)
+            if inspect.iscoroutine(zep_memory):
+                # An AsyncZep client returns a coroutine here; close it and
+                # rely on the async path (aget/_async_sync_from_zep) instead.
+                zep_memory.close()
+                return
+            if not zep_memory:
+                return
+            messages: List[ChatMessage] = []
+            role_map = {
+                "user": MessageRole.USER,
+                "assistant": MessageRole.ASSISTANT,
+                "system": MessageRole.SYSTEM,
+                "tool": MessageRole.TOOL,
+                "function": MessageRole.TOOL,
+            }
+            if hasattr(zep_memory, "messages") and zep_memory.messages:
+                for msg in zep_memory.messages:
+                    role_str = getattr(msg, "role", None)
+                    if not role_str and hasattr(msg, "role_type"):
+                        role_str = msg.role_type
+                    role_str = role_str.lower() if role_str else "user"
+                    role = role_map.get(role_str, MessageRole.USER)
+                    content = getattr(msg, "content", "")
+                    metadata = getattr(msg, "metadata", {}) or {}
+                    chat_message = ChatMessage(
+                        role=role,
+                        content=content,
+                        additional_kwargs=metadata,
+                    )
+                    messages.append(chat_message)
+            if messages:
+                self._primary_memory.set(messages)
+        except Exception:
+            pass  # Best-effort sync; a Zep error should never break the caller
+
+    async def _async_sync_from_zep(self) -> None:
+        """Asynchronously retrieve memory from Zep and update local memory."""
+        if self._client is None:
+            return
+        try:
+            zep_memory = await self._client.memory.get(session_id=self.session_id)
+            if not zep_memory:
+                return
+            messages: List[ChatMessage] = []
+            role_map = {
+                "user": MessageRole.USER,
+                "assistant": MessageRole.ASSISTANT,
+                "system": MessageRole.SYSTEM,
+                "tool": MessageRole.TOOL,
+                "function": MessageRole.TOOL,
+            }
+            if hasattr(zep_memory, "messages") and zep_memory.messages:
+                for msg in zep_memory.messages:
+                    role_str = getattr(msg, "role", None)
+                    if not role_str and hasattr(msg, "role_type"):
+                        role_str = msg.role_type
+                    role_str = role_str.lower() if role_str else "user"
+                    role = role_map.get(role_str, MessageRole.USER)
+                    content = getattr(msg, "content", "")
+                    metadata = getattr(msg, "metadata", {}) or {}
+                    chat_message = ChatMessage(
+                        role=role,
+                        content=content,
+                        additional_kwargs=metadata,
+                    )
+                    messages.append(chat_message)
+            if messages:
+                self._primary_memory.set(messages)
+        except Exception:
+            pass  # Best-effort sync; a Zep error should never break the caller
+
+    def _get_context_from_memory(self, query: Optional[str] = None) -> str:
+        """Retrieve and compile context from Zep memory."""
+        if self._client is None:
+            return ""
+        try:
+            zep_memory = self._client.memory.get(session_id=self.session_id)
+            context_parts: List[str] = []
+            if hasattr(zep_memory, "facts") and zep_memory.facts:
+                context_parts.append("Facts:")
+                for fact in zep_memory.facts:
+                    context_parts.append(f"- {fact}")
+            if (
+                hasattr(zep_memory, "summary")
+                and zep_memory.summary
+                and hasattr(zep_memory.summary, "content")
+                and zep_memory.summary.content
+            ):
+                context_parts.append("\nSummary:")
+                context_parts.append(zep_memory.summary.content)
+            if hasattr(zep_memory, "context") and zep_memory.context:
+                context_parts.append("\nContext:")
+                context_parts.append(zep_memory.context)
+            if query and self.user_id:
+                # Graph edge search is only possible with a user ID.
+                try:
+                    edge_results = self._client.memory.search_sessions(
+                        user_id=self.user_id,
+                        text=query,
+                        search_scope="edges",
+                        limit=5,
+                    )
+                    if (
+                        edge_results
+                        and hasattr(edge_results, "edges")
+                        and edge_results.edges
+                    ):
+                        context_parts.append("\nRelevant information:")
+                        for edge in edge_results.edges:
+                            if hasattr(edge, "fact"):
+                                context_parts.append(f"- {edge.fact}")
+                except Exception:
+                    pass
+            return "\n".join(context_parts)
+        except Exception:
+            return ""
+
+    async def _async_get_context_from_memory(self, query: Optional[str] = None) -> str:
+        """Asynchronously retrieve and compile context from Zep memory."""
+        if self._client is None:
+            return ""
+        try:
+            zep_memory = await self._client.memory.get(session_id=self.session_id)
+            context_parts: List[str] = []
+            if hasattr(zep_memory, "facts") and zep_memory.facts:
+                context_parts.append("Facts:")
+                for fact in zep_memory.facts:
+                    context_parts.append(f"- {fact}")
+            if (
+                hasattr(zep_memory, "summary")
+                and zep_memory.summary
+                and hasattr(zep_memory.summary, "content")
+                and zep_memory.summary.content
+            ):
+                context_parts.append("\nSummary:")
+                context_parts.append(zep_memory.summary.content)
+            if hasattr(zep_memory, "context") and zep_memory.context:
+                context_parts.append("\nContext:")
+                context_parts.append(zep_memory.context)
+            if query and self.user_id:
+                # Graph edge search is only possible with a user ID.
+                try:
+                    edge_results = await self._client.memory.search_sessions(
+                        user_id=self.user_id,
+                        text=query,
+                        search_scope="edges",
+                        limit=5,
+                    )
+                    if (
+                        edge_results
+                        and hasattr(edge_results, "edges")
+                        and edge_results.edges
+                    ):
+                        context_parts.append("\nRelevant information:")
+                        for edge in edge_results.edges:
+                            if hasattr(edge, "fact"):
+                                context_parts.append(f"- {edge.fact}")
+                except Exception:
+                    pass
+            return "\n".join(context_parts)
+        except Exception:
+            return ""
+
+    def get(self, input: Optional[str] = None, **kwargs) -> List[ChatMessage]:
+        """Retrieve chat history with context enrichment."""
+        messages = self._primary_memory.get(input=input, **kwargs)
+        if self._client is None:
+            return messages
+        context = self._get_context_from_memory(input)
+        if context:
+            if messages and messages[0].role == MessageRole.SYSTEM:
+                updated_content = f"{messages[0].content}\n\n{context}"
+                messages[0] = ChatMessage(
+                    role=MessageRole.SYSTEM,
+                    content=updated_content,
+                    additional_kwargs=messages[0].additional_kwargs,
+                )
+            else:
+                system_message = ChatMessage(
+                    role=MessageRole.SYSTEM,
+                    content=context,
+                )
+                messages.insert(0, system_message)
+        return messages
+
+    async def aget(self, input: Optional[str] = None, **kwargs) -> List[ChatMessage]:
+        """Asynchronously retrieve chat history with context enrichment."""
+        await self._async_sync_from_zep()
+
+        # Now get the messages from the primary memory
+        if hasattr(self._primary_memory, "aget"):
+            messages = await self._primary_memory.aget(input=input, **kwargs)
+        else:
+            messages = self._primary_memory.get(input=input, **kwargs)
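+        # Without a Zep client there is no remote context to merge in.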
+        if self._client is None:
+            return messages
+
+        context = await self._async_get_context_from_memory(input)
+        if context:
+            if messages and messages[0].role == MessageRole.SYSTEM:
+                updated_content = f"{messages[0].content}\n\n{context}"
+                messages[0] = ChatMessage(
+                    role=MessageRole.SYSTEM,
+                    content=updated_content,
+                    additional_kwargs=messages[0].additional_kwargs,
+                )
+            else:
+                system_message = ChatMessage(
+                    role=MessageRole.SYSTEM,
+                    content=context,
+                )
+                messages.insert(0, system_message)
+        return messages
+
+    def get_all(self) -> List[ChatMessage]:
+        """Retrieve all chat history without context enrichment."""
+        return self._primary_memory.get_all()
+
+    async def aget_all(self) -> List[ChatMessage]:
+        """Asynchronously retrieve all chat history without context enrichment."""
+        # First sync from Zep
+        await self._async_sync_from_zep()
+
+        if hasattr(self._primary_memory, "aget_all"):
+            return await self._primary_memory.aget_all()
+        return self._primary_memory.get_all()
+
+    def _add_msgs_to_zep(self, messages: List[ChatMessage]) -> None:
+        """Add new messages to Zep memory with truncation."""
+        if self._client is None or not messages:
+            return
+        try:
+            zep_messages = []
+            for msg in messages:
+                zep_msg = self._convert_to_zep_message(msg)
+                if hasattr(zep_msg, "content") and zep_msg.content:
+                    if len(zep_msg.content) > self.max_message_length:
+                        zep_msg.content = (
+                            zep_msg.content[: self.max_message_length - 3] + "..."
+                        )
+                zep_messages.append(zep_msg)
+            # Probe the session first; a failure here is non-fatal.
+            try:
+                self._client.memory.get(session_id=self.session_id)
+            except Exception:
+                pass
+            if zep_messages:
+                self._client.memory.add(
+                    session_id=self.session_id,
+                    messages=zep_messages,
+                )
+        except Exception:
+            pass
+
+    async def _async_add_msgs_to_zep(self, messages: List[ChatMessage]) -> None:
+        """Asynchronously add new messages to Zep memory with truncation."""
+        if self._client is None or not messages:
+            return
+        try:
+            zep_messages = []
+            for msg in messages:
+                zep_msg = self._convert_to_zep_message(msg)
+                if hasattr(zep_msg, "content") and zep_msg.content:
+                    if len(zep_msg.content) > self.max_message_length:
+                        zep_msg.content = (
+                            zep_msg.content[: self.max_message_length - 3] + "..."
+                        )
+                zep_messages.append(zep_msg)
+            # Probe the session first; a failure here is non-fatal.
+            try:
+                await self._client.memory.get(session_id=self.session_id)
+            except Exception:
+                pass
+            if zep_messages:
+                await self._client.memory.add(
+                    session_id=self.session_id,
+                    messages=zep_messages,
+                )
+        except Exception:
+            pass
+
+    def put(self, message: ChatMessage) -> None:
+        """Add a message to memory."""
+        self._primary_memory.put(message)
+        self._add_msgs_to_zep([message])
+
+    async def aput(self, message: ChatMessage) -> None:
+        """Asynchronously add a message to memory."""
+        if hasattr(self._primary_memory, "aput"):
+            await self._primary_memory.aput(message)
+        else:
+            self._primary_memory.put(message)
+        await self._async_add_msgs_to_zep([message])
+
+    def set(self, messages: List[ChatMessage]) -> None:
+        """Replace the entire chat history, pushing new messages to Zep."""
+        initial_chat_len = len(self._primary_memory.get_all())
+        self._primary_memory.set(messages)
+        if len(messages) > initial_chat_len:
+            self._add_msgs_to_zep(messages[initial_chat_len:])
+
+    async def aset(self, messages: List[ChatMessage]) -> None:
+        """Asynchronously replace the entire chat history."""
+        initial_chat_len = len(self._primary_memory.get_all())
+        self._primary_memory.set(messages)  # No async version typically available
+        if len(messages) > initial_chat_len:
+            await self._async_add_msgs_to_zep(messages[initial_chat_len:])
+
+    def reset(self) -> None:
+        """Clear the memory."""
+        self._primary_memory.reset()
+        if self._client is not None:
+            try:
+                self._client.memory.delete(session_id=self.session_id)
+            except Exception:
+                pass
+
+    async def areset(self) -> None:
+        """Asynchronously clear the memory."""
+        if hasattr(self._primary_memory, "areset"):
+            await self._primary_memory.areset()
+        else:
+            self._primary_memory.reset()
+        if self._client is not None:
+            try:
+                await self._client.memory.delete(session_id=self.session_id)
+            except Exception:
+                pass
+
+    def search(self, query: str, **kwargs) -> Optional[Dict[str, Any]]:
+        """Search memory for relevant content."""
+        if self._client is None:
+            raise ValueError("Client is not initialized")
+        try:
+            return self._client.memory.search_sessions(
+                session_ids=[self.session_id],
+                user_id=self.user_id,
+                text=query,
+                **kwargs,
+            )
+        except Exception:
+            return None
+
+    async def asearch(self, query: str, **kwargs) -> Optional[Dict[str, Any]]:
+        """Asynchronously search memory for relevant content."""
+        if self._client is None:
+            raise ValueError("Client is not initialized")
+        try:
+            return await self._client.memory.search_sessions(
+                session_ids=[self.session_id],
+                user_id=self.user_id,
+                text=query,
+                **kwargs,
+            )
+        except Exception:
+            return None
diff --git a/llama-index-integrations/memory/llama-index-memory-zep/pyproject.toml b/llama-index-integrations/memory/llama-index-memory-zep/pyproject.toml
new file mode 100644
index 0000000000..110a174f48
--- /dev/null
+++ b/llama-index-integrations/memory/llama-index-memory-zep/pyproject.toml
@@ -0,0 +1,27 @@
+[build-system]
+build-backend = "poetry.core.masonry.api"
+requires = ["poetry-core"]
+
+[tool.poetry]
+authors = ["younis <yzbashir98@gmail.com>"]
+description = "Zep memory integration for LlamaIndex"
+keywords = ["chat-memory", "llama-index", "memory", "zep"]
+name = "llama-index-memory-zep"
+packages = [{include = "llama_index"}]
+readme = "README.md"
+version = "0.1.0"
+
+[tool.poetry.dependencies]
+python = ">=3.9,<4.0"
+zep-python = "^2.0.2"
+llama-index = "^0.12.30"
+pytest-asyncio = "^0.26.0"
+
+[tool.poetry.dev-dependencies]
+black = "^24.0" +isort = "^5.12" +mypy = "^1.0" +pytest = "^8.0" + +[tool.poetry.group.dev.dependencies] +pre-commit = "^4.2.0" diff --git a/llama-index-integrations/memory/llama-index-memory-zep/tests/BUILD b/llama-index-integrations/memory/llama-index-memory-zep/tests/BUILD new file mode 100644 index 0000000000..dabf212d7e --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-zep/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/memory/llama-index-memory-zep/tests/__init__.py b/llama-index-integrations/memory/llama-index-memory-zep/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama-index-integrations/memory/llama-index-memory-zep/tests/test_zep_memory_async.py b/llama-index-integrations/memory/llama-index-memory-zep/tests/test_zep_memory_async.py new file mode 100644 index 0000000000..15e10192ef --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-zep/tests/test_zep_memory_async.py @@ -0,0 +1,140 @@ +import uuid +import pytest +from unittest.mock import AsyncMock, MagicMock +from llama_index.memory.zep import ZepMemory +from llama_index.core.base.llms.types import ChatMessage, MessageRole + +# --------------------------- +# Fixtures +# --------------------------- + + +@pytest.fixture() +def session_and_user_ids(): + return str(uuid.uuid4()), str(uuid.uuid4()) + + +@pytest.fixture() +def mock_async_zep_client(): + mock_memory = MagicMock() + mock_memory.get = AsyncMock(return_value=MagicMock(messages=[])) + mock_memory.add = AsyncMock() + mock_memory.delete = AsyncMock() + mock_memory.search_sessions = AsyncMock() + + client = MagicMock() + client.memory = mock_memory + return client + + +@pytest.fixture(autouse=True) +def disable_sync_from_zep(monkeypatch): + monkeypatch.setattr(ZepMemory, "_sync_from_zep", lambda self: None) + + +# --------------------------- +# Async Tests +# --------------------------- + + +@pytest.mark.asyncio() +async def test_zep_memory_from_defaults(mock_async_zep_client, session_and_user_ids): + session_id, user_id = session_and_user_ids + + memory = ZepMemory.from_defaults( + zep_client=mock_async_zep_client, session_id=session_id, user_id=user_id + ) + + assert memory.session_id == session_id + assert memory.user_id == user_id + assert memory._client == mock_async_zep_client + assert memory.memory_key == "chat_history" + + +@pytest.mark.asyncio() +async def test_zep_memory_aget_returns_messages_with_context( + mock_async_zep_client, session_and_user_ids +): + session_id, user_id = session_and_user_ids + + mock_async_zep_client.memory.get.return_value = MagicMock( + messages=[], + facts=["The user likes cats."], + summary=MagicMock(content="User enjoys cat memes."), + context="Recently discussed memes.", + ) + + memory = ZepMemory.from_defaults(mock_async_zep_client, session_id, user_id) + + await memory.aset( + [ + ChatMessage(role=MessageRole.USER, content="Tell me a meme"), + ChatMessage(role=MessageRole.ASSISTANT, content="Here's a funny one..."), + ] + ) + + result = await memory.aget() + + assert result[0].role == MessageRole.SYSTEM + assert "User enjoys cat memes" in result[0].content + assert "Recently discussed memes" in result[0].content + assert result[1].role == MessageRole.USER + assert result[1].content == "Tell me a meme" + + +@pytest.mark.asyncio() +async def test_zep_memory_aset_stores_messages( + mock_async_zep_client, session_and_user_ids +): + session_id, user_id = session_and_user_ids + memory = ZepMemory.from_defaults(mock_async_zep_client, 
session_id, user_id) + + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello"), + ChatMessage(role=MessageRole.ASSISTANT, content="Hi there!"), + ] + + await memory.aset(messages) + + assert memory._primary_memory.get_all() == messages + mock_async_zep_client.memory.add.assert_awaited_once() + + +@pytest.mark.asyncio() +async def test_zep_memory_aput(mock_async_zep_client, session_and_user_ids): + session_id, user_id = session_and_user_ids + + memory = ZepMemory.from_defaults(mock_async_zep_client, session_id, user_id) + + msg = ChatMessage(role=MessageRole.USER, content="What's the weather?") + await memory.aput(msg) + + all_msgs = await memory.aget_all() + assert all_msgs[-1].content == "What's the weather?" + assert all_msgs[-1].role == MessageRole.USER + mock_async_zep_client.memory.add.assert_called_once() + + +@pytest.mark.asyncio() +async def test_zep_memory_areset(mock_async_zep_client, session_and_user_ids): + session_id, user_id = session_and_user_ids + memory = ZepMemory.from_defaults(mock_async_zep_client, session_id, user_id) + + await memory.areset() + mock_async_zep_client.memory.delete.assert_awaited_once_with(session_id=session_id) + + +@pytest.mark.asyncio() +async def test_zep_memory_asearch(mock_async_zep_client, session_and_user_ids): + session_id, user_id = session_and_user_ids + memory = ZepMemory.from_defaults(mock_async_zep_client, session_id, user_id) + + mock_async_zep_client.memory.search_sessions.return_value = { + "results": [{"session_id": "session1"}, {"session_id": "session2"}] + } + + result = await memory.asearch("search query") + + assert "results" in result + assert result["results"][0]["session_id"] == "session1" + assert result["results"][1]["session_id"] == "session2" diff --git a/llama-index-integrations/memory/llama-index-memory-zep/tests/test_zep_memory_sync.py b/llama-index-integrations/memory/llama-index-memory-zep/tests/test_zep_memory_sync.py new file mode 100644 index 0000000000..111bf7f778 --- /dev/null +++ b/llama-index-integrations/memory/llama-index-memory-zep/tests/test_zep_memory_sync.py @@ -0,0 +1,119 @@ +import uuid +from unittest.mock import MagicMock +import pytest +from llama_index.memory.zep import ZepMemory +from llama_index.core.base.llms.types import ChatMessage, MessageRole + +# --------------------------- +# Fixtures +# --------------------------- + + +@pytest.fixture() +def session_and_user_ids(): + return str(uuid.uuid4()), str(uuid.uuid4()) + + +@pytest.fixture() +def mock_zep_client(): + client = MagicMock() + client.memory.get.return_value = MagicMock(messages=[]) + return client + + +# --------------------------- +# Sync Tests +# --------------------------- + + +def test_zep_memory_from_defaults(mock_zep_client, session_and_user_ids): + session_id, user_id = session_and_user_ids + + memory = ZepMemory.from_defaults( + zep_client=mock_zep_client, session_id=session_id, user_id=user_id + ) + + assert memory.session_id == session_id + assert memory.user_id == user_id + assert memory._client == mock_zep_client + assert memory.memory_key == "chat_history" + + +def test_zep_memory_set_stores_messages(mock_zep_client, session_and_user_ids): + session_id, user_id = session_and_user_ids + memory = ZepMemory.from_defaults(mock_zep_client, session_id, user_id) + + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello"), + ChatMessage(role=MessageRole.ASSISTANT, content="Hi there!"), + ] + + memory.set(messages) + + # Confirm messages are stored in the local buffer + assert memory._primary_memory.get_all() 
== messages
+
+    mock_zep_client.memory.add.assert_called_once()
+
+
+def test_zep_memory_get_returns_messages_with_context(
+    mock_zep_client, session_and_user_ids
+):
+    session_id, user_id = session_and_user_ids
+
+    mock_zep_client.memory.get.return_value = MagicMock(
+        messages=[],
+        facts=["The user likes cats."],
+        summary=MagicMock(content="User enjoys cat memes."),
+        context="Recently discussed memes.",
+    )
+
+    memory = ZepMemory.from_defaults(mock_zep_client, session_id, user_id)
+
+    memory._primary_memory.set(
+        [
+            ChatMessage(role=MessageRole.USER, content="Tell me a meme"),
+            ChatMessage(role=MessageRole.ASSISTANT, content="Here's a funny one..."),
+        ]
+    )
+
+    result = memory.get()
+
+    assert result[0].role == MessageRole.SYSTEM
+    assert "User enjoys cat memes" in result[0].content
+    assert "Recently discussed memes" in result[0].content
+    assert result[1].role == MessageRole.USER
+    assert result[1].content == "Tell me a meme"
+
+
+def test_zep_memory_put(mock_zep_client, session_and_user_ids):
+    session_id, user_id = session_and_user_ids
+    memory = ZepMemory.from_defaults(mock_zep_client, session_id, user_id)
+
+    msg = ChatMessage(role=MessageRole.USER, content="What's the weather?")
+    memory.put(msg)
+
+    assert memory._primary_memory.get_all()[-1] == msg
+    mock_zep_client.memory.add.assert_called_once()
+
+
+def test_zep_memory_reset(mock_zep_client, session_and_user_ids):
+    session_id, user_id = session_and_user_ids
+    memory = ZepMemory.from_defaults(mock_zep_client, session_id, user_id)
+
+    memory.reset()
+
+    # The local buffer should be empty and the Zep session deleted
+    assert memory._primary_memory.get_all() == []
+    mock_zep_client.memory.delete.assert_called_once_with(session_id=session_id)
+
+
+def test_zep_memory_search(mock_zep_client, session_and_user_ids):
+    session_id, user_id = session_and_user_ids
+    memory = ZepMemory.from_defaults(mock_zep_client, session_id, user_id)
+
+    query = "Tell me a joke"
+    memory.search(query)
+
+    mock_zep_client.memory.search_sessions.assert_called_once_with(
+        session_ids=[session_id], user_id=user_id, text=query
+    )
diff --git a/llama-index-integrations/memory/llama-index-memory-zep/zep_memory_example.ipynb b/llama-index-integrations/memory/llama-index-memory-zep/zep_memory_example.ipynb
new file mode 100644
index 0000000000..3e7c2dc740
--- /dev/null
+++ b/llama-index-integrations/memory/llama-index-memory-zep/zep_memory_example.ipynb
@@ -0,0 +1,718 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`Note`: Before running or testing the code in this notebook, ensure that you have set up the `Zep server`."
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🧠 Zep Memory Integration with LlamaIndex Agents\n", + "\n", + "This notebook demonstrates how to use [Zep memory](https://docs.getzep.com/) with various agent types from LlamaIndex, including:\n", + "\n", + "- `SimpleChatEngine`\n", + "- `ReActAgent`\n", + "- `FunctionCallingAgent`\n", + "- `AgentWorkflow`\n", + "\n", + "Both **sync** and **async** memory clients are covered.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install llama-index zep-python openai" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxxx\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Required Packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import uuid\n", + "from zep_python.client import Zep, AsyncZep\n", + "from llamaindex.memory.zep import ZepMemory\n", + "from llama_index.llms.openai import OpenAI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Clients and IDs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zep_client = Zep(api_key=\"mysupersecretkey\", base_url=\"http://localhost:8000\")\n", + "azep_client = AsyncZep(api_key=\"mysupersecretkey\", base_url=\"http://localhost:8000\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "user_id = uuid.uuid4().hex # A new user identifier\n", + "new_user = zep_client.user.add(\n", + " user_id=user_id,\n", + ")\n", + "\n", + "# create a chat session\n", + "session_id = uuid.uuid4().hex # A new session identifier\n", + "session = zep_client.memory.add_session(\n", + " session_id=session_id,\n", + " user_id=user_id,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Memory (Sync and Async)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/younis/Desktop/dev/llama_index/llama-index-integrations/memory/llama-index-memory-zep/llamaindex/memory/zep/base.py:39: RuntimeWarning: coroutine 'AsyncMemoryClient.get' was never awaited\n", + " self._sync_from_zep()\n", + "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n" + ] + } + ], + "source": [ + "memory = ZepMemory.from_defaults(\n", + " zep_client=zep_client, # Zep client\n", + " session_id=session_id, # Optional: provide a session ID or one will be generated\n", + " user_id=user_id, # Optional: provide a user ID for user-specific context\n", + ")\n", + "\n", + "amemory = ZepMemory.from_defaults(\n", + " zep_client=azep_client, # AsyncZep client\n", + " session_id=session_id, # Optional: provide a session ID or one will be generated\n", + " user_id=user_id, # Optional: provide a user ID for user-specific context\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## LLM Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(model=\"gpt-4o-mini\") # You can swap this with other supported LLMs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SimpleChatEngine Demo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.chat_engine.simple import SimpleChatEngine" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello again, Younis! How can I help you today?\n", + "Your name is Younis.\n" + ] + } + ], + "source": [ + "agent = SimpleChatEngine.from_defaults(llm=llm, memory=memory) # set you memory here\n", + "\n", + "# Start the chat\n", + "response = agent.chat(\"Hi, My name is Younis\")\n", + "print(response)\n", + "\n", + "# Now test memory retention:\n", + "response = agent.chat(\"What was my name?\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello, Younis! How can I assist you today?\n", + "Your name is Younis.\n" + ] + } + ], + "source": [ + "agent = SimpleChatEngine.from_defaults(llm=llm, memory=amemory) # set you memory here\n", + "\n", + "# Start the chat\n", + "response = agent.chat(\"Hi, My name is Younis\")\n", + "print(response)\n", + "\n", + "# Now test memory retention:\n", + "response = agent.chat(\"What was my name?\")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## React Agent Demo " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.agent import ReActAgent\n", + "\n", + "agent = ReActAgent.from_tools(\n", + " tools=[],\n", + " llm=llm,\n", + " memory=memory,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step 9be3ba30-3d10-4617-844b-124ae0e5f17b. Step input: What's the capital of France?\n", + "\u001b[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer\n", + "Answer: La capitale de la France est Paris.\n", + "```\n", + "\u001b[0mLa capitale de la France est Paris.\n", + "```\n", + "> Running step 0efcf470-f5b1-4aca-94c2-91497aa3ce39. Step input: What was my previous question?\n", + "\u001b[1;3;38;5;200mThought: The current language of the user is: English. 
I need to answer the question without using any tools.\n", + "Answer: Your previous question was \"What's the capital of France?\"\n", + "\u001b[0mYour previous question was \"What's the capital of France?\"\n" + ] + } + ], + "source": [ + "response = agent.chat(\"What's the capital of France?\")\n", + "print(response)\n", + "\n", + "# Now test memory retention:\n", + "response = agent.chat(\"What was my previous question?\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.agent import ReActAgent\n", + "\n", + "agent = ReActAgent.from_tools(\n", + " tools=[],\n", + " llm=llm,\n", + " memory=amemory,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step a31ec9cc-0d68-4948-b413-1bc0fee98611. Step input: What's the capital of France?\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;3;38;5;200mThought: The current language of the user is: English. I can answer without using any more tools.\n", + "Answer: The capital of France is Paris.\n", + "\u001b[0mThe capital of France is Paris.\n", + "> Running step 8be75b08-36bd-4520-b23d-b62b06fec08f. Step input: What was my previous question?\n", + "\u001b[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!\n", + "Answer: Your previous question was, \"What's the capital of France?\"\n", + "\u001b[0mYour previous question was, \"What's the capital of France?\"\n" + ] + } + ], + "source": [ + "response = agent.chat(\"What's the capital of France?\")\n", + "print(response)\n", + "\n", + "# Now test memory retention:\n", + "response = agent.chat(\"What was my previous question?\")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## FunctionCallingAgent Demo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.agent import FunctionCallingAgent\n", + "\n", + "agent = FunctionCallingAgent.from_tools(\n", + " [],\n", + " llm=llm,\n", + " memory=memory,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step 08a916cc-f2d1-4f5a-9b7a-87cba46b93dd. Step input: Hi, My name is Younis\n", + "Added user message to memory: Hi, My name is Younis\n", + "=== LLM Response ===\n", + "Hello, Younis! How can I assist you today?\n", + "Hello, Younis! How can I assist you today?\n", + "> Running step ee67d336-b260-4ea4-885d-f280b731ea6b. 
Step input: What was my name?\n", + "Added user message to memory: What was my name?\n", + "=== LLM Response ===\n", + "Your name is Younis.\n", + "Your name is Younis.\n" + ] + } + ], + "source": [ + "# Start the chat\n", + "response = agent.chat(\"Hi, My name is Younis\")\n", + "print(response)\n", + "\n", + "# Now test memory retention:\n", + "response = agent.chat(\"What was my name?\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.agent import FunctionCallingAgent\n", + "\n", + "agent = FunctionCallingAgent.from_tools(\n", + " [],\n", + " llm=llm,\n", + " memory=amemory,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Running step 6a92d2f6-a596-49d6-a1e8-62533eae3baa. Step input: Hi, My name is Younis\n", + "Added user message to memory: Hi, My name is Younis\n", + "=== LLM Response ===\n", + "Hello again, Younis! How can I help you today?\n", + "Hello again, Younis! How can I help you today?\n", + "> Running step 712ec8b5-c820-4364-92d1-0ccaa81da621. Step input: What was my name?\n", + "Added user message to memory: What was my name?\n", + "=== LLM Response ===\n", + "Your name is Younis.\n", + "Your name is Younis.\n" + ] + } + ], + "source": [ + "# Start the chat\n", + "response = agent.chat(\"Hi, My name is Younis\")\n", + "print(response)\n", + "\n", + "# Now test memory retention:\n", + "response = agent.chat(\"What was my name?\")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## AgentWorkflow Demo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.embeddings.openai import OpenAIEmbedding\n", + "from llama_index.core.agent.workflow import AgentWorkflow\n", + "from llama_index.core.agent.workflow import (\n", + " AgentInput,\n", + " AgentOutput,\n", + " ToolCall,\n", + " ToolCallResult,\n", + " AgentStream,\n", + ")\n", + "from llama_index.core.agent.workflow import FunctionAgent, ReActAgent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a ResearchAgent with a system prompt that guides it to be systematic\n", + "research_agent = FunctionAgent(\n", + " name=\"ResearchAgent\",\n", + " description=\"Responsible for generating well-structured responses based on internal knowledge and context.\",\n", + " system_prompt=\"\"\"\n", + " You are the ResearchAgent. 
Your task is to compile and synthesize information based solely on the provided context.\n", + " Work in a systematic, transparent manner, explaining your thought process and summarizing the key insights clearly.\n", + " \"\"\",\n", + " llm=llm,\n", + " tools=[],\n", + " verbose=True,\n", + ")\n", + "\n", + "# Define the agent workflow with the ResearchAgent as the root agent\n", + "agent_workflow = AgentWorkflow(\n", + " agents=[research_agent],\n", + " root_agent=research_agent.name,\n", + " initial_state={\"answer_content\": \"\"},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "==================================================\n", + "šŸ¤– Agent: ResearchAgent\n", + "==================================================\n", + "\n", + "To explain the heuristic function in detail, I will break down the concept into several key components: definition, purpose, types, and examples.\n", + "\n", + "### Definition\n", + "A heuristic function is a method used in algorithms, particularly in search and optimization problems, to estimate the cost or distance from a given state to a goal state. It provides a way to guide the search process by evaluating which paths are more promising based on certain criteria.\n", + "\n", + "### Purpose\n", + "The primary purpose of a heuristic function is to improve the efficiency of search algorithms. By providing an estimate of the cost to reach the goal, the heuristic helps the algorithm prioritize which nodes to explore first. This can significantly reduce the number of nodes that need to be evaluated, leading to faster solutions.\n", + "\n", + "### Types of Heuristic Functions\n", + "1. **Admissible Heuristic**: A heuristic is admissible if it never overestimates the cost to reach the goal. This property ensures that the algorithm using it (like A*) is guaranteed to find the optimal solution.\n", + "\n", + "2. **Consistent Heuristic (or Monotonic)**: A heuristic is consistent if, for every node n and every successor n' of n, the estimated cost from n to the goal is less than or equal to the cost of reaching n' plus the estimated cost from n' to the goal. This property ensures that the heuristic is not only admissible but also helps maintain the optimality of the path.\n", + "\n", + "3. **Inadmissible Heuristic**: A heuristic that may overestimate the cost to reach the goal. While it can speed up the search process, it does not guarantee an optimal solution.\n", + "\n", + "### Examples\n", + "- **Straight-Line Distance**: In pathfinding problems (like navigating a map), the straight-line distance from the current location to the destination can serve as a heuristic. It provides a lower bound on the actual distance, making it admissible.\n", + "\n", + "- **Manhattan Distance**: In grid-based pathfinding (like in a city layout), the Manhattan distance (the sum of the absolute differences of the coordinates) can be used as a heuristic. It is particularly useful in scenarios where movement is restricted to horizontal and vertical directions.\n", + "\n", + "- **Domain-Specific Heuristics**: In games like chess, heuristics can be based on the evaluation of board positions, such as material advantage or control of the center, which helps guide the search for the best move.\n", + "\n", + "### Conclusion\n", + "Heuristic functions are crucial in optimizing search algorithms by providing estimates that guide the search process. 
Understanding the types of heuristics and their properties can help in designing more efficient algorithms for various applications, from artificial intelligence to operations research. By carefully selecting or designing a heuristic, one can significantly enhance the performance of search strategies.\n", + "šŸ’­ Agent thinking: To explain the heuristic function in detail, I will break down the concept into several key components: definition, purpose, types, and examples.\n", + "\n", + "### Definition\n", + "A heuristic function is a method used in algorithms, particularly in search and optimization problems, to estimate the cost or distance from a given state to a goal state. It provides a way to guide the search process by evaluating which paths are more promising based on certain criteria.\n", + "\n", + "### Purpose\n", + "The primary purpose of a heuristic function is to improve the efficiency of search algorithms. By providing an estimate of the cost to reach the goal, the heuristic helps the algorithm prioritize which nodes to explore first. This can significantly reduce the number of nodes that need to be evaluated, leading to faster solutions.\n", + "\n", + "### Types of Heuristic Functions\n", + "1. **Admissible Heuristic**: A heuristic is admissible if it never overestimates the cost to reach the goal. This property ensures that the algorithm using it (like A*) is guaranteed to find the optimal solution.\n", + "\n", + "2. **Consistent Heuristic (or Monotonic)**: A heuristic is consistent if, for every node n and every successor n' of n, the estimated cost from n to the goal is less than or equal to the cost of reaching n' plus the estimated cost from n' to the goal. This property ensures that the heuristic is not only admissible but also helps maintain the optimality of the path.\n", + "\n", + "3. **Inadmissible Heuristic**: A heuristic that may overestimate the cost to reach the goal. While it can speed up the search process, it does not guarantee an optimal solution.\n", + "\n", + "### Examples\n", + "- **Straight-Line Distance**: In pathfinding problems (like navigating a map), the straight-line distance from the current location to the destination can serve as a heuristic. It provides a lower bound on the actual distance, making it admissible.\n", + "\n", + "- **Manhattan Distance**: In grid-based pathfinding (like in a city layout), the Manhattan distance (the sum of the absolute differences of the coordinates) can be used as a heuristic. It is particularly useful in scenarios where movement is restricted to horizontal and vertical directions.\n", + "\n", + "- **Domain-Specific Heuristics**: In games like chess, heuristics can be based on the evaluation of board positions, such as material advantage or control of the center, which helps guide the search for the best move.\n", + "\n", + "### Conclusion\n", + "Heuristic functions are crucial in optimizing search algorithms by providing estimates that guide the search process. Understanding the types of heuristics and their properties can help in designing more efficient algorithms for various applications, from artificial intelligence to operations research. 
By carefully selecting or designing a heuristic, one can significantly enhance the performance of search strategies.\n" + ] + } + ], + "source": [ + "# await ensure_weaviate_connection(aclient)\n", + "\n", + "handler = agent_workflow.run(\n", + " user_msg=\"Explain the heuristic function in detail.\", memory=memory\n", + ")\n", + "\n", + "current_agent = None\n", + "tool_output_buffer = \"\"\n", + "\n", + "async for event in handler.stream_events():\n", + " # Handle agent changes with clear visual separation\n", + " if (\n", + " hasattr(event, \"current_agent_name\")\n", + " and event.current_agent_name != current_agent\n", + " ):\n", + " current_agent = event.current_agent_name\n", + " print(f\"\\n{'='*50}\")\n", + " print(f\"šŸ¤– Agent: {current_agent}\")\n", + " print(f\"{'='*50}\\n\")\n", + "\n", + " # Stream all content from AgentStream events in real-time\n", + " if isinstance(event, AgentStream):\n", + " print(event.delta, end=\"\", flush=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "==================================================\n", + "šŸ¤– Agent: ResearchAgent\n", + "==================================================\n", + "\n", + "To explain the heuristic function in detail, I will provide a comprehensive overview, including its definition, purpose, characteristics, types, and applications, particularly in the context of search algorithms.\n", + "\n", + "### Definition\n", + "A heuristic function, often denoted as \\( h(n) \\), is a function that estimates the cost or distance from a given node \\( n \\) to the goal node in a search space. It is used in various algorithms to guide the search process toward the most promising paths.\n", + "\n", + "### Purpose\n", + "The main purpose of a heuristic function is to improve the efficiency of search algorithms by providing an estimate of how far a node is from the goal. This allows the algorithm to prioritize which nodes to explore first, thereby reducing the overall search time and space.\n", + "\n", + "### Characteristics of Heuristic Functions\n", + "1. **Admissibility**: A heuristic is admissible if it never overestimates the actual cost to reach the goal from any node. This property ensures that the search algorithm will find the optimal solution if one exists.\n", + " \n", + "2. **Consistency (or Monotonicity)**: A heuristic is consistent if, for every node \\( n \\) and every successor \\( n' \\) of \\( n \\), the estimated cost from \\( n \\) to the goal is no greater than the cost of reaching \\( n' \\) plus the estimated cost from \\( n' \\) to the goal. This property helps maintain the optimality of the search.\n", + "\n", + "3. **Domain-specific**: Heuristics can be tailored to specific problems, leveraging domain knowledge to provide better estimates. This can lead to more efficient searches compared to general heuristics.\n", + "\n", + "### Types of Heuristic Functions\n", + "1. **Domain-independent heuristics**: These are general heuristics that can be applied to a wide range of problems. An example is the Manhattan distance in grid-based pathfinding, which calculates the distance between two points based on a grid layout.\n", + "\n", + "2. **Domain-specific heuristics**: These are designed for specific problems. 
For instance, in a puzzle-solving scenario like the 8-puzzle, a heuristic might count the number of misplaced tiles.\n", + "\n", + "### Applications\n", + "Heuristic functions are widely used in various fields, including:\n", + "\n", + "- **Artificial Intelligence**: In AI, heuristics are crucial for search algorithms in problem-solving, such as in game playing (e.g., chess) and planning.\n", + " \n", + "- **Pathfinding Algorithms**: Algorithms like A* use heuristic functions to find the shortest path in navigation systems, video games, and robotics.\n", + "\n", + "- **Optimization Problems**: Heuristics are employed in optimization problems where finding an exact solution is computationally expensive, such as in scheduling and resource allocation.\n", + "\n", + "### Example: A* Algorithm\n", + "In the A* search algorithm, the heuristic function is combined with the cost to reach the current node (denoted as \\( g(n) \\)) to form a total estimated cost \\( f(n) = g(n) + h(n) \\). This combination allows A* to efficiently find the shortest path by exploring the most promising nodes first, ensuring both optimality and completeness.\n", + "\n", + "### Conclusion\n", + "In summary, heuristic functions are essential tools in search algorithms that help estimate the cost to reach a goal, guiding the search process efficiently. Their design and implementation can significantly impact the performance of algorithms in various applications, making them a critical area of study in computer science and artificial intelligence.\n", + "šŸ’­ Agent thinking: To explain the heuristic function in detail, I will provide a comprehensive overview, including its definition, purpose, characteristics, types, and applications, particularly in the context of search algorithms.\n", + "\n", + "### Definition\n", + "A heuristic function, often denoted as \\( h(n) \\), is a function that estimates the cost or distance from a given node \\( n \\) to the goal node in a search space. It is used in various algorithms to guide the search process toward the most promising paths.\n", + "\n", + "### Purpose\n", + "The main purpose of a heuristic function is to improve the efficiency of search algorithms by providing an estimate of how far a node is from the goal. This allows the algorithm to prioritize which nodes to explore first, thereby reducing the overall search time and space.\n", + "\n", + "### Characteristics of Heuristic Functions\n", + "1. **Admissibility**: A heuristic is admissible if it never overestimates the actual cost to reach the goal from any node. This property ensures that the search algorithm will find the optimal solution if one exists.\n", + " \n", + "2. **Consistency (or Monotonicity)**: A heuristic is consistent if, for every node \\( n \\) and every successor \\( n' \\) of \\( n \\), the estimated cost from \\( n \\) to the goal is no greater than the cost of reaching \\( n' \\) plus the estimated cost from \\( n' \\) to the goal. This property helps maintain the optimality of the search.\n", + "\n", + "3. **Domain-specific**: Heuristics can be tailored to specific problems, leveraging domain knowledge to provide better estimates. This can lead to more efficient searches compared to general heuristics.\n", + "\n", + "### Types of Heuristic Functions\n", + "1. **Domain-independent heuristics**: These are general heuristics that can be applied to a wide range of problems. 
An example is the Manhattan distance in grid-based pathfinding, which calculates the distance between two points based on a grid layout.\n", + "\n", + "2. **Domain-specific heuristics**: These are designed for specific problems. For instance, in a puzzle-solving scenario like the 8-puzzle, a heuristic might count the number of misplaced tiles.\n", + "\n", + "### Applications\n", + "Heuristic functions are widely used in various fields, including:\n", + "\n", + "- **Artificial Intelligence**: In AI, heuristics are crucial for search algorithms in problem-solving, such as in game playing (e.g., chess) and planning.\n", + " \n", + "- **Pathfinding Algorithms**: Algorithms like A* use heuristic functions to find the shortest path in navigation systems, video games, and robotics.\n", + "\n", + "- **Optimization Problems**: Heuristics are employed in optimization problems where finding an exact solution is computationally expensive, such as in scheduling and resource allocation.\n", + "\n", + "### Example: A* Algorithm\n", + "In the A* search algorithm, the heuristic function is combined with the cost to reach the current node (denoted as \\( g(n) \\)) to form a total estimated cost \\( f(n) = g(n) + h(n) \\). This combination allows A* to efficiently find the shortest path by exploring the most promising nodes first, ensuring both optimality and completeness.\n", + "\n", + "### Conclusion\n", + "In summary, heuristic functions are essential tools in search algorithms that help estimate the cost to reach a goal, guiding the search process efficiently. Their design and implementation can significantly impact the performance of algorithms in various applications, making them a critical area of study in computer science and artificial intelligence.\n" + ] + } + ], + "source": [ + "# await ensure_weaviate_connection(aclient)\n", + "\n", + "handler = agent_workflow.run(\n", + " user_msg=\"Explain the heuristic function in detail.\", memory=amemory\n", + ")\n", + "\n", + "current_agent = None\n", + "tool_output_buffer = \"\"\n", + "\n", + "async for event in handler.stream_events():\n", + " # Handle agent changes with clear visual separation\n", + " if (\n", + " hasattr(event, \"current_agent_name\")\n", + " and event.current_agent_name != current_agent\n", + " ):\n", + " current_agent = event.current_agent_name\n", + " print(f\"\\n{'='*50}\")\n", + " print(f\"šŸ¤– Agent: {current_agent}\")\n", + " print(f\"{'='*50}\\n\")\n", + "\n", + " # Stream all content from AgentStream events in real-time\n", + " if isinstance(event, AgentStream):\n", + " print(event.delta, end=\"\", flush=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llamaindex", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}