Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
d37a6a5
Merge pull request #118 from linagora/dev
EnjoyBacon7 Oct 22, 2025
e184fca
Update Docker images to latest versions
EnjoyBacon7 Oct 22, 2025
1de3fd0
Merge branch 'main' of github.com:linagora/openrag
Ahmath-Gadji Nov 13, 2025
1ade31e
feat: Allow using openAI routes without partition
paultranvan Nov 20, 2025
191e49a
Merge pull request #148 from linagora/update-documentation
Ahmath-Gadji Nov 24, 2025
4835daa
Push image to DockerHub
dodekapod Nov 24, 2025
a90823d
Remove release job from build workflow
dodekapod Nov 24, 2025
8590a49
feat: Make disk file saving more robust
paultranvan Nov 19, 2025
7854511
tests: Add unit test for disk file saving
paultranvan Nov 19, 2025
15a983e
refactor: Move file serialization in common module
paultranvan Nov 19, 2025
eee2da0
feat: Add tools endpoint
paultranvan Nov 19, 2025
9a9f9dd
Merge branch 'dev'
Ahmath-Gadji Dec 2, 2025
9efbad8
Merge branch 'dev'
Ahmath-Gadji Dec 5, 2025
efa103f
Merge pull request #175 from linagora/dev
paultranvan Dec 17, 2025
bfbf3a5
Remove `max_tokens` default value to avoid cutting mid-generation.
Ahmath-Gadji Dec 23, 2025
ca868b9
Merge pull request #181 from linagora/lift_default_max_tokens
Ahmath-Gadji Dec 23, 2025
9d82e39
new release 1.1.7
Ahmath-Gadji Jan 22, 2026
1719e40
Merge pull request #224 from linagora/release_1.1.7
Ahmath-Gadji Jan 22, 2026
3ed3c0e
Merge branch 'dev'
Ahmath-Gadji Jan 22, 2026
0b5f84c
correct version 1.1.7 ==> 1.1.6
Ahmath-Gadji Jan 22, 2026
1042a58
docs: adding doc spoken style answer prompt
Ahmath-Gadji Feb 10, 2026
3e219ea
Merge pull request #235 from linagora/docs/spoken_style
Ahmath-Gadji Feb 10, 2026
df58eb6
feat: add HTTP MCP search server with partition ACLs
EnjoyBacon7 Feb 24, 2026
6ba0fc8
Added mcp search and metadata fetching + tests
EnjoyBacon7 Feb 26, 2026
dd6d45b
Added more tools & tests
EnjoyBacon7 Feb 26, 2026
9577638
Added mcp tests
EnjoyBacon7 Mar 9, 2026
7cfcc20
feat: add OpenRAGApplicationService as shared app-layer facade
EnjoyBacon7 Mar 9, 2026
9d23edf
refactor(mcp): consolidate services into OpenRAGApplicationService
EnjoyBacon7 Mar 9, 2026
adc89ec
refactor(routers): route all operations through OpenRAGApplicationSer…
EnjoyBacon7 Mar 9, 2026
20d734f
test(mcp): update patched_services fixture for composite app_service
EnjoyBacon7 Mar 9, 2026
58b90bf
fix(rag): prevent ContextWindowExceededError by correcting context bu…
EnjoyBacon7 Mar 9, 2026
289e52f
fix(chunker): prevent ContextWindowExceededError during chunk context…
EnjoyBacon7 Mar 9, 2026
ad5d117
fix(mcp): paginate get_file_chunks to prevent LLM context overflow
EnjoyBacon7 Mar 9, 2026
13f1098
fix(mcp): use limit=-1 instead of limit=100_000 for unbounded chunk f…
EnjoyBacon7 Mar 9, 2026
a32601d
fix(mcp): lower get_file_chunks default limit from 10 to 3
EnjoyBacon7 Mar 9, 2026
d254ab7
fix(mcp): auto-create partition on index_url when partition does not …
EnjoyBacon7 Mar 11, 2026
1468737
fix(mcp): forward user to add_file in index_url to fix task ownership…
EnjoyBacon7 Mar 11, 2026
2a31f12
fix(mcp): catch non-PermissionError in dispatch middleware to avoid 5…
EnjoyBacon7 Mar 12, 2026
61a262b
fix(mcp): prevent ContextVar mutation, enforce editor ACL on write op…
EnjoyBacon7 Mar 12, 2026
866a071
fix(api): replace deprecated @app.on_event with lifespan context mana…
EnjoyBacon7 Mar 12, 2026
82a2909
fix(routers): remove redundant Ray remote call in get_task_status (#3)
EnjoyBacon7 Mar 12, 2026
c0032ce
fix(app): pass None instead of invalid Ray task_id in execute_tool (#6)
EnjoyBacon7 Mar 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
BASE_URL=
API_KEY=
MODEL=
# LLM_CONTEXT_WINDOW=8192 # Token context window of the LLM (used to cap RAG context size)

# VLM (Visual Language Model) you can set it to the same as LLM if your LLM supports images
VLM_BASE_URL=
VLM_API_KEY=
VLM_MODEL=
# VLM_CONTEXT_WINDOW=8192 # Token context window of the VLM (used to cap chunk contextualizer input size)

## FastAPI App (no need to change it)
# APP_PORT=8080 # this is the forwarded port
Expand Down Expand Up @@ -60,3 +62,12 @@ LOG_LEVEL=DEBUG # See possible values https://loguru.readthedocs.io/en/stable/ap
# SERVER
# Set the preferred URL scheme for generated URLs (e.g., task_status_url).
PREFERRED_URL_SCHEME=https

# MCP SERVER
# OPENRAG_MCP_SERVER_NAME="OpenRAG MCP"
# OPENRAG_MCP_HOST=0.0.0.0
# OPENRAG_MCP_PORT=8081
# OPENRAG_MCP_PATH=/mcp
# OPENRAG_MCP_DEFAULT_TOP_K=5
# OPENRAG_MCP_MAX_TOP_K=50
# OPENRAG_MCP_SIMILARITY_THRESHOLD=0.8
11 changes: 11 additions & 0 deletions .hydra_config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ llm:
base_url: ${oc.env:BASE_URL}
model: ${oc.env:MODEL}
api_key: ${oc.env:API_KEY}
context_window: ${oc.decode:${oc.env:LLM_CONTEXT_WINDOW, 8192}}

vlm:
<<: *llm_params
base_url: ${oc.env:VLM_BASE_URL}
model: ${oc.env:VLM_MODEL}
api_key: ${oc.env:VLM_API_KEY}
context_window: ${oc.decode:${oc.env:VLM_CONTEXT_WINDOW, 8192}}

semaphore:
llm_semaphore: ${oc.decode:${oc.env:LLM_SEMAPHORE, 10}}
Expand Down Expand Up @@ -72,6 +74,15 @@ verbose:
server:
preferred_url_scheme: ${oc.env:PREFERRED_URL_SCHEME, null}

mcp:
server_name: ${oc.env:OPENRAG_MCP_SERVER_NAME, OpenRAG MCP}
host: ${oc.env:OPENRAG_MCP_HOST, 0.0.0.0}
port: ${oc.decode:${oc.env:OPENRAG_MCP_PORT, 8081}}
path: ${oc.env:OPENRAG_MCP_PATH, /mcp}
default_top_k: ${oc.decode:${oc.env:OPENRAG_MCP_DEFAULT_TOP_K, 5}}
max_top_k: ${oc.decode:${oc.env:OPENRAG_MCP_MAX_TOP_K, 50}}
similarity_threshold: ${oc.decode:${oc.env:OPENRAG_MCP_SIMILARITY_THRESHOLD, 0.8}}

paths:
prompts_dir: ${oc.env:PROMPTS_DIR, ../prompts/example1}
data_dir: ${oc.env:DATA_DIR, ../data}
Expand Down
1 change: 1 addition & 0 deletions docs/content/docs/documentation/env_vars.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ The RAG pipeline comes with preconfigured prompts **`./prompts/example1`**. Here
| Template File | Purpose |
|---------------|---------|
| `sys_prompt_tmpl.txt` | System prompt that defines the assistant's behavior and role |
| `spoken_style_answer_tmpl.txt` | Template for converting responses to a more natural, conversational spoken style (oral / audio type of answer)|
| `query_contextualizer_tmpl.txt` | Template for adding context to user queries |
| `chunk_contextualizer_tmpl.txt` | Template for contextualizing document chunks during indexing |
| `image_captioning_tmpl.txt` | Template for generating image descriptions using the VLM |
Expand Down
18 changes: 17 additions & 1 deletion openrag/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import warnings
from contextlib import AsyncExitStack, asynccontextmanager
from enum import Enum
from importlib.metadata import version as get_package_version
from pathlib import Path
Expand Down Expand Up @@ -29,6 +30,9 @@
from routers.search import router as search_router
from routers.tools import router as tools_router
from routers.users import router as users_router
from mcp_server import create_mcp_http_app
from mcp_server import path as mcp_path
from mcp_server import server as mcp_server
from starlette.middleware.base import BaseHTTPMiddleware
from utils.dependencies import get_vectordb
from utils.exceptions import OpenRAGError
Expand Down Expand Up @@ -82,7 +86,17 @@ def __init__(self, config):
except Exception:
app_version = "unknown"

app = FastAPI(version=app_version)
_mcp_lifespan_stack = AsyncExitStack()


@asynccontextmanager
async def lifespan(app):
    """FastAPI lifespan: run the MCP session manager for the app's lifetime.

    On startup, enters the MCP server's session manager on the shared
    ``_mcp_lifespan_stack``; on shutdown, closes the stack so the session
    manager is torn down cleanly.
    """
    await _mcp_lifespan_stack.enter_async_context(mcp_server.session_manager.run())
    try:
        yield
    finally:
        # Close the stack even if shutdown raises after a successful startup;
        # a bare `yield` would skip cleanup and leak the MCP session manager.
        await _mcp_lifespan_stack.aclose()


app = FastAPI(version=app_version, lifespan=lifespan)


def custom_openapi():
Expand Down Expand Up @@ -180,10 +194,12 @@ async def openrag_exception_handler(request: Request, exc: OpenRAGError):
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
expose_headers=["Mcp-Session-Id"],
)

app.state.app_state = AppState(config)
app.mount("/static", StaticFiles(directory=DATA_DIR.resolve(), check_dir=True), name="static")
app.mount(mcp_path, create_mcp_http_app(), name="mcp")


@app.get("/health_check", summary="Health check endpoint for API", dependencies=[])
Expand Down
11 changes: 11 additions & 0 deletions openrag/components/app/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from .interfaces import OpenRAGApiInterface

# `OpenRAGApplicationService` is listed here but NOT imported eagerly:
# it is resolved lazily via the module-level __getattr__ below (PEP 562),
# presumably to defer the cost/side effects of importing `.service`
# until the name is actually requested — confirm against `.service`.
__all__ = ["OpenRAGApiInterface", "OpenRAGApplicationService"]


def __getattr__(name: str):
if name == "OpenRAGApplicationService":
from .service import OpenRAGApplicationService # noqa: PLC0415

return OpenRAGApplicationService
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
Loading