diff --git a/.gitignore b/.gitignore
index bb77da7..b21a489 100644
--- a/.gitignore
+++ b/.gitignore
@@ -124,4 +124,6 @@ gunicorn.conf.py
 # ignore version file
 src/databricks/labs/mcp/_version.py
 
-.ruff_cache/
\ No newline at end of file
+.ruff_cache/
+.build/
+.databricks
diff --git a/README.md b/README.md
index cc0b5d3..8e86e10 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ Table of Contents
 - [Usage](#usage)
 - [Supported tools](#supported-tools)
 - [Developer Tools Server](#developer-tools-server)
+- [Deploying MCP servers on Databricks Apps](#deploying-mcp-servers-on-databricks-apps)
 - [Support](#support)
 - [Contributing](#contributing)
 
@@ -76,6 +77,27 @@ the following tools:
 
 This server is currently under construction. It is not yet usable, but contributions are welcome!
 
+## Deploying MCP servers on Databricks Apps
+
+You can deploy the Unity Catalog MCP server as a Databricks app. To do so, follow the instructions below, replacing `<your-profile-name>` with the name of your Databricks CLI profile:
+
+1. Move into the project directory:
+```bash
+cd /path/to/this/repo
+```
+
+2. Push app code to Databricks:
+```bash
+databricks bundle deploy -p <your-profile-name>
+```
+
+3. Deploy the app:
+```bash
+databricks bundle run mcp-on-apps -p <your-profile-name>
+```
+
+If you are a developer iterating on the server implementation, you can repeat steps 2 and 3 to push your latest changes to your Databricks app.
+
 ## Support
 
 Please note that all projects in the `databrickslabs` GitHub organization are provided for your exploration only, and are not formally supported by Databricks with Service Level Agreements (SLAs). They are provided AS-IS and we do not make any guarantees of any kind. Please do not submit a support ticket relating to any issues arising from the use of these projects.
 
@@ -85,3 +107,4 @@ Any issues discovered through the use of this project should be filed as GitHub
 
 We welcome contributions :) - see [CONTRIBUTING.md](./CONTRIBUTING.md) for details. Please make sure to read this guide before submitting pull requests, to ensure your contribution has the best chance of being accepted.
+
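For reference, once `databricks bundle run` has started the app, you can resolve its URL programmatically. This is a minimal sketch, assuming the Apps API exposed by `databricks-sdk>=0.53.0` (the version this change pins); the profile name is a placeholder:

```python
# Sketch: look up the deployed app's URL with the Databricks SDK.
# "mcp-on-apps" matches resources.apps in databricks.yml below;
# "<your-profile-name>" is a placeholder, not a real profile.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient(profile="<your-profile-name>")
app = w.apps.get(name="mcp-on-apps")
print(app.url)  # the MCP endpoint is served under <url>/mcp
```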
diff --git a/databricks.yml b/databricks.yml
new file mode 100644
index 0000000..70bd131
--- /dev/null
+++ b/databricks.yml
@@ -0,0 +1,26 @@
+bundle:
+  name: mcp-on-apps
+
+sync:
+  include:
+    - .build
+
+artifacts:
+  default:
+    type: whl
+    path: .
+    build: uv build --wheel
+
+resources:
+  apps:
+    mcp-on-apps:
+      name: "mcp-on-apps"
+      description: "MCP Server on Databricks Apps"
+      source_code_path: ./.build
+      config:
+        command: ["unitycatalog-mcp-app"]
+
+targets:
+  dev:
+    mode: development
+    default: true
\ No newline at end of file
diff --git a/hooks/apps_build.py b/hooks/apps_build.py
new file mode 100644
index 0000000..e540358
--- /dev/null
+++ b/hooks/apps_build.py
@@ -0,0 +1,49 @@
+from typing import Any
+from hatchling.builders.hooks.plugin.interface import BuildHookInterface
+from pathlib import Path
+import shutil
+
+
+class AppsBuildHook(BuildHookInterface):
+    """Hook to create a Databricks Apps-compatible build.
+
+    This hook is used to create a Databricks Apps-compatible build of the project.
+
+    The following steps are performed:
+    - Remove the ./.build folder if it exists.
+    - Copy the artifact_path to the ./.build folder.
+    - Write the name of the artifact to a requirements.txt file in the ./.build folder.
+    - The resulting build directory is printed to the console.
+
+    """
+
+    def finalize(
+        self, version: str, build_data: dict[str, Any], artifact_path: str
+    ) -> None:
+        self.app.display_info(
+            f"Running Databricks Apps build hook for project {self.metadata.name} in directory {Path.cwd()}"
+        )
+        # remove the ./.build folder if it exists
+        build_dir = Path(".build")
+        self.app.display_info(f"Resulting build directory: {build_dir.absolute()}")
+
+        if build_dir.exists():
+            self.app.display_info(f"Removing {build_dir}")
+            shutil.rmtree(build_dir)
+            self.app.display_info(f"Removed {build_dir}")
+        else:
+            self.app.display_info(f"{build_dir} does not exist, skipping removal")
+
+        # copy the artifact_path to the ./.build folder
+        build_dir.mkdir(exist_ok=True)
+        self.app.display_info(f"Copying {artifact_path} to {build_dir}")
+        shutil.copy(artifact_path, build_dir)
+
+        # write the name of the artifact to a requirements.txt file in the ./.build folder
+        requirements_file = build_dir / "requirements.txt"
+
+        requirements_file.write_text(Path(artifact_path).name, encoding="utf-8")
+
+        self.app.display_info(
+            f"Apps-compatible build written to {build_dir.absolute()}"
+        )
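To make the hook's contract concrete, here is a small sketch of what `.build/` should contain after the bundle's `uv build --wheel` step runs with this hook enabled; the wheel filename is illustrative and varies with the project version:

```python
# Sketch: inspect the Apps-compatible build produced by AppsBuildHook.
from pathlib import Path

build_dir = Path(".build")
for entry in sorted(build_dir.iterdir()):
    print(entry.name)
# Expected output (wheel name is illustrative):
#   databricks_labs_mcp-0.0.1-py3-none-any.whl
#   requirements.txt

# requirements.txt holds just the wheel filename, which is how
# Databricks Apps knows to install the bundled wheel:
print((build_dir / "requirements.txt").read_text())
```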
diff --git a/pyproject.toml b/pyproject.toml
index 4ca266e..4eea191 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,11 +8,11 @@ requires-python = ">=3.10"
 keywords = ["databricks", "unity catalog", "mcp", "agents", "llm", "automation", "genie"]
 dependencies = [
-    "mcp>=1.2.1",
+    "mcp>=1.8.1",
     "pydantic>=2.10.6",
     "pydantic-settings>=2.7.1",
     "unitycatalog-ai>=0.1.0",
-    "databricks-sdk>=0.49.0",
+    "databricks-sdk>=0.53.0",
     "databricks-openai>=0.3.1",
 ]
 
 license-files = ["LICENSE", "NOTICE"]
@@ -24,10 +24,15 @@ dev-dependencies = [
     "ruff>=0.9.4",
     "pytest>=8.3.4",
     "isort>=6.0.1",
+    "hatchling>=1.27.0",
 ]
 
+[tool.hatch.build.hooks.custom]
+path = "hooks/apps_build.py"
+
 [project.scripts]
 unitycatalog-mcp = "databricks.labs.mcp.servers.unity_catalog:main"
+unitycatalog-mcp-app = "databricks.labs.mcp.servers.unity_catalog.app:start_app"
 
 [build-system]
 requires = ["hatchling", "hatch-fancy-pypi-readme", "hatch-vcs"]
diff --git a/src/databricks/labs/mcp/base.py b/src/databricks/labs/mcp/base.py
new file mode 100644
index 0000000..e82a509
--- /dev/null
+++ b/src/databricks/labs/mcp/base.py
@@ -0,0 +1,147 @@
+"""
+Collection of base utils for MCP servers.
+"""
+
+import contextlib
+import logging
+from collections import deque
+from dataclasses import dataclass
+from typing import AsyncIterator
+from uuid import uuid4
+from starlette.applications import Starlette
+from starlette.routing import Mount
+from starlette.types import Receive, Scope, Send
+from mcp.server.streamable_http_manager import StreamableHTTPSessionManager
+
+from mcp.server import Server
+
+from mcp.server.streamable_http import (
+    EventCallback,
+    EventId,
+    EventMessage,
+    EventStore,
+    StreamId,
+)
+from mcp.types import JSONRPCMessage
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class EventEntry:
+    """
+    Represents an event entry in the event store.
+    """
+
+    event_id: EventId
+    stream_id: StreamId
+    message: JSONRPCMessage
+
+
+class InMemoryEventStore(EventStore):
+    """
+    Simple in-memory implementation of the EventStore interface for resumability.
+    This is primarily intended for examples and testing, not for production use
+    where a persistent storage solution would be more appropriate.
+
+    This implementation keeps only the last N events per stream for memory efficiency.
+    """
+
+    def __init__(self, max_events_per_stream: int = 100):
+        """Initialize the event store.
+
+        Args:
+            max_events_per_stream: Maximum number of events to keep per stream
+        """
+        self.max_events_per_stream = max_events_per_stream
+        # for maintaining last N events per stream
+        self.streams: dict[StreamId, deque[EventEntry]] = {}
+        # event_id -> EventEntry for quick lookup
+        self.event_index: dict[EventId, EventEntry] = {}
+
+    async def store_event(
+        self, stream_id: StreamId, message: JSONRPCMessage
+    ) -> EventId:
+        """Stores an event with a generated event ID."""
+        event_id = str(uuid4())
+        event_entry = EventEntry(
+            event_id=event_id, stream_id=stream_id, message=message
+        )
+
+        # Get or create deque for this stream
+        if stream_id not in self.streams:
+            self.streams[stream_id] = deque(maxlen=self.max_events_per_stream)
+
+        # If deque is full, the oldest event will be automatically removed
+        # We need to remove it from the event_index as well
+        if len(self.streams[stream_id]) == self.max_events_per_stream:
+            oldest_event = self.streams[stream_id][0]
+            self.event_index.pop(oldest_event.event_id, None)
+
+        # Add new event
+        self.streams[stream_id].append(event_entry)
+        self.event_index[event_id] = event_entry
+
+        return event_id
+
+    async def replay_events_after(
+        self,
+        last_event_id: EventId,
+        send_callback: EventCallback,
+    ) -> StreamId | None:
+        """Replays events that occurred after the specified event ID."""
+        if last_event_id not in self.event_index:
+            logger.warning(f"Event ID {last_event_id} not found in store")
+            return None
+
+        # Get the stream and find events after the last one
+        last_event = self.event_index[last_event_id]
+        stream_id = last_event.stream_id
+        stream_events = self.streams.get(last_event.stream_id, deque())
+
+        # Events in deque are already in chronological order
+        found_last = False
+        for event in stream_events:
+            if found_last:
+                await send_callback(EventMessage(event.message, event.event_id))
+            elif event.event_id == last_event_id:
+                found_last = True
+
+        return stream_id
+
+
+def get_serveable_app(app: Server, json_response: bool = True) -> Starlette:
+
+    event_store = InMemoryEventStore()
+
+    # Create the session manager with our app and event store
+    session_manager = StreamableHTTPSessionManager(
+        app=app,
+        event_store=event_store,  # Enable resumability
+        json_response=json_response,
+    )
+
+    # ASGI handler for streamable HTTP connections
+    async def handle_streamable_http(
+        scope: Scope, receive: Receive, send: Send
+    ) -> None:
+        await session_manager.handle_request(scope, receive, send)
+
+    @contextlib.asynccontextmanager
+    async def lifespan(app: Starlette) -> AsyncIterator[None]:
+        """Context manager for managing session manager lifecycle."""
+        async with session_manager.run():
+            logger.info("Application started with StreamableHTTP session manager!")
+            try:
+                yield
+            finally:
+                logger.info("Application shutting down...")
+
+    # Create an ASGI application using the transport
+    return Starlette(
+        debug=True,
+        routes=[
+            Mount("/mcp", app=handle_streamable_http),
+        ],
+        lifespan=lifespan,
+    )
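A short sketch of how the bounded `InMemoryEventStore` behaves, assuming the JSON-RPC message types from `mcp>=1.8.1` (the version this change pins); resumption fails once the referenced event has been evicted from the per-stream deque:

```python
import asyncio

from mcp.server.streamable_http import EventMessage
from mcp.types import JSONRPCMessage, JSONRPCRequest

from databricks.labs.mcp.base import InMemoryEventStore


async def main() -> None:
    store = InMemoryEventStore(max_events_per_stream=2)
    msg = JSONRPCMessage(JSONRPCRequest(jsonrpc="2.0", id=1, method="ping"))

    first = await store.store_event("stream-1", msg)
    second = await store.store_event("stream-1", msg)
    await store.store_event("stream-1", msg)  # deque full: evicts `first`

    async def replay(event: EventMessage) -> None:
        print("replayed event", event.event_id)

    # `first` was evicted, so there is nothing to resume from
    assert await store.replay_events_after(first, replay) is None
    # `second` is still indexed; later events are replayed in order
    assert await store.replay_events_after(second, replay) == "stream-1"


asyncio.run(main())
```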
diff --git a/src/databricks/labs/mcp/servers/unity_catalog/app.py b/src/databricks/labs/mcp/servers/unity_catalog/app.py
new file mode 100644
index 0000000..93b0b75
--- /dev/null
+++ b/src/databricks/labs/mcp/servers/unity_catalog/app.py
@@ -0,0 +1,35 @@
+from mcp.server import Server
+from mcp.types import Tool as ToolSpec
+import uvicorn
+from databricks.labs.mcp.base import get_serveable_app
+from databricks.labs.mcp.servers.unity_catalog.tools import (
+    Content,
+)
+from databricks.labs.mcp.servers.unity_catalog.cli import get_settings
+
+from databricks.labs.mcp._version import __version__ as VERSION
+from databricks.labs.mcp.servers.unity_catalog.server import get_tools_dict
+
+
+app = Server(name="mcp-unitycatalog", version=VERSION)
+tools_dict = get_tools_dict(settings=get_settings())
+
+
+@app.list_tools()
+async def list_tools() -> list[ToolSpec]:
+    return [tool.tool_spec for tool in tools_dict.values()]
+
+
+@app.call_tool()
+async def call_tool(name: str, arguments: dict) -> list[Content]:
+    tool = tools_dict[name]
+    return tool.execute(**arguments)
+
+
+def start_app():
+    serveable = get_serveable_app(app)
+    uvicorn.run(serveable, host="0.0.0.0", port=8000)
+
+
+if __name__ == "__main__":
+    start_app()
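A sketch of how a client could exercise the server started by `unitycatalog-mcp-app` (uvicorn on port 8000), assuming the streamable-HTTP client helpers that ship with `mcp>=1.8.1`:

```python
import asyncio

from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client


async def main() -> None:
    # The Starlette app in base.py mounts the session manager at /mcp
    async with streamablehttp_client("http://localhost:8000/mcp") as (
        read_stream,
        write_stream,
        _,
    ):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])


asyncio.run(main())
```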
name = "trove-classifiers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8f/8a/cc1debe3514da292094f1c3a700e4ca25442489731ef7c0814358816bb03/hatchling-1.27.0.tar.gz", hash = "sha256:971c296d9819abb3811112fc52c7a9751c8d381898f36533bb16f9791e941fd6", size = 54983 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/e7/ae38d7a6dfba0533684e0b2136817d667588ae3ec984c1a4e5df5eb88482/hatchling-1.27.0-py3-none-any.whl", hash = "sha256:d3a2f3567c4f926ea39849cdf924c7e99e6686c9c8e288ae1037c8fa2a5d937b", size = 75794 }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -1389,7 +1407,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.6.0" +version = "1.8.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1397,13 +1415,14 @@ dependencies = [ { name = "httpx-sse" }, { name = "pydantic" }, { name = "pydantic-settings" }, + { name = "python-multipart" }, { name = "sse-starlette" }, { name = "starlette" }, - { name = "uvicorn" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/95/d2/f587cb965a56e992634bebc8611c5b579af912b74e04eb9164bd49527d21/mcp-1.6.0.tar.gz", hash = "sha256:d9324876de2c5637369f43161cd71eebfd803df5a95e46225cab8d280e366723", size = 200031 } +sdist = { url = "https://files.pythonhosted.org/packages/7c/13/16b712e8a3be6a736b411df2fc6b4e75eb1d3e99b1cd57a3a1decf17f612/mcp-1.8.1.tar.gz", hash = "sha256:ec0646271d93749f784d2316fb5fe6102fb0d1be788ec70a9e2517e8f2722c0e", size = 265605 } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/30/20a7f33b0b884a9d14dd3aa94ff1ac9da1479fe2ad66dd9e2736075d2506/mcp-1.6.0-py3-none-any.whl", hash = "sha256:7bd24c6ea042dbec44c754f100984d186620d8b841ec30f1b19eda9b93a634d0", size = 76077 }, + { url = "https://files.pythonhosted.org/packages/1c/5d/91cf0d40e40ae9ecf8d4004e0f9611eea86085aa0b5505493e0ff53972da/mcp-1.8.1-py3-none-any.whl", hash = "sha256:948e03783859fa35abe05b9b6c0a1d5519be452fc079dc8d7f682549591c1770", size = 119761 }, ] [[package]] @@ -2188,6 +2207,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256 }, ] +[[package]] +name = "python-multipart" +version = "0.0.20" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546 }, +] + [[package]] name = "pytz" version = "2025.2" @@ -2695,6 +2723,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, ] +[[package]] +name = "trove-classifiers" +version = "2025.4.28.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/f3/4d82f6d81391237842c4250766ae02c7a178a3a236e0fbd749ec154da17c/trove_classifiers-2025.4.28.22.tar.gz", hash = "sha256:42bef4957a74fe7724b8310dafd4b23e0a71406a4812cf4dfd65e2ee34f1943d", size = 16883 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/d6/647f7aca9314bcc83d5608166f89121f55ccc05402b0cd1a1e72cffa3e62/trove_classifiers-2025.4.28.22-py3-none-any.whl", hash = "sha256:fdb453fefa3a0da9c18b8d390846e6df7e961e8924703559ea9be07ec99c0925", size = 14056 }, +] + [[package]] name = "typing-extensions" version = "4.13.2"