diff --git a/servers/developer_tools/README.md b/servers/developer_tools/README.md index a261a5f..4e22393 100644 --- a/servers/developer_tools/README.md +++ b/servers/developer_tools/README.md @@ -6,6 +6,10 @@ ## Overview A Model Context Protocol server that exposes common Databricks developer actions as tools. +Note that if you're developing within the Databricks product (e.g. writing a Databricks notebook), you can +use the Databricks Assistant, which automates common developer tasks on Databricks for you. However, this server can +be useful when developing outside of Databricks, or to integrate Databricks developer tools into other agents. + ## Usage 1. Install [uv](https://docs.astral.sh/uv/getting-started/installation/) diff --git a/servers/unity_catalog/app.yaml b/servers/unity_catalog/app.yaml new file mode 100644 index 0000000..e549b49 --- /dev/null +++ b/servers/unity_catalog/app.yaml @@ -0,0 +1,14 @@ +command: [ + "uv", + "run", + "unitycatalog-mcp", + "-s", + "retail_prod.agents", + "-g", + "01efcca6fdc712d7be87a40ad4a2e33e" +] + + +#env: +# - name: "SERVING_ENDPOINT" +# valueFrom: "serving-endpoint" diff --git a/servers/unity_catalog/pyproject.toml b/servers/unity_catalog/pyproject.toml index d4ac565..0d8ee06 100644 --- a/servers/unity_catalog/pyproject.toml +++ b/servers/unity_catalog/pyproject.toml @@ -19,6 +19,7 @@ dependencies = [ "unitycatalog-ai>=0.1.0", "databricks-sdk>=0.49.0", "databricks-openai>=0.3.1", + "uvicorn>=0.34.0" ] [project.scripts] @@ -33,6 +34,7 @@ dev-dependencies = [ "pyright>=1.1.393", "ruff>=0.9.4", "pytest>=8.3.4", + "anthropic", ] [tool.pytest.ini_options] diff --git a/servers/unity_catalog/requirements.txt b/servers/unity_catalog/requirements.txt new file mode 100644 index 0000000..7061757 --- /dev/null +++ b/servers/unity_catalog/requirements.txt @@ -0,0 +1,364 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile pyproject.toml -o requirements.txt +aiohappyeyeballs==2.6.1 + # via aiohttp 
+aiohttp==3.11.18 + # via + # aiohttp-retry + # unitycatalog-client +aiohttp-retry==2.9.1 + # via unitycatalog-client +aiosignal==1.3.2 + # via aiohttp +alembic==1.15.2 + # via mlflow +annotated-types==0.7.0 + # via pydantic +anyio==4.9.0 + # via + # httpx + # mcp + # openai + # sse-starlette + # starlette +attrs==25.3.0 + # via aiohttp +blinker==1.9.0 + # via flask +cachetools==5.5.2 + # via + # google-auth + # mlflow-skinny +certifi==2025.4.26 + # via + # httpcore + # httpx + # requests +charset-normalizer==3.4.1 + # via requests +click==8.1.8 + # via + # flask + # mlflow-skinny + # uvicorn +cloudpickle==3.1.1 + # via + # mlflow-skinny + # unitycatalog-ai +contourpy==1.3.2 + # via matplotlib +cycler==0.12.1 + # via matplotlib +databricks-ai-bridge==0.4.1 + # via databricks-openai +databricks-connect==16.1.4 + # via + # databricks-openai + # unitycatalog-ai +databricks-openai==0.3.1 + # via unitycatalog-mcp (pyproject.toml) +databricks-sdk==0.50.0 + # via + # unitycatalog-mcp (pyproject.toml) + # databricks-ai-bridge + # databricks-connect + # mlflow-skinny + # unitycatalog-ai +databricks-vectorsearch==0.56 + # via databricks-openai +deprecated==1.2.18 + # via + # opentelemetry-api + # opentelemetry-semantic-conventions +deprecation==2.1.0 + # via databricks-vectorsearch +distro==1.9.0 + # via openai +docker==7.1.0 + # via mlflow +fastapi==0.115.12 + # via mlflow-skinny +flask==3.1.0 + # via mlflow +fonttools==4.57.0 + # via matplotlib +frozenlist==1.6.0 + # via + # aiohttp + # aiosignal +gitdb==4.0.12 + # via gitpython +gitpython==3.1.44 + # via mlflow-skinny +google-auth==2.39.0 + # via databricks-sdk +googleapis-common-protos==1.70.0 + # via + # databricks-connect + # grpcio-status +graphene==3.4.3 + # via mlflow +graphql-core==3.2.6 + # via + # graphene + # graphql-relay +graphql-relay==3.2.0 + # via graphene +grpcio==1.71.0 + # via + # databricks-connect + # grpcio-status +grpcio-status==1.71.0 + # via databricks-connect +gunicorn==23.0.0 + # via mlflow 
+h11==0.16.0 + # via + # httpcore + # uvicorn +httpcore==1.0.9 + # via httpx +httpx==0.28.1 + # via + # mcp + # openai +httpx-sse==0.4.0 + # via mcp +idna==3.10 + # via + # anyio + # httpx + # requests + # yarl +importlib-metadata==8.6.1 + # via + # mlflow-skinny + # opentelemetry-api +itsdangerous==2.2.0 + # via flask +jinja2==3.1.6 + # via + # flask + # mlflow +jiter==0.9.0 + # via openai +joblib==1.4.2 + # via scikit-learn +kiwisolver==1.4.8 + # via matplotlib +mako==1.3.10 + # via alembic +markdown==3.8 + # via mlflow +markupsafe==3.0.2 + # via + # jinja2 + # mako + # werkzeug +matplotlib==3.10.1 + # via mlflow +mcp==1.6.0 + # via unitycatalog-mcp (pyproject.toml) +mlflow==2.22.0 + # via databricks-openai +mlflow-skinny==2.22.0 + # via + # databricks-ai-bridge + # databricks-vectorsearch + # mlflow +multidict==6.4.3 + # via + # aiohttp + # yarl +nest-asyncio==1.6.0 + # via unitycatalog-ai +numpy==1.26.4 + # via + # contourpy + # databricks-connect + # matplotlib + # mlflow + # pandas + # scikit-learn + # scipy +openai==1.76.0 + # via + # databricks-openai + # unitycatalog-openai +opentelemetry-api==1.32.1 + # via + # mlflow-skinny + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-sdk==1.32.1 + # via mlflow-skinny +opentelemetry-semantic-conventions==0.53b1 + # via opentelemetry-sdk +packaging==24.2 + # via + # databricks-connect + # deprecation + # gunicorn + # matplotlib + # mlflow-skinny +pandas==2.2.3 + # via + # databricks-ai-bridge + # databricks-connect + # mlflow + # unitycatalog-ai +pillow==11.2.1 + # via matplotlib +propcache==0.3.1 + # via + # aiohttp + # yarl +protobuf==5.29.4 + # via + # databricks-vectorsearch + # googleapis-common-protos + # grpcio-status + # mlflow-skinny +py4j==0.10.9.7 + # via databricks-connect +pyarrow==19.0.1 + # via + # databricks-connect + # mlflow +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.2 + # via google-auth +pydantic==2.11.3 + # via + # unitycatalog-mcp 
(pyproject.toml) + # databricks-ai-bridge + # databricks-openai + # fastapi + # mcp + # mlflow-skinny + # openai + # pydantic-settings + # unitycatalog-ai + # unitycatalog-client + # unitycatalog-openai +pydantic-core==2.33.1 + # via pydantic +pydantic-settings==2.9.1 + # via + # unitycatalog-mcp (pyproject.toml) + # mcp +pyparsing==3.2.3 + # via matplotlib +python-dateutil==2.9.0.post0 + # via + # graphene + # matplotlib + # pandas + # unitycatalog-client +python-dotenv==1.1.0 + # via pydantic-settings +pytz==2025.2 + # via pandas +pyyaml==6.0.2 + # via mlflow-skinny +regex==2024.11.6 + # via tiktoken +requests==2.32.3 + # via + # databricks-sdk + # databricks-vectorsearch + # docker + # mlflow-skinny + # tiktoken +rsa==4.9.1 + # via google-auth +scikit-learn==1.6.1 + # via mlflow +scipy==1.15.2 + # via + # mlflow + # scikit-learn +setuptools==80.0.0 + # via databricks-connect +six==1.17.0 + # via + # databricks-connect + # python-dateutil +smmap==5.0.2 + # via gitdb +sniffio==1.3.1 + # via + # anyio + # openai +sqlalchemy==2.0.40 + # via + # alembic + # mlflow +sqlparse==0.5.3 + # via mlflow-skinny +sse-starlette==2.3.3 + # via mcp +starlette==0.46.2 + # via + # fastapi + # mcp + # sse-starlette +tabulate==0.9.0 + # via databricks-ai-bridge +threadpoolctl==3.6.0 + # via scikit-learn +tiktoken==0.9.0 + # via databricks-ai-bridge +tqdm==4.67.1 + # via openai +typing-extensions==4.13.2 + # via + # alembic + # anyio + # databricks-ai-bridge + # fastapi + # graphene + # mlflow-skinny + # openai + # opentelemetry-sdk + # pydantic + # pydantic-core + # sqlalchemy + # typing-inspection + # unitycatalog-ai + # unitycatalog-client +typing-inspection==0.4.0 + # via + # pydantic + # pydantic-settings +tzdata==2025.2 + # via pandas +unitycatalog-ai==0.3.0 + # via + # unitycatalog-mcp (pyproject.toml) + # unitycatalog-openai +unitycatalog-client==0.2.1 + # via unitycatalog-ai +unitycatalog-openai==0.2.0 + # via databricks-openai +urllib3==2.4.0 + # via + # docker + # requests 
+ # unitycatalog-client +uvicorn==0.34.2 + # via + # unitycatalog-mcp (pyproject.toml) + # mcp + # mlflow-skinny +werkzeug==3.1.3 + # via flask +wrapt==1.17.2 + # via deprecated +yarl==1.20.0 + # via aiohttp +zipp==3.21.0 + # via importlib-metadata diff --git a/servers/unity_catalog/src/unitycatalog_mcp/__init__.py b/servers/unity_catalog/src/unitycatalog_mcp/__init__.py index 1b0a65b..36d3bb9 100644 --- a/servers/unity_catalog/src/unitycatalog_mcp/__init__.py +++ b/servers/unity_catalog/src/unitycatalog_mcp/__init__.py @@ -1,20 +1,8 @@ -import sys -from traceback import format_exc -from unitycatalog_mcp.server import start -from unitycatalog_mcp.version import VERSION - -from databricks.sdk.config import with_user_agent_extra - - -def main() -> None: - import asyncio - - with_user_agent_extra(key="unitycatalog-mcp", value=VERSION) - asyncio.run(start()) - - -if __name__ == "__main__": - try: - main() - except Exception as _: - print(format_exc(), file=sys.stderr) +import uvicorn + +def main(): + uvicorn.run( + "unitycatalog_mcp.server:app", + port=8000, + reload=True, + ) diff --git a/servers/unity_catalog/src/unitycatalog_mcp/__main__.py b/servers/unity_catalog/src/unitycatalog_mcp/__main__.py index edab862..929e38e 100644 --- a/servers/unity_catalog/src/unitycatalog_mcp/__main__.py +++ b/servers/unity_catalog/src/unitycatalog_mcp/__main__.py @@ -1,4 +1,30 @@ -from unitycatalog_mcp import main +#!/usr/bin/env python3 +""" +Entry point for unitycatalog_mcp server. +Parses --schema/-s and --genie-space-ids/-g via pydantic-settings, +then launches Uvicorn programmatically so that custom flags +aren't swallowed by Uvicorn's CLI. 
+""" +import uvicorn +from unitycatalog_mcp.server import app + +# import src.unitycatalog_mcp.cli as cli_mod + + +def main(): + # Parse CLI args into a CliSettings instance + # settings = get_settings() + + # Monkey-patch get_settings() so the server code sees these values + # cli_mod.get_settings = lambda: settings + + # Launch Uvicorn programmatically (no CLI flag conflicts) + uvicorn.run( + app, + host="localhost", + port=8000, + reload=True, + ) if __name__ == "__main__": diff --git a/servers/unity_catalog/src/unitycatalog_mcp/server.py b/servers/unity_catalog/src/unitycatalog_mcp/server.py index 100b7e4..84d811c 100644 --- a/servers/unity_catalog/src/unitycatalog_mcp/server.py +++ b/servers/unity_catalog/src/unitycatalog_mcp/server.py @@ -1,66 +1,109 @@ import logging import collections +import os from mcp.server import NotificationOptions, Server -from mcp.server.stdio import stdio_server -from mcp.types import Tool as ToolSpec -from unitycatalog_mcp.tools import ( - list_all_tools, - Content, -) +from mcp.server.sse import SseServerTransport +from starlette.applications import Starlette +from starlette.routing import Route, Mount +from starlette.requests import Request +from starlette.responses import FileResponse from unitycatalog_mcp.cli import get_settings - from unitycatalog_mcp.tools.base_tool import BaseTool +from unitycatalog_mcp.tools import list_all_tools, Content from unitycatalog_mcp.version import VERSION -# The logger instance for this module. LOGGER = logging.getLogger(__name__) def _warn_if_duplicate_tool_names(tools: list[BaseTool]): tool_names = [tool.tool_spec.name for tool in tools] - duplicate_tool_names = [ - item for item, count in collections.Counter(tool_names).items() if count > 1 + duplicates = [ + name for name, cnt in collections.Counter(tool_names).items() if cnt > 1 ] - if duplicate_tool_names: + if duplicates: LOGGER.warning( - f"Duplicate tool names detected: {duplicate_tool_names}. 
For each duplicate tool name, " - f"picking one of the tools with that name. This can happen if your UC schema " - f"contains a function and a vector search index with the same name" + f"Duplicate tool names detected: {duplicates}. Picking one per name." ) def get_tools_dict(settings) -> dict[str, BaseTool]: - """ - Returns a dictionary of all tools with their names as keys and tool objects as values. - """ - # TODO: if LLM tool name length limits allow, dedup tool names by tool type - # (e.g. function name and vector search index name) - all_tools = list_all_tools(settings=get_settings()) + all_tools = list_all_tools(settings=settings) _warn_if_duplicate_tool_names(all_tools) - return { - tool.tool_spec.name: tool for tool in list_all_tools(settings=get_settings()) - } + # build dict once + return {tool.tool_spec.name: tool for tool in all_tools} -async def start() -> None: - server = Server(name="mcp-unitycatalog", version=VERSION) - tools_dict = get_tools_dict(settings=get_settings()) +# ────── Instantiate MCP server + tools ────── +settings = get_settings() +tools_dict = get_tools_dict(settings) +server = Server(name="mcp-unitycatalog", version=VERSION) - @server.list_tools() - async def list_tools() -> list[ToolSpec]: - return [tool.tool_spec for tool in tools_dict.values()] - @server.call_tool() - async def call_tool(name: str, arguments: dict) -> list[Content]: - tool = tools_dict[name] - return tool.execute(**arguments) +@server.list_tools() +async def list_tools() -> list[BaseTool]: + return [tool.tool_spec for tool in tools_dict.values()] - options = server.create_initialization_options( - notification_options=NotificationOptions( - resources_changed=True, tools_changed=True - ) + +@server.call_tool() +async def call_tool(name: str, arguments: dict) -> list[Content]: + tool = tools_dict[name] + return tool.execute(**arguments) + + +options = server.create_initialization_options( + notification_options=NotificationOptions( + resources_changed=True, + 
tools_changed=True, ) - async with stdio_server() as (read_stream, write_stream): +) + +# ────── SSE transport setup ────── +sse = SseServerTransport("/messages") + + +async def handle_sse(scope, receive, send): + # this opens the SSE stream at POST /sse + async with sse.connect_sse(scope, receive, send) as (read_stream, write_stream): + await server.run(read_stream, write_stream, options, raise_exceptions=True) + + +async def handle_messages(scope, receive, send): + # this handles POST /messages from the client + await sse.handle_post_message(scope, receive, send) + + +def create_starlette_app(debug: bool = False) -> Starlette: + """Create a Starlette application that can serve the provided mcp server with SSE.""" + sse = SseServerTransport("/messages/") + + async def handle_sse(request: Request) -> None: + async with sse.connect_sse( + request.scope, + request.receive, + request._send, # noqa: SLF001 + ) as (read_stream, write_stream): + await server.run( + read_stream, + write_stream, + server.create_initialization_options(), + ) + + return Starlette( + debug=debug, + routes=[ + Route("/sse", endpoint=handle_sse, methods=["POST", "GET"]), + Mount("/messages/", app=sse.handle_post_message), + Route( + "/", + endpoint=lambda request: FileResponse( + os.path.join(os.path.dirname(__file__), "static", "index.html") + ), + ), + ], + ) + + +# ────── Expose as ASGI app ────── +app = create_starlette_app() diff --git a/servers/unity_catalog/src/unitycatalog_mcp/tools/__init__.py b/servers/unity_catalog/src/unitycatalog_mcp/tools/__init__.py index 567b08f..aae6753 100644 --- a/servers/unity_catalog/src/unitycatalog_mcp/tools/__init__.py +++ b/servers/unity_catalog/src/unitycatalog_mcp/tools/__init__.py @@ -7,7 +7,7 @@ from unitycatalog_mcp.tools.genie import list_genie_tools from unitycatalog_mcp.tools.functions import list_uc_function_tools -from unitycatalog_mcp.tools.vector_search import +# from unitycatalog_mcp.tools.vector_search import 
list_vector_search_tools Content: TypeAlias = Union[TextContent, ImageContent, EmbeddedResource] @@ -15,6 +15,6 @@ def list_all_tools(settings): return ( list_genie_tools(settings) - + list_vector_search_tools(settings) + list_uc_function_tools(settings) + # + list_vector_search_tools(settings) ) diff --git a/servers/unity_catalog/uv.lock b/servers/unity_catalog/uv.lock index 15dac59..a78e634 100644 --- a/servers/unity_catalog/uv.lock +++ b/servers/unity_catalog/uv.lock @@ -144,6 +144,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, ] +[[package]] +name = "anthropic" +version = "0.50.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/85/4dd9f80da0727c56d7e7f7c627cb724edd9e6df062df6ecc0e90f06e6dbb/anthropic-0.50.0.tar.gz", hash = "sha256:42175ec04ce4ff2fa37cd436710206aadff546ee99d70d974699f59b49adc66f", size = 213021 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/ae/975f97ad5581a9e187a3717e21d79d6c7ad6be926fee9aa8a15b3d9f8f37/anthropic-0.50.0-py3-none-any.whl", hash = "sha256:defbd79327ca2fa61fd7b9eb2f1627dfb1f69c25d49288c52e167ddb84574f80", size = 245291 }, +] + [[package]] name = "anyio" version = "4.8.0" @@ -2568,10 +2586,12 @@ dependencies = [ { name = "pydantic" }, { name = "pydantic-settings" }, { name = "unitycatalog-ai" }, + { name = "uvicorn" }, ] [package.dev-dependencies] dev = [ + { name = "anthropic" }, { name = "pyright" }, { name = "pytest" }, { name = "ruff" }, @@ -2585,10 +2605,12 @@ requires-dist = [ { name = "pydantic", specifier = ">=2.10.6" }, { name = 
"pydantic-settings", specifier = ">=2.7.1" }, { name = "unitycatalog-ai", specifier = ">=0.1.0" }, + { name = "uvicorn", specifier = ">=0.34.0" }, ] [package.metadata.requires-dev] dev = [ + { name = "anthropic" }, { name = "pyright", specifier = ">=1.1.393" }, { name = "pytest", specifier = ">=8.3.4" }, { name = "ruff", specifier = ">=0.9.4" },