diff --git a/pypi-mcp-server/.actor/actor.json b/pypi-mcp-server/.actor/actor.json new file mode 100644 index 0000000..fc82ccf --- /dev/null +++ b/pypi-mcp-server/.actor/actor.json @@ -0,0 +1,14 @@ +{ + "actorSpecification": 1, + "name": "pypi-query-mcp-server", + "title": "PyPI Query MCP Server", + "description": "An Apify Actor that proxies the pypi-query-mcp-server over Streamable HTTP with optional charging and tool whitelisting.", + "version": "0.1", + "buildTag": "latest", + "usesStandbyMode": true, + "meta": { + "templateId": "python-mcp-server" + }, + "dockerfile": "../Dockerfile", + "webServerMcpPath": "/mcp" +} diff --git a/pypi-mcp-server/.actor/pay_per_event.json b/pypi-mcp-server/.actor/pay_per_event.json new file mode 100644 index 0000000..84251ca --- /dev/null +++ b/pypi-mcp-server/.actor/pay_per_event.json @@ -0,0 +1,12 @@ +{ + "apify-actor-start": { + "eventTitle": "MCP server startup", + "eventDescription": "Initial fee for starting the MCP Server Actor.", + "eventPriceUsd": 0.10 + }, + "tool-call": { + "eventTitle": "PyPI tool call", + "eventDescription": "Flat fee for any PyPI Query MCP tool invocation.", + "eventPriceUsd": 0.001 + } +} diff --git a/pypi-mcp-server/.gitignore b/pypi-mcp-server/.gitignore new file mode 100644 index 0000000..ae5f0df --- /dev/null +++ b/pypi-mcp-server/.gitignore @@ -0,0 +1,167 @@ +.mise.toml +.nvim.lua +storage + +# The rest is copied from https://github.com/github/gitignore/blob/main/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +.python-version + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +# Visual Studio Code +# Ignores the folder created by VS Code when changing workspace settings, doing debugger +# configuration, etc. Can be commented out to share Workspace Settings within a team +.vscode + +# Zed editor +# Ignores the folder created when setting Project Settings in the Zed editor. Can be commented out +# to share Project Settings within a team +.zed + +# Added by Apify CLI +node_modules diff --git a/pypi-mcp-server/Dockerfile b/pypi-mcp-server/Dockerfile new file mode 100644 index 0000000..58f09c7 --- /dev/null +++ b/pypi-mcp-server/Dockerfile @@ -0,0 +1,35 @@ +# First, specify the base Docker image. +# You can see the Docker images from Apify at https://hub.docker.com/r/apify/. +# You can also use any other image from Docker Hub. +FROM apify/actor-python:3.13 + +USER myuser + +# Second, copy just requirements.txt into the Actor image, +# since it should be the only file that affects the dependency installation in the next step, +# in order to speed up the build. +COPY --chown=myuser:myuser requirements.txt ./ + +# Install the packages specified in requirements.txt, +# print the installed Python version, pip version, +# and all installed packages with their versions for debugging. +RUN echo "Python version:" \ + && python --version \ + && echo "Pip version:" \ + && pip --version \ + && echo "Installing dependencies:" \ + && pip install -r requirements.txt \ + && echo "All installed Python packages:" \ + && pip freeze + +# Next, copy the remaining files and directories with the source code. +# Since we do this after installing the dependencies, quick builds will be really fast +# for most source file changes. +COPY --chown=myuser:myuser . ./ + +# Use compileall to ensure the runnability of the Actor Python code. +RUN python3 -m compileall -q src/ + +# Specify how to launch the source code of your Actor. +# By default, the "python3 -m ." command is run. +CMD ["python3", "-m", "src"] diff --git a/pypi-mcp-server/README.md b/pypi-mcp-server/README.md new file mode 100644 index 0000000..eddeb3c --- /dev/null +++ b/pypi-mcp-server/README.md @@ -0,0 +1,163 @@ +## PyPI Query MCP Server + +A Model Context Protocol (MCP) server that lets AI agents explore and analyze Python packages from PyPI (and private indexes). It’s deployed as an Apify Actor and proxies the excellent open-source **pypi-query-mcp-server** over Streamable HTTP, with optional per-tool charging and a configurable whitelist. 
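+
+If you prefer to call the server programmatically rather than through an MCP client app, a minimal sketch using the MCP Python SDK looks roughly like this (the connection URL and token are described below; the `package_name` argument is an assumption — check the schemas returned by `list_tools` for the exact parameter names):
+
+```python
+import asyncio
+
+from mcp.client.session import ClientSession
+from mcp.client.streamable_http import streamablehttp_client
+
+URL = 'https://mcp-servers--pypi-query-mcp-server.apify.actor/mcp'
+HEADERS = {'Authorization': 'Bearer YOUR_APIFY_TOKEN'}
+
+
+async def main() -> None:
+    # streamablehttp_client yields read/write streams plus a session callback we don't need here.
+    async with streamablehttp_client(url=URL, headers=HEADERS) as (read_stream, write_stream, _):
+        async with ClientSession(read_stream, write_stream) as session:
+            await session.initialize()
+            tools = await session.list_tools()
+            print([tool.name for tool in tools.tools])
+            # Argument name assumed; verify against the tool schema from list_tools().
+            result = await session.call_tool('get_package_info', {'package_name': 'requests'})
+            print(result)
+
+
+asyncio.run(main())
+```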
+ +**About this MCP Server:** To understand how to connect to and utilize this MCP server, please refer to the official Model Context Protocol documentation at [mcp.apify.com](https://mcp.apify.com). + +--- + +## Connection URL + +MCP clients can connect to this server at: + +```text +https://mcp-servers--pypi-query-mcp-server.apify.actor/mcp +``` + +--- + +## Client Configuration + +Add this to your MCP client configuration: + +```json +{ + "mcpServers": { + "pypi-query": { + "url": "https://mcp-servers--pypi-query-mcp-server.apify.actor/mcp", + "headers": { + "Authorization": "Bearer YOUR_APIFY_TOKEN" + } + } + } +} +``` + +**Note:** Replace `YOUR_APIFY_TOKEN` with your Apify API token (find it in the [Apify Console](https://console.apify.com/account/integrations)). + +--- + +## 🚩 Claim this MCP server + +All credit to the original authors of **pypi-query-mcp-server**: +To claim this server on Apify, please email [ai@apify.com](mailto:ai@apify.com). + +--- + +## What this server does + +- Proxies the **PyPI Query MCP Server** to HTTP so any MCP client can connect. +- Supports **per-tool charging** (via Apify’s `Actor.charge`) and a **tool whitelist**. +- Passes through environment variables for public mirrors and **private package indexes**. +- Runs reliably in **Apify Standby** with an `/mcp` endpoint and a small helper HTML page at `/`. + +--- + +## Features + +- Look up package info, versions, dependencies, and compatibility. +- Resolve dependency trees and detect conflicts. +- Gather download stats, trends, and top packages. +- Analyze package quality and security (if enabled upstream). +- Plan upgrades, migrations, and environment updates. + +--- + +## Available Tools + +This server exposes the PyPI tools provided by the upstream server (whitelisted by default). Tool identifiers: + +- `get_package_info` +- `get_package_versions` +- `get_package_dependencies` +- `check_package_python_compatibility` +- `get_package_compatible_python_versions` +- `resolve_dependencies` +- `download_package` +- `get_download_statistics` +- `get_download_trends` +- `get_top_downloaded_packages` +- `analyze_package_quality` +- `compare_packages` +- `suggest_alternatives` +- `resolve_dependency_conflicts` +- `plan_version_upgrade` +- `audit_security_risks` +- `plan_package_migration` +- `generate_migration_checklist` +- `analyze_environment_dependencies` +- `check_outdated_packages` +- `generate_update_plan` +- `analyze_daily_trends` +- `find_trending_packages` +- `track_package_updates` + +> Note: The exact set may evolve with upstream releases. This proxy can be configured to allow all tools or only a curated subset. + +--- + +## Environment Variables + +The proxy launches the upstream server via `uvx` (stdio) and forwards these environment variables: + +- Public indexes & mirrors + - `PYPI_INDEX_URL` (default: `https://pypi.org/pypi`) + - `PYPI_INDEX_URLS`, `PYPI_EXTRA_INDEX_URLS` (comma-separated) +- Caching & logging + - `PYPI_CACHE_TTL` (default: `3600`) + - `PYPI_LOG_LEVEL` (default: `INFO`) +- Networking + - `PYPI_REQUEST_TIMEOUT` (default: `30`) +- Private repository support (optional) + - `PYPI_PRIVATE_PYPI_URL` + - `PYPI_PRIVATE_PYPI_USERNAME` + - `PYPI_PRIVATE_PYPI_PASSWORD` +- Advanced (optional) + - `PYPI_DEPENDENCY_MAX_DEPTH` (default: `5`) + - `PYPI_DEPENDENCY_MAX_CONCURRENT` (default: `10`) + - `PYPI_ENABLE_SECURITY_ANALYSIS` (`true`|`false`, default: `false`) + +Set these in your Apify Actor (Secrets / Environment) as needed. 
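+
+For reference, the proxy forwards these variables to the upstream stdio server roughly like this (a condensed sketch of what `src/main.py` does; only a few variables shown):
+
+```python
+import os
+
+from mcp.client.stdio import StdioServerParameters
+
+# The upstream PyPI Query MCP server is launched via uvx over stdio,
+# with the environment variables above passed through.
+MCP_SERVER_PARAMS = StdioServerParameters(
+    command='uvx',
+    args=['--from', 'pypi-query-mcp-server', 'pypi-query-mcp'],
+    env={
+        'PYPI_INDEX_URL': os.getenv('PYPI_INDEX_URL', 'https://pypi.org/pypi'),
+        'PYPI_CACHE_TTL': os.getenv('PYPI_CACHE_TTL', '3600'),
+        'PYPI_LOG_LEVEL': os.getenv('PYPI_LOG_LEVEL', 'INFO'),
+        'PYPI_REQUEST_TIMEOUT': os.getenv('PYPI_REQUEST_TIMEOUT', '30'),
+    },
+)
+```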
+ +--- + +## Usage Examples + +Ask your MCP client to: + +- “Get info for `requests`.” → `get_package_info` +- “Show versions for `pydantic`.” → `get_package_versions` +- “Resolve dependencies for `fastapi==0.115.0`.” → `resolve_dependencies` +- “Are `httpx 0.27` and Python 3.12 compatible?” → `check_package_python_compatibility` +- “Top downloads last week” → `get_top_downloaded_packages` +- “Compare `requests` vs `httpx`” → `compare_packages` + +--- + +## Pricing / Charging (optional) + +This server can charge per MCP operation. Defaults are defined in code (`src/const.py`) and can be tuned per tool (e.g., heavier “resolve” or “download” operations can cost more). If you don’t want charging, remove or relax the whitelist and omit charging hooks. + +--- + +## Local development & debugging + +- This Actor only serves in **STANDBY** on Apify. When running locally you can still start it, but the HTTP `/mcp` endpoint is designed for Standby on the platform. +- The server logs a ready-to-copy MCP JSON snippet with the endpoint URL at startup. + +--- + +## References + +To learn more: + +- [Apify SDK for Python](https://docs.apify.com/sdk/python) +- [Apify Platform](https://docs.apify.com/platform) +- [Apify MCP Server](https://docs.apify.com/platform/integrations/mcp) +- [Model Context Protocol docs](https://mcp.apify.com) +- [Webinar: Building and Monetizing MCP Servers on Apify](https://www.youtube.com/watch?v=w3AH3jIrXXo) +- [Join the Apify developer community on Discord](https://discord.com/invite/jyEM2PRvMU) + +--- + +**Attribution:** Built as a proxy over the open-source **pypi-query-mcp-server**. diff --git a/pypi-mcp-server/requirements.txt b/pypi-mcp-server/requirements.txt new file mode 100644 index 0000000..22c930f --- /dev/null +++ b/pypi-mcp-server/requirements.txt @@ -0,0 +1,9 @@ +# Actor + proxy deps +apify < 4.0.0 +apify-client +httpx>=0.24.0 +mcp==1.13.1 +pydantic>=2.0.0 +starlette>=0.37.2 +uvicorn>=0.27.0 +uv>=0.7.8 diff --git a/pypi-mcp-server/src/__init__.py b/pypi-mcp-server/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pypi-mcp-server/src/__main__.py b/pypi-mcp-server/src/__main__.py new file mode 100644 index 0000000..8a11883 --- /dev/null +++ b/pypi-mcp-server/src/__main__.py @@ -0,0 +1,6 @@ +import asyncio + +from .main import main + +# Execute the Actor entry point. +asyncio.run(main()) diff --git a/pypi-mcp-server/src/const.py b/pypi-mcp-server/src/const.py new file mode 100644 index 0000000..fb5e266 --- /dev/null +++ b/pypi-mcp-server/src/const.py @@ -0,0 +1,45 @@ +from enum import Enum + + +class ChargeEvents(str, Enum): + """Chargeable events.""" + + ACTOR_START = 'apify-actor-start' + TOOL_CALL = 'tool-call' # flat fee for ANY PyPI tool invocation + + +# Whitelist ONLY the PyPI Query MCP tools. +# All are billed under TOOL_CALL with a default count of 1. +# (Names match the pypi-query-mcp-server tool identifiers.) 
+TOOL_WHITELIST = { + # Package metadata & versions + 'get_package_info': (ChargeEvents.TOOL_CALL.value, 1), + 'get_package_versions': (ChargeEvents.TOOL_CALL.value, 1), + # Dependencies & compatibility + 'get_package_dependencies': (ChargeEvents.TOOL_CALL.value, 1), + 'check_package_python_compatibility': (ChargeEvents.TOOL_CALL.value, 1), + 'get_package_compatible_python_versions': (ChargeEvents.TOOL_CALL.value, 1), + 'resolve_dependencies': (ChargeEvents.TOOL_CALL.value, 1), + 'resolve_dependency_conflicts': (ChargeEvents.TOOL_CALL.value, 1), + # Downloads & trends + 'get_download_statistics': (ChargeEvents.TOOL_CALL.value, 1), + 'get_download_trends': (ChargeEvents.TOOL_CALL.value, 1), + 'get_top_downloaded_packages': (ChargeEvents.TOOL_CALL.value, 1), + 'analyze_daily_trends': (ChargeEvents.TOOL_CALL.value, 1), + 'find_trending_packages': (ChargeEvents.TOOL_CALL.value, 1), + 'track_package_updates': (ChargeEvents.TOOL_CALL.value, 1), + # Quality, audits, comparisons + 'analyze_package_quality': (ChargeEvents.TOOL_CALL.value, 1), + 'audit_security_risks': (ChargeEvents.TOOL_CALL.value, 1), + 'compare_packages': (ChargeEvents.TOOL_CALL.value, 1), + 'suggest_alternatives': (ChargeEvents.TOOL_CALL.value, 1), + # Planning & operations + 'plan_version_upgrade': (ChargeEvents.TOOL_CALL.value, 1), + 'plan_package_migration': (ChargeEvents.TOOL_CALL.value, 1), + 'generate_migration_checklist': (ChargeEvents.TOOL_CALL.value, 1), + 'analyze_environment_dependencies': (ChargeEvents.TOOL_CALL.value, 1), + 'check_outdated_packages': (ChargeEvents.TOOL_CALL.value, 1), + 'generate_update_plan': (ChargeEvents.TOOL_CALL.value, 1), + # Artifact ops + 'download_package': (ChargeEvents.TOOL_CALL.value, 1), +} diff --git a/pypi-mcp-server/src/event_store.py b/pypi-mcp-server/src/event_store.py new file mode 100644 index 0000000..034ae3e --- /dev/null +++ b/pypi-mcp-server/src/event_store.py @@ -0,0 +1,98 @@ +# Source https://github.com/modelcontextprotocol/python-sdk/blob/3978c6e1b91e8830e82d97ab3c4e3b6559972021/examples/servers/simple-streamablehttp/mcp_simple_streamablehttp/event_store.py +"""In-memory event store for demonstrating resumability functionality. + +This is a simple implementation intended for examples and testing, +not for production use where a persistent storage solution would be more appropriate. +""" + +import logging +from collections import deque +from dataclasses import dataclass +from uuid import uuid4 + +from mcp.server.streamable_http import ( + EventCallback, + EventId, + EventMessage, + EventStore, + StreamId, +) +from mcp.types import JSONRPCMessage + +logger = logging.getLogger(__name__) + + +@dataclass +class EventEntry: + """Represents an event entry in the event store.""" + + event_id: EventId + stream_id: StreamId + message: JSONRPCMessage + + +class InMemoryEventStore(EventStore): + """Simple in-memory implementation of the EventStore interface for resumability. + This is primarily intended for examples and testing, not for production use + where a persistent storage solution would be more appropriate. + + This implementation keeps only the last N events per stream for memory efficiency. + """ # noqa: D205 + + def __init__(self, max_events_per_stream: int = 100): # noqa: ANN204 + """Initialize the event store. 
+ + Args: + max_events_per_stream: Maximum number of events to keep per stream + """ + self.max_events_per_stream = max_events_per_stream + # for maintaining last N events per stream + self.streams: dict[StreamId, deque[EventEntry]] = {} + # event_id -> EventEntry for quick lookup + self.event_index: dict[EventId, EventEntry] = {} + + async def store_event(self, stream_id: StreamId, message: JSONRPCMessage) -> EventId: + """Stores an event with a generated event ID.""" # noqa: D401 + event_id = str(uuid4()) + event_entry = EventEntry(event_id=event_id, stream_id=stream_id, message=message) + + # Get or create deque for this stream + if stream_id not in self.streams: + self.streams[stream_id] = deque(maxlen=self.max_events_per_stream) + + # If deque is full, the oldest event will be automatically removed + # We need to remove it from the event_index as well + if len(self.streams[stream_id]) == self.max_events_per_stream: + oldest_event = self.streams[stream_id][0] + self.event_index.pop(oldest_event.event_id, None) + + # Add new event + self.streams[stream_id].append(event_entry) + self.event_index[event_id] = event_entry + + return event_id + + async def replay_events_after( + self, + last_event_id: EventId, + send_callback: EventCallback, + ) -> StreamId | None: + """Replays events that occurred after the specified event ID.""" + if last_event_id not in self.event_index: + logger.warning(f'Event ID {last_event_id} not found in store') + return None + + # Get the stream and find events after the last one + last_event = self.event_index[last_event_id] + stream_id = last_event.stream_id + stream_events = self.streams.get(last_event.stream_id, deque()) + + # Events in deque are already in chronological order + found_last = False + for event in stream_events: + if found_last: + await send_callback(EventMessage(event.message, event.event_id)) + elif event.event_id == last_event_id: + found_last = True + + return stream_id diff --git a/pypi-mcp-server/src/main.py b/pypi-mcp-server/src/main.py new file mode 100644 index 0000000..2d9a058 --- /dev/null +++ b/pypi-mcp-server/src/main.py @@ -0,0 +1,112 @@ +"""Main entry point for the MCP Server Actor (PyPI Query MCP via stdio/uvx).""" + +import os + +from apify import Actor + +from .const import TOOL_WHITELIST, ChargeEvents +from .models import ServerType +from .server import ProxyServer + +# Actor configuration +STANDBY_MODE = os.environ.get('APIFY_META_ORIGIN') == 'STANDBY' +# Bind to all interfaces (0.0.0.0) as this is running in a containerized environment (Apify Actor) +# The container's network is isolated, so this is safe +HOST = '0.0.0.0' # noqa: S104 - Required for container networking at Apify platform +PORT = (Actor.is_at_home() and int(os.environ.get('ACTOR_STANDBY_PORT') or '5001')) or 5001 +SERVER_NAME = 'pypi-query-mcp-server' # Name of the MCP server, without spaces + +# ------------------------------------------------------------------------------ +# We use stdio transport and launch the PyPI Query MCP Server via `uvx`. +# See: pip install pypi-query-mcp-server (runtime fetched by uvx with --from) +# ------------------------------------------------------------------------------ + +from mcp.client.stdio import StdioServerParameters # noqa: E402 + +server_type = ServerType.STDIO +MCP_SERVER_PARAMS = StdioServerParameters( + command='uvx', + # Explicitly pull the entry from PyPI and run it. + args=['--from', 'pypi-query-mcp-server', 'pypi-query-mcp'], + # Pass-through environment variables supported by the PyPI Query MCP server. 
+ env={ + # Public index / mirrors + 'PYPI_INDEX_URL': os.getenv('PYPI_INDEX_URL', 'https://pypi.org/pypi'), + 'PYPI_INDEX_URLS': os.getenv('PYPI_INDEX_URLS', ''), # comma-separated mirrors + 'PYPI_EXTRA_INDEX_URLS': os.getenv('PYPI_EXTRA_INDEX_URLS', ''), # optional extras + # Caching & logging + 'PYPI_CACHE_TTL': os.getenv('PYPI_CACHE_TTL', '3600'), + 'PYPI_LOG_LEVEL': os.getenv('PYPI_LOG_LEVEL', 'INFO'), + # Networking / timeouts + 'PYPI_REQUEST_TIMEOUT': os.getenv('PYPI_REQUEST_TIMEOUT', '30'), + # Private repository support (optional) + 'PYPI_PRIVATE_PYPI_URL': os.getenv('PYPI_PRIVATE_PYPI_URL', ''), + 'PYPI_PRIVATE_PYPI_USERNAME': os.getenv('PYPI_PRIVATE_PYPI_USERNAME', ''), + 'PYPI_PRIVATE_PYPI_PASSWORD': os.getenv('PYPI_PRIVATE_PYPI_PASSWORD', ''), + # Advanced knobs (optional; safe defaults) + 'PYPI_DEPENDENCY_MAX_DEPTH': os.getenv('PYPI_DEPENDENCY_MAX_DEPTH', '5'), + 'PYPI_DEPENDENCY_MAX_CONCURRENT': os.getenv('PYPI_DEPENDENCY_MAX_CONCURRENT', '10'), + 'PYPI_ENABLE_SECURITY_ANALYSIS': os.getenv('PYPI_ENABLE_SECURITY_ANALYSIS', 'false'), + }, +) + + +async def main() -> None: + """Run the MCP Server Actor. + + Flow: + 1. Initializes the Actor + 2. Charges for Actor startup + 3. Creates and starts the proxy server + 4. Configures charging for MCP operations using Actor.charge + + Charging: + - Generic MCP events (tool list/call, resource, etc.) + - PyPI-specific tool events via TOOL_WHITELIST (see const.py) + """ + async with Actor: + # Initialize and charge for Actor startup + Actor.log.info('Starting MCP Server Actor (PyPI Query MCP)') + await Actor.charge(ChargeEvents.ACTOR_START.value) + + url = os.environ.get('ACTOR_STANDBY_URL', HOST) + if not STANDBY_MODE: + msg = ( + 'Actor is designed to run in STANDBY mode only. Use the MCP server URL to connect.\n' + f'When running on Apify in standby, connect to {url}/mcp to establish a connection.\n' + 'Learn more at https://mcp.apify.com/' + ) + Actor.log.info(msg) + await Actor.exit(status_message=msg) + return + + try: + # Create and start the server with charging and whitelist enabled + Actor.log.info('Launching MCP proxy server') + Actor.log.info('Add the following configuration to your MCP client to use Streamable HTTP transport:') + Actor.log.info( + f""" + {{ + "mcpServers": {{ + "{SERVER_NAME}": {{ + "url": "{url}/mcp" + }} + }} + }} + """ + ) + + proxy_server = ProxyServer( + SERVER_NAME, + MCP_SERVER_PARAMS, + HOST, + PORT, + server_type, + actor_charge_function=Actor.charge, + tool_whitelist=TOOL_WHITELIST, + ) + await proxy_server.start() + except Exception as e: + Actor.log.exception(f'Server failed to start: {e}') + await Actor.exit() + raise diff --git a/pypi-mcp-server/src/mcp_gateway.py b/pypi-mcp-server/src/mcp_gateway.py new file mode 100644 index 0000000..3a2717e --- /dev/null +++ b/pypi-mcp-server/src/mcp_gateway.py @@ -0,0 +1,239 @@ +"""Create an MCP server that proxies requests through an MCP client. + +This server is created independent of any transport mechanism. +Source: https://github.com/sparfenyuk/mcp-proxy + +The server can optionally charge for MCP operations using a provided charging function. +This is typically used in Apify Actors to charge users for different types of MCP operations +like tool calls or resource access. 
+""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any + +from mcp import server, types + +from .const import ChargeEvents + +if TYPE_CHECKING: + from collections.abc import Awaitable, Callable + + from mcp.client.session import ClientSession + +logger = logging.getLogger('apify') + + +async def charge_mcp_operation( + charge_function: Callable[[str, int], Awaitable[Any]] | None, + event_name: str | None, + count: int = 1, +) -> None: + """Charge for an MCP operation. + + Args: + charge_function: Function to call for charging, or None if charging is disabled + event_name: The type of event to charge for + count: The number of times the event occurred (typically 1, but can be more) + """ + if not charge_function: + return + if not event_name: + return + + try: + await charge_function(event_name, count) + logger.info(f'Charged for event: {event_name} (count={count})') + except Exception: + logger.exception(f'Failed to charge for event {event_name}') + # Don't raise the exception - we want the operation to continue even if charging fails + + +async def create_gateway( # noqa: PLR0915 + client_session: ClientSession, + actor_charge_function: Callable[[str, int], Awaitable[Any]] | None = None, + tool_whitelist: dict[str, tuple[str, int]] | None = None, +) -> server.Server[object]: + """Create a server instance from a remote app. + + Args: + client_session: The MCP client session to proxy requests through + actor_charge_function: Optional function to charge for operations. + Should accept (event_name: str, count: int). + Typically, Actor.charge in Apify Actors. + If None, no charging will occur. + tool_whitelist: Optional dict mapping tool names to (event_name, default_count) tuples. + If provided, only whitelisted tools will be allowed and charged. + If None, all tools are allowed and charged under TOOL_CALL. 
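+
+    Example (illustrative only; mirrors how `src/main.py` and `src/server.py` wire this up):
+
+        whitelist = {'get_package_info': (ChargeEvents.TOOL_CALL.value, 1)}
+        proxied_server = await create_gateway(session, Actor.charge, whitelist)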
+ """ + logger.debug('Sending initialization request to remote MCP server...') + response = await client_session.initialize() + capabilities: types.ServerCapabilities = response.capabilities + + logger.debug('Configuring proxied MCP server...') + app: server.Server = server.Server(name=response.serverInfo.name, version=response.serverInfo.version) + + # ---------------------- + # Prompts (no charging) + # ---------------------- + if capabilities.prompts: + logger.debug('Capabilities: adding Prompts...') + + async def _list_prompts(_: Any) -> types.ServerResult: + result = await client_session.list_prompts() + return types.ServerResult(result) + + app.request_handlers[types.ListPromptsRequest] = _list_prompts + + async def _get_prompt(req: types.GetPromptRequest) -> types.ServerResult: + # If you wish to charge for prompt retrieval later, uncomment: + result = await client_session.get_prompt(req.params.name, req.params.arguments) + return types.ServerResult(result) + + app.request_handlers[types.GetPromptRequest] = _get_prompt + + # ---------------------- + # Resources (no charging by default) + # ---------------------- + if capabilities.resources: + logger.debug('Capabilities: adding Resources...') + + async def _list_resources(_: Any) -> types.ServerResult: + result = await client_session.list_resources() + return types.ServerResult(result) + + app.request_handlers[types.ListResourcesRequest] = _list_resources + + async def _list_resource_templates(_: Any) -> types.ServerResult: + result = await client_session.list_resource_templates() + return types.ServerResult(result) + + app.request_handlers[types.ListResourceTemplatesRequest] = _list_resource_templates + + async def _read_resource(req: types.ReadResourceRequest) -> types.ServerResult: + result = await client_session.read_resource(req.params.uri) + return types.ServerResult(result) + + app.request_handlers[types.ReadResourceRequest] = _read_resource + + # ---------------------- + # Logging level + # ---------------------- + if capabilities.logging: + logger.debug('Capabilities: adding Logging...') + + async def _set_logging_level(req: types.SetLevelRequest) -> types.ServerResult: + await client_session.set_logging_level(req.params.level) + return types.ServerResult(types.EmptyResult()) + + app.request_handlers[types.SetLevelRequest] = _set_logging_level + + # ---------------------- + # Resource subscriptions + # ---------------------- + if capabilities.resources: + logger.debug('Capabilities: adding Resource Subscriptions...') + + async def _subscribe_resource(req: types.SubscribeRequest) -> types.ServerResult: + await client_session.subscribe_resource(req.params.uri) + return types.ServerResult(types.EmptyResult()) + + app.request_handlers[types.SubscribeRequest] = _subscribe_resource + + async def _unsubscribe_resource(req: types.UnsubscribeRequest) -> types.ServerResult: + await client_session.unsubscribe_resource(req.params.uri) + return types.ServerResult(types.EmptyResult()) + + app.request_handlers[types.UnsubscribeRequest] = _unsubscribe_resource + + # ---------------------- + # Tools (whitelisting + charging per call) + # ---------------------- + if capabilities.tools: + logger.debug('Capabilities: adding Tools...') + + async def _list_tools(_: Any) -> types.ServerResult: + tools = await client_session.list_tools() + + # Filter tools to only include authorized ones if whitelist is provided + if tool_whitelist: + authorized = [tool for tool in tools.tools if tool.name in tool_whitelist] + tools.tools = authorized + + # NOTE: 
No charge for listing tools in the simplified billing model + return types.ServerResult(tools) + + app.request_handlers[types.ListToolsRequest] = _list_tools + + async def _call_tool(req: types.CallToolRequest) -> types.ServerResult: + tool_name = req.params.name + arguments = req.params.arguments or {} + + # Safe diagnostic logging for every tool call + logger.info(f"Received tool call. tool='{tool_name}', arguments={arguments}") + + # Enforce whitelist if provided + if tool_whitelist and tool_name not in tool_whitelist: + error_message = ( + f"The requested tool '{tool_name or 'unknown'}' is not authorized." + f' Authorized tools are: {list(tool_whitelist.keys())}' + ) + logger.error(f'Blocking unauthorized tool call for: {tool_name or "unknown tool"}') + return types.ServerResult( + types.CallToolResult( + content=[types.TextContent(type='text', text=error_message)], + isError=True, + ), + ) + + try: + # Execute the tool + result = await client_session.call_tool(tool_name, arguments) + logger.info(f"Tool executed successfully: '{tool_name}'") + + # Determine charging event and count + default_tool_call = (ChargeEvents.TOOL_CALL.value, 1) + event_name, default_count = ( + tool_whitelist.get(tool_name, default_tool_call) if tool_whitelist else default_tool_call + ) + + # Charge for the tool call (single flat event) + await charge_mcp_operation(actor_charge_function, event_name, default_count) + + return types.ServerResult(result) + + except Exception as e: + error_details = f"SERVER FAILED. Tool: '{tool_name}'. Arguments: {arguments}. Full exception: {e}" + logger.exception(error_details) + return types.ServerResult( + types.CallToolResult( + content=[types.TextContent(type='text', text=error_details)], + isError=True, + ), + ) + + app.request_handlers[types.CallToolRequest] = _call_tool + + # ---------------------- + # Progress notifications & completion + # ---------------------- + async def _send_progress_notification(req: types.ProgressNotification) -> None: + await client_session.send_progress_notification( + req.params.progressToken, + req.params.progress, + req.params.total, + ) + + app.notification_handlers[types.ProgressNotification] = _send_progress_notification + + async def _complete(req: types.CompleteRequest) -> types.ServerResult: + result = await client_session.complete( + req.params.ref, + req.params.argument.model_dump(), + ) + return types.ServerResult(result) + + app.request_handlers[types.CompleteRequest] = _complete + + return app diff --git a/pypi-mcp-server/src/models.py b/pypi-mcp-server/src/models.py new file mode 100644 index 0000000..31cebdf --- /dev/null +++ b/pypi-mcp-server/src/models.py @@ -0,0 +1,36 @@ +from enum import Enum +from typing import Any, TypeAlias + +import httpx +from mcp.client.stdio import StdioServerParameters +from pydantic import BaseModel, ConfigDict + + +class ServerType(str, Enum): + """Type of server to connect.""" + + STDIO = 'stdio' # Connect to a stdio server + SSE = 'sse' # Connect to an SSE server + HTTP = 'http' # Connect to an HTTP server (Streamable HTTP) + + +class RemoteServerParameters(BaseModel): + """Parameters for connecting to a Streamable HTTP or SSE-based MCP server. + + These parameters are passed either to the `streamable http_client` or `sse_client` from MCP SDK. 
+ + Attributes: + url: The URL of the HTTP or SSE server endpoint + headers: Optional HTTP headers to include in the connection request + """ + + url: str + headers: dict[str, Any] | None = None + timeout: float = 60 # HTTP timeout for regular operations + sse_read_timeout: float = 60 * 5 # Timeout for SSE read operations + auth: httpx.Auth | None = None # Optional HTTPX authentication handler + model_config = ConfigDict(arbitrary_types_allowed=True) + + +# Type alias for server parameters +ServerParameters: TypeAlias = StdioServerParameters | RemoteServerParameters diff --git a/pypi-mcp-server/src/server.py b/pypi-mcp-server/src/server.py new file mode 100644 index 0000000..f7aaffa --- /dev/null +++ b/pypi-mcp-server/src/server.py @@ -0,0 +1,300 @@ +"""Module implementing an MCP server that can be used to connect to stdio or SSE based MCP servers. + +Heavily inspired by: https://github.com/sparfenyuk/mcp-proxy +""" + +from __future__ import annotations + +import contextlib +import logging +from typing import TYPE_CHECKING, Any + +import httpx +import uvicorn +from mcp.client.session import ClientSession +from mcp.client.sse import sse_client +from mcp.client.stdio import StdioServerParameters, stdio_client +from mcp.client.streamable_http import streamablehttp_client +from mcp.server.streamable_http_manager import StreamableHTTPSessionManager +from pydantic import ValidationError +from starlette.applications import Starlette +from starlette.middleware import Middleware +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import JSONResponse, RedirectResponse, Response +from starlette.routing import Mount, Route + +from .event_store import InMemoryEventStore +from .mcp_gateway import create_gateway +from .models import RemoteServerParameters, ServerParameters, ServerType + +if TYPE_CHECKING: + from collections.abc import AsyncIterator, Awaitable, Callable + + from mcp.server import Server + from starlette import types as st + from starlette.types import Receive, Scope, Send + +logger = logging.getLogger('apify') + + +def is_html_browser(request: Request) -> bool: + """Detect if the request is from an HTML browser based on Accept header.""" + accept_header = request.headers.get('accept', '') + return 'text/html' in accept_header + + +def get_html_page(server_name: str, mcp_url: str) -> str: + """Generate simple HTML page with server URL and MCP client link.""" + return f""" + + + + {server_name} + + + +

+    <h1>{server_name}</h1>
+    <p>MCP endpoint URL:</p>
+    <pre><code>{mcp_url}</code></pre>
+    <p>Add to your MCP client (e.g. VS Code):</p>
+    <pre><code>{{
+  "mcpServers": {{
+    "{server_name.lower().replace(' ', '-')}": {{
+      "type": "http",
+      "url": "{mcp_url}",
+      "headers": {{
+        "Authorization": "Bearer YOUR_APIFY_TOKEN"
+      }}
+    }}
+  }}
+}}</code></pre>
+</body>
+</html>
+ +""" + + +def serve_html_page(server_name: str, mcp_url: str) -> Response: + """Serve HTML page for browser requests.""" + html = get_html_page(server_name, mcp_url) + return Response(content=html, media_type='text/html') + + +class McpPathRewriteMiddleware(BaseHTTPMiddleware): + """Add middleware to rewrite /mcp to /mcp/ to ensure consistent path handling. + + This is necessary so that Starlette does not return a 307 Temporary Redirect on the /mcp path, + which would otherwise trigger the OAuth flow when the MCP server is deployed on the Apify platform. + """ + + async def dispatch(self, request: Request, call_next: Callable) -> Any: + """Rewrite the request path.""" + if request.url.path == '/mcp': + request.scope['path'] = '/mcp/' + request.scope['raw_path'] = b'/mcp/' + return await call_next(request) + + +class ProxyServer: + """Main class implementing the proxy functionality using MCP SDK. + + This proxy runs a Starlette app that exposes a /mcp endpoint for Streamable HTTP transport. + It then connects to stdio or remote MCP servers and forwards the messages to the client. + Note: SSE endpoint serving has been deprecated, but SSE client connections are still supported. + + The server can optionally charge for operations using a provided charging function. + This is typically used in Apify Actors to charge users for MCP operations. + The charging function should accept an event name and optional parameters. + """ + + def __init__( # noqa: PLR0913 + self, + server_name: str, + config: ServerParameters, + host: str, + port: int, + server_type: ServerType, + actor_charge_function: Callable[[str, int], Awaitable[Any]] | None = None, + tool_whitelist: dict[str, tuple[str, int]] | None = None, + ) -> None: + """Initialize the proxy server. + + Args: + server_name: Name of the server (used in HTML page) + config: Server configuration (stdio or SSE parameters) + host: Host to bind the server to + port: Port to bind the server to + server_type: Type of server to connect (stdio, SSE, or HTTP) + actor_charge_function: Optional function to charge for operations. + Should accept (event_name: str, count: int). + Typically, Actor.charge in Apify Actors. + If None, no charging will occur. + tool_whitelist: Optional dict mapping tool names to (event_name, default_count) tuples. + If provided, only whitelisted tools will be allowed and charged. + If None, all tools are allowed without specific charging. 
+ """ + self.server_name = server_name + self.server_type = server_type + self.config = self._validate_config(self.server_type, config) + self.host: str = host + self.port: int = port + self.actor_charge_function = actor_charge_function + self.tool_whitelist = tool_whitelist + + @staticmethod + def _validate_config(client_type: ServerType, config: ServerParameters) -> ServerParameters | None: + """Validate and return the appropriate server parameters.""" + try: + match client_type: + case ServerType.STDIO: + return StdioServerParameters.model_validate(config) + case ServerType.SSE | ServerType.HTTP: + return RemoteServerParameters.model_validate(config) + case _: + raise ValueError(f'Unsupported server type: {client_type}') + except ValidationError as e: + raise ValueError(f'Invalid server configuration: {e}') from e + + @staticmethod + async def create_starlette_app(server_name: str, mcp_server: Server) -> Starlette: + """Create a Starlette app that exposes /mcp endpoint for Streamable HTTP transport.""" + event_store = InMemoryEventStore() + session_manager = StreamableHTTPSessionManager( + app=mcp_server, + event_store=event_store, # Enable resume ability for Streamable HTTP connections + json_response=False, + ) + + @contextlib.asynccontextmanager + async def lifespan(_app: Starlette) -> AsyncIterator[None]: + """Context manager for managing session manager lifecycle.""" + async with session_manager.run(): + logger.info('Application started with StreamableHTTP session manager!') + try: + yield + finally: + logger.info('Application shutting down...') + + async def handle_root(request: Request) -> st.Response: + """Handle root endpoint.""" + # Handle Apify standby readiness probe + if 'x-apify-container-server-readiness-probe' in request.headers: + return Response( + content=b'ok', + media_type='text/plain', + status_code=200, + ) + + # Browser client logic - Check if the request is from a HTML browser + if is_html_browser(request): + server_url = f'https://{request.headers.get("host", "localhost")}' + mcp_url = f'{server_url}/mcp' + return serve_html_page(server_name, mcp_url) + + return JSONResponse( + { + 'status': 'running', + 'type': 'mcp-server', + 'transport': 'streamable-http', + 'endpoints': { + 'streamableHttp': '/mcp', + }, + } + ) + + async def handle_favicon(_request: Request) -> st.Response: + """Handle favicon.ico requests by redirecting to Apify's favicon.""" + return RedirectResponse(url='https://apify.com/favicon.ico', status_code=301) + + async def handle_oauth_authorization_server(_request: Request) -> st.Response: + """Handle OAuth authorization server well-known endpoint.""" + try: + # Some MCP clients do not follow redirects, so we need to fetch the data and return it directly. 
+ async with httpx.AsyncClient() as client: + response = await client.get('https://api.apify.com/.well-known/oauth-authorization-server') + response.raise_for_status() + data = response.json() + return JSONResponse(data, status_code=200) + except Exception: + logger.exception('Error fetching OAuth authorization server data') + return JSONResponse({'error': 'Failed to fetch OAuth authorization server data'}, status_code=500) + + # ASGI handler for Streamable HTTP connections + async def handle_streamable_http(scope: Scope, receive: Receive, send: Send) -> None: + # Check if this is a GET request from a browser + if scope['method'] == 'GET': + request = Request(scope, receive) + if is_html_browser(request): + server_url = f'https://{request.headers.get("host", "localhost")}' + mcp_url = f'{server_url}/mcp' + response = serve_html_page(server_name, mcp_url) + await response(scope, receive, send) + return + + # For non-browser requests or non-GET requests, delegate to session manager + await session_manager.handle_request(scope, receive, send) + + return Starlette( + debug=True, + routes=[ + Route('/', endpoint=handle_root), + Route('/favicon.ico', endpoint=handle_favicon, methods=['GET']), + Route( + '/.well-known/oauth-authorization-server', + endpoint=handle_oauth_authorization_server, + methods=['GET'], + ), + Mount('/mcp/', app=handle_streamable_http), + ], + lifespan=lifespan, + middleware=[Middleware(McpPathRewriteMiddleware)], + ) + + async def _run_server(self, app: Starlette) -> None: + """Run the Starlette app with uvicorn.""" + config_ = uvicorn.Config(app, host=self.host, port=self.port, log_level='info', access_log=True) + server = uvicorn.Server(config_) + await server.serve() + + async def start(self) -> None: + """Start Starlette app and connect to stdio, Streamable HTTP, or SSE based MCP server.""" + logger.info(f'Starting MCP server with client type: {self.server_type} and config {self.config}') + params: dict = (self.config and self.config.model_dump(exclude_unset=True)) or {} + + if self.server_type == ServerType.STDIO: + # validate config again to prevent mypy errors + config_ = StdioServerParameters.model_validate(self.config) + async with ( + stdio_client(config_) as (read_stream, write_stream), + ClientSession(read_stream, write_stream) as session, + ): + mcp_server = await create_gateway(session, self.actor_charge_function, self.tool_whitelist) + app = await self.create_starlette_app(self.server_name, mcp_server) + await self._run_server(app) + + elif self.server_type == ServerType.SSE: + async with ( + sse_client(**params) as (read_stream, write_stream), + ClientSession(read_stream, write_stream) as session, + ): + mcp_server = await create_gateway(session, self.actor_charge_function, self.tool_whitelist) + app = await self.create_starlette_app(self.server_name, mcp_server) + await self._run_server(app) + + elif self.server_type == ServerType.HTTP: + # HTTP streamable server needs to unpack three parameters + async with ( + streamablehttp_client(**params) as (read_stream, write_stream, _), + ClientSession(read_stream, write_stream) as session, + ): + mcp_server = await create_gateway(session, self.actor_charge_function, self.tool_whitelist) + app = await self.create_starlette_app(self.server_name, mcp_server) + await self._run_server(app) + else: + raise ValueError(f'Unknown server type: {self.server_type}')