diff --git a/python-mcp-crdb-docs/Dockerfile b/python-mcp-crdb-docs/Dockerfile new file mode 100644 index 00000000000..2108d437bb0 --- /dev/null +++ b/python-mcp-crdb-docs/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.11-slim AS build +WORKDIR /app +COPY pyproject.toml README.md ./ +COPY src ./src +RUN pip install --upgrade pip && pip install . + +FROM gcr.io/distroless/python3 +ENV PYTHONPATH=/usr/local/lib/python3.11/site-packages +WORKDIR /app +COPY --from=build /usr/local /usr/local +COPY --from=build /app/src /app/src +ENTRYPOINT ["python", "-m", "python_mcp_crdb_docs.server"] diff --git a/python-mcp-crdb-docs/Makefile b/python-mcp-crdb-docs/Makefile new file mode 100644 index 00000000000..7c7650aa975 --- /dev/null +++ b/python-mcp-crdb-docs/Makefile @@ -0,0 +1,16 @@ +.PHONY: install lint test run docker-build + +install: +pip install -e .[dev] + +lint: +python -m compileall src + +test: +pytest + +run: +python -m python_mcp_crdb_docs.server + +docker-build: +docker build -t python-mcp-crdb-docs . 
diff --git a/python-mcp-crdb-docs/README.md b/python-mcp-crdb-docs/README.md new file mode 100644 index 00000000000..2d48b31842b --- /dev/null +++ b/python-mcp-crdb-docs/README.md @@ -0,0 +1,25 @@ +# Python MCP Server for CockroachDB Docs + +This package ships a FastMCP-compatible server that exposes CockroachDB documentation via: + +* `search_docs` – Algolia-backed search results +* `get_page` – Fetch Markdown for a docs page, with HTML-to-source discovery +* `list_versions` – Enumerate available version folders from GitHub +* `doc://` resource scheme – Direct access to Markdown via version + path + +## Local development + +```bash +cd python-mcp-crdb-docs +pip install -e .[dev] +pytest +FASTMCP_LOG_LEVEL=INFO python -m python_mcp_crdb_docs.server +``` + +Configuration is handled with environment variables: + +* `ALGOLIA_APP_ID` (default `HPNPWALV9D`) +* `ALGOLIA_SEARCH_KEY` (default search-only key for staging index) +* `ALGOLIA_INDEX` (default `stage_cockroach_docs`) + +All HTTP requests are routed through a hardened client that enforces a 10s timeout, 512KB body limit, and allowlisted domains. 
# Host suffixes the server may contact. Each entry starts with a dot so that
# "evil-github.com" cannot match ".github.com"; the bare domain itself is
# accepted separately in ensure_allowed().
ALLOWED_HOST_SUFFIXES = (
    ".cockroachlabs.com",
    ".github.com",
    ".githubusercontent.com",
    ".algolia.net",
    ".algolianet.com",
)


class DomainBlockedError(RuntimeError):
    """Raised when a URL is outside the allowlist."""


def ensure_allowed(url: str) -> None:
    """Raise ``DomainBlockedError`` unless *url*'s host is allowlisted.

    A host passes when it equals an allowlisted domain (suffix without its
    leading dot) or ends with one of ``ALLOWED_HOST_SUFFIXES``. URLs without
    a parseable host are treated as an empty host name and rejected.
    """
    hostname = urlparse(url).hostname or ""
    permitted = any(
        hostname == suffix.lstrip(".") or hostname.endswith(suffix)
        for suffix in ALLOWED_HOST_SUFFIXES
    )
    if not permitted:
        raise DomainBlockedError(f"URL host '{hostname}' not in allowlist")
K = TypeVar("K")
V = TypeVar("V")


class TTLCache(Generic[K, V]):
    """In-memory cache with a per-entry time-to-live and LRU eviction.

    Entries are kept in an ``OrderedDict`` ordered from least to most
    recently used. Each value is stored together with its absolute expiry
    time (``time.time()`` based). Expired entries are dropped lazily on
    every ``get``/``set``; when more than ``maxsize`` entries remain, the
    least recently used ones are evicted.
    """

    def __init__(self, maxsize: int = 128, ttl: float = 60.0) -> None:
        """Create a cache holding at most *maxsize* entries for *ttl* seconds each."""
        self.maxsize = maxsize
        self.ttl = ttl
        self._data: "OrderedDict[K, tuple[float, V]]" = OrderedDict()

    def _purge(self) -> None:
        """Drop expired entries, then evict LRU entries beyond ``maxsize``."""
        now = time.time()
        expired = [key for key, (expires, _) in self._data.items() if expires < now]
        for key in expired:
            del self._data[key]
        # Guard on emptiness so a zero/negative maxsize cannot call popitem()
        # on an empty dict (which raises KeyError).
        while self._data and len(self._data) > self.maxsize:
            self._data.popitem(last=False)

    def get(self, key: K) -> Optional[V]:
        """Return the cached value for *key*, or ``None`` if absent or expired.

        A successful lookup marks the entry as most recently used.
        """
        self._purge()
        entry = self._data.get(key)
        if entry is None:
            return None
        expires, value = entry
        if expires < time.time():
            # Expired between the purge above and now.
            del self._data[key]
            return None
        self._data.move_to_end(key)
        return value

    def set(self, key: K, value: V) -> None:
        """Store *value* under *key* with a fresh TTL and mark it most recently used."""
        self._data[key] = (time.time() + self.ttl, value)
        # Assigning to an existing key keeps its old position in an
        # OrderedDict; move it explicitly so a rewritten entry is not the
        # first eviction candidate.
        self._data.move_to_end(key)
        self._purge()

    def clear(self) -> None:
        """Remove every entry."""
        self._data.clear()
async def _fetch_first(urls: list[str]) -> Optional[str]:
    """Return the body of the first URL in *urls* that answers with HTTP 200.

    The candidates are requested in order over a single client obtained from
    ``create_client()``. ``None`` is returned when no candidate responds
    with status 200 (including when *urls* is empty).
    """
    async with create_client() as http:
        for candidate in urls:
            response = await http.get(candidate)
            if response.status_code != 200:
                continue
            return response.text
    return None
build_raw_url(version, path) + content = await _fetch_first(candidates) + if content: + _MD_CACHE.set(cache_key, content) + return content + + +def _extract_source_href(html: str) -> Optional[str]: + anchor_pattern = re.compile(r"]+href=\"([^\"]+)\"[^>]*>(.*?)", re.IGNORECASE | re.DOTALL) + tag_pattern = re.compile(r"<[^>]+>") + for match in anchor_pattern.finditer(html): + text = tag_pattern.sub("", match.group(2)).strip().lower() + if "view page source" in text: + return match.group(1) + return None + + +async def fetch_markdown_from_html_page(url: str) -> Optional[str]: + cache_key = f"html:{url}" + cached = _MD_CACHE.get(cache_key) + if cached: + return cached + async with create_client() as client: + resp = await client.get(url) + if resp.status_code != 200: + return None + href = _extract_source_href(resp.text) + if not href: + return None + source_url = href + if source_url.startswith("//"): + source_url = f"https:{source_url}" + elif source_url.startswith("/"): + source_url = f"https://www.cockroachlabs.com{source_url}" + raw_candidates = [source_url.replace("/blob/", "/raw/")] + content = await _fetch_first(raw_candidates) + if content: + _MD_CACHE.set(cache_key, content) + return content + + +async def list_versions_from_github() -> list[str]: + cache_key = "versions" + cached = _MD_CACHE.get(cache_key) + if cached: + return cached.split(",") + async with create_client() as client: + resp = await client.get("https://api.github.com/repos/cockroachdb/docs/contents/src") + resp.raise_for_status() + data = resp.json() + versions = sorted(item["name"] for item in data if item.get("type") == "dir") + _MD_CACHE.set(cache_key, ",".join(versions)) + return versions diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/http.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/http.py new file mode 100644 index 00000000000..021992263f6 --- /dev/null +++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/http.py @@ -0,0 +1,32 @@ +"""HTTP client with 
class SafeAsyncClient(httpx.AsyncClient):
    """``httpx.AsyncClient`` that applies the allowlist, timeout and size guard.

    The checks live in ``request`` because that is the method the
    convenience helpers (``get``, ``post``, ...) delegate to. A method named
    ``_request`` is not part of ``httpx.AsyncClient`` and is never invoked
    by it, so guards placed there would silently never run.
    """

    async def request(self, method: str, url: Any, **kwargs: Any) -> httpx.Response:  # type: ignore[override]
        """Send a request after validating *url* against the allowlist.

        Raises:
            DomainBlockedError: if the URL's host is not allowlisted.
            httpx.HTTPStatusError: if the response body exceeds
                ``MAX_RESPONSE_BYTES``.
        """
        target = str(url)
        ensure_allowed(target)
        # get()/post() always pass timeout=USE_CLIENT_DEFAULT explicitly, so
        # setdefault() alone would never install our timeout.
        if kwargs.get("timeout", httpx.USE_CLIENT_DEFAULT) is httpx.USE_CLIENT_DEFAULT:
            kwargs["timeout"] = REQUEST_TIMEOUT
        debug("http_request", method=method, url=target)
        response = await super().request(method, url, **kwargs)
        # NOTE: the body has already been read at this point, so this check
        # rejects oversized responses but does not bound download memory.
        if len(response.content) > MAX_RESPONSE_BYTES:
            raise httpx.HTTPStatusError(
                "response too large", request=response.request, response=response
            )
        return response

    async def _request(self, *args: Any, **kwargs: Any) -> httpx.Response:
        """Backward-compatible alias for ``request`` (same arguments)."""
        return await self.request(*args, **kwargs)
def raw_url_from_github_tree(tree_url: str) -> str:
    """Translate a GitHub tree/blob style URL into its raw-content URL.

    The URL path is expected to look like
    ``/{owner}/{repo}/{kind}/{branch}/{path...}``; the third segment
    (``kind``) is ignored.

    Raises:
        ValueError: if the path has fewer than five non-empty segments.
    """
    segments = [part for part in urlparse(tree_url).path.split("/") if part]
    if len(segments) < 5:
        raise ValueError("Unexpected GitHub tree URL")
    owner, repo, _, branch, *path = segments
    raw_path = "/".join(path)
    return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{raw_path}"
async def resolve_doc_uri(uri: str) -> ResourceResult:
    """Resolve a ``doc://{version}/{path}`` URI to its Markdown content.

    Raises:
        ValueError: if *uri* does not start with ``doc://``, lacks a version
            or path, or contains a ``..`` segment.
        FileNotFoundError: if no Markdown could be fetched for the path.
    """
    scheme = "doc://"
    # Require the scheme at the very start; splitting on it anywhere in the
    # string would accept inputs such as "http://host/doc://v/p".
    if not uri.startswith(scheme):
        raise ValueError("Resource must look like doc://{version}/{path}")
    version, _, path = uri[len(scheme):].partition("/")
    if not version or not path:
        raise ValueError("Resource must look like doc://{version}/{path}")
    # The URI comes from the client and is interpolated into a raw GitHub
    # URL; refuse parent-directory segments so it stays under src/{version}.
    if ".." in (version, *path.split("/")):
        raise ValueError("Resource must not contain '..' path segments")
    markdown = await fetch_markdown_from_raw(version, path)
    if markdown is None:
        raise FileNotFoundError(path)
    return TextResource(content=markdown, mime_type="text/markdown")
def main() -> None:
    """Run the module-level ``mcp`` server over an HTTP transport.

    The transport binds to ``0.0.0.0``; the port is read from the ``PORT``
    environment variable and defaults to 3000.
    """
    port = int(os.getenv("PORT", "3000"))
    mcp.run(transport=HTTPTransport(host="0.0.0.0", port=port))
async def get_page(args: GetPageArgs) -> ToolResult:
    """Fetch the Markdown for a docs page identified by URL or version+path.

    ``args.url`` takes precedence: when set, the Markdown is discovered from
    the HTML page. Otherwise both ``args.version`` and ``args.path`` are
    required and the Markdown is fetched from the raw source.

    Raises:
        ValueError: if neither a URL nor a version+path pair is supplied.
        FileNotFoundError: if no Markdown could be fetched.
    """
    if args.url:
        resource = args.url
        markdown = await fetch_markdown_from_html_page(args.url)
    elif not (args.version and args.path):
        raise ValueError("Either url or version+path must be provided")
    else:
        resource = f"doc://{args.version}/{args.path}"
        markdown = await fetch_markdown_from_raw(args.version, args.path)

    if markdown is None:
        raise FileNotFoundError("Unable to fetch Markdown")

    payload = {"resource": resource, "markdown": markdown}
    return ToolResult(content=payload)
async def search_docs(args: SearchArgs) -> ToolResult:
    """Query the Algolia index and return a trimmed list of hits.

    Each hit is reduced to ``title`` (hierarchy ``lvl0`` or the hit's
    ``title``), ``url``, the raw ``hierarchy`` mapping, and a ``snippet``
    with HTML tags stripped. The result also carries ``nbHits`` and ``page``
    from the Algolia response.

    Raises:
        RuntimeError: if no Algolia index is available.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    if _index is None:
        raise RuntimeError("algoliasearch dependency not installed")
    params = {"page": args.page, "hitsPerPage": args.hits_per_page}
    # The index client is synchronous; run it in a worker thread so the
    # network round-trip does not block the event loop.
    res = await asyncio.to_thread(_index.search, args.query, params)
    hits: List[Dict[str, Any]] = []
    for hit in res.get("hits") or []:
        # Use "or {}" rather than a .get() default: the key may be present
        # with a null value, which a default would not replace.
        hierarchy = hit.get("hierarchy") or {}
        snippet_content = (hit.get("_snippetResult") or {}).get("content") or {}
        hits.append(
            {
                "title": hierarchy.get("lvl0") or hit.get("title"),
                "url": hit.get("url"),
                "hierarchy": hit.get("hierarchy"),
                "snippet": _strip_html(snippet_content.get("value")),
            }
        )
    return ToolResult(
        content={
            "hits": hits,
            "nbHits": res.get("nbHits", 0),
            "page": res.get("page", 0),
        }
    )
@pytest.mark.asyncio
async def test_list_versions(monkeypatch):
    """list_versions_from_github keeps only directories and returns them sorted."""

    class DummyClient:
        # Minimal async-context-manager stand-in for the HTTP client.
        async def __aenter__(self):
            return self

        async def __aexit__(self, *exc):
            return False

        async def get(self, url):
            class Resp:
                status_code = 200

                def raise_for_status(self):
                    pass

                def json(self):
                    # Deliberately unsorted and mixed with a file entry so
                    # the test covers both sorting and "dir" filtering.
                    return [
                        {"type": "dir", "name": "stable"},
                        {"type": "file", "name": "README.md"},
                        {"type": "dir", "name": "current"},
                    ]

            return Resp()

    monkeypatch.setattr(fetch, "create_client", lambda: DummyClient())
    fetch._MD_CACHE.clear()
    versions = await fetch.list_versions_from_github()
    assert versions == ["current", "stable"]
def test_build_raw_url_adds_extensions():
    """An extension-less path yields a .md candidate followed by a .mdx one."""
    candidates = mapping.build_raw_url("stable", "cockroach-start")
    first, second = candidates[0], candidates[1]
    assert first.endswith("cockroach-start.md")
    assert second.endswith("cockroach-start.mdx")


def test_infer_version_path_default():
    """A versioned .html docs path maps to the matching .md source path."""
    inferred = mapping.infer_version_path("/docs/stable/cockroach-start.html")
    assert inferred.endswith("cockroach-start.md")