diff --git a/python-mcp-crdb-docs/Dockerfile b/python-mcp-crdb-docs/Dockerfile
new file mode 100644
index 00000000000..2108d437bb0
--- /dev/null
+++ b/python-mcp-crdb-docs/Dockerfile
@@ -0,0 +1,18 @@
+# syntax=docker/dockerfile:1
+
+# Build stage: install the package and its dependencies into /install so the
+# runtime stage can copy a single self-contained directory.
+FROM python:3.11-slim-bookworm AS build
+WORKDIR /app
+COPY pyproject.toml README.md ./
+COPY src ./src
+RUN pip install --no-cache-dir --target=/install .
+
+# Runtime stage: the Debian 12 distroless image provides its own Python 3.11
+# interpreter, and the :nonroot tag runs as an unprivileged user.
+FROM gcr.io/distroless/python3-debian12:nonroot
+ENV PYTHONPATH=/install
+WORKDIR /app
+COPY --from=build /install /install
+EXPOSE 3000
+ENTRYPOINT ["/usr/bin/python3", "-m", "python_mcp_crdb_docs.server"]
diff --git a/python-mcp-crdb-docs/Makefile b/python-mcp-crdb-docs/Makefile
new file mode 100644
index 00000000000..7c7650aa975
--- /dev/null
+++ b/python-mcp-crdb-docs/Makefile
@@ -0,0 +1,22 @@
+# Developer convenience targets; every recipe line must start with a tab.
+.PHONY: install lint test run docker-build
+
+# Install the package in editable mode together with the dev extras.
+install:
+	pip install -e '.[dev]'
+
+# Byte-compile the sources as a cheap syntax check.
+lint:
+	python -m compileall src
+
+# Run the test suite.
+test:
+	pytest
+
+# Start the MCP server locally.
+run:
+	python -m python_mcp_crdb_docs.server
+
+# Build the container image.
+docker-build:
+	docker build -t python-mcp-crdb-docs .
diff --git a/python-mcp-crdb-docs/README.md b/python-mcp-crdb-docs/README.md
new file mode 100644
index 00000000000..2d48b31842b
--- /dev/null
+++ b/python-mcp-crdb-docs/README.md
@@ -0,0 +1,25 @@
+# Python MCP Server for CockroachDB Docs
+
+This package ships a FastMCP-compatible server that exposes CockroachDB documentation via:
+
+* `search_docs` – Algolia-backed search results
+* `get_page` – Fetch Markdown for a docs page, with HTML-to-source discovery
+* `list_versions` – Enumerate available version folders from GitHub
+* `doc://` resource scheme – Direct access to Markdown via version + path
+
+## Local development
+
+```bash
+cd python-mcp-crdb-docs
+pip install -e .[dev]
+pytest
+FASTMCP_LOG_LEVEL=INFO python -m python_mcp_crdb_docs.server
+```
+
+Configuration is handled with environment variables:
+
+* `ALGOLIA_APP_ID` (default `HPNPWALV9D`)
+* `ALGOLIA_SEARCH_KEY` (defaults to the search-only key for the staging index)
+* `ALGOLIA_INDEX` (default `stage_cockroach_docs`)
+
+All HTTP requests are routed through a hardened client that enforces a 10-second timeout, a 512 KB response-body limit, and a domain allowlist.
diff --git a/python-mcp-crdb-docs/pyproject.toml b/python-mcp-crdb-docs/pyproject.toml
new file mode 100644
index 00000000000..de30412a1cb
--- /dev/null
+++ b/python-mcp-crdb-docs/pyproject.toml
@@ -0,0 +1,28 @@
+[project]
+name = "python-mcp-crdb-docs"
+version = "0.1.0"
+description = "FastMCP server exposing CockroachDB docs search and fetching"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "fastmcp>=0.1.7",
+    "httpx>=0.27.0",
+    "pydantic>=2.7.0",
+    # tools/search.py imports algoliasearch.search_client and calls
+    # SearchClient.create()/init_index(); cap below 4.x, whose client API differs.
+    "algoliasearch>=3.0.0,<4",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.2.0",
+    "pytest-asyncio>=0.23.0",
+    "respx>=0.21.1",
+]
+
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[tool.pytest.ini_options]
+pythonpath = ["src"]
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/__init__.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/allowlist.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/allowlist.py
new file mode 100644
index 00000000000..325d0d64718
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/allowlist.py
@@ -0,0 +1,32 @@
+"""Domain allowlist enforcement."""
+
+from __future__ import annotations
+
+from urllib.parse import urlparse
+
+ALLOWED_HOST_SUFFIXES = (
+    ".cockroachlabs.com",
+    ".github.com",
+    ".githubusercontent.com",
+    ".algolia.net",
+    ".algolianet.com",
+)
+
+
+class DomainBlockedError(RuntimeError):
+    """Raised when a URL is outside the allowlist."""
+
+
+def ensure_allowed(url: str) -> None:
+    """Raise DomainBlockedError unless the URL's host is on the allowlist.
+
+    A host passes when it equals an allowed suffix without its leading dot
+    (the bare domain) or ends with the dotted suffix (any subdomain).
+    """
+    host = urlparse(url).hostname or ""
+    permitted = any(
+        host == suffix.lstrip(".") or host.endswith(suffix)
+        for suffix in ALLOWED_HOST_SUFFIXES
+    )
+    if not permitted:
+        raise DomainBlockedError(f"URL host '{host}' not in allowlist")
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/cache.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/cache.py
new file mode 100644
index 00000000000..1a935217a10
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/cache.py
@@ -0,0 +1,60 @@
+"""Simple TTL-aware LRU cache."""
+
+from __future__ import annotations
+
+import time
+from collections import OrderedDict
+from typing import Generic, Optional, TypeVar
+
+K = TypeVar("K")
+V = TypeVar("V")
+
+
+class TTLCache(Generic[K, V]):
+    """Bounded mapping whose entries expire ``ttl`` seconds after being set.
+
+    Entries are kept in recency order: reading or (re)writing a key makes it
+    the most recent, and the least recent entries are evicted first once more
+    than ``maxsize`` entries are stored.
+    """
+
+    def __init__(self, maxsize: int = 128, ttl: float = 60.0) -> None:
+        self.maxsize = maxsize
+        self.ttl = ttl
+        # Maps key -> (absolute expiry timestamp, value), oldest entry first.
+        self._data: "OrderedDict[K, tuple[float, V]]" = OrderedDict()
+
+    def _purge(self) -> None:
+        """Drop expired entries, then evict oldest entries beyond ``maxsize``."""
+        now = time.time()
+        expired = [key for key, (expires, _) in self._data.items() if expires < now]
+        for key in expired:
+            self._data.pop(key, None)
+        while len(self._data) > self.maxsize:
+            self._data.popitem(last=False)
+
+    def get(self, key: K) -> Optional[V]:
+        """Return the live value for ``key`` (marking it most recent), else None."""
+        self._purge()
+        entry = self._data.get(key)
+        if entry is None:
+            return None
+        expires, value = entry
+        if expires < time.time():
+            # Expired between the purge above and this check.
+            del self._data[key]
+            return None
+        self._data.move_to_end(key)
+        return value
+
+    def set(self, key: K, value: V) -> None:
+        """Store ``value`` under ``key`` with a fresh TTL as the most recent entry."""
+        # Plain assignment to an existing key keeps its old position in an
+        # OrderedDict, so an updated key could be evicted as if it were stale.
+        self._data.pop(key, None)
+        self._data[key] = (time.time() + self.ttl, value)
+        self._purge()
+
+    def clear(self) -> None:
+        """Remove every entry."""
+        self._data.clear()
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/fastmcp_compat.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/fastmcp_compat.py
new file mode 100644
index 00000000000..7e0441b0833
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/fastmcp_compat.py
@@ -0,0 +1,54 @@
+"""Fallback shim for fastmcp when the real package is unavailable."""
+
+from __future__ import annotations
+
+try:  # pragma: no cover
+    from fastmcp import FastMCP, HTTPTransport, ResourceResult, TextResource, ToolResult
+except ModuleNotFoundError:  # pragma: no cover - exercised when dependency missing
+
+    class ToolResult:  # type: ignore[override]
+        """Minimal stand-in that only stores the tool payload."""
+
+        def __init__(self, content):
+            self.content = content
+
+    class ResourceResult:  # type: ignore[override]
+        """Minimal stand-in that only stores the resource payload."""
+
+        def __init__(self, content):
+            self.content = content
+
+    class TextResource(ResourceResult):  # type: ignore[override]
+        """Resource payload tagged with a MIME type."""
+
+        def __init__(self, content: str, mime_type: str = "text/plain"):
+            super().__init__(content)
+            self.mime_type = mime_type
+
+    class FastMCP:  # type: ignore[override]
+        """Records registrations so modules import cleanly; it cannot serve."""
+
+        def __init__(self, name: str):
+            self.name = name
+            self.tools = {}
+            self.resources = {}
+
+        def add_tool(self, name, func, args_model=None):
+            self.tools[name] = (func, args_model)
+
+        def add_resource(self, name, scheme, handler):
+            # Handlers are keyed by scheme; ``name`` is accepted but not stored.
+            self.resources[scheme] = handler
+
+        def run(self, transport=None):
+            raise RuntimeError("fastmcp package not installed; server cannot run")
+
+    class HTTPTransport:  # type: ignore[override]
+        """Stores the requested host and port."""
+
+        def __init__(self, host: str = "127.0.0.1", port: int = 3000):
+            self.host = host
+            self.port = port
+
+
+__all__ = ["FastMCP", "HTTPTransport", "ToolResult", "ResourceResult", "TextResource"]
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/fetch.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/fetch.py
new file mode 100644
index 00000000000..500dfc21eb3
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/fetch.py
@@ -0,0 +1,103 @@
+"""Markdown fetching utilities."""
+
+from __future__ import annotations
+
+import re
+from typing import Optional
+
+from .cache import TTLCache
+from .http import create_client
+from .logging import debug
+from .mapping import build_raw_url
+
+_MD_CACHE = TTLCache[str, str](maxsize=128, ttl=120)
+
+# Matches an anchor element, capturing its href (group 1) and inner HTML (group 2).
+_ANCHOR_PATTERN = re.compile(
+    r"<a[^>]+href=\"([^\"]+)\"[^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL
+)
+_TAG_PATTERN = re.compile(r"<[^>]+>")
+
+
+async def _fetch_first(urls: list[str]) -> Optional[str]:
+    """Return the body of the first URL answering HTTP 200, or None if none do."""
+    async with create_client() as client:
+        for url in urls:
+            resp = await client.get(url)
+            if resp.status_code == 200:
+                return resp.text
+    return None
+
+
+async def fetch_markdown_from_raw(version: str, path: str) -> Optional[str]:
+    """Fetch a page's Markdown from the candidate raw URLs for version + path.
+
+    Non-empty results are cached under the version/path pair.
+    """
+    cache_key = f"raw:{version}:{path}"
+    cached = _MD_CACHE.get(cache_key)
+    if cached:
+        return cached
+    content = await _fetch_first(build_raw_url(version, path))
+    if content:
+        _MD_CACHE.set(cache_key, content)
+    return content
+
+
+def _extract_source_href(html: str) -> Optional[str]:
+    """Return the href of the first link whose text contains 'view page source'."""
+    for match in _ANCHOR_PATTERN.finditer(html):
+        # Drop nested markup (icons, spans) before comparing the visible text.
+        text = _TAG_PATTERN.sub("", match.group(2)).strip().lower()
+        if "view page source" in text:
+            return match.group(1)
+    return None
+
+
+async def fetch_markdown_from_html_page(url: str) -> Optional[str]:
+    """Fetch a rendered docs page and return the Markdown its source link points to.
+
+    Returns None when the page is not HTTP 200, has no 'view page source' link,
+    or the linked source cannot be fetched.
+    """
+    cache_key = f"html:{url}"
+    cached = _MD_CACHE.get(cache_key)
+    if cached:
+        return cached
+    async with create_client() as client:
+        resp = await client.get(url)
+    if resp.status_code != 200:
+        return None
+    href = _extract_source_href(resp.text)
+    if not href:
+        return None
+    source_url = href
+    if source_url.startswith("//"):
+        # Protocol-relative link.
+        source_url = f"https:{source_url}"
+    elif source_url.startswith("/"):
+        # Site-relative link.
+        source_url = f"https://www.cockroachlabs.com{source_url}"
+    # Swap GitHub's HTML "blob" view for the raw file endpoint.
+    content = await _fetch_first([source_url.replace("/blob/", "/raw/")])
+    if content:
+        _MD_CACHE.set(cache_key, content)
+    return content
+
+
+async def list_versions_from_github() -> list[str]:
+    """List the directory names under ``src`` in the docs repo via the GitHub API.
+
+    The sorted names are cached as a comma-joined string.
+    """
+    cache_key = "versions"
+    cached = _MD_CACHE.get(cache_key)
+    if cached:
+        return cached.split(",")
+    async with create_client() as client:
+        resp = await client.get("https://api.github.com/repos/cockroachdb/docs/contents/src")
+    resp.raise_for_status()
+    data = resp.json()
+    versions = sorted(item["name"] for item in data if item.get("type") == "dir")
+    _MD_CACHE.set(cache_key, ",".join(versions))
+    return versions
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/http.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/http.py
new file mode 100644
index 00000000000..021992263f6
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/http.py
@@ -0,0 +1,53 @@
+"""HTTP client with timeouts, allowlist, and response size guards."""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any
+
+import httpx
+
+from .allowlist import ensure_allowed
+from .logging import debug
+
+MAX_RESPONSE_BYTES = 512 * 1024
+REQUEST_TIMEOUT = 10.0
+
+
+class SafeAsyncClient(httpx.AsyncClient):
+    """AsyncClient that applies the allowlist and size guard to every request.
+
+    The guards live in ``send`` because it is the public method that
+    ``request``/``get``/``post`` funnel through; httpx's request helpers do not
+    call a ``_request`` method, so an override under that name would not run.
+    """
+
+    def __init__(self, **kwargs: Any) -> None:
+        # Apply the project timeout unless the caller chose one explicitly.
+        kwargs.setdefault("timeout", REQUEST_TIMEOUT)
+        super().__init__(**kwargs)
+
+    async def send(self, request: httpx.Request, **kwargs: Any) -> httpx.Response:
+        """Send ``request`` after checking its host; reject oversized bodies.
+
+        Raises:
+            DomainBlockedError: the request host is not on the allowlist.
+            httpx.HTTPStatusError: the body exceeds ``MAX_RESPONSE_BYTES``.
+        """
+        url = str(request.url)
+        ensure_allowed(url)
+        debug("http_request", method=request.method, url=url)
+        response = await super().send(request, **kwargs)
+        if kwargs.get("stream"):
+            # The body has not been read yet; the caller consumes it.
+            return response
+        if len(response.content) > MAX_RESPONSE_BYTES:
+            raise httpx.HTTPStatusError(
+                "response too large", request=response.request, response=response
+            )
+        return response
+
+
+def create_client() -> SafeAsyncClient:
+    """Build a client carrying the project User-Agent header."""
+    return SafeAsyncClient(headers={"User-Agent": "python-mcp-crdb-docs/0.1"})
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/logging.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/logging.py
new file mode 100644
index 00000000000..2b1302fe2fa
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/logging.py
@@ -0,0 +1,55 @@
+"""Structured logging helpers for the MCP server."""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+import time
+from typing import Any, Dict
+
+_LOG_LEVEL = os.getenv("FASTMCP_LOG_LEVEL", "INFO").upper()
+_LEVELS = {"DEBUG": 10, "INFO": 20, "WARNING": 30, "ERROR": 40}
+
+
+def _should_log(level: str) -> bool:
+    """Tell whether ``level`` meets the configured threshold.
+
+    Unrecognised names, for either the message or the threshold, count as INFO.
+    """
+    threshold = _LEVELS.get(_LOG_LEVEL, 20)
+    severity = _LEVELS.get(level, 20)
+    return severity >= threshold
+
+
+def log(level: str, message: str, **fields: Any) -> None:
+    """Emit a JSON log line."""
+    if not _should_log(level):
+        return
+    timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+    payload: Dict[str, Any] = {"level": level, "message": message, "time": timestamp}
+    # Extra fields are merged last, so they override the standard keys on clash.
+    payload.update(fields)
+    json.dump(payload, sys.stdout)
+    sys.stdout.write("\n")
+    sys.stdout.flush()
+
+
+def info(message: str, **fields: Any) -> None:
+    """Log ``message`` at INFO level."""
+    log("INFO", message, **fields)
+
+
+def warning(message: str, **fields: Any) -> None:
+    """Log ``message`` at WARNING level."""
+    log("WARNING", message, **fields)
+
+
+def error(message: str, **fields: Any) -> None:
+    """Log ``message`` at ERROR level."""
+    log("ERROR", message, **fields)
+
+
+def debug(message: str, **fields: Any) -> None:
+    """Log ``message`` at DEBUG level."""
+    log("DEBUG", message, **fields)
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/mapping.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/mapping.py
new file mode 100644
index 00000000000..4e83fdd2575
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/core/mapping.py
@@ -0,0 +1,51 @@
+"""Mapping helpers between docs URLs and GitHub raw URLs."""
+
+from __future__ import annotations
+
+from urllib.parse import urlparse
+
+BASE_RAW = "https://raw.githubusercontent.com/cockroachdb/docs"
+MAIN_BRANCH = "main"
+
+
+def infer_version_path(url_path: str) -> str:
+    """Map a docs site path such as ``/docs/<version>/<page>.html`` to a repo path.
+
+    Paths that do not start with ``docs`` plus a version segment map to
+    ``src/current``; a missing page becomes ``index.md``.
+    """
+    segments = [seg for seg in url_path.split("/") if seg]
+    if len(segments) < 2 or segments[0] != "docs":
+        return "src/current"
+    version, page_parts = segments[1], segments[2:]
+    page = "/".join(page_parts) or "index.html"
+    if page.endswith(".html"):
+        page = page[: -len(".html")] + ".md"
+    return f"src/{version}/{page}"
+
+
+def raw_url_from_github_tree(tree_url: str) -> str:
+    """Turn ``github.com/<owner>/<repo>/<view>/<branch>/<path>`` into its raw URL.
+
+    Raises:
+        ValueError: the URL path has fewer than five segments.
+    """
+    segments = [seg for seg in urlparse(tree_url).path.split("/") if seg]
+    if len(segments) < 5:
+        raise ValueError("Unexpected GitHub tree URL")
+    owner, repo, _view, branch, *path = segments
+    raw_path = "/".join(path)
+    return f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{raw_path}"
+
+
+def build_raw_url(version: str, path: str, extension_priority: tuple[str, ...] = (".md", ".mdx")) -> list[str]:
+    """List candidate raw URLs for ``path`` inside ``version`` on the main branch.
+
+    A path whose last segment contains a dot is used as is; otherwise one
+    candidate per extension is produced, in priority order.
+    """
+    base = f"{BASE_RAW}/{MAIN_BRANCH}/src/{version}/{path.strip('/')}"
+    filename = base.rsplit("/", 1)[-1]
+    if "." in filename:
+        return [base]
+    return [f"{base}{ext}" for ext in extension_priority]
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/resources/__init__.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/resources/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/resources/doc_resource.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/resources/doc_resource.py
new file mode 100644
index 00000000000..e25b44ec973
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/resources/doc_resource.py
@@ -0,0 +1,25 @@
+"""doc:// resource handler."""
+
+from __future__ import annotations
+
+from ..core.fastmcp_compat import ResourceResult, TextResource
+from ..core.fetch import fetch_markdown_from_raw
+
+
+async def resolve_doc_uri(uri: str) -> ResourceResult:
+    """Resolve ``doc://{version}/{path}`` to the page's Markdown.
+
+    Raises:
+        ValueError: the URI lacks the ``doc://`` scheme, a version, or a path.
+        FileNotFoundError: no Markdown could be fetched for the page.
+    """
+    # partition() never fails to unpack, so a URI without the scheme reaches the
+    # descriptive ValueError below instead of a bare tuple-unpacking error.
+    prefix, scheme, remainder = uri.partition("doc://")
+    version, _, path = remainder.partition("/")
+    if prefix or not scheme or not version or not path:
+        raise ValueError("Resource must look like doc://{version}/{path}")
+    markdown = await fetch_markdown_from_raw(version, path)
+    if markdown is None:
+        raise FileNotFoundError(path)
+    return TextResource(content=markdown, mime_type="text/markdown")
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/server.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/server.py
new file mode 100644
index 00000000000..583d4cbfe79
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/server.py
@@ -0,0 +1,29 @@
+"""Entry point for the FastMCP CockroachDB docs server."""
+
+from __future__ import annotations
+
+import os
+
+from .core.fastmcp_compat import FastMCP, HTTPTransport
+
+from .resources.doc_resource import resolve_doc_uri
+from .tools.get_page import GetPageArgs, get_page
+from .tools.list_versions import list_versions
+from .tools.search import SearchArgs, search_docs
+
+# Registration happens at import time so importing ``mcp`` yields a ready server.
+mcp = FastMCP("cockroachdb-docs")
+mcp.add_tool("search_docs", search_docs, args_model=SearchArgs)
+mcp.add_tool("get_page", get_page, args_model=GetPageArgs)
+mcp.add_tool("list_versions", list_versions)
+mcp.add_resource("doc", scheme="doc://", handler=resolve_doc_uri)
+
+
+def main() -> None:
+    """Serve over HTTP on all interfaces, on ``$PORT`` (default 3000)."""
+    listen_port = int(os.getenv("PORT", "3000"))
+    mcp.run(transport=HTTPTransport(host="0.0.0.0", port=listen_port))
+
+
+if __name__ == "__main__":  # pragma: no cover
+    main()
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/__init__.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/get_page.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/get_page.py
new file mode 100644
index 00000000000..d65a9824856
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/get_page.py
@@ -0,0 +1,38 @@
+"""get_page tool."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+
+from ..core.fastmcp_compat import ToolResult
+from ..core.fetch import fetch_markdown_from_html_page, fetch_markdown_from_raw
+
+
+class GetPageArgs(BaseModel):
+    # Arguments for get_page: either ``url``, or ``version`` together with ``path``.
+    url: str | None = Field(default=None, description="Docs page URL")
+    version: str | None = Field(default=None, description="Docs version folder, e.g. stable")
+    path: str | None = Field(default=None, description="Path inside version, e.g. sql-statements.md")
+
+
+async def get_page(args: GetPageArgs) -> ToolResult:
+    """Return a page's Markdown, located by URL or by version + path.
+
+    ``url`` wins when both forms are supplied.
+
+    Raises:
+        ValueError: neither a URL nor a version/path pair was given.
+        FileNotFoundError: the Markdown could not be fetched.
+    """
+    by_url = bool(args.url)
+    if not by_url and not (args.version and args.path):
+        raise ValueError("Either url or version+path must be provided")
+    if by_url:
+        resource = args.url
+        markdown = await fetch_markdown_from_html_page(args.url)
+    else:
+        resource = f"doc://{args.version}/{args.path}"
+        markdown = await fetch_markdown_from_raw(args.version, args.path)
+    if markdown is None:
+        raise FileNotFoundError("Unable to fetch Markdown")
+    return ToolResult(content={"resource": resource, "markdown": markdown})
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/list_versions.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/list_versions.py
new file mode 100644
index 00000000000..2773446cd60
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/list_versions.py
@@ -0,0 +1,11 @@
+"""list_versions tool."""
+
+from __future__ import annotations
+
+from ..core.fastmcp_compat import ToolResult
+from ..core.fetch import list_versions_from_github
+
+
+async def list_versions() -> ToolResult:
+    """Return the available docs version folders under the ``versions`` key."""
+    return ToolResult(content={"versions": await list_versions_from_github()})
diff --git a/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/search.py b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/search.py
new file mode 100644
index 00000000000..c7e67eb5a93
--- /dev/null
+++ b/python-mcp-crdb-docs/src/python_mcp_crdb_docs/tools/search.py
@@ -0,0 +1,77 @@
+"""Algolia search tool."""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import re
+from typing import Any, Dict, List
+
+try:  # pragma: no cover - dependency optional during tests
+    from algoliasearch.search_client import SearchClient
+except ModuleNotFoundError:  # pragma: no cover
+    SearchClient = None  # type: ignore
+
+from pydantic import BaseModel, Field
+
+from ..core.fastmcp_compat import ToolResult
+
+ALGOLIA_APP_ID = os.getenv("ALGOLIA_APP_ID", "HPNPWALV9D")
+ALGOLIA_SEARCH_KEY = os.getenv("ALGOLIA_SEARCH_KEY", "efe072446c86303530f568d267385786")
+ALGOLIA_INDEX = os.getenv("ALGOLIA_INDEX", "stage_cockroach_docs")
+
+# Both stay None when algoliasearch is missing; search_docs reports that at call time.
+_client = SearchClient.create(ALGOLIA_APP_ID, ALGOLIA_SEARCH_KEY) if SearchClient else None
+_index = _client.init_index(ALGOLIA_INDEX) if _client else None
+
+
+class SearchArgs(BaseModel):
+    # Arguments accepted by the search_docs tool.
+    query: str = Field(..., description="Search query")
+    page: int = Field(0, ge=0, description="Algolia pagination page")
+    hits_per_page: int = Field(10, ge=1, le=20)
+
+
+_TAG_PATTERN = re.compile(r"<[^>]+>")
+
+
+def _strip_html(value: str | None) -> str:
+    """Replace each HTML tag in ``value`` with a space; None/empty gives an empty string."""
+    if not value:
+        return ""
+    return _TAG_PATTERN.sub(" ", value).strip()
+
+
+def _format_hit(hit: Dict[str, Any]) -> Dict[str, Any]:
+    """Reduce one raw Algolia hit to title, url, hierarchy, and plain-text snippet."""
+    # ``or {}`` also covers keys that are present but null, not just missing ones.
+    hierarchy = hit.get("hierarchy") or {}
+    snippet = (hit.get("_snippetResult") or {}).get("content") or {}
+    return {
+        "title": hierarchy.get("lvl0") or hit.get("title"),
+        "url": hit.get("url"),
+        "hierarchy": hit.get("hierarchy"),
+        "snippet": _strip_html(snippet.get("value")),
+    }
+
+
+async def search_docs(args: SearchArgs) -> ToolResult:
+    """Search the configured Algolia index and return formatted hits.
+
+    Raises:
+        RuntimeError: the algoliasearch client could not be imported.
+    """
+    if _index is None:
+        raise RuntimeError("algoliasearch dependency not installed")
+    params = {"page": args.page, "hitsPerPage": args.hits_per_page}
+    # The index's search() is a synchronous call; run it in a worker thread so
+    # it does not stall the event loop.
+    res = await asyncio.to_thread(_index.search, args.query, params)
+    hits: List[Dict[str, Any]] = [_format_hit(hit) for hit in res.get("hits", [])]
+    return ToolResult(
+        content={
+            "hits": hits,
+            "nbHits": res.get("nbHits", 0),
+            "page": res.get("page", 0),
+        }
+    )
diff --git a/python-mcp-crdb-docs/tests/test_algolia.py b/python-mcp-crdb-docs/tests/test_algolia.py
new file mode 100644
index 00000000000..71ae3ae985e
--- /dev/null
+++ b/python-mcp-crdb-docs/tests/test_algolia.py
@@ -0,0 +1,26 @@
+import pytest
+
+from python_mcp_crdb_docs.tools import search
+
+
+class _StubIndex:
+    """Stands in for the Algolia index and checks the forwarded arguments."""
+
+    def search(self, query, params):
+        assert query == "txn"
+        assert params["hitsPerPage"] == 5
+        hit = {
+            "hierarchy": {"lvl0": "Transactions"},
+            "url": "https://example.com",
+            "_snippetResult": {"content": {"value": "txn docs"}},
+        }
+        return {"hits": [hit], "nbHits": 1, "page": 0}
+
+
+@pytest.mark.asyncio
+async def test_search_docs_formats_hits(monkeypatch):
+    """search_docs forwards the query/page size and surfaces the snippet text."""
+    monkeypatch.setattr(search, "_index", _StubIndex())
+    request = search.SearchArgs(query="txn", hits_per_page=5)
+    result = await search.search_docs(request)
+    assert result.content["hits"][0]["snippet"].startswith("txn")
diff --git a/python-mcp-crdb-docs/tests/test_fetch.py b/python-mcp-crdb-docs/tests/test_fetch.py
new file mode 100644
index 00000000000..b721440e4e5
--- /dev/null
+++ b/python-mcp-crdb-docs/tests/test_fetch.py
@@ -0,0 +1,57 @@
+import asyncio
+
+import pytest
+
+from python_mcp_crdb_docs.core import fetch
+
+
+@pytest.mark.asyncio
+async def test_fetch_markdown_from_raw_uses_cache(monkeypatch):
+    """A second lookup of the same version/path is served from the cache."""
+    calls = 0
+
+    async def fake_fetch(urls):
+        nonlocal calls
+        calls += 1
+        return "data"
+
+    monkeypatch.setattr(fetch, "_fetch_first", fake_fetch)
+    fetch._MD_CACHE.clear()
+    result1 = await fetch.fetch_markdown_from_raw("stable", "foo")
+    result2 = await fetch.fetch_markdown_from_raw("stable", "foo")
+    assert result1 == result2 == "data"
+    assert calls == 1
+
+
+@pytest.mark.asyncio
+async def test_list_versions(monkeypatch):
+    """Only directory entries are reported as versions, in sorted order."""
+
+    class Resp:
+        status_code = 200
+
+        def raise_for_status(self):
+            pass
+
+        def json(self):
+            # Unsorted, with a file entry, so filtering and sorting are exercised.
+            return [
+                {"type": "dir", "name": "stable"},
+                {"type": "file", "name": "README.md"},
+                {"type": "dir", "name": "current"},
+            ]
+
+    class DummyClient:
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, *exc):
+            return False
+
+        async def get(self, url):
+            return Resp()
+
+    monkeypatch.setattr(fetch, "create_client", lambda: DummyClient())
+    fetch._MD_CACHE.clear()
+    versions = await fetch.list_versions_from_github()
+    assert versions == ["current", "stable"]
diff --git a/python-mcp-crdb-docs/tests/test_mapping.py b/python-mcp-crdb-docs/tests/test_mapping.py
new file mode 100644
index 00000000000..c3cddaf2b7d
--- /dev/null
+++ b/python-mcp-crdb-docs/tests/test_mapping.py
@@ -0,0 +1,14 @@
+from python_mcp_crdb_docs.core import mapping
+
+
+def test_build_raw_url_adds_extensions():
+    """An extension-less path yields .md then .mdx candidates."""
+    first, second = mapping.build_raw_url("stable", "cockroach-start")[:2]
+    assert first.endswith("cockroach-start.md")
+    assert second.endswith("cockroach-start.mdx")
+
+
+def test_infer_version_path_default():
+    """A .html docs path maps to the matching .md source path."""
+    inferred = mapping.infer_version_path("/docs/stable/cockroach-start.html")
+    assert inferred.endswith("cockroach-start.md")