Skip to content

Commit 2258663

Browse files
authored
Add a docs server based on the AI Assistant Vectorize index (#90)
* Add a docs server based on the AI Assistant Vectorize index * Move docs2 -> docs Moved the older docs server to docs-autorag.mcp.cloudflare.com * Add streamable HTTP support to docs server at /mcp * Don't need ts-ignore for serve/serveSSE anymore * Use the newer createApiHandler to get sse+mcp endpoints
1 parent 94351b2 commit 2258663

File tree

14 files changed

+6117
-4
lines changed

14 files changed

+6117
-4
lines changed

apps/docs-autorag/src/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { McpAgent } from 'agents/mcp'
22

3+
import { createApiHandler } from '@repo/mcp-common/src/api-handler'
34
import { getEnv } from '@repo/mcp-common/src/env'
45
import { CloudflareMCPServer } from '@repo/mcp-common/src/server'
56

@@ -35,4 +36,4 @@ export class CloudflareDocumentationMCP extends McpAgent<Env, State, Props> {
3536
}
3637
}
3738

38-
export default CloudflareDocumentationMCP.mount('/sse')
39+
export default createApiHandler(CloudflareDocumentationMCP)

apps/docs-autorag/wrangler.jsonc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
"staging": {
5050
"name": "mcp-cloudflare-docs-autorag-staging",
5151
"account_id": "6702657b6aa048cf3081ff3ff3c9c52f",
52-
"routes": [{ "pattern": "docs-staging.mcp.cloudflare.com", "custom_domain": true }],
52+
"routes": [{ "pattern": "docs-autorag-staging.mcp.cloudflare.com", "custom_domain": true }],
5353
"durable_objects": {
5454
"bindings": [
5555
{
@@ -75,7 +75,7 @@
7575
"production": {
7676
"name": "mcp-cloudflare-docs-autorag-production",
7777
"account_id": "6702657b6aa048cf3081ff3ff3c9c52f",
78-
"routes": [{ "pattern": "docs.mcp.cloudflare.com", "custom_domain": true }],
78+
"routes": [{ "pattern": "docs-autorag.mcp.cloudflare.com", "custom_domain": true }],
7979
"durable_objects": {
8080
"bindings": [
8181
{

apps/docs-vectorize/.dev.vars.example

Whitespace-only changes.

apps/docs-vectorize/.eslintrc.cjs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/** @type {import("eslint").Linter.Config} */
2+
module.exports = {
3+
root: true,
4+
extends: ['@repo/eslint-config/default.cjs'],
5+
}

apps/docs-vectorize/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Model Context Protocol (MCP) Server + Cloudflare Documentation (via Vectorize)
2+
3+
This is a [Model Context Protocol (MCP)](https://modelcontextprotocol.io/introduction) server that supports remote MCP connections. It connects to a Vectorize DB (in this case, indexed with the Cloudflare docs).
4+
5+
The Cloudflare account this worker is deployed on already has this Vectorize DB set up and indexed.
6+
7+
## Running locally
8+
9+
```
10+
pnpm run start
11+
```
12+
13+
Then connect to the server via remote MCP at `http://localhost:8976/sse`
14+
15+
## Deploying
16+
17+
```
18+
pnpm run deploy --env [ENVIRONMENT]
19+
```

apps/docs-vectorize/package.json

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"name": "docs-vectorize",
3+
"version": "0.0.1",
4+
"private": true,
5+
"scripts": {
6+
"check:lint": "run-eslint-workers",
7+
"check:types": "run-tsc",
8+
"deploy": "run-wrangler-deploy",
9+
"dev": "wrangler dev --experimental-vectorize-bind-to-prod",
10+
"start": "npm run dev",
11+
"types": "wrangler types --include-env=false",
12+
"test": "vitest run"
13+
},
14+
"dependencies": {
15+
"@cloudflare/workers-oauth-provider": "0.0.3",
16+
"@hono/zod-validator": "0.4.3",
17+
"@modelcontextprotocol/sdk": "1.10.2",
18+
"@repo/mcp-common": "workspace:*",
19+
"@repo/mcp-observability": "workspace:*",
20+
"agents": "0.0.67",
21+
"cloudflare": "4.2.0",
22+
"hono": "4.7.6",
23+
"mime": "4.0.6",
24+
"zod": "3.24.2"
25+
},
26+
"devDependencies": {
27+
"@cloudflare/vitest-pool-workers": "0.8.14",
28+
"@types/node": "22.14.1",
29+
"prettier": "3.5.3",
30+
"typescript": "5.5.4",
31+
"vitest": "3.0.9",
32+
"wrangler": "4.10.0"
33+
}
34+
}

apps/docs-vectorize/src/context.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import type { CloudflareDocumentationMCP } from './index'
2+
3+
/**
 * Variables and bindings available to the docs-vectorize worker.
 */
export interface Env {
4+
// Name of the deployment environment
ENVIRONMENT: 'development' | 'staging' | 'production'
5+
// Reported as `serverInfo.name` by the MCP server
MCP_SERVER_NAME: string
6+
// Reported as `serverInfo.version` by the MCP server
MCP_SERVER_VERSION: string
7+
// Durable Object namespace for the CloudflareDocumentationMCP agent class
MCP_OBJECT: DurableObjectNamespace<CloudflareDocumentationMCP>
8+
// Analytics Engine dataset handed to CloudflareMCPServer as `wae`
MCP_METRICS: AnalyticsEngineDataset
9+
// AI binding used by the docs tool to compute query embeddings
AI: Ai
10+
// Vectorize index queried by the docs search tool
VECTORIZE: VectorizeIndex
11+
}

apps/docs-vectorize/src/index.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import { McpAgent } from 'agents/mcp'
2+
3+
import { createApiHandler } from '@repo/mcp-common/src/api-handler'
4+
import { getEnv } from '@repo/mcp-common/src/env'
5+
import { CloudflareMCPServer } from '@repo/mcp-common/src/server'
6+
7+
import { registerDocsTools } from './tools/docs'
8+
9+
import type { Env } from './context'
10+
11+
const env = getEnv<Env>()
12+
13+
// The docs MCP server isn't stateful, so we don't have state/props
14+
export type Props = never
15+
16+
export type State = never
17+
18+
/**
 * MCP agent for the Vectorize-backed Cloudflare documentation server.
 *
 * Its `server` is a CloudflareMCPServer configured from the module-level `env`
 * (metrics dataset plus server name/version); `init()` registers the docs tools on it.
 */
export class CloudflareDocumentationMCP extends McpAgent<Env, State, Props> {
19+
// Built from the module-level `env`, not from the constructor's `env` parameter
server = new CloudflareMCPServer({
20+
wae: env.MCP_METRICS,
21+
serverInfo: {
22+
name: env.MCP_SERVER_NAME,
23+
version: env.MCP_SERVER_VERSION,
24+
},
25+
})
26+
27+
// Parameter properties declare `ctx` and `env` as public members; both are forwarded to McpAgent
constructor(
28+
public ctx: DurableObjectState,
29+
public env: Env
30+
) {
31+
super(ctx, env)
32+
}
33+
34+
// Registers the documentation search tool on this agent's server
async init() {
35+
registerDocsTools(this)
36+
}
37+
}
38+
39+
export default createApiHandler(CloudflareDocumentationMCP)

apps/docs-vectorize/src/tools/docs.ts

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import { z } from 'zod'
2+
3+
import type { CloudflareDocumentationMCP } from '../index'
4+
5+
// Always return 10 results for simplicity, don't make it configurable
6+
const TOP_K = 10
7+
8+
/**
 * Registers the `search_cloudflare_documentation` tool on the agent's MCP server.
 *
 * The tool takes a single string `query`, looks up the TOP_K most similar
 * documentation chunks via `queryVectorize`, and returns them as one text
 * content item made of `<result>` elements (each with a `<url>` and `<text>`).
 *
 * @param agent The MCP agent; its `server` receives the tool and its `env`
 *   supplies the AI and VECTORIZE bindings used for the search
 */
12+
export function registerDocsTools(agent: CloudflareDocumentationMCP) {
13+
// Register the documentation search tool
14+
agent.server.tool(
15+
'search_cloudflare_documentation',
16+
`Search the Cloudflare documentation.
17+
18+
This tool should be used to answer any question about Cloudflare products or features, including:
19+
- Workers, Pages, R2, Images, Stream, D1, Durable Objects, KV, Workflows, Hyperdrive, Queues
20+
- AutoRAG, Workers AI, Vectorize, AI Gateway, Browser Rendering
21+
- Zero Trust, Access, Tunnel, Gateway, Browser Isolation, WARP, DDOS, Magic Transit, Magic WAN
22+
- CDN, Cache, DNS, Zaraz, Argo, Rulesets, Terraform, Account and Billing
23+
24+
Results are returned as semantically similar chunks to the query.
25+
`,
26+
// Input schema: a single free-text query
{
27+
query: z.string(),
28+
},
29+
// Handler: run the similarity search, then render the matches as XML-like text
async ({ query }) => {
30+
const results = await queryVectorize(agent.env.AI, agent.env.VECTORIZE, query, TOP_K)
31+
// NOTE(review): url and text are interpolated without escaping — confirm that is acceptable for consumers
const resultsAsXml = results
32+
.map((result) => {
33+
return `<result>
34+
<url>${result.url}</url>
35+
<text>
36+
${result.text}
37+
</text>
38+
</result>`
39+
})
40+
.join('\n')
41+
return {
42+
content: [{ type: 'text', text: resultsAsXml }],
43+
}
44+
}
45+
)
46+
}
47+
48+
/**
 * Embeds `query` and returns the `topK` closest matches from the Vectorize index.
 *
 * @param ai AI binding used to compute the query embedding
 * @param vectorizeIndex Index to search
 * @param query Free-text search query
 * @param topK Number of matches to request
 * @returns One entry per match: `similarity` (score capped at 1), `id`, the public
 *   docs `url` derived from the match's `filePath` metadata, and the chunk `text`
 */
async function queryVectorize(ai: Ai, vectorizeIndex: VectorizeIndex, query: string, topK: number) {
	// Retrieval queries get the instruction prefix recommended by the model card:
	// https://huggingface.co/BAAI/bge-base-en-v1.5#model-list
	const prefixedQuery = 'Represent this sentence for searching relevant passages: ' + query
	const [queryEmbedding] = await getEmbeddings(ai, [prefixedQuery])

	const searchOptions = {
		topK,
		returnMetadata: 'all',
		returnValues: false,
	} as const
	const searchResult = await vectorizeIndex.query(queryEmbedding, searchOptions)

	// Missing metadata fields fall back to the empty string
	return searchResult.matches.map(({ score, id, metadata }) => ({
		similarity: Math.min(score, 1),
		id,
		url: sourceToUrl(String(metadata?.filePath ?? '')),
		text: String(metadata?.text ?? ''),
	}))
}
67+
68+
const TOP_DIR = 'src/content/docs'

/**
 * Converts a docs source file path into its public developers.cloudflare.com URL.
 *
 * The leading `src/content/docs/` directory is dropped when present, a trailing
 * `index.mdx` file name collapses to its directory (keeping the trailing slash),
 * and any other `.mdx` extension is removed.
 *
 * @param path Source path of the documentation file, e.g. `src/content/docs/workers/index.mdx`
 * @returns The absolute URL for that page
 */
function sourceToUrl(path: string) {
	const prefix = `${TOP_DIR}/`
	const relativePath = path.startsWith(prefix) ? path.slice(prefix.length) : path
	return (
		'https://developers.cloudflare.com/' +
		relativePath
			// Anchor on a path-segment boundary so only a file literally named
			// index.mdx is collapsed; "reindex.mdx" must not be truncated to "re".
			.replace(/(^|\/)index\.mdx$/, '$1')
			.replace(/\.mdx$/, '')
	)
}
78+
79+
/**
 * Runs the `@cf/baai/bge-base-en-v1.5` model over `strings` with `cls` pooling,
 * retrying through `doWithRetries` when the call rejects.
 *
 * @param ai AI binding used to run the model
 * @param strings Texts to embed
 * @returns The `data` field of the model response
 */
async function getEmbeddings(ai: Ai, strings: string[]) {
	const runEmbeddingModel = () =>
		ai.run('@cf/baai/bge-base-en-v1.5', {
			text: strings,
			// @ts-expect-error pooling not in types yet
			pooling: 'cls',
		})

	const { data } = await doWithRetries(runEmbeddingModel)
	return data
}
90+
91+
/**
 * Runs `action`, retrying with exponential backoff and full jitter whenever it rejects.
 *
 * Every failure is logged with `console.error`. Before retry number `i` (0-based) the
 * function waits a random delay in `[0, initRetryMs * 2^i)` milliseconds.
 *
 * @param action Async operation to attempt; it is invoked again for each retry
 * @param options Optional tuning; omitted fields keep the defaults
 * @param options.numRetries Maximum number of retries after the first attempt (default 10);
 *   negative or non-numeric values are treated as 0
 * @param options.initRetryMs Upper bound in milliseconds of the first backoff delay (default 50)
 * @returns The value `action` resolves with
 * @throws The error from the final attempt once all retries are exhausted
 */
async function doWithRetries<T>(
	action: () => Promise<T>,
	{ numRetries = 10, initRetryMs = 50 }: { numRetries?: number; initRetryMs?: number } = {}
): Promise<T> {
	// Normalize so the loop always runs at least once and its last iteration always settles
	const maxRetries = Math.max(0, Math.floor(numRetries)) || 0
	for (let i = 0; i <= maxRetries; i++) {
		try {
			return await action()
		} catch (e) {
			// TODO: distinguish between user errors (4xx) and system errors (5xx)
			console.error(e)
			if (i === maxRetries) {
				throw e
			}
			// Exponential backoff with full jitter
			await scheduler.wait(Math.random() * initRetryMs * Math.pow(2, i))
		}
	}
	// Unreachable: the last loop iteration either returns or rethrows
	throw new Error('An unknown error occurred')
}

apps/docs-vectorize/tsconfig.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"extends": "@repo/typescript-config/workers.json",
3+
"include": ["*/**.ts", "./vitest.config.ts"]
4+
}

0 commit comments

Comments
 (0)