|
| 1 | +import { z } from 'zod' |
| 2 | + |
| 3 | +import type { CloudflareDocumentationMCP } from '../index' |
| 4 | + |
// Number of matches requested per search. Kept as a fixed value on purpose
// rather than being exposed as a tool argument.
const TOP_K = 10
| 7 | + |
/**
 * Adds the `search_cloudflare_documentation` tool to the given MCP server.
 *
 * The tool accepts a free-text `query`, fetches the closest documentation
 * chunks through `queryVectorize`, and answers with a single text item in
 * which every match is wrapped in a `<result>` element holding its URL and
 * chunk text.
 *
 * @param agent The MCP server instance
 */
export function registerDocsTools(agent: CloudflareDocumentationMCP) {
	// Renders one match as the XML-like snippet sent back to the client
	const toXml = (url: string, text: string) =>
		['<result>', `<url>${url}</url>`, '<text>', text, '</text>', '</result>'].join('\n')

	// Register the documentation search tool
	agent.server.tool(
		'search_cloudflare_documentation',
		`Search the Cloudflare documentation.

		This tool should be used to answer any question about Cloudflare products or features, including:
		- Workers, Pages, R2, Images, Stream, D1, Durable Objects, KV, Workflows, Hyperdrive, Queues
		- AutoRAG, Workers AI, Vectorize, AI Gateway, Browser Rendering
		- Zero Trust, Access, Tunnel, Gateway, Browser Isolation, WARP, DDOS, Magic Transit, Magic WAN
		- CDN, Cache, DNS, Zaraz, Argo, Rulesets, Terraform, Account and Billing

		Results are returned as semantically similar chunks to the query.
		`,
		{ query: z.string() },
		async ({ query }) => {
			const matches = await queryVectorize(agent.env.AI, agent.env.VECTORIZE, query, TOP_K)

			const xmlChunks: string[] = []
			for (const { url, text } of matches) {
				xmlChunks.push(toXml(url, text))
			}

			return {
				content: [{ type: 'text', text: xmlChunks.join('\n') }],
			}
		}
	)
}
| 47 | + |
/**
 * Embeds `query` and returns the closest entries from the given Vectorize index.
 *
 * @param ai AI binding handed to `getEmbeddings` to compute the query embedding
 * @param vectorizeIndex Index that is queried with the embedding
 * @param query Free-text search string
 * @param topK Number of matches requested from the index
 * @returns One object per match: `similarity` (the match score capped at 1),
 *   `id`, `url` (built by `sourceToUrl` from the `filePath` metadata) and
 *   `text` (the `text` metadata). Missing metadata fields are treated as ''.
 * @throws Error when the embedding call yields no vector for the query
 */
async function queryVectorize(ai: Ai, vectorizeIndex: VectorizeIndex, query: string, topK: number) {
	// Recommendation from: https://huggingface.co/BAAI/bge-base-en-v1.5#model-list
	const [queryEmbedding] = await getEmbeddings(ai, [
		'Represent this sentence for searching relevant passages: ' + query,
	])

	// Fail with a clear message instead of handing `undefined` to the index
	if (queryEmbedding === undefined) {
		throw new Error('Embedding model returned no vector for the query')
	}

	const { matches } = await vectorizeIndex.query(queryEmbedding, {
		topK,
		returnMetadata: 'all',
		returnValues: false,
	})

	return matches.map((match) => ({
		similarity: Math.min(match.score, 1),
		id: match.id,
		url: sourceToUrl(String(match.metadata?.filePath ?? '')),
		text: String(match.metadata?.text ?? ''),
	}))
}
| 67 | + |
const TOP_DIR = 'src/content/docs'

/**
 * Converts a documentation source path into its developers.cloudflare.com URL.
 *
 * The leading `src/content/docs/` directory is dropped when present, a file
 * named exactly `index.mdx` maps to its containing directory (trailing slash
 * kept), and any other `.mdx` extension is removed.
 *
 * @param path Source file path, e.g. `src/content/docs/workers/index.mdx`
 * @returns Absolute URL under `https://developers.cloudflare.com/`
 */
function sourceToUrl(path: string) {
	const prefix = `${TOP_DIR}/`
	const relativePath = path.startsWith(prefix) ? path.slice(prefix.length) : path

	const route = relativePath
		// Anchor to a path segment so names that merely end in "index"
		// (e.g. `reindex.mdx`) are not truncated; `$1` keeps the slash.
		.replace(/(^|\/)index\.mdx$/, '$1')
		.replace(/\.mdx$/, '')

	return 'https://developers.cloudflare.com/' + route
}
| 78 | + |
/**
 * Computes embeddings for `strings` with the `@cf/baai/bge-base-en-v1.5`
 * model, routing the call through `doWithRetries`.
 *
 * @param ai Binding whose `run` method invokes the model
 * @param strings Texts to embed, sent together in one request
 * @returns The `data` field of the model response
 */
async function getEmbeddings(ai: Ai, strings: string[]) {
	const runModel = () =>
		ai.run('@cf/baai/bge-base-en-v1.5', {
			text: strings,
			// @ts-expect-error pooling not in types yet
			pooling: 'cls',
		})

	const { data } = await doWithRetries(runModel)
	return data
}
| 90 | + |
/**
 * Runs `action`, retrying with exponential backoff and full jitter each time
 * it rejects.
 *
 * Every failure is logged with `console.error`. Once the allowed retries are
 * used up, the error from the final attempt is rethrown unchanged.
 *
 * @param action Operation to attempt; called once per attempt
 * @param options Optional tuning; the defaults match the previously
 *   hard-coded values
 * @param options.numRetries Retries allowed after the first attempt
 *   (default 10, i.e. at most 11 calls); negative values are treated as 0
 * @param options.initRetryMs Upper bound, in milliseconds, of the first
 *   backoff delay; the bound doubles with every retry (default 50)
 * @returns The value `action` resolves with
 * @throws Whatever `action` rejected with on the final attempt
 */
async function doWithRetries<T>(
	action: () => Promise<T>,
	{ numRetries = 10, initRetryMs = 50 }: { numRetries?: number; initRetryMs?: number } = {}
) {
	// Always make at least one attempt, even if a negative count is passed
	const maxRetries = Math.max(0, numRetries)

	for (let attempt = 0; attempt <= maxRetries; attempt++) {
		try {
			return await action()
		} catch (e) {
			// TODO: distinguish between user errors (4xx) and system errors (5xx)
			console.error(e)
			if (attempt >= maxRetries) {
				throw e
			}
			// Exponential backoff with full jitter
			await scheduler.wait(Math.random() * initRetryMs * Math.pow(2, attempt))
		}
	}
	// Not expected to be reached: the final iteration either returns or rethrows
	throw new Error('An unknown error occurred')
}
0 commit comments