diff --git a/.changeset/social-mammals-chew.md b/.changeset/social-mammals-chew.md new file mode 100644 index 00000000..e6f35b71 --- /dev/null +++ b/.changeset/social-mammals-chew.md @@ -0,0 +1,7 @@ +--- +'workers-observability': minor +'workers-bindings': minor +'docs-vectorize': minor +--- + +Updated the model for docs search to embeddinggemma-300m diff --git a/apps/docs-vectorize/wrangler.jsonc b/apps/docs-vectorize/wrangler.jsonc index bf99ac86..90c8edf1 100644 --- a/apps/docs-vectorize/wrangler.jsonc +++ b/apps/docs-vectorize/wrangler.jsonc @@ -31,7 +31,7 @@ "vectorize": [ { "binding": "VECTORIZE", - "index_name": "docs-bge-base" + "index_name": "docs-embeddinggemma-v1" } ], "vars": { @@ -72,7 +72,7 @@ "vectorize": [ { "binding": "VECTORIZE", - "index_name": "docs-bge-base" + "index_name": "docs-embeddinggemma-v1" } ], "analytics_engine_datasets": [ @@ -103,7 +103,7 @@ "vectorize": [ { "binding": "VECTORIZE", - "index_name": "docs-bge-base" + "index_name": "docs-embeddinggemma-v1" } ], "analytics_engine_datasets": [ diff --git a/apps/workers-bindings/wrangler.jsonc b/apps/workers-bindings/wrangler.jsonc index e171a91e..1e71e5dc 100644 --- a/apps/workers-bindings/wrangler.jsonc +++ b/apps/workers-bindings/wrangler.jsonc @@ -51,7 +51,7 @@ "vectorize": [ { "binding": "VECTORIZE", - "index_name": "docs-bge-base" + "index_name": "docs-embeddinggemma-v1" } ], "workers_dev": false, @@ -92,7 +92,7 @@ "vectorize": [ { "binding": "VECTORIZE", - "index_name": "docs-bge-base" + "index_name": "docs-embeddinggemma-v1" } ], "vars": { @@ -141,7 +141,7 @@ "vectorize": [ { "binding": "VECTORIZE", - "index_name": "docs-bge-base" + "index_name": "docs-embeddinggemma-v1" } ], "analytics_engine_datasets": [ diff --git a/apps/workers-observability/wrangler.jsonc b/apps/workers-observability/wrangler.jsonc index 2eaa2c95..8d60496e 100644 --- a/apps/workers-observability/wrangler.jsonc +++ b/apps/workers-observability/wrangler.jsonc @@ -41,7 +41,7 @@ "vectorize": [ { "binding": 
"VECTORIZE", - "index_name": "docs-bge-base" + "index_name": "docs-embeddinggemma-v1" } ], "vars": { @@ -98,7 +98,7 @@ "vectorize": [ { "binding": "VECTORIZE", - "index_name": "docs-bge-base" + "index_name": "docs-embeddinggemma-v1" } ], "analytics_engine_datasets": [ @@ -136,7 +136,7 @@ "vectorize": [ { "binding": "VECTORIZE", - "index_name": "docs-bge-base" + "index_name": "docs-embeddinggemma-v1" } ], "vars": { diff --git a/packages/mcp-common/src/tools/docs-vectorize.tools.ts b/packages/mcp-common/src/tools/docs-vectorize.tools.ts index 0079f7ad..0395a777 100644 --- a/packages/mcp-common/src/tools/docs-vectorize.tools.ts +++ b/packages/mcp-common/src/tools/docs-vectorize.tools.ts @@ -42,6 +42,7 @@ export function registerDocsTools(agent: CloudflareMcpAgentNoAccount, env: Requi .map((result) => { return ` ${result.url} +${result.title} ${result.text} @@ -93,10 +94,8 @@ ${result.text} } async function queryVectorize(ai: Ai, vectorizeIndex: VectorizeIndex, query: string, topK: number) { - // Recommendation from: https://huggingface.co/BAAI/bge-base-en-v1.5#model-list - const [queryEmbedding] = await getEmbeddings(ai, [ - 'Represent this sentence for searching relevant passages: ' + query, - ]) + // Recommendation from: https://ai.google.dev/gemma/docs/embeddinggemma/model_card#prompt_instructions + const [queryEmbedding] = await getEmbeddings(ai, ['task: search result | query: ' + query]) const { matches } = await vectorizeIndex.query(queryEmbedding, { topK, @@ -108,6 +107,7 @@ async function queryVectorize(ai: Ai, vectorizeIndex: VectorizeIndex, query: str similarity: Math.min(match.score, 1), id: match.id, url: sourceToUrl(String(match.metadata?.filePath ?? '')), + title: String(match.metadata?.title ?? ''), text: String(match.metadata?.text ?? 
''), })) } @@ -123,15 +123,15 @@ function sourceToUrl(path: string) { ) } -async function getEmbeddings(ai: Ai, strings: string[]) { +async function getEmbeddings(ai: Ai, strings: string[]): Promise<number[][]> { const response = await doWithRetries(() => - ai.run('@cf/baai/bge-base-en-v1.5', { + // @ts-expect-error embeddinggemma not in types yet + ai.run('@cf/google/embeddinggemma-300m', { text: strings, - // @ts-expect-error pooling not in types yet - pooling: 'cls', }) ) + // @ts-expect-error embeddinggemma not in types yet return response.data }