From 194a537f3be31a53d7ae5d9a1f9263c101d3de90 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Thu, 17 Oct 2024 14:29:47 +0530 Subject: [PATCH 1/3] VS-284: Enable query by id via Wrangler --- .changeset/happy-scissors-deliver.md | 5 + .../src/__tests__/vectorize/vectorize.test.ts | 192 ++++++++---------- packages/wrangler/src/vectorize/client.ts | 24 ++- packages/wrangler/src/vectorize/query.ts | 38 +++- 4 files changed, 142 insertions(+), 117 deletions(-) create mode 100644 .changeset/happy-scissors-deliver.md diff --git a/.changeset/happy-scissors-deliver.md b/.changeset/happy-scissors-deliver.md new file mode 100644 index 000000000000..2cf4b5304f6b --- /dev/null +++ b/.changeset/happy-scissors-deliver.md @@ -0,0 +1,5 @@ +--- +"wrangler": minor +--- + +feat: [VS-284] Enable Vectorize query by id via Wrangler diff --git a/packages/wrangler/src/__tests__/vectorize/vectorize.test.ts b/packages/wrangler/src/__tests__/vectorize/vectorize.test.ts index bb87fdac5ef0..9ed1057cfdf0 100644 --- a/packages/wrangler/src/__tests__/vectorize/vectorize.test.ts +++ b/packages/wrangler/src/__tests__/vectorize/vectorize.test.ts @@ -170,7 +170,8 @@ describe("vectorize help", () => { -v, --version Show version number [boolean] OPTIONS - --vector Vector to query the Vectorize Index [array] [required] + --vector Vector to query the Vectorize Index [array] + --vector-id Identifier for a vector in the index against which the index should be queried [string] --top-k The number of results (nearest neighbors) to return [number] [default: 5] --return-values Specify if the vector values should be included in the results [boolean] [default: false] --return-metadata Specify if the vector metadata should be included in the results [string] [choices: \\"all\\", \\"indexed\\", \\"none\\"] [default: \\"none\\"] @@ -514,43 +515,17 @@ describe("vectorize commands", () => { await runWrangler( "vectorize query test-index --vector 1 2 3 '4' 1.5 '2.6' a 'b' null 7 abc 8 undefined" ); - expect(std.out).toMatchInlineSnapshot(` - "📋 Searching for relevant vectors... -{ - \\"count\\": 2, - \\"matches\\": [ - { - \\"id\\": \\"a\\", - \\"score\\": 0.5, - \\"values\\": [ - 1, - 2, - 3, - 4 - ], - \\"namespace\\": \\"abcd\\", - \\"metadata\\": { - \\"a\\": true, - \\"b\\": 123 - } - }, - { - \\"id\\": \\"b\\", - \\"score\\": 0.75, - \\"values\\": [ - 5, - 6, - 7, - 8 - ], - \\"metadata\\": { - \\"c\\": false, - \\"b\\": \\"123\\" - } - } - ] -}" - `); + expect(std.out).toMatchInlineSnapshot(querySnapshot); + }); + + it("should handle a query with a vector-id", async () => { + mockVectorizeV2Request(); + await runWrangler("vectorize query test-index --vector-id some-vector-id"); + expect(std.out).toMatchInlineSnapshot(querySnapshot); + + // No warning or error + expect(std.warn).toMatchInlineSnapshot(`""`); + expect(std.err).toMatchInlineSnapshot(`""`); }); it("should handle a query on a vectorize index with all options", async () => { @@ -558,43 +533,7 @@ describe("vectorize commands", () => { await runWrangler( `vectorize query test-index --vector 1 2 3 '4' --top-k=2 --return-values=true --return-metadata=indexed --namespace=abc --filter '{ "p1": "abc", "p2": { "$ne": true }, "p3": 10, "p4": false, "nested.p5": "abcd" }'` ); - expect(std.out).toMatchInlineSnapshot(` - "📋 Searching for relevant vectors... -{ - \\"count\\": 2, - \\"matches\\": [ - { - \\"id\\": \\"a\\", - \\"score\\": 0.5, - \\"values\\": [ - 1, - 2, - 3, - 4 - ], - \\"namespace\\": \\"abcd\\", - \\"metadata\\": { - \\"a\\": true, - \\"b\\": 123 - } - }, - { - \\"id\\": \\"b\\", - \\"score\\": 0.75, - \\"values\\": [ - 5, - 6, - 7, - 8 - ], - \\"metadata\\": { - \\"c\\": false, - \\"b\\": \\"123\\" - } - } - ] -}" - `); + expect(std.out).toMatchInlineSnapshot(querySnapshot); // No warning > Valid filter expect(std.warn).toMatchInlineSnapshot(`""`); @@ -605,43 +544,7 @@ describe("vectorize commands", () => { await runWrangler( "vectorize query test-index --vector 1 2 3 '4' --filter='{ 'p1': [1,2,3] }'" ); - expect(std.out).toMatchInlineSnapshot(` - "📋 Searching for relevant vectors... -{ - \\"count\\": 2, - \\"matches\\": [ - { - \\"id\\": \\"a\\", - \\"score\\": 0.5, - \\"values\\": [ - 1, - 2, - 3, - 4 - ], - \\"namespace\\": \\"abcd\\", - \\"metadata\\": { - \\"a\\": true, - \\"b\\": 123 - } - }, - { - \\"id\\": \\"b\\", - \\"score\\": 0.75, - \\"values\\": [ - 5, - 6, - 7, - 8 - ], - \\"metadata\\": { - \\"c\\": false, - \\"b\\": \\"123\\" - } - } - ] -}" - `); + expect(std.out).toMatchInlineSnapshot(querySnapshot); expect(std.warn).toMatchInlineSnapshot(` "▲ [WARNING] 🚨 Invalid query filter. Please use the recommended format. @@ -660,6 +563,34 @@ describe("vectorize commands", () => { expect(std.warn).toMatchInlineSnapshot(` "▲ [WARNING] Could not find any relevant vectors +" + `); + }); + + it("should fail query when neither vector nor vector-id is provided", async () => { + mockVectorizeV2RequestError(); + await runWrangler( + "vectorize query test-index --top-k=2 --return-values=true" + ); + expect(std.out).toMatchInlineSnapshot(`""`); + + expect(std.err).toMatchInlineSnapshot(` + "X [ERROR] 🚨 Either vector or vector-id param must be provided, but not both. + +" + `); + }); + + it("should fail query when both vector and vector-id are provided", async () => { + mockVectorizeV2RequestError(); + await runWrangler( + "vectorize query test-index --vector 1 2 3 '4' --vector-id some-vector-id" + ); + expect(std.out).toMatchInlineSnapshot(`""`); + + expect(std.err).toMatchInlineSnapshot(` + "X [ERROR] 🚨 Either vector or vector-id param must be provided, but not both. + " `); }); @@ -857,6 +788,43 @@ describe("vectorize query filter", () => { }); }); +const querySnapshot = ` + "📋 Searching for relevant vectors... +{ + \\"count\\": 2, + \\"matches\\": [ + { + \\"id\\": \\"a\\", + \\"score\\": 0.5, + \\"values\\": [ + 1, + 2, + 3, + 4 + ], + \\"namespace\\": \\"abcd\\", + \\"metadata\\": { + \\"a\\": true, + \\"b\\": 123 + } + }, + { + \\"id\\": \\"b\\", + \\"score\\": 0.75, + \\"values\\": [ + 5, + 6, + 7, + 8 + ], + \\"metadata\\": { + \\"c\\": false, + \\"b\\": \\"123\\" + } + } + ] +}"`; + /** Create a mock handler for the Vectorize API */ function mockVectorizeRequest() { msw.use( diff --git a/packages/wrangler/src/vectorize/client.ts b/packages/wrangler/src/vectorize/client.ts index 63d0af18c481..71eeaf4db40d 100644 --- a/packages/wrangler/src/vectorize/client.ts +++ b/packages/wrangler/src/vectorize/client.ts @@ -131,7 +131,7 @@ export async function upsertIntoIndex( ); } -export async function queryIndex( +export async function queryIndexByVector( config: Config, indexName: string, vector: VectorFloatArray | number[], @@ -153,6 +153,28 @@ export async function queryIndex( ); } +export async function queryIndexByVectorId( + config: Config, + indexName: string, + vectorId: string, + options: VectorizeQueryOptions +): Promise { + const accountId = await requireAuth(config); + return await fetchResult( + `/accounts/${accountId}/vectorize/v2/indexes/${indexName}/query`, + { + method: "POST", + headers: { + "content-type": jsonContentType, + }, + body: JSON.stringify({ + ...options, + vectorId, + }), + } + ); +} + export async function getByIds( config: Config, indexName: string, diff --git a/packages/wrangler/src/vectorize/query.ts b/packages/wrangler/src/vectorize/query.ts index 8043bf546ae7..5701167c97bc 100644 --- a/packages/wrangler/src/vectorize/query.ts +++ b/packages/wrangler/src/vectorize/query.ts @@ -1,12 +1,13 @@ import { readConfig } from "../config"; import { logger } from "../logger"; -import { queryIndex } from "./client"; +import { queryIndexByVector, queryIndexByVectorId } from "./client"; import { vectorizeBetaWarning } from "./common"; import type { CommonYargsArgv, StrictYargsOptionsToInterface, } from "../yargs-types"; import type { + VectorizeMatches, VectorizeMetadataFilterInnerValue, VectorizeMetadataFilterValue, VectorizeMetadataRetrievalLevel, @@ -26,7 +27,6 @@ export function options(yargs: CommonYargsArgv) { .options({ vector: { type: "array", - demandOption: true, describe: "Vector to query the Vectorize Index", coerce: (arg: unknown[]) => arg @@ -38,6 +38,11 @@ export function options(yargs: CommonYargsArgv) { typeof value === "number" && !isNaN(value) ), }, + "vector-id": { + type: "string", + describe: + "Identifier for a vector in the index against which the index should be queried", + }, "top-k": { type: "number", default: 5, @@ -114,10 +119,35 @@ export async function handler( } } + if ( + (args.vector === undefined && args.vectorId === undefined) || + (args.vector !== undefined && args.vectorId !== undefined) + ) { + logger.error( + "🚨 Either vector or vector-id param must be provided, but not both." + ); + return; + } + logger.log(`📋 Searching for relevant vectors...`); - const res = await queryIndex(config, args.name, args.vector, queryOptions); + let res: VectorizeMatches | undefined; + if (args.vector !== undefined) { + res = await queryIndexByVector( + config, + args.name, + args.vector, + queryOptions + ); + } else if (args.vectorId !== undefined) { + res = await queryIndexByVectorId( + config, + args.name, + args.vectorId, + queryOptions + ); + } - if (res.count === 0) { + if (res === undefined || res.count === 0) { logger.warn(`Could not find any relevant vectors`); return; } From 91027895bb696ab8e90ed9da7896d5a9fddb4ebf Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Tue, 22 Oct 2024 07:44:56 -0500 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Carmen Popoviciu --- packages/wrangler/src/__tests__/vectorize/vectorize.test.ts | 4 ++-- packages/wrangler/src/vectorize/query.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/wrangler/src/__tests__/vectorize/vectorize.test.ts b/packages/wrangler/src/__tests__/vectorize/vectorize.test.ts index 9ed1057cfdf0..3e0067f276f1 100644 --- a/packages/wrangler/src/__tests__/vectorize/vectorize.test.ts +++ b/packages/wrangler/src/__tests__/vectorize/vectorize.test.ts @@ -575,7 +575,7 @@ describe("vectorize commands", () => { expect(std.out).toMatchInlineSnapshot(`""`); expect(std.err).toMatchInlineSnapshot(` - "X [ERROR] 🚨 Either vector or vector-id param must be provided, but not both. + "X [ERROR] 🚨 Either vector or vector-id parameter must be provided, but not both. " `); @@ -589,7 +589,7 @@ describe("vectorize commands", () => { expect(std.out).toMatchInlineSnapshot(`""`); expect(std.err).toMatchInlineSnapshot(` - "X [ERROR] 🚨 Either vector or vector-id param must be provided, but not both. + "X [ERROR] 🚨 Either vector or vector-id parameter must be provided, but not both. " `); diff --git a/packages/wrangler/src/vectorize/query.ts b/packages/wrangler/src/vectorize/query.ts index 5701167c97bc..61990cbdf06f 100644 --- a/packages/wrangler/src/vectorize/query.ts +++ b/packages/wrangler/src/vectorize/query.ts @@ -124,7 +124,7 @@ export async function handler( (args.vector !== undefined && args.vectorId !== undefined) ) { logger.error( - "🚨 Either vector or vector-id param must be provided, but not both." + "🚨 Either vector or vector-id parameter must be provided, but not both." ); return; } From 03a38333221885344de8af70e3d680186546f3e4 Mon Sep 17 00:00:00 2001 From: emily-shen <69125074+emily-shen@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:06:33 +0100 Subject: [PATCH 3/3] Update .changeset/happy-scissors-deliver.md Co-authored-by: Edmund Hung --- .changeset/happy-scissors-deliver.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/happy-scissors-deliver.md b/.changeset/happy-scissors-deliver.md index 2cf4b5304f6b..3f2bae3a8cbe 100644 --- a/.changeset/happy-scissors-deliver.md +++ b/.changeset/happy-scissors-deliver.md @@ -2,4 +2,4 @@ "wrangler": minor --- -feat: [VS-284] Enable Vectorize query by id via Wrangler +feat: Enable Vectorize query by id via Wrangler