Skip to content

Commit 95f29b2

Browse files
committed
chore: Add interface for embeddings providers
We are using Zod to define the types so we can use the schemas later for tool contracts
1 parent dbaa468 commit 95f29b2

File tree

1 file changed

+82
-0
lines changed

1 file changed

+82
-0
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { createVoyage } from "voyage-ai-provider";
2+
import type { VoyageProvider } from "voyage-ai-provider";
3+
import { embedMany } from "ai";
4+
import type { UserConfig } from "../config.js";
5+
import assert from "assert";
6+
import { createFetch } from "@mongodb-js/devtools-proxy-support";
7+
import { z } from "zod";
8+
9+
/** Zod schema for a single item to embed: a plain string. */
const zEmbeddingsInput = z.string();

/** Static type of one embeddable input, derived from `zEmbeddingsInput`. */
type EmbeddingsInput = z.output<typeof zEmbeddingsInput>;

/** A single embedding, represented as an array of numbers. */
type Embeddings = Array<number>;
12+
13+
/**
 * Contract that an embeddings backend implements.
 *
 * @typeParam SupportedModels - String identifiers of the models the backend accepts.
 */
interface EmbeddingsProvider<SupportedModels extends string> {
    /**
     * Computes embeddings for a batch of inputs.
     *
     * Declared with method syntax (not as a function-typed property) so that
     * implementations may narrow the type of `parameters`.
     *
     * @param modelId - Identifier of the model to use.
     * @param content - Inputs to embed.
     * @param parameters - Provider-specific options; intentionally untyped at this level.
     * @returns A promise resolving to an array of embeddings.
     */
    embed(
        modelId: SupportedModels,
        content: EmbeddingsInput[],
        parameters: unknown
    ): Promise<Embeddings[]>;
}
16+
17+
// The literal dimension values accepted, before any default is applied.
const zVoyageDimensionLiterals = z.union([z.literal(256), z.literal(512), z.literal(1024), z.literal(2048)]);

/** Accepted `outputDimensions` values; resolves to 1024 when the value is omitted. */
const zVoyageSupportedDimensions = zVoyageDimensionLiterals.default(1024);

/** Accepted `outputDtype` values; resolves to "float" when the value is omitted. */
const zVoyageQuantization = z.enum(["float", "int8", "binary", "ubinary"]).default("float");

/** Accepted `inputType` values. No default is declared, so a value is required. */
const zVoyageInputQuery = z.enum(["query", "document"]);
23+
24+
/** Identifiers of the Voyage models this module accepts. */
export const zVoyageModels = z.enum(["voyage-3-large", "voyage-3.5", "voyage-3.5-lite", "voyage-code-3"]);

/**
 * Builds the parameter schema used by each model entry below.
 *
 * A new schema object is created on every call, so each model key keeps its
 * own instance and can be changed independently later.
 */
const createVoyageParametersSchema = () =>
    z.object({
        inputType: zVoyageInputQuery,
        outputDimensions: zVoyageSupportedDimensions,
        outputDtype: zVoyageQuantization,
    });

/**
 * Per-model parameter schemas, keyed by model identifier.
 *
 * All four models currently share the same shape: `inputType`,
 * `outputDimensions` and `outputDtype`.
 */
export const zVoyageParameters = {
    "voyage-3-large": createVoyageParametersSchema(),
    "voyage-3.5": createVoyageParametersSchema(),
    "voyage-3.5-lite": createVoyageParametersSchema(),
    "voyage-code-3": createVoyageParametersSchema(),
} as const;
47+
48+
/** Union of the model identifier strings listed in `zVoyageModels`. */
type VoyageModels = z.output<typeof zVoyageModels>;
49+
/**
 * `EmbeddingsProvider` implementation that delegates to a Voyage provider
 * created with `createVoyage` and to `embedMany` from the `ai` package.
 */
class VoyageEmbeddingsProvider implements EmbeddingsProvider<VoyageModels> {
    private readonly voyage: VoyageProvider;

    /**
     * @param userConfig - Configuration; `voyageApiKey` must be truthy.
     * @param providedFetch - Optional fetch implementation. When omitted, one is
     *   created with `createFetch({ useEnvironmentVariableProxies: true })`.
     * @throws AssertionError when `userConfig.voyageApiKey` is falsy.
     */
    constructor(userConfig: UserConfig, providedFetch?: typeof fetch) {
        const apiKey = userConfig.voyageApiKey;
        assert(apiKey, "voyageApiKey does not exist. This is likely a bug.");

        // Only build the fallback fetch when the caller did not supply one.
        const resolvedFetch = providedFetch ?? createFetch({ useEnvironmentVariableProxies: true });

        this.voyage = createVoyage({
            apiKey,
            // The result is force-cast to the global `fetch` type before being handed over.
            fetch: resolvedFetch as unknown as typeof fetch,
        });
    }

    /**
     * Reports whether the configuration carries a truthy `voyageApiKey`.
     *
     * @param userConfig - Configuration to inspect.
     * @returns `true` when `voyageApiKey` is truthy, `false` otherwise.
     */
    static isConfiguredIn(userConfig: UserConfig): boolean {
        return Boolean(userConfig.voyageApiKey);
    }

    /**
     * Embeds a batch of inputs with the given Voyage model.
     *
     * @param modelId - Model identifier; also selects the type of `parameters`.
     * @param content - Inputs to embed.
     * @param parameters - Options typed after the schema registered for `modelId`
     *   in `zVoyageParameters`.
     * @returns The `embeddings` array returned by `embedMany`.
     */
    async embed<Model extends VoyageModels>(
        modelId: Model,
        content: EmbeddingsInput[],
        parameters: z.infer<(typeof zVoyageParameters)[Model]>
    ): Promise<Embeddings[]> {
        const result = await embedMany({
            model: this.voyage.textEmbeddingModel(modelId),
            values: content,
            // Parameters are forwarded as received; they are not parsed through the Zod schema here.
            // NOTE(review): confirm the key names match what voyage-ai-provider expects.
            providerOptions: { voyage: parameters },
        });

        return result.embeddings;
    }
}
75+
76+
/**
 * Selects an embeddings provider for the given configuration.
 *
 * @param userConfig - Configuration to inspect.
 * @returns A new `VoyageEmbeddingsProvider` when
 *   `VoyageEmbeddingsProvider.isConfiguredIn(userConfig)` is true, otherwise `undefined`.
 */
export function getEmbeddingsProvider(userConfig: UserConfig): EmbeddingsProvider<VoyageModels> | undefined {
    // Voyage is the only provider handled here; bail out when it is not configured.
    if (!VoyageEmbeddingsProvider.isConfiguredIn(userConfig)) {
        return undefined;
    }

    return new VoyageEmbeddingsProvider(userConfig);
}

0 commit comments

Comments
 (0)