Skip to content

Commit 53fa376

Browse files
authored
[ENH] Auto load EF package if installed (#5811)
## Description of changes

For our JS/TS SDK, users need to install embedding function (EF) packages separately. When imported, each of these packages registers itself as "known" in `chromadb`. However, a user who only calls `getCollection` might never explicitly import an EF package. In that case, we should still be able to build the EF from the schema, so we now try to load the EF package automatically if it is installed.

## Test plan

_How are these changes tested?_

- [ ] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust
1 parent f0a1cb6 commit 53fa376

File tree

6 files changed

+870
-383
lines changed

6 files changed

+870
-383
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/new-js/packages/chromadb/src/chroma-client.ts

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,16 @@ const resolveSchemaEmbeddingFunction = (
3232
}
3333

3434
const embeddingOverride =
35-
schema.keys[EMBEDDING_KEY]?.floatList?.vectorIndex?.config.embeddingFunction ?? undefined;
35+
schema.keys[EMBEDDING_KEY]?.floatList?.vectorIndex?.config
36+
.embeddingFunction ?? undefined;
3637
if (embeddingOverride) {
3738
return embeddingOverride;
3839
}
3940

40-
return schema.defaults.floatList?.vectorIndex?.config.embeddingFunction ?? undefined;
41+
return (
42+
schema.defaults.floatList?.vectorIndex?.config.embeddingFunction ??
43+
undefined
44+
);
4145
};
4246

4347
/**
@@ -235,13 +239,15 @@ export class ChromaClient {
235239

236240
return Promise.all(
237241
data.map(async (collection) => {
238-
const schema = Schema.deserializeFromJSON(collection.schema ?? undefined);
242+
const schema = await Schema.deserializeFromJSON(
243+
collection.schema ?? undefined,
244+
);
239245
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(schema);
240246
const resolvedEmbeddingFunction =
241-
getEmbeddingFunction(
247+
(await getEmbeddingFunction(
242248
collection.name,
243249
collection.configuration_json.embedding_function ?? undefined,
244-
) ?? schemaEmbeddingFunction;
250+
)) ?? schemaEmbeddingFunction;
245251

246252
return new CollectionImpl({
247253
chromaClient: this,
@@ -312,14 +318,17 @@ export class ChromaClient {
312318
},
313319
});
314320

315-
const serverSchema = Schema.deserializeFromJSON(data.schema ?? undefined);
316-
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(serverSchema);
321+
const serverSchema = await Schema.deserializeFromJSON(
322+
data.schema ?? undefined,
323+
);
324+
const schemaEmbeddingFunction =
325+
resolveSchemaEmbeddingFunction(serverSchema);
317326
const resolvedEmbeddingFunction =
318327
embeddingFunction ??
319-
getEmbeddingFunction(
328+
(await getEmbeddingFunction(
320329
data.name,
321330
data.configuration_json.embedding_function ?? undefined,
322-
) ??
331+
)) ??
323332
schemaEmbeddingFunction;
324333

325334
return new CollectionImpl({
@@ -354,14 +363,14 @@ export class ChromaClient {
354363
path: { ...(await this._path()), collection_id: name },
355364
});
356365

357-
const schema = Schema.deserializeFromJSON(data.schema ?? undefined);
366+
const schema = await Schema.deserializeFromJSON(data.schema ?? undefined);
358367
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(schema);
359368
const resolvedEmbeddingFunction =
360369
embeddingFunction ??
361-
getEmbeddingFunction(
370+
(await getEmbeddingFunction(
362371
data.name,
363372
data.configuration_json.embedding_function ?? undefined,
364-
) ??
373+
)) ??
365374
schemaEmbeddingFunction;
366375

367376
return new CollectionImpl({
@@ -445,14 +454,17 @@ export class ChromaClient {
445454
},
446455
});
447456

448-
const serverSchema = Schema.deserializeFromJSON(data.schema ?? undefined);
449-
const schemaEmbeddingFunction = resolveSchemaEmbeddingFunction(serverSchema);
457+
const serverSchema = await Schema.deserializeFromJSON(
458+
data.schema ?? undefined,
459+
);
460+
const schemaEmbeddingFunction =
461+
resolveSchemaEmbeddingFunction(serverSchema);
450462
const resolvedEmbeddingFunction =
451463
embeddingFunction ??
452-
getEmbeddingFunction(
464+
(await getEmbeddingFunction(
453465
name,
454466
data.configuration_json.embedding_function ?? undefined,
455-
) ??
467+
)) ??
456468
schemaEmbeddingFunction;
457469

458470
return new CollectionImpl({

clients/new-js/packages/chromadb/src/collection-configuration.ts

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -80,35 +80,52 @@ export const processCreateCollectionConfig = async ({
8080
const overallEf = embeddingFunction || configuration?.embeddingFunction;
8181

8282
if (overallEf && overallEf.defaultSpace && overallEf.supportedSpaces) {
83-
if (configuration?.hnsw === undefined && configuration?.spann === undefined) {
83+
if (
84+
configuration?.hnsw === undefined &&
85+
configuration?.spann === undefined
86+
) {
8487
if (metadata === undefined || metadata?.["hnsw:space"] === undefined) {
8588
if (!configuration) configuration = {};
8689
configuration.hnsw = { space: overallEf.defaultSpace() };
8790
}
8891
}
8992

90-
if (configuration?.hnsw && !configuration.hnsw.space && overallEf.defaultSpace) {
93+
if (
94+
configuration?.hnsw &&
95+
!configuration.hnsw.space &&
96+
overallEf.defaultSpace
97+
) {
9198
configuration.hnsw.space = overallEf.defaultSpace();
9299
}
93100

94-
if (configuration?.spann && !configuration.spann.space && overallEf.defaultSpace) {
101+
if (
102+
configuration?.spann &&
103+
!configuration.spann.space &&
104+
overallEf.defaultSpace
105+
) {
95106
configuration.spann.space = overallEf.defaultSpace();
96107
}
97108

98109
if (overallEf.supportedSpaces) {
99110
const supportedSpaces = overallEf.supportedSpaces();
100111

101-
if (configuration?.hnsw?.space && !supportedSpaces.includes(configuration.hnsw.space)) {
112+
if (
113+
configuration?.hnsw?.space &&
114+
!supportedSpaces.includes(configuration.hnsw.space)
115+
) {
102116
console.warn(
103-
`Space '${configuration.hnsw.space}' is not supported by embedding function '${overallEf.name || 'unknown'}'. ` +
104-
`Supported spaces: ${supportedSpaces.join(', ')}`
117+
`Space '${configuration.hnsw.space}' is not supported by embedding function '${overallEf.name || "unknown"}'. ` +
118+
`Supported spaces: ${supportedSpaces.join(", ")}`,
105119
);
106120
}
107121

108-
if (configuration?.spann?.space && !supportedSpaces.includes(configuration.spann.space)) {
122+
if (
123+
configuration?.spann?.space &&
124+
!supportedSpaces.includes(configuration.spann.space)
125+
) {
109126
console.warn(
110-
`Space '${configuration.spann.space}' is not supported by embedding function '${overallEf.name || 'unknown'}'. ` +
111-
`Supported spaces: ${supportedSpaces.join(', ')}`
127+
`Space '${configuration.spann.space}' is not supported by embedding function '${overallEf.name || "unknown"}'. ` +
128+
`Supported spaces: ${supportedSpaces.join(", ")}`,
112129
);
113130
}
114131

@@ -117,11 +134,13 @@ export const processCreateCollectionConfig = async ({
117134
!configuration?.spann &&
118135
metadata &&
119136
typeof metadata["hnsw:space"] === "string" &&
120-
!supportedSpaces.includes(metadata["hnsw:space"] as EmbeddingFunctionSpace)
137+
!supportedSpaces.includes(
138+
metadata["hnsw:space"] as EmbeddingFunctionSpace,
139+
)
121140
) {
122141
console.warn(
123-
`Space '${metadata["hnsw:space"]}' from metadata is not supported by embedding function '${overallEf.name || 'unknown'}'. ` +
124-
`Supported spaces: ${supportedSpaces.join(', ')}`
142+
`Space '${metadata["hnsw:space"]}' from metadata is not supported by embedding function '${overallEf.name || "unknown"}'. ` +
143+
`Supported spaces: ${supportedSpaces.join(", ")}`,
125144
);
126145
}
127146
}
@@ -164,10 +183,10 @@ export const processUpdateCollectionConfig = async ({
164183

165184
const embeddingFunction =
166185
currentEmbeddingFunction ||
167-
getEmbeddingFunction(
186+
(await getEmbeddingFunction(
168187
collectionName,
169188
currentConfiguration.embeddingFunction ?? undefined,
170-
);
189+
));
171190

172191
const newEmbeddingFunction = newConfiguration.embeddingFunction;
173192

clients/new-js/packages/chromadb/src/embedding-function.ts

Lines changed: 81 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ export interface SparseEmbeddingFunction {
9292
*/
9393
export interface EmbeddingFunctionClass {
9494
/** Constructor for creating new instances */
95-
new(...args: any[]): EmbeddingFunction;
95+
new (...args: any[]): EmbeddingFunction;
9696
/** Name identifier for the embedding function */
9797
name: string;
9898
/** Static method to build instance from configuration */
@@ -105,7 +105,7 @@ export interface EmbeddingFunctionClass {
105105
*/
106106
export interface SparseEmbeddingFunctionClass {
107107
/** Constructor for creating new instances */
108-
new(...args: any[]): SparseEmbeddingFunction;
108+
new (...args: any[]): SparseEmbeddingFunction;
109109
/** Name identifier for the embedding function */
110110
name: string;
111111
/** Static method to build instance from configuration */
@@ -121,6 +121,25 @@ export const knownEmbeddingFunctions = new Map<
121121
EmbeddingFunctionClass
122122
>();
123123

124+
const pythonEmbeddingFunctions: Record<string, string> = {
125+
onnx_mini_lm_l6_v2: "default-embed",
126+
default: "default-embed",
127+
together_ai: "together-ai",
128+
};
129+
130+
const unsupportedEmbeddingFunctions: Set<string> = new Set([
131+
"amazon_bedrock",
132+
"baseten",
133+
"langchain",
134+
"google_palm",
135+
"huggingface",
136+
"instructor",
137+
"open_clip",
138+
"roboflow",
139+
"sentence_transformer",
140+
"text2vec",
141+
]);
142+
124143
/**
125144
* Registry of available sparse embedding functions.
126145
* Maps function names to their constructor classes.
@@ -130,6 +149,16 @@ export const knownSparseEmbeddingFunctions = new Map<
130149
SparseEmbeddingFunctionClass
131150
>();
132151

152+
const pythonSparseEmbeddingFunctions: Record<string, string> = {
153+
chroma_bm25: "chroma-bm25",
154+
};
155+
156+
const unsupportedSparseEmbeddingFunctions: Set<string> = new Set([
157+
"bm25",
158+
"fastembed_sparse",
159+
"huggingface_sparse",
160+
]);
161+
133162
/**
134163
* Union type covering both dense and sparse embedding functions.
135164
*/
@@ -177,7 +206,7 @@ export const registerSparseEmbeddingFunction = (
177206
* @param efConfig - Configuration for the embedding function
178207
* @returns EmbeddingFunction instance or undefined if it cannot be constructed
179208
*/
180-
export const getEmbeddingFunction = (
209+
export const getEmbeddingFunction = async (
181210
collectionName: string,
182211
efConfig?: EmbeddingFunctionConfiguration,
183212
) => {
@@ -206,16 +235,33 @@ export const getEmbeddingFunction = (
206235
return undefined;
207236
}
208237

209-
const name = efConfig.name;
210-
211-
const embeddingFunction = knownEmbeddingFunctions.get(name);
212-
if (!embeddingFunction) {
238+
if (unsupportedEmbeddingFunctions.has(efConfig.name)) {
213239
console.warn(
214-
`Collection ${collectionName} was created with the ${embeddingFunction} embedding function. However, the @chroma-core/${embeddingFunction} package is not install. 'add' and 'query' will fail unless you provide them embeddings directly, or install the @chroma-core/${embeddingFunction} package.`,
240+
`Embedding function ${efConfig.name} is not supported in the JS/TS SDK. 'add' and 'query' will fail unless you provide them embeddings directly.`,
215241
);
216242
return undefined;
217243
}
218244

245+
const packageName = pythonEmbeddingFunctions[efConfig.name] || efConfig.name;
246+
247+
let embeddingFunction = knownEmbeddingFunctions.get(packageName);
248+
if (!embeddingFunction) {
249+
try {
250+
const fullPackageName = `@chroma-core/${packageName}`;
251+
await import(fullPackageName);
252+
embeddingFunction = knownEmbeddingFunctions.get(packageName);
253+
} catch (error) {
254+
// Dynamic loading failed, proceed with warning
255+
}
256+
257+
if (!embeddingFunction) {
258+
console.warn(
259+
`Collection ${collectionName} was created with the ${packageName} embedding function. However, the @chroma-core/${packageName} package is not install. 'add' and 'query' will fail unless you provide them embeddings directly, or install the @chroma-core/${packageName} package.`,
260+
);
261+
return undefined;
262+
}
263+
}
264+
219265
let constructorConfig: Record<string, any> =
220266
efConfig.type === "known" ? (efConfig.config as Record<string, any>) : {};
221267

@@ -225,12 +271,12 @@ export const getEmbeddingFunction = (
225271
}
226272

227273
console.warn(
228-
`Embedding function ${name} does not define a 'buildFromConfig' function. 'add' and 'query' will fail unless you provide them embeddings directly.`,
274+
`Embedding function ${packageName} does not define a 'buildFromConfig' function. 'add' and 'query' will fail unless you provide them embeddings directly.`,
229275
);
230276
return undefined;
231277
} catch (e) {
232278
console.warn(
233-
`Embedding function ${name} failed to build with config: ${constructorConfig}. 'add' and 'query' will fail unless you provide them embeddings directly. Error: ${e}`,
279+
`Embedding function ${packageName} failed to build with config: ${constructorConfig}. 'add' and 'query' will fail unless you provide them embeddings directly. Error: ${e}`,
234280
);
235281
return undefined;
236282
}
@@ -242,7 +288,7 @@ export const getEmbeddingFunction = (
242288
* @param efConfig - Configuration for the sparse embedding function
243289
* @returns SparseEmbeddingFunction instance or undefined if it cannot be constructed
244290
*/
245-
export const getSparseEmbeddingFunction = (
291+
export const getSparseEmbeddingFunction = async (
246292
collectionName: string,
247293
efConfig?: EmbeddingFunctionConfiguration,
248294
) => {
@@ -254,25 +300,36 @@ export const getSparseEmbeddingFunction = (
254300
return undefined;
255301
}
256302

257-
if (efConfig.type === "unknown") {
258-
console.warn(
259-
`Unknown embedding function configuration for collection ${collectionName}. 'add' and 'query' will fail unless you provide them embeddings directly.`,
260-
);
303+
if (efConfig.type !== "known") {
261304
return undefined;
262305
}
263306

264-
if (efConfig.type !== "known") {
307+
if (unsupportedSparseEmbeddingFunctions.has(efConfig.name)) {
308+
console.warn(
309+
"Embedding function ${efConfig.name} is not supported in the JS/TS SDK. 'add' and 'query' will fail unless you provide them embeddings directly.",
310+
);
265311
return undefined;
266312
}
267313

268-
const name = efConfig.name;
314+
const packageName =
315+
pythonSparseEmbeddingFunctions[efConfig.name] || efConfig.name;
269316

270-
const sparseEmbeddingFunction = knownSparseEmbeddingFunctions.get(name);
317+
let sparseEmbeddingFunction = knownSparseEmbeddingFunctions.get(packageName);
271318
if (!sparseEmbeddingFunction) {
272-
console.warn(
273-
`Collection ${collectionName} was created with the ${name} sparse embedding function. However, the @chroma-core/${name} package is not installed.`,
274-
);
275-
return undefined;
319+
try {
320+
const fullPackageName = `@chroma-core/${packageName}`;
321+
await import(fullPackageName);
322+
sparseEmbeddingFunction = knownSparseEmbeddingFunctions.get(packageName);
323+
} catch (error) {
324+
// Dynamic loading failed, proceed with warning
325+
}
326+
327+
if (!sparseEmbeddingFunction) {
328+
console.warn(
329+
`Collection ${collectionName} was created with the ${packageName} sparse embedding function. However, the @chroma-core/${packageName} package is not installed.`,
330+
);
331+
return undefined;
332+
}
276333
}
277334

278335
let constructorConfig: Record<string, any> =
@@ -284,12 +341,12 @@ export const getSparseEmbeddingFunction = (
284341
}
285342

286343
console.warn(
287-
`Sparse embedding function ${name} does not define a 'buildFromConfig' function.`,
344+
`Sparse embedding function ${packageName} does not define a 'buildFromConfig' function.`,
288345
);
289346
return undefined;
290347
} catch (e) {
291348
console.warn(
292-
`Sparse embedding function ${name} failed to build with config: ${constructorConfig}. Error: ${e}`,
349+
`Sparse embedding function ${packageName} failed to build with config: ${constructorConfig}. Error: ${e}`,
293350
);
294351
return undefined;
295352
}

0 commit comments

Comments
 (0)