Skip to content

Commit f557b7c

Browse files
authored
[BUG] Use schema ef when trying to resolve js overallEf, remove exception on default ef (#5832)
## Description of changes _Summarize the changes made by this PR._ - Improvements & Bug fixes - Fixes #5829 - there was an issue in the js client where if the ef is not provided at the highest level or config, and if default-embed isn't installed an exception was thrown before sending the create/getOrCreateCall. 2 issues there: 1. there shouldnt be an exception thrown if default embed isnt installed, it should instead warn. That way the user is aware they must install prior to adding/querying 2. processing collection config should also handle the schema EF if available to determine whether to fallback to default config - New functionality - ... ## Test plan _How are these changes tested?_ - [ ] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust ## Migration plan _Are there any migrations, or any forwards/backwards compatibility changes needed in order to make sure this change deploys reliably?_ ## Observability plan _What is the plan to instrument and monitor this change?_ ## Documentation Changes _Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_
1 parent 5d5b3b3 commit f557b7c

File tree

7 files changed

+180
-50
lines changed

7 files changed

+180
-50
lines changed

clients/new-js/packages/chromadb/src/chroma-client.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ export class ChromaClient {
304304
configuration,
305305
embeddingFunction,
306306
metadata,
307+
schema,
307308
});
308309

309310
const { data } = await Api.createCollection({

clients/new-js/packages/chromadb/src/chroma-fetch.ts

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ import {
1212
const offlineError = (error: any): boolean => {
1313
return Boolean(
1414
(error?.name === "TypeError" || error?.name === "FetchError") &&
15-
(error.message?.includes("fetch failed") ||
16-
error.message?.includes("Failed to fetch") ||
17-
error.message?.includes("ENOTFOUND")),
15+
(error.message?.includes("fetch failed") ||
16+
error.message?.includes("Failed to fetch") ||
17+
error.message?.includes("ENOTFOUND")),
1818
);
1919
};
2020

@@ -41,10 +41,9 @@ export const chromaFetch: typeof fetch = async (input, init) => {
4141
try {
4242
const responseBody = await response.json();
4343
status = responseBody.message || status;
44-
} catch {}
44+
} catch { }
4545
throw new ChromaClientError(
46-
`Bad request to ${
47-
(input as Request).url || "Chroma"
46+
`Bad request to ${(input as Request).url || "Chroma"
4847
} with status: ${status}`,
4948
);
5049
case 401:

clients/new-js/packages/chromadb/src/collection-configuration.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
serializeEmbeddingFunction,
1414
} from "./embedding-function";
1515
import { CollectionMetadata } from "./types";
16+
import { Schema } from "./schema";
1617

1718
export interface CollectionConfiguration {
1819
embeddingFunction?: EmbeddingFunctionConfiguration | null;
@@ -57,11 +58,17 @@ export const processCreateCollectionConfig = async ({
5758
configuration,
5859
embeddingFunction,
5960
metadata,
61+
schema,
6062
}: {
6163
configuration?: CreateCollectionConfiguration;
6264
embeddingFunction?: EmbeddingFunction | null;
6365
metadata?: CollectionMetadata;
66+
schema?: Schema;
6467
}) => {
68+
let schemaEmbeddingFunction: EmbeddingFunction | null | undefined = undefined;
69+
if (schema) {
70+
schemaEmbeddingFunction = schema.resolveEmbeddingFunction();
71+
}
6572
if (configuration?.hnsw && configuration?.spann) {
6673
throw new ChromaValueError(
6774
"Cannot specify both HNSW and SPANN configurations",
@@ -73,7 +80,7 @@ export const processCreateCollectionConfig = async ({
7380
configEmbeddingFunction: configuration?.embeddingFunction,
7481
});
7582

76-
if (!embeddingFunctionConfiguration && embeddingFunction !== null) {
83+
if (!embeddingFunctionConfiguration && embeddingFunction !== null && schemaEmbeddingFunction === undefined) {
7784
embeddingFunctionConfiguration = await getDefaultEFConfig();
7885
}
7986

@@ -115,7 +122,7 @@ export const processCreateCollectionConfig = async ({
115122
) {
116123
console.warn(
117124
`Space '${configuration.hnsw.space}' is not supported by embedding function '${overallEf.name || "unknown"}'. ` +
118-
`Supported spaces: ${supportedSpaces.join(", ")}`,
125+
`Supported spaces: ${supportedSpaces.join(", ")}`,
119126
);
120127
}
121128

@@ -125,7 +132,7 @@ export const processCreateCollectionConfig = async ({
125132
) {
126133
console.warn(
127134
`Space '${configuration.spann.space}' is not supported by embedding function '${overallEf.name || "unknown"}'. ` +
128-
`Supported spaces: ${supportedSpaces.join(", ")}`,
135+
`Supported spaces: ${supportedSpaces.join(", ")}`,
129136
);
130137
}
131138

@@ -140,7 +147,7 @@ export const processCreateCollectionConfig = async ({
140147
) {
141148
console.warn(
142149
`Space '${metadata["hnsw:space"]}' from metadata is not supported by embedding function '${overallEf.name || "unknown"}'. ` +
143-
`Supported spaces: ${supportedSpaces.join(", ")}`,
150+
`Supported spaces: ${supportedSpaces.join(", ")}`,
144151
);
145152
}
146153
}

clients/new-js/packages/chromadb/src/embedding-function.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ export interface SparseEmbeddingFunction {
9292
*/
9393
export interface EmbeddingFunctionClass {
9494
/** Constructor for creating new instances */
95-
new (...args: any[]): EmbeddingFunction;
95+
new(...args: any[]): EmbeddingFunction;
9696
/** Name identifier for the embedding function */
9797
name: string;
9898
/** Static method to build instance from configuration */
@@ -105,7 +105,7 @@ export interface EmbeddingFunctionClass {
105105
*/
106106
export interface SparseEmbeddingFunctionClass {
107107
/** Constructor for creating new instances */
108-
new (...args: any[]): SparseEmbeddingFunction;
108+
new(...args: any[]): SparseEmbeddingFunction;
109109
/** Name identifier for the embedding function */
110110
name: string;
111111
/** Static method to build instance from configuration */
@@ -408,8 +408,7 @@ export const getDefaultEFConfig =
408408
registerEmbeddingFunction("default", DefaultEmbeddingFunction);
409409
}
410410
} catch (e) {
411-
console.error(e);
412-
throw new Error(
411+
console.warn(
413412
"Cannot instantiate a collection with the DefaultEmbeddingFunction. Please install @chroma-core/default-embed, or provide a different embedding function",
414413
);
415414
}

clients/new-js/packages/chromadb/src/schema.ts

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,14 @@ export interface VectorIndexConfigOptions {
6363
export class VectorIndexConfig {
6464
readonly type = "VectorIndexConfig";
6565
space: Space | null;
66-
embeddingFunction: EmbeddingFunction | null;
66+
embeddingFunction?: EmbeddingFunction | null;
6767
sourceKey: string | null;
6868
hnsw: ApiHnswIndexConfig | null;
6969
spann: ApiSpannIndexConfig | null;
7070

7171
constructor(options: VectorIndexConfigOptions = {}) {
7272
this.space = options.space ?? null;
73-
this.embeddingFunction = options.embeddingFunction ?? null;
73+
this.embeddingFunction = options.embeddingFunction;
7474
this.sourceKey = options.sourceKey ?? null;
7575
this.hnsw = options.hnsw ?? null;
7676
this.spann = options.spann ?? null;
@@ -85,12 +85,12 @@ export interface SparseVectorIndexConfigOptions {
8585

8686
export class SparseVectorIndexConfig {
8787
readonly type = "SparseVectorIndexConfig";
88-
embeddingFunction: SparseEmbeddingFunction | null;
88+
embeddingFunction?: SparseEmbeddingFunction | null;
8989
sourceKey: string | null;
9090
bm25: boolean | null;
9191

9292
constructor(options: SparseVectorIndexConfigOptions = {}) {
93-
this.embeddingFunction = options.embeddingFunction ?? null;
93+
this.embeddingFunction = options.embeddingFunction;
9494
this.sourceKey = options.sourceKey ?? null;
9595
this.bm25 = options.bm25 ?? null;
9696
}
@@ -100,78 +100,78 @@ export class FtsIndexType {
100100
constructor(
101101
public enabled: boolean,
102102
public config: FtsIndexConfig,
103-
) {}
103+
) { }
104104
}
105105

106106
export class StringInvertedIndexType {
107107
constructor(
108108
public enabled: boolean,
109109
public config: StringInvertedIndexConfig,
110-
) {}
110+
) { }
111111
}
112112

113113
export class VectorIndexType {
114114
constructor(
115115
public enabled: boolean,
116116
public config: VectorIndexConfig,
117-
) {}
117+
) { }
118118
}
119119

120120
export class SparseVectorIndexType {
121121
constructor(
122122
public enabled: boolean,
123123
public config: SparseVectorIndexConfig,
124-
) {}
124+
) { }
125125
}
126126

127127
export class IntInvertedIndexType {
128128
constructor(
129129
public enabled: boolean,
130130
public config: IntInvertedIndexConfig,
131-
) {}
131+
) { }
132132
}
133133

134134
export class FloatInvertedIndexType {
135135
constructor(
136136
public enabled: boolean,
137137
public config: FloatInvertedIndexConfig,
138-
) {}
138+
) { }
139139
}
140140

141141
export class BoolInvertedIndexType {
142142
constructor(
143143
public enabled: boolean,
144144
public config: BoolInvertedIndexConfig,
145-
) {}
145+
) { }
146146
}
147147

148148
export class StringValueType {
149149
constructor(
150150
public ftsIndex: FtsIndexType | null = null,
151151
public stringInvertedIndex: StringInvertedIndexType | null = null,
152-
) {}
152+
) { }
153153
}
154154

155155
export class FloatListValueType {
156-
constructor(public vectorIndex: VectorIndexType | null = null) {}
156+
constructor(public vectorIndex: VectorIndexType | null = null) { }
157157
}
158158

159159
export class SparseVectorValueType {
160-
constructor(public sparseVectorIndex: SparseVectorIndexType | null = null) {}
160+
constructor(public sparseVectorIndex: SparseVectorIndexType | null = null) { }
161161
}
162162

163163
export class IntValueType {
164-
constructor(public intInvertedIndex: IntInvertedIndexType | null = null) {}
164+
constructor(public intInvertedIndex: IntInvertedIndexType | null = null) { }
165165
}
166166

167167
export class FloatValueType {
168168
constructor(
169169
public floatInvertedIndex: FloatInvertedIndexType | null = null,
170-
) {}
170+
) { }
171171
}
172172

173173
export class BoolValueType {
174-
constructor(public boolInvertedIndex: BoolInvertedIndexType | null = null) {}
174+
constructor(public boolInvertedIndex: BoolInvertedIndexType | null = null) { }
175175
}
176176

177177
export class ValueTypes {
@@ -206,11 +206,11 @@ const cloneObject = <T>(value: T): T => {
206206
return Array.isArray(value)
207207
? (value.map((item) => cloneObject(item)) as T)
208208
: (Object.fromEntries(
209-
Object.entries(value as Record<string, unknown>).map(([k, v]) => [
210-
k,
211-
cloneObject(v),
212-
]),
213-
) as T);
209+
Object.entries(value as Record<string, unknown>).map(([k, v]) => [
210+
k,
211+
cloneObject(v),
212+
]),
213+
) as T);
214214
};
215215

216216
const resolveEmbeddingFunctionName = (
@@ -463,7 +463,7 @@ export class Schema {
463463
currentDefaultsVector.enabled,
464464
new VectorIndexConfig({
465465
space: config.space ?? null,
466-
embeddingFunction: config.embeddingFunction ?? null,
466+
embeddingFunction: config.embeddingFunction,
467467
sourceKey: config.sourceKey ?? null,
468468
hnsw: config.hnsw ? cloneObject(config.hnsw) : null,
469469
spann: config.spann ? cloneObject(config.spann) : null,
@@ -485,7 +485,7 @@ export class Schema {
485485
currentOverrideVector.enabled,
486486
new VectorIndexConfig({
487487
space: config.space ?? null,
488-
embeddingFunction: config.embeddingFunction ?? null,
488+
embeddingFunction: config.embeddingFunction,
489489
sourceKey: preservedSourceKey,
490490
hnsw: config.hnsw ? cloneObject(config.hnsw) : null,
491491
spann: config.spann ? cloneObject(config.spann) : null,
@@ -1039,11 +1039,8 @@ export class Schema {
10391039
(await getEmbeddingFunction(
10401040
"schema deserialization",
10411041
json.embedding_function as EmbeddingFunctionConfiguration,
1042-
)) ??
1043-
(config.embeddingFunction as EmbeddingFunction | null | undefined) ??
1044-
undefined;
1045-
1046-
config.embeddingFunction = embeddingFunction ?? null;
1042+
));
1043+
config.embeddingFunction = embeddingFunction;
10471044
if (!config.space && config.embeddingFunction?.defaultSpace) {
10481045
config.space = config.embeddingFunction.defaultSpace();
10491046
}
@@ -1073,4 +1070,14 @@ export class Schema {
10731070
config.embeddingFunction = embeddingFunction ?? null;
10741071
return config;
10751072
}
1073+
1074+
public resolveEmbeddingFunction(): EmbeddingFunction | null | undefined {
1075+
const embeddingOverride =
1076+
this.keys[EMBEDDING_KEY]?.floatList?.vectorIndex?.config
1077+
.embeddingFunction;
1078+
if (embeddingOverride !== undefined) {
1079+
return embeddingOverride;
1080+
}
1081+
return this.defaults.floatList?.vectorIndex?.config.embeddingFunction;
1082+
}
10761083
}

0 commit comments

Comments
 (0)