Skip to content

Commit 263c8a8

Browse files
committed
Add configure.multiVectors, allow quantizer and encoding in vectorizer/multiVectors factories
1 parent ac4c24f commit 263c8a8

File tree

8 files changed: 315 additions and 48 deletions

src/collections/config/types/vectorIndex.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export type VectorIndexConfigHNSW = {
1010
flatSearchCutoff: number;
1111
maxConnections: number;
1212
multiVector: MultiVectorConfig | undefined;
13-
quantizer: PQConfig | BQConfig | SQConfig | undefined;
13+
quantizer: QuantizerConfig | undefined;
1414
skip: boolean;
1515
vectorCacheMaxObjects: number;
1616
type: 'hnsw';
@@ -19,7 +19,7 @@ export type VectorIndexConfigHNSW = {
1919
export type VectorIndexConfigFlat = {
2020
distance: VectorDistance;
2121
vectorCacheMaxObjects: number;
22-
quantizer: BQConfig | undefined;
22+
quantizer: QuantizerConfig | undefined;
2323
type: 'flat';
2424
};
2525

src/collections/config/types/vectorizer.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ export type Vectorizer =
4444
| 'text2vec-transformers'
4545
| 'text2vec-voyageai'
4646
| 'text2vec-weaviate'
47+
| 'text2multivec-jinaai'
4748
| 'none';
4849

4950
/** The configuration for image vectorization using a neural network module.
@@ -365,6 +366,19 @@ export type Text2VecJinaAIConfig = {
365366
vectorizeCollectionName?: boolean;
366367
};
367368

369+
/** The configuration for text vectorization using the Jina AI multi-vector module.
370+
*
371+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-colbert) for detailed usage.
372+
*/
373+
export type Text2MultiVecJinaAIConfig = {
374+
/** The dimensionality of the multi-vector. */
375+
dimensions?: number;
376+
/** The model to use. */
377+
model?: string;
378+
/** Whether to vectorize the collection name. */
379+
vectorizeCollectionName?: boolean;
380+
};
381+
368382
/** @deprecated Use `Text2VecJinaAIConfig` instead. */
369383
export type Text2VecJinaConfig = Text2VecJinaAIConfig;
370384

@@ -582,6 +596,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
582596
? Text2VecVoyageAIConfig | undefined
583597
: V extends 'text2vec-weaviate'
584598
? Text2VecWeaviateConfig | undefined
599+
: V extends 'text2multivec-jinaai'
600+
? Text2MultiVecJinaAIConfig | undefined
585601
: V extends 'none'
586602
? {}
587603
: V extends undefined

src/collections/configure/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import {
1515
import generative from './generative.js';
1616
import reranker from './reranker.js';
1717
import { configure as configureVectorIndex, reconfigure as reconfigureVectorIndex } from './vectorIndex.js';
18-
import { vectorizer } from './vectorizer.js';
18+
import { multiVectors, vectorizer } from './vectorizer.js';
1919

2020
import { parseWithDefault } from './parsing.js';
2121

@@ -58,6 +58,7 @@ const vectorDistances = {
5858

5959
const configure = {
6060
generative,
61+
multiVectors,
6162
reranker,
6263
vectorizer,
6364
vectorIndex: configureVectorIndex,

src/collections/configure/parsing.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,20 +40,20 @@ export class QuantizerGuards {
4040
}
4141
}
4242

43-
type VectorIndexConfig =
43+
type VectorIndexConfigCreate =
4444
| VectorIndexConfigHNSWCreate
4545
| VectorIndexConfigFlatCreate
4646
| VectorIndexConfigDynamicCreate
4747
| Record<string, any>;
4848

4949
export class VectorIndexGuards {
50-
static isHNSW(config?: VectorIndexConfig): config is VectorIndexConfigHNSWCreate {
50+
static isHNSW(config?: VectorIndexConfigCreate): config is VectorIndexConfigHNSWCreate {
5151
return (config as VectorIndexConfigHNSWCreate)?.type === 'hnsw';
5252
}
53-
static isFlat(config?: VectorIndexConfig): config is VectorIndexConfigFlatCreate {
53+
static isFlat(config?: VectorIndexConfigCreate): config is VectorIndexConfigFlatCreate {
5454
return (config as VectorIndexConfigFlatCreate)?.type === 'flat';
5555
}
56-
static isDynamic(config?: VectorIndexConfig): config is VectorIndexConfigDynamicCreate {
56+
static isDynamic(config?: VectorIndexConfigCreate): config is VectorIndexConfigDynamicCreate {
5757
return (config as VectorIndexConfigDynamicCreate)?.type === 'dynamic';
5858
}
5959
}

src/collections/configure/types/vectorIndex.ts

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,22 @@ export type SQConfigUpdate = {
4848
type: 'sq';
4949
};
5050

51-
export type MultiVectorConfigCreate = RecursivePartial<MultiVectorConfig>;
51+
export type QuantizerConfigCreate =
52+
| PQConfigCreate
53+
| BQConfigCreate
54+
| SQConfigCreate
55+
| Record<string, any>
56+
| undefined;
57+
58+
export type MultiVectorConfigCreate = {
59+
aggregation?: MultiVectorConfig['aggregation'];
60+
encoding?: MultiVectorEncodingConfigCreate;
61+
};
5262

5363
export type MuveraEncodingConfigCreate = RecursivePartial<MuveraEncodingConfig>;
5464

65+
export type MultiVectorEncodingConfigCreate = MuveraEncodingConfigCreate;
66+
5567
export type VectorIndexConfigHNSWCreate = RecursivePartial<VectorIndexConfigHNSW>;
5668

5769
export type VectorIndexConfigDynamicCreate = RecursivePartial<VectorIndexConfigDynamic>;
@@ -139,7 +151,7 @@ export type VectorIndexConfigHNSWCreateOptions = {
139151
/** The multi-vector configuration to use. Use `vectorIndex.multiVector` to make one. */
140152
multiVector?: MultiVectorConfigCreate;
141153
/** The quantizer configuration to use. Use `vectorIndex.quantizer.bq` or `vectorIndex.quantizer.pq` to make one. */
142-
quantizer?: PQConfigCreate | BQConfigCreate | SQConfigCreate;
154+
quantizer?: QuantizerConfigCreate;
143155
/** Whether to skip the index. Default is false. */
144156
skip?: boolean;
145157
/** The maximum number of objects to cache in the vector cache. Default is 1000000000000. */
@@ -152,7 +164,7 @@ export type VectorIndexConfigFlatCreateOptions = {
152164
/** The maximum number of objects to cache in the vector cache. Default is 1000000000000. */
153165
vectorCacheMaxObjects?: number;
154166
/** The quantizer configuration to use. Default is `bq`. */
155-
quantizer?: BQConfigCreate;
167+
quantizer?: QuantizerConfigCreate;
156168
};
157169

158170
export type VectorIndexConfigDynamicCreateOptions = {

src/collections/configure/types/vectorizer.ts

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
ModuleConfig,
44
Multi2VecField,
55
Ref2VecCentroidConfig,
6+
Text2MultiVecJinaAIConfig,
67
Text2VecAWSConfig,
78
Text2VecAzureOpenAIConfig,
89
Text2VecCohereConfig,
@@ -24,9 +25,16 @@ import {
2425
VectorizerConfigType,
2526
} from '../../config/types/index.js';
2627
import { PrimitiveKeys } from '../../types/internal.js';
27-
import { VectorIndexConfigCreateType, VectorIndexConfigUpdateType } from './vectorIndex.js';
28+
import {
29+
MultiVectorEncodingConfigCreate,
30+
QuantizerConfigCreate,
31+
VectorIndexConfigCreateType,
32+
VectorIndexConfigUpdateType,
33+
} from './vectorIndex.js';
2834

2935
export type VectorizerCreateOptions<P, I, V> = {
36+
encoding?: MultiVectorEncodingConfigCreate;
37+
quantizer?: QuantizerConfigCreate;
3038
sourceProperties?: P;
3139
vectorIndexConfig?: ModuleConfig<I, VectorIndexConfigCreateType<I>>;
3240
vectorizerConfig?: ModuleConfig<V, VectorizerConfigType<V>>;
@@ -72,6 +80,7 @@ export type ConfigureNonTextVectorizerOptions<
7280
V extends Vectorizer
7381
> = VectorizerConfigCreateType<V> & {
7482
name?: N;
83+
quantizer?: QuantizerConfigCreate;
7584
vectorIndexConfig?: ModuleConfig<I, VectorIndexConfigCreateType<I>>;
7685
};
7786

@@ -80,10 +89,25 @@ export type ConfigureTextVectorizerOptions<
8089
N extends string | undefined,
8190
I extends VectorIndexType,
8291
V extends Vectorizer
83-
> = VectorizerConfigCreateType<V> & {
84-
name?: N;
92+
> = ConfigureNonTextVectorizerOptions<N, I, V> & {
8593
sourceProperties?: PrimitiveKeys<T>[];
86-
vectorIndexConfig?: ModuleConfig<I, VectorIndexConfigCreateType<I>>;
94+
};
95+
96+
export type ConfigureNonTextMultiVectorizerOptions<
97+
N extends string | undefined,
98+
I extends VectorIndexType,
99+
V extends Vectorizer
100+
> = ConfigureNonTextVectorizerOptions<N, I, V> & {
101+
encoding?: MultiVectorEncodingConfigCreate;
102+
};
103+
104+
export type ConfigureTextMultiVectorizerOptions<
105+
T,
106+
N extends string | undefined,
107+
I extends VectorIndexType,
108+
V extends Vectorizer
109+
> = ConfigureTextVectorizerOptions<T, N, I, V> & {
110+
encoding?: MultiVectorEncodingConfigCreate;
87111
};
88112

89113
export type Img2VecNeuralConfigCreate = Img2VecNeuralConfig;
@@ -226,6 +250,8 @@ export type Text2VecVoyageAIConfigCreate = Text2VecVoyageAIConfig;
226250

227251
export type Text2VecWeaviateConfigCreate = Text2VecWeaviateConfig;
228252

253+
export type Text2MultiVecJinaAIConfigCreate = Text2MultiVecJinaAIConfig;
254+
229255
export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
230256
? Img2VecNeuralConfigCreate | undefined
231257
: V extends 'multi2vec-clip'
@@ -278,6 +304,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
278304
? Text2VecVoyageAIConfigCreate | undefined
279305
: V extends 'text2vec-weaviate'
280306
? Text2VecWeaviateConfigCreate | undefined
307+
: V extends 'text2multivec-jinaai'
308+
? Text2MultiVecJinaAIConfigCreate | undefined
281309
: V extends 'none'
282310
? {}
283311
: V extends undefined

src/collections/configure/unit.test.ts

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1532,13 +1532,24 @@ describe('Unit testing of the vectorizer factory class', () => {
15321532
baseURL: 'base-url',
15331533
dimensions: 256,
15341534
model: 'model',
1535+
quantizer: configure.vectorIndex.quantizer.pq(),
15351536
vectorizeCollectionName: true,
15361537
});
15371538
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2vec-weaviate'>>({
15381539
name: 'test',
15391540
vectorIndex: {
15401541
name: 'hnsw',
1541-
config: undefined,
1542+
config: {
1543+
quantizer: {
1544+
bitCompression: undefined,
1545+
centroids: undefined,
1546+
encoder: undefined,
1547+
segments: undefined,
1548+
trainingLimit: undefined,
1549+
type: 'pq',
1550+
},
1551+
type: 'hnsw',
1552+
},
15421553
},
15431554
vectorizer: {
15441555
name: 'text2vec-weaviate',
@@ -1553,6 +1564,63 @@ describe('Unit testing of the vectorizer factory class', () => {
15531564
});
15541565
});
15551566

1567+
describe('Unit testing of the multiVectors factory class', () => {
1568+
it('should create the correct Text2MultiVecJinaAIConfig type with defaults', () => {
1569+
const config = configure.multiVectors.text2VecJinaAI();
1570+
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'text2multivec-jinaai'>>({
1571+
name: undefined,
1572+
vectorIndex: {
1573+
name: 'hnsw',
1574+
config: {
1575+
multiVector: {
1576+
aggregation: undefined,
1577+
encoding: undefined,
1578+
},
1579+
type: 'hnsw',
1580+
},
1581+
},
1582+
vectorizer: {
1583+
name: 'text2multivec-jinaai',
1584+
config: undefined,
1585+
},
1586+
});
1587+
});
1588+
1589+
it('should create the correct Text2MultiVecJinaAIConfig type with all values', () => {
1590+
const config = configure.multiVectors.text2VecJinaAI({
1591+
name: 'test',
1592+
encoding: configure.vectorIndex.multiVector.encoding.muvera({ ksim: 10 }),
1593+
model: 'model',
1594+
vectorizeCollectionName: true,
1595+
});
1596+
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2multivec-jinaai'>>({
1597+
name: 'test',
1598+
vectorIndex: {
1599+
name: 'hnsw',
1600+
config: {
1601+
multiVector: {
1602+
aggregation: undefined,
1603+
encoding: {
1604+
dprojections: undefined,
1605+
ksim: 10,
1606+
repetitions: undefined,
1607+
type: 'muvera',
1608+
},
1609+
},
1610+
type: 'hnsw',
1611+
},
1612+
},
1613+
vectorizer: {
1614+
name: 'text2multivec-jinaai',
1615+
config: {
1616+
model: 'model',
1617+
vectorizeCollectionName: true,
1618+
},
1619+
},
1620+
});
1621+
});
1622+
});
1623+
15561624
describe('Unit testing of the generative factory class', () => {
15571625
it('should create the correct GenerativeAnthropicConfig type with required & default values', () => {
15581626
const config = configure.generative.anthropic();

0 commit comments