Skip to content

Commit 98aefb1

Browse files
committed
feat: support multi2vec-nvidia vectorizer
1 parent bbac850 commit 98aefb1

File tree

4 files changed

+139
-3
lines changed

4 files changed

+139
-3
lines changed

src/collections/config/types/vectorizer.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ type Text2VecPalmVectorizer = 'text2vec-palm';
1919

2020
export type Vectorizer =
2121
| 'img2vec-neural'
22+
| 'multi2vec-nvidia'
2223
| 'multi2vec-clip'
2324
| 'multi2vec-cohere'
2425
| 'multi2vec-bind'
@@ -65,6 +66,32 @@ export type Multi2VecField = {
6566
weight?: number;
6667
};
6768

69+
/** The configuration for multi-media vectorization using the NVIDIA module.
70+
*
71+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings-multimodal) for detailed usage.
72+
*/
73+
export type Multi2VecNvidiaConfig = {
74+
/** The model to use. Defaults to `undefined`, which uses the server-defined default. */
75+
model?: string;
76+
/** The base URL where API requests should go. */
77+
baseURL?: string;
78+
/** Whether to apply truncation. */
79+
truncation?: boolean;
80+
/** Format in which the embeddings are encoded. Defaults to `undefined`, in which case the embeddings are represented as a list of floating-point numbers. */
81+
output_encoding?: string;
82+
/** The image fields used when vectorizing. */
83+
imageFields?: string[];
84+
/** The text fields used when vectorizing. */
85+
textFields?: string[];
86+
/** The weights of the fields used for vectorization. */
87+
weights?: {
88+
/** The weights of the image fields. */
89+
imageFields?: number[];
90+
/** The weights of the text fields. */
91+
textFields?: number[];
92+
};
93+
};
94+
6895
/** The configuration for multi-media vectorization using the CLIP module.
6996
*
7097
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage.
@@ -569,6 +596,8 @@ export type VectorizerConfig =
569596

570597
export type VectorizerConfigType<V> = V extends 'img2vec-neural'
571598
? Img2VecNeuralConfig | undefined
599+
: V extends 'multi2vec-nvidia'
600+
? Multi2VecNvidiaConfig | undefined
572601
: V extends 'multi2vec-clip'
573602
? Multi2VecClipConfig | undefined
574603
: V extends 'multi2vec-cohere'

src/collections/configure/types/vectorizer.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,37 @@ export type ConfigureTextMultiVectorizerOptions<
112112

113113
export type Img2VecNeuralConfigCreate = Img2VecNeuralConfig;
114114

115+
// model: Optional[str] = None,
116+
// truncation: Optional[bool] = None,
117+
// output_encoding: Optional[str],
118+
// vectorize_collection_name: bool = True,
119+
// base_url: Optional[AnyHttpUrl] = None,
120+
// image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
121+
// text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
122+
123+
// model: The model to use. Defaults to `None`, which uses the server-defined default.
124+
// output_encoding: Format in which the embeddings are encoded. Defaults to `None`, so the embeddings are represented as a list of floating-point numbers.
125+
// vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
126+
// base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
127+
// image_fields: The image fields to use in vectorization.
128+
// text_fields: The text fields to use in vectorization.
129+
130+
/** The configuration for the `multi2vec-nvidia` vectorizer. */
131+
export type Multi2VecNvidiaConfigCreate = {
132+
/** The model to use. Defaults to `undefined`, which uses the server-defined default. */
133+
model?: string;
134+
/** The base URL where API requests should go. */
135+
baseURL?: string;
136+
/** Whether to apply truncation. */
137+
truncation?: boolean;
138+
/** Format in which the embeddings are encoded. Defaults to `undefined`, in which case the embeddings are represented as a list of floating-point numbers. */
139+
outputEncoding?: string;
140+
/** The image fields to use in vectorization. Each entry can be a string or a `Multi2VecField`. If a string, weight 0 will be assumed. */
141+
imageFields?: string[] | Multi2VecField[];
142+
/** The text fields to use in vectorization. Each entry can be a string or a `Multi2VecField`. If a string, weight 0 will be assumed. */
143+
textFields?: string[] | Multi2VecField[];
144+
};
145+
115146
/** The configuration for the `multi2vec-clip` vectorizer. */
116147
export type Multi2VecClipConfigCreate = {
117148
/** The image fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */
@@ -261,6 +292,8 @@ export type Text2MultiVecJinaAIConfigCreate = Text2MultiVecJinaAIConfig;
261292

262293
export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
263294
? Img2VecNeuralConfigCreate | undefined
295+
: V extends 'multi2vec-nvidia'
296+
? Multi2VecNvidiaConfigCreate | undefined
264297
: V extends 'multi2vec-clip'
265298
? Multi2VecClipConfigCreate | undefined
266299
: V extends 'multi2vec-cohere'

src/collections/configure/unit.test.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,45 @@ describe('Unit testing of the vectorizer factory class', () => {
639639
},
640640
});
641641
});
642+
it('should create the correct Multi2VecNvidiaConfig type with all values and weights', () => {
643+
const config = configure.vectors.multi2VecNvidia({
644+
name: 'test',
645+
model: 'model-id',
646+
outputEncoding: 'base64',
647+
truncation: true,
648+
baseURL: 'example.com',
649+
imageFields: [
650+
{ name: 'field1', weight: 0.1 },
651+
{ name: 'field2', weight: 0.2 },
652+
],
653+
textFields: [
654+
{ name: 'field3', weight: 0.3 },
655+
{ name: 'field4', weight: 0.4 },
656+
],
657+
});
658+
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'multi2vec-nvidia'>>({
659+
name: 'test',
660+
vectorIndex: {
661+
name: 'hnsw',
662+
config: undefined,
663+
},
664+
vectorizer: {
665+
name: 'multi2vec-nvidia',
666+
config: {
667+
output_encoding: 'base64',
668+
truncation: true,
669+
baseURL: 'example.com',
670+
imageFields: ['field1', 'field2'],
671+
textFields: ['field3', 'field4'],
672+
model: 'model-id',
673+
weights: {
674+
imageFields: [0.1, 0.2],
675+
textFields: [0.3, 0.4],
676+
},
677+
},
678+
},
679+
});
680+
});
642681

643682
it('should create the correct Multi2VecJinaAIConfig type with defaults', () => {
644683
const config = configure.vectors.multi2VecJinaAI();

src/collections/configure/vectorizer.ts

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
Multi2VecBindConfig,
55
Multi2VecClipConfig,
66
Multi2VecField,
7+
Multi2VecNvidiaConfig,
78
Multi2VecPalmConfig,
89
Multi2VecVoyageAIConfig,
910
VectorIndexType,
@@ -908,9 +909,43 @@ export const vectorizer = legacyVectors;
908909
// Remove deprecated vectorizers and module configuration parameters:
909910
// - PaLM vectorizers are called -Google now.
910911
// - __vectors_shaded hide/rename some parameters
911-
export const vectors = (({ text2VecPalm, multi2VecPalm, ...rest }) => ({ ...rest, ...__vectors_shaded }))(
912-
legacyVectors
913-
);
912+
export const vectors = (({ text2VecPalm, multi2VecPalm, ...rest }) => ({
913+
...rest,
914+
...__vectors_shaded,
915+
916+
/**
917+
* Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-nvidia'`.
918+
*
919+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings-multimodal) for detailed usage.
920+
*
921+
* @param {ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-nvidia'>} [opts] The configuration options for the `multi2vec-nvidia` vectorizer.
922+
* @returns {VectorConfigCreate<never, N, I, 'multi2vec-nvidia'>} The configuration object.
923+
*/
924+
multi2VecNvidia: <N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
925+
opts?: ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-nvidia'>
926+
): VectorConfigCreate<never, N, I, 'multi2vec-nvidia'> => {
927+
const { name, quantizer, vectorIndexConfig, outputEncoding, ...config } = opts || {};
928+
const imageFields = config.imageFields?.map(mapMulti2VecField);
929+
const textFields = config.textFields?.map(mapMulti2VecField);
930+
let weights: Multi2VecNvidiaConfig['weights'] = {};
931+
weights = formatMulti2VecFields(weights, 'imageFields', imageFields);
932+
weights = formatMulti2VecFields(weights, 'textFields', textFields);
933+
return makeVectorizer(name, {
934+
quantizer,
935+
vectorIndexConfig,
936+
vectorizerConfig: {
937+
name: 'multi2vec-nvidia',
938+
config: {
939+
...config,
940+
output_encoding: outputEncoding,
941+
imageFields: imageFields?.map((f) => f.name),
942+
textFields: textFields?.map((f) => f.name),
943+
weights: Object.keys(weights).length === 0 ? undefined : weights,
944+
},
945+
},
946+
});
947+
},
948+
}))(legacyVectors);
914949

915950
export const multiVectors = {
916951
/**

0 commit comments

Comments
 (0)