feat: support multi2vec-nvidia vectorizer

bevzzz · bevzzz · commit 98aefb123bf3 · 2025-07-22T13:46:48.000+02:00
diff --git a/src/collections/config/types/vectorizer.ts b/src/collections/config/types/vectorizer.ts
@@ -19,6 +19,7 @@ type Text2VecPalmVectorizer = 'text2vec-palm';
 
 export type Vectorizer =
   | 'img2vec-neural'
+  | 'multi2vec-nvidia'
   | 'multi2vec-clip'
   | 'multi2vec-cohere'
   | 'multi2vec-bind'
@@ -65,6 +66,32 @@ export type Multi2VecField = {
   weight?: number;
 };
 
+/** The configuration for multi-media vectorization using the NVIDIA module.
+ *
+ * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings-multimodal) for detailed usage.
+ */
+export type Multi2VecNvidiaConfig = {
+  /** The model to use. Defaults to `undefined`, which uses the server-defined default. */
+  model?: string;
+  /** The base URL where API requests should go. */
+  baseURL?: string;
+  /** Whether to apply truncation. */
+  truncation?: boolean;
+  /** Format in which the embeddings are encoded (note the snake_case key). Defaults to `undefined`, so the embeddings are represented as a list of floating-point numbers. */
+  output_encoding?: string;
+  /** The names of the image fields used when vectorizing. */
+  imageFields?: string[];
+  /** The names of the text fields used when vectorizing. */
+  textFields?: string[];
+  /** The weights of the fields used for vectorization. */
+  weights?: {
+    /** The weights of the image fields. */
+    imageFields?: number[];
+    /** The weights of the text fields. */
+    textFields?: number[];
+  };
+};
+
 /** The configuration for multi-media vectorization using the CLIP module.
  *
  * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage.
@@ -569,6 +596,8 @@ export type VectorizerConfig =
 
 export type VectorizerConfigType<V> = V extends 'img2vec-neural'
   ? Img2VecNeuralConfig | undefined
+  : V extends 'multi2vec-nvidia'
+  ? Multi2VecNvidiaConfig | undefined
   : V extends 'multi2vec-clip'
   ? Multi2VecClipConfig | undefined
   : V extends 'multi2vec-cohere'
diff --git a/src/collections/configure/types/vectorizer.ts b/src/collections/configure/types/vectorizer.ts
@@ -112,6 +112,37 @@ export type ConfigureTextMultiVectorizerOptions<
 
 export type Img2VecNeuralConfigCreate = Img2VecNeuralConfig;
 
+// model: Optional[str] = None,
+// truncation: Optional[bool] = None,
+// output_encoding: Optional[str],
+// vectorize_collection_name: bool = True,
+// base_url: Optional[AnyHttpUrl] = None,
+// image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+// text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+
+// model: The model to use. Defaults to `None`, which uses the server-defined default.
+// output_encoding: Format in which the embeddings are encoded. Defaults to `None`, so the embeddings are represented as a list of floating-point numbers.
+// vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
+// base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
+// image_fields: The image fields to use in vectorization.
+// text_fields: The text fields to use in vectorization.
+
+/** The configuration for the `multi2vec-nvidia` vectorizer. */
+export type Multi2VecNvidiaConfigCreate = {
+  /** The model to use. Defaults to `undefined`, which uses the server-defined default. */
+  model?: string;
+  /** The base URL where API requests should go. */
+  baseURL?: string;
+  /** Whether to apply truncation. */
+  truncation?: boolean;
+  /** Format in which the embeddings are encoded. Defaults to `undefined`, so the embeddings are represented as a list of floating-point numbers. */
+  outputEncoding?: string;
+  /** The image fields to use in vectorization. Can be strings or `Multi2VecField` objects. If string, weight 0 will be assumed. */
+  imageFields?: string[] | Multi2VecField[];
+  /** The text fields to use in vectorization. Can be strings or `Multi2VecField` objects. If string, weight 0 will be assumed. */
+  textFields?: string[] | Multi2VecField[];
+};
+
 /** The configuration for the `multi2vec-clip` vectorizer. */
 export type Multi2VecClipConfigCreate = {
   /** The image fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */
@@ -261,6 +292,8 @@ export type Text2MultiVecJinaAIConfigCreate = Text2MultiVecJinaAIConfig;
 
 export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
   ? Img2VecNeuralConfigCreate | undefined
+  : V extends 'multi2vec-nvidia'
+  ? Multi2VecNvidiaConfigCreate | undefined
   : V extends 'multi2vec-clip'
   ? Multi2VecClipConfigCreate | undefined
   : V extends 'multi2vec-cohere'
diff --git a/src/collections/configure/unit.test.ts b/src/collections/configure/unit.test.ts
@@ -639,6 +639,45 @@ describe('Unit testing of the vectorizer factory class', () => {
       },
     });
   });
+  it('should create the correct Multi2VecNvidiaConfig type with all values and weights', () => {
+    const config = configure.vectors.multi2VecNvidia({
+      name: 'test',
+      model: 'model-id',
+      outputEncoding: 'base64',
+      truncation: true,
+      baseURL: 'example.com',
+      imageFields: [
+        { name: 'field1', weight: 0.1 },
+        { name: 'field2', weight: 0.2 },
+      ],
+      textFields: [
+        { name: 'field3', weight: 0.3 },
+        { name: 'field4', weight: 0.4 },
+      ],
+    });
+    expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'multi2vec-nvidia'>>({
+      name: 'test',
+      vectorIndex: {
+        name: 'hnsw',
+        config: undefined,
+      },
+      vectorizer: {
+        name: 'multi2vec-nvidia',
+        config: {
+          output_encoding: 'base64', // the `outputEncoding` option is expected under the snake_case key
+          truncation: true,
+          baseURL: 'example.com',
+          imageFields: ['field1', 'field2'], // field names only; the weights are expected under `weights`
+          textFields: ['field3', 'field4'],
+          model: 'model-id',
+          weights: {
+            imageFields: [0.1, 0.2],
+            textFields: [0.3, 0.4],
+          },
+        },
+      },
+    });
+  });
 
   it('should create the correct Multi2VecJinaAIConfig type with defaults', () => {
     const config = configure.vectors.multi2VecJinaAI();
diff --git a/src/collections/configure/vectorizer.ts b/src/collections/configure/vectorizer.ts
@@ -4,6 +4,7 @@ import {
   Multi2VecBindConfig,
   Multi2VecClipConfig,
   Multi2VecField,
+  Multi2VecNvidiaConfig,
   Multi2VecPalmConfig,
   Multi2VecVoyageAIConfig,
   VectorIndexType,
@@ -908,9 +909,43 @@ export const vectorizer = legacyVectors;
 // Remove deprecated vectorizers and module configuration parameters:
 // - PaLM vectorizers are called -Google now.
 // - __vectors_shaded hide/rename some parameters
-export const vectors = (({ text2VecPalm, multi2VecPalm, ...rest }) => ({ ...rest, ...__vectors_shaded }))(
-  legacyVectors
-);
+export const vectors = (({ text2VecPalm, multi2VecPalm, ...rest }) => ({
+  ...rest,
+  ...__vectors_shaded,
+
+  /**
+   * Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-nvidia'`.
+   *
+   * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings-multimodal) for detailed usage.
+   *
+   * @param {ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-nvidia'>} [opts] The configuration options for the `multi2vec-nvidia` vectorizer.
+   * @returns {VectorConfigCreate<never, N, I, 'multi2vec-nvidia'>} The configuration object.
+   */
+  multi2VecNvidia: <N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
+    opts?: ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-nvidia'>
+  ): VectorConfigCreate<never, N, I, 'multi2vec-nvidia'> => {
+    const { name, quantizer, vectorIndexConfig, outputEncoding, ...config } = opts || {};
+    const imageFields = config.imageFields?.map(mapMulti2VecField);
+    const textFields = config.textFields?.map(mapMulti2VecField);
+    let weights: Multi2VecNvidiaConfig['weights'] = {};
+    weights = formatMulti2VecFields(weights, 'imageFields', imageFields);
+    weights = formatMulti2VecFields(weights, 'textFields', textFields);
+    return makeVectorizer(name, {
+      quantizer,
+      vectorIndexConfig,
+      vectorizerConfig: {
+        name: 'multi2vec-nvidia',
+        config: {
+          ...config, // remaining options; the field lists spread here are overridden below
+          output_encoding: outputEncoding, // camelCase option emitted under the snake_case config key
+          imageFields: imageFields?.map((f) => f.name),
+          textFields: textFields?.map((f) => f.name),
+          weights: Object.keys(weights).length === 0 ? undefined : weights, // omit `weights` when nothing was collected
+        },
+      },
+    });
+  },
+}))(legacyVectors);
 
 export const multiVectors = {
   /**