diff --git a/src/collections/config/types/vectorizer.ts b/src/collections/config/types/vectorizer.ts index 9d9bde93..d533d6ef 100644 --- a/src/collections/config/types/vectorizer.ts +++ b/src/collections/config/types/vectorizer.ts @@ -24,6 +24,7 @@ export type Vectorizer = | 'multi2vec-bind' | Multi2VecPalmVectorizer | 'multi2vec-google' + | 'multi2vec-jinaai' | 'multi2vec-voyageai' | 'ref2vec-centroid' | 'text2vec-aws' @@ -170,7 +171,7 @@ export type Multi2VecGoogleConfig = { videoFields?: string[]; /** The model ID in use. */ modelId?: string; - /** The number of dimensions in use. */ + /** The dimensionality of the vector once embedded. */ dimensions?: number; /** Whether the collection name is vectorized. */ vectorizeCollectionName?: boolean; @@ -185,6 +186,32 @@ export type Multi2VecGoogleConfig = { }; }; +/** The configuration for multi-media (image and text) vectorization using the JinaAI `multi2vec-jinaai` module. + * + * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) for detailed usage. + */ +export type Multi2VecJinaAIConfig = { + /** The base URL to use where API requests should go. */ + baseURL?: string; + /** The dimensionality of the vector once embedded. */ + dimensions?: number; + /** The image fields used when vectorizing. */ + imageFields?: string[]; + /** The model to use. */ + model?: string; + /** The text fields used when vectorizing. */ + textFields?: string[]; + /** Whether the collection name is vectorized. */ + vectorizeCollectionName?: boolean; + /** The weights of the fields used for vectorization. */ + weights?: { + /** The weights of the image fields. */ + imageFields?: number[]; + /** The weights of the text fields. */ + textFields?: number[]; + }; +}; + /** The configuration for multi-media vectorization using the VoyageAI module. * * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage. 
@@ -359,7 +386,7 @@ export type Text2VecOllamaConfig = { export type Text2VecOpenAIConfig = { /** The base URL to use where API requests should go. */ baseURL?: string; - /** The dimensions to use. */ + /** The dimensionality of the vector once embedded. */ dimensions?: number; /** The model to use. */ model?: 'text-embedding-3-small' | 'text-embedding-3-large' | 'text-embedding-ada-002' | string; @@ -434,7 +461,7 @@ export type Text2VecVoyageAIConfig = { export type Text2VecWeaviateConfig = { /** The base URL to use where API requests should go. */ baseURL?: string; - /** The dimensions to use. */ + /** The dimensionality of the vector once embedded. */ dimensions?: number; /** The model to use. */ model?: 'Snowflake/snowflake-arctic-embed-m-v1.5' | string; @@ -449,6 +476,7 @@ export type VectorizerConfig = | Multi2VecClipConfig | Multi2VecBindConfig | Multi2VecGoogleConfig + | Multi2VecJinaAIConfig | Multi2VecPalmConfig | Multi2VecVoyageAIConfig | Ref2VecCentroidConfig @@ -478,6 +506,8 @@ export type VectorizerConfigType = V extends 'img2vec-neural' ? Multi2VecBindConfig | undefined : V extends 'multi2vec-google' ? Multi2VecGoogleConfig + : V extends 'multi2vec-jinaai' + ? Multi2VecJinaAIConfig | undefined : V extends Multi2VecPalmVectorizer ? Multi2VecPalmConfig : V extends 'multi2vec-voyageai' diff --git a/src/collections/configure/types/vectorizer.ts b/src/collections/configure/types/vectorizer.ts index b505d3e9..94a9712c 100644 --- a/src/collections/configure/types/vectorizer.ts +++ b/src/collections/configure/types/vectorizer.ts @@ -127,6 +127,22 @@ export type Multi2VecCohereConfigCreate = { vectorizeCollectionName?: boolean; }; +/** The creation-time configuration for the `multi2vec-jinaai` vectorizer (see `VectorizerConfigCreateType`). */ +export type Multi2VecJinaAIConfigCreate = { + /** The base URL to use where API requests should go. */ + baseURL?: string; + /** The dimensionality of the vector once embedded. */ + dimensions?: number; + /** The image fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. 
*/ + imageFields?: string[] | Multi2VecField[]; + /** The model to use. */ + model?: string; + /** The text fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */ + textFields?: string[] | Multi2VecField[]; + /** Whether to vectorize the collection name. */ + vectorizeCollectionName?: boolean; +}; + /** @deprecated Use `Multi2VecGoogleConfigCreate` instead.*/ export type Multi2VecPalmConfigCreate = Multi2VecGoogleConfigCreate; @@ -144,7 +160,7 @@ export type Multi2VecGoogleConfigCreate = { videoFields?: string[] | Multi2VecField[]; /** The model ID to use. */ modelId?: string; - /** The number of dimensions to use. */ + /** The dimensionality of the vector once embedded. */ dimensions?: number; /** Whether to vectorize the collection name. */ vectorizeCollectionName?: boolean; @@ -200,6 +216,8 @@ export type VectorizerConfigCreateType = V extends 'img2vec-neural' ? Multi2VecCohereConfigCreate | undefined : V extends 'multi2vec-bind' ? Multi2VecBindConfigCreate | undefined + : V extends 'multi2vec-jinaai' + ? Multi2VecJinaAIConfigCreate | undefined : V extends 'multi2vec-palm' ? 
Multi2VecPalmConfigCreate : V extends 'multi2vec-google' diff --git a/src/collections/configure/unit.test.ts b/src/collections/configure/unit.test.ts index 4bd02f1e..5b799d2f 100644 --- a/src/collections/configure/unit.test.ts +++ b/src/collections/configure/unit.test.ts @@ -621,7 +621,53 @@ describe('Unit testing of the vectorizer factory class', () => { }, }); }); - + it('should create the correct Multi2VecJinaAIConfig type with defaults', () => { + const config = configure.vectorizer.multi2VecJinaAI(); + expect(config).toEqual>({ + name: undefined, + vectorIndex: { + name: 'hnsw', + config: undefined, + }, + vectorizer: { + name: 'multi2vec-jinaai', + config: undefined, + }, + }); + }); + it('should create the correct Multi2VecJinaAIConfig type with all values and weights', () => { + const config = configure.vectorizer.multi2VecJinaAI({ + name: 'test', + imageFields: [ + { name: 'field1', weight: 0.1 }, + { name: 'field2', weight: 0.2 }, + ], + textFields: [ + { name: 'field3', weight: 0.3 }, + { name: 'field4', weight: 0.4 }, + ], + vectorizeCollectionName: true, + }); + expect(config).toEqual>({ + name: 'test', + vectorIndex: { + name: 'hnsw', + config: undefined, + }, + vectorizer: { + name: 'multi2vec-jinaai', + config: { + imageFields: ['field1', 'field2'], + textFields: ['field3', 'field4'], + vectorizeCollectionName: true, + weights: { + imageFields: [0.1, 0.2], + textFields: [0.3, 0.4], + }, + }, + }, + }); + }); it('should create the correct Multi2VecPalmConfig type using deprecated method with defaults', () => { const config = configure.vectorizer.multi2VecPalm({ projectId: 'project-id', diff --git a/src/collections/configure/vectorizer.ts b/src/collections/configure/vectorizer.ts index b4721edd..a6901b4e 100644 --- a/src/collections/configure/vectorizer.ts +++ b/src/collections/configure/vectorizer.ts @@ -196,6 +196,39 @@ export const vectorizer = { }, }); }, + /** + * Create a `VectorConfigCreate` object with the vectorizer set to 
`'multi2vec-jinaai'`. + * + * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) for detailed usage. + * + * @param {ConfigureNonTextVectorizerOptions} [opts] The configuration options for the `multi2vec-jinaai` vectorizer. When no vectorizer-specific options are supplied, the vectorizer `config` is left `undefined`. + * @returns {VectorConfigCreate[], N, I, 'multi2vec-jinaai'>} The configuration object, with `imageFields`/`textFields` reduced to field names and their weights collected under `weights`. + */ + multi2VecJinaAI: ( + opts?: ConfigureNonTextVectorizerOptions + ): VectorConfigCreate => { + const { name, vectorIndexConfig, ...config } = opts || {}; + const imageFields = config.imageFields?.map(mapMulti2VecField); + const textFields = config.textFields?.map(mapMulti2VecField); + let weights: Multi2VecBindConfig['weights'] = {}; + weights = formatMulti2VecFields(weights, 'imageFields', imageFields); + weights = formatMulti2VecFields(weights, 'textFields', textFields); + return makeVectorizer(name, { + vectorIndexConfig, + vectorizerConfig: { + name: 'multi2vec-jinaai', + config: + Object.keys(config).length === 0 + ? undefined + : { + ...config, + imageFields: imageFields?.map((f) => f.name), + textFields: textFields?.map((f) => f.name), + weights: Object.keys(weights).length === 0 ? undefined : weights, + }, + }, + }); + }, /** * Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-palm'`. *