From f2287702e8f6df76f9314878adac73ab861297e1 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 3 Jan 2025 13:39:37 +0000 Subject: [PATCH] Fix inputs and mappings for JinaAI and VoyageAI modules --- src/collections/config/types/vectorizer.ts | 23 +++++-- src/collections/configure/types/vectorizer.ts | 18 ++++-- src/collections/configure/unit.test.ts | 64 ++++++++++++++++--- src/collections/configure/vectorizer.ts | 29 +++++---- 4 files changed, 103 insertions(+), 31 deletions(-) diff --git a/src/collections/config/types/vectorizer.ts b/src/collections/config/types/vectorizer.ts index d533d6ef..b2e7cc82 100644 --- a/src/collections/config/types/vectorizer.ts +++ b/src/collections/config/types/vectorizer.ts @@ -34,7 +34,7 @@ export type Vectorizer = | 'text2vec-databricks' | 'text2vec-gpt4all' | 'text2vec-huggingface' - | 'text2vec-jina' + | 'text2vec-jinaai' | 'text2vec-mistral' | 'text2vec-ollama' | 'text2vec-openai' @@ -217,10 +217,18 @@ export type Multi2VecJinaAIConfig = { * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage. */ export type Multi2VecVoyageAIConfig = { + /** The base URL to use where API requests should go. */ + baseURL?: string; /** The image fields used when vectorizing. */ imageFields?: string[]; + /** The model to use. */ + model?: string; /** The text fields used when vectorizing. */ textFields?: string[]; + /** Whether the input should be truncated to fit in the context window. */ + truncate?: boolean; + /** Whether the collection name is vectorized. */ + vectorizeCollectionName?: boolean; /** The weights of the fields used for vectorization. */ weights?: { /** The weights of the image fields. */ @@ -282,7 +290,7 @@ export type Text2VecCohereConfig = { baseURL?: string; /** The model to use. */ model?: string; - /** The truncation strategy to use. */ + /** Whether to truncate the input texts to fit within the context length. */ truncate?: boolean; /** Whether to vectorize the collection name. */ vectorizeCollectionName?: boolean; @@ -345,13 +353,16 @@ export type Text2VecHuggingFaceConfig = { * * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage. */ -export type Text2VecJinaConfig = { +export type Text2VecJinaAIConfig = { /** The model to use. */ model?: 'jina-embeddings-v2-base-en' | 'jina-embeddings-v2-small-en' | string; /** Whether to vectorize the collection name. */ vectorizeCollectionName?: boolean; }; +/** @deprecated Use `Text2VecJinaAIConfig` instead. */ +export type Text2VecJinaConfig = Text2VecJinaAIConfig; + /** * The configuration for text vectorization using the Mistral module. * @@ -488,7 +499,7 @@ export type VectorizerConfig = | Text2VecGoogleConfig | Text2VecGPT4AllConfig | Text2VecHuggingFaceConfig - | Text2VecJinaConfig + | Text2VecJinaAIConfig | Text2VecOpenAIConfig | Text2VecPalmConfig | Text2VecTransformersConfig @@ -528,8 +539,8 @@ export type VectorizerConfigType = V extends 'img2vec-neural' ? Text2VecGPT4AllConfig | undefined : V extends 'text2vec-huggingface' ? Text2VecHuggingFaceConfig | undefined - : V extends 'text2vec-jina' - ? Text2VecJinaConfig | undefined + : V extends 'text2vec-jinaai' + ? Text2VecJinaAIConfig | undefined : V extends 'text2vec-mistral' ? Text2VecMistralConfig | undefined : V extends 'text2vec-ollama' diff --git a/src/collections/configure/types/vectorizer.ts b/src/collections/configure/types/vectorizer.ts index 94a9712c..5318e8f4 100644 --- a/src/collections/configure/types/vectorizer.ts +++ b/src/collections/configure/types/vectorizer.ts @@ -11,7 +11,7 @@ import { Text2VecGPT4AllConfig, Text2VecGoogleConfig, Text2VecHuggingFaceConfig, - Text2VecJinaConfig, + Text2VecJinaAIConfig, Text2VecMistralConfig, Text2VecOllamaConfig, Text2VecOpenAIConfig, @@ -132,6 +132,8 @@ export type Multi2VecJinaAIConfigCreate = { baseURL?: string; /** The dimensionality of the vector once embedded. */ dimensions?: number; + /** The model to use. */ + model?: string; /** The image fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */ imageFields?: string[] | Multi2VecField[]; /** The text fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */ @@ -164,10 +166,18 @@ export type Multi2VecGoogleConfigCreate = { }; export type Multi2VecVoyageAIConfigCreate = { + /** The base URL to use where API requests should go. */ + baseURL?: string; /** The image fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */ imageFields?: string[] | Multi2VecField[]; + /** The model to use. */ + model?: string; /** The text fields to use in vectorization. Can be string of `Multi2VecField` type. If string, weight 0 will be assumed. */ textFields?: string[] | Multi2VecField[]; + /** Whether the input should be truncated to fit the context window. */ + truncate?: boolean; + /** Whether to vectorize the collection name. */ + vectorizeCollectionName?: boolean; }; export type Ref2VecCentroidConfigCreate = Ref2VecCentroidConfig; @@ -186,7 +196,7 @@ export type Text2VecGPT4AllConfigCreate = Text2VecGPT4AllConfig; export type Text2VecHuggingFaceConfigCreate = Text2VecHuggingFaceConfig; -export type Text2VecJinaConfigCreate = Text2VecJinaConfig; +export type Text2VecJinaAIConfigCreate = Text2VecJinaAIConfig; export type Text2VecMistralConfigCreate = Text2VecMistralConfig; @@ -235,8 +245,8 @@ export type VectorizerConfigCreateType = V extends 'img2vec-neural' ? Text2VecGPT4AllConfigCreate | undefined : V extends 'text2vec-huggingface' ? Text2VecHuggingFaceConfigCreate | undefined - : V extends 'text2vec-jina' - ? Text2VecJinaConfigCreate | undefined + : V extends 'text2vec-jinaai' + ? Text2VecJinaAIConfigCreate | undefined : V extends 'text2vec-mistral' ? Text2VecMistralConfigCreate | undefined : V extends 'text2vec-ollama' diff --git a/src/collections/configure/unit.test.ts b/src/collections/configure/unit.test.ts index 5b799d2f..9af374a5 100644 --- a/src/collections/configure/unit.test.ts +++ b/src/collections/configure/unit.test.ts @@ -621,6 +621,7 @@ describe('Unit testing of the vectorizer factory class', () => { }, }); }); + it('should create the correct Multi2VecJinaAIConfig type with defaults', () => { const config = configure.vectorizer.multi2VecJinaAI(); expect(config).toEqual>({ @@ -635,6 +636,7 @@ describe('Unit testing of the vectorizer factory class', () => { }, }); }); + it('should create the correct Multi2VecJinaAIConfig type with all values and weights', () => { const config = configure.vectorizer.multi2VecJinaAI({ name: 'test', @@ -668,6 +670,7 @@ describe('Unit testing of the vectorizer factory class', () => { }, }); }); + it('should create the correct Multi2VecPalmConfig type using deprecated method with defaults', () => { const config = configure.vectorizer.multi2VecPalm({ projectId: 'project-id', @@ -771,6 +774,51 @@ describe('Unit testing of the vectorizer factory class', () => { }); }); + it('should create the correct Multi2VecVoyageAIConfig type with defaults', () => { + const config = configure.vectorizer.multi2VecVoyageAI(); + expect(config).toEqual>({ + name: undefined, + vectorIndex: { + name: 'hnsw', + config: undefined, + }, + vectorizer: { + name: 'multi2vec-voyageai', + config: undefined, + }, + }); + }); + + it('should create the correct Multi2VecVoyageAIConfig type with all values', () => { + const config = configure.vectorizer.multi2VecVoyageAI({ + baseURL: 'base-url', + model: 'model', + name: 'test', + truncate: true, + imageFields: ['field1', 'field2'], + textFields: ['field3', 'field4'], + vectorizeCollectionName: true, + }); + expect(config).toEqual>({ + name: 'test', + vectorIndex: { + name: 'hnsw', + config: undefined, + }, + vectorizer: { + name: 'multi2vec-voyageai', + config: { + baseURL: 'base-url', + model: 'model', + truncate: true, + imageFields: ['field1', 'field2'], + textFields: ['field3', 'field4'], + vectorizeCollectionName: true, + }, + }, + }); + }); + it('should create the correct Text2VecAWSConfig type with defaults', () => { const config = configure.vectorizer.text2VecAWS({ region: 'region', @@ -1071,35 +1119,35 @@ describe('Unit testing of the vectorizer factory class', () => { }); }); - it('should create the correct Text2VecJinaConfig type with defaults', () => { - const config = configure.vectorizer.text2VecJina(); - expect(config).toEqual>({ + it('should create the correct Text2VecJinaAIConfig type with defaults', () => { + const config = configure.vectorizer.text2VecJinaAI(); + expect(config).toEqual>({ name: undefined, vectorIndex: { name: 'hnsw', config: undefined, }, vectorizer: { - name: 'text2vec-jina', + name: 'text2vec-jinaai', config: undefined, }, }); }); - it('should create the correct Text2VecJinaConfig type with all values', () => { - const config = configure.vectorizer.text2VecJina({ + it('should create the correct Text2VecJinaAIConfig type with all values', () => { + const config = configure.vectorizer.text2VecJinaAI({ name: 'test', model: 'model', vectorizeCollectionName: true, }); - expect(config).toEqual>({ + expect(config).toEqual>({ name: 'test', vectorIndex: { name: 'hnsw', config: undefined, }, vectorizer: { - name: 'text2vec-jina', + name: 'text2vec-jinaai', config: { model: 'model', vectorizeCollectionName: true, diff --git a/src/collections/configure/vectorizer.ts b/src/collections/configure/vectorizer.ts index a6901b4e..c0442658 100644 --- a/src/collections/configure/vectorizer.ts +++ b/src/collections/configure/vectorizer.ts @@ -318,12 +318,15 @@ export const vectorizer = { vectorIndexConfig, vectorizerConfig: { name: 'multi2vec-voyageai', - config: { - ...config, - imageFields: imageFields?.map((f) => f.name), - textFields: textFields?.map((f) => f.name), - weights: Object.keys(weights).length === 0 ? undefined : weights, - }, + config: + Object.keys(config).length === 0 + ? undefined + : { + ...config, + imageFields: imageFields?.map((f) => f.name), + textFields: textFields?.map((f) => f.name), + weights: Object.keys(weights).length === 0 ? undefined : weights, + }, }, }); }, @@ -495,22 +498,22 @@ export const vectorizer = { }); }, /** - * Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-jina'`. + * Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-jinaai'`. * * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage. * - * @param {ConfigureTextVectorizerOptions} [opts] The configuration for the `text2vec-jina` vectorizer. - * @returns {VectorConfigCreate, N, I, 'text2vec-jina'>} The configuration object. + * @param {ConfigureTextVectorizerOptions} [opts] The configuration for the `text2vec-jinaai` vectorizer. + * @returns {VectorConfigCreate, N, I, 'text2vec-jinaai'>} The configuration object. */ - text2VecJina: ( - opts?: ConfigureTextVectorizerOptions - ): VectorConfigCreate, N, I, 'text2vec-jina'> => { + text2VecJinaAI: ( + opts?: ConfigureTextVectorizerOptions + ): VectorConfigCreate, N, I, 'text2vec-jinaai'> => { const { name, sourceProperties, vectorIndexConfig, ...config } = opts || {}; return makeVectorizer(name, { sourceProperties, vectorIndexConfig, vectorizerConfig: { - name: 'text2vec-jina', + name: 'text2vec-jinaai', config: Object.keys(config).length === 0 ? undefined : config, }, });