Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions src/collections/config/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export type Vectorizer =
| 'multi2vec-bind'
| Multi2VecPalmVectorizer
| 'multi2vec-google'
| 'multi2vec-jina'
| 'multi2vec-voyageai'
| 'ref2vec-centroid'
| 'text2vec-aws'
Expand Down Expand Up @@ -170,7 +171,7 @@ export type Multi2VecGoogleConfig = {
videoFields?: string[];
/** The model ID in use. */
modelId?: string;
/** The number of dimensions in use. */
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** Whether the collection name is vectorized. */
vectorizeCollectionName?: boolean;
Expand All @@ -185,6 +186,32 @@ export type Multi2VecGoogleConfig = {
};
};

/** The configuration for multi-media vectorization using the Jina module.
*
* Every property is optional. Field names and field weights are held in separate
* arrays here (`imageFields`/`textFields` versus `weights.imageFields`/`weights.textFields`).
*
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) for detailed usage.
*/
export type Multi2VecJinaConfig = {
/** The base URL to use where API requests should go. */
baseURL?: string;
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** The names of the image fields used when vectorizing. */
imageFields?: string[];
/** The model to use. */
model?: string;
/** The names of the text fields used when vectorizing. */
textFields?: string[];
/** Whether the collection name is vectorized. */
vectorizeCollectionName?: boolean;
/** The weights of the fields used for vectorization. */
weights?: {
/** The weights of the image fields (presumably index-aligned with `imageFields`; confirm against the server contract). */
imageFields?: number[];
/** The weights of the text fields (presumably index-aligned with `textFields`; confirm against the server contract). */
textFields?: number[];
};
};

/** The configuration for multi-media vectorization using the VoyageAI module.
*
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal) for detailed usage.
Expand Down Expand Up @@ -359,7 +386,7 @@ export type Text2VecOllamaConfig = {
export type Text2VecOpenAIConfig = {
/** The base URL to use where API requests should go. */
baseURL?: string;
/** The dimensions to use. */
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** The model to use. */
model?: 'text-embedding-3-small' | 'text-embedding-3-large' | 'text-embedding-ada-002' | string;
Expand Down Expand Up @@ -434,7 +461,7 @@ export type Text2VecVoyageAIConfig = {
export type Text2VecWeaviateConfig = {
/** The base URL to use where API requests should go. */
baseURL?: string;
/** The dimensions to use. */
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** The model to use. */
model?: 'Snowflake/snowflake-arctic-embed-m-v1.5' | string;
Expand Down Expand Up @@ -478,6 +505,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
? Multi2VecBindConfig | undefined
: V extends 'multi2vec-google'
? Multi2VecGoogleConfig
: V extends 'multi2vec-jina'
? Multi2VecJinaConfig | undefined
: V extends Multi2VecPalmVectorizer
? Multi2VecPalmConfig
: V extends 'multi2vec-voyageai'
Expand Down
17 changes: 16 additions & 1 deletion src/collections/configure/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,19 @@ export type Multi2VecCohereConfigCreate = {
vectorizeCollectionName?: boolean;
};

/**
 * The options accepted when configuring the `multi2vec-jina` vectorizer for a new collection.
 *
 * Every property is optional. Image and text fields may be given either as plain property
 * names or as `Multi2VecField` objects.
 */
export type Multi2VecJinaConfigCreate = {
  /** The base URL to use where API requests should go. */
  baseURL?: string;
  /** The dimensionality of the vector once embedded. */
  dimensions?: number;
  /** The image fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
  imageFields?: string[] | Multi2VecField[];
  /** The model to use. */
  model?: string;
  /** The text fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
  textFields?: string[] | Multi2VecField[];
  /** Whether to vectorize the collection name. */
  vectorizeCollectionName?: boolean;
};

/** @deprecated Use `Multi2VecGoogleConfigCreate` instead. */
export type Multi2VecPalmConfigCreate = Multi2VecGoogleConfigCreate;

Expand All @@ -144,7 +157,7 @@ export type Multi2VecGoogleConfigCreate = {
videoFields?: string[] | Multi2VecField[];
/** The model ID to use. */
modelId?: string;
/** The number of dimensions to use. */
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** Whether to vectorize the collection name. */
vectorizeCollectionName?: boolean;
Expand Down Expand Up @@ -200,6 +213,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
? Multi2VecCohereConfigCreate | undefined
: V extends 'multi2vec-bind'
? Multi2VecBindConfigCreate | undefined
: V extends 'multi2vec-jina'
? Multi2VecJinaConfigCreate | undefined
: V extends 'multi2vec-palm'
? Multi2VecPalmConfigCreate
: V extends 'multi2vec-google'
Expand Down
48 changes: 47 additions & 1 deletion src/collections/configure/unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,53 @@ describe('Unit testing of the vectorizer factory class', () => {
},
});
});

it('should create the correct Multi2VecJinaConfig type with defaults', () => {
  // Calling the factory without options must leave both the vectorizer config and
  // the vector index config unset, and default the index type to 'hnsw'.
  const expected: VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-jina'> = {
    name: undefined,
    vectorIndex: { name: 'hnsw', config: undefined },
    vectorizer: { name: 'multi2vec-jina', config: undefined },
  };
  const actual = configure.vectorizer.multi2VecJina();
  expect(actual).toEqual(expected);
});
it('should create the correct Multi2VecJinaConfig type with all values and weights', () => {
  // Exercise every option the factory forwards: the scalar settings (baseURL, dimensions,
  // vectorizeCollectionName) must pass through untouched, while the `{ name, weight }`
  // field objects must be split into parallel name and weight arrays.
  const config = configure.vectorizer.multi2VecJina({
    name: 'test',
    baseURL: 'base-url',
    dimensions: 256,
    imageFields: [
      { name: 'field1', weight: 0.1 },
      { name: 'field2', weight: 0.2 },
    ],
    textFields: [
      { name: 'field3', weight: 0.3 },
      { name: 'field4', weight: 0.4 },
    ],
    vectorizeCollectionName: true,
  });
  expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'multi2vec-jina'>>({
    name: 'test',
    vectorIndex: {
      name: 'hnsw',
      config: undefined,
    },
    vectorizer: {
      name: 'multi2vec-jina',
      config: {
        baseURL: 'base-url',
        dimensions: 256,
        imageFields: ['field1', 'field2'],
        textFields: ['field3', 'field4'],
        vectorizeCollectionName: true,
        weights: {
          imageFields: [0.1, 0.2],
          textFields: [0.3, 0.4],
        },
      },
    },
  });
});
it('should create the correct Multi2VecPalmConfig type using deprecated method with defaults', () => {
const config = configure.vectorizer.multi2VecPalm({
projectId: 'project-id',
Expand Down
33 changes: 33 additions & 0 deletions src/collections/configure/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,39 @@ export const vectorizer = {
},
});
},
/**
 * Build a `VectorConfigCreate` object whose vectorizer is `'multi2vec-jina'`.
 *
 * The `name` and `vectorIndexConfig` options are forwarded to `makeVectorizer`; every other
 * option becomes the vectorizer config. Image and text fields are passed through
 * `mapMulti2VecField`, after which their names are emitted as `imageFields`/`textFields`
 * and the result of `formatMulti2VecFields` is emitted as `weights`. When no vectorizer
 * options are supplied, the vectorizer config is `undefined`.
 *
 * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) for detailed usage.
 *
 * @param {ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-jina'>} [opts] The configuration options for the `multi2vec-jina` vectorizer.
 * @returns {VectorConfigCreate<never, N, I, 'multi2vec-jina'>} The configuration object.
 */
multi2VecJina: <N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
  opts?: ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-jina'>
): VectorConfigCreate<never, N, I, 'multi2vec-jina'> => {
  const { name, vectorIndexConfig, ...vectorizerOpts } = opts || {};
  const images = vectorizerOpts.imageFields?.map(mapMulti2VecField);
  const texts = vectorizerOpts.textFields?.map(mapMulti2VecField);

  // Accumulate the per-modality weights, image fields first and text fields second.
  let weights: Multi2VecBindConfig['weights'] = {};
  weights = formatMulti2VecFields(weights, 'imageFields', images);
  weights = formatMulti2VecFields(weights, 'textFields', texts);

  const hasVectorizerOpts = Object.keys(vectorizerOpts).length > 0;
  return makeVectorizer(name, {
    vectorIndexConfig,
    vectorizerConfig: {
      // NOTE(review): Weaviate's documentation appears to name this module `multi2vec-jinaai`;
      // confirm that the server accepts `'multi2vec-jina'` before relying on it.
      name: 'multi2vec-jina',
      config: hasVectorizerOpts
        ? {
            ...vectorizerOpts,
            // Replace the field objects spread above with their bare names.
            imageFields: images?.map((field) => field.name),
            textFields: texts?.map((field) => field.name),
            weights: Object.keys(weights).length === 0 ? undefined : weights,
          }
        : undefined,
    },
  });
},
/**
* Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-palm'`.
*
Expand Down
Loading