Skip to content

Commit d15dc68

Browse files
authored
Make improvements to the modules system (#327)
* Make improvements to the modules system:
  - deprecate `text2vec-contextionary`
  - add `text2vec-model2vec`, which is a replacement for `text2vec-contextionary`
  - add `dimensions` parameter to `text2vec-transformers`
* Add `dimensions` to `text2vec-transformers` test
1 parent c360306 commit d15dc68

File tree

4 files changed

+89
-7
lines changed

4 files changed

+89
-7
lines changed

src/collections/config/types/vectorizer.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ export type Vectorizer =
3939
| 'text2vec-jinaai'
4040
| 'text2vec-nvidia'
4141
| 'text2vec-mistral'
42+
| 'text2vec-model2vec'
4243
| 'text2vec-ollama'
4344
| 'text2vec-openai'
4445
| Text2VecPalmVectorizer
@@ -522,6 +523,8 @@ export type Text2VecGoogleConfig = {
522523
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings) for detailed usage.
523524
*/
524525
export type Text2VecTransformersConfig = {
526+
/** The number of dimensions for the generated embeddings. */
527+
dimensions?: number;
525528
/** The inference url to use where API requests should go. You can use either this OR (`passage_inference_url` & `query_inference_url`). */
526529
inferenceUrl?: string;
527530
/** The inference url to use where passage API requests should go. You can use either (this AND query_inference_url) OR `inference_url`. */
@@ -566,6 +569,16 @@ export type Text2VecWeaviateConfig = {
566569
vectorizeCollectionName?: boolean;
567570
};
568571

572+
/**
573+
* The configuration for text vectorization using the Model2Vec module.
574+
*/
575+
export type Text2VecModel2Vec = {
576+
/** The URL to use where API requests should go. */
577+
inferenceURL?: string;
578+
/** Whether to vectorize the collection name. */
579+
vectorizeCollectionName?: boolean;
580+
};
581+
569582
export type NoVectorizerConfig = {};
570583

571584
export type VectorizerConfig =
@@ -586,6 +599,7 @@ export type VectorizerConfig =
586599
| Text2VecGoogleConfig
587600
| Text2VecGPT4AllConfig
588601
| Text2VecHuggingFaceConfig
602+
| Text2VecModel2Vec
589603
| Text2VecJinaAIConfig
590604
| Text2VecOpenAIConfig
591605
| Text2VecPalmConfig
@@ -636,6 +650,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
636650
? Text2VecNvidiaConfig | undefined
637651
: V extends 'text2vec-mistral'
638652
? Text2VecMistralConfig | undefined
653+
: V extends 'text2vec-model2vec'
654+
? Text2VecModel2Vec | undefined
639655
: V extends 'text2vec-ollama'
640656
? Text2VecOllamaConfig | undefined
641657
: V extends 'text2vec-openai'

src/collections/configure/types/vectorizer.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
Text2VecHuggingFaceConfig,
1515
Text2VecJinaAIConfig,
1616
Text2VecMistralConfig,
17+
Text2VecModel2Vec,
1718
Text2VecNvidiaConfig,
1819
Text2VecOllamaConfig,
1920
Text2VecOpenAIConfig,
@@ -273,6 +274,8 @@ export type Text2VecNvidiaConfigCreate = Text2VecNvidiaConfig;
273274

274275
export type Text2VecMistralConfigCreate = Text2VecMistralConfig;
275276

277+
export type Text2VecModel2VecConfigCreate = Text2VecModel2Vec;
278+
276279
export type Text2VecOllamaConfigCreate = Text2VecOllamaConfig;
277280

278281
export type Text2VecOpenAIConfigCreate = Text2VecOpenAIConfig;
@@ -330,6 +333,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
330333
? Text2VecNvidiaConfigCreate | undefined
331334
: V extends 'text2vec-mistral'
332335
? Text2VecMistralConfigCreate | undefined
336+
: V extends 'text2vec-model2vec'
337+
? Text2VecModel2VecConfigCreate | undefined
333338
: V extends 'text2vec-ollama'
334339
? Text2VecOllamaConfigCreate | undefined
335340
: V extends 'text2vec-openai'

src/collections/configure/unit.test.ts

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,7 @@ describe('Unit testing of the vectorizer factory class', () => {
14631463
const config = configure.vectors.text2VecTransformers({
14641464
name: 'test',
14651465
poolingStrategy: 'pooling-strategy',
1466+
dimensions: 512,
14661467
});
14671468
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2vec-transformers'>>({
14681469
name: 'test',
@@ -1474,6 +1475,7 @@ describe('Unit testing of the vectorizer factory class', () => {
14741475
name: 'text2vec-transformers',
14751476
config: {
14761477
poolingStrategy: 'pooling-strategy',
1478+
dimensions: 512,
14771479
},
14781480
},
14791481
});
@@ -1567,12 +1569,49 @@ describe('Unit testing of the vectorizer factory class', () => {
15671569
},
15681570
});
15691571
});
1570-
});
15711572

1572-
it('should alias "selfProvided" to "none"', () => {
1573-
expect(configure.vectors.selfProvided()).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'none'>>(
1574-
configure.vectors.none()
1575-
);
1573+
it('should alias "selfProvided" to "none"', () => {
1574+
expect(configure.vectors.selfProvided()).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'none'>>(
1575+
configure.vectors.none()
1576+
);
1577+
});
1578+
1579+
it('should create the correct Text2VecModel2VecConfig type with defaults', () => {
1580+
const config = configure.vectors.text2VecModel2Vec();
1581+
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'text2vec-model2vec'>>({
1582+
name: undefined,
1583+
vectorIndex: {
1584+
name: 'hnsw',
1585+
config: undefined,
1586+
},
1587+
vectorizer: {
1588+
name: 'text2vec-model2vec',
1589+
config: undefined,
1590+
},
1591+
});
1592+
});
1593+
1594+
it('should create the correct Text2VecModel2VecConfig type with all values', () => {
1595+
const config = configure.vectors.text2VecModel2Vec({
1596+
name: 'test',
1597+
inferenceURL: 'url',
1598+
vectorizeCollectionName: true,
1599+
});
1600+
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2vec-model2vec'>>({
1601+
name: 'test',
1602+
vectorIndex: {
1603+
name: 'hnsw',
1604+
config: undefined,
1605+
},
1606+
vectorizer: {
1607+
name: 'text2vec-model2vec',
1608+
config: {
1609+
inferenceURL: 'url',
1610+
vectorizeCollectionName: true,
1611+
},
1612+
},
1613+
});
1614+
});
15761615
});
15771616

15781617
describe('Unit testing of the multiVectors factory class', () => {

src/collections/configure/vectorizer.ts

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ const legacyVectors = {
514514
*
515515
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-contextionary'>} [opts] The configuration for the `text2vec-contextionary` vectorizer.
516516
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-contextionary'>} The configuration object.
517+
* @deprecated The contextionary model is old and not recommended for use. If you are looking for a local, lightweight model try the new text2vec-model2vec module instead.
517518
*/
518519
text2VecContextionary: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
519520
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-contextionary'>
@@ -556,7 +557,7 @@ const legacyVectors = {
556557
*
557558
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/gpt4all/embeddings) for detailed usage.
558559
*
559-
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-gpt4all'>} [opts] The configuration for the `text2vec-contextionary` vectorizer.
560+
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-gpt4all'>} [opts] The configuration for the `text2vec-gpt4all` vectorizer.
560561
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-gpt4all'>} The configuration object.
561562
*/
562563
text2VecGPT4All: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
@@ -578,7 +579,7 @@ const legacyVectors = {
578579
*
579580
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings) for detailed usage.
580581
*
581-
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-huggingface'>} [opts] The configuration for the `text2vec-contextionary` vectorizer.
582+
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-huggingface'>} [opts] The configuration for the `text2vec-huggingface` vectorizer.
582583
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-huggingface'>} The configuration object.
583584
*/
584585
text2VecHuggingFace: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
@@ -810,6 +811,27 @@ const legacyVectors = {
810811
},
811812
});
812813
},
814+
815+
/**
816+
* Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-model2vec'`.
817+
*
818+
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-model2vec'>} [opts] The configuration for the `text2vec-model2vec` vectorizer.
819+
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-model2vec'>} The configuration object.
820+
*/
821+
text2VecModel2Vec: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
822+
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-model2vec'>
823+
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-model2vec'> => {
824+
const { name, sourceProperties, quantizer, vectorIndexConfig, ...config } = opts || {};
825+
return makeVectorizer(name, {
826+
sourceProperties,
827+
vectorIndexConfig,
828+
quantizer,
829+
vectorizerConfig: {
830+
name: 'text2vec-model2vec',
831+
config: Object.keys(config).length === 0 ? undefined : config,
832+
},
833+
});
834+
},
813835
};
814836

815837
/** __vectors_shaded modifies some parameters in legacy vectorizer configuration.

0 commit comments

Comments
 (0)