Skip to content

Commit 9364612

Browse files
committed
Add support for configuring muvera encoding with multivectors
1 parent c110e4c commit 9364612

File tree

10 files changed

+151
-44
lines changed

10 files changed

+151
-44
lines changed

src/collections/config/integration.test.ts

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -389,11 +389,7 @@ describe('Testing of the collection.config namespace', () => {
389389
]);
390390
});
391391

392-
requireAtLeast(
393-
1,
394-
31,
395-
0
396-
)('Mutable named vectors', () => {
392+
requireAtLeast(1, 31, 0)(describe)('Mutable named vectors', () => {
397393
it('should be able to add named vectors to a collection', async () => {
398394
const collectionName = 'TestCollectionConfigAddVector' as const;
399395
const collection = await client.collections.create({
@@ -715,4 +711,51 @@ describe('Testing of the collection.config namespace', () => {
715711
},
716712
});
717713
});
714+
715+
requireAtLeast(1, 31, 0)(it)(
716+
'should be able to create and get a multi-vector collection with encoding',
717+
async () => {
718+
const collectionName = 'TestCollectionConfigCreateWithMuveraEncoding';
719+
const collection = await client.collections.create({
720+
name: collectionName,
721+
vectorizers: weaviate.configure.vectorizer.none({
722+
vectorIndexConfig: weaviate.configure.vectorIndex.hnsw({
723+
multiVector: weaviate.configure.vectorIndex.multiVector.multiVector({
724+
aggregation: 'maxSim',
725+
encoding: weaviate.configure.vectorIndex.multiVector.encoding.muvera(),
726+
}),
727+
}),
728+
}),
729+
});
730+
const config = await collection.config.get();
731+
expect(config.name).toEqual(collectionName);
732+
733+
const indexConfig = config.vectorizers.default.indexConfig as VectorIndexConfigHNSW;
734+
expect(indexConfig.multiVector).toBeDefined();
735+
expect(indexConfig.multiVector?.aggregation).toEqual('maxSim');
736+
expect(indexConfig.multiVector?.encoding).toBeDefined();
737+
}
738+
);
739+
740+
requireAtLeast(1, 31, 0)(it)(
741+
'should be able to create and get a multi-vector collection without encoding',
742+
async () => {
743+
const collectionName = 'TestCollectionConfigCreateWithoutMuveraEncoding';
744+
const collection = await client.collections.create({
745+
name: collectionName,
746+
vectorizers: weaviate.configure.vectorizer.none({
747+
vectorIndexConfig: weaviate.configure.vectorIndex.hnsw({
748+
multiVector: weaviate.configure.vectorIndex.multiVector.multiVector(),
749+
}),
750+
}),
751+
});
752+
const config = await collection.config.get();
753+
expect(config.name).toEqual(collectionName);
754+
755+
const indexConfig = config.vectorizers.default.indexConfig as VectorIndexConfigHNSW;
756+
expect(indexConfig.multiVector).toBeDefined();
757+
expect(indexConfig.multiVector?.aggregation).toEqual('maxSim');
758+
expect(indexConfig.multiVector?.encoding).toBeUndefined();
759+
}
760+
);
718761
});

src/collections/config/types/vectorIndex.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,18 @@ export type PQConfig = {
6464

6565
export type MultiVectorConfig = {
6666
aggregation: 'maxSim' | string;
67+
encoding?: MultiVectorEncodingConfig;
6768
};
6869

70+
export type MuveraEncodingConfig = {
71+
ksim?: number;
72+
dprojections?: number;
73+
repetitions?: number;
74+
type: 'muvera';
75+
};
76+
77+
export type MultiVectorEncodingConfig = MuveraEncodingConfig | Record<string, any>;
78+
6979
export type PQEncoderConfig = {
7080
type: PQEncoderType;
7181
distribution: PQEncoderDistribution;

src/collections/config/utils.ts

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import {
1818
WeaviateVectorsConfig,
1919
} from '../../openapi/types.js';
2020
import { DbVersionSupport } from '../../utils/dbVersion.js';
21-
import { QuantizerGuards, VectorIndexGuards } from '../configure/parsing.js';
21+
import { MultiVectorEncodingGuards, QuantizerGuards, VectorIndexGuards } from '../configure/parsing.js';
2222
import {
2323
PropertyConfigCreate,
2424
ReferenceConfigCreate,
@@ -39,6 +39,7 @@ import {
3939
ModuleConfig,
4040
MultiTenancyConfig,
4141
MultiVectorConfig,
42+
MultiVectorEncodingConfig,
4243
PQConfig,
4344
PQEncoderConfig,
4445
PQEncoderDistribution,
@@ -147,18 +148,34 @@ export const parseVectorIndex = (module: ModuleConfig<VectorIndexType, VectorInd
147148
};
148149
}
149150

150-
let multiVector;
151+
let multivector: any;
151152
if (VectorIndexGuards.isHNSW(module.config) && module.config.multiVector !== undefined) {
152-
multiVector = {
153-
...module.config.multiVector,
153+
multivector = {
154+
aggregation: module.config.multiVector.aggregation,
154155
enabled: true,
155156
};
157+
if (
158+
module.config.multiVector.encoding !== undefined &&
159+
MultiVectorEncodingGuards.isMuvera(module.config.multiVector.encoding)
160+
) {
161+
multivector.muvera = {
162+
enabled: true,
163+
ksim: module.config.multiVector.encoding.ksim,
164+
dprojections: module.config.multiVector.encoding.dprojections,
165+
repetitions: module.config.multiVector.encoding.repetitions,
166+
};
167+
}
156168
}
157169

158-
const { quantizer, ...conf } = module.config as
170+
const { quantizer, ...rest } = module.config as
159171
| VectorIndexConfigFlatCreate
160172
| VectorIndexConfigHNSWCreate
161173
| Record<string, any>;
174+
175+
const conf = {
176+
...rest,
177+
multivector,
178+
};
162179
if (quantizer === undefined) return conf;
163180
if (QuantizerGuards.isBQCreate(quantizer)) {
164181
const { type, ...quant } = quantizer;
@@ -476,6 +493,7 @@ class ConfigMapping {
476493
} else {
477494
quantizer = undefined;
478495
}
496+
479497
return {
480498
cleanupIntervalSeconds: v.cleanupIntervalSeconds,
481499
distance: v.distance,
@@ -502,8 +520,25 @@ class ConfigMapping {
502520
if (v.enabled === false) return undefined;
503521
if (!exists<string>(v.aggregation))
504522
throw new WeaviateDeserializationError('Multi vector aggregation was not returned by Weaviate');
523+
let encoding: MultiVectorEncodingConfig | undefined;
524+
if (
525+
exists<{
526+
ksim: number;
527+
dprojections: number;
528+
repetitions: number;
529+
enabled: boolean;
530+
}>(v.muvera)
531+
) {
532+
encoding = v.muvera.enabled
533+
? {
534+
type: 'muvera',
535+
...v.muvera,
536+
}
537+
: undefined;
538+
}
505539
return {
506540
aggregation: v.aggregation,
541+
encoding,
507542
};
508543
}
509544
static bq(v?: Record<string, unknown>): BQConfig | undefined {

src/collections/configure/parsing.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { MuveraEncodingConfigCreate } from '../index.js';
12
import {
23
BQConfigCreate,
34
BQConfigUpdate,
@@ -57,6 +58,12 @@ export class VectorIndexGuards {
5758
}
5859
}
5960

61+
export class MultiVectorEncodingGuards {
62+
static isMuvera(config?: Record<string, any>): config is MuveraEncodingConfigCreate {
63+
return (config as { type: string })?.type === 'muvera';
64+
}
65+
}
66+
6067
export function parseWithDefault<D>(value: D | undefined, defaultValue: D): D {
6168
return value !== undefined ? value : defaultValue;
6269
}

src/collections/configure/types/vectorIndex.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import {
22
BQConfig,
33
ModuleConfig,
44
MultiVectorConfig,
5+
MuveraEncodingConfig,
56
PQConfig,
67
PQEncoderDistribution,
78
PQEncoderType,
@@ -49,6 +50,8 @@ export type SQConfigUpdate = {
4950

5051
export type MultiVectorConfigCreate = RecursivePartial<MultiVectorConfig>;
5152

53+
export type MuveraEncodingConfigCreate = RecursivePartial<MuveraEncodingConfig>;
54+
5255
export type VectorIndexConfigHNSWCreate = RecursivePartial<VectorIndexConfigHNSW>;
5356

5457
export type VectorIndexConfigDynamicCreate = RecursivePartial<VectorIndexConfigDynamic>;

src/collections/configure/vectorIndex.ts

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
BQConfigCreate,
99
BQConfigUpdate,
1010
MultiVectorConfigCreate,
11+
MuveraEncodingConfigCreate,
1112
PQConfigCreate,
1213
PQConfigUpdate,
1314
SQConfigCreate,
@@ -101,15 +102,45 @@ const configure = {
101102
* Define the configuration for a multi-vector index.
102103
*/
103104
multiVector: {
105+
/**
106+
* Specify the encoding configuration for a multi-vector index.
107+
*/
108+
encoding: {
109+
/**
110+
* Create an object of type `MuveraEncodingConfigCreate` to be used when defining the encoding configuration of a multi-vector index using MUVERA.
111+
*
112+
* @param {number} [options.ksim] The number of Gaussian vectors sampled for the SimHash space partitioning, which yields 2^ksim buckets. Default is undefined.
113+
* @param {number} [options.dprojections] The dimensionality of each sub-vector after the random projection step. Default is undefined.
114+
* @param {number} [options.repetitions] The number of times the space partitioning and projection steps are repeated. Default is undefined.
115+
* @returns {MuveraEncodingConfigCreate} The object of type `MuveraEncodingConfigCreate`.
116+
*/
117+
muvera: (options?: {
118+
ksim?: number;
119+
dprojections?: number;
120+
repetitions?: number;
121+
}): MuveraEncodingConfigCreate => {
122+
return {
123+
ksim: options?.ksim,
124+
dprojections: options?.dprojections,
125+
repetitions: options?.repetitions,
126+
type: 'muvera',
127+
};
128+
},
129+
},
104130
/**
105131
* Create an object of type `MultiVectorConfigCreate` to be used when defining the configuration of a multi-vector index.
106132
*
107-
* @param {object} [options.aggregation] The aggregation method to use. Default is 'maxSim'.
133+
* @param {string} [options.aggregation] The aggregation method to use. Default is 'maxSim'.
134+
* @param {MultiVectorConfigCreate['encoding']} [options.encoding] The encoding configuration for the multi-vector index. Default is undefined.
108135
* @returns {MultiVectorConfigCreate} The object of type `MultiVectorConfigCreate`.
109136
*/
110-
multiVector: (options?: { aggregation?: 'maxSim' | string }): MultiVectorConfigCreate => {
137+
multiVector: (options?: {
138+
aggregation?: 'maxSim' | string;
139+
encoding?: MultiVectorConfigCreate['encoding'];
140+
}): MultiVectorConfigCreate => {
111141
return {
112142
aggregation: options?.aggregation,
143+
encoding: options?.encoding,
113144
};
114145
},
115146
},

src/collections/query/integration.test.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,7 @@ describe('Testing of the collection.query methods with a simple collection', ()
134134
expect(ret.objects[0].uuid).toEqual(id);
135135
});
136136

137-
requireAtLeast(
138-
1,
139-
31,
140-
0
141-
)('bm25 search operator (minimum_should_match)', () => {
137+
requireAtLeast(1, 31, 0)(describe)('bm25 search operator (minimum_should_match)', () => {
142138
it('should query with bm25 + operator', async () => {
143139
const ret = await collection.query.bm25('carrot', {
144140
limit: 1,

src/roles/integration.test.ts

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,7 @@ const testCases: TestCase[] = [
279279
},
280280
];
281281

282-
requireAtLeast(
283-
1,
284-
29,
285-
0
286-
)('Integration testing of the roles namespace', () => {
282+
requireAtLeast(1, 29, 0)(describe)('Integration testing of the roles namespace', () => {
287283
let client: WeaviateClient;
288284

289285
beforeAll(async () => {
@@ -317,11 +313,7 @@ requireAtLeast(
317313
expect(exists).toBeFalsy();
318314
});
319315

320-
requireAtLeast(
321-
1,
322-
30,
323-
0
324-
)('namespaced users', () => {
316+
requireAtLeast(1, 30, 0)(describe)('namespaced users', () => {
325317
it('retrieves assigned users with/without namespace', async () => {
326318
await client.roles.create('landlord', {
327319
collection: 'Buildings',

src/users/integration.test.ts

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,7 @@ import { requireAtLeast } from '../../test/version.js';
33
import { WeaviateUserTypeDB } from '../openapi/types.js';
44
import { GetUserOptions, UserDB } from './types.js';
55

6-
requireAtLeast(
7-
1,
8-
29,
9-
0
10-
)('Integration testing of the users namespace', () => {
6+
requireAtLeast(1, 29, 0)(describe)('Integration testing of the users namespace', () => {
117
const makeClient = (key: string) =>
128
weaviate.connectToLocal({
139
port: 8091,
@@ -62,11 +58,7 @@ requireAtLeast(
6258
expect(roles.test).toBeUndefined();
6359
});
6460

65-
requireAtLeast(
66-
1,
67-
30,
68-
0
69-
)('dynamic user management', () => {
61+
requireAtLeast(1, 30, 0)(describe)('dynamic user management', () => {
7062
/** List dynamic DB users. */
7163
const listDBUsers = (c: WeaviateClient, opts?: GetUserOptions) =>
7264
c.users.db.listAll(opts).then((all) => all.filter((u) => u.userType == 'db_user'));
@@ -172,11 +164,7 @@ requireAtLeast(
172164
expect(roles.Permissioner.nodesPermissions).toHaveLength(1);
173165
});
174166

175-
requireAtLeast(
176-
1,
177-
30,
178-
1
179-
)('additional DUM features', () => {
167+
requireAtLeast(1, 30, 1)(describe)('additional DUM features', () => {
180168
it('should be able to fetch additional user info', async () => {
181169
const admin = await makeClient('admin-key');
182170
const timKey = await admin.users.db.create('timely-tim');

test/version.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,7 @@ import { DbVersion } from '../src/utils/dbVersion';
33
const version = DbVersion.fromString(`v${process.env.WEAVIATE_VERSION!}`);
44

55
/** Run the suite / test only for Weaviate versions at or above this. */
6-
export const requireAtLeast = (...semver: [...Parameters<DbVersion['isAtLeast']>]) =>
7-
version.isAtLeast(...semver) ? describe : describe.skip;
6+
export const requireAtLeast =
7+
(...semver: [...Parameters<DbVersion['isAtLeast']>]) =>
8+
(type: jest.Describe | jest.It) =>
9+
version.isAtLeast(...semver) ? type : type.skip;

0 commit comments

Comments
 (0)