Skip to content

Commit 9f5bca0

Browse files
authored
feat: enhance search functionality with ID support (#499)
* feat: enhance search functionality with ID support
  - Updated SearchReq and SearchSimpleReq interfaces to include optional `ids` field for searching by primary keys.
  - Modified buildSearchRequest function to handle ID-based searches, including validation for primary key types.
  - Added tests to validate search functionality using IDs, ensuring correct error handling for type mismatches and missing primary keys.
  Signed-off-by: ryjiang <jiangruiyi@gmail.com>
* update docs
  Signed-off-by: ryjiang <jiangruiyi@gmail.com>
* refactor: Consolidate primary key field and data type lookup in search utility.
* test: add tests for searching with string IDs and VarChar primary keys
  Signed-off-by: ryjiang <jiangruiyi@gmail.com>
* test: add test case for formatting search results with dynamic fields
  Signed-off-by: ryjiang <jiangruiyi@gmail.com>
---------
Signed-off-by: ryjiang <jiangruiyi@gmail.com>
1 parent 07cb4f0 commit 9f5bca0

File tree

7 files changed

+583
-67
lines changed

7 files changed

+583
-67
lines changed

docs/content/operations/data-operations-query.mdx

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,24 @@ const results = await client.search({
113113
});
114114
```
115115

116+
### Search by ID
117+
118+
Perform a vector similarity search using the vectors associated with the given entity IDs:
119+
120+
```javascript
121+
const results = await client.search({
122+
collection_name: 'my_collection',
123+
ids: [1, 2, 3], // primary keys
124+
limit: 10,
125+
output_fields: ['id', 'text'],
126+
});
127+
```
128+
129+
> [!NOTE]
130+
> - When `ids` are provided, the `data` parameter (dummy vectors) is not required.
131+
> - The provided `ids` must match the collection's Primary Key type (`Int64` or `VarChar`).
132+
> - For collections with multiple vector fields, specify `anns_field` explicitly to disambiguate which vector field is searched.
133+
116134
### Search Multiple Vectors
117135

118136
```javascript

milvus/types/Search.ts

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,14 @@ export interface SearchReq extends collectionNameReq {
4141
expr?: string; // filter expression
4242
exprValues?: keyValueObj; // template values for filter expression, eg: {key: 'value'}
4343
search_params: SearchParam; // search parameters
44-
vectors: SearchData | SearchData[]; // vectors to search
44+
vectors?: SearchData | SearchData[]; // vectors to search
4545
output_fields?: string[]; // fields to return
4646
travel_timestamp?: string; // time travel
4747
vector_type: DataType.BinaryVector | DataType.FloatVector; // vector field type
4848
nq?: number; // number of query vectors
4949
consistency_level?: ConsistencyLevelEnum; // consistency level
5050
transformers?: OutputTransformers; // provide custom data transformer for specific data type like bf16 or f16 vectors
51+
ids?: number[] | string[]; // primary keys for search by IDs
5152
}
5253

5354
export interface FunctionScore {
@@ -59,7 +60,7 @@ export interface FunctionScore {
5960
export interface SearchSimpleReq extends collectionNameReq {
6061
partition_names?: string[]; // partition names
6162
anns_field?: string; // your vector field name,required if you are searching on multiple vector fields collection
62-
data: SearchData | SearchData[]; // vector or text to search
63+
data?: SearchData | SearchData[]; // vector or text to search
6364
vector?: SearchData | SearchData[];
6465
output_fields?: string[];
6566
limit?: number; // how many results you want
@@ -80,6 +81,7 @@ export interface SearchSimpleReq extends collectionNameReq {
8081
transformers?: OutputTransformers; // provide custom data transformer for specific data type like bf16 or f16 vectors
8182
rerank?: RerankerObj | FunctionObject | FunctionScore; // reranker
8283
nq?: number; // number of query vectors
84+
ids?: number[] | string[]; // primary keys for search by IDs
8385
}
8486

8587
export type HybridSearchSingleReq = Pick<
@@ -173,16 +175,16 @@ export type OutputTransformers = {
173175

174176
export type DetermineResultsType<T extends Record<string, any>> =
175177
T['vectors'] extends [SearchData]
176-
? SearchResultData[]
177-
: T['vectors'] extends SearchData[]
178-
? SearchResultData[][]
179-
: T['vector'] extends SearchData
180-
? SearchResultData[]
181-
: T['data'] extends SearchData
182-
? SearchResultData[]
183-
: T['data'] extends SearchData[]
184-
? SearchResultData[][]
185-
: SearchResultData[];
178+
? SearchResultData[]
179+
: T['vectors'] extends SearchData[]
180+
? SearchResultData[][]
181+
: T['vector'] extends SearchData
182+
? SearchResultData[]
183+
: T['data'] extends SearchData
184+
? SearchResultData[]
185+
: T['data'] extends SearchData[]
186+
? SearchResultData[][]
187+
: SearchResultData[];
186188

187189
export interface SearchResultData {
188190
[x: string]: any;

milvus/utils/Search.ts

Lines changed: 115 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ import {
2929
SparseFloatVector,
3030
PlaceholderType,
3131
SearchEmbList,
32+
DataType,
33+
DataTypeMap,
3234
} from '../';
3335

3436
/**
@@ -135,6 +137,7 @@ type FormatedSearchRequest = {
135137
dsl?: string;
136138
dsl_type?: DslType;
137139
placeholder_group?: Uint8Array;
140+
ids?: { int_id?: { data: number[] }; str_id?: { data: string[] } };
138141
search_params?: KeyValuePair[];
139142
consistency_level: ConsistencyLevelEnum;
140143
expr?: string;
@@ -243,19 +246,25 @@ export const buildSearchRequest = (
243246
// build user search requests
244247
const userRequests = isHybridSearch
245248
? searchHybridReq.data.map(d => ({
246-
...params,
247-
...d,
248-
}))
249+
...params,
250+
...d,
251+
}))
249252
: [
250-
{
251-
...searchSimpleReq,
252-
data:
253-
searchReq.vectors || searchSimpleReq.vector || searchSimpleReq.data, // data or vector or vectors
254-
anns_field:
255-
searchSimpleReq.anns_field ||
256-
Object.keys(collectionInfo.anns_fields || {})[0],
257-
},
258-
];
253+
{
254+
...searchSimpleReq,
255+
data:
256+
searchReq.vectors || searchSimpleReq.vector || searchSimpleReq.data, // data or vector or vectors
257+
anns_field:
258+
searchSimpleReq.anns_field ||
259+
Object.keys(collectionInfo.anns_fields || {})[0],
260+
},
261+
];
262+
263+
// get primary field type for ids
264+
const pkField = collectionInfo.schema.fields.find(f => f.is_primary_key);
265+
const pkDataType = pkField
266+
? pkField.dataType || DataTypeMap[pkField.data_type]
267+
: undefined;
259268

260269
for (const userRequest of userRequests) {
261270
const { data, anns_field } = userRequest;
@@ -265,28 +274,82 @@ export const buildSearchRequest = (
265274
throw new Error(ERROR_REASONS.NO_ANNS_FEILD_FOUND_IN_SEARCH);
266275
}
267276

277+
// get ids from request
278+
const ids =
279+
userRequest.ids || searchReq.ids || searchSimpleReq.ids || undefined;
280+
281+
// if ids is set, we use ids for search
282+
// check if ids is valid
283+
if (ids && ids.length > 0) {
284+
if (!pkField) {
285+
throw new Error(
286+
'Primary field not found. Cannot use ids parameter without primary field.'
287+
);
288+
}
289+
290+
// validation
291+
if (pkDataType === DataType.Int64) {
292+
if (
293+
!(ids as any[]).every(
294+
(id: any) =>
295+
typeof id === 'number' || (typeof id === 'string' && !isNaN(Number(id)))
296+
)
297+
) {
298+
throw new Error(
299+
`The type of ids should be integer/string number because the primary key field ${pkField.name} is Int64.`
300+
);
301+
}
302+
} else if (pkDataType === DataType.VarChar) {
303+
if (!(ids as any[]).every((id: any) => typeof id === 'string')) {
304+
throw new Error(
305+
`The type of ids should be string because the primary key field ${pkField.name} is VarChar.`
306+
);
307+
}
308+
} else {
309+
throw new Error(
310+
`The primary key field ${pkField.name} has unsupported type for ID search.`
311+
);
312+
}
313+
}
314+
268315
// get search data
269-
const searchData = formatSearchData(data, annsField);
316+
// if ids is set, we don't need to format search data
317+
// check that search data is provided when ids are not set
318+
if ((!ids || ids.length === 0) && !data) {
319+
throw new Error('Search data is required');
320+
}
321+
const searchData =
322+
ids && ids.length > 0 ? [] : formatSearchData(data!, annsField);
270323

271324
const request: FormatedSearchRequest = {
272325
collection_name: params.collection_name,
273326
partition_names: params.partition_names || [],
274327
output_fields: params.output_fields || default_output_fields,
275-
nq: searchReq.nq || searchData.length,
328+
nq: ids && ids.length > 0 ? ids.length : searchReq.nq || searchData.length,
276329
dsl: userRequest.expr || searchReq.expr || searchSimpleReq.filter || '', // expr, inner expr or outer expr
277330
dsl_type: DslType.BoolExprV1,
278-
placeholder_group: buildPlaceholderGroupBytes(
279-
milvusProto,
280-
searchData,
281-
annsField
282-
),
283331
search_params: parseToKeyValue(
284332
searchReq.search_params || buildSearchParams(userRequest, anns_field)
285333
),
286334
consistency_level:
287335
params.consistency_level || (collectionInfo.consistency_level as any),
288336
};
289337

338+
if (ids && ids.length > 0) {
339+
if (pkDataType === DataType.Int64) {
340+
request.ids = { int_id: { data: ids as number[] } };
341+
} else if (pkDataType === DataType.VarChar) {
342+
request.ids = { str_id: { data: ids as string[] } };
343+
}
344+
} else {
345+
// use placeholder_group for vector search
346+
request.placeholder_group = buildPlaceholderGroupBytes(
347+
milvusProto,
348+
searchData,
349+
annsField
350+
);
351+
}
352+
290353
// if exprValues is set, add it to the request(inner)
291354
if (userRequest.exprValues) {
292355
request.expr_template_values = formatExprValues(userRequest.exprValues);
@@ -324,40 +387,40 @@ export const buildSearchRequest = (
324387
isHybridSearch: isHybridSearch,
325388
request: isHybridSearch
326389
? {
327-
collection_name: params.collection_name,
328-
partition_names: params.partition_names,
329-
requests: requests,
330-
output_fields: requests[0]?.output_fields,
331-
consistency_level: requests[0]?.consistency_level,
332-
333-
// if ranker is set and it is a hybrid search, add it to the request
334-
...createFunctionScore(rerank),
335-
336-
// if ranker is not exist, use RRFRanker ranker
337-
...{
338-
rank_params: [
339-
...(isRerankerObj
340-
? parseToKeyValue(convertRerankParams(rerank as RerankerObj))
341-
: !hasRerankFunction && !hasFunctionScore
390+
collection_name: params.collection_name,
391+
partition_names: params.partition_names,
392+
requests: requests,
393+
output_fields: requests[0]?.output_fields,
394+
consistency_level: requests[0]?.consistency_level,
395+
396+
// if ranker is set and it is a hybrid search, add it to the request
397+
...createFunctionScore(rerank),
398+
399+
// if ranker is not exist, use RRFRanker ranker
400+
...{
401+
rank_params: [
402+
...(isRerankerObj
403+
? parseToKeyValue(convertRerankParams(rerank as RerankerObj))
404+
: !hasRerankFunction && !hasFunctionScore
342405
? parseToKeyValue(convertRerankParams(RRFRanker()))
343406
: []),
344-
{ key: 'round_decimal', value: round_decimal },
345-
{
346-
key: 'limit',
347-
value:
348-
searchSimpleReq.limit ?? searchSimpleReq.topk ?? DEFAULT_TOPK,
349-
},
350-
{
351-
key: 'offset',
352-
value: searchSimpleReq.offset ?? 0,
353-
},
354-
],
355-
},
356-
}
357-
: {
358-
...requests[0],
359-
...createFunctionScore(rerank),
407+
{ key: 'round_decimal', value: round_decimal },
408+
{
409+
key: 'limit',
410+
value:
411+
searchSimpleReq.limit ?? searchSimpleReq.topk ?? DEFAULT_TOPK,
412+
},
413+
{
414+
key: 'offset',
415+
value: searchSimpleReq.offset ?? 0,
416+
},
417+
],
360418
},
419+
}
420+
: {
421+
...requests[0],
422+
...createFunctionScore(rerank),
423+
},
361424
// need for parsing the search results
362425
...(round_decimal !== -1 ? { round_decimal } : {}),
363426
nq: requests[0].nq,
@@ -435,8 +498,8 @@ export const formatSearchResult = (
435498
const value = isFixedSchema
436499
? dataArray[absoluteIndex]
437500
: dataArray[absoluteIndex]
438-
? dataArray[absoluteIndex][field_name]
439-
: undefined;
501+
? dataArray[absoluteIndex][field_name]
502+
: undefined;
440503

441504
result[field_name] = value;
442505
});

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "@zilliz/milvus2-sdk-node",
33
"author": "Zilliz",
4-
"milvusVersion": "v2.6.8",
4+
"milvusVersion": "v2.6.9",
55
"version": "2.6.9",
66
"main": "dist/milvus",
77
"files": [

0 commit comments

Comments
 (0)