Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions milvus/types/Search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ export interface SearchReq extends collectionNameReq {
expr?: string; // filter expression
exprValues?: keyValueObj; // template values for filter expression, eg: {key: 'value'}
search_params: SearchParam; // search parameters
vectors: SearchData | SearchData[]; // vectors to search
vectors?: SearchData | SearchData[]; // vectors to search
output_fields?: string[]; // fields to return
travel_timestamp?: string; // time travel
vector_type: DataType.BinaryVector | DataType.FloatVector; // vector field type
nq?: number; // number of query vectors
consistency_level?: ConsistencyLevelEnum; // consistency level
transformers?: OutputTransformers; // provide custom data transformer for specific data type like bf16 or f16 vectors
ids?: number[] | string[]; // primary keys for search by IDs
}

export interface FunctionScore {
Expand All @@ -59,7 +60,7 @@ export interface FunctionScore {
export interface SearchSimpleReq extends collectionNameReq {
partition_names?: string[]; // partition names
anns_field?: string; // your vector field name,required if you are searching on multiple vector fields collection
data: SearchData | SearchData[]; // vector or text to search
data?: SearchData | SearchData[]; // vector or text to search
vector?: SearchData | SearchData[];
output_fields?: string[];
limit?: number; // how many results you want
Expand All @@ -80,6 +81,7 @@ export interface SearchSimpleReq extends collectionNameReq {
transformers?: OutputTransformers; // provide custom data transformer for specific data type like bf16 or f16 vectors
rerank?: RerankerObj | FunctionObject | FunctionScore; // reranker
nq?: number; // number of query vectors
ids?: number[] | string[]; // primary keys for search by IDs
}

export type HybridSearchSingleReq = Pick<
Expand Down Expand Up @@ -173,16 +175,16 @@ export type OutputTransformers = {

export type DetermineResultsType<T extends Record<string, any>> =
T['vectors'] extends [SearchData]
? SearchResultData[]
: T['vectors'] extends SearchData[]
? SearchResultData[][]
: T['vector'] extends SearchData
? SearchResultData[]
: T['data'] extends SearchData
? SearchResultData[]
: T['data'] extends SearchData[]
? SearchResultData[][]
: SearchResultData[];
? SearchResultData[]
: T['vectors'] extends SearchData[]
? SearchResultData[][]
: T['vector'] extends SearchData
? SearchResultData[]
: T['data'] extends SearchData
? SearchResultData[]
: T['data'] extends SearchData[]
? SearchResultData[][]
: SearchResultData[];

export interface SearchResultData {
[x: string]: any;
Expand Down
174 changes: 122 additions & 52 deletions milvus/utils/Search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import {
SparseFloatVector,
PlaceholderType,
SearchEmbList,
DataType,
DataTypeMap,
} from '../';

/**
Expand Down Expand Up @@ -135,6 +137,7 @@ type FormatedSearchRequest = {
dsl?: string;
dsl_type?: DslType;
placeholder_group?: Uint8Array;
ids?: { int_id?: { data: number[] }; str_id?: { data: string[] } };
search_params?: KeyValuePair[];
consistency_level: ConsistencyLevelEnum;
expr?: string;
Expand Down Expand Up @@ -243,19 +246,29 @@ export const buildSearchRequest = (
// build user search requests
const userRequests = isHybridSearch
? searchHybridReq.data.map(d => ({
...params,
...d,
}))
...params,
...d,
}))
: [
{
...searchSimpleReq,
data:
searchReq.vectors || searchSimpleReq.vector || searchSimpleReq.data, // data or vector or vectors
anns_field:
searchSimpleReq.anns_field ||
Object.keys(collectionInfo.anns_fields || {})[0],
},
];
{
...searchSimpleReq,
data:
searchReq.vectors || searchSimpleReq.vector || searchSimpleReq.data, // data or vector or vectors
anns_field:
searchSimpleReq.anns_field ||
Object.keys(collectionInfo.anns_fields || {})[0],
},
];

// get primary field type for ids
let pkField: FieldSchema | undefined;
for (let i = 0; i < collectionInfo.schema.fields.length; i++) {
const f = collectionInfo.schema.fields[i];
if (f.is_primary_key) {
pkField = f;
break;
}
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This for loop to find the primary key field can be made more concise and idiomatic by using the Array.prototype.find() method.

Suggested change
let pkField: FieldSchema | undefined;
for (let i = 0; i < collectionInfo.schema.fields.length; i++) {
const f = collectionInfo.schema.fields[i];
if (f.is_primary_key) {
pkField = f;
break;
}
}
const pkField = collectionInfo.schema.fields.find(f => f.is_primary_key);


for (const userRequest of userRequests) {
const { data, anns_field } = userRequest;
Expand All @@ -265,28 +278,85 @@ export const buildSearchRequest = (
throw new Error(ERROR_REASONS.NO_ANNS_FEILD_FOUND_IN_SEARCH);
}

// get ids from request
const ids =
userRequest.ids || searchReq.ids || searchSimpleReq.ids || undefined;

// if ids is set, we use ids for search
// check if ids is valid
if (ids && ids.length > 0) {
if (!pkField) {
throw new Error(
'Primary field not found. Cannot use ids parameter without primary field.'
);
}

const pkDataType = pkField.dataType || DataTypeMap[pkField.data_type];

// validation
if (pkDataType === DataType.Int64) {
if (
!(ids as any[]).every(
(id: any) =>
typeof id === 'number' || (typeof id === 'string' && !isNaN(Number(id)))
)
) {
throw new Error(
`The type of ids should be integer/string number because the primary key field ${pkField.name} is Int64.`
);
}
} else if (pkDataType === DataType.VarChar) {
if (!(ids as any[]).every((id: any) => typeof id === 'string')) {
throw new Error(
`The type of ids should be string because the primary key field ${pkField.name} is VarChar.`
);
}
} else {
throw new Error(
`The primary key field ${pkField.name} has unsupported type for ID search.`
);
}
}

// get search data
const searchData = formatSearchData(data, annsField);
// if ids is set, we don't need to format search data
// checks check if data is valid
if ((!ids || ids.length === 0) && !data) {
throw new Error('Search data is required');
}
const searchData =
ids && ids.length > 0 ? [] : formatSearchData(data!, annsField);

const request: FormatedSearchRequest = {
collection_name: params.collection_name,
partition_names: params.partition_names || [],
output_fields: params.output_fields || default_output_fields,
nq: searchReq.nq || searchData.length,
nq: ids && ids.length > 0 ? ids.length : searchReq.nq || searchData.length,
dsl: userRequest.expr || searchReq.expr || searchSimpleReq.filter || '', // expr, inner expr or outer expr
dsl_type: DslType.BoolExprV1,
placeholder_group: buildPlaceholderGroupBytes(
milvusProto,
searchData,
annsField
),
search_params: parseToKeyValue(
searchReq.search_params || buildSearchParams(userRequest, anns_field)
),
consistency_level:
params.consistency_level || (collectionInfo.consistency_level as any),
};

if (ids && ids.length > 0) {
const pkDataType = pkField!.dataType || DataTypeMap[pkField!.data_type];

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The primary key data type (pkDataType) is calculated here and also earlier in the validation logic (line 294). This code duplication can be avoided by calculating it once and reusing the variable. Consider refactoring to define pkDataType a single time.

if (pkDataType === DataType.Int64) {
request.ids = { int_id: { data: ids as number[] } };

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

There's a critical type mismatch here. The validation for Int64 primary keys allows ids to be an array of strings representing numbers. However, this line performs a direct cast ids as number[] without converting the string values to numbers. This will cause a runtime error or send incorrect data to Milvus if ids contains strings. You must map the values to numbers before assigning them.

Suggested change
request.ids = { int_id: { data: ids as number[] } };
request.ids = { int_id: { data: (ids as (string | number)[]).map(Number) } };

} else if (pkDataType === DataType.VarChar) {
request.ids = { str_id: { data: ids as string[] } };
}
} else {
// use placeholder_group for vector search
request.placeholder_group = buildPlaceholderGroupBytes(
milvusProto,
searchData,
annsField
);
}

// if exprValues is set, add it to the request(inner)
if (userRequest.exprValues) {
request.expr_template_values = formatExprValues(userRequest.exprValues);
Expand Down Expand Up @@ -324,40 +394,40 @@ export const buildSearchRequest = (
isHybridSearch: isHybridSearch,
request: isHybridSearch
? {
collection_name: params.collection_name,
partition_names: params.partition_names,
requests: requests,
output_fields: requests[0]?.output_fields,
consistency_level: requests[0]?.consistency_level,

// if ranker is set and it is a hybrid search, add it to the request
...createFunctionScore(rerank),

// if ranker is not exist, use RRFRanker ranker
...{
rank_params: [
...(isRerankerObj
? parseToKeyValue(convertRerankParams(rerank as RerankerObj))
: !hasRerankFunction && !hasFunctionScore
collection_name: params.collection_name,
partition_names: params.partition_names,
requests: requests,
output_fields: requests[0]?.output_fields,
consistency_level: requests[0]?.consistency_level,

// if ranker is set and it is a hybrid search, add it to the request
...createFunctionScore(rerank),

// if ranker is not exist, use RRFRanker ranker
...{
rank_params: [
...(isRerankerObj
? parseToKeyValue(convertRerankParams(rerank as RerankerObj))
: !hasRerankFunction && !hasFunctionScore
? parseToKeyValue(convertRerankParams(RRFRanker()))
: []),
{ key: 'round_decimal', value: round_decimal },
{
key: 'limit',
value:
searchSimpleReq.limit ?? searchSimpleReq.topk ?? DEFAULT_TOPK,
},
{
key: 'offset',
value: searchSimpleReq.offset ?? 0,
},
],
},
}
: {
...requests[0],
...createFunctionScore(rerank),
{ key: 'round_decimal', value: round_decimal },
{
key: 'limit',
value:
searchSimpleReq.limit ?? searchSimpleReq.topk ?? DEFAULT_TOPK,
},
{
key: 'offset',
value: searchSimpleReq.offset ?? 0,
},
],
},
}
: {
...requests[0],
...createFunctionScore(rerank),
},
// need for parsing the search results
...(round_decimal !== -1 ? { round_decimal } : {}),
nq: requests[0].nq,
Expand Down Expand Up @@ -435,8 +505,8 @@ export const formatSearchResult = (
const value = isFixedSchema
? dataArray[absoluteIndex]
: dataArray[absoluteIndex]
? dataArray[absoluteIndex][field_name]
: undefined;
? dataArray[absoluteIndex][field_name]
: undefined;

result[field_name] = value;
});
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@zilliz/milvus2-sdk-node",
"author": "Zilliz",
"milvusVersion": "v2.6.8",
"milvusVersion": "v2.6.9",
"version": "2.6.9",
"main": "dist/milvus",
"files": [
Expand Down
37 changes: 37 additions & 0 deletions test/grpc/Basic.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,43 @@ describe(`Basic API without database`, () => {
expect(addWrongFields.error_code).toEqual(ErrorCode.IllegalArgument);
});

it(`search with ids should be successful`, async () => {
// 1. describe collection to get primary key and check if "autoID" is true
const desc = await milvusClient.describeCollection({
collection_name: COLLECTION_NAME,
});
const pk = desc.schema.fields.find(f => f.is_primary_key);
expect(pk).toBeDefined();

// 2. if autoID is true, random query some ids
let searchIds: any[] = [];
if (pk?.autoID) {
const query = await milvusClient.query({
collection_name: COLLECTION_NAME,
expr: `${pk.name} > 0`,
limit: 5,
output_fields: [pk.name],
});
expect(query.status.error_code).toEqual(ErrorCode.SUCCESS);
expect(query.data.length).toBeGreaterThan(0);
searchIds = query.data.map(d => d[pk.name]);
} else {
// if not autoID, we can just use some static ids or generate them
// but provided schema has autoID: true
}

// 3. search with ids
if (searchIds.length > 0) {
const search = await milvusClient.search({
collection_name: COLLECTION_NAME,
ids: searchIds,
});

expect(search.status.error_code).toEqual(ErrorCode.SUCCESS);
expect(search.results.length).toEqual(searchIds.length);
}
});

it(`release again should be successful`, async () => {
const release = await milvusClient.releaseCollection({
collection_name: COLLECTION_NAME,
Expand Down
33 changes: 32 additions & 1 deletion test/grpc/MultipleVectors.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,38 @@ describe(`Multiple vectors API testing`, () => {
anns_field: 'vectorxxx',
data: [1, 2, 3, 4, 5, 6, 7, 8],
});
} catch (e) {
} catch (e: any) {
expect(e.message).toEqual(ERROR_REASONS.NO_ANNS_FEILD_FOUND_IN_SEARCH);
}
});

it(`search with ids should be successful`, async () => {
const query = await milvusClient.query({
collection_name: COLLECTION_NAME,
filter: 'id > 0',
limit: 5,
output_fields: ['id'],
});

expect(query.status.error_code).toEqual(ErrorCode.SUCCESS);
const ids = query.data.map((d: any) => d.id);

const search = await milvusClient.search({
collection_name: COLLECTION_NAME,
ids: ids,
anns_field: 'vector',
});

expect(search.status.error_code).toEqual(ErrorCode.SUCCESS);
expect(search.results.length).toEqual(ids.length);

try {
await milvusClient.search({
collection_name: COLLECTION_NAME,
ids: ids,
anns_field: 'wrong_vector',
});
} catch (e: any) {
expect(e.message).toEqual(ERROR_REASONS.NO_ANNS_FEILD_FOUND_IN_SEARCH);
}
});
Expand Down
Loading