Skip to content

Commit d81a3f7

Browse files
Merge branch '208-hnsw-index' into 'dev'
HNSW index: new annotation, query condition and find methods. See merge request objectbox/objectbox-java!135
2 parents 6e76251 + b3fb592 commit d81a3f7

30 files changed

+824
-26
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright 2024 ObjectBox Ltd. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.objectbox.annotation;
18+
19+
/**
20+
* Flags as a part of the {@link HnswIndex} configuration.
21+
*/
22+
public @interface HnswFlags {
23+
24+
/**
25+
* Enables debug logs.
26+
*/
27+
boolean debugLogs() default false;
28+
29+
/**
30+
* Enables "high volume" debug logs, e.g. individual gets/puts.
31+
*/
32+
boolean debugLogsDetailed() default false;
33+
34+
/**
35+
* Padding for SIMD is enabled by default, which uses more memory but may be faster. This flag turns it off.
36+
*/
37+
boolean vectorCacheSimdPaddingOff() default false;
38+
39+
/**
40+
* If the speed of removing nodes becomes a concern in your use case, you can speed it up by setting this flag. By
41+
* default, repairing the graph after node removals creates more connections to improve the graph's quality. The
42+
* extra costs for this are relatively low (e.g. vs. regular indexing), and thus the default is recommended.
43+
*/
44+
boolean reparationLimitCandidates() default false;
45+
46+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright 2024 ObjectBox Ltd. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.objectbox.annotation;
18+
19+
import java.lang.annotation.ElementType;
20+
import java.lang.annotation.Retention;
21+
import java.lang.annotation.RetentionPolicy;
22+
import java.lang.annotation.Target;
23+
24+
/**
25+
* Parameters to configure HNSW-based approximate nearest neighbor (ANN) search. Some of the parameters can influence
26+
* index construction and searching. Changing these values causes re-indexing, which can take a while due to the complex
27+
* nature of HNSW.
28+
*/
29+
@Retention(RetentionPolicy.CLASS)
30+
@Target(ElementType.FIELD)
31+
public @interface HnswIndex {
32+
33+
/**
34+
* Dimensions of vectors; vector data with fewer dimensions are ignored. Vectors with more dimensions than specified
35+
* here are only evaluated up to the given dimension value. Changing this value causes re-indexing.
36+
*/
37+
long dimensions();
38+
39+
/**
40+
* Aka "M": the max number of connections per node (default: 30). Higher numbers increase the graph connectivity,
41+
* which can lead to more accurate search results. However, higher numbers also increase the indexing time and
42+
* resource usage. Try e.g. 16 for faster but less accurate results, or 64 for more accurate results. Changing this
43+
* value causes re-indexing.
44+
*/
45+
long neighborsPerNode() default 0;
46+
47+
/**
48+
* Aka "efConstruction": the number of neighbors searched for while indexing (default: 100). The higher the value,
49+
* the more accurate the search, but the longer the indexing. If indexing time is not a major concern, a value of at
50+
* least 200 is recommended to improve search quality. Changing this value causes re-indexing.
51+
*/
52+
long indexingSearchCount() default 0;
53+
54+
/**
55+
* See {@link HnswFlags}.
56+
*/
57+
HnswFlags flags() default @HnswFlags;
58+
59+
/**
60+
* The distance type used for the HNSW index. Changing this value causes re-indexing.
61+
*/
62+
VectorDistanceType distanceType() default VectorDistanceType.DEFAULT;
63+
64+
/**
65+
* When repairing the graph after a node was removed, this gives the probability of adding backlinks to the repaired
66+
* neighbors. The default is 1.0 (aka "always") as this should be worth a bit of extra cost as it improves the
67+
* graph's quality.
68+
*/
69+
float reparationBacklinkProbability() default 1.0F;
70+
71+
/**
72+
* A non-binding hint at the maximum size of the vector cache in KB (default: 2097152 or 2 GB/GiB). The actual
73+
* max cache size may be altered according to device and/or runtime settings. The vector cache is used to store
74+
* vectors in memory to speed up search and indexing.
75+
* <p>
76+
* Note 1: cache chunks are allocated only on demand, when they are actually used. Thus, smaller datasets will use
77+
* less memory.
78+
* <p>
79+
* Note 2: the cache is for one specific HNSW index; e.g. each index has its own cache.
80+
* <p>
81+
* Note 3: the memory consumption can temporarily exceed the cache size, e.g. for large changes, it can double due
82+
* to multi-version transactions.
83+
*/
84+
long vectorCacheHintSizeKB() default 0;
85+
86+
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright 2024 ObjectBox Ltd. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.objectbox.annotation;
18+
19+
/**
20+
* The vector distance algorithm used by an {@link HnswIndex} (vector search).
21+
*/
22+
public enum VectorDistanceType {
23+
24+
/**
25+
* The default; currently {@link #EUCLIDEAN}.
26+
*/
27+
DEFAULT,
28+
29+
/**
30+
* Typically "Euclidean squared" internally.
31+
*/
32+
EUCLIDEAN,
33+
34+
/**
35+
* Cosine similarity compares two vectors irrespective of their magnitude (compares the angle of two vectors).
36+
* <p>
37+
* Often used for document or semantic similarity.
38+
* <p>
39+
* Value range: 0.0 - 2.0 (0.0: same direction, 1.0: orthogonal, 2.0: opposite direction)
40+
*/
41+
COSINE,
42+
43+
/**
44+
* For normalized vectors (vector length == 1.0), the dot product is equivalent to the cosine similarity.
45+
* <p>
46+
* Because of this, the dot product is often preferred as it performs better.
47+
* <p>
48+
* Value range (normalized vectors): 0.0 - 2.0 (0.0: same direction, 1.0: orthogonal, 2.0: opposite direction)
49+
*/
50+
DOT_PRODUCT,
51+
52+
/**
53+
* A custom dot product similarity measure that does not require the vectors to be normalized.
54+
* <p>
55+
* Note: this is no replacement for cosine similarity (like DotProduct for normalized vectors is). The non-linear
56+
* conversion provides a high precision over the entire float range (for the raw dot product). The higher the dot
57+
* product, the lower the distance is (the nearer the vectors are). The more negative the dot product, the higher
58+
* the distance is (the farther the vectors are).
59+
* <p>
60+
* Value range: 0.0 - 2.0 (nonlinear; 0.0: nearest, 1.0: orthogonal, 2.0: farthest)
61+
*/
62+
DOT_PRODUCT_NON_NORMALIZED
63+
}

objectbox-java/src/main/java/io/objectbox/ModelBuilder.java

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2017 ObjectBox Ltd. All rights reserved.
2+
* Copyright 2017-2024 ObjectBox Ltd. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -21,8 +21,12 @@
2121

2222
import javax.annotation.Nullable;
2323

24+
import io.objectbox.annotation.HnswIndex;
2425
import io.objectbox.annotation.apihint.Internal;
2526
import io.objectbox.flatbuffers.FlatBufferBuilder;
27+
import io.objectbox.model.HnswDistanceType;
28+
import io.objectbox.model.HnswFlags;
29+
import io.objectbox.model.HnswParams;
2630
import io.objectbox.model.IdUid;
2731
import io.objectbox.model.Model;
2832
import io.objectbox.model.ModelEntity;
@@ -63,6 +67,7 @@ public class PropertyBuilder {
6367
private int indexId;
6468
private long indexUid;
6569
private int indexMaxValueLength;
70+
private int hnswParamsOffset;
6671

6772
PropertyBuilder(String name, @Nullable String targetEntityName, @Nullable String virtualTarget, int type) {
6873
this.type = type;
@@ -91,6 +96,50 @@ public PropertyBuilder indexMaxValueLength(int indexMaxValueLength) {
9196
return this;
9297
}
9398

99+
/**
100+
* Set parameters for {@link HnswIndex}.
101+
*
102+
* @param dimensions see {@link HnswIndex#dimensions()}.
103+
* @param neighborsPerNode see {@link HnswIndex#neighborsPerNode()}.
104+
* @param indexingSearchCount see {@link HnswIndex#indexingSearchCount()}.
105+
* @param flags see {@link HnswIndex#flags()}, mapped to {@link HnswFlags}.
106+
* @param distanceType see {@link HnswIndex#distanceType()}, mapped to {@link HnswDistanceType}.
107+
* @param reparationBacklinkProbability see {@link HnswIndex#reparationBacklinkProbability()}.
108+
* @param vectorCacheHintSizeKb see {@link HnswIndex#vectorCacheHintSizeKB()}.
109+
* @return this builder.
110+
*/
111+
public PropertyBuilder hnswParams(long dimensions,
112+
@Nullable Long neighborsPerNode,
113+
@Nullable Long indexingSearchCount,
114+
@Nullable Integer flags,
115+
@Nullable Short distanceType,
116+
@Nullable Float reparationBacklinkProbability,
117+
@Nullable Long vectorCacheHintSizeKb) {
118+
checkNotFinished();
119+
HnswParams.startHnswParams(fbb);
120+
HnswParams.addDimensions(fbb, dimensions);
121+
if (neighborsPerNode != null) {
122+
HnswParams.addNeighborsPerNode(fbb, neighborsPerNode);
123+
}
124+
if (indexingSearchCount != null) {
125+
HnswParams.addIndexingSearchCount(fbb, indexingSearchCount);
126+
}
127+
if (flags != null) {
128+
HnswParams.addFlags(fbb, flags);
129+
}
130+
if (distanceType != null) {
131+
HnswParams.addDistanceType(fbb, distanceType);
132+
}
133+
if (reparationBacklinkProbability != null) {
134+
HnswParams.addReparationBacklinkProbability(fbb, reparationBacklinkProbability);
135+
}
136+
if (vectorCacheHintSizeKb != null) {
137+
HnswParams.addVectorCacheHintSizeKb(fbb, vectorCacheHintSizeKb);
138+
}
139+
hnswParamsOffset = HnswParams.endHnswParams(fbb);
140+
return this;
141+
}
142+
94143
public PropertyBuilder flags(int flags) {
95144
checkNotFinished();
96145
this.flags = flags;
@@ -134,6 +183,9 @@ public int finish() {
134183
if (indexMaxValueLength > 0) {
135184
ModelProperty.addMaxIndexValueLength(fbb, indexMaxValueLength);
136185
}
186+
if (hnswParamsOffset != 0) {
187+
ModelProperty.addHnswParams(fbb, hnswParamsOffset);
188+
}
137189
ModelProperty.addType(fbb, type);
138190
if (flags != 0) {
139191
ModelProperty.addFlags(fbb, flags);

objectbox-java/src/main/java/io/objectbox/Property.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2017-2019 ObjectBox Ltd. All rights reserved.
2+
* Copyright 2017-2024 ObjectBox Ltd. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
2222

2323
import javax.annotation.Nullable;
2424

25+
import io.objectbox.annotation.HnswIndex;
2526
import io.objectbox.annotation.apihint.Internal;
2627
import io.objectbox.converter.PropertyConverter;
2728
import io.objectbox.exception.DbException;
@@ -33,16 +34,18 @@
3334
import io.objectbox.query.PropertyQueryConditionImpl.LongArrayCondition;
3435
import io.objectbox.query.PropertyQueryConditionImpl.LongCondition;
3536
import io.objectbox.query.PropertyQueryConditionImpl.LongLongCondition;
37+
import io.objectbox.query.PropertyQueryConditionImpl.NearestNeighborCondition;
3638
import io.objectbox.query.PropertyQueryConditionImpl.NullCondition;
3739
import io.objectbox.query.PropertyQueryConditionImpl.StringArrayCondition;
3840
import io.objectbox.query.PropertyQueryConditionImpl.StringCondition;
3941
import io.objectbox.query.PropertyQueryConditionImpl.StringCondition.Operation;
4042
import io.objectbox.query.PropertyQueryConditionImpl.StringStringCondition;
43+
import io.objectbox.query.Query;
4144
import io.objectbox.query.QueryBuilder.StringOrder;
4245

4346
/**
4447
* Meta data describing a Property of an ObjectBox Entity.
45-
* Properties are typically used when defining {@link io.objectbox.query.Query Query} conditions
48+
* Properties are typically used when defining {@link Query Query} conditions
4649
* using {@link io.objectbox.query.QueryBuilder QueryBuilder}.
4750
* Access properties using the generated underscore class of an entity (e.g. {@code Example_.id}).
4851
*/
@@ -302,6 +305,25 @@ public PropertyQueryCondition<ENTITY> between(double lowerBoundary, double upper
302305
lowerBoundary, upperBoundary);
303306
}
304307

308+
/**
309+
* Performs an approximate nearest neighbor (ANN) search to find objects near to the given {@code queryVector}.
310+
* <p>
311+
* This requires the vector property to have an {@link HnswIndex}.
312+
* <p>
313+
* The dimensions of the query vector should be at least the dimensions of this vector property.
314+
* <p>
315+
* Use {@code maxResultCount} to set the maximum number of objects to return by the ANN condition. Hint: it can also
316+
* be used as the "ef" HNSW parameter to increase the search quality in combination with a query limit. For example,
317+
* use maxResultCount of 100 with a Query limit of 10 to have 10 results that are of potentially better quality than
318+
* just passing in 10 for maxResultCount (quality/performance tradeoff).
319+
* <p>
320+
* To change the given parameters after building the query, use {@link Query#setParameter(Property, float[])} and
321+
* {@link Query#setParameter(Property, long)} or their alias equivalent.
322+
*/
323+
public PropertyQueryCondition<ENTITY> nearestNeighbors(float[] queryVector, int maxResultCount) {
324+
return new NearestNeighborCondition<>(this, queryVector, maxResultCount);
325+
}
326+
305327
/** Creates an "equal ('=')" condition for this property. */
306328
public PropertyQueryCondition<ENTITY> equal(Date value) {
307329
return new LongCondition<>(this, LongCondition.Operation.EQUAL, value);

objectbox-java/src/main/java/io/objectbox/config/DebugFlags.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023 ObjectBox Ltd. All rights reserved.
2+
* Copyright 2024 ObjectBox Ltd. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.

0 commit comments

Comments
 (0)