Skip to content

Commit a9d24f5

Browse files
HNSW index: add annotation
1 parent 6e76251 commit a9d24f5

File tree

3 files changed

+165
-0
lines changed

3 files changed

+165
-0
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Copyright 2024 ObjectBox Ltd. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.objectbox.annotation;
18+
19+
/**
20+
* The distance algorithm used by an {@link HnswIndex} (vector search); set via {@link HnswIndex#distanceType()}.
21+
*/
22+
public enum HnswDistanceType {
23+
24+
/**
25+
* The default distance type; currently {@link #EUCLIDEAN}.
26+
*/
27+
DEFAULT,
28+
29+
/**
30+
* Euclidean distance; typically computed as "Euclidean squared" internally.
31+
*/
32+
EUCLIDEAN
33+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright 2024 ObjectBox Ltd. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.objectbox.annotation;
18+
19+
/**
20+
* Flags as part of the {@link HnswIndex} configuration, supplied via {@link HnswIndex#flags()}. All flags are off by default.
21+
*/
22+
public @interface HnswFlags {
23+
24+
/**
25+
* Enables debug logs. Off by default.
26+
*/
27+
boolean debugLogs() default false;
28+
29+
/**
30+
* Enables "high volume" debug logs, e.g. individual gets/puts. Off by default.
31+
*/
32+
boolean debugLogsDetailed() default false;
33+
34+
/**
35+
* Padding for SIMD is enabled by default, which uses more memory but may be faster. Setting this flag turns it off.
36+
*/
37+
boolean vectorCacheSimdPaddingOff() default false;
38+
39+
/**
40+
* If the speed of removing nodes becomes a concern in your use case, you can speed it up by setting this flag. By
41+
* default, repairing the graph after node removals creates more connections to improve the graph's quality. The
42+
* extra cost of this is relatively low (e.g. vs. regular indexing), and thus the default is recommended.
43+
*/
44+
boolean reparationLimitCandidates() default false;
45+
46+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright 2024 ObjectBox Ltd. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.objectbox.annotation;
18+
19+
import java.lang.annotation.ElementType;
20+
import java.lang.annotation.Retention;
21+
import java.lang.annotation.RetentionPolicy;
22+
import java.lang.annotation.Target;
23+
24+
/**
25+
* Parameters to configure HNSW-based approximate nearest neighbor (ANN) search. Some of the parameters can influence
26+
* index construction and searching. Changing these values causes re-indexing, which can take a while due to the complex
27+
* nature of HNSW. This annotation is applicable to fields only.
28+
*/
29+
@Retention(RetentionPolicy.CLASS)
30+
@Target(ElementType.FIELD)
31+
public @interface HnswIndex {
32+
33+
/**
34+
* Dimensions of vectors; vector data with fewer dimensions are ignored. Vectors with more dimensions than specified
35+
* here are only evaluated up to the given dimension value. Changing this value causes re-indexing.
36+
*/
37+
long dimensions();
38+
39+
/**
40+
* Aka "M": the max number of connections per node (default: 30). Higher numbers increase the graph connectivity,
41+
* which can lead to more accurate search results. However, higher numbers also increase the indexing time and
42+
* resource usage. Try e.g. 16 for faster but less accurate results, or 64 for more accurate results. Changing this
43+
* value causes re-indexing.
44+
*/
45+
long neighborsPerNode() default 0;
46+
47+
/**
48+
* Aka "efConstruction": the number of neighbors searched for while indexing (default: 100). The higher the value,
49+
* the more accurate the search, but the longer the indexing. If indexing time is not a major concern, a value of at
50+
* least 200 is recommended to improve search quality. Changing this value causes re-indexing.
51+
*/
52+
long indexingSearchCount() default 0;
53+
54+
/**
55+
* Optional flags for this index; see {@link HnswFlags}. Defaults to {@code @HnswFlags} with its default values.
56+
*/
57+
HnswFlags flags() default @HnswFlags;
58+
59+
/**
60+
* The distance type used for the HNSW index; see {@link HnswDistanceType}. Changing this value causes re-indexing.
61+
*/
62+
HnswDistanceType distanceType() default HnswDistanceType.DEFAULT;
63+
64+
/**
65+
* When repairing the graph after a node was removed, this gives the probability of adding backlinks to the repaired
66+
* neighbors. The default is 1.0 (aka "always"), as the improvement in the graph's quality should be worth the small
67+
* extra cost.
68+
*/
69+
float reparationBacklinkProbability() default 1.0F;
70+
71+
/**
72+
* A non-binding hint at the maximum size of the vector cache in KB (default: 2097152 or 2 GB/GiB). The actual
73+
* max cache size may be altered according to device and/or runtime settings. The vector cache is used to store
74+
* vectors in memory to speed up search and indexing.
75+
* <p>
76+
* Note 1: cache chunks are allocated only on demand, when they are actually used. Thus, smaller datasets will use
77+
* less memory.
78+
* <p>
79+
* Note 2: the cache is for one specific HNSW index; i.e. each index has its own cache.
80+
* <p>
81+
* Note 3: the memory consumption can temporarily exceed the cache size, e.g. for large changes, it can double due
82+
* to multi-version transactions.
83+
*/
84+
long vectorCacheHintSizeKB() default 0;
85+
86+
}

0 commit comments

Comments
 (0)