Skip to content

Commit cc3a5ff

Browse files
committed
Add direct IO to diskBBQ
1 parent 0d0b484 commit cc3a5ff

File tree

8 files changed

+48
-21
lines changed

8 files changed

+48
-21
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,11 @@ private static String formatIndexPath(CmdLineArgs args) {
106106
static Codec createCodec(CmdLineArgs args) {
107107
final KnnVectorsFormat format;
108108
if (args.indexType() == IndexType.IVF) {
109-
format = new ES920DiskBBQVectorsFormat(args.ivfClusterSize(), ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
109+
format = new ES920DiskBBQVectorsFormat(
110+
args.ivfClusterSize(),
111+
ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER,
112+
false
113+
);
110114
} else {
111115
if (args.quantizeBits() == 1) {
112116
if (args.indexType() == IndexType.FLAT) {

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.lucene.index.SegmentWriteState;
2121
import org.elasticsearch.common.util.Maps;
2222
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
23+
import org.elasticsearch.index.codec.vectors.es818.DirectIOLucene99FlatVectorsFormat;
2324

2425
import java.io.IOException;
2526
import java.util.Collections;
@@ -63,7 +64,15 @@ public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
6364
private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(
6465
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
6566
);
66-
private static final Map<String, FlatVectorsFormat> supportedFormats = Map.of(rawVectorFormat.getName(), rawVectorFormat);
67+
private static final FlatVectorsFormat directIORawVectorFormat = new DirectIOLucene99FlatVectorsFormat(
68+
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
69+
);
70+
private static final Map<String, FlatVectorsFormat> supportedFormats = Map.of(
71+
rawVectorFormat.getName(),
72+
rawVectorFormat,
73+
directIORawVectorFormat.getName(),
74+
directIORawVectorFormat
75+
);
6776

6877
// This dynamically sets the cluster probe based on the `k` requested and the number of clusters.
6978
// useful when searching with 'efSearch' type parameters instead of requiring a specific ratio.
@@ -77,8 +86,9 @@ public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
7786

7887
private final int vectorPerCluster;
7988
private final int centroidsPerParentCluster;
89+
private final boolean directRawDiskReads;
8090

81-
public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) {
91+
public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster, boolean directRawDiskReads) {
8292
super(NAME);
8393
if (vectorPerCluster < MIN_VECTORS_PER_CLUSTER || vectorPerCluster > MAX_VECTORS_PER_CLUSTER) {
8494
throw new IllegalArgumentException(
@@ -102,19 +112,21 @@ public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentClu
102112
}
103113
this.vectorPerCluster = vectorPerCluster;
104114
this.centroidsPerParentCluster = centroidsPerParentCluster;
115+
this.directRawDiskReads = directRawDiskReads;
105116
}
106117

107118
/** Constructs a format with the default vectors-per-cluster and centroids-per-parent-cluster values and direct IO raw vector reads disabled. */
108119
public ES920DiskBBQVectorsFormat() {
109-
this(DEFAULT_VECTORS_PER_CLUSTER, DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
120+
this(DEFAULT_VECTORS_PER_CLUSTER, DEFAULT_CENTROIDS_PER_PARENT_CLUSTER, false);
110121
}
111122

112123
@Override
113124
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
125+
FlatVectorsFormat rawFormat = directRawDiskReads ? directIORawVectorFormat : rawVectorFormat;
114126
return new ES920DiskBBQVectorsWriter(
115-
rawVectorFormat.getName(),
127+
rawFormat.getName(),
116128
state,
117-
rawVectorFormat.fieldsWriter(state),
129+
rawFormat.fieldsWriter(state),
118130
vectorPerCluster,
119131
centroidsPerParentCluster
120132
);

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,10 @@
4141
* Copied from Lucene99FlatVectorsFormat in Lucene 10.1
4242
*
4343
* This is copied to change the implementation of {@link #fieldsReader}; the format is also registered under its own {@link #NAME}.
44-
* The codec format itself is not changed, so we keep the original {@link #NAME}
4544
*/
4645
public class DirectIOLucene99FlatVectorsFormat extends AbstractFlatVectorsFormat {
4746

48-
static final String NAME = "Lucene99FlatVectorsFormat";
47+
static final String NAME = "DirectIOLucene99FlatVectorsFormat";
4948

5049
public static final int VERSION_START = 0;
5150
public static final int VERSION_CURRENT = VERSION_START;

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,8 +1544,10 @@ public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map<String, ?
15441544
);
15451545
}
15461546
}
1547+
Object directRawVectorReadsNode = indexOptionsMap.remove("direct_raw_vector_reads");
1548+
boolean directRawVectorReads = XContentMapValues.nodeBooleanValue(directRawVectorReadsNode, false);
15471549
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
1548-
return new BBQIVFIndexOptions(clusterSize, visitPercentage, rescoreVector);
1550+
return new BBQIVFIndexOptions(clusterSize, visitPercentage, rescoreVector, directRawVectorReads);
15491551
}
15501552

15511553
@Override
@@ -2149,17 +2151,23 @@ public boolean validateDimension(int dim, boolean throwOnError) {
21492151
static class BBQIVFIndexOptions extends QuantizedIndexOptions {
21502152
final int clusterSize;
21512153
final double defaultVisitPercentage;
2154+
final boolean directRawDiskReads;
21522155

2153-
BBQIVFIndexOptions(int clusterSize, double defaultVisitPercentage, RescoreVector rescoreVector) {
2156+
BBQIVFIndexOptions(int clusterSize, double defaultVisitPercentage, RescoreVector rescoreVector, boolean directRawDiskReads) {
21542157
super(VectorIndexType.BBQ_DISK, rescoreVector);
21552158
this.clusterSize = clusterSize;
21562159
this.defaultVisitPercentage = defaultVisitPercentage;
2160+
this.directRawDiskReads = directRawDiskReads;
21572161
}
21582162

21592163
@Override
21602164
KnnVectorsFormat getVectorsFormat(ElementType elementType) {
21612165
assert elementType == ElementType.FLOAT;
2162-
return new ES920DiskBBQVectorsFormat(clusterSize, ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
2166+
return new ES920DiskBBQVectorsFormat(
2167+
clusterSize,
2168+
ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER,
2169+
directRawDiskReads
2170+
);
21632171
}
21642172

21652173
@Override

server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormatTests.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,15 @@ public void setUp() throws Exception {
6262
if (rarely()) {
6363
format = new ES920DiskBBQVectorsFormat(
6464
random().nextInt(2 * MIN_VECTORS_PER_CLUSTER, ES920DiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER),
65-
random().nextInt(8, ES920DiskBBQVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER)
65+
random().nextInt(8, ES920DiskBBQVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER),
66+
random().nextBoolean()
6667
);
6768
} else {
6869
// run with low numbers to force many clusters with parents
6970
format = new ES920DiskBBQVectorsFormat(
7071
random().nextInt(MIN_VECTORS_PER_CLUSTER, 2 * MIN_VECTORS_PER_CLUSTER),
71-
random().nextInt(MIN_CENTROIDS_PER_PARENT_CLUSTER, 8)
72+
random().nextInt(MIN_CENTROIDS_PER_PARENT_CLUSTER, 8),
73+
random().nextBoolean()
7274
);
7375
}
7476
super.setUp();
@@ -129,7 +131,7 @@ public void testToString() {
129131
FilterCodec customCodec = new FilterCodec("foo", Codec.getDefault()) {
130132
@Override
131133
public KnnVectorsFormat knnVectorsFormat() {
132-
return new ES920DiskBBQVectorsFormat(128, 4);
134+
return new ES920DiskBBQVectorsFormat(128, 4, false);
133135
}
134136
};
135137
String expectedPattern = "ES920DiskBBQVectorsFormat(vectorPerCluster=128)";
@@ -140,10 +142,10 @@ public KnnVectorsFormat knnVectorsFormat() {
140142
}
141143

142144
public void testLimits() {
143-
expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(MIN_VECTORS_PER_CLUSTER - 1, 16));
144-
expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(MAX_VECTORS_PER_CLUSTER + 1, 16));
145-
expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(128, MIN_CENTROIDS_PER_PARENT_CLUSTER - 1));
146-
expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(128, MAX_CENTROIDS_PER_PARENT_CLUSTER + 1));
145+
expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(MIN_VECTORS_PER_CLUSTER - 1, 16, false));
146+
expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(MAX_VECTORS_PER_CLUSTER + 1, 16, false));
147+
expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(128, MIN_CENTROIDS_PER_PARENT_CLUSTER - 1, false));
148+
expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(128, MAX_CENTROIDS_PER_PARENT_CLUSTER + 1, false));
147149
}
148150

149151
public void testSimpleOffHeapSize() throws IOException {

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,8 @@ public static DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsA
114114
new DenseVectorFieldMapper.BBQIVFIndexOptions(
115115
randomIntBetween(MIN_VECTORS_PER_CLUSTER, MAX_VECTORS_PER_CLUSTER),
116116
randomFloatBetween(0.0f, 100.0f, true),
117-
randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector())
117+
randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()),
118+
randomBoolean()
118119
)
119120
);
120121
}

server/src/test/java/org/elasticsearch/search/vectors/AbstractDiversifyingChildrenIVFKnnVectorQueryTestCase.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ public void setUp() throws Exception {
9898
random().nextInt(
9999
ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER,
100100
ES920DiskBBQVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER
101-
)
101+
),
102+
random().nextBoolean()
102103
);
103104
}
104105

server/src/test/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQueryTestCase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ abstract class AbstractIVFKnnVectorQueryTestCase extends LuceneTestCase {
9898
@Before
9999
public void setUp() throws Exception {
100100
super.setUp();
101-
format = new ES920DiskBBQVectorsFormat(128, 4);
101+
format = new ES920DiskBBQVectorsFormat(128, 4, false);
102102
}
103103

104104
abstract AbstractIVFKnnVectorQuery getKnnVectorQuery(String field, float[] query, int k, Query queryFilter, float visitRatio);

0 commit comments

Comments
 (0)