Skip to content

Commit f986aa6

Browse files
committed
Merge remote-tracking branch 'upstream/es-gpu' into resource-manager-with-memory
2 parents 2fcf997 + ebb36e0 commit f986aa6

File tree

18 files changed

+694
-130
lines changed

18 files changed

+694
-130
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@
3131
import org.elasticsearch.xcontent.XContentParser;
3232
import org.elasticsearch.xcontent.XContentParserConfiguration;
3333
import org.elasticsearch.xcontent.XContentType;
34-
import org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat;
34+
import org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat;
35+
import org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat;
3536

3637
import java.io.InputStream;
3738
import java.lang.management.ThreadInfo;
@@ -68,15 +69,15 @@ enum IndexType {
6869
HNSW,
6970
FLAT,
7071
IVF,
71-
GPU
72+
GPU_HNSW
7273
}
7374

7475
private static String formatIndexPath(CmdLineArgs args) {
7576
List<String> suffix = new ArrayList<>();
7677
if (args.indexType() == IndexType.FLAT) {
7778
suffix.add("flat");
78-
} else if (args.indexType() == IndexType.GPU) {
79-
suffix.add("gpu");
79+
} else if (args.indexType() == IndexType.GPU_HNSW) {
80+
suffix.add("gpu_hnsw");
8081
} else if (args.indexType() == IndexType.IVF) {
8182
suffix.add("ivf");
8283
suffix.add(Integer.toString(args.ivfClusterSize()));
@@ -94,8 +95,16 @@ static Codec createCodec(CmdLineArgs args) {
9495
final KnnVectorsFormat format;
9596
if (args.indexType() == IndexType.IVF) {
9697
format = new IVFVectorsFormat(args.ivfClusterSize());
97-
} else if (args.indexType() == IndexType.GPU) {
98-
format = new GPUVectorsFormat();
98+
} else if (args.indexType() == IndexType.GPU_HNSW) {
99+
if (args.quantizeBits() == 32) {
100+
format = new ESGpuHnswVectorsFormat();
101+
} else if (args.quantizeBits() == 7) {
102+
format = new ESGpuHnswSQVectorsFormat();
103+
} else {
104+
throw new IllegalArgumentException(
105+
"GPU HNSW index type only supports 7 or 32 bits quantization, but got: " + args.quantizeBits()
106+
);
107+
}
99108
} else {
100109
if (args.quantizeBits() == 1) {
101110
if (args.indexType() == IndexType.FLAT) {

server/src/main/java/module-info.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@
482482
exports org.elasticsearch.plugins.internal.rewriter to org.elasticsearch.inference;
483483
exports org.elasticsearch.lucene.util.automaton;
484484
exports org.elasticsearch.index.codec.perfield;
485-
exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn;
485+
exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn, org.elasticsearch.gpu;
486486
exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn;
487487
exports org.elasticsearch.inference.telemetry;
488488
}

server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException
132132
);
133133
}
134134

135-
static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter {
135+
public static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter {
136136

137137
final Lucene99ScalarQuantizedVectorsWriter delegate;
138138

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,6 +2082,18 @@ boolean isFlat() {
20822082
return false;
20832083
}
20842084

2085+
/** Returns the {@code m} value held by these index options. */
public int m() {
2086+
return m;
2087+
}
2088+
2089+
/** Returns the {@code efConstruction} value held by these index options. */
public int efConstruction() {
2090+
return efConstruction;
2091+
}
2092+
2093+
/**
 * Returns the {@code confidenceInterval} value held by these index options, as a boxed {@code Float}.
 * NOTE(review): presumably {@code null} when no interval was configured; confirm against the field's declaration.
 */
public Float confidenceInterval() {
2094+
return confidenceInterval;
2095+
}
2096+
20852097
@Override
20862098
public String toString() {
20872099
return "{type="

x-pack/plugin/gpu/src/main/java/module-info.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* 2.0.
66
*/
77

8-
/** Provides GPU-accelerated support for vector search. */
8+
/** Provides GPU-accelerated support for vector indexing. */
99
module org.elasticsearch.gpu {
1010
requires org.elasticsearch.logging;
1111
requires org.apache.lucene.core;
@@ -16,6 +16,9 @@
1616

1717
exports org.elasticsearch.xpack.gpu.codec;
1818

19-
provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat;
2019
provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.xpack.gpu.GPUFeatures;
20+
provides org.apache.lucene.codecs.KnnVectorsFormat
21+
with
22+
org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat,
23+
org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat;
2124
}

x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
1515
import org.elasticsearch.plugins.MapperPlugin;
1616
import org.elasticsearch.plugins.Plugin;
17-
import org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat;
17+
import org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat;
18+
import org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat;
1819

1920
public class GPUPlugin extends Plugin implements MapperPlugin {
2021

@@ -28,10 +29,7 @@ public VectorsFormatProvider getVectorsFormatProvider() {
2829
if (gpuMode == IndexSettings.GpuMode.TRUE) {
2930
if (vectorIndexTypeSupported(indexOptions.getType()) == false) {
3031
throw new IllegalArgumentException(
31-
"[index.vectors.indexing.use_gpu] was set to [true], but GPU vector indexing is only supported "
32-
+ "for [hnsw] index_options.type, got: ["
33-
+ indexOptions.getType()
34-
+ "]"
32+
"[index.vectors.indexing.use_gpu] doesn't support [index_options.type] of [" + indexOptions.getType() + "]."
3533
);
3634
}
3735
if (GPUSupport.isSupported(true) == false) {
@@ -52,17 +50,30 @@ && vectorIndexTypeSupported(indexOptions.getType())
5250
}
5351

5452
private boolean vectorIndexTypeSupported(DenseVectorFieldMapper.VectorIndexType type) {
55-
return type == DenseVectorFieldMapper.VectorIndexType.HNSW;
53+
return type == DenseVectorFieldMapper.VectorIndexType.HNSW || type == DenseVectorFieldMapper.VectorIndexType.INT8_HNSW;
5654
}
5755

5856
private static KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions) {
5957
if (indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.HNSW) {
6058
DenseVectorFieldMapper.HnswIndexOptions hnswIndexOptions = (DenseVectorFieldMapper.HnswIndexOptions) indexOptions;
6159
int efConstruction = hnswIndexOptions.efConstruction();
6260
if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) {
63-
efConstruction = GPUVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128
61+
efConstruction = ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128
6462
}
65-
return new GPUVectorsFormat(hnswIndexOptions.m(), efConstruction);
63+
return new ESGpuHnswVectorsFormat(hnswIndexOptions.m(), efConstruction);
64+
} else if (indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.INT8_HNSW) {
65+
DenseVectorFieldMapper.Int8HnswIndexOptions int8HnswIndexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) indexOptions;
66+
int efConstruction = int8HnswIndexOptions.efConstruction();
67+
if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) {
68+
efConstruction = ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128
69+
}
70+
return new ESGpuHnswSQVectorsFormat(
71+
int8HnswIndexOptions.m(),
72+
efConstruction,
73+
int8HnswIndexOptions.confidenceInterval(),
74+
7,
75+
false
76+
);
6677
} else {
6778
throw new IllegalArgumentException(
6879
"GPU vector indexing is not supported on this vector type: [" + indexOptions.getType() + "]"

x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ static DatasetUtils getInstance() {
2020
}
2121

2222
/** Returns a Dataset over the vectors in the input; the element type is given by {@code dataType}. */
23-
CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException;
23+
CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException;
2424
}

x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ static DatasetUtils getInstance() {
1919
}
2020

2121
@Override
22-
public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) {
22+
public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) {
2323
throw new UnsupportedOperationException("should not reach here");
2424
}
2525
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.gpu.codec;
9+
10+
import org.apache.lucene.codecs.KnnVectorsFormat;
11+
import org.apache.lucene.codecs.KnnVectorsReader;
12+
import org.apache.lucene.codecs.KnnVectorsWriter;
13+
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
14+
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
15+
import org.apache.lucene.index.SegmentReadState;
16+
import org.apache.lucene.index.SegmentWriteState;
17+
import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat;
18+
19+
import java.io.IOException;
20+
21+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
22+
import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH;
23+
import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.DEFAULT_MAX_CONN;
24+
25+
/**
26+
* Codec format for GPU-accelerated scalar quantized HNSW vector indexes.
27+
* HNSW graph is built on GPU, while scalar quantization and search is performed on CPU.
28+
*/
29+
public class ESGpuHnswSQVectorsFormat extends KnnVectorsFormat {
30+
public static final String NAME = "ESGPUHnswScalarQuantizedVectorsFormat";
31+
static final int MAXIMUM_MAX_CONN = 512;
32+
static final int MAXIMUM_BEAM_WIDTH = 3200;
33+
private final int maxConn;
34+
private final int beamWidth;
35+
36+
/** The format for storing, reading, merging vectors on disk */
37+
private final FlatVectorsFormat flatVectorsFormat;
38+
final CuVSResourceManager cuVSResourceManager;
39+
40+
public ESGpuHnswSQVectorsFormat() {
41+
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, null, 7, false);
42+
}
43+
44+
public ESGpuHnswSQVectorsFormat(int maxConn, int beamWidth, Float confidenceInterval, int bits, boolean compress) {
45+
super(NAME);
46+
this.cuVSResourceManager = CuVSResourceManager.pooling();
47+
if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
48+
throw new IllegalArgumentException(
49+
"maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn
50+
);
51+
}
52+
if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) {
53+
throw new IllegalArgumentException(
54+
"beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth
55+
);
56+
}
57+
this.maxConn = maxConn;
58+
this.beamWidth = beamWidth;
59+
this.flatVectorsFormat = new ES814ScalarQuantizedVectorsFormat(confidenceInterval, bits, compress);
60+
}
61+
62+
@Override
63+
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
64+
return new ESGpuHnswVectorsWriter(cuVSResourceManager, state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state));
65+
}
66+
67+
@Override
68+
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
69+
return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
70+
}
71+
72+
@Override
73+
public int getMaxDimensions(String fieldName) {
74+
return MAX_DIMS_COUNT;
75+
}
76+
77+
@Override
78+
public String toString() {
79+
return NAME
80+
+ "(name="
81+
+ NAME
82+
+ ", maxConn="
83+
+ maxConn
84+
+ ", beamWidth="
85+
+ beamWidth
86+
+ ", flatVectorFormat="
87+
+ flatVectorsFormat
88+
+ ")";
89+
}
90+
}

x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java renamed to x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919

2020
import java.io.IOException;
2121

22+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
23+
2224
/**
2325
* Codec format for GPU-accelerated vector indexes. This format is designed to
2426
* leverage GPU processing capabilities for vector indexing operations.
2527
*/
26-
public class GPUVectorsFormat extends KnnVectorsFormat {
27-
public static final String NAME = "GPUVectorsFormat";
28+
public class ESGpuHnswVectorsFormat extends KnnVectorsFormat {
29+
public static final String NAME = "ESGpuHnswVectorsFormat";
2830
public static final int VERSION_START = 0;
2931

3032
static final String LUCENE99_HNSW_META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta";
@@ -47,15 +49,15 @@ public class GPUVectorsFormat extends KnnVectorsFormat {
4749
private final int beamWidth;
4850
final CuVSResourceManager cuVSResourceManager;
4951

50-
public GPUVectorsFormat() {
52+
public ESGpuHnswVectorsFormat() {
5153
this(CuVSResourceManager.pooling(), DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
5254
}
5355

54-
public GPUVectorsFormat(int maxConn, int beamWidth) {
56+
/**
 * Creates a format with the given graph parameters, using the resource manager returned by
 * {@code CuVSResourceManager.pooling()}.
 *
 * @param maxConn   forwarded unchanged to the three-argument constructor
 * @param beamWidth forwarded unchanged to the three-argument constructor
 */
public ESGpuHnswVectorsFormat(int maxConn, int beamWidth) {
    this(CuVSResourceManager.pooling(), maxConn, beamWidth);
}
5759

58-
public GPUVectorsFormat(CuVSResourceManager cuVSResourceManager, int maxConn, int beamWidth) {
60+
public ESGpuHnswVectorsFormat(CuVSResourceManager cuVSResourceManager, int maxConn, int beamWidth) {
5961
super(NAME);
6062
this.cuVSResourceManager = cuVSResourceManager;
6163
this.maxConn = maxConn;
@@ -64,7 +66,7 @@ public GPUVectorsFormat(CuVSResourceManager cuVSResourceManager, int maxConn, in
6466

6567
@Override
6668
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
67-
return new GPUToHNSWVectorsWriter(cuVSResourceManager, state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state));
69+
return new ESGpuHnswVectorsWriter(cuVSResourceManager, state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state));
6870
}
6971

7072
@Override
@@ -74,11 +76,20 @@ public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException
7476

7577
@Override
7678
public int getMaxDimensions(String fieldName) {
77-
return 4096;
79+
return MAX_DIMS_COUNT;
7880
}
7981

8082
@Override
8183
public String toString() {
82-
return NAME + "(maxConn=" + maxConn + ", beamWidth=" + beamWidth + ", flatVectorFormat=" + flatVectorsFormat.getName() + ")";
84+
return NAME
85+
+ "(name="
86+
+ NAME
87+
+ ", maxConn="
88+
+ maxConn
89+
+ ", beamWidth="
90+
+ beamWidth
91+
+ ", flatVectorFormat="
92+
+ flatVectorsFormat.getName()
93+
+ ")";
8394
}
8495
}

0 commit comments

Comments
 (0)