Skip to content

Commit 70b7903

Browse files
authored
ES|QL - Dense vector type - Enable bit element type (#134326)
1 parent 1a9ff77 commit 70b7903

File tree

11 files changed

+152
-37
lines changed

11 files changed

+152
-37
lines changed

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,12 @@ public AllReader reader(LeafReaderContext context) throws IOException {
542542
return new ByteDenseVectorValuesBlockReader(byteVectorValues, dimensions);
543543
}
544544
}
545+
case BIT -> {
546+
ByteVectorValues byteVectorValues = context.reader().getByteVectorValues(fieldName);
547+
if (byteVectorValues != null) {
548+
return new BitDenseVectorValuesBlockReader(byteVectorValues, dimensions);
549+
}
550+
}
545551
}
546552

547553
return new ConstantNullsReader();
@@ -577,8 +583,7 @@ public void read(int docId, BlockLoader.StoredFields storedFields, Builder build
577583
}
578584

579585
private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException {
580-
assert vectorValues.dimension() == dimensions
581-
: "unexpected dimensions for vector value; expected " + dimensions + " but got " + vectorValues.dimension();
586+
assertDimensions();
582587

583588
if (iterator.docID() > doc) {
584589
builder.appendNull();
@@ -597,6 +602,11 @@ private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException
597602
public int docId() {
598603
return iterator.docID();
599604
}
605+
606+
protected void assertDimensions() {
607+
assert vectorValues.dimension() == dimensions
608+
: "unexpected dimensions for vector value; expected " + dimensions + " but got " + vectorValues.dimension();
609+
}
600610
}
601611

602612
private static class FloatDenseVectorValuesBlockReader extends DenseVectorValuesBlockReader<FloatVectorValues> {
@@ -668,6 +678,24 @@ public String toString() {
668678
}
669679
}
670680

681+
private static class BitDenseVectorValuesBlockReader extends ByteDenseVectorValuesBlockReader {
682+
683+
BitDenseVectorValuesBlockReader(ByteVectorValues floatVectorValues, int dimensions) {
684+
super(floatVectorValues, dimensions);
685+
}
686+
687+
@Override
688+
protected void assertDimensions() {
689+
assert vectorValues.dimension() * Byte.SIZE == dimensions
690+
: "unexpected dimensions for vector value; expected " + dimensions + " but got " + vectorValues.dimension() * Byte.SIZE;
691+
}
692+
693+
@Override
694+
public String toString() {
695+
return "BlockDocValuesReader.BitDenseVectorValuesBlockReader";
696+
}
697+
}
698+
671699
public static class BytesRefsFromOrdsBlockLoader extends DocValuesBlockLoader {
672700
private final String fieldName;
673701

@@ -1011,14 +1039,11 @@ public AllReader reader(LeafReaderContext context) throws IOException {
10111039
if (docValues == null) {
10121040
return new ConstantNullsReader();
10131041
}
1014-
switch (elementType) {
1015-
case FLOAT:
1016-
return new FloatDenseVectorFromBinary(docValues, dims, indexVersion);
1017-
case BYTE:
1018-
return new ByteDenseVectorFromBinary(docValues, dims, indexVersion);
1019-
default:
1020-
throw new IllegalArgumentException("Unknown element type [" + elementType + "]");
1021-
}
1042+
return switch (elementType) {
1043+
case FLOAT -> new FloatDenseVectorFromBinary(docValues, dims, indexVersion);
1044+
case BYTE -> new ByteDenseVectorFromBinary(docValues, dims, indexVersion);
1045+
case BIT -> new BitDenseVectorFromBinary(docValues, dims, indexVersion);
1046+
};
10221047
}
10231048
}
10241049

@@ -1101,7 +1126,11 @@ public String toString() {
11011126

11021127
private static class ByteDenseVectorFromBinary extends AbstractDenseVectorFromBinary<byte[]> {
11031128
ByteDenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) {
1104-
super(docValues, dims, indexVersion, new byte[dims]);
1129+
this(docValues, dims, indexVersion, dims);
1130+
}
1131+
1132+
protected ByteDenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion, int readScratchSize) {
1133+
super(docValues, dims, indexVersion, new byte[readScratchSize]);
11051134
}
11061135

11071136
@Override
@@ -1120,6 +1149,17 @@ protected void decodeDenseVector(BytesRef bytesRef, byte[] scratch) {
11201149
}
11211150
}
11221151

1152+
private static class BitDenseVectorFromBinary extends ByteDenseVectorFromBinary {
1153+
BitDenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) {
1154+
super(docValues, dims, indexVersion, dims / Byte.SIZE);
1155+
}
1156+
1157+
@Override
1158+
public String toString() {
1159+
return "BitDenseVectorFromBinary.Bytes";
1160+
}
1161+
}
1162+
11231163
public static class BooleansBlockLoader extends DocValuesBlockLoader {
11241164
private final String fieldName;
11251165

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2643,11 +2643,6 @@ public DenseVectorIndexOptions getIndexOptions() {
26432643

26442644
@Override
26452645
public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
2646-
if (element.elementType() == ElementType.BIT) {
2647-
// Just float and byte dense vector support for now
2648-
return null;
2649-
}
2650-
26512646
if (dims == null) {
26522647
// No data has been indexed yet
26532648
return BlockLoader.CONSTANT_NULLS;
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
id:l, float_vector:dense_vector, byte_vector:dense_vector
2-
0, [1.0, 2.0, 3.0], [10, 20, 30]
3-
1, [4.0, 5.0, 6.0], [40, 50, 60]
4-
2, [9.0, 8.0, 7.0], [90, 80, 70]
5-
3, [0.054, 0.032, 0.012], [100, 110, 120]
1+
id:l, float_vector:dense_vector, byte_vector:dense_vector, bit_vector:dense_vector
2+
0, [1.0, 2.0, 3.0], [10, 20, 30], [13, 112]
3+
1, [4.0, 5.0, 6.0], [40, 50, 60], [45, 9]
4+
2, [9.0, 8.0, 7.0], [90, 80, 70], [127, 0]
5+
3, [0.054, 0.032, 0.012], [100, 110, 120], [88, 53]
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
retrieveBitVectorData
2+
required_capability: dense_vector_field_type_bit_elements
3+
4+
FROM dense_vector
5+
| KEEP id, bit_vector
6+
| SORT id
7+
;
8+
9+
id:l | bit_vector:dense_vector
10+
0 | [13, 112]
11+
1 | [45, 9]
12+
2 | [127, 0]
13+
3 | [88, 53]
14+
;
15+
16+
denseBitVectorWithEval
17+
required_capability: dense_vector_field_type_bit_elements
18+
19+
FROM dense_vector
20+
| EVAL v = bit_vector
21+
| KEEP id, v
22+
| SORT id
23+
;
24+
25+
id:l | v:dense_vector
26+
0 | [13, 112]
27+
1 | [45, 9]
28+
2 | [127, 0]
29+
3 | [88, 53]
30+
;
31+
32+
denseBitVectorWithRenameAndDrop
33+
required_capability: dense_vector_field_type_bit_elements
34+
35+
FROM dense_vector
36+
| EVAL v = bit_vector
37+
| RENAME v AS new_vector
38+
| DROP float_vector, byte_vector, bit_vector
39+
| SORT id
40+
;
41+
42+
id:l | new_vector:dense_vector
43+
0 | [13, 112]
44+
1 | [45, 9]
45+
2 | [127, 0]
46+
3 | [88, 53]
47+
;

x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-byte.csv-spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ required_capability: dense_vector_field_type_byte_elements
3535
FROM dense_vector
3636
| EVAL v = byte_vector
3737
| RENAME v AS new_vector
38-
| DROP float_vector, byte_vector
38+
| DROP float_vector, byte_vector, bit_vector
3939
| SORT id
4040
;
4141

x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ required_capability: dense_vector_field_type
3535
FROM dense_vector
3636
| EVAL v = float_vector
3737
| RENAME v AS new_vector
38-
| DROP float_vector, byte_vector
38+
| DROP float_vector, byte_vector, bit_vector
3939
| SORT id
4040
;
4141

x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@
2121
"m": 16,
2222
"ef_construction": 100
2323
}
24+
},
25+
"bit_vector": {
26+
"type": "dense_vector",
27+
"dims": 16,
28+
"similarity": "l2_norm",
29+
"element_type": "bit",
30+
"index_options": {
31+
"type": "hnsw",
32+
"m": 16,
33+
"ef_construction": 100
34+
}
2435
}
2536
}
2637
}

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,12 @@ public class DenseVectorFieldTypeIT extends AbstractEsqlIntegTestCase {
5959
public static Iterable<Object[]> parameters() throws Exception {
6060
List<Object[]> params = new ArrayList<>();
6161

62-
for (ElementType elementType : List.of(ElementType.BYTE, ElementType.FLOAT)) {
62+
for (ElementType elementType : List.of(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT)) {
6363
// Test all similarities
6464
for (DenseVectorFieldMapper.VectorSimilarity similarity : DenseVectorFieldMapper.VectorSimilarity.values()) {
65+
if (elementType == ElementType.BIT && similarity != DenseVectorFieldMapper.VectorSimilarity.L2_NORM) {
66+
continue;
67+
}
6568
params.add(new Object[] { elementType, similarity, true, false });
6669
}
6770

@@ -207,7 +210,7 @@ public void setup() throws IOException {
207210
for (int j = 0; j < numDims; j++) {
208211
switch (elementType) {
209212
case FLOAT -> vector.add(randomFloatBetween(0F, 1F, true));
210-
case BYTE -> vector.add((byte) (randomFloatBetween(0F, 1F, true) * 127.0f));
213+
case BYTE, BIT -> vector.add((byte) (randomFloatBetween(0F, 1F, true) * 127.0f));
211214
default -> throw new IllegalArgumentException("Unexpected element type: " + elementType);
212215
}
213216
}
@@ -238,9 +241,12 @@ private void createIndexWithDenseVector(String indexName) throws IOException {
238241
.field("index", index);
239242
if (index) {
240243
mapping.field("similarity", similarity.name().toLowerCase(Locale.ROOT));
241-
String indexType = elementType == ElementType.FLOAT
242-
? randomFrom(ALL_DENSE_VECTOR_INDEX_TYPES)
243-
: randomFrom(NON_QUANTIZED_DENSE_VECTOR_INDEX_TYPES);
244+
String indexType;
245+
if (elementType == ElementType.FLOAT) {
246+
indexType = randomFrom(ALL_DENSE_VECTOR_INDEX_TYPES);
247+
} else {
248+
indexType = randomFrom(NON_QUANTIZED_DENSE_VECTOR_INDEX_TYPES);
249+
}
244250
mapping.startObject("index_options").field("type", indexType).endObject();
245251
}
246252
mapping.endObject().endObject().endObject();

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public static Iterable<Object[]> parameters() throws Exception {
5656
}
5757
for (String indexType : NON_QUANTIZED_DENSE_VECTOR_INDEX_TYPES) {
5858
params.add(new Object[] { DenseVectorFieldMapper.ElementType.BYTE, indexType });
59+
params.add(new Object[] { DenseVectorFieldMapper.ElementType.BIT, indexType });
5960
}
6061

6162
// Remove flat index types, as knn does not do a top k for flat
@@ -234,14 +235,9 @@ public void setup() throws IOException {
234235
List<Number> vector = new ArrayList<>(numDims);
235236
for (int j = 0; j < numDims; j++) {
236237
switch (elementType) {
237-
case FLOAT:
238-
vector.add(randomFloatBetween(0F, 1F, true));
239-
break;
240-
case BYTE:
241-
vector.add((byte) (randomFloatBetween(0F, 1F, true) * 127));
242-
break;
243-
default:
244-
throw new IllegalArgumentException("Unexpected element type: " + elementType);
238+
case FLOAT -> vector.add(randomFloatBetween(0F, 1F, true));
239+
case BYTE, BIT -> vector.add((byte) (randomFloatBetween(0F, 1F, true) * 127.0f));
240+
default -> throw new IllegalArgumentException("Unexpected element type: " + elementType);
245241
}
246242
}
247243
docs[i] = prepareIndex("test").setId(String.valueOf(i)).setSource("id", String.valueOf(i), "vector", vector);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1419,6 +1419,11 @@ public enum Cap {
14191419
*/
14201420
DENSE_VECTOR_FIELD_TYPE_BYTE_ELEMENTS(EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG),
14211421

1422+
/**
1423+
* Bit elements dense vector field type support.
1424+
*/
1425+
DENSE_VECTOR_FIELD_TYPE_BIT_ELEMENTS(EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG),
1426+
14221427
/**
14231428
* Support null elements on vector similarity functions
14241429
*/

0 commit comments

Comments
 (0)