Skip to content

Commit 540f90f

Browse files
Rassyan and davidkyle
authored and committed
Fix Synthetic Source Handling for bit Type in dense_vector Field (elastic#114407)
**Description:** This PR addresses the issue described in [elastic#114402](elastic#114402), where the `synthetic_source` feature does not correctly handle the `bit` type in `dense_vector` fields when `index` is set to `false`. The root cause was that the `bit` type was not properly accounted for: a `bit` vector packs 8 dimensions into each byte, so its doc value holds only `dims / 8` bytes, but the synthetic source logic expected an array of `dims` elements, 8 times the size of what is actually stored. This mismatch causes an array out-of-bounds exception when reconstructing the document.

**Changes:**
- Adjusted the `synthetic_source` logic to correctly handle the `bit` type by ensuring the array size accounts for the 8x difference in dimensions.
- Added a YAML test to cover the `bit` type scenario in `dense_vector` fields with `index` set to `false`.

**Related Issues:**
- Closes [elastic#114402](elastic#114402)
- Introduced in [elastic#110059](elastic#110059)
1 parent 2945dea commit 540f90f

File tree

7 files changed

+86
-8
lines changed

7 files changed

+86
-8
lines changed

docs/changelog/114407.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 114407
2+
summary: Fix synthetic source handling for `bit` type in `dense_vector` field
3+
area: Search
4+
type: bug
5+
issues:
6+
- 114402

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/45_knn_search_bit.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,3 +354,54 @@ setup:
354354
dims: 40
355355
index: true
356356
similarity: max_inner_product
357+
358+
359+
---
360+
"Search with synthetic source":
361+
- requires:
362+
capabilities:
363+
- method: POST
364+
path: /_search
365+
capabilities: [ bit_dense_vector_synthetic_source ]
366+
test_runner_features: capabilities
367+
reason: "Support for bit dense vector synthetic source capability required"
368+
- do:
369+
indices.create:
370+
index: test_synthetic_source
371+
body:
372+
mappings:
373+
properties:
374+
name:
375+
type: keyword
376+
vector1:
377+
type: dense_vector
378+
element_type: bit
379+
dims: 40
380+
index: false
381+
vector2:
382+
type: dense_vector
383+
element_type: bit
384+
dims: 40
385+
index: true
386+
similarity: l2_norm
387+
388+
- do:
389+
index:
390+
index: test_synthetic_source
391+
id: "1"
392+
body:
393+
name: cow.jpg
394+
vector1: [2, -1, 1, 4, -3]
395+
vector2: [2, -1, 1, 4, -3]
396+
397+
- do:
398+
indices.refresh: {}
399+
400+
- do:
401+
search:
402+
force_synthetic_source: true
403+
index: test_synthetic_source
404+
405+
- match: {hits.hits.0._id: "1"}
406+
- match: {hits.hits.0._source.vector1: [2, -1, 1, 4, -3]}
407+
- match: {hits.hits.0._source.vector2: [2, -1, 1, 4, -3]}

server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import java.io.IOException;
4242

4343
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL;
44+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
4445

4546
public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
4647

@@ -291,4 +292,9 @@ public RandomVectorScorer getRandomVectorScorer(VectorSimilarityFunction sim, Ra
291292
return delegate.getRandomVectorScorer(sim, values, query);
292293
}
293294
}
295+
296+
@Override
297+
public int getMaxDimensions(String fieldName) {
298+
return MAX_DIMS_COUNT;
299+
}
294300
}

server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
import java.io.IOException;
2727

28+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
29+
2830
class ES815BitFlatVectorsFormat extends FlatVectorsFormat {
2931

3032
private static final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE);
@@ -43,6 +45,11 @@ public FlatVectorsReader fieldsReader(SegmentReadState segmentReadState) throws
4345
return delegate.fieldsReader(segmentReadState);
4446
}
4547

48+
@Override
49+
public int getMaxDimensions(String fieldName) {
50+
return MAX_DIMS_COUNT;
51+
}
52+
4653
static class FlatBitVectorScorer implements FlatVectorsScorer {
4754

4855
static final FlatBitVectorScorer INSTANCE = new FlatBitVectorScorer();

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2270,7 +2270,7 @@ public void write(XContentBuilder b) throws IOException {
22702270
if (indexCreatedVersion.onOrAfter(LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION)) {
22712271
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
22722272
}
2273-
int dims = fieldType().dims;
2273+
int dims = fieldType().elementType == ElementType.BIT ? fieldType().dims / Byte.SIZE : fieldType().dims;
22742274
for (int dim = 0; dim < dims; dim++) {
22752275
fieldType().elementType.readAndWriteValue(byteBuffer, b);
22762276
}

server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ private SearchCapabilities() {}
2020

2121
/** Support regex and range match rules in interval queries. */
2222
private static final String RANGE_REGEX_INTERVAL_QUERY_CAPABILITY = "range_regexp_interval_queries";
23+
/** Support synthetic source with `bit` type in `dense_vector` field when `index` is set to `false`. */
24+
private static final String BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY = "bit_dense_vector_synthetic_source";
2325

24-
public static final Set<String> CAPABILITIES = Set.of(RANGE_REGEX_INTERVAL_QUERY_CAPABILITY);
26+
public static final Set<String> CAPABILITIES = Set.of(
27+
RANGE_REGEX_INTERVAL_QUERY_CAPABILITY,
28+
BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY
29+
);
2530
}

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2022,24 +2022,27 @@ protected boolean supportsEmptyInputArray() {
20222022

20232023
private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport {
20242024
private final int dims = between(5, 1000);
2025-
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT);
2025+
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT);
20262026
private final boolean indexed = randomBoolean();
20272027
private final boolean indexOptionsSet = indexed && randomBoolean();
20282028

20292029
@Override
20302030
public SyntheticSourceExample example(int maxValues) throws IOException {
2031-
Object value = elementType == ElementType.BYTE
2032-
? randomList(dims, dims, ESTestCase::randomByte)
2033-
: randomList(dims, dims, ESTestCase::randomFloat);
2031+
Object value = switch (elementType) {
2032+
case BYTE, BIT:
2033+
yield randomList(dims, dims, ESTestCase::randomByte);
2034+
case FLOAT:
2035+
yield randomList(dims, dims, ESTestCase::randomFloat);
2036+
};
20342037
return new SyntheticSourceExample(value, value, this::mapping);
20352038
}
20362039

20372040
private void mapping(XContentBuilder b) throws IOException {
20382041
b.field("type", "dense_vector");
2039-
b.field("dims", dims);
2040-
if (elementType == ElementType.BYTE || randomBoolean()) {
2042+
if (elementType == ElementType.BYTE || elementType == ElementType.BIT || randomBoolean()) {
20412043
b.field("element_type", elementType.toString());
20422044
}
2045+
b.field("dims", elementType == ElementType.BIT ? dims * Byte.SIZE : dims);
20432046
if (indexed) {
20442047
b.field("index", true);
20452048
b.field("similarity", "l2_norm");

0 commit comments

Comments
 (0)