Skip to content

Commit a5e0226

Browse files
benwtrent and Rassyan authored
Fix Synthetic Source Handling for bit Type in dense_vector Field (elastic#114407) (elastic#114756)
**Description:** This PR addresses the issue described in [elastic#114402](elastic#114402), where the `synthetic_source` feature does not correctly handle the `bit` type in `dense_vector` fields when `index` is set to `false`. The root cause was that the `bit` type was not properly accounted for: a `bit` vector with `dims` bits is stored in doc values as `dims / 8` bytes, but the synthetic source logic tried to read `dims` elements, i.e. 8 times as many as are actually stored. This mismatch causes an array out-of-bounds exception when reconstructing the document. **Changes:** - Adjusted the `synthetic_source` logic to correctly handle the `bit` type by ensuring the array size accounts for the 8x difference in dimensions. - Added a YAML test to cover the `bit` type scenario in `dense_vector` fields with `index` set to `false`. **Related Issues:** - Closes [elastic#114402](elastic#114402) - Introduced in [elastic#110059](elastic#110059) Co-authored-by: Rassyan <[email protected]>
1 parent 6a00e91 commit a5e0226

File tree

7 files changed

+86
-8
lines changed

7 files changed

+86
-8
lines changed

docs/changelog/114407.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 114407
2+
summary: Fix synthetic source handling for `bit` type in `dense_vector` field
3+
area: Search
4+
type: bug
5+
issues:
6+
- 114402

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/45_knn_search_bit.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,3 +354,54 @@ setup:
354354
dims: 40
355355
index: true
356356
similarity: max_inner_product
357+
358+
359+
---
360+
"Search with synthetic source":
361+
- requires:
362+
capabilities:
363+
- method: POST
364+
path: /_search
365+
capabilities: [ bit_dense_vector_synthetic_source ]
366+
test_runner_features: capabilities
367+
reason: "Support for bit dense vector synthetic source capability required"
368+
- do:
369+
indices.create:
370+
index: test_synthetic_source
371+
body:
372+
mappings:
373+
properties:
374+
name:
375+
type: keyword
376+
vector1:
377+
type: dense_vector
378+
element_type: bit
379+
dims: 40
380+
index: false
381+
vector2:
382+
type: dense_vector
383+
element_type: bit
384+
dims: 40
385+
index: true
386+
similarity: l2_norm
387+
388+
- do:
389+
index:
390+
index: test_synthetic_source
391+
id: "1"
392+
body:
393+
name: cow.jpg
394+
vector1: [2, -1, 1, 4, -3]
395+
vector2: [2, -1, 1, 4, -3]
396+
397+
- do:
398+
indices.refresh: {}
399+
400+
- do:
401+
search:
402+
force_synthetic_source: true
403+
index: test_synthetic_source
404+
405+
- match: {hits.hits.0._id: "1"}
406+
- match: {hits.hits.0._source.vector1: [2, -1, 1, 4, -3]}
407+
- match: {hits.hits.0._source.vector2: [2, -1, 1, 4, -3]}

server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import java.io.IOException;
4242

4343
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL;
44+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
4445

4546
public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
4647

@@ -289,4 +290,9 @@ public RandomVectorScorer getRandomVectorScorer(VectorSimilarityFunction sim, Ra
289290
return delegate.getRandomVectorScorer(sim, values, query);
290291
}
291292
}
293+
294+
@Override
295+
public int getMaxDimensions(String fieldName) {
296+
return MAX_DIMS_COUNT;
297+
}
292298
}

server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
import java.io.IOException;
2727

28+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
29+
2830
class ES815BitFlatVectorsFormat extends FlatVectorsFormat {
2931

3032
private final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE);
@@ -43,6 +45,11 @@ public FlatVectorsReader fieldsReader(SegmentReadState segmentReadState) throws
4345
return delegate.fieldsReader(segmentReadState);
4446
}
4547

48+
@Override
49+
public int getMaxDimensions(String fieldName) {
50+
return MAX_DIMS_COUNT;
51+
}
52+
4653
static class FlatBitVectorScorer implements FlatVectorsScorer {
4754

4855
static final FlatBitVectorScorer INSTANCE = new FlatBitVectorScorer();

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2270,7 +2270,7 @@ public void write(XContentBuilder b) throws IOException {
22702270
if (indexCreatedVersion.onOrAfter(LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION)) {
22712271
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
22722272
}
2273-
int dims = fieldType().dims;
2273+
int dims = fieldType().elementType == ElementType.BIT ? fieldType().dims / Byte.SIZE : fieldType().dims;
22742274
for (int dim = 0; dim < dims; dim++) {
22752275
fieldType().elementType.readAndWriteValue(byteBuffer, b);
22762276
}

server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ private SearchCapabilities() {}
2020

2121
/** Support regex and range match rules in interval queries. */
2222
private static final String RANGE_REGEX_INTERVAL_QUERY_CAPABILITY = "range_regexp_interval_queries";
23+
/** Support synthetic source with `bit` type in `dense_vector` field when `index` is set to `false`. */
24+
private static final String BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY = "bit_dense_vector_synthetic_source";
2325

24-
public static final Set<String> CAPABILITIES = Set.of(RANGE_REGEX_INTERVAL_QUERY_CAPABILITY);
26+
public static final Set<String> CAPABILITIES = Set.of(
27+
RANGE_REGEX_INTERVAL_QUERY_CAPABILITY,
28+
BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY
29+
);
2530
}

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2040,24 +2040,27 @@ protected boolean supportsEmptyInputArray() {
20402040

20412041
private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport {
20422042
private final int dims = between(5, 1000);
2043-
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT);
2043+
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT);
20442044
private final boolean indexed = randomBoolean();
20452045
private final boolean indexOptionsSet = indexed && randomBoolean();
20462046

20472047
@Override
20482048
public SyntheticSourceExample example(int maxValues) throws IOException {
2049-
Object value = elementType == ElementType.BYTE
2050-
? randomList(dims, dims, ESTestCase::randomByte)
2051-
: randomList(dims, dims, ESTestCase::randomFloat);
2049+
Object value = switch (elementType) {
2050+
case BYTE, BIT:
2051+
yield randomList(dims, dims, ESTestCase::randomByte);
2052+
case FLOAT:
2053+
yield randomList(dims, dims, ESTestCase::randomFloat);
2054+
};
20522055
return new SyntheticSourceExample(value, value, this::mapping);
20532056
}
20542057

20552058
private void mapping(XContentBuilder b) throws IOException {
20562059
b.field("type", "dense_vector");
2057-
b.field("dims", dims);
2058-
if (elementType == ElementType.BYTE || randomBoolean()) {
2060+
if (elementType == ElementType.BYTE || elementType == ElementType.BIT || randomBoolean()) {
20592061
b.field("element_type", elementType.toString());
20602062
}
2063+
b.field("dims", elementType == ElementType.BIT ? dims * Byte.SIZE : dims);
20612064
if (indexed) {
20622065
b.field("index", true);
20632066
b.field("similarity", "l2_norm");

0 commit comments

Comments
 (0)