Skip to content

Commit f8386c3

Browse files
benwtrent and Rassyan authored
[8.15] Fix Synthetic Source Handling for bit Type in dense_vector Field (#114407) (#114759)
* Fix Synthetic Source Handling for `bit` Type in `dense_vector` Field (#114407) **Description:** This PR addresses the issue described in [#114402](#114402), where the `synthetic_source` feature does not correctly handle the `bit` type in `dense_vector` fields when `index` is set to `false`. The root cause of the issue was that the `bit` type was not properly accounted for, leading to an array that is 8 times the size of the actual `dims` value of the doc value. This mismatch causes an array out-of-bounds exception when reconstructing the document. **Changes:** - Adjusted the `synthetic_source` logic to correctly handle the `bit` type by ensuring the array size accounts for the 8x difference in dimensions. - Added a YAML test to cover the `bit` type scenario in `dense_vector` fields with `index` set to `false`. **Related Issues:** - Closes [#114402](#114402) - Introduced in [#110059](#110059) (cherry picked from commit 465c65c) * fixing backport of search capabilities * fixing license header * adding capabilities to RestSearchAction * fixing backport * spotless * muting test for ccs * adding capabilities to the ccs test runner --------- Co-authored-by: Rassyan <[email protected]>
1 parent 54a220a commit f8386c3

File tree

7 files changed

+97
-8
lines changed

7 files changed

+97
-8
lines changed

docs/changelog/114407.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 114407
2+
summary: Fix synthetic source handling for `bit` type in `dense_vector` field
3+
area: Search
4+
type: bug
5+
issues:
6+
- 114402

qa/ccs-common-rest/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ apply plugin: 'elasticsearch.internal-yaml-rest-test'
1010

1111
restResources {
1212
restApi {
13-
include '_common', 'bulk', 'count', 'cluster', 'field_caps', 'get', 'knn_search', 'index', 'indices', 'msearch',
13+
include 'capabilities', '_common', 'bulk', 'count', 'cluster', 'field_caps', 'get', 'knn_search', 'index', 'indices', 'msearch',
1414
'search', 'async_search', 'graph', '*_point_in_time', 'info', 'scroll', 'clear_scroll', 'search_mvt', 'eql', 'sql'
1515
}
1616
restTests {

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/45_knn_search_bit.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,3 +354,54 @@ setup:
354354
dims: 40
355355
index: true
356356
similarity: max_inner_product
357+
358+
359+
---
360+
"Search with synthetic source":
361+
- requires:
362+
reason: "Support for bit dense vector synthetic source capability required"
363+
test_runner_features: [capabilities]
364+
capabilities:
365+
- method: POST
366+
path: /_search
367+
capabilities: [ bit_dense_vector_synthetic_source ]
368+
- do:
369+
indices.create:
370+
index: test_synthetic_source
371+
body:
372+
mappings:
373+
properties:
374+
name:
375+
type: keyword
376+
vector1:
377+
type: dense_vector
378+
element_type: bit
379+
dims: 40
380+
index: false
381+
vector2:
382+
type: dense_vector
383+
element_type: bit
384+
dims: 40
385+
index: true
386+
similarity: l2_norm
387+
388+
- do:
389+
index:
390+
index: test_synthetic_source
391+
id: "1"
392+
body:
393+
name: cow.jpg
394+
vector1: [2, -1, 1, 4, -3]
395+
vector2: [2, -1, 1, 4, -3]
396+
397+
- do:
398+
indices.refresh: {}
399+
400+
- do:
401+
search:
402+
force_synthetic_source: true
403+
index: test_synthetic_source
404+
405+
- match: {hits.hits.0._id: "1"}
406+
- match: {hits.hits.0._source.vector1: [2, -1, 1, 4, -3]}
407+
- match: {hits.hits.0._source.vector2: [2, -1, 1, 4, -3]}

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2218,7 +2218,7 @@ public void write(XContentBuilder b) throws IOException {
22182218
if (indexCreatedVersion.onOrAfter(LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION)) {
22192219
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
22202220
}
2221-
int dims = fieldType().dims;
2221+
int dims = fieldType().elementType == ElementType.BIT ? fieldType().dims / Byte.SIZE : fieldType().dims;
22222222
for (int dim = 0; dim < dims; dim++) {
22232223
fieldType().elementType.readAndWriteValue(byteBuffer, b);
22242224
}

server/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,11 @@ public List<Route> routes() {
9494
);
9595
}
9696

97+
@Override
98+
public Set<String> supportedCapabilities() {
99+
return SearchCapabilities.CAPABILITIES;
100+
}
101+
97102
@Override
98103
public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
99104

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.rest.action.search;
10+
11+
import java.util.Set;
12+
13+
/**
14+
* A {@link Set} of "capabilities" supported by the {@link RestSearchAction}.
15+
*/
16+
public final class SearchCapabilities {
17+
18+
private SearchCapabilities() {}
19+
20+
/** Support synthetic source with `bit` type in `dense_vector` field when `index` is set to `false`. */
21+
private static final String BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY = "bit_dense_vector_synthetic_source";
22+
23+
public static final Set<String> CAPABILITIES = Set.of(BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY);
24+
}

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,24 +1435,27 @@ protected boolean supportsEmptyInputArray() {
14351435

14361436
private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport {
14371437
private final int dims = between(5, 1000);
1438-
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT);
1438+
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT);
14391439
private final boolean indexed = randomBoolean();
14401440
private final boolean indexOptionsSet = indexed && randomBoolean();
14411441

14421442
@Override
14431443
public SyntheticSourceExample example(int maxValues) throws IOException {
1444-
Object value = elementType == ElementType.BYTE
1445-
? randomList(dims, dims, ESTestCase::randomByte)
1446-
: randomList(dims, dims, ESTestCase::randomFloat);
1444+
Object value = switch (elementType) {
1445+
case BYTE, BIT:
1446+
yield randomList(dims, dims, ESTestCase::randomByte);
1447+
case FLOAT:
1448+
yield randomList(dims, dims, ESTestCase::randomFloat);
1449+
};
14471450
return new SyntheticSourceExample(value, value, this::mapping);
14481451
}
14491452

14501453
private void mapping(XContentBuilder b) throws IOException {
14511454
b.field("type", "dense_vector");
1452-
b.field("dims", dims);
1453-
if (elementType == ElementType.BYTE || randomBoolean()) {
1455+
if (elementType == ElementType.BYTE || elementType == ElementType.BIT || randomBoolean()) {
14541456
b.field("element_type", elementType.toString());
14551457
}
1458+
b.field("dims", elementType == ElementType.BIT ? dims * Byte.SIZE : dims);
14561459
if (indexed) {
14571460
b.field("index", true);
14581461
b.field("similarity", "l2_norm");

0 commit comments

Comments
 (0)