Skip to content

Commit 668b95c

Browse files
committed
Fix synthetic source bug that would mishandle nested dense_vector fields (elastic#122425)
When using synthetic source with nested fields, we attempt to rebuild the child values in addition to all the parent values. While this generally works well, it is possible that certain values are missing from some child docs. Consequently, we would advance the vector values iterator incorrectly, resulting in seemingly missing values or potentially in exceptions indicating EOFs. closes: elastic#122383
1 parent ac9d538 commit 668b95c

File tree

4 files changed

+170
-1
lines changed

4 files changed

+170
-1
lines changed

docs/changelog/122425.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 122425
2+
summary: Fix synthetic source bug that would mishandle nested `dense_vector` fields
3+
area: Mapping
4+
type: bug
5+
issues: []

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2008,3 +2008,143 @@ create index with use_synthetic_source:
20082008
flush: false
20092009
- gt: { test.store_size_in_bytes: 0 }
20102010
- is_false: test.fields._recovery_source
2011+
---
2012+
"Nested synthetic source with indexed dense vectors":
2013+
- requires:
2014+
test_runner_features: [ capabilities ]
2015+
capabilities:
2016+
- method: PUT
2017+
path: /{index}
2018+
capabilities: [ synthetic_nested_dense_vector_bug_fix ]
2019+
reason: "Requires synthetic source bugfix for dense vectors in nested objects"
2020+
- do:
2021+
indices.create:
2022+
index: nested_dense_vector_synthetic_test
2023+
body:
2024+
mappings:
2025+
properties:
2026+
parent:
2027+
type: nested
2028+
properties:
2029+
vector:
2030+
type: dense_vector
2031+
index: true
2032+
similarity: l2_norm
2033+
text:
2034+
type: text
2035+
settings:
2036+
index:
2037+
mapping:
2038+
source:
2039+
mode: synthetic
2040+
- do:
2041+
index:
2042+
index: nested_dense_vector_synthetic_test
2043+
id: 0
2044+
refresh: true
2045+
body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2046+
2047+
- do:
2048+
index:
2049+
index: nested_dense_vector_synthetic_test
2050+
id: 1
2051+
refresh: true
2052+
body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2053+
2054+
- do:
2055+
index:
2056+
index: nested_dense_vector_synthetic_test
2057+
id: 2
2058+
refresh: true
2059+
body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2060+
2061+
2062+
- do:
2063+
search:
2064+
index: nested_dense_vector_synthetic_test
2065+
body:
2066+
query:
2067+
match_all: {}
2068+
2069+
- match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] }
2070+
- match: { hits.hits.0._source.parent.0.text: "foo" }
2071+
- match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] }
2072+
- match: { hits.hits.0._source.parent.1.text: "bar" }
2073+
- is_false: hits.hits.1._source.parent.0.vector
2074+
- match: { hits.hits.1._source.parent.0.text: "foo" }
2075+
- match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] }
2076+
- match: { hits.hits.1._source.parent.1.text: "bar" }
2077+
- match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] }
2078+
- is_false: hits.hits.2._source.parent.0.text
2079+
- match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] }
2080+
- match: { hits.hits.2._source.parent.1.text: "bar" }
2081+
---
2082+
"Nested synthetic source with un-indexed dense vectors":
2083+
- requires:
2084+
test_runner_features: [ capabilities ]
2085+
capabilities:
2086+
- method: PUT
2087+
path: /{index}
2088+
capabilities: [ synthetic_nested_dense_vector_bug_fix ]
2089+
reason: "Requires synthetic source bugfix for dense vectors in nested objects"
2090+
- do:
2091+
indices.create:
2092+
index: nested_dense_vector_synthetic_test
2093+
body:
2094+
mappings:
2095+
properties:
2096+
parent:
2097+
type: nested
2098+
properties:
2099+
vector:
2100+
type: dense_vector
2101+
index: false
2102+
text:
2103+
type: text
2104+
settings:
2105+
index:
2106+
mapping:
2107+
source:
2108+
mode: synthetic
2109+
- do:
2110+
index:
2111+
index: nested_dense_vector_synthetic_test
2112+
id: 0
2113+
refresh: true
2114+
body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2115+
2116+
- do:
2117+
index:
2118+
index: nested_dense_vector_synthetic_test
2119+
id: 1
2120+
refresh: true
2121+
body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2122+
2123+
- do:
2124+
index:
2125+
index: nested_dense_vector_synthetic_test
2126+
id: 2
2127+
refresh: true
2128+
body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2129+
2130+
2131+
- do:
2132+
search:
2133+
index: nested_dense_vector_synthetic_test
2134+
body:
2135+
query:
2136+
match_all: {}
2137+
2138+
- match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] }
2139+
- match: { hits.hits.0._source.parent.0.text: "foo" }
2140+
- match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] }
2141+
- match: { hits.hits.0._source.parent.1.text: "bar" }
2142+
- is_false: hits.hits.1._source.parent.0.vector
2143+
- match: { hits.hits.1._source.parent.0.text: "foo" }
2144+
- match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] }
2145+
- match: { hits.hits.1._source.parent.1.text: "bar" }
2146+
- match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] }
2147+
- is_false: hits.hits.2._source.parent.0.text
2148+
- match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] }
2149+
- match: { hits.hits.2._source.parent.1.text: "bar" }
2150+

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2404,6 +2404,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
24042404
}
24052405
KnnVectorValues.DocIndexIterator iterator = values.iterator();
24062406
return docId -> {
2407+
if (iterator.docID() > docId) {
2408+
return hasValue = false;
2409+
}
2410+
if (iterator.docID() == docId) {
2411+
return hasValue = true;
2412+
}
24072413
hasValue = docId == iterator.advance(docId);
24082414
hasMagnitude = hasValue && magnitudeReader != null && magnitudeReader.advanceExact(docId);
24092415
ord = iterator.index();
@@ -2414,6 +2420,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
24142420
if (byteVectorValues != null) {
24152421
KnnVectorValues.DocIndexIterator iterator = byteVectorValues.iterator();
24162422
return docId -> {
2423+
if (iterator.docID() > docId) {
2424+
return hasValue = false;
2425+
}
2426+
if (iterator.docID() == docId) {
2427+
return hasValue = true;
2428+
}
24172429
hasValue = docId == iterator.advance(docId);
24182430
ord = iterator.index();
24192431
return hasValue;
@@ -2476,6 +2488,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
24762488
return null;
24772489
}
24782490
return docId -> {
2491+
if (values.docID() > docId) {
2492+
return hasValue = false;
2493+
}
2494+
if (values.docID() == docId) {
2495+
return hasValue = true;
2496+
}
24792497
hasValue = docId == values.advance(docId);
24802498
return hasValue;
24812499
};

server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,11 @@ public class CreateIndexCapabilities {
2626
*/
2727
private static final String LOOKUP_INDEX_MODE_CAPABILITY = "lookup_index_mode";
2828

29-
public static final Set<String> CAPABILITIES = Set.of(LOGSDB_INDEX_MODE_CAPABILITY, LOOKUP_INDEX_MODE_CAPABILITY);
29+
private static final String NESTED_DENSE_VECTOR_SYNTHETIC_TEST = "nested_dense_vector_synthetic_test";
30+
31+
public static final Set<String> CAPABILITIES = Set.of(
32+
LOGSDB_INDEX_MODE_CAPABILITY,
33+
LOOKUP_INDEX_MODE_CAPABILITY,
34+
NESTED_DENSE_VECTOR_SYNTHETIC_TEST
35+
);
3036
}

0 commit comments

Comments
 (0)