Skip to content

Commit 65ee197

Browse files
committed
Fix synthetic source bug that would mishandle nested dense_vector fields (elastic#122425)
When utilizing synthetic source with nested fields, we attempt to rebuild the child values in addition to all the parent values. While this generally works well, it is possible that certain values are missing from some of the child docs. In that case, we would advance the vector values iterator incorrectly, resulting in seemingly missing values or, potentially, exceptions indicating EOFs. closes: elastic#122383 (cherry picked from commit f5c901e)
1 parent c7398bc commit 65ee197

File tree

4 files changed

+169
-1
lines changed

4 files changed

+169
-1
lines changed

docs/changelog/122425.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 122425
2+
summary: Fix synthetic source bug that would mishandle nested `dense_vector` fields
3+
area: Mapping
4+
type: bug
5+
issues: []

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2012,3 +2012,142 @@ synthetic_source with copy_to pointing inside dynamic object:
20122012
hits.hits.2.fields:
20132013
c.copy.keyword: [ "hello", "zap" ]
20142014

2015+
---
2016+
"Nested synthetic source with indexed dense vectors":
2017+
- requires:
2018+
test_runner_features: [ capabilities ]
2019+
capabilities:
2020+
- method: PUT
2021+
path: /{index}
2022+
capabilities: [ synthetic_nested_dense_vector_bug_fix ]
2023+
reason: "Requires synthetic source bugfix for dense vectors in nested objects"
2024+
- do:
2025+
indices.create:
2026+
index: nested_dense_vector_synthetic_test
2027+
body:
2028+
mappings:
2029+
properties:
2030+
parent:
2031+
type: nested
2032+
properties:
2033+
vector:
2034+
type: dense_vector
2035+
index: true
2036+
similarity: l2_norm
2037+
text:
2038+
type: text
2039+
settings:
2040+
index:
2041+
mapping:
2042+
source:
2043+
mode: synthetic
2044+
- do:
2045+
index:
2046+
index: nested_dense_vector_synthetic_test
2047+
id: 0
2048+
refresh: true
2049+
body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2050+
2051+
- do:
2052+
index:
2053+
index: nested_dense_vector_synthetic_test
2054+
id: 1
2055+
refresh: true
2056+
body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2057+
2058+
- do:
2059+
index:
2060+
index: nested_dense_vector_synthetic_test
2061+
id: 2
2062+
refresh: true
2063+
body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2064+
2065+
2066+
- do:
2067+
search:
2068+
index: nested_dense_vector_synthetic_test
2069+
body:
2070+
query:
2071+
match_all: {}
2072+
2073+
- match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] }
2074+
- match: { hits.hits.0._source.parent.0.text: "foo" }
2075+
- match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] }
2076+
- match: { hits.hits.0._source.parent.1.text: "bar" }
2077+
- is_false: hits.hits.1._source.parent.0.vector
2078+
- match: { hits.hits.1._source.parent.0.text: "foo" }
2079+
- match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] }
2080+
- match: { hits.hits.1._source.parent.1.text: "bar" }
2081+
- match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] }
2082+
- is_false: hits.hits.2._source.parent.0.text
2083+
- match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] }
2084+
- match: { hits.hits.2._source.parent.1.text: "bar" }
2085+
---
2086+
"Nested synthetic source with un-indexed dense vectors":
2087+
- requires:
2088+
test_runner_features: [ capabilities ]
2089+
capabilities:
2090+
- method: PUT
2091+
path: /{index}
2092+
capabilities: [ synthetic_nested_dense_vector_bug_fix ]
2093+
reason: "Requires synthetic source bugfix for dense vectors in nested objects"
2094+
- do:
2095+
indices.create:
2096+
index: nested_dense_vector_synthetic_test
2097+
body:
2098+
mappings:
2099+
properties:
2100+
parent:
2101+
type: nested
2102+
properties:
2103+
vector:
2104+
type: dense_vector
2105+
index: false
2106+
text:
2107+
type: text
2108+
settings:
2109+
index:
2110+
mapping:
2111+
source:
2112+
mode: synthetic
2113+
- do:
2114+
index:
2115+
index: nested_dense_vector_synthetic_test
2116+
id: 0
2117+
refresh: true
2118+
body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2119+
2120+
- do:
2121+
index:
2122+
index: nested_dense_vector_synthetic_test
2123+
id: 1
2124+
refresh: true
2125+
body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2126+
2127+
- do:
2128+
index:
2129+
index: nested_dense_vector_synthetic_test
2130+
id: 2
2131+
refresh: true
2132+
body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2133+
2134+
2135+
- do:
2136+
search:
2137+
index: nested_dense_vector_synthetic_test
2138+
body:
2139+
query:
2140+
match_all: {}
2141+
2142+
- match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] }
2143+
- match: { hits.hits.0._source.parent.0.text: "foo" }
2144+
- match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] }
2145+
- match: { hits.hits.0._source.parent.1.text: "bar" }
2146+
- is_false: hits.hits.1._source.parent.0.vector
2147+
- match: { hits.hits.1._source.parent.0.text: "foo" }
2148+
- match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] }
2149+
- match: { hits.hits.1._source.parent.1.text: "bar" }
2150+
- match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] }
2151+
- is_false: hits.hits.2._source.parent.0.text
2152+
- match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] }
2153+
- match: { hits.hits.2._source.parent.1.text: "bar" }

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2350,6 +2350,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
23502350
magnitudeReader = leafReader.getNumericDocValues(fullPath() + COSINE_MAGNITUDE_FIELD_SUFFIX);
23512351
}
23522352
return docId -> {
2353+
if (values.docID() > docId) {
2354+
return hasValue = false;
2355+
}
2356+
if (values.docID() == docId) {
2357+
return hasValue = true;
2358+
}
23532359
hasValue = docId == values.advance(docId);
23542360
hasMagnitude = hasValue && magnitudeReader != null && magnitudeReader.advanceExact(docId);
23552361
return hasValue;
@@ -2358,6 +2364,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
23582364
byteVectorValues = leafReader.getByteVectorValues(fullPath());
23592365
if (byteVectorValues != null) {
23602366
return docId -> {
2367+
if (byteVectorValues.docID() > docId) {
2368+
return hasValue = false;
2369+
}
2370+
if (byteVectorValues.docID() == docId) {
2371+
return hasValue = true;
2372+
}
23612373
hasValue = docId == byteVectorValues.advance(docId);
23622374
return hasValue;
23632375
};
@@ -2419,6 +2431,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
24192431
return null;
24202432
}
24212433
return docId -> {
2434+
if (values.docID() > docId) {
2435+
return hasValue = false;
2436+
}
2437+
if (values.docID() == docId) {
2438+
return hasValue = true;
2439+
}
24222440
hasValue = docId == values.advance(docId);
24232441
return hasValue;
24242442
};

server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,11 @@ public class CreateIndexCapabilities {
2626
*/
2727
private static final String LOOKUP_INDEX_MODE_CAPABILITY = "lookup_index_mode";
2828

29-
public static Set<String> CAPABILITIES = Set.of(LOGSDB_INDEX_MODE_CAPABILITY, LOOKUP_INDEX_MODE_CAPABILITY);
29+
/**
 * Signals that synthetic source correctly handles {@code dense_vector} fields inside nested objects.
 * The value must match the capability name required by the REST YAML tests
 * ({@code synthetic_nested_dense_vector_bug_fix}); otherwise those tests are always skipped.
 */
private static final String NESTED_DENSE_VECTOR_SYNTHETIC_TEST = "synthetic_nested_dense_vector_bug_fix";
30+
31+
public static final Set<String> CAPABILITIES = Set.of(
32+
LOGSDB_INDEX_MODE_CAPABILITY,
33+
LOOKUP_INDEX_MODE_CAPABILITY,
34+
NESTED_DENSE_VECTOR_SYNTHETIC_TEST
35+
);
3036
}

0 commit comments

Comments
 (0)