Skip to content

Commit d1b2f13

Browse files
committed
Fix synthetic source bug that would mishandle nested dense_vector fields (elastic#122425)
When utilizing synthetic source with nested fields, we attempt to rebuild the child values in addition to all the parent values. While this generally works well, it is possible that certain values are missing from some child docs. In that case we would advance the vector values iterator incorrectly, resulting in seemingly missing values or potentially exceptions indicating EOFs. closes: elastic#122383 (cherry picked from commit f5c901e)
1 parent 2bf93c0 commit d1b2f13

File tree

4 files changed

+170
-1
lines changed

4 files changed

+170
-1
lines changed

docs/changelog/122425.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 122425
2+
summary: Fix synthetic source bug that would mishandle nested `dense_vector` fields
3+
area: Mapping
4+
type: bug
5+
issues: [122383]

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2118,3 +2118,143 @@ create index with use_synthetic_source:
21182118
flush: false
21192119
- gt: { test.store_size_in_bytes: 0 }
21202120
- is_false: test.fields._recovery_source
2121+
---
2122+
"Nested synthetic source with indexed dense vectors":
2123+
- requires:
2124+
test_runner_features: [ capabilities ]
2125+
capabilities:
2126+
- method: PUT
2127+
path: /{index}
2128+
capabilities: [ synthetic_nested_dense_vector_bug_fix ]
2129+
reason: "Requires synthetic source bugfix for dense vectors in nested objects"
2130+
- do:
2131+
indices.create:
2132+
index: nested_dense_vector_synthetic_test
2133+
body:
2134+
mappings:
2135+
properties:
2136+
parent:
2137+
type: nested
2138+
properties:
2139+
vector:
2140+
type: dense_vector
2141+
index: true
2142+
similarity: l2_norm
2143+
text:
2144+
type: text
2145+
settings:
2146+
index:
2147+
mapping:
2148+
source:
2149+
mode: synthetic
2150+
- do:
2151+
index:
2152+
index: nested_dense_vector_synthetic_test
2153+
id: 0
2154+
refresh: true
2155+
body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2156+
2157+
- do:
2158+
index:
2159+
index: nested_dense_vector_synthetic_test
2160+
id: 1
2161+
refresh: true
2162+
body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2163+
2164+
- do:
2165+
index:
2166+
index: nested_dense_vector_synthetic_test
2167+
id: 2
2168+
refresh: true
2169+
body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2170+
2171+
2172+
- do:
2173+
search:
2174+
index: nested_dense_vector_synthetic_test
2175+
body:
2176+
query:
2177+
match_all: {}
2178+
2179+
- match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] }
2180+
- match: { hits.hits.0._source.parent.0.text: "foo" }
2181+
- match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] }
2182+
- match: { hits.hits.0._source.parent.1.text: "bar" }
2183+
- is_false: hits.hits.1._source.parent.0.vector
2184+
- match: { hits.hits.1._source.parent.0.text: "foo" }
2185+
- match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] }
2186+
- match: { hits.hits.1._source.parent.1.text: "bar" }
2187+
- match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] }
2188+
- is_false: hits.hits.2._source.parent.0.text
2189+
- match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] }
2190+
- match: { hits.hits.2._source.parent.1.text: "bar" }
2191+
---
2192+
"Nested synthetic source with un-indexed dense vectors":
2193+
- requires:
2194+
test_runner_features: [ capabilities ]
2195+
capabilities:
2196+
- method: PUT
2197+
path: /{index}
2198+
capabilities: [ synthetic_nested_dense_vector_bug_fix ]
2199+
reason: "Requires synthetic source bugfix for dense vectors in nested objects"
2200+
- do:
2201+
indices.create:
2202+
index: nested_dense_vector_synthetic_test
2203+
body:
2204+
mappings:
2205+
properties:
2206+
parent:
2207+
type: nested
2208+
properties:
2209+
vector:
2210+
type: dense_vector
2211+
index: false
2212+
text:
2213+
type: text
2214+
settings:
2215+
index:
2216+
mapping:
2217+
source:
2218+
mode: synthetic
2219+
- do:
2220+
index:
2221+
index: nested_dense_vector_synthetic_test
2222+
id: 0
2223+
refresh: true
2224+
body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2225+
2226+
- do:
2227+
index:
2228+
index: nested_dense_vector_synthetic_test
2229+
id: 1
2230+
refresh: true
2231+
body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2232+
2233+
- do:
2234+
index:
2235+
index: nested_dense_vector_synthetic_test
2236+
id: 2
2237+
refresh: true
2238+
body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] }
2239+
2240+
2241+
- do:
2242+
search:
2243+
index: nested_dense_vector_synthetic_test
2244+
body:
2245+
query:
2246+
match_all: {}
2247+
2248+
- match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] }
2249+
- match: { hits.hits.0._source.parent.0.text: "foo" }
2250+
- match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] }
2251+
- match: { hits.hits.0._source.parent.1.text: "bar" }
2252+
- is_false: hits.hits.1._source.parent.0.vector
2253+
- match: { hits.hits.1._source.parent.0.text: "foo" }
2254+
- match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] }
2255+
- match: { hits.hits.1._source.parent.1.text: "bar" }
2256+
- match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] }
2257+
- is_false: hits.hits.2._source.parent.0.text
2258+
- match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] }
2259+
- match: { hits.hits.2._source.parent.1.text: "bar" }
2260+

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2405,6 +2405,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
24052405
magnitudeReader = leafReader.getNumericDocValues(fullPath() + COSINE_MAGNITUDE_FIELD_SUFFIX);
24062406
}
24072407
return docId -> {
2408+
if (values.docID() > docId) {
2409+
return hasValue = false;
2410+
}
2411+
if (values.docID() == docId) {
2412+
return hasValue = true;
2413+
}
24082414
hasValue = docId == values.advance(docId);
24092415
hasMagnitude = hasValue && magnitudeReader != null && magnitudeReader.advanceExact(docId);
24102416
return hasValue;
@@ -2413,6 +2419,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
24132419
byteVectorValues = leafReader.getByteVectorValues(fullPath());
24142420
if (byteVectorValues != null) {
24152421
return docId -> {
2422+
if (byteVectorValues.docID() > docId) {
2423+
return hasValue = false;
2424+
}
2425+
if (byteVectorValues.docID() == docId) {
2426+
return hasValue = true;
2427+
}
24162428
hasValue = docId == byteVectorValues.advance(docId);
24172429
return hasValue;
24182430
};
@@ -2474,6 +2486,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
24742486
return null;
24752487
}
24762488
return docId -> {
2489+
if (values.docID() > docId) {
2490+
return hasValue = false;
2491+
}
2492+
if (values.docID() == docId) {
2493+
return hasValue = true;
2494+
}
24772495
hasValue = docId == values.advance(docId);
24782496
return hasValue;
24792497
};

server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,11 @@ public class CreateIndexCapabilities {
2626
*/
2727
private static final String LOOKUP_INDEX_MODE_CAPABILITY = "lookup_index_mode";
2828

29-
public static Set<String> CAPABILITIES = Set.of(LOGSDB_INDEX_MODE_CAPABILITY, LOOKUP_INDEX_MODE_CAPABILITY);
29+
/**
 * Capability signalling that synthetic source handles nested dense_vector fields whose values may be
 * missing from some child docs. The value must match the capability name required by the YAML REST
 * tests ("synthetic_nested_dense_vector_bug_fix"); otherwise those tests are always skipped.
 */
private static final String NESTED_DENSE_VECTOR_SYNTHETIC_TEST = "synthetic_nested_dense_vector_bug_fix";
30+
31+
public static final Set<String> CAPABILITIES = Set.of(
32+
LOGSDB_INDEX_MODE_CAPABILITY,
33+
LOOKUP_INDEX_MODE_CAPABILITY,
34+
NESTED_DENSE_VECTOR_SYNTHETIC_TEST
35+
);
3036
}

0 commit comments

Comments
 (0)