Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/122425.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 122425
summary: Fix synthetic source bug that would mishandle nested `dense_vector` fields
area: "Mapping"
type: bug
issues: [122383]
Original file line number Diff line number Diff line change
Expand Up @@ -2008,3 +2008,143 @@ create index with use_synthetic_source:
flush: false
- gt: { test.store_size_in_bytes: 0 }
- is_false: test.fields._recovery_source
---
# Regression test: with synthetic `_source`, every object of a nested field must
# get back exactly the `dense_vector` / `text` values it was indexed with, even
# when a sibling nested object omits one of the fields.
# This variant uses an indexed vector (`index: true`, `l2_norm` similarity).
"Nested synthetic source with indexed dense vectors":
  - requires:
      test_runner_features: [ capabilities ]
      capabilities:
        - method: PUT
          path: /{index}
          # Must match the capability string registered for the create-index API
          # (CreateIndexCapabilities); an unregistered name means this
          # precondition is never met and the test is silently skipped.
          capabilities: [ nested_dense_vector_synthetic_test ]
      reason: "Requires synthetic source bugfix for dense vectors in nested objects"

  # Index with a nested `parent` object holding a vector and a text field,
  # using synthetic source.
  - do:
      indices.create:
        index: nested_dense_vector_synthetic_test
        body:
          mappings:
            properties:
              parent:
                type: nested
                properties:
                  vector:
                    type: dense_vector
                    index: true
                    similarity: l2_norm
                  text:
                    type: text
          settings:
            index:
              mapping:
                source:
                  mode: synthetic

  # Doc 0: both nested objects carry a vector and a text value.
  - do:
      index:
        index: nested_dense_vector_synthetic_test
        id: 0
        refresh: true
        body: { "parent": [ { "vector": [ 1, 2 ], "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }

  # Doc 1: the first nested object has no vector.
  - do:
      index:
        index: nested_dense_vector_synthetic_test
        id: 1
        refresh: true
        body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }

  # Doc 2: the first nested object has no text.
  - do:
      index:
        index: nested_dense_vector_synthetic_test
        id: 2
        refresh: true
        body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] }

  # NOTE(review): the assertions below assume hits come back in indexing order
  # (ids 0, 1, 2); the search specifies no explicit sort - confirm.
  - do:
      search:
        index: nested_dense_vector_synthetic_test
        body:
          query:
            match_all: {}

  # Doc 0: everything present.
  - match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] }
  - match: { hits.hits.0._source.parent.0.text: "foo" }
  - match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] }
  - match: { hits.hits.0._source.parent.1.text: "bar" }
  # Doc 1: the missing vector must stay missing, not leak in from a sibling.
  - is_false: hits.hits.1._source.parent.0.vector
  - match: { hits.hits.1._source.parent.0.text: "foo" }
  - match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] }
  - match: { hits.hits.1._source.parent.1.text: "bar" }
  # Doc 2: the missing text must stay missing while the vector is preserved.
  - match: { hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] }
  - is_false: hits.hits.2._source.parent.0.text
  - match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] }
  - match: { hits.hits.2._source.parent.1.text: "bar" }
---
# Regression test: with synthetic `_source`, every object of a nested field must
# get back exactly the `dense_vector` / `text` values it was indexed with, even
# when a sibling nested object omits one of the fields.
# This variant uses a vector that is not indexed (`index: false`).
"Nested synthetic source with un-indexed dense vectors":
  - requires:
      test_runner_features: [ capabilities ]
      capabilities:
        - method: PUT
          path: /{index}
          # Must match the capability string registered for the create-index API
          # (CreateIndexCapabilities); an unregistered name means this
          # precondition is never met and the test is silently skipped.
          capabilities: [ nested_dense_vector_synthetic_test ]
      reason: "Requires synthetic source bugfix for dense vectors in nested objects"

  # Index with a nested `parent` object holding a vector and a text field,
  # using synthetic source.
  - do:
      indices.create:
        index: nested_dense_vector_synthetic_test
        body:
          mappings:
            properties:
              parent:
                type: nested
                properties:
                  vector:
                    type: dense_vector
                    index: false
                  text:
                    type: text
          settings:
            index:
              mapping:
                source:
                  mode: synthetic

  # Doc 0: both nested objects carry a vector and a text value.
  - do:
      index:
        index: nested_dense_vector_synthetic_test
        id: 0
        refresh: true
        body: { "parent": [ { "vector": [ 1, 2 ], "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }

  # Doc 1: the first nested object has no vector.
  - do:
      index:
        index: nested_dense_vector_synthetic_test
        id: 1
        refresh: true
        body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] }

  # Doc 2: the first nested object has no text.
  - do:
      index:
        index: nested_dense_vector_synthetic_test
        id: 2
        refresh: true
        body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] }

  # NOTE(review): the assertions below assume hits come back in indexing order
  # (ids 0, 1, 2); the search specifies no explicit sort - confirm.
  - do:
      search:
        index: nested_dense_vector_synthetic_test
        body:
          query:
            match_all: {}

  # Doc 0: everything present.
  - match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] }
  - match: { hits.hits.0._source.parent.0.text: "foo" }
  - match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] }
  - match: { hits.hits.0._source.parent.1.text: "bar" }
  # Doc 1: the missing vector must stay missing, not leak in from a sibling.
  - is_false: hits.hits.1._source.parent.0.vector
  - match: { hits.hits.1._source.parent.0.text: "foo" }
  - match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] }
  - match: { hits.hits.1._source.parent.1.text: "bar" }
  # Doc 2: the missing text must stay missing while the vector is preserved.
  - match: { hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] }
  - is_false: hits.hits.2._source.parent.0.text
  - match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] }
  - match: { hits.hits.2._source.parent.1.text: "bar" }

Original file line number Diff line number Diff line change
Expand Up @@ -2404,6 +2404,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
}
KnnVectorValues.DocIndexIterator iterator = values.iterator();
return docId -> {
if (iterator.docID() > docId) {
return hasValue = false;
}
if (iterator.docID() == docId) {
return hasValue = true;
}
hasValue = docId == iterator.advance(docId);
hasMagnitude = hasValue && magnitudeReader != null && magnitudeReader.advanceExact(docId);
ord = iterator.index();
Expand All @@ -2414,6 +2420,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
if (byteVectorValues != null) {
KnnVectorValues.DocIndexIterator iterator = byteVectorValues.iterator();
return docId -> {
if (iterator.docID() > docId) {
return hasValue = false;
}
if (iterator.docID() == docId) {
return hasValue = true;
}
hasValue = docId == iterator.advance(docId);
ord = iterator.index();
return hasValue;
Expand Down Expand Up @@ -2476,6 +2488,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
return null;
}
return docId -> {
if (values.docID() > docId) {
return hasValue = false;
}
if (values.docID() == docId) {
return hasValue = true;
}
hasValue = docId == values.advance(docId);
return hasValue;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,11 @@ public class CreateIndexCapabilities {
*/
private static final String LOOKUP_INDEX_MODE_CAPABILITY = "lookup_index_mode";

public static final Set<String> CAPABILITIES = Set.of(LOGSDB_INDEX_MODE_CAPABILITY, LOOKUP_INDEX_MODE_CAPABILITY);
private static final String NESTED_DENSE_VECTOR_SYNTHETIC_TEST = "nested_dense_vector_synthetic_test";

public static final Set<String> CAPABILITIES = Set.of(
LOGSDB_INDEX_MODE_CAPABILITY,
LOOKUP_INDEX_MODE_CAPABILITY,
NESTED_DENSE_VECTOR_SYNTHETIC_TEST
);
}