diff --git a/docs/changelog/122425.yaml b/docs/changelog/122425.yaml new file mode 100644 index 0000000000000..a0e590dcdc36c --- /dev/null +++ b/docs/changelog/122425.yaml @@ -0,0 +1,5 @@ +pr: 122425 +summary: Fix synthetic source bug that would mishandle nested `dense_vector` fields +area: Mapping +type: bug +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index d1c492caf9b48..dc476147c9601 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -2008,3 +2008,143 @@ create index with use_synthetic_source: flush: false - gt: { test.store_size_in_bytes: 0 } - is_false: test.fields._recovery_source +--- +"Nested synthetic source with indexed dense vectors": + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: PUT + path: /{index} + capabilities: [ synthetic_nested_dense_vector_bug_fix ] + reason: "Requires synthetic source bugfix for dense vectors in nested objects" + - do: + indices.create: + index: nested_dense_vector_synthetic_test + body: + mappings: + properties: + parent: + type: nested + properties: + vector: + type: dense_vector + index: true + similarity: l2_norm + text: + type: text + settings: + index: + mapping: + source: + mode: synthetic + - do: + index: + index: nested_dense_vector_synthetic_test + id: 0 + refresh: true + body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + - do: + index: + index: nested_dense_vector_synthetic_test + id: 1 + refresh: true + body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + - do: + index: + index: nested_dense_vector_synthetic_test + id: 2 + refresh: true + body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + + - do: + search: + index: nested_dense_vector_synthetic_test + body: + query: + match_all: {} + + - match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] } + - match: { hits.hits.0._source.parent.0.text: "foo" } + - match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.0._source.parent.1.text: "bar" } + - is_false: hits.hits.1._source.parent.0.vector + - match: { hits.hits.1._source.parent.0.text: "foo" } + - match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.1._source.parent.1.text: "bar" } + - match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] } + - is_false: hits.hits.2._source.parent.0.text + - match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.2._source.parent.1.text: "bar" } +--- +"Nested synthetic source with un-indexed dense vectors": + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: PUT + path: /{index} + capabilities: [ synthetic_nested_dense_vector_bug_fix ] + reason: "Requires synthetic source bugfix for dense vectors in nested objects" + - do: + indices.create: + index: nested_dense_vector_synthetic_test + body: + mappings: + properties: + parent: + type: nested + properties: + vector: + type: dense_vector + index: false + text: + type: text + settings: + index: + mapping: + source: + mode: synthetic + - do: + index: + index: nested_dense_vector_synthetic_test + id: 0 + refresh: true + body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + - do: + index: + index: nested_dense_vector_synthetic_test + id: 1 + refresh: true + body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + - do: + index: + index: nested_dense_vector_synthetic_test + id: 2 + refresh: true + body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + + - do: + search: + index: nested_dense_vector_synthetic_test + body: + query: + match_all: {} + + - match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] } + - match: { hits.hits.0._source.parent.0.text: "foo" } + - match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.0._source.parent.1.text: "bar" } + - is_false: hits.hits.1._source.parent.0.vector + - match: { hits.hits.1._source.parent.0.text: "foo" } + - match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.1._source.parent.1.text: "bar" } + - match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] } + - is_false: hits.hits.2._source.parent.0.text + - match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.2._source.parent.1.text: "bar" } + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 0d514408c912f..ce41c2164e205 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -2404,6 +2404,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf } KnnVectorValues.DocIndexIterator iterator = values.iterator(); return docId -> { + if (iterator.docID() > docId) { + return hasValue = false; + } + if (iterator.docID() == docId) { + return hasValue = true; + } hasValue = docId == iterator.advance(docId); hasMagnitude = hasValue && magnitudeReader != null && magnitudeReader.advanceExact(docId); ord = iterator.index(); @@ -2414,6 +2420,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf if (byteVectorValues != null) { KnnVectorValues.DocIndexIterator iterator = byteVectorValues.iterator(); return docId -> { + if (iterator.docID() > docId) { + return hasValue = false; + } + if (iterator.docID() == docId) { + return hasValue = true; + } hasValue = docId == iterator.advance(docId); ord = iterator.index(); return hasValue; @@ -2476,6 +2488,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf return null; } return docId -> { + if (values.docID() > docId) { + return hasValue = false; + } + if (values.docID() == docId) { + return hasValue = true; + } hasValue = docId == values.advance(docId); return hasValue; }; diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java index 9083c781ae167..334e68648d853 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java @@ -26,5 +26,11 @@ public class CreateIndexCapabilities { */ private static final String LOOKUP_INDEX_MODE_CAPABILITY = "lookup_index_mode"; - public static final Set CAPABILITIES = Set.of(LOGSDB_INDEX_MODE_CAPABILITY, LOOKUP_INDEX_MODE_CAPABILITY); + private static final String NESTED_DENSE_VECTOR_SYNTHETIC_TEST = "nested_dense_vector_synthetic_test"; + + public static final Set CAPABILITIES = Set.of( + LOGSDB_INDEX_MODE_CAPABILITY, + LOOKUP_INDEX_MODE_CAPABILITY, + NESTED_DENSE_VECTOR_SYNTHETIC_TEST + ); }