diff --git a/docs/changelog/122425.yaml b/docs/changelog/122425.yaml new file mode 100644 index 0000000000000..a0e590dcdc36c --- /dev/null +++ b/docs/changelog/122425.yaml @@ -0,0 +1,5 @@ +pr: 122425 +summary: Fix synthetic source bug that would mishandle nested `dense_vector` fields +area: Mapping +type: bug +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 90c0bc87c8ea3..97e6e96dc16f2 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -2118,3 +2118,143 @@ create index with use_synthetic_source: flush: false - gt: { test.store_size_in_bytes: 0 } - is_false: test.fields._recovery_source +--- +"Nested synthetic source with indexed dense vectors": + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: PUT + path: /{index} + capabilities: [ synthetic_nested_dense_vector_bug_fix ] + reason: "Requires synthetic source bugfix for dense vectors in nested objects" + - do: + indices.create: + index: nested_dense_vector_synthetic_test + body: + mappings: + properties: + parent: + type: nested + properties: + vector: + type: dense_vector + index: true + similarity: l2_norm + text: + type: text + settings: + index: + mapping: + source: + mode: synthetic + - do: + index: + index: nested_dense_vector_synthetic_test + id: 0 + refresh: true + body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + - do: + index: + index: nested_dense_vector_synthetic_test + id: 1 + refresh: true + body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + - do: + index: + index: nested_dense_vector_synthetic_test + id: 2 + refresh: true + body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + + - do: + search: + index: nested_dense_vector_synthetic_test + body: + query: + match_all: {} + + - match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] } + - match: { hits.hits.0._source.parent.0.text: "foo" } + - match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.0._source.parent.1.text: "bar" } + - is_false: hits.hits.1._source.parent.0.vector + - match: { hits.hits.1._source.parent.0.text: "foo" } + - match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.1._source.parent.1.text: "bar" } + - match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] } + - is_false: hits.hits.2._source.parent.0.text + - match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.2._source.parent.1.text: "bar" } +--- +"Nested synthetic source with un-indexed dense vectors": + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: PUT + path: /{index} + capabilities: [ synthetic_nested_dense_vector_bug_fix ] + reason: "Requires synthetic source bugfix for dense vectors in nested objects" + - do: + indices.create: + index: nested_dense_vector_synthetic_test + body: + mappings: + properties: + parent: + type: nested + properties: + vector: + type: dense_vector + index: false + text: + type: text + settings: + index: + mapping: + source: + mode: synthetic + - do: + index: + index: nested_dense_vector_synthetic_test + id: 0 + refresh: true + body: { "parent": [ { "vector": [ 1, 2 ],"text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + - do: + index: + index: nested_dense_vector_synthetic_test + id: 1 + refresh: true + body: { "parent": [ { "text": "foo" }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + - do: + index: + index: nested_dense_vector_synthetic_test + id: 2 + refresh: true + body: { "parent": [ { "vector": [ 1, 2 ] }, { "vector": [ 2, 2 ], "text": "bar" } ] } + + + - do: + search: + index: nested_dense_vector_synthetic_test + body: + query: + match_all: {} + + - match: { hits.hits.0._source.parent.0.vector: [ 1.0, 2.0 ] } + - match: { hits.hits.0._source.parent.0.text: "foo" } + - match: { hits.hits.0._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.0._source.parent.1.text: "bar" } + - is_false: hits.hits.1._source.parent.0.vector + - match: { hits.hits.1._source.parent.0.text: "foo" } + - match: { hits.hits.1._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.1._source.parent.1.text: "bar" } + - match: {hits.hits.2._source.parent.0.vector: [ 1.0, 2.0 ] } + - is_false: hits.hits.2._source.parent.0.text + - match: { hits.hits.2._source.parent.1.vector: [ 2.0, 2.0 ] } + - match: { hits.hits.2._source.parent.1.text: "bar" } + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 6d01ea21a6478..ae6140a1cafe5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -2405,6 +2405,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf magnitudeReader = leafReader.getNumericDocValues(fullPath() + COSINE_MAGNITUDE_FIELD_SUFFIX); } return docId -> { + if (values.docID() > docId) { + return hasValue = false; + } + if (values.docID() == docId) { + return hasValue = true; + } hasValue = docId == values.advance(docId); hasMagnitude = hasValue && magnitudeReader != null && magnitudeReader.advanceExact(docId); return hasValue; @@ -2413,6 +2419,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf byteVectorValues = leafReader.getByteVectorValues(fullPath()); if (byteVectorValues != null) { return docId -> { + if (byteVectorValues.docID() > docId) { + return hasValue = false; + } + if (byteVectorValues.docID() == docId) { + return hasValue = true; + } hasValue = docId == byteVectorValues.advance(docId); return hasValue; }; @@ -2474,6 +2486,12 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf return null; } return docId -> { + if (values.docID() > docId) { + return hasValue = false; + } + if (values.docID() == docId) { + return hasValue = true; + } hasValue = docId == values.advance(docId); return hasValue; }; diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java index 900a352d42f30..334e68648d853 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java @@ -26,5 +26,11 @@ public class CreateIndexCapabilities { */ private static final String LOOKUP_INDEX_MODE_CAPABILITY = "lookup_index_mode"; - public static Set CAPABILITIES = Set.of(LOGSDB_INDEX_MODE_CAPABILITY, LOOKUP_INDEX_MODE_CAPABILITY); + private static final String NESTED_DENSE_VECTOR_SYNTHETIC_TEST = "nested_dense_vector_synthetic_test"; + + public static final Set CAPABILITIES = Set.of( + LOGSDB_INDEX_MODE_CAPABILITY, + LOOKUP_INDEX_MODE_CAPABILITY, + NESTED_DENSE_VECTOR_SYNTHETIC_TEST + ); }