diff --git a/docs/changelog/117272.yaml b/docs/changelog/117272.yaml new file mode 100644 index 0000000000000..06f6ef039442b --- /dev/null +++ b/docs/changelog/117272.yaml @@ -0,0 +1,5 @@ +pr: 117272 +summary: Handle semantic text partial updates when the field is in an object +area: Ingest +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java b/server/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java index 0749512635f83..e703cbb21d9f5 100644 --- a/server/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java +++ b/server/src/main/java/org/elasticsearch/action/update/TransportUpdateAction.java @@ -44,6 +44,7 @@ import org.elasticsearch.index.IndexService; import org.elasticsearch.index.engine.VersionConflictEngineException; import org.elasticsearch.index.mapper.InferenceFieldMapper; +import org.elasticsearch.index.mapper.InferenceFieldMapperUtil; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.shard.IndexShard; @@ -412,7 +413,11 @@ private static UpdateHelper.Result deleteInferenceResults( // This has two important side effects: // - The inference field value will remain parsable by its mapper // - The inference results will be removed, forcing them to be re-generated downstream - updatedSource.put(inferenceFieldName, inferenceFieldMapper.getOriginalValue(updatedSource)); + InferenceFieldMapperUtil.insertValue( + inferenceFieldName, + updatedSource, + inferenceFieldMapper.getOriginalValue(updatedSource) + ); updatedSourceModified = true; break; } diff --git a/server/src/main/java/org/elasticsearch/action/update/UpdateHelper.java b/server/src/main/java/org/elasticsearch/action/update/UpdateHelper.java index d32e102b2e18b..dc541d83be773 100644 --- a/server/src/main/java/org/elasticsearch/action/update/UpdateHelper.java +++ b/server/src/main/java/org/elasticsearch/action/update/UpdateHelper.java @@ -14,15 +14,18 @@ import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.delete.DeleteRequest; import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Tuple; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.engine.DocumentMissingException; import org.elasticsearch.index.engine.DocumentSourceMissingException; import org.elasticsearch.index.get.GetResult; +import org.elasticsearch.index.mapper.InferenceFieldMapperUtil; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.RoutingFieldMapper; @@ -38,7 +41,11 @@ import org.elasticsearch.xcontent.XContentType; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.function.LongSupplier; @@ -183,12 +190,27 @@ Result prepareUpdateIndexRequest(IndexShard indexShard, UpdateRequest request, G final Tuple> sourceAndContent = XContentHelper.convertToMap(getResult.internalSourceRef(), true); final XContentType updateSourceContentType = sourceAndContent.v1(); final Map updatedSourceAsMap = sourceAndContent.v2(); + final Map changes = currentRequest.sourceAsMap(XContentParserDecorator.NOOP); + final MappingLookup mappingLookup = indexShard.mapperService().mappingLookup(); + + // Get and remove inference field updates from the changes map so that we can apply these updates in a normalized way. + // Otherwise, XContentHelper.update may merge the changes in a way that results in multiple independent map entries for an inference + // field, which breaks downstream source transformation. + final Map inferenceFieldUpdates = new HashMap<>(); + final Map filteredChanges = getAndRemoveInferenceFieldUpdates( + changes, + mappingLookup.inferenceFields().values(), + inferenceFieldUpdates + ); + boolean noop = XContentHelper.update(updatedSourceAsMap, filteredChanges, detectNoop) == false; - final boolean noop = XContentHelper.update( - updatedSourceAsMap, - currentRequest.sourceAsMap(XContentParserDecorator.NOOP), - detectNoop - ) == false; + if (inferenceFieldUpdates.isEmpty() == false) { + // Apply inference field updates in a normalized way + noop = false; + for (var entry : inferenceFieldUpdates.entrySet()) { + InferenceFieldMapperUtil.insertValue(entry.getKey(), updatedSourceAsMap, entry.getValue()); + } + } // We can only actually turn the update into a noop if detectNoop is true to preserve backwards compatibility and to handle cases // where users repopulating multi-fields or adding synonyms, etc. @@ -205,7 +227,7 @@ Result prepareUpdateIndexRequest(IndexShard indexShard, UpdateRequest request, G extractGetResult( request, request.index(), - indexShard.mapperService().mappingLookup(), + mappingLookup, getResult.getSeqNo(), getResult.getPrimaryTerm(), getResult.getVersion(), @@ -229,6 +251,41 @@ Result prepareUpdateIndexRequest(IndexShard indexShard, UpdateRequest request, G } } + private static Map getAndRemoveInferenceFieldUpdates( + Map changes, + Collection inferenceFields, + Map inferenceFieldUpdates + ) { + Map filteredChanges = changes; + for (InferenceFieldMetadata inferenceFieldMetadata : inferenceFields) { + String inferenceFieldName = inferenceFieldMetadata.getName(); + Object inferenceFieldValue = XContentMapValues.extractValue(inferenceFieldName, changes, new ExplicitNullValue()); + if (inferenceFieldValue != null) { + inferenceFieldUpdates.put(inferenceFieldName, replaceExplicitNullValues(inferenceFieldValue)); + } + } + + if (inferenceFieldUpdates.isEmpty() == false) { + filteredChanges = XContentMapValues.filter(changes, null, inferenceFieldUpdates.keySet().toArray(new String[0])); + } + + return filteredChanges; + } + + private static class ExplicitNullValue {} + + private static Object replaceExplicitNullValues(Object inferenceFieldValue) { + if (inferenceFieldValue instanceof ExplicitNullValue) { + return null; + } else if (inferenceFieldValue instanceof List listValue) { + List processedList = new ArrayList<>(listValue.size()); + listValue.forEach(v -> processedList.add(replaceExplicitNullValues(v))); + return processedList; + } else { + return inferenceFieldValue; + } + } + /** * Prepare the request for updating an existing document using a script. Executes the script and returns a {@code Result} containing * either a new {@code IndexRequest} or {@code DeleteRequest} (depending on the script's returned "op" value) to be executed on the diff --git a/server/src/main/java/org/elasticsearch/index/mapper/InferenceFieldMapperUtil.java b/server/src/main/java/org/elasticsearch/index/mapper/InferenceFieldMapperUtil.java new file mode 100644 index 0000000000000..662e373c8813e --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/InferenceFieldMapperUtil.java @@ -0,0 +1,129 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class InferenceFieldMapperUtil { + private InferenceFieldMapperUtil() {} + + /** + *

+ * Insert or replace the path's value in the map with the provided new value. The map will be modified in-place. + * If the complete path does not exist in the map, it will be added to the deepest (sub-)map possible. + *

+ *

+ * For example, given the map: + *

+ *
+     * {
+     *   "path1": {
+     *     "path2": {
+     *       "key1": "value1"
+     *     }
+     *   }
+     * }
+     * 
+ *

+ * And the caller wanted to insert {@code "path1.path2.path3.key2": "value2"}, the method would emit the modified map: + *

+ *
+     * {
+     *   "path1": {
+     *     "path2": {
+     *       "key1": "value1",
+     *       "path3.key2": "value2"
+     *     }
+     *   }
+     * }
+     * 
+ * + * @param path the value's path in the map. + * @param map the map to search and modify in-place. + * @param newValue the new value to assign to the path. + * + * @throws IllegalArgumentException If either the path cannot be fully traversed or there is ambiguity about where to insert the new + * value. + */ + public static void insertValue(String path, Map map, Object newValue) { + String[] pathElements = path.split("\\."); + if (pathElements.length == 0) { + return; + } + + List suffixMaps = extractSuffixMaps(pathElements, 0, map); + if (suffixMaps.isEmpty()) { + // This should never happen. Throw in case it does for some reason. + throw new IllegalStateException("extractSuffixMaps returned an empty suffix map list"); + } else if (suffixMaps.size() == 1) { + SuffixMap suffixMap = suffixMaps.getFirst(); + suffixMap.map().put(suffixMap.suffix(), newValue); + } else { + throw new IllegalArgumentException( + "Path [" + path + "] could be inserted in " + suffixMaps.size() + " distinct ways, it is ambiguous which one to use" + ); + } + } + + private record SuffixMap(String suffix, Map map) {} + + private static List extractSuffixMaps(String[] pathElements, int index, Object currentValue) { + if (currentValue instanceof List valueList) { + List suffixMaps = new ArrayList<>(valueList.size()); + for (Object o : valueList) { + suffixMaps.addAll(extractSuffixMaps(pathElements, index, o)); + } + + return suffixMaps; + } else if (currentValue instanceof Map) { + @SuppressWarnings("unchecked") + Map map = (Map) currentValue; + List suffixMaps = new ArrayList<>(map.size()); + + String key = pathElements[index]; + while (index < pathElements.length) { + if (map.containsKey(key)) { + if (index + 1 == pathElements.length) { + // We found the complete path + suffixMaps.add(new SuffixMap(key, map)); + } else { + // We've matched that path partially, keep traversing to try to match it fully + suffixMaps.addAll(extractSuffixMaps(pathElements, index + 1, map.get(key))); + } + } + + if (++index < pathElements.length) { + key += "." + pathElements[index]; + } + } + + if (suffixMaps.isEmpty()) { + // We checked for all remaining elements in the path, and they do not exist. This means we found a leaf map that we should + // add the value to. + suffixMaps.add(new SuffixMap(key, map)); + } + + return suffixMaps; + } else { + throw new IllegalArgumentException( + "Path [" + + String.join(".", Arrays.copyOfRange(pathElements, 0, index)) + + "] has value [" + + currentValue + + "] of type [" + + currentValue.getClass().getSimpleName() + + "], which cannot be traversed into further" + ); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/InferenceFieldMapperUtilTests.java b/server/src/test/java/org/elasticsearch/index/mapper/InferenceFieldMapperUtilTests.java new file mode 100644 index 0000000000000..1af004d439e91 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/InferenceFieldMapperUtilTests.java @@ -0,0 +1,353 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.json.JsonXContent; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.equalTo; + +public class InferenceFieldMapperUtilTests extends ESTestCase { + public void testInsertValueMapTraversal() throws IOException { + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("test", "value").endObject(); + + Map map = toSourceMap(Strings.toString(builder)); + InferenceFieldMapperUtil.insertValue("test", map, "value2"); + assertThat(getMapValue(map, "test"), equalTo("value2")); + InferenceFieldMapperUtil.insertValue("something.else", map, "something_else_value"); + assertThat(getMapValue(map, "something\\.else"), equalTo("something_else_value")); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + builder.startObject("path1").startObject("path2").field("test", "value").endObject().endObject(); + builder.endObject(); + + Map map = toSourceMap(Strings.toString(builder)); + InferenceFieldMapperUtil.insertValue("path1.path2.test", map, "value2"); + assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); + InferenceFieldMapperUtil.insertValue("path1.path2.test_me", map, "test_me_value"); + assertThat(getMapValue(map, "path1.path2.test_me"), equalTo("test_me_value")); + InferenceFieldMapperUtil.insertValue("path1.non_path2.test", map, "test_value"); + assertThat(getMapValue(map, "path1.non_path2\\.test"), equalTo("test_value")); + + InferenceFieldMapperUtil.insertValue("path1.path2", map, Map.of("path3", "bar")); + assertThat(getMapValue(map, "path1.path2"), equalTo(Map.of("path3", "bar"))); + + InferenceFieldMapperUtil.insertValue("path1", map, "baz"); + assertThat(getMapValue(map, "path1"), equalTo("baz")); + + InferenceFieldMapperUtil.insertValue("path3.path4", map, Map.of("test", "foo")); + assertThat(getMapValue(map, "path3\\.path4"), equalTo(Map.of("test", "foo"))); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + builder.startObject("path1").array("test", "value1", "value2").endObject(); + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + InferenceFieldMapperUtil.insertValue("path1.test", map, List.of("value3", "value4", "value5")); + assertThat(getMapValue(map, "path1.test"), equalTo(List.of("value3", "value4", "value5"))); + + InferenceFieldMapperUtil.insertValue("path2.test", map, List.of("value6", "value7", "value8")); + assertThat(getMapValue(map, "path2\\.test"), equalTo(List.of("value6", "value7", "value8"))); + } + } + + public void testInsertValueListTraversal() throws IOException { + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1"); + { + builder.startArray("path2"); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + } + builder.endObject(); + } + { + builder.startObject("path3"); + { + builder.startArray("path4"); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + InferenceFieldMapperUtil.insertValue("path1.path2.test", map, "value2"); + assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); + InferenceFieldMapperUtil.insertValue("path1.path2.test2", map, "value3"); + assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); + assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(Map.of("test", "value2", "test2", "value3")))); + + InferenceFieldMapperUtil.insertValue("path3.path4.test", map, "value4"); + assertThat(getMapValue(map, "path3.path4.test"), equalTo("value4")); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1"); + { + builder.startArray("path2"); + builder.startArray(); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + InferenceFieldMapperUtil.insertValue("path1.path2.test", map, "value2"); + assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); + InferenceFieldMapperUtil.insertValue("path1.path2.test2", map, "value3"); + assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); + assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(List.of(Map.of("test", "value2", "test2", "value3"))))); + } + } + + public void testInsertValueFieldsWithDots() throws IOException { + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("xxx.yyy", "value1").endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + InferenceFieldMapperUtil.insertValue("xxx.yyy", map, "value2"); + assertThat(getMapValue(map, "xxx\\.yyy"), equalTo("value2")); + + InferenceFieldMapperUtil.insertValue("xxx", map, "value3"); + assertThat(getMapValue(map, "xxx"), equalTo("value3")); + } + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1.path2"); + { + builder.startObject("path3.path4"); + builder.field("test", "value1"); + builder.endObject(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + + InferenceFieldMapperUtil.insertValue("path1.path2.path3.path4.test", map, "value2"); + assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test"), equalTo("value2")); + + InferenceFieldMapperUtil.insertValue("path1.path2.path3.path4.test2", map, "value3"); + assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test2"), equalTo("value3")); + assertThat(getMapValue(map, "path1\\.path2.path3\\.path4"), equalTo(Map.of("test", "value2", "test2", "value3"))); + } + } + + public void testInsertValueAmbiguousPath() throws IOException { + // Mixed dotted object notation + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1.path2"); + { + builder.startObject("path3"); + builder.field("test1", "value1"); + builder.endObject(); + } + builder.endObject(); + } + { + builder.startObject("path1"); + { + builder.startObject("path2.path3"); + builder.field("test2", "value2"); + builder.endObject(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); + + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> InferenceFieldMapperUtil.insertValue("path1.path2.path3.test1", map, "value3") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + ex = assertThrows( + IllegalArgumentException.class, + () -> InferenceFieldMapperUtil.insertValue("path1.path2.path3.test3", map, "value4") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + assertThat(map, equalTo(originalMap)); + } + + // traversal through lists + { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1.path2"); + { + builder.startArray("path3"); + builder.startObject().field("test1", "value1").endObject(); + builder.endArray(); + } + builder.endObject(); + } + { + builder.startObject("path1"); + { + builder.startArray("path2.path3"); + builder.startObject().field("test2", "value2").endObject(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); + + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> InferenceFieldMapperUtil.insertValue("path1.path2.path3.test1", map, "value3") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + ex = assertThrows( + IllegalArgumentException.class, + () -> InferenceFieldMapperUtil.insertValue("path1.path2.path3.test3", map, "value4") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") + ); + + assertThat(map, equalTo(originalMap)); + } + } + + public void testInsertValueCannotTraversePath() throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); + { + builder.startObject("path1"); + { + builder.startArray("path2"); + builder.startArray(); + builder.startObject().field("test", "value1").endObject(); + builder.endArray(); + builder.endArray(); + } + builder.endObject(); + } + builder.endObject(); + Map map = toSourceMap(Strings.toString(builder)); + final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); + + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> InferenceFieldMapperUtil.insertValue("path1.path2.test.test2", map, "value2") + ); + assertThat( + ex.getMessage(), + equalTo("Path [path1.path2.test] has value [value1] of type [String], which cannot be traversed into further") + ); + + assertThat(map, equalTo(originalMap)); + } + + private Map toSourceMap(String source) throws IOException { + try (XContentParser parser = createParser(JsonXContent.jsonXContent, source)) { + return parser.map(); + } + } + + private static Object getMapValue(Map map, String key) { + // Split the path on unescaped "." chars and then unescape the escaped "." chars + final String[] pathElements = Arrays.stream(key.split("(? k.replace("\\.", ".")).toArray(String[]::new); + + Object value = null; + Object nextLayer = map; + for (int i = 0; i < pathElements.length; i++) { + if (nextLayer instanceof Map nextMap) { + value = nextMap.get(pathElements[i]); + } else if (nextLayer instanceof List nextList) { + final String pathElement = pathElements[i]; + List values = nextList.stream().flatMap(v -> { + Stream.Builder streamBuilder = Stream.builder(); + if (v instanceof List innerList) { + traverseList(innerList, streamBuilder); + } else { + streamBuilder.add(v); + } + return streamBuilder.build(); + }).filter(v -> v instanceof Map).map(v -> ((Map) v).get(pathElement)).filter(Objects::nonNull).toList(); + + if (values.isEmpty()) { + return null; + } else if (values.size() > 1) { + throw new AssertionError("List " + nextList + " contains multiple values for [" + pathElement + "]"); + } else { + value = values.getFirst(); + } + } else if (nextLayer == null) { + break; + } else { + throw new AssertionError( + "Path [" + + String.join(".", Arrays.copyOfRange(pathElements, 0, i)) + + "] has value [" + + value + + "] of type [" + + value.getClass().getSimpleName() + + "], which cannot be traversed into further" + ); + } + + nextLayer = value; + } + + return value; + } + + private static void traverseList(List list, Stream.Builder streamBuilder) { + for (Object value : list) { + if (value instanceof List innerList) { + traverseList(innerList, streamBuilder); + } else { + streamBuilder.add(value); + } + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index c82f287792a7c..c318d4e7a8f7f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -40,7 +40,8 @@ public Set getTestFeatures() { SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX, SemanticTextFieldMapper.SEMANTIC_TEXT_DELETE_FIX, SemanticTextFieldMapper.SEMANTIC_TEXT_ZERO_SIZE_FIX, - SemanticTextFieldMapper.SEMANTIC_TEXT_ALWAYS_EMIT_INFERENCE_ID_FIX + SemanticTextFieldMapper.SEMANTIC_TEXT_ALWAYS_EMIT_INFERENCE_ID_FIX, + SemanticTextFieldMapper.SEMANTIC_TEXT_PARTIAL_UPDATE_IN_OBJECT_FIX ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index dd59230e575c4..26cbef47a2559 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -29,6 +29,7 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.mapper.InferenceFieldMapperUtil; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.InferenceService; @@ -397,7 +398,7 @@ private void applyInferenceResponses(BulkItemRequest item, FieldInferenceRespons ), indexRequest.getContentType() ); - SemanticTextFieldMapper.insertValue(fieldName, newDocMap, result); + InferenceFieldMapperUtil.insertValue(fieldName, newDocMap, result); } indexRequest.source(newDocMap, indexRequest.getContentType()); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 3744bf2a6dbed..545ebfbfe1473 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -96,6 +96,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie public static final NodeFeature SEMANTIC_TEXT_ALWAYS_EMIT_INFERENCE_ID_FIX = new NodeFeature( "semantic_text.always_emit_inference_id_fix" ); + public static final NodeFeature SEMANTIC_TEXT_PARTIAL_UPDATE_IN_OBJECT_FIX = new NodeFeature( + "semantic_text.partial_update_in_object_fix" + ); public static final String CONTENT_TYPE = "semantic_text"; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; @@ -392,7 +395,7 @@ public InferenceFieldMetadata getMetadata(Set sourcePaths) { @Override public Object getOriginalValue(Map sourceAsMap) { - Object fieldValue = sourceAsMap.get(fullPath()); + Object fieldValue = XContentMapValues.extractValue(fullPath(), sourceAsMap); if (fieldValue == null) { return null; } else if (fieldValue instanceof Map == false) { @@ -629,116 +632,6 @@ public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) { } } - /** - *

- * Insert or replace the path's value in the map with the provided new value. The map will be modified in-place. - * If the complete path does not exist in the map, it will be added to the deepest (sub-)map possible. - *

- *

- * For example, given the map: - *

- *
-     * {
-     *   "path1": {
-     *     "path2": {
-     *       "key1": "value1"
-     *     }
-     *   }
-     * }
-     * 
- *

- * And the caller wanted to insert {@code "path1.path2.path3.key2": "value2"}, the method would emit the modified map: - *

- *
-     * {
-     *   "path1": {
-     *     "path2": {
-     *       "key1": "value1",
-     *       "path3.key2": "value2"
-     *     }
-     *   }
-     * }
-     * 
- * - * @param path the value's path in the map. - * @param map the map to search and modify in-place. - * @param newValue the new value to assign to the path. - * - * @throws IllegalArgumentException If either the path cannot be fully traversed or there is ambiguity about where to insert the new - * value. - */ - public static void insertValue(String path, Map map, Object newValue) { - String[] pathElements = path.split("\\."); - if (pathElements.length == 0) { - return; - } - - List suffixMaps = extractSuffixMaps(pathElements, 0, map); - if (suffixMaps.isEmpty()) { - // This should never happen. Throw in case it does for some reason. - throw new IllegalStateException("extractSuffixMaps returned an empty suffix map list"); - } else if (suffixMaps.size() == 1) { - SuffixMap suffixMap = suffixMaps.getFirst(); - suffixMap.map().put(suffixMap.suffix(), newValue); - } else { - throw new IllegalArgumentException( - "Path [" + path + "] could be inserted in " + suffixMaps.size() + " distinct ways, it is ambiguous which one to use" - ); - } - } - - private record SuffixMap(String suffix, Map map) {} - - private static List extractSuffixMaps(String[] pathElements, int index, Object currentValue) { - if (currentValue instanceof List valueList) { - List suffixMaps = new ArrayList<>(valueList.size()); - for (Object o : valueList) { - suffixMaps.addAll(extractSuffixMaps(pathElements, index, o)); - } - - return suffixMaps; - } else if (currentValue instanceof Map) { - @SuppressWarnings("unchecked") - Map map = (Map) currentValue; - List suffixMaps = new ArrayList<>(map.size()); - - String key = pathElements[index]; - while (index < pathElements.length) { - if (map.containsKey(key)) { - if (index + 1 == pathElements.length) { - // We found the complete path - suffixMaps.add(new SuffixMap(key, map)); - } else { - // We've matched that path partially, keep traversing to try to match it fully - suffixMaps.addAll(extractSuffixMaps(pathElements, index + 1, map.get(key))); - } - } - - if (++index < pathElements.length) { - key += "." + pathElements[index]; - } - } - - if (suffixMaps.isEmpty()) { - // We checked for all remaining elements in the path, and they do not exist. This means we found a leaf map that we should - // add the value to. - suffixMaps.add(new SuffixMap(key, map)); - } - - return suffixMaps; - } else { - throw new IllegalArgumentException( - "Path [" - + String.join(".", Arrays.copyOfRange(pathElements, 0, index)) - + "] has value [" - + currentValue - + "] of type [" - + currentValue.getClass().getSimpleName() - + "], which cannot be traversed into further" - ); - } - } - private static ObjectMapper createInferenceField( MapperBuilderContext context, IndexVersion indexVersionCreated, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 71ff9fc7d84cf..ae19e338d0bcc 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -57,7 +57,6 @@ import org.elasticsearch.search.NestedDocuments; import org.elasticsearch.search.SearchHit; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; @@ -66,16 +65,12 @@ import org.junit.AssumptionViolatedException; import java.io.IOException; -import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.function.BiConsumer; -import java.util.stream.Stream; import static java.util.Collections.singletonList; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD; @@ -797,266 +792,6 @@ public void testExistsQueryDenseVector() throws IOException { assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class)); } - public void testInsertValueMapTraversal() throws IOException { - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("test", "value").endObject(); - - Map map = toSourceMap(Strings.toString(builder)); - SemanticTextFieldMapper.insertValue("test", map, "value2"); - assertThat(getMapValue(map, "test"), equalTo("value2")); - SemanticTextFieldMapper.insertValue("something.else", map, "something_else_value"); - assertThat(getMapValue(map, "something\\.else"), equalTo("something_else_value")); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - builder.startObject("path1").startObject("path2").field("test", "value").endObject().endObject(); - builder.endObject(); - - Map map = toSourceMap(Strings.toString(builder)); - SemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); - assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); - SemanticTextFieldMapper.insertValue("path1.path2.test_me", map, "test_me_value"); - assertThat(getMapValue(map, "path1.path2.test_me"), equalTo("test_me_value")); - SemanticTextFieldMapper.insertValue("path1.non_path2.test", map, "test_value"); - assertThat(getMapValue(map, "path1.non_path2\\.test"), equalTo("test_value")); - - SemanticTextFieldMapper.insertValue("path1.path2", map, Map.of("path3", "bar")); - assertThat(getMapValue(map, "path1.path2"), equalTo(Map.of("path3", "bar"))); - - SemanticTextFieldMapper.insertValue("path1", map, "baz"); - assertThat(getMapValue(map, "path1"), equalTo("baz")); - - SemanticTextFieldMapper.insertValue("path3.path4", map, Map.of("test", "foo")); - assertThat(getMapValue(map, "path3\\.path4"), equalTo(Map.of("test", "foo"))); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - builder.startObject("path1").array("test", "value1", "value2").endObject(); - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextFieldMapper.insertValue("path1.test", map, List.of("value3", "value4", "value5")); - assertThat(getMapValue(map, "path1.test"), equalTo(List.of("value3", "value4", "value5"))); - - SemanticTextFieldMapper.insertValue("path2.test", map, List.of("value6", "value7", "value8")); - assertThat(getMapValue(map, "path2\\.test"), equalTo(List.of("value6", "value7", "value8"))); - } - } - - public void testInsertValueListTraversal() throws IOException { - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1"); - { - builder.startArray("path2"); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - } - builder.endObject(); - } - { - builder.startObject("path3"); - { - builder.startArray("path4"); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); - assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); - SemanticTextFieldMapper.insertValue("path1.path2.test2", map, "value3"); - assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); - assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(Map.of("test", "value2", "test2", "value3")))); - - SemanticTextFieldMapper.insertValue("path3.path4.test", map, "value4"); - assertThat(getMapValue(map, "path3.path4.test"), equalTo("value4")); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1"); - { - builder.startArray("path2"); - builder.startArray(); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextFieldMapper.insertValue("path1.path2.test", map, "value2"); - assertThat(getMapValue(map, "path1.path2.test"), equalTo("value2")); - SemanticTextFieldMapper.insertValue("path1.path2.test2", map, "value3"); - assertThat(getMapValue(map, "path1.path2.test2"), equalTo("value3")); - assertThat(getMapValue(map, "path1.path2"), equalTo(List.of(List.of(Map.of("test", "value2", "test2", "value3"))))); - } - } - - public void testInsertValueFieldsWithDots() throws IOException { - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject().field("xxx.yyy", "value1").endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextFieldMapper.insertValue("xxx.yyy", map, "value2"); - assertThat(getMapValue(map, "xxx\\.yyy"), equalTo("value2")); - - SemanticTextFieldMapper.insertValue("xxx", map, "value3"); - assertThat(getMapValue(map, "xxx"), equalTo("value3")); - } - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1.path2"); - { - builder.startObject("path3.path4"); - builder.field("test", "value1"); - builder.endObject(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - - SemanticTextFieldMapper.insertValue("path1.path2.path3.path4.test", map, "value2"); - assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test"), equalTo("value2")); - - SemanticTextFieldMapper.insertValue("path1.path2.path3.path4.test2", map, "value3"); - assertThat(getMapValue(map, "path1\\.path2.path3\\.path4.test2"), equalTo("value3")); - assertThat(getMapValue(map, "path1\\.path2.path3\\.path4"), equalTo(Map.of("test", "value2", "test2", "value3"))); - } - } - - public void testInsertValueAmbiguousPath() throws IOException { - // Mixed dotted object notation - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1.path2"); - { - builder.startObject("path3"); - builder.field("test1", "value1"); - builder.endObject(); - } - builder.endObject(); - } - { - builder.startObject("path1"); - { - builder.startObject("path2.path3"); - builder.field("test2", "value2"); - builder.endObject(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); - - IllegalArgumentException ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test1", map, "value3") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test3", map, "value4") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - assertThat(map, equalTo(originalMap)); - } - - // traversal through lists - { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1.path2"); - { - builder.startArray("path3"); - builder.startObject().field("test1", "value1").endObject(); - builder.endArray(); - } - builder.endObject(); - } - { - builder.startObject("path1"); - { - builder.startArray("path2.path3"); - builder.startObject().field("test2", "value2").endObject(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); - - IllegalArgumentException ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test1", map, "value3") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test1] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.path3.test3", map, "value4") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.path3.test3] could be inserted in 2 distinct ways, it is ambiguous which one to use") - ); - - assertThat(map, equalTo(originalMap)); - } - } - - public void testInsertValueCannotTraversePath() throws IOException { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - { - builder.startObject("path1"); - { - builder.startArray("path2"); - builder.startArray(); - builder.startObject().field("test", "value1").endObject(); - builder.endArray(); - builder.endArray(); - } - builder.endObject(); - } - builder.endObject(); - Map map = toSourceMap(Strings.toString(builder)); - final Map originalMap = Collections.unmodifiableMap(toSourceMap(Strings.toString(builder))); - - IllegalArgumentException ex = assertThrows( - IllegalArgumentException.class, - () -> SemanticTextFieldMapper.insertValue("path1.path2.test.test2", map, "value2") - ); - assertThat( - ex.getMessage(), - equalTo("Path [path1.path2.test] has value [value1] of type [String], which cannot be traversed into further") - ); - - assertThat(map, equalTo(originalMap)); - } - @Override protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { // Until a doc is indexed, the query is rewritten as match no docs @@ -1143,62 +878,4 @@ private Map toSourceMap(String source) throws IOException { return parser.map(); } } - - private static Object getMapValue(Map map, String key) { - // Split the path on unescaped "." chars and then unescape the escaped "." chars - final String[] pathElements = Arrays.stream(key.split("(? k.replace("\\.", ".")).toArray(String[]::new); - - Object value = null; - Object nextLayer = map; - for (int i = 0; i < pathElements.length; i++) { - if (nextLayer instanceof Map nextMap) { - value = nextMap.get(pathElements[i]); - } else if (nextLayer instanceof List nextList) { - final String pathElement = pathElements[i]; - List values = nextList.stream().flatMap(v -> { - Stream.Builder streamBuilder = Stream.builder(); - if (v instanceof List innerList) { - traverseList(innerList, streamBuilder); - } else { - streamBuilder.add(v); - } - return streamBuilder.build(); - }).filter(v -> v instanceof Map).map(v -> ((Map) v).get(pathElement)).filter(Objects::nonNull).toList(); - - if (values.isEmpty()) { - return null; - } else if (values.size() > 1) { - throw new AssertionError("List " + nextList + " contains multiple values for [" + pathElement + "]"); - } else { - value = values.getFirst(); - } - } else if (nextLayer == null) { - break; - } else { - throw new AssertionError( - "Path [" - + String.join(".", Arrays.copyOfRange(pathElements, 0, i)) - + "] has value [" - + value - + "] of type [" - + value.getClass().getSimpleName() - + "], which cannot be traversed into further" - ); - } - - nextLayer = value; - } - - return value; - } - - private static void traverseList(List list, Stream.Builder streamBuilder) { - for (Object value : list) { - if (value instanceof List innerList) { - traverseList(innerList, streamBuilder); - } else { - streamBuilder.add(value); - } - } - } } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml index 294761608ee81..5fee41525e24f 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml @@ -440,6 +440,209 @@ setup: - match: { _source.dense_field.inference.chunks.0.text: "dense data 3" } - exists: _source.dense_field.inference.chunks.0.embeddings +--- +"Partial updates work when using the update API and the semantic text field is in an object field": + - requires: + cluster_features: semantic_text.partial_update_in_object_fix + reason: Partial update when semantic text field is in an object bug fix + + - do: + indices.create: + index: test-in-object-index + body: + mappings: + properties: + object.sparse_field: + type: semantic_text + inference_id: sparse-inference-id + object.sparse_source_field: + type: text + copy_to: object.sparse_field + object.dense_field: + type: semantic_text + inference_id: dense-inference-id + object.dense_source_field: + type: text + copy_to: object.dense_field + + - do: + index: + index: test-in-object-index + id: doc_1 + body: + object: + sparse_field: "sparse data 1" + sparse_source_field: "sparse data 2" + dense_field: "dense data 1" + dense_source_field: "dense data 2" + + - do: + get: + index: test-in-object-index + id: doc_1 + + - match: { _source.object.sparse_field.text: "sparse data 1" } + - length: { _source.object.sparse_field.inference.chunks: 2 } + - match: { _source.object.sparse_field.inference.chunks.1.text: "sparse data 2" } + - exists: _source.object.sparse_field.inference.chunks.1.embeddings + - match: { _source.object.dense_field.text: "dense data 1" } + - length: { _source.object.dense_field.inference.chunks: 2 } + - match: { _source.object.dense_field.inference.chunks.1.text: "dense data 2" } + - exists: _source.object.dense_field.inference.chunks.1.embeddings + + - do: + update: + index: test-in-object-index + id: doc_1 + body: + doc: { "object.sparse_source_field": "sparse data 3", "object.dense_source_field": "dense data 3" } + + - do: + get: + index: test-in-object-index + id: doc_1 + + - match: { _source.object.sparse_field.text: "sparse data 1" } + - length: { _source.object.sparse_field.inference.chunks: 3 } + - match: { _source.object.sparse_field.inference.chunks.1.text: "sparse data 2" } + - exists: _source.object.sparse_field.inference.chunks.1.embeddings + - match: { _source.object.sparse_field.inference.chunks.2.text: "sparse data 3" } + - exists: _source.object.sparse_field.inference.chunks.2.embeddings + - match: { _source.object.dense_field.text: "dense data 1" } + - length: { _source.object.dense_field.inference.chunks: 3 } + - match: { _source.object.dense_field.inference.chunks.1.text: "dense data 2" } + - exists: _source.object.dense_field.inference.chunks.1.embeddings + - match: { _source.object.dense_field.inference.chunks.2.text: "dense data 3" } + - exists: _source.object.dense_field.inference.chunks.2.embeddings + + - do: + update: + index: test-in-object-index + id: doc_1 + body: + doc: { "object.sparse_field": "sparse data 4", "object.dense_field": "dense data 4" } + + - do: + get: + index: test-in-object-index + id: doc_1 + + - match: { _source.object.sparse_field.text: "sparse data 4" } + - length: { _source.object.sparse_field.inference.chunks: 3 } + - match: { _source.object.sparse_field.inference.chunks.1.text: "sparse data 2" } + - exists: _source.object.sparse_field.inference.chunks.1.embeddings + - match: { _source.object.sparse_field.inference.chunks.2.text: "sparse data 3" } + - exists: _source.object.sparse_field.inference.chunks.2.embeddings + - match: { _source.object.dense_field.text: "dense data 4" } + - length: { _source.object.dense_field.inference.chunks: 3 } + - match: { _source.object.dense_field.inference.chunks.1.text: "dense data 2" } + - exists: _source.object.dense_field.inference.chunks.1.embeddings + - match: { _source.object.dense_field.inference.chunks.2.text: "dense data 3" } + - exists: _source.object.dense_field.inference.chunks.2.embeddings + + - do: + update: + index: test-in-object-index + id: doc_1 + body: + doc: { "object.sparse_field": null, "object.dense_field": null } + + - do: + get: + index: test-in-object-index + id: doc_1 + + - match: { _source.object.sparse_field.text: null } + - length: { _source.object.sparse_field.inference.chunks: 2 } + - match: { _source.object.sparse_field.inference.chunks.0.text: "sparse data 2" } + - exists: _source.object.sparse_field.inference.chunks.0.embeddings + - match: { _source.object.sparse_field.inference.chunks.1.text: "sparse data 3" } + - exists: _source.object.sparse_field.inference.chunks.1.embeddings + - match: { _source.object.dense_field.text: null } + - length: { _source.object.dense_field.inference.chunks: 2 } + - match: { _source.object.dense_field.inference.chunks.0.text: "dense data 2" } + - exists: _source.object.dense_field.inference.chunks.0.embeddings + - match: { _source.object.dense_field.inference.chunks.1.text: "dense data 3" } + - exists: _source.object.dense_field.inference.chunks.1.embeddings + + - do: + update: + index: test-in-object-index + id: doc_1 + body: + doc: { "object.sparse_field": null, "object.dense_field": null, + "object": { "sparse_field": "sparse data 5", "dense_field": "dense data 5"} } + + - do: + get: + index: test-in-object-index + id: doc_1 + + - match: { _source.object.sparse_field.text: "sparse data 5" } + - length: { _source.object.sparse_field.inference.chunks: 3 } + - match: { _source.object.sparse_field.inference.chunks.1.text: "sparse data 2" } + - exists: _source.object.sparse_field.inference.chunks.1.embeddings + - match: { _source.object.sparse_field.inference.chunks.2.text: "sparse data 3" } + - exists: _source.object.sparse_field.inference.chunks.2.embeddings + - match: { _source.object.dense_field.text: "dense data 5" } + - length: { _source.object.dense_field.inference.chunks: 3 } + - match: { _source.object.dense_field.inference.chunks.1.text: "dense data 2" } + - exists: _source.object.dense_field.inference.chunks.1.embeddings + - match: { _source.object.dense_field.inference.chunks.2.text: "dense data 3" } + - exists: _source.object.dense_field.inference.chunks.2.embeddings + + - do: + update: + index: test-in-object-index + id: doc_1 + body: + doc: { "object.sparse_field": null, "object.dense_field": null, + "object": { "sparse_field": null, "dense_field": null} } + + - do: + get: + index: test-in-object-index + id: doc_1 + + - match: { _source.object.sparse_field.text: null } + - length: { _source.object.sparse_field.inference.chunks: 2 } + - match: { _source.object.sparse_field.inference.chunks.0.text: "sparse data 2" } + - exists: _source.object.sparse_field.inference.chunks.0.embeddings + - match: { _source.object.sparse_field.inference.chunks.1.text: "sparse data 3" } + - exists: _source.object.sparse_field.inference.chunks.1.embeddings + - match: { _source.object.dense_field.text: null } + - length: { _source.object.dense_field.inference.chunks: 2 } + - match: { _source.object.dense_field.inference.chunks.0.text: "dense data 2" } + - exists: _source.object.dense_field.inference.chunks.0.embeddings + - match: { _source.object.dense_field.inference.chunks.1.text: "dense data 3" } + - exists: _source.object.dense_field.inference.chunks.1.embeddings + + - do: + update: + index: test-in-object-index + id: doc_1 + body: + doc: { "object.sparse_field": "sparse data 6", "object.dense_field": "dense data 6", + "object": { "sparse_field": "sparse data 7", "dense_field": "dense data 7" } } + + - do: + get: + index: test-in-object-index + id: doc_1 + + - match: { _source.object.sparse_field.text: ["sparse data 7", "sparse data 6"] } + - length: { _source.object.sparse_field.inference.chunks: 4 } + - match: { _source.object.sparse_field.inference.chunks.2.text: "sparse data 2" } + - exists: _source.object.sparse_field.inference.chunks.2.embeddings + - match: { _source.object.sparse_field.inference.chunks.3.text: "sparse data 3" } + - exists: _source.object.sparse_field.inference.chunks.3.embeddings + - match: { _source.object.dense_field.text: ["dense data 7", "dense data 6"] } + - length: { _source.object.dense_field.inference.chunks: 4 } + - match: { _source.object.dense_field.inference.chunks.2.text: "dense data 2" } + - exists: _source.object.dense_field.inference.chunks.2.embeddings + - match: { _source.object.dense_field.inference.chunks.3.text: "dense data 3" } + - exists: _source.object.dense_field.inference.chunks.3.embeddings + --- "Updates with script are not allowed": - do: