diff --git a/docs/changelog/119546.yaml b/docs/changelog/119546.yaml
new file mode 100644
index 0000000000000..017bbb845c0a6
--- /dev/null
+++ b/docs/changelog/119546.yaml
@@ -0,0 +1,5 @@
+pr: 119546
+summary: Introduce `FallbackSyntheticSourceBlockLoader` and apply it to keyword fields
+area: Mapping
+type: enhancement
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java
new file mode 100644
index 0000000000000..28ea37ef73e33
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java
@@ -0,0 +1,270 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.XContentParserConfiguration;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * Block loader for fields that use fallback synthetic source implementation.
+ *
+ * Usually fields have doc_values or stored fields and block loaders use them directly. In some cases neither is available
+ * and we fall back to (potentially synthetic) _source. However, in the case of synthetic source there is actually no need
+ * to construct the entire _source. We know that the field has neither doc_values nor stored fields, so it uses the fallback
+ * synthetic source implementation. That is equivalent to reading the _ignored_source stored field directly and synthesizing
+ * the source in place for this field only.
+ *
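+ * For example, with synthetic source enabled and a mapping like <pre>{@code
+ * "k": { "type": "keyword", "doc_values": false, "store": false }
+ * }</pre>
+ * values of {@code k} are carried only by the _ignored_source stored field, and this loader decodes those entries per
+ * document instead of synthesizing the whole _source (this is how the keyword field is wired up in this change).
+ *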
+ * See {@link IgnoredSourceFieldMapper}.
+ */
+public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader {
+    private final Reader<?> reader;
+    private final String fieldName;
+
+    protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) {
+        this.reader = reader;
+        this.fieldName = fieldName;
+    }
+
+    @Override
+    public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException {
+        return null;
+    }
+
+    @Override
+    public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
+        return new IgnoredSourceRowStrideReader<>(fieldName, reader);
+    }
+
+    @Override
+    public StoredFieldsSpec rowStrideStoredFieldSpec() {
+        return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME));
+    }
+
+    @Override
+    public boolean supportsOrdinals() {
+        return false;
+    }
+
+    @Override
+    public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException {
+        throw new UnsupportedOperationException();
+    }
+
+    private record IgnoredSourceRowStrideReader<T>(String fieldName, Reader<T> reader) implements RowStrideReader {
+        @Override
+        public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
+            var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME);
+            if (ignoredSource == null) {
+                return;
+            }
+
+            Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>();
+
+            // Contains the name of the field and all of its parents
+            Set<String> fieldNames = new HashSet<>() {
+                {
+                    add("_doc");
+                }
+            };
+
+            var current = new StringBuilder();
+            for (String part : fieldName.split("\\.")) {
+                if (current.isEmpty() == false) {
+                    current.append('.');
+                }
+                current.append(part);
+                fieldNames.add(current.toString());
+            }
+
+            for (Object value : ignoredSource) {
+                IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
+                if (fieldNames.contains(nameValue.name())) {
+                    valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue);
+                }
+            }
+
+            // TODO figure out how to handle XContentDataHelper#voidValue()
+
+            var blockValues = new ArrayList<T>();
+
+            var leafFieldValue = valuesForFieldAndParents.get(fieldName);
+            if (leafFieldValue != null) {
+                readFromFieldValue(leafFieldValue, blockValues);
+            } else {
+                readFromParentValue(valuesForFieldAndParents, blockValues);
+            }
+
+            if (blockValues.isEmpty() == false) {
+                if (blockValues.size() > 1) {
+                    builder.beginPositionEntry();
+                }
+
+                reader.writeToBlock(blockValues, builder);
+
+                if (blockValues.size() > 1) {
+                    builder.endPositionEntry();
+                }
+            } else {
+                builder.appendNull();
+            }
+        }
+
+        private void readFromFieldValue(List<IgnoredSourceFieldMapper.NameValue> nameValues, List<T> blockValues) throws IOException {
+            if (nameValues.isEmpty()) {
+                return;
+            }
+
+            for (var nameValue : nameValues) {
+                // The leaf field is stored directly (not as part of a parent object), so try to decode it.
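+                // XContentDataHelper#decode (added in this change) yields a value only for scalar encodings;
+                // for arrays and objects it returns Optional.empty() and we parse the encoded bytes below instead.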
+                Optional<Object> singleValue = XContentDataHelper.decode(nameValue.value());
+                if (singleValue.isPresent()) {
+                    reader.convertValue(singleValue.get(), blockValues);
+                    continue;
+                }
+
+                // We have a value for this field, but it's an array or an object
+                var type = XContentDataHelper.decodeType(nameValue.value());
+                assert type.isPresent();
+
+                try (
+                    XContentParser parser = type.get()
+                        .xContent()
+                        .createParser(
+                            XContentParserConfiguration.EMPTY,
+                            nameValue.value().bytes,
+                            nameValue.value().offset + 1,
+                            nameValue.value().length - 1
+                        )
+                ) {
+                    parser.nextToken();
+                    parseWithReader(parser, blockValues);
+                }
+            }
+        }
+
+        private void readFromParentValue(
+            Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents,
+            List<T> blockValues
+        ) throws IOException {
+            if (valuesForFieldAndParents.isEmpty()) {
+                return;
+            }
+
+            // If a parent object is stored at a particular level, its children won't be stored.
+            // So we should only ever have one parent here.
+            assert valuesForFieldAndParents.size() == 1 : "_ignored_source field contains multiple levels of the same object";
+            var parentValues = valuesForFieldAndParents.values().iterator().next();
+
+            for (var nameValue : parentValues) {
+                parseFieldFromParent(nameValue, blockValues);
+            }
+        }
+
+        private void parseFieldFromParent(IgnoredSourceFieldMapper.NameValue nameValue, List<T> blockValues) throws IOException {
+            var type = XContentDataHelper.decodeType(nameValue.value());
+            assert type.isPresent();
+
+            String nameAtThisLevel = fieldName.substring(nameValue.name().length() + 1);
+            var filterParserConfig = XContentParserConfiguration.EMPTY.withFiltering(null, Set.of(nameAtThisLevel), Set.of(), true);
+            try (
+                XContentParser parser = type.get()
+                    .xContent()
+                    .createParser(filterParserConfig, nameValue.value().bytes, nameValue.value().offset + 1, nameValue.value().length - 1)
+            ) {
+                parser.nextToken();
+                var fieldNameInParser = new StringBuilder(nameValue.name());
+                while (true) {
+                    if (parser.currentToken() == XContentParser.Token.FIELD_NAME) {
+                        fieldNameInParser.append('.').append(parser.currentName());
+                        if (fieldNameInParser.toString().equals(fieldName)) {
+                            parser.nextToken();
+                            break;
+                        }
+                    }
+                    parser.nextToken();
+                }
+                parseWithReader(parser, blockValues);
+            }
+        }
+
+        private void parseWithReader(XContentParser parser, List<T> blockValues) throws IOException {
+            if (parser.currentToken() == XContentParser.Token.START_ARRAY) {
+                while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
+                    reader.parse(parser, blockValues);
+                }
+                return;
+            }
+
+            reader.parse(parser, blockValues);
+        }
+
+        @Override
+        public boolean canReuse(int startingDocID) {
+            return true;
+        }
+    }
+
+    /**
+     * Field-specific implementation that converts data stored in the _ignored_source field to block loader values.
+     * @param <T> type of the value
+     */
+    public interface Reader<T> {
+        /**
+         * Converts a raw stored value for this field to a value in a format suitable for the block loader and adds it to the
+         * provided accumulator.
+         * @param value raw decoded value from the _ignored_source field (synthetic _source value)
+         * @param accumulator list containing the result of conversion
+         */
+        void convertValue(Object value, List<T> accumulator);
+
+        /**
+         * Parses one or more complex values using the provided parser and adds them to the provided accumulator.
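+         * When the stored value is an array, the enclosing loader positions the parser on each element and calls this
+         * method once per element.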
+         * @param parser parser of a value from the _ignored_source field (synthetic _source value)
+         * @param accumulator list containing the results of parsing
+         */
+        void parse(XContentParser parser, List<T> accumulator) throws IOException;
+
+        void writeToBlock(List<T> values, Builder blockBuilder);
+    }
+
+    public abstract static class ReaderWithNullValueSupport<T> implements Reader<T> {
+        private final T nullValue;
+
+        public ReaderWithNullValueSupport(T nullValue) {
+            this.nullValue = nullValue;
+        }
+
+        @Override
+        public void parse(XContentParser parser, List<T> accumulator) throws IOException {
+            if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
+                if (nullValue != null) {
+                    convertValue(nullValue, accumulator);
+                }
+                return;
+            }
+
+            parseNonNullValue(parser, accumulator);
+        }
+
+        abstract void parseNonNullValue(XContentParser parser, List<T> accumulator) throws IOException;
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index b7528bd3729ee..5cd968dff52a4 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -67,6 +67,7 @@
 import org.elasticsearch.search.runtime.StringScriptFieldTermQuery;
 import org.elasticsearch.search.runtime.StringScriptFieldWildcardQuery;
 import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentParser;
 
 import java.io.IOException;
 import java.io.UncheckedIOException;
@@ -74,6 +75,7 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
@@ -738,10 +740,54 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
         if (isStored()) {
             return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
         }
+
+        if (isSyntheticSource) {
+            return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
+                @Override
+                public Builder builder(BlockFactory factory, int expectedCount) {
+                    return factory.bytesRefs(expectedCount);
+                }
+            };
+        }
+
         SourceValueFetcher fetcher = sourceValueFetcher(blContext.sourcePaths(name()));
         return new BlockSourceReader.BytesRefsBlockLoader(fetcher, sourceBlockLoaderLookup(blContext));
     }
 
+    private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBlockLoaderReader() {
+        var nullValueBytes = nullValue != null ? new BytesRef(nullValue) : null;
+        return new FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<>(nullValueBytes) {
+            @Override
+            public void convertValue(Object value, List<BytesRef> accumulator) {
+                String stringValue = ((BytesRef) value).utf8ToString();
+                String adjusted = applyIgnoreAboveAndNormalizer(stringValue);
+                if (adjusted != null) {
+                    // TODO what if the value didn't change?
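+                    // A non-null result passed the ignore_above check and has been normalized;
+                    // null means the value was filtered out and contributes nothing to the block.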
+                    accumulator.add(new BytesRef(adjusted));
+                }
+            }
+
+            @Override
+            public void parseNonNullValue(XContentParser parser, List<BytesRef> accumulator) throws IOException {
+                assert parser.currentToken() == XContentParser.Token.VALUE_STRING : "Unexpected token " + parser.currentToken();
+
+                var value = applyIgnoreAboveAndNormalizer(parser.text());
+                if (value != null) {
+                    accumulator.add(new BytesRef(value));
+                }
+            }
+
+            @Override
+            public void writeToBlock(List<BytesRef> values, BlockLoader.Builder blockBuilder) {
+                var bytesRefBuilder = (BlockLoader.BytesRefBuilder) blockBuilder;
+
+                for (var value : values) {
+                    bytesRefBuilder.appendBytesRef(value);
+                }
+            }
+        };
+    }
+
     private BlockSourceReader.LeafIteratorLookup sourceBlockLoaderLookup(BlockLoaderContext blContext) {
         if (getTextSearchInfo().hasNorms()) {
             return BlockSourceReader.lookupFromNorms(name());
@@ -821,15 +867,19 @@ private SourceValueFetcher sourceValueFetcher(Set<String> sourcePaths) {
             @Override
             protected String parseSourceValue(Object value) {
                 String keywordValue = value.toString();
-                if (keywordValue.length() > ignoreAbove) {
-                    return null;
-                }
-
-                return normalizeValue(normalizer(), name(), keywordValue);
+                return applyIgnoreAboveAndNormalizer(keywordValue);
             }
         };
     }
 
+    private String applyIgnoreAboveAndNormalizer(String value) {
+        if (value.length() > ignoreAbove) {
+            return null;
+        }
+
+        return normalizeValue(normalizer(), name(), value);
+    }
+
     @Override
     public Object valueForDisplay(Object value) {
         if (value == null) {
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java b/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java
index 646368b96a4c5..8c2f567e2896e 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java
@@ -110,6 +110,28 @@ static void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
         }
     }
 
+    /**
+     * Decode the value in the passed {@link BytesRef} in place and return it.
+     * Returns {@link Optional#empty()} for complex values (objects and arrays).
+     */
+    static Optional<Object> decode(BytesRef r) {
+        return switch ((char) r.bytes[r.offset]) {
+            case BINARY_ENCODING -> Optional.of(TypeUtils.EMBEDDED_OBJECT.decode(r));
+            case CBOR_OBJECT_ENCODING, JSON_OBJECT_ENCODING, YAML_OBJECT_ENCODING, SMILE_OBJECT_ENCODING -> Optional.empty();
+            case BIG_DECIMAL_ENCODING -> Optional.of(TypeUtils.BIG_DECIMAL.decode(r));
+            case FALSE_ENCODING, TRUE_ENCODING -> Optional.of(TypeUtils.BOOLEAN.decode(r));
+            case BIG_INTEGER_ENCODING -> Optional.of(TypeUtils.BIG_INTEGER.decode(r));
+            case STRING_ENCODING -> Optional.of(TypeUtils.STRING.decode(r));
+            case INTEGER_ENCODING -> Optional.of(TypeUtils.INTEGER.decode(r));
+            case LONG_ENCODING -> Optional.of(TypeUtils.LONG.decode(r));
+            case DOUBLE_ENCODING -> Optional.of(TypeUtils.DOUBLE.decode(r));
+            case FLOAT_ENCODING -> Optional.of(TypeUtils.FLOAT.decode(r));
+            case NULL_ENCODING -> Optional.ofNullable(TypeUtils.NULL.decode(r));
+            case VOID_ENCODING -> Optional.of(TypeUtils.VOID.decode(r));
+            default -> throw new IllegalArgumentException("Can't decode " + r);
+        };
+    }
+
     /**
      * Determines if the given {@link BytesRef}, encoded with {@link XContentDataHelper#encodeToken(XContentParser)},
      * is an encoded object.
@@ -339,6 +361,11 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            return new BytesRef(r.bytes, r.offset + 1, r.length - 1);
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             b.value(new BytesRef(r.bytes, r.offset + 1, r.length - 1).utf8ToString());
@@ -359,6 +386,11 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            return ByteUtils.readIntLE(r.bytes, 1 + r.offset);
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             b.value(ByteUtils.readIntLE(r.bytes, 1 + r.offset));
@@ -379,6 +411,11 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            return ByteUtils.readLongLE(r.bytes, 1 + r.offset);
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             b.value(ByteUtils.readLongLE(r.bytes, 1 + r.offset));
@@ -399,6 +436,11 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            return ByteUtils.readDoubleLE(r.bytes, 1 + r.offset);
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             b.value(ByteUtils.readDoubleLE(r.bytes, 1 + r.offset));
@@ -419,6 +461,11 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            return ByteUtils.readFloatLE(r.bytes, 1 + r.offset);
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             b.value(ByteUtils.readFloatLE(r.bytes, 1 + r.offset));
@@ -437,6 +484,11 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            return new BigInteger(r.bytes, r.offset + 1, r.length - 1);
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             b.value(new BigInteger(r.bytes, r.offset + 1, r.length - 1));
@@ -455,6 +507,15 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            if (r.length < 5) {
+                throw new IllegalArgumentException("Can't decode " + r);
+            }
+            int scale = ByteUtils.readIntLE(r.bytes, r.offset + 1);
+            return new BigDecimal(new BigInteger(r.bytes, r.offset + 5, r.length - 5), scale);
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             if (r.length < 5) {
@@ -477,6 +538,15 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            if (r.length != 1) {
+                throw new IllegalArgumentException("Can't decode " + r);
+            }
+            assert r.bytes[r.offset] == 't' || r.bytes[r.offset] == 'f' : r.bytes[r.offset];
+            return r.bytes[r.offset] == 't';
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             if (r.length != 1) {
@@ -499,6 +569,11 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            return null;
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             b.nullValue();
@@ -517,6 +592,11 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            return new BytesRef(r.bytes, r.offset + 1, r.length - 1);
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             b.value(r.bytes, r.offset + 1, r.length - 1);
@@ -538,6 +618,11 @@ byte[] encode(XContentParser parser) throws IOException {
             return bytes;
         }
     }
 
+        @Override
+        Object decode(BytesRef r) {
+            throw new UnsupportedOperationException();
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
             switch ((char) r.bytes[r.offset]) {
@@ -562,6 +647,11 @@ byte[] encode(XContentParser parser) {
             return bytes;
         }
 
+        @Override
+        Object decode(BytesRef r) {
+            throw new UnsupportedOperationException();
+        }
+
         @Override
         void decodeAndWrite(XContentBuilder b, BytesRef r) {
             // NOOP
@@ -591,6 +681,8 @@ void assertValidEncoding(byte[] encodedValue) {
 
         abstract byte[] encode(XContentParser parser) throws IOException;
 
+        abstract Object decode(BytesRef r);
+
         abstract void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException;
 
         static byte[] encode(BigInteger n, Byte encoding) throws IOException {
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java
index 40e5829b5b12e..4d5eb2ea641ae 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java
@@ -13,7 +13,6 @@
 import org.elasticsearch.index.mapper.BlockLoaderTestCase;
 import org.elasticsearch.logsdb.datageneration.FieldType;
 
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -28,27 +27,30 @@ public KeywordFieldBlockLoaderTests() {
 
     @SuppressWarnings("unchecked")
     @Override
     protected Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource) {
-        if (value == null) {
-            return null;
-        }
+        var nullValue = (String) fieldMapping.get("null_value");
 
         var ignoreAbove = fieldMapping.get("ignore_above") == null
             ? Integer.MAX_VALUE
             : ((Number) fieldMapping.get("ignore_above")).intValue();
 
+        if (value == null) {
+            return convert(null, nullValue, ignoreAbove);
+        }
+
         if (value instanceof String s) {
-            return convert(s, ignoreAbove);
+            return convert(s, nullValue, ignoreAbove);
         }
 
-        Function<Stream<String>, Stream<BytesRef>> convertValues = s -> s.map(v -> convert(v, ignoreAbove)).filter(Objects::nonNull);
+        Function<Stream<String>, Stream<BytesRef>> convertValues = s -> s.map(v -> convert(v, nullValue, ignoreAbove))
+            .filter(Objects::nonNull);
 
         if ((boolean) fieldMapping.getOrDefault("doc_values", false)) {
             // Sorted and no duplicates
-            var values = new HashSet<>((List<String>) value);
-            var resultList = convertValues.compose(s -> values.stream().filter(Objects::nonNull).sorted())
+            var resultList = convertValues.andThen(Stream::distinct)
+                .andThen(Stream::sorted)
                 .andThen(Stream::toList)
-                .apply(values.stream());
+                .apply(((List<String>) value).stream());
             return maybeFoldList(resultList);
         }
@@ -69,9 +71,13 @@ private Object maybeFoldList(List<?> list) {
         return list;
     }
 
-    private BytesRef convert(String value, int ignoreAbove) {
+    private BytesRef convert(String value, String nullValue, int ignoreAbove) {
         if (value == null) {
-            return null;
+            if (nullValue != null) {
+                value = nullValue;
+            } else {
+                return null;
+            }
         }
 
         return value.length() <= ignoreAbove ?
            new BytesRef(value) : null;
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java
index 8f5478e1181f1..db8a38c63c64f 100644
--- a/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java
@@ -13,82 +13,159 @@
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
 import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
-import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.DocumentGenerator;
 import org.elasticsearch.logsdb.datageneration.FieldType;
 import org.elasticsearch.logsdb.datageneration.MappingGenerator;
 import org.elasticsearch.logsdb.datageneration.Template;
 import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler;
 import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
 import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse;
+import org.elasticsearch.plugins.internal.XContentMeteringParserDecorator;
 import org.elasticsearch.search.fetch.StoredFieldsSpec;
 import org.elasticsearch.search.lookup.SearchLookup;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentType;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Stream;
 
 public abstract class BlockLoaderTestCase extends MapperServiceTestCase {
+    private final FieldType fieldType;
     private final String fieldName;
-    private final Template template;
     private final MappingGenerator mappingGenerator;
-    private final FieldDataGenerator generator;
+    private final DocumentGenerator documentGenerator;
 
     protected BlockLoaderTestCase(FieldType fieldType) {
+        this.fieldType = fieldType;
         this.fieldName = randomAlphaOfLengthBetween(5, 10);
 
-        // Disable all dynamic mapping
         var specification = DataGeneratorSpecification.builder()
             .withFullyDynamicMapping(false)
+            // Disable dynamic mapping and objects that are disabled (enabled: false)
             .withDataSourceHandlers(List.of(new DataSourceHandler() {
                 @Override
                 public DataSourceResponse.DynamicMappingGenerator handle(DataSourceRequest.DynamicMappingGenerator request) {
                     return new DataSourceResponse.DynamicMappingGenerator(isObject -> false);
                 }
+
+                @Override
+                public DataSourceResponse.ObjectMappingParametersGenerator handle(
+                    DataSourceRequest.ObjectMappingParametersGenerator request
+                ) {
+                    return new DataSourceResponse.ObjectMappingParametersGenerator(HashMap::new); // just defaults
+                }
             }))
             .build();
 
-        this.template = new Template(Map.of(fieldName, new Template.Leaf(fieldName, fieldType)));
         this.mappingGenerator = new MappingGenerator(specification);
-        this.generator = fieldType.generator(fieldName, specification.dataSource());
+        this.documentGenerator = new DocumentGenerator(specification);
     }
 
     public void testBlockLoader() throws IOException {
+        var template = new Template(Map.of(fieldName, new Template.Leaf(fieldName, fieldType)));
+        runTest(template, fieldName);
+    }
+
+    public void testBlockLoaderForFieldInObject() throws
IOException {
+        int depth = randomIntBetween(0, 3);
+
+        Map<String, Template.Entry> currentLevel = new HashMap<>();
+        Map<String, Template.Entry> top = Map.of("top", new Template.Object("top", false, currentLevel));
+
+        var fullFieldName = new StringBuilder("top");
+        int currentDepth = 0;
+        while (currentDepth++ < depth) {
+            fullFieldName.append('.').append("level").append(currentDepth);
+
+            Map<String, Template.Entry> nextLevel = new HashMap<>();
+            currentLevel.put("level" + currentDepth, new Template.Object("level" + currentDepth, false, nextLevel));
+            currentLevel = nextLevel;
+        }
+
+        fullFieldName.append('.').append(fieldName);
+        currentLevel.put(fieldName, new Template.Leaf(fieldName, fieldType));
+        var template = new Template(top);
+        runTest(template, fullFieldName.toString());
+    }
+
+    private void runTest(Template template, String fieldName) throws IOException {
         var mapping = mappingGenerator.generate(template);
 
         var mappingXContent = XContentBuilder.builder(XContentType.JSON.xContent()).map(mapping.raw());
 
         var syntheticSource = randomBoolean();
         var mapperService = syntheticSource ? createSytheticSourceMapperService(mappingXContent) : createMapperService(mappingXContent);
 
-        var fieldValue = generator.generateValue();
+        var document = documentGenerator.generate(template, mapping);
+        var documentXContent = XContentBuilder.builder(XContentType.JSON.xContent()).map(document);
 
-        Object blockLoaderResult = setupAndInvokeBlockLoader(mapperService, fieldValue);
-        Object expected = expected(mapping.lookup().get(fieldName), fieldValue, syntheticSource);
+        Object blockLoaderResult = setupAndInvokeBlockLoader(mapperService, documentXContent, fieldName);
+        Object expected = expected(mapping.lookup().get(fieldName), getFieldValue(document, fieldName), syntheticSource);
         assertEquals(expected, blockLoaderResult);
     }
 
     protected abstract Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource);
 
-    private Object setupAndInvokeBlockLoader(MapperService mapperService, Object fieldValue) throws IOException {
+    private Object getFieldValue(Map<String, Object> document, String fieldName) {
+        var rawValues = new ArrayList<>();
+        processLevel(document, fieldName, rawValues);
+
+        if (rawValues.size() == 1) {
+            return rawValues.get(0);
+        }
+
+        return rawValues.stream().flatMap(v -> v instanceof List<?> l ?
l.stream() : Stream.of(v)).toList();
+    }
+
+    @SuppressWarnings("unchecked")
+    private void processLevel(Map<String, Object> level, String field, ArrayList<Object> values) {
+        if (field.contains(".") == false) {
+            var value = level.get(field);
+            values.add(value);
+            return;
+        }
+
+        var nameInLevel = field.split("\\.")[0];
+        var entry = level.get(nameInLevel);
+        if (entry instanceof Map<?, ?> m) {
+            processLevel((Map<String, Object>) m, field.substring(field.indexOf('.') + 1), values);
+        }
+        if (entry instanceof List<?> l) {
+            for (var object : l) {
+                processLevel((Map<String, Object>) object, field.substring(field.indexOf('.') + 1), values);
+            }
+        }
+    }
+
+    private Object setupAndInvokeBlockLoader(MapperService mapperService, XContentBuilder document, String fieldName) throws IOException {
         try (Directory directory = newDirectory()) {
             RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
 
-            LuceneDocument doc = mapperService.documentMapper().parse(source(b -> {
-                b.field(fieldName);
-                b.value(fieldValue);
-            })).rootDoc();
+            var source = new SourceToParse(
+                "1",
+                BytesReference.bytes(document),
+                XContentType.JSON,
+                null,
+                Map.of(),
+                true,
+                XContentMeteringParserDecorator.NOOP
+            );
+            LuceneDocument doc = mapperService.documentMapper().parse(source).rootDoc();
             iw.addDocument(doc);
             iw.close();
 
             try (DirectoryReader reader = DirectoryReader.open(directory)) {
                 LeafReaderContext context = reader.leaves().get(0);
-                return load(createBlockLoader(mapperService), context, mapperService);
+                return load(createBlockLoader(mapperService, fieldName), context, mapperService);
             }
         }
     }
@@ -98,6 +175,9 @@ private Object load(BlockLoader blockLoader, LeafReaderContext context, MapperSe
         var columnAtATimeReader = blockLoader.columnAtATimeReader(context);
         if (columnAtATimeReader != null) {
             var block = (TestBlock) columnAtATimeReader.read(TestBlock.factory(context.reader().numDocs()), TestBlock.docs(0));
+            if (block.size() == 0) {
+                return null;
+            }
             return block.get(0);
         }
 
@@ -119,10 +199,13 @@ private Object load(BlockLoader blockLoader, LeafReaderContext context, MapperSe
         BlockLoader.Builder builder = blockLoader.builder(TestBlock.factory(context.reader().numDocs()), 1);
         blockLoader.rowStrideReader(context).read(0, storedFieldsLoader, builder);
 
         var block = (TestBlock) builder.build();
+        if (block.size() == 0) {
+            return null;
+        }
         return block.get(0);
     }
 
-    private BlockLoader createBlockLoader(MapperService mapperService) {
+    private BlockLoader createBlockLoader(MapperService mapperService, String fieldName) {
         SearchLookup searchLookup = new SearchLookup(mapperService.mappingLookup().fieldTypesLookup()::get, null, null);
 
         return mapperService.fieldType(fieldName).blockLoader(new MappedFieldType.BlockLoaderContext() {
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java
index 2567037488f3f..702145dd9a503 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java
@@ -64,6 +64,9 @@ private Supplier<Map<String, Object>> keywordMapping(
         if (ESTestCase.randomDouble() <= 0.2) {
             injected.put("ignore_above", ESTestCase.randomIntBetween(1, 100));
         }
+        if (ESTestCase.randomDouble() <= 0.2) {
+            injected.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10));
+        }
 
         return injected;
     };
diff --git
a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/FieldSpecificMatcher.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/FieldSpecificMatcher.java
index f86eb31f47cc6..df26b652a806d 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/FieldSpecificMatcher.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/FieldSpecificMatcher.java
@@ -288,4 +288,63 @@ public MatchResult match(
             );
         }
     }
+
+    class KeywordMatcher implements FieldSpecificMatcher {
+        private final XContentBuilder actualMappings;
+        private final Settings.Builder actualSettings;
+        private final XContentBuilder expectedMappings;
+        private final Settings.Builder expectedSettings;
+
+        KeywordMatcher(
+            XContentBuilder actualMappings,
+            Settings.Builder actualSettings,
+            XContentBuilder expectedMappings,
+            Settings.Builder expectedSettings
+        ) {
+            this.actualMappings = actualMappings;
+            this.actualSettings = actualSettings;
+            this.expectedMappings = expectedMappings;
+            this.expectedSettings = expectedSettings;
+        }
+
+        @Override
+        public MatchResult match(
+            List<Object> actual,
+            List<Object> expected,
+            Map<String, Object> actualMapping,
+            Map<String, Object> expectedMapping
+        ) {
+            var nullValue = actualMapping.get("null_value");
+            var expectedNullValue = expectedMapping.get("null_value");
+            if (Objects.equals(nullValue, expectedNullValue) == false) {
+                throw new IllegalStateException(
+                    "[null_value] parameter for [keyword] field does not match between actual and expected mapping"
+                );
+            }
+
+            var expectedNormalized = normalize(expected, (String) nullValue);
+            var actualNormalized = normalize(actual, (String) nullValue);
+
+            return actualNormalized.equals(expectedNormalized)
+                ? MatchResult.match()
+                : MatchResult.noMatch(
+                    formatErrorMessage(
+                        actualMappings,
+                        actualSettings,
+                        expectedMappings,
+                        expectedSettings,
+                        "Values of type [keyword] don't match after normalization, normalized "
+                            + prettyPrintCollections(actualNormalized, expectedNormalized)
+                    )
+                );
+        }
+
+        private static Set<String> normalize(List<Object> values, String nullValue) {
+            if (values == null) {
+                return Set.of();
+            }
+
+            return values.stream().map(v -> v == null ? nullValue : (String) v).filter(Objects::nonNull).collect(Collectors.toSet());
+        }
+    }
 }
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/SourceMatcher.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/SourceMatcher.java
index 96b8824b76af3..57c7a92bfa55a 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/SourceMatcher.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/SourceMatcher.java
@@ -59,7 +59,9 @@ public SourceMatcher(
             "unsigned_long",
             new FieldSpecificMatcher.UnsignedLongMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings),
             "counted_keyword",
-            new FieldSpecificMatcher.CountedKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)
+            new FieldSpecificMatcher.CountedKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings),
+            "keyword",
+            new FieldSpecificMatcher.KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)
         );
         this.dynamicFieldMatcher = new DynamicFieldMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings);
     }
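
A minimal sketch of how another field type could wire into FallbackSyntheticSourceBlockLoader, mirroring the keyword implementation in this change. It is illustrative only and not part of the diff: the class name and the field name "my_field" are hypothetical, and the sketch must live in the org.elasticsearch.index.mapper package because ReaderWithNullValueSupport#parseNonNullValue is package-private.

package org.elasticsearch.index.mapper;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.util.List;

class MyFieldBlockLoaderSketch {

    // Builds a block loader that reads this field's values from _ignored_source,
    // the same shape KeywordFieldMapper uses above.
    static BlockLoader blockLoader() {
        var reader = new FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<BytesRef>(null) {
            @Override
            public void convertValue(Object value, List<BytesRef> accumulator) {
                // Scalars arrive pre-decoded by XContentDataHelper#decode (a BytesRef for strings).
                accumulator.add((BytesRef) value);
            }

            @Override
            public void parseNonNullValue(XContentParser parser, List<BytesRef> accumulator) throws IOException {
                // Invoked once per element when the stored entry is an array or object.
                accumulator.add(new BytesRef(parser.text()));
            }

            @Override
            public void writeToBlock(List<BytesRef> values, BlockLoader.Builder blockBuilder) {
                var builder = (BlockLoader.BytesRefBuilder) blockBuilder;
                for (var value : values) {
                    builder.appendBytesRef(value);
                }
            }
        };

        // "my_field" stands in for the mapper's name(); the builder override picks the block type.
        return new FallbackSyntheticSourceBlockLoader(reader, "my_field") {
            @Override
            public Builder builder(BlockFactory factory, int expectedCount) {
                return factory.bytesRefs(expectedCount);
            }
        };
    }
}

KeywordFieldMapper's fallbackSyntheticSourceBlockLoaderReader() follows exactly this shape, additionally applying ignore_above and the normalizer in convertValue and parseNonNullValue.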