diff --git a/docs/changelog/119546.yaml b/docs/changelog/119546.yaml
new file mode 100644
index 0000000000000..017bbb845c0a6
--- /dev/null
+++ b/docs/changelog/119546.yaml
@@ -0,0 +1,5 @@
+pr: 119546
+summary: Introduce `FallbackSyntheticSourceBlockLoader` and apply it to keyword fields
+area: Mapping
+type: enhancement
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java
new file mode 100644
index 0000000000000..28ea37ef73e33
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java
@@ -0,0 +1,270 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.XContentParserConfiguration;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * Block loader for fields that use the fallback synthetic source implementation.
+ *
+ * Usually fields have doc_values or stored fields, and block loaders use them directly. In some cases neither is available,
+ * and we would fall back to (potentially synthetic) _source. However, with synthetic source there is actually no need to
+ * construct the entire _source: we know that there are no doc_values and no stored fields, and therefore the fallback synthetic
+ * source implementation is used. That is equivalent to reading the _ignored_source stored field directly and performing an
+ * in-place synthetic source reconstruction for just this field.
+ *
+ * See {@link IgnoredSourceFieldMapper}.
+ */
+public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader {
+ private final Reader<?> reader;
+ private final String fieldName;
+
+ protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) {
+ this.reader = reader;
+ this.fieldName = fieldName;
+ }
+
+ @Override
+ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException {
+ return null;
+ }
+
+ @Override
+ public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
+ return new IgnoredSourceRowStrideReader<>(fieldName, reader);
+ }
+
+ @Override
+ public StoredFieldsSpec rowStrideStoredFieldSpec() {
+ return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME));
+ }
+
+ @Override
+ public boolean supportsOrdinals() {
+ return false;
+ }
+
+ @Override
+ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ private record IgnoredSourceRowStrideReader<T>(String fieldName, Reader<T> reader) implements RowStrideReader {
+ @Override
+ public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
+ var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME);
+ if (ignoredSource == null) {
+ return;
+ }
+
+ Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>();
+
+ // Contains the name of the field and all of its parents
+ Set<String> fieldNames = new HashSet<>() {
+ {
+ add("_doc");
+ }
+ };
+
+ var current = new StringBuilder();
+ for (String part : fieldName.split("\\.")) {
+ if (current.isEmpty() == false) {
+ current.append('.');
+ }
+ current.append(part);
+ fieldNames.add(current.toString());
+ }
+
+ for (Object value : ignoredSource) {
+ IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
+ if (fieldNames.contains(nameValue.name())) {
+ valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue);
+ }
+ }
+
+ // TODO figure out how to handle XContentDataHelper#voidValue()
+
+ var blockValues = new ArrayList<T>();
+
+ var leafFieldValue = valuesForFieldAndParents.get(fieldName);
+ if (leafFieldValue != null) {
+ readFromFieldValue(leafFieldValue, blockValues);
+ } else {
+ readFromParentValue(valuesForFieldAndParents, blockValues);
+ }
+
+ if (blockValues.isEmpty() == false) {
+ if (blockValues.size() > 1) {
+ builder.beginPositionEntry();
+ }
+
+ reader.writeToBlock(blockValues, builder);
+
+ if (blockValues.size() > 1) {
+ builder.endPositionEntry();
+ }
+ } else {
+ builder.appendNull();
+ }
+ }
+
+ private void readFromFieldValue(List<IgnoredSourceFieldMapper.NameValue> nameValues, List<T> blockValues) throws IOException {
+ if (nameValues.isEmpty()) {
+ return;
+ }
+
+ for (var nameValue : nameValues) {
+ // The leaf field is stored directly (not as part of a parent object); let's try to decode it.
+ Optional<Object> singleValue = XContentDataHelper.decode(nameValue.value());
+ if (singleValue.isPresent()) {
+ reader.convertValue(singleValue.get(), blockValues);
+ continue;
+ }
+
+ // We have a value for this field but it's an array or an object
+ var type = XContentDataHelper.decodeType(nameValue.value());
+ assert type.isPresent();
+
+ try (
+ XContentParser parser = type.get()
+ .xContent()
+ .createParser(
+ XContentParserConfiguration.EMPTY,
+ nameValue.value().bytes,
+ nameValue.value().offset + 1,
+ nameValue.value().length - 1
+ )
+ ) {
+ parser.nextToken();
+ parseWithReader(parser, blockValues);
+ }
+ }
+ }
+
+ private void readFromParentValue(
+ Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents,
+ List<T> blockValues
+ ) throws IOException {
+ if (valuesForFieldAndParents.isEmpty()) {
+ return;
+ }
+
+ // If a parent object is stored at a particular level, its children won't be stored.
+ // So we should only ever have one parent here.
+ assert valuesForFieldAndParents.size() == 1 : "_ignored_source field contains multiple levels of the same object";
+ var parentValues = valuesForFieldAndParents.values().iterator().next();
+
+ for (var nameValue : parentValues) {
+ parseFieldFromParent(nameValue, blockValues);
+ }
+ }
+
+ private void parseFieldFromParent(IgnoredSourceFieldMapper.NameValue nameValue, List<T> blockValues) throws IOException {
+ var type = XContentDataHelper.decodeType(nameValue.value());
+ assert type.isPresent();
+
+ String nameAtThisLevel = fieldName.substring(nameValue.name().length() + 1);
+ var filterParserConfig = XContentParserConfiguration.EMPTY.withFiltering(null, Set.of(nameAtThisLevel), Set.of(), true);
+ try (
+ XContentParser parser = type.get()
+ .xContent()
+ .createParser(filterParserConfig, nameValue.value().bytes, nameValue.value().offset + 1, nameValue.value().length - 1)
+ ) {
+ parser.nextToken();
+ var fieldNameInParser = new StringBuilder(nameValue.name());
+ while (true) {
+ if (parser.currentToken() == XContentParser.Token.FIELD_NAME) {
+ fieldNameInParser.append('.').append(parser.currentName());
+ if (fieldNameInParser.toString().equals(fieldName)) {
+ parser.nextToken();
+ break;
+ }
+ }
+ parser.nextToken();
+ }
+ parseWithReader(parser, blockValues);
+ }
+ }
+
+ private void parseWithReader(XContentParser parser, List<T> blockValues) throws IOException {
+ if (parser.currentToken() == XContentParser.Token.START_ARRAY) {
+ while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
+ reader.parse(parser, blockValues);
+ }
+ return;
+ }
+
+ reader.parse(parser, blockValues);
+ }
+
+ @Override
+ public boolean canReuse(int startingDocID) {
+ return true;
+ }
+ }
+
+ /**
+ * Field-specific implementation that converts data stored in the _ignored_source field to block loader values.
+ * @param <T> type of values produced by this reader
+ */
+ public interface Reader<T> {
+ /**
+ * Converts a raw stored value for this field to a value in a format suitable for the block loader and adds it to the provided
+ * accumulator.
+ * @param value raw decoded value from the _ignored_source field (synthetic _source value)
+ * @param accumulator list containing the result of conversion
+ */
+ void convertValue(Object value, List<T> accumulator);
+
+ /**
+ * Parses one or more complex values using a provided parser and adds them to the provided accumulator.
+ * @param parser parser of a value from the _ignored_source field (synthetic _source value)
+ * @param accumulator list containing the results of parsing
+ */
+ void parse(XContentParser parser, List<T> accumulator) throws IOException;
+
+ void writeToBlock(List<T> values, Builder blockBuilder);
+ }
+
+ public abstract static class ReaderWithNullValueSupport<T> implements Reader<T> {
+ private final T nullValue;
+
+ public ReaderWithNullValueSupport(T nullValue) {
+ this.nullValue = nullValue;
+ }
+
+ @Override
+ public void parse(XContentParser parser, List<T> accumulator) throws IOException {
+ if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
+ if (nullValue != null) {
+ convertValue(nullValue, accumulator);
+ }
+ return;
+ }
+
+ parseNonNullValue(parser, accumulator);
+ }
+
+ abstract void parseNonNullValue(XContentParser parser, List<T> accumulator) throws IOException;
+ }
+}
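
To make the `Reader` contract above concrete, here is a minimal sketch of a field-specific implementation for a hypothetical integer-valued field with a `null_value` of 0. It is illustrative only, not part of this change; `BlockLoader.IntBuilder` and `appendInt` come from the existing block loader builder interfaces, imports are elided, and the subclass is assumed to live where the package-private `parseNonNullValue` can be overridden.

```java
// Illustrative sketch, not part of this change: a Reader for a hypothetical
// integer field with null_value = 0.
var reader = new FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<Integer>(0) {
    @Override
    public void convertValue(Object value, List<Integer> accumulator) {
        // Scalar values decoded from _ignored_source arrive as plain Java objects.
        accumulator.add(((Number) value).intValue());
    }

    @Override
    void parseNonNullValue(XContentParser parser, List<Integer> accumulator) throws IOException {
        // Values nested in arrays or parent objects are read from the parser.
        accumulator.add(parser.intValue());
    }

    @Override
    public void writeToBlock(List<Integer> values, BlockLoader.Builder blockBuilder) {
        // The builder is the one created by the enclosing block loader's builder().
        var intBuilder = (BlockLoader.IntBuilder) blockBuilder;
        for (int value : values) {
            intBuilder.appendInt(value);
        }
    }
};
```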
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index 8104ed806410a..5642ffe91a10c 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -65,6 +65,7 @@
import org.elasticsearch.search.runtime.StringScriptFieldTermQuery;
import org.elasticsearch.search.runtime.StringScriptFieldWildcardQuery;
import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentParser;
import java.io.IOException;
import java.io.UncheckedIOException;
@@ -72,6 +73,7 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
@@ -635,10 +637,54 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (isStored()) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
}
+
+ if (isSyntheticSource) {
+ return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
+ @Override
+ public Builder builder(BlockFactory factory, int expectedCount) {
+ return factory.bytesRefs(expectedCount);
+ }
+ };
+ }
+
SourceValueFetcher fetcher = sourceValueFetcher(blContext.sourcePaths(name()));
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, sourceBlockLoaderLookup(blContext));
}
+ private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBlockLoaderReader() {
+ var nullValueBytes = nullValue != null ? new BytesRef(nullValue) : null;
+ return new FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<>(nullValueBytes) {
+ @Override
+ public void convertValue(Object value, List<BytesRef> accumulator) {
+ String stringValue = ((BytesRef) value).utf8ToString();
+ String adjusted = applyIgnoreAboveAndNormalizer(stringValue);
+ if (adjusted != null) {
+ // TODO what if the value didn't change?
+ accumulator.add(new BytesRef(adjusted));
+ }
+ }
+
+ @Override
+ public void parseNonNullValue(XContentParser parser, List<BytesRef> accumulator) throws IOException {
+ assert parser.currentToken() == XContentParser.Token.VALUE_STRING : "Unexpected token " + parser.currentToken();
+
+ var value = applyIgnoreAboveAndNormalizer(parser.text());
+ if (value != null) {
+ accumulator.add(new BytesRef(value));
+ }
+ }
+
+ @Override
+ public void writeToBlock(List<BytesRef> values, BlockLoader.Builder blockBuilder) {
+ var bytesRefBuilder = (BlockLoader.BytesRefBuilder) blockBuilder;
+
+ for (var value : values) {
+ bytesRefBuilder.appendBytesRef(value);
+ }
+ }
+ };
+ }
+
private BlockSourceReader.LeafIteratorLookup sourceBlockLoaderLookup(BlockLoaderContext blContext) {
if (getTextSearchInfo().hasNorms()) {
return BlockSourceReader.lookupFromNorms(name());
@@ -718,15 +764,19 @@ private SourceValueFetcher sourceValueFetcher(Set<String> sourcePaths) {
@Override
protected String parseSourceValue(Object value) {
String keywordValue = value.toString();
- if (keywordValue.length() > ignoreAbove) {
- return null;
- }
-
- return normalizeValue(normalizer(), name(), keywordValue);
+ return applyIgnoreAboveAndNormalizer(keywordValue);
}
};
}
+ private String applyIgnoreAboveAndNormalizer(String value) {
+ if (value.length() > ignoreAbove) {
+ return null;
+ }
+
+ return normalizeValue(normalizer(), name(), value);
+ }
+
@Override
public Object valueForDisplay(Object value) {
if (value == null) {
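
Extracting `applyIgnoreAboveAndNormalizer` keeps the source value fetcher and the new fallback reader in agreement: a value longer than `ignore_above` was ignored at index time and must be dropped by the block loader as well, and everything else goes through the normalizer. A rough restatement of the filtering step, assuming no normalizer is configured:

```java
// Rough restatement of the filtering step, assuming no normalizer is
// configured. Returning null means "emit no value for this entry".
static String applyIgnoreAbove(String value, int ignoreAbove) {
    // e.g. ignoreAbove = 4: "abc" -> "abc", "abcdef" -> null (ignored at index time)
    return value.length() > ignoreAbove ? null : value;
}
```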
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java b/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java
index 646368b96a4c5..8c2f567e2896e 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java
@@ -110,6 +110,28 @@ static void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
}
}
+ /**
+ * Decode the value in the passed {@link BytesRef} in place and return it.
+ * Returns {@link Optional#empty()} for complex values (objects and arrays).
+ */
+ static Optional<Object> decode(BytesRef r) {
+ return switch ((char) r.bytes[r.offset]) {
+ case BINARY_ENCODING -> Optional.of(TypeUtils.EMBEDDED_OBJECT.decode(r));
+ case CBOR_OBJECT_ENCODING, JSON_OBJECT_ENCODING, YAML_OBJECT_ENCODING, SMILE_OBJECT_ENCODING -> Optional.empty();
+ case BIG_DECIMAL_ENCODING -> Optional.of(TypeUtils.BIG_DECIMAL.decode(r));
+ case FALSE_ENCODING, TRUE_ENCODING -> Optional.of(TypeUtils.BOOLEAN.decode(r));
+ case BIG_INTEGER_ENCODING -> Optional.of(TypeUtils.BIG_INTEGER.decode(r));
+ case STRING_ENCODING -> Optional.of(TypeUtils.STRING.decode(r));
+ case INTEGER_ENCODING -> Optional.of(TypeUtils.INTEGER.decode(r));
+ case LONG_ENCODING -> Optional.of(TypeUtils.LONG.decode(r));
+ case DOUBLE_ENCODING -> Optional.of(TypeUtils.DOUBLE.decode(r));
+ case FLOAT_ENCODING -> Optional.of(TypeUtils.FLOAT.decode(r));
+ case NULL_ENCODING -> Optional.ofNullable(TypeUtils.NULL.decode(r));
+ case VOID_ENCODING -> Optional.of(TypeUtils.VOID.decode(r));
+ default -> throw new IllegalArgumentException("Can't decode " + r);
+ };
+ }
+
/**
* Determines if the given {@link BytesRef}, encoded with {@link XContentDataHelper#encodeToken(XContentParser)},
* is an encoded object.
@@ -339,6 +361,11 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ return new BytesRef(r.bytes, r.offset + 1, r.length - 1);
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
b.value(new BytesRef(r.bytes, r.offset + 1, r.length - 1).utf8ToString());
@@ -359,6 +386,11 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ return ByteUtils.readIntLE(r.bytes, 1 + r.offset);
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
b.value(ByteUtils.readIntLE(r.bytes, 1 + r.offset));
@@ -379,6 +411,11 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ return ByteUtils.readLongLE(r.bytes, 1 + r.offset);
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
b.value(ByteUtils.readLongLE(r.bytes, 1 + r.offset));
@@ -399,6 +436,11 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ return ByteUtils.readDoubleLE(r.bytes, 1 + r.offset);
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
b.value(ByteUtils.readDoubleLE(r.bytes, 1 + r.offset));
@@ -419,6 +461,11 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ return ByteUtils.readFloatLE(r.bytes, 1 + r.offset);
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
b.value(ByteUtils.readFloatLE(r.bytes, 1 + r.offset));
@@ -437,6 +484,11 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ return new BigInteger(r.bytes, r.offset + 1, r.length - 1);
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
b.value(new BigInteger(r.bytes, r.offset + 1, r.length - 1));
@@ -455,6 +507,15 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ if (r.length < 5) {
+ throw new IllegalArgumentException("Can't decode " + r);
+ }
+ int scale = ByteUtils.readIntLE(r.bytes, r.offset + 1);
+ return new BigDecimal(new BigInteger(r.bytes, r.offset + 5, r.length - 5), scale);
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
if (r.length < 5) {
@@ -477,6 +538,15 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ if (r.length != 1) {
+ throw new IllegalArgumentException("Can't decode " + r);
+ }
+ assert r.bytes[r.offset] == 't' || r.bytes[r.offset] == 'f' : r.bytes[r.offset];
+ return r.bytes[r.offset] == 't';
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
if (r.length != 1) {
@@ -499,6 +569,11 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ return null;
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
b.nullValue();
@@ -517,6 +592,11 @@ byte[] encode(XContentParser parser) throws IOException {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ return new BytesRef(r.bytes, r.offset + 1, r.length - 1);
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
b.value(r.bytes, r.offset + 1, r.length - 1);
@@ -538,6 +618,11 @@ byte[] encode(XContentParser parser) throws IOException {
}
}
+ @Override
+ Object decode(BytesRef r) {
+ throw new UnsupportedOperationException();
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException {
switch ((char) r.bytes[r.offset]) {
@@ -562,6 +647,11 @@ byte[] encode(XContentParser parser) {
return bytes;
}
+ @Override
+ Object decode(BytesRef r) {
+ throw new UnsupportedOperationException();
+ }
+
@Override
void decodeAndWrite(XContentBuilder b, BytesRef r) {
// NOOP
@@ -591,6 +681,8 @@ void assertValidEncoding(byte[] encodedValue) {
abstract byte[] encode(XContentParser parser) throws IOException;
+ abstract Object decode(BytesRef r);
+
abstract void decodeAndWrite(XContentBuilder b, BytesRef r) throws IOException;
static byte[] encode(BigInteger n, Byte encoding) throws IOException {
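
For context on the new `decode` overrides: each encoded value carries a one-byte type tag at `r.offset`, followed by a type-specific payload; the fixed-width numeric types store little-endian bytes read via `ByteUtils`. A hedged sketch of the integer case, mirroring `TypeUtils.INTEGER` above (imports elided):

```java
// Sketch of the integer payload decode: skip the one-byte type tag, then read
// a 4-byte little-endian integer (mirrors TypeUtils.INTEGER.decode above).
static int decodeIntPayload(BytesRef r) {
    return ByteUtils.readIntLE(r.bytes, r.offset + 1);
}
```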
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java
new file mode 100644
index 0000000000000..4d5eb2ea641ae
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.mapper.blockloader;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.mapper.BlockLoaderTestCase;
+import org.elasticsearch.logsdb.datageneration.FieldType;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.function.Function;
+import java.util.stream.Stream;
+
+public class KeywordFieldBlockLoaderTests extends BlockLoaderTestCase {
+ public KeywordFieldBlockLoaderTests() {
+ super(FieldType.KEYWORD);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ protected Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource) {
+ var nullValue = (String) fieldMapping.get("null_value");
+
+ var ignoreAbove = fieldMapping.get("ignore_above") == null
+ ? Integer.MAX_VALUE
+ : ((Number) fieldMapping.get("ignore_above")).intValue();
+
+ if (value == null) {
+ return convert(null, nullValue, ignoreAbove);
+ }
+
+ if (value instanceof String s) {
+ return convert(s, nullValue, ignoreAbove);
+ }
+
+ Function<Stream<String>, Stream<BytesRef>> convertValues = s -> s.map(v -> convert(v, nullValue, ignoreAbove))
+ .filter(Objects::nonNull);
+
+ if ((boolean) fieldMapping.getOrDefault("doc_values", false)) {
+ // Sorted and no duplicates
+
+ var resultList = convertValues.andThen(Stream::distinct)
+ .andThen(Stream::sorted)
+ .andThen(Stream::toList)
+ .apply(((List<String>) value).stream());
+ return maybeFoldList(resultList);
+ }
+
+ // store: "true" and source
+ var resultList = convertValues.andThen(Stream::toList).apply(((List<String>) value).stream());
+ return maybeFoldList(resultList);
+ }
+
+ private Object maybeFoldList(List<?> list) {
+ if (list.isEmpty()) {
+ return null;
+ }
+
+ if (list.size() == 1) {
+ return list.get(0);
+ }
+
+ return list;
+ }
+
+ private BytesRef convert(String value, String nullValue, int ignoreAbove) {
+ if (value == null) {
+ if (nullValue != null) {
+ value = nullValue;
+ } else {
+ return null;
+ }
+ }
+
+ return value.length() <= ignoreAbove ? new BytesRef(value) : null;
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java
new file mode 100644
index 0000000000000..db8a38c63c64f
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java
@@ -0,0 +1,249 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
+import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
+import org.elasticsearch.logsdb.datageneration.DocumentGenerator;
+import org.elasticsearch.logsdb.datageneration.FieldType;
+import org.elasticsearch.logsdb.datageneration.MappingGenerator;
+import org.elasticsearch.logsdb.datageneration.Template;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse;
+import org.elasticsearch.plugins.internal.XContentMeteringParserDecorator;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;
+import org.elasticsearch.search.lookup.SearchLookup;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentType;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Stream;
+
+public abstract class BlockLoaderTestCase extends MapperServiceTestCase {
+ private final FieldType fieldType;
+ private final String fieldName;
+ private final MappingGenerator mappingGenerator;
+ private final DocumentGenerator documentGenerator;
+
+ protected BlockLoaderTestCase(FieldType fieldType) {
+ this.fieldType = fieldType;
+ this.fieldName = randomAlphaOfLengthBetween(5, 10);
+
+ var specification = DataGeneratorSpecification.builder()
+ .withFullyDynamicMapping(false)
+ // Disable dynamic mapping and don't generate "enabled": false objects
+ .withDataSourceHandlers(List.of(new DataSourceHandler() {
+ @Override
+ public DataSourceResponse.DynamicMappingGenerator handle(DataSourceRequest.DynamicMappingGenerator request) {
+ return new DataSourceResponse.DynamicMappingGenerator(isObject -> false);
+ }
+
+ @Override
+ public DataSourceResponse.ObjectMappingParametersGenerator handle(
+ DataSourceRequest.ObjectMappingParametersGenerator request
+ ) {
+ return new DataSourceResponse.ObjectMappingParametersGenerator(HashMap::new); // just defaults
+ }
+ }))
+ .build();
+
+ this.mappingGenerator = new MappingGenerator(specification);
+ this.documentGenerator = new DocumentGenerator(specification);
+ }
+
+ public void testBlockLoader() throws IOException {
+ var template = new Template(Map.of(fieldName, new Template.Leaf(fieldName, fieldType)));
+ runTest(template, fieldName);
+ }
+
+ public void testBlockLoaderForFieldInObject() throws IOException {
+ int depth = randomIntBetween(0, 3);
+
+ Map<String, Template.Entry> currentLevel = new HashMap<>();
+ Map<String, Template.Entry> top = Map.of("top", new Template.Object("top", false, currentLevel));
+
+ var fullFieldName = new StringBuilder("top");
+ int currentDepth = 0;
+ while (currentDepth++ < depth) {
+ fullFieldName.append('.').append("level").append(currentDepth);
+
+ Map<String, Template.Entry> nextLevel = new HashMap<>();
+ currentLevel.put("level" + currentDepth, new Template.Object("level" + currentDepth, false, nextLevel));
+ currentLevel = nextLevel;
+ }
+
+ fullFieldName.append('.').append(fieldName);
+ currentLevel.put(fieldName, new Template.Leaf(fieldName, fieldType));
+ var template = new Template(top);
+ runTest(template, fullFieldName.toString());
+ }
+
+ private void runTest(Template template, String fieldName) throws IOException {
+ var mapping = mappingGenerator.generate(template);
+ var mappingXContent = XContentBuilder.builder(XContentType.JSON.xContent()).map(mapping.raw());
+
+ var syntheticSource = randomBoolean();
+ var mapperService = syntheticSource ? createSytheticSourceMapperService(mappingXContent) : createMapperService(mappingXContent);
+
+ var document = documentGenerator.generate(template, mapping);
+ var documentXContent = XContentBuilder.builder(XContentType.JSON.xContent()).map(document);
+
+ Object blockLoaderResult = setupAndInvokeBlockLoader(mapperService, documentXContent, fieldName);
+ Object expected = expected(mapping.lookup().get(fieldName), getFieldValue(document, fieldName), syntheticSource);
+ assertEquals(expected, blockLoaderResult);
+ }
+
+ protected abstract Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource);
+
+ private Object getFieldValue(Map<String, Object> document, String fieldName) {
+ var rawValues = new ArrayList<>();
+ processLevel(document, fieldName, rawValues);
+
+ if (rawValues.size() == 1) {
+ return rawValues.get(0);
+ }
+
+ return rawValues.stream().flatMap(v -> v instanceof List<?> l ? l.stream() : Stream.of(v)).toList();
+ }
+
+ @SuppressWarnings("unchecked")
+ private void processLevel(Map<String, Object> level, String field, ArrayList<Object> values) {
+ if (field.contains(".") == false) {
+ var value = level.get(field);
+ values.add(value);
+ return;
+ }
+
+ var nameInLevel = field.split("\\.")[0];
+ var entry = level.get(nameInLevel);
+ if (entry instanceof Map<?, ?> m) {
+ processLevel((Map<String, Object>) m, field.substring(field.indexOf('.') + 1), values);
+ }
+ if (entry instanceof List<?> l) {
+ for (var object : l) {
+ processLevel((Map<String, Object>) object, field.substring(field.indexOf('.') + 1), values);
+ }
+ }
+ }
+
+ private Object setupAndInvokeBlockLoader(MapperService mapperService, XContentBuilder document, String fieldName) throws IOException {
+ try (Directory directory = newDirectory()) {
+ RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
+
+ var source = new SourceToParse(
+ "1",
+ BytesReference.bytes(document),
+ XContentType.JSON,
+ null,
+ Map.of(),
+ true,
+ XContentMeteringParserDecorator.NOOP
+ );
+ LuceneDocument doc = mapperService.documentMapper().parse(source).rootDoc();
+
+ iw.addDocument(doc);
+ iw.close();
+
+ try (DirectoryReader reader = DirectoryReader.open(directory)) {
+ LeafReaderContext context = reader.leaves().get(0);
+ return load(createBlockLoader(mapperService, fieldName), context, mapperService);
+ }
+ }
+ }
+
+ private Object load(BlockLoader blockLoader, LeafReaderContext context, MapperService mapperService) throws IOException {
+ // `columnAtATimeReader` is tried first; this mimics `ValuesSourceReaderOperator`
+ var columnAtATimeReader = blockLoader.columnAtATimeReader(context);
+ if (columnAtATimeReader != null) {
+ var block = (TestBlock) columnAtATimeReader.read(TestBlock.factory(context.reader().numDocs()), TestBlock.docs(0));
+ if (block.size() == 0) {
+ return null;
+ }
+ return block.get(0);
+ }
+
+ StoredFieldsSpec storedFieldsSpec = blockLoader.rowStrideStoredFieldSpec();
+ SourceLoader.Leaf leafSourceLoader = null;
+ if (storedFieldsSpec.requiresSource()) {
+ var sourceLoader = mapperService.mappingLookup().newSourceLoader(null, SourceFieldMetrics.NOOP);
+ leafSourceLoader = sourceLoader.leaf(context.reader(), null);
+ storedFieldsSpec = storedFieldsSpec.merge(
+ new StoredFieldsSpec(true, storedFieldsSpec.requiresMetadata(), sourceLoader.requiredStoredFields())
+ );
+ }
+ BlockLoaderStoredFieldsFromLeafLoader storedFieldsLoader = new BlockLoaderStoredFieldsFromLeafLoader(
+ StoredFieldLoader.fromSpec(storedFieldsSpec).getLoader(context, null),
+ leafSourceLoader
+ );
+ storedFieldsLoader.advanceTo(0);
+
+ BlockLoader.Builder builder = blockLoader.builder(TestBlock.factory(context.reader().numDocs()), 1);
+ blockLoader.rowStrideReader(context).read(0, storedFieldsLoader, builder);
+ var block = (TestBlock) builder.build();
+ if (block.size() == 0) {
+ return null;
+ }
+ return block.get(0);
+ }
+
+ private BlockLoader createBlockLoader(MapperService mapperService, String fieldName) {
+ SearchLookup searchLookup = new SearchLookup(mapperService.mappingLookup().fieldTypesLookup()::get, null, null);
+
+ return mapperService.fieldType(fieldName).blockLoader(new MappedFieldType.BlockLoaderContext() {
+ @Override
+ public String indexName() {
+ return mapperService.getIndexSettings().getIndex().getName();
+ }
+
+ @Override
+ public IndexSettings indexSettings() {
+ return mapperService.getIndexSettings();
+ }
+
+ @Override
+ public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
+ // TODO randomize when adding support for fields that care about this
+ return MappedFieldType.FieldExtractPreference.NONE;
+ }
+
+ @Override
+ public SearchLookup lookup() {
+ return searchLookup;
+ }
+
+ @Override
+ public Set<String> sourcePaths(String name) {
+ return mapperService.mappingLookup().sourcePaths(name);
+ }
+
+ @Override
+ public String parentField(String field) {
+ return mapperService.mappingLookup().parentField(field);
+ }
+
+ @Override
+ public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
+ return (FieldNamesFieldMapper.FieldNamesFieldType) mapperService.fieldType(FieldNamesFieldMapper.NAME);
+ }
+ });
+ }
+}
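
A concrete field test then only needs to pick a `FieldType` and spell out the expected values, as `KeywordFieldBlockLoaderTests` above does. A hypothetical minimal subclass (the class name and the assumption that the data generator's `FieldType` enum has a `BOOLEAN` constant are illustrative, not part of this change):

```java
// Hypothetical subclass sketching the extension point; not part of this change.
public class BooleanFieldBlockLoaderTests extends BlockLoaderTestCase {
    public BooleanFieldBlockLoaderTests() {
        super(FieldType.BOOLEAN); // assumed to exist in the data generator
    }

    @Override
    protected Object expected(Map<String, Object> fieldMapping, Object value, boolean syntheticSource) {
        // Booleans need no post-processing; the harness folds singleton lists,
        // so a one-element array is compared as a single value.
        return value;
    }
}
```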
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/Template.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/Template.java
index 9bb2d34958a27..a46e07446052a 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/Template.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/Template.java
@@ -19,9 +19,9 @@
* @param template actual template data
*/
public record Template(Map<String, Entry> template) {
- sealed interface Entry permits Leaf, Object {}
+ public sealed interface Entry permits Leaf, Object {}
- record Leaf(String name, FieldType type) implements Entry {}
+ public record Leaf(String name, FieldType type) implements Entry {}
- record Object(String name, boolean nested, Map<String, Entry> children) implements Entry {}
+ public record Object(String name, boolean nested, Map<String, Entry> children) implements Entry {}
}
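
With `Entry`, `Leaf`, and `Object` now public, test code outside the package (such as `BlockLoaderTestCase` above) can assemble templates directly. For example (hypothetical field names):

```java
// Hypothetical template describing { "top": { "field": <keyword leaf> } }.
var template = new Template(
    Map.of("top", new Template.Object("top", false,
        Map.of("field", new Template.Leaf("field", FieldType.KEYWORD))
    ))
);
```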
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java
index 2567037488f3f..702145dd9a503 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java
@@ -64,6 +64,9 @@ private Supplier<Map<String, Object>> keywordMapping(
if (ESTestCase.randomDouble() <= 0.2) {
injected.put("ignore_above", ESTestCase.randomIntBetween(1, 100));
}
+ if (ESTestCase.randomDouble() <= 0.2) {
+ injected.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10));
+ }
return injected;
};
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/FieldSpecificMatcher.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/FieldSpecificMatcher.java
index f86eb31f47cc6..df26b652a806d 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/FieldSpecificMatcher.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/FieldSpecificMatcher.java
@@ -288,4 +288,63 @@ public MatchResult match(
);
}
}
+
+ class KeywordMatcher implements FieldSpecificMatcher {
+ private final XContentBuilder actualMappings;
+ private final Settings.Builder actualSettings;
+ private final XContentBuilder expectedMappings;
+ private final Settings.Builder expectedSettings;
+
+ KeywordMatcher(
+ XContentBuilder actualMappings,
+ Settings.Builder actualSettings,
+ XContentBuilder expectedMappings,
+ Settings.Builder expectedSettings
+ ) {
+ this.actualMappings = actualMappings;
+ this.actualSettings = actualSettings;
+ this.expectedMappings = expectedMappings;
+ this.expectedSettings = expectedSettings;
+ }
+
+ @Override
+ public MatchResult match(
+ List actual,
+ List expected,
+ Map actualMapping,
+ Map expectedMapping
+ ) {
+ var nullValue = actualMapping.get("null_value");
+ var expectedNullValue = expectedMapping.get("null_value");
+ if (Objects.equals(nullValue, expectedNullValue) == false) {
+ throw new IllegalStateException(
+ "[null_value] parameter for [keyword] field does not match between actual and expected mapping"
+ );
+ }
+
+ var expectedNormalized = normalize(expected, (String) nullValue);
+ var actualNormalized = normalize(actual, (String) nullValue);
+
+ return actualNormalized.equals(expectedNormalized)
+ ? MatchResult.match()
+ : MatchResult.noMatch(
+ formatErrorMessage(
+ actualMappings,
+ actualSettings,
+ expectedMappings,
+ expectedSettings,
+ "Values of type [keyword] don't match after normalization, normalized "
+ + prettyPrintCollections(actualNormalized, expectedNormalized)
+ )
+ );
+ }
+
+ private static Set<String> normalize(List<Object> values, String nullValue) {
+ if (values == null) {
+ return Set.of();
+ }
+
+ return values.stream().map(v -> v == null ? nullValue : (String) v).filter(Objects::nonNull).collect(Collectors.toSet());
+ }
+ }
}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/SourceMatcher.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/SourceMatcher.java
index 96b8824b76af3..57c7a92bfa55a 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/SourceMatcher.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/SourceMatcher.java
@@ -59,7 +59,9 @@ public SourceMatcher(
"unsigned_long",
new FieldSpecificMatcher.UnsignedLongMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings),
"counted_keyword",
- new FieldSpecificMatcher.CountedKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)
+ new FieldSpecificMatcher.CountedKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings),
+ "keyword",
+ new FieldSpecificMatcher.KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)
);
this.dynamicFieldMatcher = new DynamicFieldMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings);
}