Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public void setup() throws IOException {
);
includesSet = Set.of(fetchContext.includes());
excludesSet = Set.of(fetchContext.excludes());
parserConfig = XContentParserConfiguration.EMPTY.withFiltering(includesSet, excludesSet, false);
parserConfig = XContentParserConfiguration.EMPTY.withFiltering(null, includesSet, excludesSet, false);
}

private BytesReference read300BytesExample() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ private XContentParserConfiguration buildParseConfig(boolean matchDotsInFieldNam
includes = null;
excludes = filters;
}
return XContentParserConfiguration.EMPTY.withFiltering(includes, excludes, matchDotsInFieldNames);
return XContentParserConfiguration.EMPTY.withFiltering(null, includes, excludes, matchDotsInFieldNames);
}

private BytesReference filter(XContentParserConfiguration contentParserConfiguration) throws IOException {
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/113827.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113827
summary: Add Optional Source Filtering to Source Loaders
area: Mapping
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import org.elasticsearch.xcontent.provider.filtering.FilterPathBasedFilter;
import org.elasticsearch.xcontent.support.filtering.FilterPath;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class XContentParserConfigurationImpl implements XContentParserConfiguration {
Expand Down Expand Up @@ -106,12 +108,41 @@ public XContentParserConfiguration withFiltering(
Set<String> excludeStrings,
boolean filtersMatchFieldNamesWithDots
) {
return withFiltering(null, includeStrings, excludeStrings, filtersMatchFieldNamesWithDots);
}

/**
 * Returns a copy of this configuration with the given include/exclude source filters,
 * optionally scoped beneath a prefix path.
 *
 * @param prefixPath path at which parsing starts, or {@code null} when parsing from the
 *                   document root; include/exclude paths are advanced past this prefix
 *                   before being applied
 * @param includeStrings paths to include during parsing, or {@code null} to include all
 * @param excludeStrings paths to exclude during parsing, or {@code null} to exclude none
 * @param filtersMatchFieldNamesWithDots whether filters also match dots inside field names
 */
public XContentParserConfiguration withFiltering(
    String prefixPath,
    Set<String> includeStrings,
    Set<String> excludeStrings,
    boolean filtersMatchFieldNamesWithDots
) {
    // Compile first, then strip the prefix from both filter sets with the same logic.
    FilterPath[] includePaths = scopeToPrefix(FilterPath.compile(includeStrings), prefixPath);
    FilterPath[] excludePaths = scopeToPrefix(FilterPath.compile(excludeStrings), prefixPath);
    return new XContentParserConfigurationImpl(
        registry,
        deprecationHandler,
        restApiVersion,
        includePaths,
        excludePaths,
        filtersMatchFieldNamesWithDots
    );
}

/**
 * Advances each compiled filter path past {@code prefixPath}, keeping only the sub-paths
 * that remain applicable below the prefix. Returns the input unchanged when there is no
 * prefix or no filters, and {@code null} when nothing survives the prefix match.
 */
private static FilterPath[] scopeToPrefix(FilterPath[] paths, String prefixPath) {
    if (prefixPath == null || paths == null) {
        return paths;
    }
    List<FilterPath> remaining = new ArrayList<>();
    for (FilterPath path : paths) {
        path.matches(prefixPath, remaining, true);
    }
    return remaining.isEmpty() ? null : remaining.toArray(FilterPath[]::new);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,27 @@ public interface XContentParserConfiguration {

RestApiVersion restApiVersion();

// TODO: Remove when serverless uses the new API
XContentParserConfiguration withFiltering(
Set<String> includeStrings,
Set<String> excludeStrings,
boolean filtersMatchFieldNamesWithDots
);

/**
* Replace the configured filtering.
*
* @param prefixPath The path at which parsing starts. Include/exclude paths are expressed from the
*                   document root; only the portions beneath this prefix are applied while filtering.
*                   Specify {@code null} if parsing starts from the root.
* @param includeStrings A set of strings representing paths to include during filtering.
* If specified, only these paths will be included in parsing.
* @param excludeStrings A set of strings representing paths to exclude during filtering.
* If specified, these paths will be excluded from parsing.
* @param filtersMatchFieldNamesWithDots Indicates whether filters should match field names containing dots ('.')
* as part of the field name.
*/
XContentParserConfiguration withFiltering(
String prefixPath,
Set<String> includeStrings,
Set<String> excludeStrings,
boolean filtersMatchFieldNamesWithDots
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.IntStream;

Expand Down Expand Up @@ -332,6 +333,24 @@ protected final void testFilter(Builder expected, Builder sample, Collection<Str
private void testFilter(Builder expected, Builder sample, Set<String> includes, Set<String> excludes, boolean matchFieldNamesWithDots)
    throws IOException {
    // Filtering from the document root must produce the expected output.
    assertFilterResult(expected.apply(createBuilder()), filter(sample, includes, excludes, matchFieldNamesWithDots));

    // Re-run the same filters anchored beneath an arbitrary prefix: every pattern is
    // re-rooted under the prefix (randomly joined with '.' or a wildcard '*.') and the
    // parser is told to start at that prefix, so the result must be identical.
    String rootPrefix = "root.path.random";
    includes = prefixPatterns(rootPrefix, includes);
    excludes = prefixPatterns(rootPrefix, excludes);
    assertFilterResult(expected.apply(createBuilder()), filterSub(sample, rootPrefix, includes, excludes, matchFieldNamesWithDots));
}

/** Re-anchors each pattern beneath {@code prefix}; returns {@code null} when {@code patterns} is null. */
private static Set<String> prefixPatterns(String prefix, Set<String> patterns) {
    if (patterns == null) {
        return null;
    }
    Set<String> prefixed = new HashSet<>();
    for (String pattern : patterns) {
        prefixed.add(prefix + (randomBoolean() ? "." : "*.") + pattern);
    }
    return prefixed;
}

public void testArrayWithEmptyObjectInInclude() throws IOException {
Expand Down Expand Up @@ -413,21 +432,36 @@ private XContentBuilder filter(Builder sample, Set<String> includes, Set<String>
&& matchFieldNamesWithDots == false) {
return filterOnBuilder(sample, includes, excludes);
}
return filterOnParser(sample, includes, excludes, matchFieldNamesWithDots);
return filterOnParser(sample, null, includes, excludes, matchFieldNamesWithDots);
}

/**
 * Filters {@code sample} through the parser-based path, with {@code root} as the prefix
 * at which parsing starts. Unlike {@link #filter}, this never uses builder-side filtering.
 */
private XContentBuilder filterSub(Builder sample, String root, Set<String> includes, Set<String> excludes, boolean matchFieldNamesWithDots)
    throws IOException {
    return filterOnParser(sample, root, includes, excludes, matchFieldNamesWithDots);
}

/** Applies include/exclude filtering while building the content, rather than while parsing it. */
private XContentBuilder filterOnBuilder(Builder sample, Set<String> includes, Set<String> excludes) throws IOException {
    XContentBuilder filteringBuilder = XContentBuilder.builder(getXContentType(), includes, excludes);
    return sample.apply(filteringBuilder);
}

private XContentBuilder filterOnParser(Builder sample, Set<String> includes, Set<String> excludes, boolean matchFieldNamesWithDots)
throws IOException {
private XContentBuilder filterOnParser(
Builder sample,
String rootPath,
Set<String> includes,
Set<String> excludes,
boolean matchFieldNamesWithDots
) throws IOException {
try (XContentBuilder builtSample = sample.apply(createBuilder())) {
BytesReference sampleBytes = BytesReference.bytes(builtSample);
try (
XContentParser parser = getXContentType().xContent()
.createParser(
XContentParserConfiguration.EMPTY.withFiltering(includes, excludes, matchFieldNamesWithDots),
XContentParserConfiguration.EMPTY.withFiltering(rootPath, includes, excludes, matchFieldNamesWithDots),
sampleBytes.streamInput()
)
) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.elasticsearch.script.UpdateScript;
import org.elasticsearch.script.UpsertCtxMap;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.search.lookup.SourceFilter;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
Expand Down Expand Up @@ -347,8 +348,9 @@ public static GetResult extractGetResult(
return null;
}
BytesReference sourceFilteredAsBytes = sourceAsBytes;
if (request.fetchSource().hasFilter()) {
sourceFilteredAsBytes = Source.fromMap(source, sourceContentType).filter(request.fetchSource().filter()).internalSourceRef();
SourceFilter sourceFilter = request.fetchSource().filter();
if (sourceFilter != null) {
sourceFilteredAsBytes = Source.fromMap(source, sourceContentType).filter(sourceFilter).internalSourceRef();
}

// TODO when using delete/none, we can still return the source as bytes by generating it (using the sourceContentType)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1361,6 +1361,7 @@ public DataStream getParentDataStream() {
}

public static final XContentParserConfiguration TS_EXTRACT_CONFIG = XContentParserConfiguration.EMPTY.withFiltering(
null,
Set.of(TIMESTAMP_FIELD_NAME),
null,
false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ public static class ExtractFromSource extends IndexRouting {
trackTimeSeriesRoutingHash = metadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_ROUTING_HASH_IN_ID);
List<String> routingPaths = metadata.getRoutingPaths();
isRoutingPath = Regex.simpleMatcher(routingPaths.toArray(String[]::new));
this.parserConfig = XContentParserConfiguration.EMPTY.withFiltering(Set.copyOf(routingPaths), null, true);
this.parserConfig = XContentParserConfiguration.EMPTY.withFiltering(null, Set.copyOf(routingPaths), null, true);
}

public boolean matchesField(String fieldName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ public static Tuple<XContentType, Map<String, Object>> convertToMap(
) throws ElasticsearchParseException {
XContentParserConfiguration config = XContentParserConfiguration.EMPTY;
if (include != null || exclude != null) {
config = config.withFiltering(include, exclude, false);
config = config.withFiltering(null, include, exclude, false);
}
return parseToType(ordered ? XContentParser::mapOrdered : XContentParser::map, bytes, xContentType, config);
}
Expand Down Expand Up @@ -266,7 +266,10 @@ public static Map<String, Object> convertToMap(
@Nullable Set<String> exclude
) throws ElasticsearchParseException {
try (
XContentParser parser = xContent.createParser(XContentParserConfiguration.EMPTY.withFiltering(include, exclude, false), input)
XContentParser parser = xContent.createParser(
XContentParserConfiguration.EMPTY.withFiltering(null, include, exclude, false),
input
)
) {
return ordered ? parser.mapOrdered() : parser.map();
} catch (IOException e) {
Expand Down Expand Up @@ -301,7 +304,7 @@ public static Map<String, Object> convertToMap(
) throws ElasticsearchParseException {
try (
XContentParser parser = xContent.createParser(
XContentParserConfiguration.EMPTY.withFiltering(include, exclude, false),
XContentParserConfiguration.EMPTY.withFiltering(null, include, exclude, false),
bytes,
offset,
length
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,31 +274,24 @@ public static Map<String, Object> filter(Map<String, Object> map, String[] inclu
*/
/**
 * Returns a function that filters a map of source fields by the given include/exclude
 * patterns. An empty or {@code null} includes array means "include everything"; an empty
 * or {@code null} excludes array means "exclude nothing".
 */
public static Function<Map<String, Object>, Map<String, Object>> filter(String[] includes, String[] excludes) {
    CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());

    CharacterRunAutomaton include = compileAutomaton(includes, matchAllAutomaton);
    CharacterRunAutomaton exclude = compileAutomaton(excludes, new CharacterRunAutomaton(Automata.makeEmpty()));

    // NOTE: We cannot use Operations.minus because of the special case that
    // we want all sub properties to match as soon as an object matches

    return (map) -> filter(map, include, 0, exclude, 0, matchAllAutomaton);
}

/**
 * Compiles simple-match patterns into a character automaton that also matches dots in
 * field names. Returns {@code defaultValue} when no patterns are supplied.
 */
public static CharacterRunAutomaton compileAutomaton(String[] patterns, CharacterRunAutomaton defaultValue) {
    if (patterns == null || patterns.length == 0) {
        return defaultValue;
    }
    Automaton automaton = makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(patterns));
    return new CharacterRunAutomaton(automaton, MAX_DETERMINIZED_STATES);
}

/** Make matches on objects also match dots in field names.
* For instance, if the original simple regex is `foo`, this will translate
* it into `foo` OR `foo.*`. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,24 @@ public static StoredFieldLoader fromSpec(StoredFieldsSpec spec) {
return create(spec.requiresSource(), spec.requiredStoredFields());
}

/**
 * Creates a new StoredFieldLoader using the default reader-selection heuristic
 * (i.e. without forcing a sequential leaf reader).
 */
public static StoredFieldLoader create(boolean loadSource, Set<String> fields) {
    boolean forceSequentialReader = false;
    return create(loadSource, fields, forceSequentialReader);
}

/**
* Creates a new StoredFieldLoader
* @param loadSource should this loader load the _source field
* @param fields a set of additional fields the loader should load
*
* @param loadSource indicates whether this loader should load the {@code _source} field.
* @param fields a set of additional fields that the loader should load.
* @param forceSequentialReader if {@code true}, forces the use of a sequential leaf reader;
* otherwise, uses the heuristic defined in {@link StoredFieldLoader#reader(LeafReaderContext, int[])}.
*/
public static StoredFieldLoader create(boolean loadSource, Set<String> fields) {
public static StoredFieldLoader create(boolean loadSource, Set<String> fields, boolean forceSequentialReader) {
List<String> fieldsToLoad = fieldsToLoad(loadSource, fields);
return new StoredFieldLoader() {
@Override
public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) throws IOException {
return new ReaderStoredFieldLoader(reader(ctx, docs), loadSource, fields);
return new ReaderStoredFieldLoader(forceSequentialReader ? sequentialReader(ctx) : reader(ctx, docs), loadSource, fields);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,11 @@ private GetResult innerGetFetch(
Map<String, DocumentField> metadataFields = null;
DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
SourceLoader loader = forceSyntheticSource
? new SourceLoader.Synthetic(mappingLookup.getMapping()::syntheticFieldLoader, mapperMetrics.sourceFieldMetrics())
: mappingLookup.newSourceLoader(mapperMetrics.sourceFieldMetrics());
? new SourceLoader.Synthetic(
() -> mappingLookup.getMapping().syntheticFieldLoader(fetchSourceContext.filter()),
mapperMetrics.sourceFieldMetrics()
)
: mappingLookup.newSourceLoader(fetchSourceContext.filter(), mapperMetrics.sourceFieldMetrics());
StoredFieldLoader storedFieldLoader = buildStoredFieldLoader(storedFields, fetchSourceContext, loader);
LeafStoredFieldLoader leafStoredFieldLoader = storedFieldLoader.getLoader(docIdAndVersion.reader.getContext(), null);
try {
Expand Down Expand Up @@ -367,10 +370,6 @@ private GetResult innerGetFetch(
if (mapperService.mappingLookup().isSourceEnabled() && fetchSourceContext.fetchSource()) {
Source source = loader.leaf(docIdAndVersion.reader, new int[] { docIdAndVersion.docId })
.source(leafStoredFieldLoader, docIdAndVersion.docId);

if (fetchSourceContext.hasFilter()) {
source = source.filter(fetchSourceContext.filter());
}
sourceBytes = source.internalSourceRef();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParserConfiguration;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -169,7 +170,7 @@ public MalformedValuesLayer(String fieldName) {
@Override
protected void writeValue(Object value, XContentBuilder b) throws IOException {
if (value instanceof BytesRef r) {
XContentDataHelper.decodeAndWrite(b, r);
XContentDataHelper.decodeAndWrite(XContentParserConfiguration.EMPTY, b, r);
} else {
b.value(value);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ public void validate(IndexSettings settings, boolean checkLimits) {
* with the source loading strategy declared on the source field mapper.
*/
try {
sourceMapper().newSourceLoader(mapping(), mapperMetrics.sourceFieldMetrics());
mappingLookup.newSourceLoader(null, mapperMetrics.sourceFieldMetrics());
} catch (IllegalArgumentException e) {
mapperMetrics.sourceFieldMetrics().recordSyntheticSourceIncompatibleMapping();
throw e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,4 @@ public FieldAliasMapper build(MapperBuilderContext context) {
return new FieldAliasMapper(leafName(), fullName, path);
}
}

@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
return SourceLoader.SyntheticFieldLoader.NOTHING;
}
}
Loading