Skip to content

Commit f62f4f6

Browse files
committed
Add Optional Source Filtering to Source Loaders
Spin-off of elastic#113036. This change introduces optional source filtering directly within source loaders (both synthetic and stored). The main benefit is seen in synthetic source loaders, as synthetic fields are stored independently. By filtering while loading the synthetic source, generating the source becomes linear in the number of fields that match the filter. This update also modifies the get document API to apply source filters earlier—directly through the source loader. The search API, however, is not affected by this change, since the loaded source is still used by other features (e.g., highlighting, fields, nested hits), and source filtering is always applied as the final step. A follow-up will be required to ensure careful handling of all search-related scenarios.
1 parent 8cb1266 commit f62f4f6

31 files changed

+629
-164
lines changed

libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/XContentParserConfigurationImpl.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
import org.elasticsearch.xcontent.provider.filtering.FilterPathBasedFilter;
2020
import org.elasticsearch.xcontent.support.filtering.FilterPath;
2121

22+
import java.util.ArrayList;
23+
import java.util.List;
2224
import java.util.Set;
2325

2426
public class XContentParserConfigurationImpl implements XContentParserConfiguration {
@@ -102,20 +104,49 @@ public RestApiVersion restApiVersion() {
102104
}
103105

104106
/**
 * Returns a new configuration whose include/exclude filters are compiled from the given patterns.
 * <p>
 * When {@code rootPath} is {@code null} the compiled filters are used as they are. Otherwise every
 * compiled {@link FilterPath} is passed through {@code matches(rootPath, list, true)} and only the
 * paths collected in {@code list} are kept; if nothing is collected, that filter array becomes {@code null}.
 * <p>
 * NOTE(review): presumably this re-roots the filters so they apply to a sub-document located at
 * {@code rootPath} - confirm against {@code FilterPath#matches}.
 *
 * @param rootPath                       path the filters are resolved against, or {@code null} to skip that step
 * @param includeStrings                 include patterns, compiled with {@code FilterPath.compile}
 * @param excludeStrings                 exclude patterns, compiled with {@code FilterPath.compile}
 * @param filtersMatchFieldNamesWithDots passed through unchanged to the new configuration
 */
public XContentParserConfiguration withFiltering(
107+
String rootPath,
105108
Set<String> includeStrings,
106109
Set<String> excludeStrings,
107110
boolean filtersMatchFieldNamesWithDots
108111
) {
112+
FilterPath[] includePaths = FilterPath.compile(includeStrings);
113+
FilterPath[] excludePaths = FilterPath.compile(excludeStrings);
114+
115+
// Narrow the compiled filters to rootPath; skipped entirely when no root path is given.
if (rootPath != null) {
116+
if (includePaths != null) {
117+
List<FilterPath> includeFilters = new ArrayList<>();
118+
for (var incl : includePaths) {
119+
incl.matches(rootPath, includeFilters, true);
120+
}
121+
// NOTE(review): if includes were supplied but none survive, includePaths becomes null here.
// Confirm callers never reach this point for a rootPath the includes rule out; otherwise
// a null include filter may behave as "no include filter" - TODO confirm.
includePaths = includeFilters.isEmpty() ? null : includeFilters.toArray(FilterPath[]::new);
122+
}
123+
124+
if (excludePaths != null) {
125+
List<FilterPath> excludeFilters = new ArrayList<>();
126+
for (var excl : excludePaths) {
127+
excl.matches(rootPath, excludeFilters, true);
128+
}
129+
excludePaths = excludeFilters.isEmpty() ? null : excludeFilters.toArray(FilterPath[]::new);
130+
}
131+
}
109132
return new XContentParserConfigurationImpl(
110133
registry,
111134
deprecationHandler,
112135
restApiVersion,
113-
FilterPath.compile(includeStrings),
114-
FilterPath.compile(excludeStrings),
136+
includePaths,
137+
excludePaths,
115138
filtersMatchFieldNamesWithDots
116139
);
117140
}
118141

142+
/**
 * Convenience overload without a root path: delegates to the four-argument
 * {@code withFiltering} with {@code rootPath == null}.
 */
public XContentParserConfiguration withFiltering(
143+
Set<String> includeStrings,
144+
Set<String> excludeStrings,
145+
boolean filtersMatchFieldNamesWithDots
146+
) {
147+
return withFiltering(null, includeStrings, excludeStrings, filtersMatchFieldNamesWithDots);
148+
}
149+
119150
public JsonParser filter(JsonParser parser) {
120151
JsonParser filtered = parser;
121152
if (excludes != null) {

libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentParserConfiguration.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,11 @@ XContentParserConfiguration withFiltering(
5757
Set<String> excludeStrings,
5858
boolean filtersMatchFieldNamesWithDots
5959
);
60+
61+
/**
 * Variant of {@code withFiltering} that additionally takes a {@code rootPath}.
 * <p>
 * NOTE(review): presumably {@code rootPath} is the path of the sub-document the parser will read,
 * against which the include/exclude patterns are resolved - confirm against the implementation.
 */
XContentParserConfiguration withFiltering(
62+
String rootPath,
63+
Set<String> includeStrings,
64+
Set<String> excludeStrings,
65+
boolean filtersMatchFieldNamesWithDots
66+
);
6067
}

libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/AbstractXContentFilteringTestCase.java

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.io.IOException;
2323
import java.util.Arrays;
2424
import java.util.Collection;
25+
import java.util.HashSet;
2526
import java.util.Set;
2627
import java.util.stream.IntStream;
2728

@@ -332,6 +333,24 @@ protected final void testFilter(Builder expected, Builder sample, Collection<Str
332333
/**
 * Checks the filter result twice: first with the given includes/excludes, then with every
 * pattern prefixed by a fixed root path and the same root passed to {@code filterSub}.
 * Both runs are asserted against the same {@code expected} builder output.
 */
private void testFilter(Builder expected, Builder sample, Set<String> includes, Set<String> excludes, boolean matchFieldNamesWithDots)
333334
throws IOException {
334335
assertFilterResult(expected.apply(createBuilder()), filter(sample, includes, excludes, matchFieldNamesWithDots));
336+
337+
String rootPrefix = "root.path.random";
338+
if (includes != null) {
339+
Set<String> rootIncludes = new HashSet<>();
340+
for (var incl : includes) {
341+
// Randomly join with "." or "*." so both an exact and a wildcard form of the root prefix are exercised.
rootIncludes.add(rootPrefix + (randomBoolean() ? "." : "*.") + incl);
342+
}
343+
includes = rootIncludes;
344+
}
345+
346+
if (excludes != null) {
347+
Set<String> rootExcludes = new HashSet<>();
348+
for (var excl : excludes) {
349+
rootExcludes.add(rootPrefix + (randomBoolean() ? "." : "*.") + excl);
350+
}
351+
excludes = rootExcludes;
352+
}
353+
// Second run: prefixed patterns plus rootPrefix as the root path must give the same expected output.
assertFilterResult(expected.apply(createBuilder()), filterSub(sample, rootPrefix, includes, excludes, matchFieldNamesWithDots));
335354
}
336355

337356
public void testArrayWithEmptyObjectInInclude() throws IOException {
@@ -413,21 +432,36 @@ private XContentBuilder filter(Builder sample, Set<String> includes, Set<String>
413432
&& matchFieldNamesWithDots == false) {
414433
return filterOnBuilder(sample, includes, excludes);
415434
}
416-
return filterOnParser(sample, includes, excludes, matchFieldNamesWithDots);
435+
return filterOnParser(sample, null, includes, excludes, matchFieldNamesWithDots);
436+
}
437+
438+
/**
 * Filters {@code sample} on the parser with the given {@code root} passed as the root path.
 * Always delegates to {@code filterOnParser}.
 */
private XContentBuilder filterSub(
439+
Builder sample,
440+
String root,
441+
Set<String> includes,
442+
Set<String> excludes,
443+
boolean matchFieldNamesWithDots
444+
) throws IOException {
445+
return filterOnParser(sample, root, includes, excludes, matchFieldNamesWithDots);
417446
}
418447

419448
/**
 * Applies {@code sample} to a builder created via
 * {@code XContentBuilder.builder(getXContentType(), includes, excludes)} and returns the result.
 */
private XContentBuilder filterOnBuilder(Builder sample, Set<String> includes, Set<String> excludes) throws IOException {
420449
return sample.apply(XContentBuilder.builder(getXContentType(), includes, excludes));
421450
}
422451

423-
private XContentBuilder filterOnParser(Builder sample, Set<String> includes, Set<String> excludes, boolean matchFieldNamesWithDots)
424-
throws IOException {
452+
private XContentBuilder filterOnParser(
453+
Builder sample,
454+
String rootPath,
455+
Set<String> includes,
456+
Set<String> excludes,
457+
boolean matchFieldNamesWithDots
458+
) throws IOException {
425459
try (XContentBuilder builtSample = sample.apply(createBuilder())) {
426460
BytesReference sampleBytes = BytesReference.bytes(builtSample);
427461
try (
428462
XContentParser parser = getXContentType().xContent()
429463
.createParser(
430-
XContentParserConfiguration.EMPTY.withFiltering(includes, excludes, matchFieldNamesWithDots),
464+
XContentParserConfiguration.EMPTY.withFiltering(rootPath, includes, excludes, matchFieldNamesWithDots),
431465
sampleBytes.streamInput()
432466
)
433467
) {

server/src/main/java/org/elasticsearch/common/xcontent/support/XContentMapValues.java

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -274,31 +274,24 @@ public static Map<String, Object> filter(Map<String, Object> map, String[] inclu
274274
*/
275275
public static Function<Map<String, Object>, Map<String, Object>> filter(String[] includes, String[] excludes) {
276276
CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());
277-
278-
CharacterRunAutomaton include;
279-
if (includes == null || includes.length == 0) {
280-
include = matchAllAutomaton;
281-
} else {
282-
Automaton includeA = Regex.simpleMatchToAutomaton(includes);
283-
includeA = makeMatchDotsInFieldNames(includeA);
284-
include = new CharacterRunAutomaton(includeA, MAX_DETERMINIZED_STATES);
285-
}
286-
287-
Automaton excludeA;
288-
if (excludes == null || excludes.length == 0) {
289-
excludeA = Automata.makeEmpty();
290-
} else {
291-
excludeA = Regex.simpleMatchToAutomaton(excludes);
292-
excludeA = makeMatchDotsInFieldNames(excludeA);
293-
}
294-
CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA, MAX_DETERMINIZED_STATES);
277+
// Null or empty includes fall back to the match-all automaton; null or empty excludes
// fall back to an automaton built from Automata.makeEmpty().
CharacterRunAutomaton include = compileAutomaton(includes, matchAllAutomaton);
278+
CharacterRunAutomaton exclude = compileAutomaton(excludes, new CharacterRunAutomaton(Automata.makeEmpty()));
295279

296280
// NOTE: We cannot use Operations.minus because of the special case that
297281
// we want all sub properties to match as soon as an object matches
298282

299283
return (map) -> filter(map, include, 0, exclude, 0, matchAllAutomaton);
300284
}
301285

286+
/**
 * Compiles simple-match patterns into a {@link CharacterRunAutomaton}.
 *
 * @param patterns     patterns to compile; may be {@code null} or empty
 * @param defaultValue automaton returned as-is when {@code patterns} is {@code null} or empty
 * @return {@code defaultValue} when there are no patterns; otherwise an automaton built from
 *         {@code Regex.simpleMatchToAutomaton(patterns)}, passed through
 *         {@code makeMatchDotsInFieldNames}, and limited by {@code MAX_DETERMINIZED_STATES}
 */
public static CharacterRunAutomaton compileAutomaton(String[] patterns, CharacterRunAutomaton defaultValue) {
287+
if (patterns == null || patterns.length == 0) {
288+
return defaultValue;
289+
}
290+
var aut = Regex.simpleMatchToAutomaton(patterns);
291+
aut = makeMatchDotsInFieldNames(aut);
292+
return new CharacterRunAutomaton(aut, MAX_DETERMINIZED_STATES);
293+
}
294+
302295
/** Make matches on objects also match dots in field names.
303296
* For instance, if the original simple regex is `foo`, this will translate
304297
* it into `foo` OR `foo.*`. */

server/src/main/java/org/elasticsearch/index/get/ShardGetService.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -307,8 +307,14 @@ private GetResult innerGetFetch(
307307
Map<String, DocumentField> metadataFields = null;
308308
DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
309309
SourceLoader loader = forceSyntheticSource
310-
? new SourceLoader.Synthetic(mappingLookup.getMapping()::syntheticFieldLoader, mapperMetrics.sourceFieldMetrics())
311-
: mappingLookup.newSourceLoader(mapperMetrics.sourceFieldMetrics());
310+
? new SourceLoader.Synthetic(
311+
() -> mappingLookup.getMapping().syntheticFieldLoader(fetchSourceContext.hasFilter() ? fetchSourceContext.filter() : null),
312+
mapperMetrics.sourceFieldMetrics()
313+
)
314+
: mappingLookup.newSourceLoader(
315+
fetchSourceContext.hasFilter() ? fetchSourceContext.filter() : null,
316+
mapperMetrics.sourceFieldMetrics()
317+
);
312318
StoredFieldLoader storedFieldLoader = buildStoredFieldLoader(storedFields, fetchSourceContext, loader);
313319
LeafStoredFieldLoader leafStoredFieldLoader = storedFieldLoader.getLoader(docIdAndVersion.reader.getContext(), null);
314320
try {
@@ -367,10 +373,6 @@ private GetResult innerGetFetch(
367373
if (mapperService.mappingLookup().isSourceEnabled() && fetchSourceContext.fetchSource()) {
368374
Source source = loader.leaf(docIdAndVersion.reader, new int[] { docIdAndVersion.docId })
369375
.source(leafStoredFieldLoader, docIdAndVersion.docId);
370-
371-
if (fetchSourceContext.hasFilter()) {
372-
source = source.filter(fetchSourceContext.filter());
373-
}
374376
sourceBytes = source.internalSourceRef();
375377
}
376378

server/src/main/java/org/elasticsearch/index/mapper/CompositeSyntheticFieldLoader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.apache.lucene.index.LeafReader;
1313
import org.apache.lucene.util.BytesRef;
1414
import org.elasticsearch.xcontent.XContentBuilder;
15+
import org.elasticsearch.xcontent.XContentParserConfiguration;
1516

1617
import java.io.IOException;
1718
import java.util.ArrayList;
@@ -169,7 +170,7 @@ public MalformedValuesLayer(String fieldName) {
169170
@Override
170171
protected void writeValue(Object value, XContentBuilder b) throws IOException {
171172
if (value instanceof BytesRef r) {
172-
XContentDataHelper.decodeAndWrite(b, r);
173+
XContentDataHelper.decodeAndWrite(XContentParserConfiguration.EMPTY, b, r);
173174
} else {
174175
b.value(value);
175176
}

server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ public void validate(IndexSettings settings, boolean checkLimits) {
134134
* with the source loading strategy declared on the source field mapper.
135135
*/
136136
try {
137-
sourceMapper().newSourceLoader(mapping(), mapperMetrics.sourceFieldMetrics());
137+
mappingLookup.newSourceLoader(null, mapperMetrics.sourceFieldMetrics());
138138
} catch (IllegalArgumentException e) {
139139
mapperMetrics.sourceFieldMetrics().recordSyntheticSourceIncompatibleMapping();
140140
throw e;

server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,4 @@ public FieldAliasMapper build(MapperBuilderContext context) {
156156
return new FieldAliasMapper(leafName(), fullName, path);
157157
}
158158
}
159-
160-
@Override
161-
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
162-
return SourceLoader.SyntheticFieldLoader.NOTHING;
163-
}
164159
}

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.elasticsearch.script.Script;
3232
import org.elasticsearch.script.ScriptType;
3333
import org.elasticsearch.search.lookup.SearchLookup;
34+
import org.elasticsearch.search.lookup.SourceFilter;
3435
import org.elasticsearch.xcontent.ToXContent;
3536
import org.elasticsearch.xcontent.ToXContentFragment;
3637
import org.elasticsearch.xcontent.XContentBuilder;
@@ -484,15 +485,14 @@ final SyntheticSourceMode syntheticSourceMode() {
484485
/**
485486
* Returns synthetic field loader for the mapper.
486487
* If mapper does not support synthetic source, it is handled using generic implementation
487-
* in {@link DocumentParser#parseObjectOrField} and {@link ObjectMapper#syntheticFieldLoader()}.
488+
* in {@link DocumentParser#parseObjectOrField} and {@link ObjectMapper#syntheticFieldLoader(SourceFilter)}.
488489
* <br>
489490
*
490491
* This method is final in order to support common use cases like fallback synthetic source.
491492
* Mappers that need custom support of synthetic source should override {@link #syntheticSourceSupport()}.
492493
*
493494
* @return implementation of {@link SourceLoader.SyntheticFieldLoader}
494495
*/
495-
@Override
496496
public final SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
497497
if (hasScript()) {
498498
return SourceLoader.SyntheticFieldLoader.NOTHING;

server/src/main/java/org/elasticsearch/index/mapper/IgnoreMalformedStoredValues.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.apache.lucene.util.BytesRef;
1414
import org.elasticsearch.xcontent.XContentBuilder;
1515
import org.elasticsearch.xcontent.XContentParser;
16+
import org.elasticsearch.xcontent.XContentParserConfiguration;
1617

1718
import java.io.IOException;
1819
import java.util.List;
@@ -128,7 +129,7 @@ public int count() {
128129
public void write(XContentBuilder b) throws IOException {
129130
for (Object v : values) {
130131
if (v instanceof BytesRef r) {
131-
XContentDataHelper.decodeAndWrite(b, r);
132+
XContentDataHelper.decodeAndWrite(XContentParserConfiguration.EMPTY, b, r);
132133
} else {
133134
b.value(v);
134135
}

0 commit comments

Comments
 (0)