-
Notifications
You must be signed in to change notification settings - Fork 25.7k
Store ignored source in unique stored fields per entry #132142
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
9c68921
09ecbb8
f8260c2
ab1550e
a11d2ac
da7c4f1
3e04818
5a44487
38e5420
ae1c258
6f75669
3cd0415
d042cab
d5e9e86
a5838d9
50e6c57
01a46f0
5a80a3e
5f52d64
2869608
da674b9
c57d2eb
c2dec2f
fcaf270
1741541
3181236
bdd031a
5edc55b
c392437
6d6b9f0
64d5f6b
df518aa
7f502a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,9 @@ | |
|
|
||
| import org.apache.lucene.index.LeafReaderContext; | ||
| import org.apache.lucene.index.SortedSetDocValues; | ||
| import org.apache.lucene.util.BytesRef; | ||
| import org.elasticsearch.index.IndexVersion; | ||
| import org.elasticsearch.index.IndexVersions; | ||
| import org.elasticsearch.search.fetch.StoredFieldsSpec; | ||
| import org.elasticsearch.xcontent.XContentParser; | ||
| import org.elasticsearch.xcontent.XContentParserConfiguration; | ||
|
|
@@ -24,6 +27,7 @@ | |
| import java.util.Optional; | ||
| import java.util.Set; | ||
| import java.util.Stack; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| /** | ||
| * Block loader for fields that use fallback synthetic source implementation. | ||
|
|
@@ -39,10 +43,14 @@ | |
| public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader { | ||
| private final Reader<?> reader; | ||
| private final String fieldName; | ||
| private final Set<String> fieldPaths; | ||
| private final IndexVersion indexCreatedVersion; | ||
|
|
||
| protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) { | ||
| protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName, IndexVersion indexCreatedVersion) { | ||
| this.reader = reader; | ||
| this.fieldName = fieldName; | ||
| this.indexCreatedVersion = indexCreatedVersion; | ||
| this.fieldPaths = splitIntoFieldPaths(fieldName); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice, now we do this once per field and shard instead of once per field and segment. |
||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -52,12 +60,19 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws | |
|
|
||
| @Override | ||
| public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { | ||
| return new IgnoredSourceRowStrideReader<>(fieldName, reader); | ||
| return new IgnoredSourceRowStrideReader<>(fieldName, fieldPaths, reader, indexCreatedVersion); | ||
| } | ||
|
|
||
| @Override | ||
| public StoredFieldsSpec rowStrideStoredFieldSpec() { | ||
| return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME)); | ||
| Set<String> ignoredFieldNames; | ||
| if (indexCreatedVersion.onOrAfter(IndexVersions.IGNORED_SOURCE_FIELDS_PER_ENTRY)) { | ||
| ignoredFieldNames = fieldPaths.stream().map(IgnoredSourceFieldMapper::ignoredFieldName).collect(Collectors.toSet()); | ||
| } else { | ||
| ignoredFieldNames = Set.of(IgnoredSourceFieldMapper.NAME); | ||
| } | ||
|
|
||
| return new StoredFieldsSpec(false, false, ignoredFieldNames); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -70,46 +85,73 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException | |
| throw new UnsupportedOperationException(); | ||
| } | ||
|
|
||
| public static Set<String> splitIntoFieldPaths(String fieldName) { | ||
| var paths = new HashSet<String>(); | ||
| paths.add("_doc"); | ||
| var current = new StringBuilder(); | ||
| for (var part : fieldName.split("\\.")) { | ||
| if (current.isEmpty() == false) { | ||
| current.append('.'); | ||
| } | ||
| current.append(part); | ||
| paths.add(current.toString()); | ||
| } | ||
| return paths; | ||
| } | ||
|
|
||
| private static class IgnoredSourceRowStrideReader<T> implements RowStrideReader { | ||
| // Contains name of the field and all its parents | ||
|
||
| private final Set<String> fieldNames; | ||
| private final String fieldName; | ||
| private final Set<String> fieldPaths; | ||
| private final Reader<T> reader; | ||
| private final IndexVersion indexCreatedVersion; | ||
|
|
||
| IgnoredSourceRowStrideReader(String fieldName, Reader<T> reader) { | ||
| IgnoredSourceRowStrideReader(String fieldName, Set<String> fieldPaths, Reader<T> reader, IndexVersion indexCreatedVersion) { | ||
| this.fieldName = fieldName; | ||
| this.fieldPaths = fieldPaths; | ||
| this.reader = reader; | ||
| this.fieldNames = new HashSet<>() { | ||
| { | ||
| add("_doc"); | ||
| } | ||
| }; | ||
|
|
||
| var current = new StringBuilder(); | ||
| for (String part : fieldName.split("\\.")) { | ||
| if (current.isEmpty() == false) { | ||
| current.append('.'); | ||
| } | ||
| current.append(part); | ||
| fieldNames.add(current.toString()); | ||
| } | ||
|
|
||
| this.indexCreatedVersion = indexCreatedVersion; | ||
| } | ||
|
|
||
| @Override | ||
| public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { | ||
| var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME); | ||
| if (ignoredSource == null) { | ||
| builder.appendNull(); | ||
| return; | ||
| } | ||
|
|
||
| Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>(); | ||
|
|
||
| for (Object value : ignoredSource) { | ||
| IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); | ||
| if (fieldNames.contains(nameValue.name())) { | ||
| valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue); | ||
| if (indexCreatedVersion.onOrAfter(IndexVersions.IGNORED_SOURCE_FIELDS_PER_ENTRY)) { | ||
|
||
| List<Object> ignoredSource = null; | ||
| for (var parentPath : fieldPaths) { | ||
| ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.ignoredFieldName(parentPath)); | ||
| if (ignoredSource == null) { | ||
| continue; | ||
| } | ||
| assert ignoredSource.size() == 1; | ||
|
|
||
| List<IgnoredSourceFieldMapper.NameValue> nameValues = IgnoredSourceFieldMapper.decodeMulti( | ||
| (BytesRef) ignoredSource.getFirst() | ||
| ); | ||
|
|
||
| for (var nameValue : nameValues) { | ||
| assert fieldPaths.contains(nameValue.name()); | ||
| valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue); | ||
| } | ||
| } | ||
| if (valuesForFieldAndParents.isEmpty()) { | ||
| builder.appendNull(); | ||
| return; | ||
| } | ||
|
|
||
| } else { | ||
| var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME); | ||
| if (ignoredSource == null) { | ||
| builder.appendNull(); | ||
| return; | ||
| } | ||
|
|
||
| for (Object value : ignoredSource) { | ||
| IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); | ||
| if (fieldPaths.contains(nameValue.name())) { | ||
| valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue); | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With this per-field ignored source, this can be improved in a follow-up.