-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Improve reading ignored source in esql #130676
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the "Elastic License | ||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
* Public License v 1"; you may not use this file except in compliance with, at | ||
* your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
* License v3.0 only", or the "Server Side Public License, v 1". | ||
*/ | ||
|
||
package org.elasticsearch.index.fieldvisitor; | ||
|
||
import org.apache.lucene.index.FieldInfo; | ||
import org.apache.lucene.index.LeafReader; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.StoredFieldVisitor; | ||
import org.elasticsearch.common.CheckedBiConsumer; | ||
import org.elasticsearch.common.bytes.BytesReference; | ||
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader; | ||
import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader; | ||
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; | ||
import org.elasticsearch.search.fetch.StoredFieldsSpec; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
class IgnoredSourceFieldLoader extends StoredFieldLoader { | ||
|
||
final Set<String> potentialFieldsInIgnoreSource; | ||
|
||
IgnoredSourceFieldLoader(StoredFieldsSpec spec) { | ||
Set<String> potentialFieldsInIgnoreSource = new HashSet<>(); | ||
for (String requiredStoredField : spec.requiredStoredFields()) { | ||
if (requiredStoredField.startsWith(IgnoredSourceFieldMapper.NAME)) { | ||
String fieldName = requiredStoredField.substring(IgnoredSourceFieldMapper.NAME.length()); | ||
potentialFieldsInIgnoreSource.addAll(FallbackSyntheticSourceBlockLoader.splitIntoFieldPaths(fieldName)); | ||
} | ||
} | ||
this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource; | ||
} | ||
|
||
@Override | ||
public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) throws IOException { | ||
var reader = sequentialReader(ctx); | ||
var visitor = new SFV(potentialFieldsInIgnoreSource); | ||
return new LeafStoredFieldLoader() { | ||
|
||
private int doc = -1; | ||
|
||
@Override | ||
public void advanceTo(int doc) throws IOException { | ||
if (doc != this.doc) { | ||
visitor.reset(); | ||
reader.accept(doc, visitor); | ||
this.doc = doc; | ||
} | ||
} | ||
|
||
@Override | ||
public BytesReference source() { | ||
return null; | ||
} | ||
|
||
@Override | ||
public String id() { | ||
return null; | ||
} | ||
|
||
@Override | ||
public String routing() { | ||
return null; | ||
} | ||
|
||
@Override | ||
public Map<String, List<Object>> storedFields() { | ||
return Map.of(IgnoredSourceFieldMapper.NAME, visitor.values); | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public List<String> fieldsToLoad() { | ||
return List.of(IgnoredSourceFieldMapper.NAME); | ||
} | ||
|
||
static class SFV extends StoredFieldVisitor { | ||
|
||
boolean done; | ||
final List<Object> values = new ArrayList<>(); | ||
final Set<String> potentialFieldsInIgnoreSource; | ||
|
||
SFV(Set<String> potentialFieldsInIgnoreSource) { | ||
this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource; | ||
} | ||
|
||
@Override | ||
public Status needsField(FieldInfo fieldInfo) throws IOException { | ||
if (done) { | ||
return Status.STOP; | ||
} else if (IgnoredSourceFieldMapper.NAME.equals(fieldInfo.name)) { | ||
return Status.YES; | ||
} else { | ||
return Status.NO; | ||
} | ||
} | ||
|
||
@Override | ||
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { | ||
var result = IgnoredSourceFieldMapper.decodeIfMatch(value, potentialFieldsInIgnoreSource); | ||
if (result != null) { | ||
// TODO: can't do this in case multiple entries for the same field name. (objects, arrays etc.) | ||
// done = true; | ||
values.add(result); | ||
} | ||
} | ||
|
||
void reset() { | ||
values.clear(); | ||
done = false; | ||
} | ||
|
||
} | ||
|
||
static boolean supports(StoredFieldsSpec spec) { | ||
return spec.requiresSource() == false | ||
&& spec.requiresMetadata() == false | ||
&& spec.requiredStoredFields().size() == 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When a query requests multiple values, the |
||
&& spec.requiredStoredFields().iterator().next().startsWith(IgnoredSourceFieldMapper.NAME); | ||
} | ||
|
||
// TODO: use provided one | ||
private static CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> sequentialReader(LeafReaderContext ctx) throws IOException { | ||
LeafReader leafReader = ctx.reader(); | ||
if (leafReader instanceof SequentialStoredFieldsLeafReader lf) { | ||
return lf.getSequentialStoredFieldsReader()::document; | ||
} | ||
return leafReader.storedFields()::document; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,10 +39,12 @@ | |
public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader { | ||
private final Reader<?> reader; | ||
private final String fieldName; | ||
private final Set<String> fieldPaths; | ||
|
||
protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) { | ||
this.reader = reader; | ||
this.fieldName = fieldName; | ||
this.fieldPaths = splitIntoFieldPaths(fieldName); | ||
} | ||
|
||
@Override | ||
|
@@ -52,12 +54,12 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws | |
|
||
@Override | ||
public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { | ||
return new IgnoredSourceRowStrideReader<>(fieldName, reader); | ||
return new IgnoredSourceRowStrideReader<>(fieldName, reader, fieldPaths); | ||
} | ||
|
||
@Override | ||
public StoredFieldsSpec rowStrideStoredFieldSpec() { | ||
return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME)); | ||
return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME + "." + fieldName)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This specified field doesn't actually exist. This is fine when we're using our custom IgnoredSourceFieldLoader, but sometimes we fall back to the default StoredFieldLoader (when we're loading more than one value from ignored source, or when we're loading some values from other stored fields). |
||
} | ||
|
||
@Override | ||
|
@@ -70,7 +72,31 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException | |
throw new UnsupportedOperationException(); | ||
} | ||
|
||
private record IgnoredSourceRowStrideReader<T>(String fieldName, Reader<T> reader) implements RowStrideReader { | ||
public static Set<String> splitIntoFieldPaths(String fieldName) { | ||
var paths = new HashSet<String>(); | ||
paths.add("_doc"); | ||
var current = new StringBuilder(); | ||
for (var part : fieldName.split("\\.")) { | ||
if (current.isEmpty() == false) { | ||
current.append('.'); | ||
} | ||
current.append(part); | ||
paths.add(current.toString()); | ||
} | ||
return paths; | ||
} | ||
|
||
private static final class IgnoredSourceRowStrideReader<T> implements RowStrideReader { | ||
private final String fieldName; | ||
private final Reader<T> reader; | ||
private final Set<String> fieldPaths; | ||
|
||
private IgnoredSourceRowStrideReader(String fieldName, Reader<T> reader, Set<String> fieldPaths) { | ||
this.fieldName = fieldName; | ||
this.reader = reader; | ||
this.fieldPaths = fieldPaths; | ||
} | ||
|
||
@Override | ||
public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { | ||
var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME); | ||
|
@@ -80,26 +106,9 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I | |
} | ||
|
||
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>(); | ||
|
||
// Contains name of the field and all its parents | ||
Set<String> fieldNames = new HashSet<>() { | ||
{ | ||
add("_doc"); | ||
} | ||
}; | ||
|
||
var current = new StringBuilder(); | ||
for (String part : fieldName.split("\\.")) { | ||
if (current.isEmpty() == false) { | ||
current.append('.'); | ||
} | ||
current.append(part); | ||
fieldNames.add(current.toString()); | ||
} | ||
|
||
for (Object value : ignoredSource) { | ||
IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); | ||
if (fieldNames.contains(nameValue.name())) { | ||
IgnoredSourceFieldMapper.NameValue nameValue = (IgnoredSourceFieldMapper.NameValue) value; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we fallback to the default |
||
if (fieldPaths.contains(nameValue.name())) { | ||
valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue); | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note this doesn't work in the case that multiple ignored source entries exist for the same field. This is possible with nested objects and arrays that share the same name.
Looks like the best way to achieve is for each ignored source entry to use a unique stored field name and not reuse
_ignored_source
as stored field name. This shouldn't be too difficult, given that each ignored source entry is already stored separately, but just with the same name.