IgnoredSourceFieldLoader.java (new file)
@@ -0,0 +1,142 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.index.fieldvisitor;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StoredFieldVisitor;
import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
import org.elasticsearch.search.fetch.StoredFieldsSpec;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

class IgnoredSourceFieldLoader extends StoredFieldLoader {

    final Set<String> potentialFieldsInIgnoreSource;

    IgnoredSourceFieldLoader(StoredFieldsSpec spec) {
        Set<String> potentialFieldsInIgnoreSource = new HashSet<>();
        for (String requiredStoredField : spec.requiredStoredFields()) {
            if (requiredStoredField.startsWith(IgnoredSourceFieldMapper.NAME + ".")) {
                // Strip the "_ignored_source." prefix (including the separator dot) so the
                // resulting paths match the names stored in ignored source entries.
                String fieldName = requiredStoredField.substring(IgnoredSourceFieldMapper.NAME.length() + 1);
                potentialFieldsInIgnoreSource.addAll(FallbackSyntheticSourceBlockLoader.splitIntoFieldPaths(fieldName));
            }
        }
        this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource;
    }

    @Override
    public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) throws IOException {
        var reader = sequentialReader(ctx);
        var visitor = new SFV(potentialFieldsInIgnoreSource);
        return new LeafStoredFieldLoader() {

            private int doc = -1;

            @Override
            public void advanceTo(int doc) throws IOException {
                if (doc != this.doc) {
                    visitor.reset();
                    reader.accept(doc, visitor);
                    this.doc = doc;
                }
            }

            @Override
            public BytesReference source() {
                return null;
            }

            @Override
            public String id() {
                return null;
            }

            @Override
            public String routing() {
                return null;
            }

            @Override
            public Map<String, List<Object>> storedFields() {
                return Map.of(IgnoredSourceFieldMapper.NAME, visitor.values);
            }
        };
    }

    @Override
    public List<String> fieldsToLoad() {
        return List.of(IgnoredSourceFieldMapper.NAME);
    }

    static class SFV extends StoredFieldVisitor {

        boolean done;
        final List<Object> values = new ArrayList<>();
        final Set<String> potentialFieldsInIgnoreSource;

        SFV(Set<String> potentialFieldsInIgnoreSource) {
            this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource;
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            if (done) {
                return Status.STOP;
            } else if (IgnoredSourceFieldMapper.NAME.equals(fieldInfo.name)) {
                return Status.YES;
            } else {
                return Status.NO;
            }
        }

        @Override
        public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
            var result = IgnoredSourceFieldMapper.decodeIfMatch(value, potentialFieldsInIgnoreSource);
            if (result != null) {
                // TODO: can't do this when there can be multiple entries for the same field name (objects, arrays, etc.)
                // done = true;

Review comment (PR author):
Note this doesn't work in the case that multiple ignored source entries exist for the same field. This is possible with nested objects and arrays that share the same name.

Looks like the best way to achieve this is for each ignored source entry to use a unique stored field name and not reuse _ignored_source as the stored field name. This shouldn't be too difficult, given that each ignored source entry is already stored separately, just under the same name.
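
A rough sketch of that idea, assuming a hypothetical write-time helper (addIgnoredSourceEntry is made up; the real write path in IgnoredSourceFieldMapper is not part of this diff) built on plain Lucene stored fields:

// Hypothetical write-time helper: store each ignored-source entry under a per-field
// stored field name instead of the shared "_ignored_source" name, so readers can
// filter entries by stored field name alone.
static void addIgnoredSourceEntry(org.apache.lucene.document.Document doc, IgnoredSourceFieldMapper.NameValue entry, byte[] encoded) {
    String storedFieldName = IgnoredSourceFieldMapper.NAME + "." + entry.name();
    doc.add(new org.apache.lucene.document.StoredField(storedFieldName, encoded));
}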

                values.add(result);
            }
        }

        void reset() {
            values.clear();
            done = false;
        }

    }

    static boolean supports(StoredFieldsSpec spec) {
        return spec.requiresSource() == false
            && spec.requiresMetadata() == false
            && spec.requiredStoredFields().size() == 1

Review comment (contributor):
When a query requests multiple values, ValuesFromManyReader#fieldsMoved() will merge the multiple StoredFieldsSpec instances into a single spec that specifies multiple fields. This check then fails (since it expects exactly one stored field in the spec), and we fall back to the default StoredFieldLoader implementation.

            // Require the per-field "_ignored_source." prefix so that a plain "_ignored_source"
            // request keeps using the default StoredFieldLoader.
            && spec.requiredStoredFields().iterator().next().startsWith(IgnoredSourceFieldMapper.NAME + ".");
    }
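
To illustrate the comment above, a sketch (the method and the field names foo/bar are made up) of a merged spec that fails the size check, so callers fall back to the default StoredFieldLoader:

    // Illustration only: two required stored fields means supports() returns false.
    static void supportsRejectsMergedSpecsExample() {
        StoredFieldsSpec merged = new StoredFieldsSpec(
            false,
            false,
            Set.of(IgnoredSourceFieldMapper.NAME + ".foo", IgnoredSourceFieldMapper.NAME + ".bar")
        );
        assert IgnoredSourceFieldLoader.supports(merged) == false;
    }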

    // TODO: use provided one
    private static CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> sequentialReader(LeafReaderContext ctx) throws IOException {
        LeafReader leafReader = ctx.reader();
        if (leafReader instanceof SequentialStoredFieldsLeafReader lf) {
            return lf.getSequentialStoredFieldsReader()::document;
        }
        return leafReader.storedFields()::document;
    }
}
StoredFieldLoader.java
@@ -50,6 +50,9 @@ public static StoredFieldLoader fromSpec(StoredFieldsSpec spec) {
        if (spec.noRequirements()) {
            return StoredFieldLoader.empty();
        }
        if (IgnoredSourceFieldLoader.supports(spec)) {
            return new IgnoredSourceFieldLoader(spec);
        }
        return create(spec.requiresSource(), spec.requiredStoredFields());
    }

@@ -91,6 +94,10 @@ public static StoredFieldLoader fromSpecSequential(StoredFieldsSpec spec) {
        if (spec.noRequirements()) {
            return StoredFieldLoader.empty();
        }
        if (IgnoredSourceFieldLoader.supports(spec)) {
            return new IgnoredSourceFieldLoader(spec);
        }

        List<String> fieldsToLoad = fieldsToLoad(spec.requiresSource(), spec.requiredStoredFields());
        return new StoredFieldLoader() {
            @Override
FallbackSyntheticSourceBlockLoader.java
@@ -39,10 +39,12 @@
public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader {
    private final Reader<?> reader;
    private final String fieldName;
    private final Set<String> fieldPaths;

    protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) {
        this.reader = reader;
        this.fieldName = fieldName;
        this.fieldPaths = splitIntoFieldPaths(fieldName);
    }

    @Override
@@ -52,12 +54,12 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws

    @Override
    public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
        return new IgnoredSourceRowStrideReader<>(fieldName, reader);
        return new IgnoredSourceRowStrideReader<>(fieldName, reader, fieldPaths);
    }

    @Override
    public StoredFieldsSpec rowStrideStoredFieldSpec() {
        return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME));
        return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME + "." + fieldName));

Review comment (contributor):
This specified field doesn't actually exist. This is fine when we're using our custom IgnoredSourceFieldLoader, but sometimes we fall back to the default StoredFieldLoader (when we're loading more than one value from ignored source, or when we're loading some values from other stored fields).

    }

    @Override
@@ -70,7 +72,31 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException
        throw new UnsupportedOperationException();
    }

    private record IgnoredSourceRowStrideReader<T>(String fieldName, Reader<T> reader) implements RowStrideReader {
    public static Set<String> splitIntoFieldPaths(String fieldName) {
        var paths = new HashSet<String>();
        paths.add("_doc");
        var current = new StringBuilder();
        for (var part : fieldName.split("\\.")) {
            if (current.isEmpty() == false) {
                current.append('.');
            }
            current.append(part);
            paths.add(current.toString());
        }
        return paths;
    }
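
For example, a sketch of the resulting paths (the wrapper method and field name are made up for illustration); an ignored-source entry recorded under the field itself or any parent will match:

    // splitIntoFieldPaths("foo.bar.baz") -> {"_doc", "foo", "foo.bar", "foo.bar.baz"}
    static void splitIntoFieldPathsExample() {
        Set<String> paths = splitIntoFieldPaths("foo.bar.baz");
        assert paths.equals(Set.of("_doc", "foo", "foo.bar", "foo.bar.baz"));
    }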

    private static final class IgnoredSourceRowStrideReader<T> implements RowStrideReader {
        private final String fieldName;
        private final Reader<T> reader;
        private final Set<String> fieldPaths;

        private IgnoredSourceRowStrideReader(String fieldName, Reader<T> reader, Set<String> fieldPaths) {
            this.fieldName = fieldName;
            this.reader = reader;
            this.fieldPaths = fieldPaths;
        }

        @Override
        public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
            var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME);
@@ -80,26 +106,9 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I
            }

            Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>();

            // Contains name of the field and all its parents
            Set<String> fieldNames = new HashSet<>() {
                {
                    add("_doc");
                }
            };

            var current = new StringBuilder();
            for (String part : fieldName.split("\\.")) {
                if (current.isEmpty() == false) {
                    current.append('.');
                }
                current.append(part);
                fieldNames.add(current.toString());
            }

            for (Object value : ignoredSource) {
                IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
                if (fieldNames.contains(nameValue.name())) {
                IgnoredSourceFieldMapper.NameValue nameValue = (IgnoredSourceFieldMapper.NameValue) value;

Review comment (contributor):
If we fall back to the default StoredFieldLoader instead of our custom IgnoredSourceFieldLoader, this would cause a ClassCastException (except that now our visitor is filtering for _ignored_source.<field_name> instead of _ignored_source, so the ignoredSource variable is null and this whole block is skipped).
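
A loader-agnostic sketch of the assignment above could hedge against that, assuming the raw stored value can still be passed to the existing decode(Object) method:

                // Sketch: accept both the pre-decoded NameValue produced by IgnoredSourceFieldLoader
                // and the raw encoded value produced by the default StoredFieldLoader.
                IgnoredSourceFieldMapper.NameValue nameValue = value instanceof IgnoredSourceFieldMapper.NameValue nv
                    ? nv
                    : IgnoredSourceFieldMapper.decode(value);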

                if (fieldPaths.contains(nameValue.name())) {
                    valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue);
                }
            }
IgnoredSourceFieldMapper.java
@@ -186,6 +186,20 @@ static NameValue decode(Object field) {
        return new NameValue(name, parentOffset, value, null);
    }

    public static NameValue decodeIfMatch(byte[] bytes, Set<String> potentialFieldsInIgnoreSource) {
        int encodedSize = ByteUtils.readIntLE(bytes, 0);
        int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
        int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;

        String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8);
        if (potentialFieldsInIgnoreSource.contains(name)) {
            BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4);
            return new NameValue(name, parentOffset, value, null);
        } else {
            return null;
        }
    }
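
For reference, a minimal sketch of the byte layout this method assumes, mirroring the decoding above (encodeForIllustration is made up, and it assumes ByteUtils also exposes a matching writeIntLE):

    // Layout consumed by decodeIfMatch: a 4-byte little-endian int packing nameSize and
    // parentOffset (parentOffset * PARENT_OFFSET_IN_NAME_OFFSET + nameSize), followed by
    // the UTF-8 field name and then the raw value bytes.
    static byte[] encodeForIllustration(String name, int parentOffset, byte[] value) {
        byte[] nameBytes = name.getBytes(StandardCharsets.UTF_8);
        byte[] bytes = new byte[4 + nameBytes.length + value.length];
        ByteUtils.writeIntLE(parentOffset * PARENT_OFFSET_IN_NAME_OFFSET + nameBytes.length, bytes, 0);
        System.arraycopy(nameBytes, 0, bytes, 4, nameBytes.length);
        System.arraycopy(value, 0, bytes, 4 + nameBytes.length, value.length);
        return bytes;
    }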

    // In rare cases decoding values stored in this field can fail, leading to the entire source
    // not being available.
    // We would like to have an option to lose some values in synthetic source