diff --git a/server/src/main/java/org/elasticsearch/index/fieldvisitor/IgnoredSourceFieldLoader.java b/server/src/main/java/org/elasticsearch/index/fieldvisitor/IgnoredSourceFieldLoader.java
new file mode 100644
index 0000000000000..41b6b4daa49eb
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/fieldvisitor/IgnoredSourceFieldLoader.java
@@ -0,0 +1,167 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.fieldvisitor;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.StoredFieldVisitor;
+import org.elasticsearch.common.CheckedBiConsumer;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
+import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader;
+import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Stored field loader that reads only the ignored source stored field and keeps only the entries whose name is one
+ * of the requested fields, one of their parent objects, or {@code _doc}. Matching entries are exposed already decoded
+ * as {@link IgnoredSourceFieldMapper.NameValue} instances under the {@link IgnoredSourceFieldMapper#NAME} key.
+ */
+class IgnoredSourceFieldLoader extends StoredFieldLoader {
+
+    /** Prefix of a required stored field that names a single field to read from ignored source. */
+    private static final String FIELD_PREFIX = IgnoredSourceFieldMapper.NAME + ".";
+
+    final Set<String> potentialFieldsInIgnoreSource;
+
+    /**
+     * @param spec spec whose required stored fields have the form {@code <ignored source name>.<field name>};
+     *             entries without that prefix are skipped
+     */
+    IgnoredSourceFieldLoader(StoredFieldsSpec spec) {
+        Set<String> potentialFieldsInIgnoreSource = new HashSet<>();
+        for (String requiredStoredField : spec.requiredStoredFields()) {
+            if (requiredStoredField.startsWith(FIELD_PREFIX)) {
+                // Drop the separator together with the name, otherwise the split yields an empty leading path.
+                String fieldName = requiredStoredField.substring(FIELD_PREFIX.length());
+                potentialFieldsInIgnoreSource.addAll(FallbackSyntheticSourceBlockLoader.splitIntoFieldPaths(fieldName));
+            }
+        }
+        this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource;
+    }
+
+    @Override
+    public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) throws IOException {
+        var reader = sequentialReader(ctx);
+        var visitor = new SFV(potentialFieldsInIgnoreSource);
+        return new LeafStoredFieldLoader() {
+
+            private int doc = -1;
+
+            @Override
+            public void advanceTo(int doc) throws IOException {
+                // Re-reading the current document would only rebuild the same values.
+                if (doc != this.doc) {
+                    visitor.reset();
+                    reader.accept(doc, visitor);
+                    this.doc = doc;
+                }
+            }
+
+            @Override
+            public BytesReference source() {
+                return null;
+            }
+
+            @Override
+            public String id() {
+                return null;
+            }
+
+            @Override
+            public String routing() {
+                return null;
+            }
+
+            @Override
+            public Map<String, List<Object>> storedFields() {
+                return Map.of(IgnoredSourceFieldMapper.NAME, visitor.values);
+            }
+        };
+    }
+
+    @Override
+    public List<String> fieldsToLoad() {
+        return List.of(IgnoredSourceFieldMapper.NAME);
+    }
+
+    /** Visitor that collects the decoded ignored source entries matching the requested field paths. */
+    static class SFV extends StoredFieldVisitor {
+
+        boolean done;
+        final List<Object> values = new ArrayList<>();
+        final Set<String> potentialFieldsInIgnoreSource;
+
+        SFV(Set<String> potentialFieldsInIgnoreSource) {
+            this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource;
+        }
+
+        @Override
+        public Status needsField(FieldInfo fieldInfo) throws IOException {
+            if (done) {
+                return Status.STOP;
+            } else if (IgnoredSourceFieldMapper.NAME.equals(fieldInfo.name)) {
+                return Status.YES;
+            } else {
+                return Status.NO;
+            }
+        }
+
+        @Override
+        public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
+            var result = IgnoredSourceFieldMapper.decodeIfMatch(value, potentialFieldsInIgnoreSource);
+            if (result != null) {
+                // TODO: stopping at the first match (done = true) is not possible as long as the same field name can
+                // have multiple entries (objects, arrays etc.).
+                values.add(result);
+            }
+        }
+
+        /** Clears the collected values so the visitor can be used for the next document. */
+        void reset() {
+            values.clear();
+            done = false;
+        }
+
+    }
+
+    /**
+     * Returns whether this loader can serve {@code spec}: no source, no metadata, and every required stored field
+     * names a field inside ignored source.
+     */
+    static boolean supports(StoredFieldsSpec spec) {
+        if (spec.requiresSource() || spec.requiresMetadata() || spec.requiredStoredFields().isEmpty()) {
+            return false;
+        }
+        for (String requiredStoredField : spec.requiredStoredFields()) {
+            if (requiredStoredField.startsWith(FIELD_PREFIX) == false) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // TODO: use provided one
+    private static CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> sequentialReader(LeafReaderContext ctx) throws IOException {
+        LeafReader leafReader = ctx.reader();
+        if (leafReader instanceof SequentialStoredFieldsLeafReader lf) {
+            return lf.getSequentialStoredFieldsReader()::document;
+        }
+        return leafReader.storedFields()::document;
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java b/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java
index a02a8da9e629e..3dfba4cfda7f8 100644
--- a/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java
+++ b/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java
@@ -50,6 +50,9 @@ public static StoredFieldLoader fromSpec(StoredFieldsSpec spec) {
         if (spec.noRequirements()) {
             return StoredFieldLoader.empty();
         }
+        if (IgnoredSourceFieldLoader.supports(spec)) {
+            return new IgnoredSourceFieldLoader(spec);
+        }
         return create(spec.requiresSource(), spec.requiredStoredFields());
     }
 
@@ -91,6 +94,10 @@ public static StoredFieldLoader fromSpecSequential(StoredFieldsSpec spec) {
         if (spec.noRequirements()) {
             return StoredFieldLoader.empty();
         }
+        if (IgnoredSourceFieldLoader.supports(spec)) {
+            return new IgnoredSourceFieldLoader(spec);
+        }
+
         List<String> fieldsToLoad = fieldsToLoad(spec.requiresSource(), spec.requiredStoredFields());
         return new StoredFieldLoader() {
             @Override
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java
index db63a4443f847..4b3d0575a0a8d 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java
@@ -39,10 +39,12 @@
 public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader {
     private final Reader<?> reader;
     private final String fieldName;
+    private final Set<String> fieldPaths;
 
     protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) {
         this.reader = reader;
         this.fieldName = fieldName;
+        this.fieldPaths = splitIntoFieldPaths(fieldName);
     }
 
     @Override
@@ -52,12 +54,12 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws
 
     @Override
     public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
-        return new IgnoredSourceRowStrideReader<>(fieldName, reader);
+        return new IgnoredSourceRowStrideReader<>(fieldName, reader, fieldPaths);
     }
 
     @Override
     public StoredFieldsSpec rowStrideStoredFieldSpec() {
-        return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME));
+        return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME + "." + fieldName));
     }
 
     @Override
@@ -70,7 +72,35 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException
         throw new UnsupportedOperationException();
     }
 
-    private record IgnoredSourceRowStrideReader<T>(String fieldName, Reader<T> reader) implements RowStrideReader {
+    /**
+     * Returns {@code _doc} plus every dot-separated prefix of {@code fieldName}, the full name included; for example
+     * {@code a.b.c} yields {@code _doc}, {@code a}, {@code a.b} and {@code a.b.c}.
+     */
+    public static Set<String> splitIntoFieldPaths(String fieldName) {
+        var paths = new HashSet<String>();
+        paths.add("_doc");
+        var current = new StringBuilder();
+        for (var part : fieldName.split("\\.")) {
+            if (current.isEmpty() == false) {
+                current.append('.');
+            }
+            current.append(part);
+            paths.add(current.toString());
+        }
+        return paths;
+    }
+
+    private static final class IgnoredSourceRowStrideReader<T> implements RowStrideReader {
+        private final String fieldName;
+        private final Reader<T> reader;
+        private final Set<String> fieldPaths;
+
+        private IgnoredSourceRowStrideReader(String fieldName, Reader<T> reader, Set<String> fieldPaths) {
+            this.fieldName = fieldName;
+            this.reader = reader;
+            this.fieldPaths = fieldPaths;
+        }
+
         @Override
         public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
             var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME);
@@ -80,26 +110,13 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I
             }
 
             Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>();
-
-            // Contains name of the field and all its parents
-            Set<String> fieldNames = new HashSet<>() {
-                {
-                    add("_doc");
-                }
-            };
-
-            var current = new StringBuilder();
-            for (String part : fieldName.split("\\.")) {
-                if (current.isEmpty() == false) {
-                    current.append('.');
-                }
-                current.append(part);
-                fieldNames.add(current.toString());
-            }
-
             for (Object value : ignoredSource) {
-                IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
-                if (fieldNames.contains(nameValue.name())) {
+                // IgnoredSourceFieldLoader hands over decoded entries; any other stored field loader hands over the
+                // encoded form, which still has to be decoded here instead of failing the cast.
+                IgnoredSourceFieldMapper.NameValue nameValue = value instanceof IgnoredSourceFieldMapper.NameValue decoded
+                    ? decoded
+                    : IgnoredSourceFieldMapper.decode(value);
+                if (fieldPaths.contains(nameValue.name())) {
                     valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue);
                 }
             }
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java
index d8d8200baac31..4db97ee52242a 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java
@@ -186,6 +186,27 @@ static NameValue decode(Object field) {
         return new NameValue(name, parentOffset, value, null);
     }
 
+    /**
+     * Decodes a single encoded ignored source entry, but only if its field name is one of the given names.
+     *
+     * @param bytes encoded entry: a little-endian int combining name length and parent offset, the UTF-8 name, then the value
+     * @param potentialFieldsInIgnoreSource field names to keep
+     * @return the decoded entry, or {@code null} if its name is not in {@code potentialFieldsInIgnoreSource}
+     */
+    public static NameValue decodeIfMatch(byte[] bytes, Set<String> potentialFieldsInIgnoreSource) {
+        int encodedSize = ByteUtils.readIntLE(bytes, 0);
+        int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
+        int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;
+
+        String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8);
+        if (potentialFieldsInIgnoreSource.contains(name)) {
+            BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4);
+            return new NameValue(name, parentOffset, value, null);
+        } else {
+            return null;
+        }
+    }
+
     // In rare cases decoding values stored in this field can fail leading to entire source
     // not being available.
     // We would like to have an option to lose some values in synthetic source