Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.fieldvisitor;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

class IgnoredSourceFieldLoader extends StoredFieldLoader {
private final boolean forceSequentialReader;
private final Map<String, Set<String>> potentialFieldsInIgnoreSource;
private final Set<String> fieldNames;

IgnoredSourceFieldLoader(BlockLoader.FieldsSpec spec, boolean forceSequentialReader) {
fieldNames = new HashSet<>(spec.ignoredFieldsSpec().requiredIgnoredFields());
this.forceSequentialReader = forceSequentialReader;

HashMap<String, Set<String>> potentialFieldsInIgnoreSource = new HashMap<>();
for (String requiredIgnoredField : spec.ignoredFieldsSpec().requiredIgnoredFields()) {
for (String potentialStoredField : spec.ignoredFieldsSpec().format().requiredStoredFields(requiredIgnoredField)) {
potentialFieldsInIgnoreSource.computeIfAbsent(potentialStoredField, k -> new HashSet<>()).add(requiredIgnoredField);
}
}
Comment on lines +40 to +44
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hoping that we can optimize/simplify in a follow up change. This logic is now required for fields with dots and then we don't know which field part there is actually a stored field for.

Ideally we would know based from the mapping what stored field we need. For example if attributes.host.ip is requested and no attributes field is mapped, then the stored field should be _ignored_source.attributes. If attributes is mapped, but host isn't then the required stored field to load would be _ignored_source.attributes.host.

I also think requiredStoredFields(...) also always include the _doc field (via FallbackSyntheticSourceBlockLoader.splitIntoFieldPaths(...))?

Would be great if we need less maps and no sets in SFV.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, this can probably be optimized by using the mapping to figure out which stored field contains the value instead of just looking at all possible parent stored fields. But let's leave that as a follow-up.

this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource;
}

@Override
public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) throws IOException {
var reader = forceSequentialReader ? sequentialReader(ctx) : reader(ctx, docs);
var visitor = new SFV(fieldNames, potentialFieldsInIgnoreSource);
return new LeafStoredFieldLoader() {

private int doc = -1;

@Override
public void advanceTo(int doc) throws IOException {
if (doc != this.doc) {
visitor.reset();
reader.accept(doc, visitor);
this.doc = doc;
}
}

@Override
public BytesReference source() {
assert false : "source() is not supported by IgnoredSourceFieldLoader";
return null;
}

@Override
public String id() {
assert false : "id() is not supported by IgnoredSourceFieldLoader";
return null;
}

@Override
public String routing() {
assert false : "routing() is not supported by IgnoredSourceFieldLoader";
return null;
}

@Override
public Map<String, List<Object>> storedFields() {
return visitor.values;
}
};
}

@Override
public List<String> fieldsToLoad() {
return potentialFieldsInIgnoreSource.keySet().stream().toList();
}

static class SFV extends StoredFieldVisitor {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future we can perhaps explore an implementation that is tuned for just fetching one stored field.

final Map<String, List<Object>> values = new HashMap<>();
final Set<String> fieldNames;
private final Set<String> unvisitedFields;
final Map<String, Set<String>> potentialFieldsInIgnoreSource;

SFV(Set<String> fieldNames, Map<String, Set<String>> potentialFieldsInIgnoreSource) {
this.fieldNames = fieldNames;
this.unvisitedFields = new HashSet<>(fieldNames);
this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource;
}

@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
if (unvisitedFields.isEmpty()) {
return Status.STOP;
}

Set<String> foundFields = potentialFieldsInIgnoreSource.get(fieldInfo.name);
if (foundFields == null) {
return Status.NO;
}

unvisitedFields.removeAll(foundFields);
return Status.YES;
}

@Override
public void binaryField(FieldInfo fieldInfo, byte[] value) {
values.computeIfAbsent(fieldInfo.name, k -> new ArrayList<>()).add(new BytesRef(value));
}

void reset() {
values.clear();
unvisitedFields.addAll(fieldNames);
}

}

static boolean supports(BlockLoader.FieldsSpec spec) {
return spec.storedFieldsSpec().noRequirements()
&& spec.ignoredFieldsSpec().format() == IgnoredSourceFieldMapper.IgnoredSourceFormat.PER_FIELD_IGNORED_SOURCE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.StoredFields;
import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
import org.elasticsearch.search.fetch.StoredFieldsSpec;

Expand Down Expand Up @@ -55,6 +57,26 @@ public static StoredFieldLoader fromSpec(StoredFieldsSpec spec) {
return create(spec.requiresSource(), spec.requiredStoredFields());
}

/**
* Crates a new StoredFieldLaoader using a BlockLoader.FieldsSpec
*/
public static StoredFieldLoader fromSpec(BlockLoader.FieldsSpec spec, boolean forceSequentialReader) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a unit test that tests whether the ignored source field loader is loaded when expected? Maybe we can add a test to BlockSourceReaderTests and then also test the produced block? I think current tests in BlockSourceReaderTests don't test ignored source?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe adding a test to IgnoredSourceFieldMapperTests? But I don't see any block related tests there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I added StoredFieldLoaderTests and IgnoredFieldLoaderTests

if (spec.noRequirements()) {
return StoredFieldLoader.empty();
}

if (IgnoredSourceFieldLoader.supports(spec)) {
return new IgnoredSourceFieldLoader(spec, forceSequentialReader);
}

StoredFieldsSpec mergedSpec = spec.storedFieldsSpec().merge(spec.ignoredFieldsSpec().requiredStoredFields());
if (forceSequentialReader) {
return fromSpecSequential(mergedSpec);
} else {
return fromSpec(mergedSpec);
}
}

public static StoredFieldLoader create(boolean loadSource, Set<String> fields) {
return create(loadSource, fields, false);
}
Expand Down Expand Up @@ -141,7 +163,8 @@ public List<String> fieldsToLoad() {
};
}

private static CheckedBiConsumer<Integer, FieldsVisitor, IOException> reader(LeafReaderContext ctx, int[] docs) throws IOException {
protected static CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> reader(LeafReaderContext ctx, int[] docs)
throws IOException {
LeafReader leafReader = ctx.reader();
if (docs != null && docs.length > 10 && hasSequentialDocs(docs)) {
return sequentialReader(ctx);
Expand All @@ -150,7 +173,8 @@ private static CheckedBiConsumer<Integer, FieldsVisitor, IOException> reader(Lea
return storedFields::document;
}

private static CheckedBiConsumer<Integer, FieldsVisitor, IOException> sequentialReader(LeafReaderContext ctx) throws IOException {
protected static CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> sequentialReader(LeafReaderContext ctx)
throws IOException {
LeafReader leafReader = ctx.reader();
if (leafReader instanceof SequentialStoredFieldsLeafReader lf) {
return lf.getSequentialStoredFieldsReader()::document;
Expand Down Expand Up @@ -201,7 +225,7 @@ public Map<String, List<Object>> storedFields() {

private static class ReaderStoredFieldLoader implements LeafStoredFieldLoader {

private final CheckedBiConsumer<Integer, FieldsVisitor, IOException> reader;
private final CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> reader;
private final CustomFieldsVisitor visitor;
private int doc = -1;

Expand All @@ -221,7 +245,11 @@ public Status needsField(FieldInfo fieldInfo) {
return new CustomFieldsVisitor(fields, loadSource);
}

ReaderStoredFieldLoader(CheckedBiConsumer<Integer, FieldsVisitor, IOException> reader, boolean loadSource, Set<String> fields) {
ReaderStoredFieldLoader(
CheckedBiConsumer<Integer, StoredFieldVisitor, IOException> reader,
boolean loadSource,
Set<String> fields
) {
this.reader = reader;
this.visitor = getFieldsVisitor(fields, loadSource);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.ElementType;
import org.elasticsearch.index.mapper.vectors.VectorEncoderDecoder;
import org.elasticsearch.search.fetch.StoredFieldsSpec;

import java.io.IOException;

Expand Down Expand Up @@ -76,8 +75,8 @@ public final RowStrideReader rowStrideReader(LeafReaderContext context) throws I
}

@Override
public final StoredFieldsSpec rowStrideStoredFieldSpec() {
return StoredFieldsSpec.NO_REQUIREMENTS;
public final FieldsSpec rowStrideFieldSpec() {
return FieldsSpec.NO_REQUIREMENTS;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,26 @@ interface StoredFields {

RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException;

StoredFieldsSpec rowStrideStoredFieldSpec();
record FieldsSpec(StoredFieldsSpec storedFieldsSpec, IgnoredFieldsSpec ignoredFieldsSpec) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to extend StoredFieldsSpec to know about ignored fields, instead of having this new abstraction on top? I think that would make a lot of the logic here easier to follow.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was reluctant to do so because StoredFieldsSpec is used in a few other places, and I only need the IgnoredFieldsSpec in the ESQL blockloader context.

Copy link
Member

@martijnvg martijnvg Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also leaning towards keeping FieldsSpec in this PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After taking a second look at this, if we're able to add IgnoredFieldsSpec to StoredFieldsSpec and make it optional then I think it would reduce the size of this PR? By adding a constructor to StoredFieldSpec:

public StoredFieldsSpec(boolean requiresSource, boolean requiresMetadata, Set<String> requiredStoredFields) {
        this(requiresSource, requiresMetadata, requiredStoredFields, IgnoredFieldsSpec.NONE);
    }

(the StoredFieldsSpec#merge(...) method also needs to be updated, but from there we can delegate to IgnoredFieldsSpec)

StoredFieldSpec is used in fetch phase and value fetchers, but I think we don't need to make any changes in these areas of the code base.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I moved IgnoredFieldsSpec into StoredFieldsSpec in 9fb1125

public static FieldsSpec NO_REQUIREMENTS = new FieldsSpec(StoredFieldsSpec.NO_REQUIREMENTS, IgnoredFieldsSpec.NONE);

public FieldsSpec merge(FieldsSpec other) {
return new FieldsSpec(
this.storedFieldsSpec.merge(other.storedFieldsSpec),
this.ignoredFieldsSpec.merge(other.ignoredFieldsSpec)
);
}

public FieldsSpec merge(StoredFieldsSpec other) {
return new FieldsSpec(this.storedFieldsSpec.merge(other), this.ignoredFieldsSpec);
}

public boolean noRequirements() {
return storedFieldsSpec.noRequirements() && ignoredFieldsSpec.requiredIgnoredFields().isEmpty();
}
}

FieldsSpec rowStrideFieldSpec();

/**
* Does this loader support loading bytes via calling {@link #ordinals}.
Expand Down Expand Up @@ -140,8 +159,8 @@ public RowStrideReader rowStrideReader(LeafReaderContext context) {
}

@Override
public StoredFieldsSpec rowStrideStoredFieldSpec() {
return StoredFieldsSpec.NO_REQUIREMENTS;
public FieldsSpec rowStrideFieldSpec() {
return FieldsSpec.NO_REQUIREMENTS;
}

@Override
Expand Down Expand Up @@ -237,8 +256,8 @@ public String toString() {
}

@Override
public StoredFieldsSpec rowStrideStoredFieldSpec() {
return StoredFieldsSpec.NO_REQUIREMENTS;
public FieldsSpec rowStrideFieldSpec() {
return FieldsSpec.NO_REQUIREMENTS;
}

@Override
Expand Down Expand Up @@ -319,8 +338,8 @@ public String toString() {
}

@Override
public StoredFieldsSpec rowStrideStoredFieldSpec() {
return delegate.rowStrideStoredFieldSpec();
public FieldsSpec rowStrideFieldSpec() {
return delegate.rowStrideFieldSpec();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ public final ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context)
}

@Override
public final StoredFieldsSpec rowStrideStoredFieldSpec() {
return StoredFieldsSpec.NEEDS_SOURCE;
public final FieldsSpec rowStrideFieldSpec() {
return new FieldsSpec(StoredFieldsSpec.NEEDS_SOURCE, IgnoredFieldsSpec.NONE);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ public final ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context)
}

@Override
public final StoredFieldsSpec rowStrideStoredFieldSpec() {
return new StoredFieldsSpec(false, false, Set.of(field));
public final FieldsSpec rowStrideFieldSpec() {
return new FieldsSpec(new StoredFieldsSpec(false, false, Set.of(field)), IgnoredFieldsSpec.NONE);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import java.util.Optional;
import java.util.Set;
import java.util.Stack;
import java.util.stream.Collectors;

/**
* Block loader for fields that use fallback synthetic source implementation.
Expand Down Expand Up @@ -65,15 +64,8 @@ public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOExcep
}

@Override
public StoredFieldsSpec rowStrideStoredFieldSpec() {
Set<String> ignoredFieldNames;
if (ignoredSourceFormat == IgnoredSourceFieldMapper.IgnoredSourceFormat.PER_FIELD_IGNORED_SOURCE) {
ignoredFieldNames = fieldPaths.stream().map(IgnoredSourceFieldMapper::ignoredFieldName).collect(Collectors.toSet());
} else {
ignoredFieldNames = Set.of(IgnoredSourceFieldMapper.NAME);
}

return new StoredFieldsSpec(false, false, ignoredFieldNames);
public FieldsSpec rowStrideFieldSpec() {
return new FieldsSpec(StoredFieldsSpec.NO_REQUIREMENTS, new IgnoredFieldsSpec(Set.of(fieldName), ignoredSourceFormat));
}

@Override
Expand Down
Loading