diff --git a/server/src/main/java/org/elasticsearch/index/fieldvisitor/CustomFieldsVisitor.java b/server/src/main/java/org/elasticsearch/index/fieldvisitor/CustomFieldsVisitor.java index a7047265477e4..096d8c4af6d6e 100644 --- a/server/src/main/java/org/elasticsearch/index/fieldvisitor/CustomFieldsVisitor.java +++ b/server/src/main/java/org/elasticsearch/index/fieldvisitor/CustomFieldsVisitor.java @@ -10,6 +10,7 @@ import org.apache.lucene.index.FieldInfo; import org.elasticsearch.index.mapper.IgnoredFieldMapper; +import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import java.util.HashSet; import java.util.Set; @@ -50,6 +51,11 @@ public Status needsField(FieldInfo fieldInfo) { if (fields.contains(fieldInfo.name)) { return Status.YES; } + + if (IgnoredSourceFieldMapper.NAME.equals(fieldInfo.name)) { + return Status.YES; + } + return Status.NO; } } diff --git a/server/src/main/java/org/elasticsearch/index/fieldvisitor/IgnoredSourceFieldLoader.java b/server/src/main/java/org/elasticsearch/index/fieldvisitor/IgnoredSourceFieldLoader.java new file mode 100644 index 0000000000000..14b0c99113498 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/fieldvisitor/IgnoredSourceFieldLoader.java @@ -0,0 +1,142 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.fieldvisitor; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.StoredFieldVisitor; +import org.elasticsearch.common.CheckedBiConsumer; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader; +import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader; +import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; +import org.elasticsearch.search.fetch.StoredFieldsSpec; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +class IgnoredSourceFieldLoader extends StoredFieldLoader { + + final Set potentialFieldsInIgnoreSource; + + IgnoredSourceFieldLoader(StoredFieldsSpec spec) { + Set potentialFieldsInIgnoreSource = new HashSet<>(); + for (String requiredStoredField : spec.requiredStoredFields()) { + if (requiredStoredField.startsWith(IgnoredSourceFieldMapper.NAME)) { + String fieldName = requiredStoredField.substring(IgnoredSourceFieldMapper.NAME.length()); + potentialFieldsInIgnoreSource.addAll(FallbackSyntheticSourceBlockLoader.splitIntoFieldPaths(fieldName)); + } + } + this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource; + } + + @Override + public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) throws IOException { + var reader = sequentialReader(ctx); + var visitor = new SFV(potentialFieldsInIgnoreSource); + return new LeafStoredFieldLoader() { + + private int doc = -1; + + @Override + public void advanceTo(int doc) throws IOException { + if (doc != this.doc) { + visitor.reset(); + reader.accept(doc, visitor); + this.doc = doc; + } + } + + @Override + public BytesReference source() { + return null; + } + + @Override + public String id() { + return null; + } + + @Override + public String routing() { + return null; + } + + @Override + public Map> storedFields() { + return Map.of(IgnoredSourceFieldMapper.NAME, visitor.values); + } + }; + } + + @Override + public List fieldsToLoad() { + return List.of(IgnoredSourceFieldMapper.NAME); + } + + static class SFV extends StoredFieldVisitor { + + final List values = new ArrayList<>(); + final Set potentialFieldsInIgnoreSource; + + SFV(Set potentialFieldsInIgnoreSource) { + this.potentialFieldsInIgnoreSource = potentialFieldsInIgnoreSource; + } + + @Override + public Status needsField(FieldInfo fieldInfo) throws IOException { + if (IgnoredSourceFieldMapper.NAME.equals(fieldInfo.name)) { + return Status.YES; + } else { + return Status.NO; + } + } + + @Override + public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { + var result = IgnoredSourceFieldMapper.decodeIfMatch(value, potentialFieldsInIgnoreSource); + if (result != null) { + values.add(result); + } + } + + void reset() { + values.clear(); + } + + } + + static boolean supports(StoredFieldsSpec spec) { + if (spec.requiresSource() || spec.requiresMetadata()) { + return false; + } + + for (String fieldName : spec.requiredStoredFields()) { + if (fieldName.startsWith(IgnoredSourceFieldMapper.NAME) == false) { + return false; + } + } + return true; + } + + // TODO: use provided one + private static CheckedBiConsumer sequentialReader(LeafReaderContext ctx) throws IOException { + LeafReader leafReader = ctx.reader(); + if (leafReader instanceof SequentialStoredFieldsLeafReader lf) { + return lf.getSequentialStoredFieldsReader()::document; + } + return leafReader.storedFields()::document; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java b/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java index a02a8da9e629e..3dfba4cfda7f8 100644 --- a/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java +++ b/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java @@ -50,6 +50,9 @@ public static StoredFieldLoader fromSpec(StoredFieldsSpec spec) { if (spec.noRequirements()) { return StoredFieldLoader.empty(); } + if (IgnoredSourceFieldLoader.supports(spec)) { + return new IgnoredSourceFieldLoader(spec); + } return create(spec.requiresSource(), spec.requiredStoredFields()); } @@ -91,6 +94,10 @@ public static StoredFieldLoader fromSpecSequential(StoredFieldsSpec spec) { if (spec.noRequirements()) { return StoredFieldLoader.empty(); } + if (IgnoredSourceFieldLoader.supports(spec)) { + return new IgnoredSourceFieldLoader(spec); + } + List fieldsToLoad = fieldsToLoad(spec.requiresSource(), spec.requiredStoredFields()); return new StoredFieldLoader() { @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java index c709c891e4ad4..6e4db9862ea27 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java @@ -39,10 +39,12 @@ public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader { private final Reader reader; private final String fieldName; + private final Set fieldPaths; protected FallbackSyntheticSourceBlockLoader(Reader reader, String fieldName) { this.reader = reader; this.fieldName = fieldName; + this.fieldPaths = splitIntoFieldPaths(fieldName); } @Override @@ -52,12 +54,12 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws @Override public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { - return new IgnoredSourceRowStrideReader<>(fieldName, reader); + return new IgnoredSourceRowStrideReader<>(fieldName, reader, fieldPaths); } @Override public StoredFieldsSpec rowStrideStoredFieldSpec() { - return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME)); + return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME + "." + fieldName)); } @Override @@ -70,30 +72,29 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException throw new UnsupportedOperationException(); } - private static class IgnoredSourceRowStrideReader implements RowStrideReader { - // Contains name of the field and all its parents - private final Set fieldNames; + public static Set splitIntoFieldPaths(String fieldName) { + var paths = new HashSet(); + paths.add("_doc"); + var current = new StringBuilder(); + for (var part : fieldName.split("\\.")) { + if (current.isEmpty() == false) { + current.append('.'); + } + current.append(part); + paths.add(current.toString()); + } + return paths; + } + + private static final class IgnoredSourceRowStrideReader implements RowStrideReader { private final String fieldName; private final Reader reader; + private final Set fieldPaths; - IgnoredSourceRowStrideReader(String fieldName, Reader reader) { + private IgnoredSourceRowStrideReader(String fieldName, Reader reader, Set fieldPaths) { this.fieldName = fieldName; this.reader = reader; - this.fieldNames = new HashSet<>() { - { - add("_doc"); - } - }; - - var current = new StringBuilder(); - for (String part : fieldName.split("\\.")) { - if (current.isEmpty() == false) { - current.append('.'); - } - current.append(part); - fieldNames.add(current.toString()); - } - + this.fieldPaths = fieldPaths; } @Override @@ -105,10 +106,12 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I } Map> valuesForFieldAndParents = new HashMap<>(); - for (Object value : ignoredSource) { - IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); - if (fieldNames.contains(nameValue.name())) { + IgnoredSourceFieldMapper.NameValue nameValue = (value instanceof IgnoredSourceFieldMapper.NameValue nVal) + ? nVal + : IgnoredSourceFieldMapper.decode(value); + + if (fieldPaths.contains(nameValue.name())) { valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java index d8d8200baac31..4db97ee52242a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java @@ -186,6 +186,20 @@ static NameValue decode(Object field) { return new NameValue(name, parentOffset, value, null); } + public static NameValue decodeIfMatch(byte[] bytes, Set potentialFieldsInIgnoreSource) { + int encodedSize = ByteUtils.readIntLE(bytes, 0); + int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET; + int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET; + + String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8); + if (potentialFieldsInIgnoreSource.contains(name)) { + BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4); + return new NameValue(name, parentOffset, value, null); + } else { + return null; + } + } + // In rare cases decoding values stored in this field can fail leading to entire source // not being available. // We would like to have an option to lose some values in synthetic source diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/53_esql_synthetic_source_mixed_disabled_fields.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/53_esql_synthetic_source_mixed_disabled_fields.yml new file mode 100644 index 0000000000000..49a2a05154fdc --- /dev/null +++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/53_esql_synthetic_source_mixed_disabled_fields.yml @@ -0,0 +1,129 @@ +--- +setup: + - requires: + test_runner_features: allowed_warnings_regex + + - do: + indices.create: + index: my-index + body: + settings: + index: + mode: logsdb + mappings: + properties: + "@timestamp": + type: date + host.name: + type: keyword + agent_id: + type: keyword + doc_values: false + store: false + process_id: + type: integer + doc_values: false + store: false + http_method: + type: keyword + doc_values: false + store: true + is_https: + type: boolean + doc_values: false + store: false + location: + type: geo_point + doc_values: false + store: false + message: + type: text + store: false + fields: + raw: + type: keyword + + - do: + bulk: + index: my-index + refresh: true + body: + - { "index": { } } + - { "@timestamp": "2024-02-12T10:30:00Z", "host.name": "foo", "agent_id": "darth-vader", "process_id": 101, "http_method": "GET", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "No, I am your father." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:31:00Z", "host.name": "bar", "agent_id": "yoda", "process_id": 102, "http_method": "PUT", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "Do. Or do not. There is no try." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:32:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 103, "http_method": "GET", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "May the force be with you." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:33:00Z", "host.name": "baz", "agent_id": "darth-vader", "process_id": 102, "http_method": "POST", "is_https": true, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "I find your lack of faith disturbing." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:34:00Z", "host.name": "baz", "agent_id": "yoda", "process_id": 104, "http_method": "POST", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "Wars not make one great." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:35:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 105, "http_method": "GET", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "That's no moon. It's a space station." } + +--- +teardown: + - do: + indices.delete: + index: my-index + +--- +"Simple from": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | LIMIT 1' + + - match: { columns.0.name: "@timestamp" } + - match: { columns.0.type: "date" } + - match: { columns.1.name: "agent_id" } + - match: { columns.1.type: "keyword" } + - match: { columns.2.name: "host.name" } + - match: { columns.2.type: "keyword" } + - match: { columns.3.name: "http_method" } + - match: { columns.3.type: "keyword" } + - match: { columns.4.name: "is_https" } + - match: { columns.4.type: "boolean" } + - match: { columns.5.name: "location" } + - match: { columns.5.type: "geo_point" } + - match: { columns.6.name: "message" } + - match: { columns.6.type: "text" } + - match: { columns.7.name: "message.raw" } + - match: { columns.7.type: "keyword" } + - match: { columns.8.name: "process_id" } + - match: { columns.8.type: "integer" } + + - match: { values.0.0: "2024-02-12T10:31:00.000Z" } + - match: { values.0.1: "yoda" } + - match: { values.0.2: "bar" } + - match: { values.0.3: "PUT" } + - match: { values.0.4: false } + - match: { values.0.5: "POINT (-74.006 40.7128)" } + - match: { values.0.6: "Do. Or do not. There is no try." } + - match: { values.0.7: "Do. Or do not. There is no try." } + - match: { values.0.8: 102 } + +--- +"Simple from keyword fields": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | KEEP agent_id, http_method | LIMIT 10' + + - match: { columns.0.name: "agent_id" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "http_method" } + - match: { columns.1.type: "keyword" } + + - match: { values.0.0: "yoda" } + - match: { values.0.1: "PUT" } + - match: { values.1.0: "darth-vader" } + - match: { values.1.1: "POST" } + - match: { values.2.0: "yoda" } + - match: { values.2.1: "POST" } + - match: { values.3.0: "darth-vader" } + - match: { values.3.1: "GET" } + - match: { values.4.0: "obi-wan" } + - match: { values.4.1: "GET" } + - match: { values.5.0: "obi-wan" } + - match: { values.5.1: "GET" }