diff --git a/libs/tdigest/build.gradle b/libs/tdigest/build.gradle
index 47fc0dbc239cf..6cdd3bb12ba30 100644
--- a/libs/tdigest/build.gradle
+++ b/libs/tdigest/build.gradle
@@ -23,6 +23,7 @@ apply plugin: 'elasticsearch.publish'
 
 dependencies {
   api project(':libs:core')
+  api project(':libs:x-content')
   api "org.apache.lucene:lucene-core:${versions.lucene}"
 
   testImplementation(project(":test:framework")) {
diff --git a/libs/tdigest/src/main/java/module-info.java b/libs/tdigest/src/main/java/module-info.java
index 79ddbe88ab3d3..beae047e0d777 100644
--- a/libs/tdigest/src/main/java/module-info.java
+++ b/libs/tdigest/src/main/java/module-info.java
@@ -20,7 +20,9 @@
 module org.elasticsearch.tdigest {
     requires org.elasticsearch.base;
     requires org.apache.lucene.core;
+    requires org.elasticsearch.xcontent;
 
     exports org.elasticsearch.tdigest;
     exports org.elasticsearch.tdigest.arrays;
+    exports org.elasticsearch.tdigest.parsing;
 }
diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java
similarity index 69%
rename from x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java
rename to libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java
index 2b8bdb7bf6613..ccdee289cf523 100644
--- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestParser.java
+++ b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/TDigestParser.java
@@ -1,28 +1,29 @@
 /*
  * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
  */
-package org.elasticsearch.xpack.analytics.mapper;
+package org.elasticsearch.tdigest.parsing;
 
-import org.elasticsearch.index.mapper.DocumentParsingException;
 import org.elasticsearch.xcontent.ParseField;
+import org.elasticsearch.xcontent.XContentLocation;
 import org.elasticsearch.xcontent.XContentParser;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
-
-import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
-import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.CENTROIDS_NAME;
-import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.COUNTS_NAME;
-import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MAX_FIELD_NAME;
-import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.MIN_FIELD_NAME;
-import static org.elasticsearch.xpack.analytics.mapper.TDigestFieldMapper.SUM_FIELD_NAME;
+import java.util.function.BiFunction;
 
 public class TDigestParser {
+    public static final String CENTROIDS_NAME = "centroids";
+    public static final String COUNTS_NAME = "counts";
+    public static final String SUM_FIELD_NAME = "sum";
+    public static final String MIN_FIELD_NAME = "min";
+    public static final String MAX_FIELD_NAME = "max";
 
     private static final ParseField COUNTS_FIELD = new ParseField(COUNTS_NAME);
     private static final ParseField CENTROIDS_FIELD = new ParseField(CENTROIDS_NAME);
 
@@ -91,9 +92,15 @@ public Long count() {
      *
      * @param mappedFieldName the name of the field being parsed, used for error messages
      * @param parser the parser to use
+     * @param documentParsingExceptionProvider factory for document parsing exceptions; required because the server's exception types are not visible here
      * @return the parsed histogram
      */
-    public static ParsedTDigest parse(String mappedFieldName, XContentParser parser) throws IOException {
+    public static ParsedTDigest parse(
+        String mappedFieldName,
+        XContentParser parser,
+        BiFunction<XContentLocation, String, RuntimeException> documentParsingExceptionProvider,
+        ParsingExceptionProvider parsingExceptionProvider
+    ) throws IOException {
         ArrayList<Double> centroids = null;
         ArrayList<Long> counts = null;
         Double sum = null;
@@ -102,26 +109,26 @@ public static ParsedTDigest parse(String mappedFieldName, XContentParser parser)
         XContentParser.Token token = parser.currentToken();
         while (token != XContentParser.Token.END_OBJECT) {
             // should be a field
-            ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, parser);
+            ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, parser, parsingExceptionProvider);
             String fieldName = parser.currentName();
             if (fieldName.equals(CENTROIDS_FIELD.getPreferredName())) {
-                centroids = getCentroids(mappedFieldName, parser);
+                centroids = getCentroids(mappedFieldName, parser, documentParsingExceptionProvider, parsingExceptionProvider);
             } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) {
-                counts = getCounts(mappedFieldName, parser);
+                counts = getCounts(mappedFieldName, parser, documentParsingExceptionProvider, parsingExceptionProvider);
             } else if (fieldName.equals(SUM_FIELD.getPreferredName())) {
                 token = parser.nextToken();
-                ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser);
+                ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider);
                 sum = parser.doubleValue();
             } else if (fieldName.equals(MIN_FIELD.getPreferredName())) {
                 token = parser.nextToken();
-                ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser);
+                ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider);
                 min = parser.doubleValue();
             } else if (fieldName.equals(MAX_FIELD.getPreferredName())) {
                 token = parser.nextToken();
-                ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser);
+                ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider);
                 max = parser.doubleValue();
             } else {
-                throw new DocumentParsingException(
+                throw documentParsingExceptionProvider.apply(
                     parser.getTokenLocation(),
                     "error parsing field [" + mappedFieldName + "], with unknown parameter [" + fieldName + "]"
                 );
@@ -129,19 +136,19 @@ public static ParsedTDigest parse(String mappedFieldName, XContentParser parser)
             token = parser.nextToken();
         }
         if (centroids == null) {
-            throw new DocumentParsingException(
+            throw documentParsingExceptionProvider.apply(
                 parser.getTokenLocation(),
                 "error parsing field [" + mappedFieldName + "], expected field called [" + CENTROIDS_FIELD.getPreferredName() + "]"
             );
         }
         if (counts == null) {
-            throw new DocumentParsingException(
+            throw documentParsingExceptionProvider.apply(
                 parser.getTokenLocation(),
                 "error parsing field [" + mappedFieldName + "], expected field called [" + COUNTS_FIELD.getPreferredName() + "]"
             );
         }
         if (centroids.size() != counts.size()) {
-            throw new DocumentParsingException(
+            throw documentParsingExceptionProvider.apply(
                 parser.getTokenLocation(),
                 "error parsing field ["
                     + mappedFieldName
@@ -165,20 +172,25 @@ public static ParsedTDigest parse(String mappedFieldName, XContentParser parser)
         return new ParsedTDigest(centroids, counts, sum, min, max);
     }
 
-    private static ArrayList<Long> getCounts(String mappedFieldName, XContentParser parser) throws IOException {
+    private static ArrayList<Long> getCounts(
+        String mappedFieldName,
+        XContentParser parser,
+        BiFunction<XContentLocation, String, RuntimeException> documentParsingExceptionProvider,
+        ParsingExceptionProvider parsingExceptionProvider
+    ) throws IOException {
         ArrayList<Long> counts;
         XContentParser.Token token;
         token = parser.nextToken();
         // should be an array
-        ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser);
+        ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser, parsingExceptionProvider);
         counts = new ArrayList<>();
         token = parser.nextToken();
         while (token != XContentParser.Token.END_ARRAY) {
             // should be a number
-            ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser);
+            ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider);
             long count = parser.longValue();
             if (count < 0) {
-                throw new DocumentParsingException(
+                throw documentParsingExceptionProvider.apply(
                     parser.getTokenLocation(),
                     "error parsing field [" + mappedFieldName + "], [" + COUNTS_FIELD + "] elements must be >= 0 but got " + count
                 );
@@ -189,22 +201,27 @@ private static ArrayList<Long> getCounts(String mappedFieldName, XContentParser
         return counts;
     }
 
-    private static ArrayList<Double> getCentroids(String mappedFieldName, XContentParser parser) throws IOException {
+    private static ArrayList<Double> getCentroids(
+        String mappedFieldName,
+        XContentParser parser,
+        BiFunction<XContentLocation, String, RuntimeException> documentParsingExceptionProvider,
+        ParsingExceptionProvider parsingExceptionProvider
+    ) throws IOException {
         XContentParser.Token token;
         ArrayList<Double> centroids;
         token = parser.nextToken();
         // should be an array
-        ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser);
+        ensureExpectedToken(XContentParser.Token.START_ARRAY, token, parser, parsingExceptionProvider);
         centroids = new ArrayList<>();
         token = parser.nextToken();
         double previousVal = -Double.MAX_VALUE;
         while (token != XContentParser.Token.END_ARRAY) {
             // should be a number
-            ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser);
+            ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, parser, parsingExceptionProvider);
             double val = parser.doubleValue();
             if (val < previousVal) {
                 // centroids must be in increasing order
-                throw new DocumentParsingException(
+                throw documentParsingExceptionProvider.apply(
                     parser.getTokenLocation(),
                     "error parsing field ["
                         + mappedFieldName
@@ -224,4 +241,23 @@ private static ArrayList<Double> getCentroids(String mappedFieldName, XContentPa
         return centroids;
     }
 
+    /**
+     * Factory for the exception thrown on a token mismatch; needed because the server's parsing exception is not visible here.
+     */
+    @FunctionalInterface
+    public interface ParsingExceptionProvider {
+        RuntimeException apply(XContentParser parser, XContentParser.Token expected, XContentParser.Token actual) throws IOException;
+    }
+
+    public static void ensureExpectedToken(
+        XContentParser.Token expected,
+        XContentParser.Token actual,
+        XContentParser parser,
+        ParsingExceptionProvider parsingExceptionProvider
+    ) throws IOException {
+        if (actual != expected) {
+            throw parsingExceptionProvider.apply(parser, expected, actual);
+        }
+    }
+
 }
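With the server imports gone, callers inject both exception factories. A minimal sketch of driving the relocated parser over a JSON payload — the helper name, field name, and payload here are invented for illustration; the call shape mirrors the TDigestFieldMapper and CsvTestUtils call sites further down in this diff:

```java
import java.io.IOException;

import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.index.mapper.DocumentParsingException;
import org.elasticsearch.tdigest.parsing.TDigestParser;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.json.JsonXContent;

// Hypothetical helper, not part of this change.
static TDigestParser.ParsedTDigest parseExample() throws IOException {
    String json = "{\"centroids\":[0.1,0.5],\"counts\":[3,7]}";
    try (XContentParser parser = JsonXContent.jsonXContent.createParser(XContentParserConfiguration.EMPTY, json)) {
        parser.nextToken(); // position on START_OBJECT
        parser.nextToken(); // step onto the first field, as parse() expects
        return TDigestParser.parse(
            "example_field",                      // used only in error messages
            parser,
            DocumentParsingException::new,        // BiFunction<XContentLocation, String, RuntimeException>
            XContentParserUtils::parsingException // ParsingExceptionProvider for token mismatches
        );
    }
}
```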
diff --git a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java
new file mode 100644
index 0000000000000..3ddf75a66f584
--- /dev/null
+++ b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/parsing/package-info.java
@@ -0,0 +1,15 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+/**
+ * This package contains the Elasticsearch-specific classes for serializing and deserializing
+ * t-digests from various formats via Elasticsearch's XContent abstraction layer.
+ */
+
+package org.elasticsearch.tdigest.parsing;
diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java
index 6390e62f9758f..48ef941c60f4d 100644
--- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java
+++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java
@@ -86,7 +86,7 @@ public static void expectValueToken(Token actual, XContentParser parser) {
         }
     }
 
-    private static ParsingException parsingException(XContentParser parser, Token expected, Token actual) {
+    public static ParsingException parsingException(XContentParser parser, Token expected, Token actual) {
         return new ParsingException(
             parser.getTokenLocation(),
             String.format(Locale.ROOT, "Failed to parse object: expecting token of type [%s] but found [%s]", expected, actual)
diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java
index 74ced0a1c2e3b..b481fd8d9aab5 100644
--- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java
+++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java
@@ -23,6 +23,7 @@
 import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.FeatureFlag;
+import org.elasticsearch.common.xcontent.XContentParserUtils;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.FormattedDocValues;
@@ -54,6 +55,7 @@
 import org.elasticsearch.search.aggregations.metrics.TDigestState;
 import org.elasticsearch.search.sort.BucketedSort;
 import org.elasticsearch.search.sort.SortOrder;
+import org.elasticsearch.tdigest.parsing.TDigestParser;
 import org.elasticsearch.xcontent.CopyingXContentParser;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentParser;
@@ -62,6 +64,7 @@
 
 import java.io.IOException;
 import java.io.UncheckedIOException;
+import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 
@@ -368,21 +371,14 @@ public void parse(DocumentParserContext context) throws IOException {
             }
             subParser.nextToken();
             // TODO: Here we should build a t-digest out of the input, based on the settings on the field
-            TDigestParser.ParsedTDigest parsedTDigest = TDigestParser.parse(fullPath(), subParser);
-
-            BytesStreamOutput streamOutput = new BytesStreamOutput();
-
-            for (int i = 0; i < parsedTDigest.centroids().size(); i++) {
-                long count = parsedTDigest.counts().get(i);
-                assert count >= 0;
-                // we do not add elements with count == 0
-                if (count > 0) {
-                    streamOutput.writeVLong(count);
-                    streamOutput.writeDouble(parsedTDigest.centroids().get(i));
-                }
-            }
+            TDigestParser.ParsedTDigest parsedTDigest = TDigestParser.parse(
+                fullPath(),
+                subParser,
+                DocumentParsingException::new,
+                XContentParserUtils::parsingException
+            );
 
-            BytesRef docValue = streamOutput.bytes().toBytesRef();
+            BytesRef docValue = encodeCentroidsAndCounts(parsedTDigest.centroids(), parsedTDigest.counts());
             Field digestField = new BinaryDocValuesField(fullPath(), docValue);
 
             // Add numeric doc values fields for the summary data
@@ -458,6 +454,23 @@ public void parse(DocumentParserContext context) throws IOException {
         context.path().remove();
     }
 
+    private static BytesRef encodeCentroidsAndCounts(List<Double> centroids, List<Long> counts) throws IOException {
+        BytesStreamOutput streamOutput = new BytesStreamOutput();
+
+        for (int i = 0; i < centroids.size(); i++) {
+            long count = counts.get(i);
+            assert count >= 0;
+            // we do not add elements with count == 0
+            if (count > 0) {
+                streamOutput.writeVLong(count);
+                streamOutput.writeDouble(centroids.get(i));
+            }
+        }
+
+        BytesRef docValue = streamOutput.bytes().toBytesRef();
+        return docValue;
+    }
+
     private static String valuesCountSubFieldName(String fullPath) {
         return fullPath + "._values_count";
     }
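The extracted encodeCentroidsAndCounts helper pins down the binary doc-value layout: a flat sequence of (vlong count, double centroid) pairs, with zero-count entries dropped. For orientation only, a hypothetical inverse — decodeCentroidsAndCounts is not part of this change, and it assumes org.elasticsearch.common.io.stream.StreamInput wrapped over the stored bytes:

```java
// Hypothetical inverse of encodeCentroidsAndCounts, shown only to document the layout.
private static void decodeCentroidsAndCounts(BytesRef docValue, List<Double> centroids, List<Long> counts) throws IOException {
    StreamInput in = StreamInput.wrap(docValue.bytes, docValue.offset, docValue.length);
    while (in.available() > 0) {
        counts.add(in.readVLong());     // the count is written first ...
        centroids.add(in.readDouble()); // ... followed by the centroid mean
    }
}
```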
diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java
index da808b0083d22..5bcff64bc7149 100644
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java
+++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java
@@ -14,4 +14,8 @@
 public class EsqlCorePlugin extends Plugin implements ExtensiblePlugin {
 
     public static final FeatureFlag EXPONENTIAL_HISTOGRAM_FEATURE_FLAG = new FeatureFlag("esql_exponential_histogram");
+
+    // Note: there is also a feature flag for the field type in the analytics plugin, but for visibility reasons we need
+    // another one here.
+    public static final FeatureFlag T_DIGEST_ESQL_SUPPORT = new FeatureFlag("esql_t_digest_support");
 }
diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java
index 8270605b97ca6..1c02630a4ece4 100644
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java
+++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java
@@ -348,6 +348,16 @@ public enum DataType implements Writeable {
             .underConstruction()
     ),
 
+    /*
+    TDIGEST(
+        builder().esType("tdigest")
+            .estimatedSize(16 * 160) // guess 160 buckets (OTEL default for positive values only histograms) with 16 bytes per bucket
+            .docValues()
+            .underConstruction()
+    ),
+
+    */
+
     /**
      * Fields with this type are dense vectors, represented as an array of float values.
      */
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java
index eeab17568e144..88264a1a3f66e 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java
@@ -496,6 +496,10 @@ public ExponentialHistogramBlockBuilder newExponentialHistogramBlockBuilder(int
         return new ExponentialHistogramBlockBuilder(estimatedSize, this);
     }
 
+    public TDigestBlockBuilder newTDigestBlockBuilder(int estimatedSize) {
+        return new TDigestBlockBuilder(estimatedSize, this);
+    }
+
     public final ExponentialHistogramBlock newConstantExponentialHistogramBlock(ExponentialHistogram value, int positionCount) {
         try (ExponentialHistogramBlockBuilder builder = newExponentialHistogramBlockBuilder(positionCount)) {
             for (int i = 0; i < positionCount; i++) {
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java
index 7dfe664364e51..0bfdc3910eced 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java
@@ -221,13 +221,16 @@ public static void appendValue(Block.Builder builder, Object val, ElementType ty
         switch (type) {
             case LONG -> ((LongBlock.Builder) builder).appendLong((Long) val);
             case INT -> ((IntBlock.Builder) builder).appendInt((Integer) val);
+            case NULL -> {
+            }
             case BYTES_REF -> ((BytesRefBlock.Builder) builder).appendBytesRef(toBytesRef(val));
             case FLOAT -> ((FloatBlock.Builder) builder).appendFloat((Float) val);
             case DOUBLE -> ((DoubleBlock.Builder) builder).appendDouble((Double) val);
             case BOOLEAN -> ((BooleanBlock.Builder) builder).appendBoolean((Boolean) val);
+            case TDIGEST -> ((TDigestBlockBuilder) builder).append((TDigestHolder) val);
             case AGGREGATE_METRIC_DOUBLE -> ((AggregateMetricDoubleBlockBuilder) builder).appendLiteral((AggregateMetricDoubleLiteral) val);
             case EXPONENTIAL_HISTOGRAM -> ((ExponentialHistogramBlockBuilder) builder).append((ExponentialHistogram) val);
-            default -> throw new UnsupportedOperationException("unsupported element type [" + type + "]");
+            case DOC, COMPOSITE, UNKNOWN -> throw new UnsupportedOperationException("unsupported element type [" + type + "]");
         }
     }
 
@@ -316,6 +319,13 @@ yield new AggregateMetricDoubleLiteral(
                 // return a copy so that the returned value is not bound to the lifetime of the block
                 yield ExponentialHistogram.builder(histogram, ExponentialHistogramCircuitBreaker.noop()).build();
             }
+            case TDIGEST -> {
+                TDigestBlock tDigestBlock = (TDigestBlock) block;
+                // TODO memory tracking? Or do we not care here because this is only called for literals?
+                BytesRef scratch = new BytesRef();
+                yield tDigestBlock.getTDigestHolder(offset, scratch);
+
+            }
             case UNKNOWN -> throw new IllegalArgumentException("can't read values from [" + block + "]");
         };
     }
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java
index 92b6843ed253e..706b5e48b8f0f 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java
@@ -301,6 +301,12 @@ public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialH
         throw new UnsupportedOperationException("null block");
     }
 
+    @Override
+    public TDigestHolder getTDigestHolder(int valueIndex, BytesRef scratch) {
+        assert false : "null block";
+        throw new UnsupportedOperationException("null block");
+    }
+
     @Override
     public Block buildExponentialHistogramComponentBlock(Component component) {
         // if all histograms are null, the component block is also a constant null block with the same position count
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java
index 52a7853e56182..c8890542cf867 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ElementType.java
@@ -75,7 +75,8 @@ public enum ElementType {
         "ExponentialHistogram",
         BlockFactory::newExponentialHistogramBlockBuilder,
         ExponentialHistogramArrayBlock::readFrom
-    );
+    ),
+    TDIGEST(12, "TDigest", BlockFactory::newTDigestBlockBuilder, TDigestArrayBlock::readFrom);
 
     private static final TransportVersion ESQL_SERIALIZE_BLOCK_TYPE_CODE = TransportVersion.fromName("esql_serialize_block_type_code");
 
@@ -126,6 +127,8 @@ public static ElementType fromJava(Class<?> type) {
             elementType = AGGREGATE_METRIC_DOUBLE;
         } else if (type != null && ExponentialHistogram.class.isAssignableFrom(type)) {
             elementType = EXPONENTIAL_HISTOGRAM;
+        } else if (type != null && TDigestHolder.class.isAssignableFrom(type)) {
+            elementType = TDIGEST;
         } else if (type == null || type == Void.class) {
             elementType = NULL;
         } else {
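With the TDIGEST constant and the new fromJava branch in place, literal routing stays table-driven end to end. A hypothetical test-style check of the dispatch (not part of this change):

```java
// TDigestHolder values now resolve to the TDIGEST element type ...
assert ElementType.fromJava(TDigestHolder.class) == ElementType.TDIGEST;
// ... while null/Void still resolves to NULL, as before.
assert ElementType.fromJava(Void.class) == ElementType.NULL;
```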
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java
index d98929b727091..a35b831233174 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestArrayBlock.java
@@ -7,6 +7,7 @@
 
 package org.elasticsearch.compute.data;
 
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
@@ -72,7 +73,7 @@ public int getValueCount(int position) {
 
     @Override
     public ElementType elementType() {
-        throw new UnsupportedOperationException("Need to implement this later");
+        return ElementType.TDIGEST;
     }
 
     @Override
@@ -200,6 +201,22 @@ public Block deepCopy(BlockFactory blockFactory) {
         return new TDigestArrayBlock(copiedEncodedDigests, copiedMinima, copiedMaxima, copiedSums, copiedValueCounts);
     }
 
+    void copyInto(
+        BytesRefBlock.Builder encodedDigestsBuilder,
+        DoubleBlock.Builder minimaBuilder,
+        DoubleBlock.Builder maximaBuilder,
+        DoubleBlock.Builder sumsBuilder,
+        LongBlock.Builder valueCountsBuilder,
+        int beginInclusive,
+        int endExclusive
+    ) {
+        encodedDigestsBuilder.copyFrom(encodedDigests, beginInclusive, endExclusive);
+        minimaBuilder.copyFrom(minima, beginInclusive, endExclusive);
+        maximaBuilder.copyFrom(maxima, beginInclusive, endExclusive);
+        sumsBuilder.copyFrom(sums, beginInclusive, endExclusive);
+        valueCountsBuilder.copyFrom(valueCounts, beginInclusive, endExclusive);
+    }
+
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         Block.writeTypedBlock(encodedDigests, out);
@@ -209,6 +226,29 @@ public void writeTo(StreamOutput out) throws IOException {
         Block.writeTypedBlock(valueCounts, out);
     }
 
+    public static TDigestArrayBlock readFrom(BlockStreamInput in) throws IOException {
+        BytesRefBlock encodedDigests = null;
+        DoubleBlock minima = null;
+        DoubleBlock maxima = null;
+        DoubleBlock sums = null;
+        LongBlock valueCounts = null;
+
+        boolean success = false;
+        try {
+            encodedDigests = (BytesRefBlock) Block.readTypedBlock(in);
+            minima = (DoubleBlock) Block.readTypedBlock(in);
+            maxima = (DoubleBlock) Block.readTypedBlock(in);
+            sums = (DoubleBlock) Block.readTypedBlock(in);
+            valueCounts = (LongBlock) Block.readTypedBlock(in);
+            success = true;
+        } finally {
+            if (success == false) {
+                Releasables.close(minima, maxima, sums, valueCounts, encodedDigests);
+            }
+        }
+        return new TDigestArrayBlock(encodedDigests, minima, maxima, sums, valueCounts);
+    }
+
     @Override
     public long ramBytesUsed() {
         long bytes = 0;
@@ -217,4 +257,15 @@ public long ramBytesUsed() {
         }
         return bytes;
     }
+
+    @Override
+    public TDigestHolder getTDigestHolder(int offset, BytesRef scratch) {
+        return new TDigestHolder(
+            encodedDigests.getBytesRef(offset, scratch),
+            minima.getDouble(offset),
+            maxima.getDouble(offset),
+            sums.getDouble(offset),
+            valueCounts.getLong(offset)
+        );
+    }
 }
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java
index 315dbbb7b52f8..695a742e4a1df 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlock.java
@@ -7,4 +7,24 @@
 
 package org.elasticsearch.compute.data;
 
-public sealed interface TDigestBlock extends Block permits ConstantNullBlock, TDigestArrayBlock {}
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.mapper.BlockLoader;
+
+public sealed interface TDigestBlock extends Block permits ConstantNullBlock, TDigestArrayBlock {
+
+    /**
+     * Builder for {@link TDigestBlock}
+     */
+    sealed interface Builder extends Block.Builder, BlockLoader.TDigestBuilder permits TDigestBlockBuilder {
+
+        /**
+         * Copy the values in {@code block} from the given position into this builder.
+         */
+        TDigestBlock.Builder copyFrom(TDigestBlock block, int position);
+
+        @Override
+        TDigestBlock build();
+    }
+
+    TDigestHolder getTDigestHolder(int offset, BytesRef scratch);
+}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java
new file mode 100644
index 0000000000000..a6f7eaca72c41
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestBlockBuilder.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data;
+
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.index.mapper.BlockLoader;
+
+public final class TDigestBlockBuilder implements TDigestBlock.Builder {
+
+    private final BytesRefBlock.Builder encodedDigestsBuilder;
+    private final DoubleBlock.Builder minimaBuilder;
+    private final DoubleBlock.Builder maximaBuilder;
+    private final DoubleBlock.Builder sumsBuilder;
+    private final LongBlock.Builder valueCountsBuilder;
+
+    public TDigestBlockBuilder(int size, BlockFactory blockFactory) {
+        BytesRefBlock.Builder encodedDigestsBuilder = null;
+        DoubleBlock.Builder minimaBuilder = null;
+        DoubleBlock.Builder maximaBuilder = null;
+        DoubleBlock.Builder sumsBuilder = null;
+        LongBlock.Builder valueCountsBuilder = null;
+        boolean success = false;
+        try {
+            encodedDigestsBuilder = blockFactory.newBytesRefBlockBuilder(size);
+            minimaBuilder = blockFactory.newDoubleBlockBuilder(size);
+            maximaBuilder = blockFactory.newDoubleBlockBuilder(size);
+            sumsBuilder = blockFactory.newDoubleBlockBuilder(size);
+            valueCountsBuilder = blockFactory.newLongBlockBuilder(size);
+            this.encodedDigestsBuilder = encodedDigestsBuilder;
+            this.minimaBuilder = minimaBuilder;
+            this.maximaBuilder = maximaBuilder;
+            this.sumsBuilder = sumsBuilder;
+            this.valueCountsBuilder = valueCountsBuilder;
+            success = true;
+        } finally {
+            if (success == false) {
+                Releasables.close(encodedDigestsBuilder, minimaBuilder, maximaBuilder, sumsBuilder, valueCountsBuilder);
+            }
+        }
+    }
+
+    @Override
+    public TDigestBlockBuilder copyFrom(Block block, int beginInclusive, int endExclusive) {
+        if (block.areAllValuesNull()) {
+            for (int i = beginInclusive; i < endExclusive; i++) {
+                appendNull();
+            }
+        } else {
+            TDigestArrayBlock digestBlock = (TDigestArrayBlock) block;
+            digestBlock.copyInto(
+                encodedDigestsBuilder,
+                minimaBuilder,
+                maximaBuilder,
+                sumsBuilder,
+                valueCountsBuilder,
+                beginInclusive,
+                endExclusive
+            );
+        }
+        return this;
+    }
+
+    @Override
+    public TDigestBlock.Builder copyFrom(TDigestBlock block, int position) {
+        copyFrom(block, position, position + 1);
+        return this;
+    }
+
+    @Override
+    public Block.Builder appendNull() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Block.Builder beginPositionEntry() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Block.Builder endPositionEntry() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Block.Builder mvOrdering(Block.MvOrdering mvOrdering) {
+        assert mvOrdering == Block.MvOrdering.UNORDERED : "TDigests don't have a natural order, so it doesn't make sense to call this";
+        return this;
+    }
+
+    @Override
+    public long estimatedBytes() {
+        return 0;
+    }
+
+    @Override
+    public TDigestBlock build() {
+        DoubleBlock minima = null;
+        DoubleBlock maxima = null;
+        DoubleBlock sums = null;
+        LongBlock valueCounts = null;
+        BytesRefBlock encodedDigests = null;
+        boolean success = false;
+        try {
+            minima = minimaBuilder.build();
+            maxima = maximaBuilder.build();
+            sums = sumsBuilder.build();
+            valueCounts = valueCountsBuilder.build();
+            encodedDigests = encodedDigestsBuilder.build();
+            success = true;
+            return new TDigestArrayBlock(encodedDigests, minima, maxima, sums, valueCounts);
+        } finally {
+            if (success == false) {
+                Releasables.close(minima, maxima, sums, valueCounts, encodedDigests);
+            }
+        }
+    }
+
+    @Override
+    public BlockLoader.DoubleBuilder minima() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public BlockLoader.DoubleBuilder maxima() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public BlockLoader.DoubleBuilder sums() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public BlockLoader.LongBuilder valueCounts() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public BlockLoader.BytesRefBuilder encodedDigests() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void close() {
+        Releasables.close(encodedDigestsBuilder, minimaBuilder, maximaBuilder, sumsBuilder, valueCountsBuilder);
+    }
+
+    public void append(TDigestHolder val) {
+        encodedDigestsBuilder.appendBytesRef(val.getEncodedDigest());
+        minimaBuilder.appendDouble(val.getMin());
+        maximaBuilder.appendDouble(val.getMax());
+        sumsBuilder.appendDouble(val.getSum());
+        valueCountsBuilder.appendLong(val.getValueCount());
+    }
+}
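Since appendNull, beginPositionEntry, and endPositionEntry all throw, the builder currently handles only non-null, single-valued positions (which also means copyFrom over an all-null block would trip the throwing appendNull). A minimal usage sketch — illustrative only; blockFactory and holder are assumed to be in scope, e.g. a test BlockFactory and a value from BlockTestUtils.randomTDigest below:

```java
// Illustrative round trip: holder -> block -> holder.
try (TDigestBlockBuilder builder = blockFactory.newTDigestBlockBuilder(1)) {
    builder.append(holder); // appends all five parallel columns in lock-step
    try (TDigestBlock block = builder.build()) {
        TDigestHolder roundTripped = block.getTDigestHolder(0, new BytesRef());
    }
}
```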
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java
new file mode 100644
index 0000000000000..63ac22aaa5c66
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/TDigestHolder.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * This exists to hold the values from a {@link TDigestBlock}. It is roughly parallel to
+ * {@link org.elasticsearch.search.aggregations.metrics.TDigestState} in classic aggregations, which we are not using directly because
+ * the serialization format is pretty bad for ESQL's use case (specifically, encoding the near-constant compression and merge strategy
+ * data inline as opposed to in a dedicated column isn't great).
+ */
+public class TDigestHolder {
+
+    private final double min;
+    private final double max;
+    private final double sum;
+    private final long valueCount;
+    private final BytesRef encodedDigest;
+
+    // TODO - Deal with the empty array case better
+    public TDigestHolder(BytesRef encodedDigest, double min, double max, double sum, long valueCount) {
+        this.encodedDigest = encodedDigest;
+        this.min = min;
+        this.max = max;
+        this.sum = sum;
+        this.valueCount = valueCount;
+    }
+
+    public TDigestHolder(List<Double> centroids, List<Long> counts, double min, double max, double sum, long valueCount)
+        throws IOException {
+        this(encodeCentroidsAndCounts(centroids, counts), min, max, sum, valueCount);
+    }
+
+    private static BytesRef encodeCentroidsAndCounts(List<Double> centroids, List<Long> counts) throws IOException {
+        // TODO: This is copied from the method of the same name in TDigestFieldMapper. It would be nice to find a way to reuse that code
+        BytesStreamOutput streamOutput = new BytesStreamOutput();
+
+        for (int i = 0; i < centroids.size(); i++) {
+            long count = counts.get(i);
+            assert count >= 0;
+            // we do not add elements with count == 0
+            if (count > 0) {
+                streamOutput.writeVLong(count);
+                streamOutput.writeDouble(centroids.get(i));
+            }
+        }
+
+        BytesRef docValue = streamOutput.bytes().toBytesRef();
+        return docValue;
+    }
+
+    public BytesRef getEncodedDigest() {
+        return encodedDigest;
+    }
+
+    // TODO - compute these if they're not given? or do that at object creation time, maybe.
+    public double getMax() {
+        return max;
+    }
+
+    public double getMin() {
+        return min;
+    }
+
+    public double getSum() {
+        return sum;
+    }
+
+    public long getValueCount() {
+        return valueCount;
+    }
+
+}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java
index c42946ed71777..975d85c55e6af 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java
@@ -189,6 +189,7 @@ public static IntFunction<Object> createBlockValueReader(Block block) {
             case COMPOSITE -> throw new IllegalArgumentException("can't read values from [composite] block");
             case AGGREGATE_METRIC_DOUBLE -> throw new IllegalArgumentException("can't read values from [aggregate metric double] block");
             case EXPONENTIAL_HISTOGRAM -> throw new IllegalArgumentException("can't read values from [exponential histogram] block");
+            case TDIGEST -> throw new IllegalArgumentException("can't read values from [tdigest] block");
             case UNKNOWN -> throw new IllegalArgumentException("can't read values from [" + block + "]");
         };
     }
diff --git a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java
index 9f0a0be60d5f7..a3ed5bb38551b 100644
--- a/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java
+++ b/x-pack/plugin/esql/compute/test/src/main/java/org/elasticsearch/compute/test/BlockTestUtils.java
@@ -27,17 +27,22 @@
 import org.elasticsearch.compute.data.LongBlock;
 import org.elasticsearch.compute.data.OrdinalBytesRefBlock;
 import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.data.TDigestHolder;
 import org.elasticsearch.core.Releasables;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramBuilder;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker;
 import org.elasticsearch.exponentialhistogram.ReleasableExponentialHistogram;
 import org.elasticsearch.exponentialhistogram.ZeroBucket;
+import org.elasticsearch.search.aggregations.metrics.TDigestState;
+import org.elasticsearch.tdigest.Centroid;
 import org.hamcrest.Matcher;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.DoubleStream;
@@ -45,9 +50,11 @@
 
 import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
 import static org.elasticsearch.test.ESTestCase.between;
+import static org.elasticsearch.test.ESTestCase.fail;
 import static org.elasticsearch.test.ESTestCase.randomBoolean;
 import static org.elasticsearch.test.ESTestCase.randomDouble;
 import static org.elasticsearch.test.ESTestCase.randomFloat;
+import static org.elasticsearch.test.ESTestCase.randomGaussianDouble;
 import static org.elasticsearch.test.ESTestCase.randomInt;
 import static org.elasticsearch.test.ESTestCase.randomIntBetween;
 import static org.elasticsearch.test.ESTestCase.randomLong;
@@ -80,6 +87,7 @@ public static Object randomValue(ElementType e) {
                 between(0, Integer.MAX_VALUE)
             );
             case EXPONENTIAL_HISTOGRAM -> randomExponentialHistogram();
+            case TDIGEST -> randomTDigest();
             case NULL -> null;
             case COMPOSITE -> throw new IllegalArgumentException("can't make random values for composite");
             case UNKNOWN -> throw new IllegalArgumentException("can't make random values for [" + e + "]");
@@ -414,6 +422,40 @@ public static ExponentialHistogram randomExponentialHistogram() {
         return histo;
     }
 
+    public static TDigestHolder randomTDigest() {
+        // TODO: This is mostly copied from TDigestFieldMapperTests; refactor it.
+        Map<String, Object> value = new LinkedHashMap<>();
+        int size = between(1, 100);
+        // Note - we use TDigestState to build an actual t-digest for realistic values here
+        TDigestState digest = TDigestState.createWithoutCircuitBreaking(100);
+        for (int i = 0; i < size; i++) {
+            double sample = randomGaussianDouble();
+            int count = randomIntBetween(1, Integer.MAX_VALUE);
+            digest.add(sample, count);
+        }
+        List<Double> centroids = new ArrayList<>();
+        List<Long> counts = new ArrayList<>();
+        double sum = 0.0;
+        long valueCount = 0L;
+        for (Centroid c : digest.centroids()) {
+            centroids.add(c.mean());
+            counts.add(c.count());
+            sum += c.mean() * c.count();
+            valueCount += c.count();
+        }
+        double min = digest.getMin();
+        double max = digest.getMax();
+
+        TDigestHolder returnValue = null;
+        try {
+            returnValue = new TDigestHolder(centroids, counts, min, max, sum, valueCount);
+        } catch (IOException e) {
+            // This is a test util, so we're just going to fail the test here
+            fail(e);
+        }
+        return returnValue;
+    }
+
     private static int dedupe(Map<BytesRef, Integer> dedupe, BytesRefVector.Builder bytes, BytesRef v) {
         Integer current = dedupe.get(v);
         if (current != null) {
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java
index 349cc8c3e6a6f..7e58a5742d7e2 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java
@@ -15,6 +15,7 @@
 import org.elasticsearch.common.time.DateFormatters;
 import org.elasticsearch.common.time.DateUtils;
 import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.xcontent.XContentParserUtils;
 import org.elasticsearch.compute.data.AggregateMetricDoubleBlockBuilder;
 import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.BlockFactory;
@@ -22,6 +23,7 @@
 import org.elasticsearch.compute.data.BlockUtils.BuilderWrapper;
 import org.elasticsearch.compute.data.ElementType;
 import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.data.TDigestHolder;
 import org.elasticsearch.core.Booleans;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.Releasable;
@@ -32,8 +34,10 @@
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramXContent;
 import org.elasticsearch.geometry.utils.Geohash;
 import org.elasticsearch.h3.H3;
+import org.elasticsearch.index.mapper.DocumentParsingException;
 import org.elasticsearch.logging.Logger;
 import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils;
+import org.elasticsearch.tdigest.parsing.TDigestParser;
 import org.elasticsearch.test.VersionUtils;
 import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.xcontent.XContentParserConfiguration;
@@ -505,6 +509,7 @@ public enum Type {
         ),
         DENSE_VECTOR(Float::parseFloat, Float.class, false),
         EXPONENTIAL_HISTOGRAM(CsvTestUtils::parseExponentialHistogram, ExponentialHistogram.class),
+        TDIGEST(CsvTestUtils::parseTDigest, TDigestHolder.class),
         UNSUPPORTED(Type::convertUnsupported, Void.class);
 
         private static Void convertUnsupported(String s) {
@@ -601,6 +606,7 @@ public static Type asType(ElementType elementType, Type actualType) {
             case COMPOSITE -> throw new IllegalArgumentException("can't assert on composite blocks");
             case AGGREGATE_METRIC_DOUBLE -> AGGREGATE_METRIC_DOUBLE;
             case EXPONENTIAL_HISTOGRAM -> EXPONENTIAL_HISTOGRAM;
+            case TDIGEST -> TDIGEST;
             case UNKNOWN -> throw new IllegalArgumentException("Unknown block types cannot be handled");
         };
     }
@@ -717,4 +723,25 @@ private static ExponentialHistogram parseExponentialHistogram(@Nullable String j
             throw new IllegalArgumentException(e);
         }
     }
+
+    private static TDigestHolder parseTDigest(@Nullable String json) {
+        if (json == null) {
+            return null;
+        }
+        try (XContentParser parser = JsonXContent.jsonXContent.createParser(XContentParserConfiguration.EMPTY, json)) {
+            if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
+                throw new IllegalArgumentException("Expected START_OBJECT but found: " + parser.currentToken());
+            }
+            parser.nextToken();
+            TDigestParser.ParsedTDigest parsed = TDigestParser.parse(
+                "field from test data",
+                parser,
+                DocumentParsingException::new,
+                XContentParserUtils::parsingException
+            );
+            return new TDigestHolder(parsed.centroids(), parsed.counts(), parsed.min(), parsed.max(), parsed.sum(), parsed.count());
+        } catch (IOException e) {
+            throw new IllegalArgumentException(e);
+        }
+    }
 }
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
index bd42ee08ed384..b3320be613d20 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
@@ -173,6 +173,7 @@ public class CsvTestsDataLoader {
     private static final TestDataset COLORS = new TestDataset("colors");
     private static final TestDataset COLORS_CMYK_LOOKUP = new TestDataset("colors_cmyk").withSetting("lookup-settings.json");
     private static final TestDataset EXP_HISTO_SAMPLE = new TestDataset("exp_histo_sample");
+    private static final TestDataset TDIGEST_STANDARD_INDEX = new TestDataset("tdigest_standard_index");
 
     public static final Map<String, TestDataset> CSV_DATASET_MAP = Map.ofEntries(
         Map.entry(EMPLOYEES.indexName, EMPLOYEES),
@@ -241,7 +242,8 @@ public class CsvTestsDataLoader {
         Map.entry(COLORS_CMYK_LOOKUP.indexName, COLORS_CMYK_LOOKUP),
         Map.entry(MULTI_COLUMN_JOINABLE.indexName, MULTI_COLUMN_JOINABLE),
         Map.entry(MULTI_COLUMN_JOINABLE_LOOKUP.indexName, MULTI_COLUMN_JOINABLE_LOOKUP),
-        Map.entry(EXP_HISTO_SAMPLE.indexName, EXP_HISTO_SAMPLE)
+        Map.entry(EXP_HISTO_SAMPLE.indexName, EXP_HISTO_SAMPLE),
+        Map.entry(TDIGEST_STANDARD_INDEX.indexName, TDIGEST_STANDARD_INDEX)
     );
 
     private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv
new file mode 100644
index 0000000000000..34e7cbdb61e63
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/tdigest_standard_index.csv
@@ -0,0 +1,2 @@
+@timestamp:date,instance:keyword,responseTime:tdigest
+2025-01-01T00:00:00Z,hand-rolled,{"centroids":[0.1\,0.2\,0.3\,0.4\,0.5]\,"counts":[3\,7\,23\,12\,6]}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-tdigest_standard_index.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-tdigest_standard_index.json
new file mode 100644
index 0000000000000..9ed14cd36e622
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-tdigest_standard_index.json
@@ -0,0 +1,13 @@
+{
+  "properties": {
+    "@timestamp": {
+      "type": "date"
+    },
+    "instance": {
+      "type": "keyword"
+    },
+    "responseTime": {
+      "type": "tdigest"
+    }
+  }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec
new file mode 100644
index 0000000000000..1ca152a4d76aa
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tdigest.csv-spec
@@ -0,0 +1,9 @@
+Make sure we can even load tdigest data
+required_capability: tdigest_field_type_basic_functionality
+
+FROM tdigest_standard_index | KEEP @timestamp,instance;
+
+@timestamp:date | instance:keyword
+2025-01-01T00:00:00Z | hand-rolled
+
+;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
index c85738e056254..992c65fa74d3b 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -20,6 +20,7 @@
 import java.util.Set;
 
 import static org.elasticsearch.xpack.esql.core.plugin.EsqlCorePlugin.EXPONENTIAL_HISTOGRAM_FEATURE_FLAG;
+import static org.elasticsearch.xpack.esql.core.plugin.EsqlCorePlugin.T_DIGEST_ESQL_SUPPORT;
 
 /**
  * A {@link Set} of "capabilities" supported by the {@link RestEsqlQueryAction}
@@ -1574,6 +1575,8 @@ public enum Cap {
          */
         EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V4(EXPONENTIAL_HISTOGRAM_FEATURE_FLAG),
 
+        TDIGEST_FIELD_TYPE_BASIC_FUNCTIONALITY(T_DIGEST_ESQL_SUPPORT),
+
         /**
          * Create new block when filtering OrdinalBytesRefBlock
          */
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java
index 3981b71f316b0..7c35cbc04aba1 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java
@@ -113,7 +113,7 @@ private static DataType toDataType(ElementType elementType) {
             case DOUBLE -> DataType.DOUBLE;
             case DOC -> DataType.DOC_DATA_TYPE;
             case EXPONENTIAL_HISTOGRAM -> DataType.EXPONENTIAL_HISTOGRAM;
-            case FLOAT, NULL, COMPOSITE, AGGREGATE_METRIC_DOUBLE, UNKNOWN -> throw new EsqlIllegalArgumentException(
+            case FLOAT, NULL, COMPOSITE, AGGREGATE_METRIC_DOUBLE, TDIGEST, UNKNOWN -> throw new EsqlIllegalArgumentException(
                 "unsupported agg type: " + elementType
             );
         };