ES|QL: Add TBUCKET function

leontyevdv · leontyevdv · commit 27941a7f8336 · 2025-07-17T16:05:12.000+02:00
Introduce the function TBUCKET(<time interval>), which groups on the @timestamp field by truncating its value to the specified granularity. For example, TBUCKET(1h) is equivalent to BUCKET(1 hour, @timestamp), and TBUCKET(7d) is equivalent to BUCKET(7 days, @timestamp). Closes #131068
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tbucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/tbucket.csv-spec
@@ -0,0 +1,15 @@
+// TBUCKET-specific tests: median event_duration per one-hour TBUCKET of @timestamp in sample_data.
+// NOTE(review): test name, skip reason and doc tags below look copied from the BUCKET spec - confirm.
+docsGettingStartedBucketStatsByMedian#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+// tag::gs-bucket-stats-by-median[]
+FROM sample_data
+| KEEP @timestamp, event_duration
+| STATS median_duration = MEDIAN(event_duration) BY bucket = TBUCKET(@timestamp, "1 hour")
+// end::gs-bucket-stats-by-median[]
+| SORT bucket
+;
+
+median_duration:double | bucket:date
+3107561.0              |2023-10-23T12:00:00.000Z
+1756467.0              |2023-10-23T13:00:00.000Z
+;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
@@ -53,6 +53,7 @@
 import org.elasticsearch.xpack.esql.expression.function.fulltext.Term;
 import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
 import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize;
+import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket;
 import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case;
 import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
 import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least;
@@ -304,7 +305,8 @@ private static FunctionDefinition[][] functions() {
             // grouping functions
             new FunctionDefinition[] {
                 def(Bucket.class, Bucket::new, "bucket", "bin"),
-                def(Categorize.class, Categorize::new, "categorize") },
+                def(Categorize.class, Categorize::new, "categorize"),
+                def(TBucket.class, TBucket::new, "tbucket") },
             // aggregate functions
             // since they declare two public constructors - one with filter (for nested where) and one without
             // use casting to disambiguate between the two
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingWritables.java
@@ -14,6 +14,6 @@
 public class GroupingWritables {
 
     public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
-        return List.of(Bucket.ENTRY, Categorize.ENTRY);
+        return List.of(Bucket.ENTRY, Categorize.ENTRY, TBucket.ENTRY);
     }
 }
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/TBucket.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/TBucket.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.grouping;
+
+import org.elasticsearch.common.Rounding;
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
+import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware;
+import org.elasticsearch.xpack.esql.common.Failures;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.FoldContext;
+import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.expression.LocalSurrogateExpression;
+import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
+import org.elasticsearch.xpack.esql.expression.function.FunctionType;
+import org.elasticsearch.xpack.esql.expression.function.Param;
+import org.elasticsearch.xpack.esql.expression.function.TwoOptionalArguments;
+import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc;
+import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
+import org.elasticsearch.xpack.esql.stats.SearchStats;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
+import static org.elasticsearch.xpack.esql.expression.Validations.isFoldable;
+import static org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc.maybeSubstituteWithRoundTo;
+import static org.elasticsearch.xpack.esql.session.Configuration.DEFAULT_TZ;
+
+/**
+ * Grouping function that truncates the values of a field to a user-provided time span, e.g.
+ * {@code TBUCKET(@timestamp, "1 hour")}. The span must be a foldable {@code date_period} or
+ * {@code time_duration}; unlike {@link Bucket} there is no auto mode that derives the span from
+ * a target bucket count and a range. Evaluation is delegated to {@link DateTrunc}.
+ */
+public class TBucket extends GroupingFunction.EvaluatableGroupingFunction
+    implements
+        PostOptimizationVerificationAware,
+        TwoOptionalArguments,
+        LocalSurrogateExpression {
+
+    public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "TBucket", TBucket::new);
+
+    private final Expression field; // expression whose values are truncated into buckets
+    private final Expression buckets; // bucket size; resolveType() requires a temporal amount
+
+    @FunctionInfo(
+        returnType = { "double", "date", "date_nanos" },
+        description = """
+            Creates groups of values - buckets - out of a datetime or numeric input.
+            The size of the buckets must be provided directly as a date period or time duration.""",
+        examples = {},
+        type = FunctionType.GROUPING
+    )
+    public TBucket(
+        Source source,
+        @Param(
+            name = "field",
+            type = { "integer", "long", "double", "date", "date_nanos" }, // NOTE(review): numeric types mirror Bucket - confirm
+            description = "Numeric or date expression from which to derive buckets."
+        ) Expression field,
+        @Param(
+            name = "buckets",
+            type = { "date_period", "time_duration" },
+            description = "Desired bucket size."
+        ) Expression buckets
+    ) {
+        super(source, List.of(field, buckets));
+        this.field = field;
+        this.buckets = buckets;
+    }
+
+    private TBucket(StreamInput in) throws IOException {
+        this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class));
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        source().writeTo(out); // order must mirror the StreamInput constructor: source, field, buckets
+        out.writeNamedWriteable(field);
+        out.writeNamedWriteable(buckets);
+    }
+
+    @Override
+    public String getWriteableName() {
+        return ENTRY.name;
+    }
+
+    @Override
+    public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
+        Rounding.Prepared preparedRounding = getDateRounding(toEvaluator.foldCtx(), null, null); // no min/max bounds here
+        return DateTrunc.evaluator(field.dataType(), source(), toEvaluator.apply(field), preparedRounding);
+    }
+
+    /**
+     * Returns the date rounding built from the folded {@code buckets} span, prepared without min/max bounds.
+     */
+    public Rounding.Prepared getDateRoundingOrNull(FoldContext foldCtx) {
+        return getDateRounding(foldCtx, null, null);
+    }
+
+    private Rounding.Prepared getDateRounding(FoldContext foldContext, Long min, Long max) {
+        assert DataType.isTemporalAmount(buckets.dataType()) : "Unexpected span data type [" + buckets.dataType() + "]";
+        return DateTrunc.createRounding(buckets.fold(foldContext), DEFAULT_TZ, min, max);
+    }
+
+    @Override
+    protected TypeResolution resolveType() {
+        if (childrenResolved() == false) {
+            return new TypeResolution("Unresolved children");
+        }
+        return isType(buckets, DataType::isTemporalAmount, sourceText(), SECOND, "date_period", "time_duration");
+    }
+
+    @Override
+    public void postOptimizationVerification(Failures failures) {
+        String operation = sourceText();
+        failures.add(isFoldable(buckets, operation, SECOND)); // the span is folded eagerly in getDateRounding
+    }
+
+    @Override
+    public DataType dataType() {
+        return field.dataType();
+    }
+
+    @Override
+    public Expression replaceChildren(List<Expression> newChildren) {
+        return new TBucket(source(), newChildren.get(0), newChildren.get(1));
+    }
+
+    @Override
+    protected NodeInfo<? extends Expression> info() {
+        return NodeInfo.create(this, TBucket::new, field, buckets);
+    }
+
+    public Expression field() {
+        return field;
+    }
+
+    public Expression buckets() {
+        return buckets;
+    }
+
+    @Override
+    public String toString() {
+        return "TBucket{" + "field=" + field + ", buckets=" + buckets + "}";
+    }
+
+    @Override
+    public Expression surrogate(SearchStats searchStats) {
+        // LocalSubstituteSurrogateExpressions should make sure this doesn't happen
+        assert searchStats != null : "SearchStats cannot be null";
+        return maybeSubstituteWithRoundTo(
+            source(),
+            field(),
+            buckets(),
+            searchStats,
+            (interval, minValue, maxValue) -> getDateRounding(FoldContext.small(), minValue, maxValue)
+        );
+    }
+}

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,6 @@`
`14`	`14`	`public class GroupingWritables {`
`15`	`15`
`16`	`16`	`public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {`
`17`		`- return List.of(Bucket.ENTRY, Categorize.ENTRY);`
	`17`	`+ return List.of(Bucket.ENTRY, Categorize.ENTRY, TBucket.ENTRY);`
`18`	`18`	`}`
`19`	`19`	`}`