Skip to content

Commit 27941a7

Browse files
committed
ES|QL: Add TBUCKET function
Introduce the function TBUCKET(<time interval>) which applies grouping on the @timestamp field, truncating its value to the specified granularity: TBUCKET(1h) is equivalent to BUCKET(1 hour, @timestamp) TBUCKET(7d) is equivalent to BUCKET(7 days, @timestamp) Closes #131068
1 parent df985e6 commit 27941a7

File tree

4 files changed

+190
-2
lines changed

4 files changed

+190
-2
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// TBUCKET-specific tests
2+
3+
docsGettingStartedBucketStatsByMedian#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
4+
// tag::gs-bucket-stats-by-median[]
5+
FROM sample_data
6+
| KEEP @timestamp, event_duration
7+
| STATS median_duration = MEDIAN(event_duration) BY bucket = TBUCKET(@timestamp, "1 hour")
8+
// end::gs-bucket-stats-by-median[]
9+
| SORT bucket
10+
;
11+
12+
median_duration:double | bucket:date
13+
3107561.0 |2023-10-23T12:00:00.000Z
14+
1756467.0 |2023-10-23T13:00:00.000Z
15+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import org.elasticsearch.xpack.esql.expression.function.fulltext.Term;
5454
import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
5555
import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize;
56+
import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket;
5657
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case;
5758
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
5859
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least;
@@ -304,7 +305,8 @@ private static FunctionDefinition[][] functions() {
304305
// grouping functions
305306
new FunctionDefinition[] {
306307
def(Bucket.class, Bucket::new, "bucket", "bin"),
307-
def(Categorize.class, Categorize::new, "categorize") },
308+
def(Categorize.class, Categorize::new, "categorize"),
309+
def(TBucket.class, TBucket::new, "tbucket") },
308310
// aggregate functions
309311
// since they declare two public constructors - one with filter (for nested where) and one without
310312
// use casting to disambiguate between the two

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingWritables.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@
1414
public class GroupingWritables {
1515

1616
public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
17-
return List.of(Bucket.ENTRY, Categorize.ENTRY);
17+
return List.of(Bucket.ENTRY, Categorize.ENTRY, TBucket.ENTRY);
1818
}
1919
}
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.expression.function.grouping;
9+
10+
import org.elasticsearch.common.Rounding;
11+
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
12+
import org.elasticsearch.common.io.stream.StreamInput;
13+
import org.elasticsearch.common.io.stream.StreamOutput;
14+
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
15+
import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware;
16+
import org.elasticsearch.xpack.esql.common.Failures;
17+
import org.elasticsearch.xpack.esql.core.expression.Expression;
18+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
19+
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
20+
import org.elasticsearch.xpack.esql.core.tree.Source;
21+
import org.elasticsearch.xpack.esql.core.type.DataType;
22+
import org.elasticsearch.xpack.esql.expression.LocalSurrogateExpression;
23+
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
24+
import org.elasticsearch.xpack.esql.expression.function.FunctionType;
25+
import org.elasticsearch.xpack.esql.expression.function.Param;
26+
import org.elasticsearch.xpack.esql.expression.function.TwoOptionalArguments;
27+
import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc;
28+
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
29+
import org.elasticsearch.xpack.esql.stats.SearchStats;
30+
31+
import java.io.IOException;
32+
import java.util.List;
33+
34+
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
35+
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
36+
import static org.elasticsearch.xpack.esql.expression.Validations.isFoldable;
37+
import static org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc.maybeSubstituteWithRoundTo;
38+
import static org.elasticsearch.xpack.esql.session.Configuration.DEFAULT_TZ;
39+
40+
/**
41+
* Splits dates and numbers into a given number of buckets. There are two ways to invoke
42+
* this function: with a user-provided span (explicit invocation mode), or a span derived
43+
* from a number of desired buckets (as a hint) and a range (auto mode).
44+
* In the former case, two parameters will be provided, in the latter four.
45+
*/
46+
public class TBucket extends GroupingFunction.EvaluatableGroupingFunction
47+
implements
48+
PostOptimizationVerificationAware,
49+
TwoOptionalArguments,
50+
LocalSurrogateExpression {
51+
52+
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "TBucket", TBucket::new);
53+
54+
private final Expression field;
55+
private final Expression buckets;
56+
57+
@FunctionInfo(
58+
returnType = { "double", "date", "date_nanos" },
59+
description = """
60+
Creates groups of values - buckets - out of a datetime or numeric input.
61+
The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.""",
62+
examples = {},
63+
type = FunctionType.GROUPING
64+
)
65+
public TBucket(
66+
Source source,
67+
@Param(
68+
name = "field",
69+
type = { "integer", "long", "double", "date", "date_nanos" },
70+
description = "Numeric or date expression from which to derive buckets."
71+
) Expression field,
72+
@Param(
73+
name = "buckets",
74+
type = { "integer", "long", "double", "date_period", "time_duration" },
75+
description = "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted."
76+
) Expression buckets
77+
) {
78+
super(source, List.of(field, buckets));
79+
this.field = field;
80+
this.buckets = buckets;
81+
}
82+
83+
private TBucket(StreamInput in) throws IOException {
84+
this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class));
85+
}
86+
87+
@Override
88+
public void writeTo(StreamOutput out) throws IOException {
89+
source().writeTo(out);
90+
out.writeNamedWriteable(field);
91+
out.writeNamedWriteable(buckets);
92+
}
93+
94+
@Override
95+
public String getWriteableName() {
96+
return ENTRY.name;
97+
}
98+
99+
@Override
100+
public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
101+
Rounding.Prepared preparedRounding = getDateRounding(toEvaluator.foldCtx(), null, null);
102+
return DateTrunc.evaluator(field.dataType(), source(), toEvaluator.apply(field), preparedRounding);
103+
}
104+
105+
/**
106+
* Returns the date rounding from this bucket function if the target field is a date type; otherwise, returns null.
107+
*/
108+
public Rounding.Prepared getDateRoundingOrNull(FoldContext foldCtx) {
109+
return getDateRounding(foldCtx, null, null);
110+
}
111+
112+
private Rounding.Prepared getDateRounding(FoldContext foldContext, Long min, Long max) {
113+
assert DataType.isTemporalAmount(buckets.dataType()) : "Unexpected span data type [" + buckets.dataType() + "]";
114+
return DateTrunc.createRounding(buckets.fold(foldContext), DEFAULT_TZ, min, max);
115+
}
116+
117+
@Override
118+
protected TypeResolution resolveType() {
119+
if (childrenResolved() == false) {
120+
return new TypeResolution("Unresolved children");
121+
}
122+
return isType(buckets, DataType::isTemporalAmount, sourceText(), SECOND, "date_period", "time_duration");
123+
}
124+
125+
@Override
126+
public void postOptimizationVerification(Failures failures) {
127+
String operation = sourceText();
128+
failures.add(isFoldable(buckets, operation, SECOND));
129+
}
130+
131+
@Override
132+
public DataType dataType() {
133+
return field.dataType();
134+
}
135+
136+
@Override
137+
public Expression replaceChildren(List<Expression> newChildren) {
138+
return new TBucket(source(), newChildren.get(0), newChildren.get(1));
139+
}
140+
141+
@Override
142+
protected NodeInfo<? extends Expression> info() {
143+
return NodeInfo.create(this, TBucket::new, field, buckets);
144+
}
145+
146+
public Expression field() {
147+
return field;
148+
}
149+
150+
public Expression buckets() {
151+
return buckets;
152+
}
153+
154+
@Override
155+
public String toString() {
156+
return "Bucket{" + "field=" + field + ", buckets=" + buckets + "}";
157+
}
158+
159+
@Override
160+
public Expression surrogate(SearchStats searchStats) {
161+
// LocalSubstituteSurrogateExpressions should make sure this doesn't happen
162+
assert searchStats != null : "SearchStats cannot be null";
163+
return maybeSubstituteWithRoundTo(
164+
source(),
165+
field(),
166+
buckets(),
167+
searchStats,
168+
(interval, minValue, maxValue) -> getDateRounding(FoldContext.small(), minValue, maxValue)
169+
);
170+
}
171+
}

0 commit comments

Comments
 (0)