Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/131341.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 131341
summary: Consider min/max from predicates when transform date_trunc/bucket to `round_to`
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -884,3 +884,39 @@ c:long | b:datetime | yr:datetime
9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z
13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z
;

bucketYearInAggWithGTOutOfRange#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
FROM employees
| WHERE hire_date >= "2000-01-01T00:00:00Z"
| STATS COUNT(*) by bucket = BUCKET(hire_date, 1 month)
| sort bucket;

COUNT(*):long | bucket:date
;

bucketYearInAggWithLTOutOfRange#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
FROM employees
| WHERE hire_date <= "1980-01-01T00:00:00Z"
| STATS COUNT(*) by bucket = BUCKET(hire_date, 1 year)
| sort bucket;

COUNT(*):long | bucket:date
;

bucketYearInAggWithGTLTOutOfRange#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
FROM employees
| WHERE hire_date <= "1980-01-01T00:00:00Z" and hire_date <= "1970-01-01"
| STATS COUNT(*) by bucket = BUCKET(hire_date, 1 week)
| sort bucket;

COUNT(*):long | bucket:date
;

bucketYearInAggWithEQOutOfRange#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
FROM employees
| WHERE hire_date == "1980-01-01T00:00:00Z"
| STATS COUNT(*) by bucket = BUCKET(hire_date, 1 hour)
| sort bucket;

COUNT(*):long | bucket:date
;
145 changes: 145 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -1581,4 +1581,149 @@ x:date | y:date

;

evalDateTruncMonthIntervalWithGTEInRange
from employees
| sort hire_date
| where hire_date >= "1990-01-01"
| eval x = date_trunc(1 month, hire_date)
| keep emp_no, hire_date, x
| limit 5;

emp_no:integer | hire_date:date | x:date
10082 | 1990-01-03T00:00:00.000Z | 1990-01-01T00:00:00.000Z
10096 | 1990-01-14T00:00:00.000Z | 1990-01-01T00:00:00.000Z
10011 | 1990-01-22T00:00:00.000Z | 1990-01-01T00:00:00.000Z
10056 | 1990-02-01T00:00:00.000Z | 1990-02-01T00:00:00.000Z
10086 | 1990-02-16T00:00:00.000Z | 1990-02-01T00:00:00.000Z
;

evalDateTruncHoursIntervalWithLTEInRange
from employees
| sort hire_date desc
| where hire_date <= "1990-01-01"
| eval x = date_trunc(240 hours, hire_date)
| keep emp_no, hire_date, x
| limit 5;

emp_no:integer | hire_date:date | x:date
10023 | 1989-12-17T00:00:00.000Z | 1989-12-17T00:00:00.000Z
10041 | 1989-11-12T00:00:00.000Z | 1989-11-07T00:00:00.000Z
10069 | 1989-11-05T00:00:00.000Z | 1989-10-28T00:00:00.000Z
10092 | 1989-09-22T00:00:00.000Z | 1989-09-18T00:00:00.000Z
10038 | 1989-09-20T00:00:00.000Z | 1989-09-18T00:00:00.000Z
;

evalDateTruncWeeklyIntervalWithLTGTInRange
from employees
| sort hire_date
| where hire_date > "1986-01-01" and hire_date < "1988-01-01"
| eval x = date_trunc(1 week, hire_date)
| keep emp_no, hire_date, x
| limit 5;

emp_no:integer | hire_date:date | x:date
10053 | 1986-02-04T00:00:00.000Z | 1986-02-03T00:00:00.000Z
10066 | 1986-02-26T00:00:00.000Z | 1986-02-24T00:00:00.000Z
10090 | 1986-03-14T00:00:00.000Z | 1986-03-10T00:00:00.000Z
10079 | 1986-03-27T00:00:00.000Z | 1986-03-24T00:00:00.000Z
10001 | 1986-06-26T00:00:00.000Z | 1986-06-23T00:00:00.000Z
;

evalDateTruncQuarterlyIntervalWithGTInRange
from employees
| sort hire_date
| where hire_date > "1980-01-01"
| eval x = date_trunc(3 month, hire_date)
| keep emp_no, hire_date, x
| limit 5;

emp_no:integer | hire_date:date | x:date
10009 | 1985-02-18T00:00:00.000Z | 1985-01-01T00:00:00.000Z
10048 | 1985-02-24T00:00:00.000Z | 1985-01-01T00:00:00.000Z
10098 | 1985-05-13T00:00:00.000Z | 1985-04-01T00:00:00.000Z
10076 | 1985-07-09T00:00:00.000Z | 1985-07-01T00:00:00.000Z
10061 | 1985-09-17T00:00:00.000Z | 1985-07-01T00:00:00.000Z
;

dateTruncGroupingYearIntervalWithLTInRange
from employees
| where hire_date < "2025-01-01"
| eval y = date_trunc(1 year, hire_date)
| stats c = count(emp_no) by y
| sort y
| keep y, c
| limit 5;

y:date | c:long
1985-01-01T00:00:00.000Z | 11
1986-01-01T00:00:00.000Z | 11
1987-01-01T00:00:00.000Z | 15
1988-01-01T00:00:00.000Z | 9
1989-01-01T00:00:00.000Z | 13
;

dateTruncGroupingYearIntervalWithLTOutOfRange
from employees
| where hire_date < "1980-01-01"
| eval y = date_trunc(1 year, hire_date)
| stats c = count(emp_no) by y
| sort y
| keep y, c
| limit 5;

y:date | c:long
;

dateTruncGroupingYearIntervalWithGTOutOfRange
from employees
| where hire_date > "2000-01-01"
| eval y = date_trunc(1 year, hire_date)
| stats c = count(emp_no) by y
| sort y
| keep y, c
| limit 5;

y:date | c:long
;

dateTruncGroupingMonthIntervalWithLTGTInRange
from employees
| where hire_date > "1987-01-01" and hire_date < "1988-01-01"
| eval y = date_trunc(1 month, hire_date)
| stats c = count(emp_no) by y
| sort y
| keep y, c
| limit 5;

y:date | c:long
1987-03-01T00:00:00.000Z | 5
1987-04-01T00:00:00.000Z | 3
1987-05-01T00:00:00.000Z | 1
1987-07-01T00:00:00.000Z | 1
1987-08-01T00:00:00.000Z | 2
;

dateTruncGroupingDayIntervalWithEQInRange
from employees
| where hire_date == "1988-02-10"
| eval y = date_trunc(1 day, hire_date)
| stats c = count(emp_no) by y
| sort y
| keep y, c
| limit 5;

y:date | c:long
1988-02-10T00:00:00.000Z | 1
;

dateTruncGroupingDayIntervalWithEQOutOfRange
from employees
| where hire_date == "2025-01-01"
| eval y = date_trunc(1 day, hire_date)
| stats c = count(emp_no) by y
| sort y
| keep y, c
| limit 5;

y:date | c:long
;
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,25 @@
package org.elasticsearch.xpack.esql.expression;

import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison;
import org.elasticsearch.xpack.esql.stats.SearchStats;

import java.util.List;

/**
* Interface signaling to the local logical plan optimizer that the declaring expression
* has to be replaced by a different form.
* Implement this on {@code Function}s when:
* <ul>
* <li>The expression can be rewritten to another expression on data node, with the statistics available in SearchStats.
* Like {@code DateTrunc} and {@code Bucket} could be rewritten to {@code RoundTo} with the min/max values on the date field.
* <li>The expression can be rewritten to another expression on data node, with the statistics available in SearchStats and predicates
* in the query. Like {@code DateTrunc} and {@code Bucket} could be rewritten to {@code RoundTo} with the min/max values on the date
* field.
* </li>
* </ul>
*/
public interface LocalSurrogateExpression {
/**
* Returns the expression to be replaced by or {@code null} if this cannot be replaced.
*/
Expression surrogate(SearchStats searchStats);
Expression surrogate(SearchStats searchStats, List<EsqlBinaryComparison> binaryComparisons);
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.math.Floor;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.stats.SearchStats;

Expand Down Expand Up @@ -498,14 +499,15 @@ public String toString() {
}

@Override
public Expression surrogate(SearchStats searchStats) {
public Expression surrogate(SearchStats searchStats, List<EsqlBinaryComparison> binaryComparisons) {
// LocalSubstituteSurrogateExpressions should make sure this doesn't happen
assert searchStats != null : "SearchStats cannot be null";
return maybeSubstituteWithRoundTo(
source(),
field(),
buckets(),
searchStats,
binaryComparisons,
(interval, minValue, maxValue) -> getDateRounding(FoldContext.small(), minValue, maxValue)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.compute.ann.Fixed;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.xpack.esql.core.expression.Expression;
Expand All @@ -27,12 +28,19 @@
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField;
import org.elasticsearch.xpack.esql.core.util.Holder;
import org.elasticsearch.xpack.esql.expression.LocalSurrogateExpression;
import org.elasticsearch.xpack.esql.expression.function.Example;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThanOrEqual;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.LessThan;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.LessThanOrEqual;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.stats.SearchStats;

Expand Down Expand Up @@ -125,7 +133,7 @@ Expression interval() {
return interval;
}

Expression field() {
public Expression field() {
return timestampField;
}

Expand Down Expand Up @@ -287,14 +295,15 @@ public static ExpressionEvaluator.Factory evaluator(
}

@Override
public Expression surrogate(SearchStats searchStats) {
public Expression surrogate(SearchStats searchStats, List<EsqlBinaryComparison> binaryComparisons) {
// LocalSubstituteSurrogateExpressions should make sure this doesn't happen
assert searchStats != null : "SearchStats cannot be null";
return maybeSubstituteWithRoundTo(
source(),
field(),
interval(),
searchStats,
binaryComparisons,
(interval, minValue, maxValue) -> createRounding(interval, DEFAULT_TZ, minValue, maxValue)
);
}
Expand All @@ -304,6 +313,7 @@ public static RoundTo maybeSubstituteWithRoundTo(
Expression field,
Expression foldableTimeExpression,
SearchStats searchStats,
List<EsqlBinaryComparison> binaryComparisons,
TriFunction<Object, Long, Long, Rounding.Prepared> roundingFunction
) {
if (field instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField == false && isDateTime(fa.dataType())) {
Expand All @@ -312,8 +322,19 @@ public static RoundTo maybeSubstituteWithRoundTo(
FieldAttribute.FieldName fieldName = fa.fieldName();
var min = searchStats.min(fieldName);
var max = searchStats.max(fieldName);
// Extract min/max from query
Tuple<Long, Long> minMaxFromPredicates = minMaxFromPredicates(binaryComparisons);
Long minFromPredicates = minMaxFromPredicates.v1();
Long maxFromPredicates = minMaxFromPredicates.v2();
// Consolidate min/max from SearchStats and query
if (minFromPredicates instanceof Long minValue) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This value here should already be a Long. Why the instanceof?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need a check for null, not instanceof here, I'll change it.

min = min instanceof Long m ? Math.max(m, minValue) : minValue;
}
if (maxFromPredicates instanceof Long maxValue) {
max = max instanceof Long m ? Math.min(m, maxValue) : maxValue;
}
// If min/max is available create rounding with them
if (min instanceof Long minValue && max instanceof Long maxValue && foldableTimeExpression.foldable()) {
if (min instanceof Long minValue && max instanceof Long maxValue && foldableTimeExpression.foldable() && minValue <= maxValue) {
Object foldedInterval = foldableTimeExpression.fold(FoldContext.small() /* TODO remove me */);
Rounding.Prepared rounding = roundingFunction.apply(foldedInterval, minValue, maxValue);
long[] roundingPoints = rounding.fixedRoundingPoints();
Expand All @@ -337,4 +358,31 @@ public static RoundTo maybeSubstituteWithRoundTo(
}
return null;
}

private static Tuple<Long, Long> minMaxFromPredicates(List<EsqlBinaryComparison> binaryComparisons) {
long[] min = new long[] { Long.MIN_VALUE };
long[] max = new long[] { Long.MAX_VALUE };
Holder<Boolean> foundMinValue = new Holder<>(false);
Holder<Boolean> foundMaxValue = new Holder<>(false);
for (EsqlBinaryComparison binaryComparison : binaryComparisons) {
if (binaryComparison.right() instanceof Literal l) {
long value = Long.parseLong(l.value().toString());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we doing here the parsing of a long from String? The dataType of the Literal is not enough?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make sense, I'll refactor this.

if (binaryComparison instanceof Equals) {
return new Tuple<>(value, value);
}
if (binaryComparison instanceof GreaterThan || binaryComparison instanceof GreaterThanOrEqual) {
if (value >= min[0]) {
min[0] = value;
foundMinValue.set(true);
}
} else if (binaryComparison instanceof LessThan || binaryComparison instanceof LessThanOrEqual) {
if (value <= max[0]) {
max[0] = value;
foundMaxValue.set(true);
}
}
}
}
return new Tuple<>(foundMinValue.get() ? min[0] : null, foundMaxValue.get() ? max[0] : null);
}
}
Loading
Loading