Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
b677c8f
consolidate min/max in SearchStats and substitue date_trunc with roun…
fang-xing-esql May 29, 2025
08c3d18
Update docs/changelog/128639.yaml
fang-xing-esql May 29, 2025
3f96c15
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql May 29, 2025
c3ffa8f
skip multi-typed fields with mixed date and date_nanos
fang-xing-esql May 29, 2025
f78fa18
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql May 29, 2025
88dbdf8
support bucket SubstituteSurrogateExpressionsWithSearchStats
fang-xing-esql Jun 3, 2025
511faeb
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jun 3, 2025
3d28f88
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jun 3, 2025
3f1cd81
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jun 9, 2025
e59ea5a
add tests for min/max in SearchContextStats, disallow substitution fo…
fang-xing-esql Jun 10, 2025
2f1aef9
add tests for min/max in SearchContextStats, disallow substitution fo…
fang-xing-esql Jun 10, 2025
debd20e
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jun 10, 2025
5d6d9c9
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jun 13, 2025
5a152ab
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jun 23, 2025
7b669b8
fix test failures
fang-xing-esql Jun 23, 2025
54567d4
safe guard prepare with min max
fang-xing-esql Jun 24, 2025
3208b5f
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jun 24, 2025
ee24a7e
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jun 24, 2025
a76362f
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jun 27, 2025
57dfe15
add trace and clean up
fang-xing-esql Jun 27, 2025
d42ea8e
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jul 1, 2025
89039cd
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jul 3, 2025
1d453f3
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jul 10, 2025
4e82da7
refactor according to review comments
fang-xing-esql Jul 11, 2025
a2b80be
Merge branch 'main' into date-trunc-search-stats
fang-xing-esql Jul 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/128639.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 128639
summary: Substitue `date_trunc` with `round_to` when the pre-calculated rounding points
are available
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -289,12 +289,12 @@ public long count(FieldName field, BytesRef value) {
}

@Override
public byte[] min(FieldName field, DataType dataType) {
public Object min(FieldName field) {
return null;
}

@Override
public byte[] max(FieldName field, DataType dataType) {
public Object max(FieldName field) {
return null;
}

Expand Down Expand Up @@ -380,6 +380,27 @@ public String toString() {
}
}

public static class TestSearchStatsWithMinMax extends TestSearchStats {

private final Map<String, Object> minValues;
private final Map<String, Object> maxValues;

public TestSearchStatsWithMinMax(Map<String, Object> minValues, Map<String, Object> maxValues) {
this.minValues = minValues;
this.maxValues = maxValues;
}

@Override
public Object min(FieldName field) {
return minValues.get(field.string());
}

@Override
public Object max(FieldName field) {
return maxValues.get(field.string());
}
}

public static final TestSearchStats TEST_SEARCH_STATS = new TestSearchStats();

private static final Map<String, Map<String, Column>> TABLES = tables();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
import org.elasticsearch.xpack.esql.stats.SearchStats;

/**
* Interface signaling to the planner that the declaring expression
Expand All @@ -28,4 +29,8 @@ public interface SurrogateExpression {
* be replaced.
*/
Expression surrogate();

default Expression surrogate(SearchStats searchStats) {
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,42 @@
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware;
import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.Foldables;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField;
import org.elasticsearch.xpack.esql.expression.SurrogateExpression;
import org.elasticsearch.xpack.esql.expression.function.Example;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.FunctionType;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.TwoOptionalArguments;
import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc;
import org.elasticsearch.xpack.esql.expression.function.scalar.math.Floor;
import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.stats.SearchStats;

import java.io.IOException;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
Expand All @@ -49,8 +58,10 @@
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNumeric;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime;
import static org.elasticsearch.xpack.esql.expression.Validations.isFoldable;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToLong;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateWithTypeToString;

/**
* Splits dates and numbers into a given number of buckets. There are two ways to invoke
Expand All @@ -61,9 +72,12 @@
public class Bucket extends GroupingFunction.EvaluatableGroupingFunction
implements
PostOptimizationVerificationAware,
TwoOptionalArguments {
TwoOptionalArguments,
SurrogateExpression {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Bucket", Bucket::new);

private static final Logger logger = LogManager.getLogger(Bucket.class);

// TODO maybe we should just cover the whole of representable dates here - like ten years, 100 years, 1000 years, all the way up.
// That way you never end up with more than the target number of buckets.
private static final Rounding LARGEST_HUMAN_DATE_ROUNDING = Rounding.builder(Rounding.DateTimeUnit.YEAR_OF_CENTURY).build();
Expand Down Expand Up @@ -301,15 +315,22 @@ public Rounding.Prepared getDateRoundingOrNull(FoldContext foldCtx) {
}

private Rounding.Prepared getDateRounding(FoldContext foldContext) {
return getDateRounding(foldContext, null, null);
}

private Rounding.Prepared getDateRounding(FoldContext foldContext, Long min, Long max) {
assert field.dataType() == DataType.DATETIME || field.dataType() == DataType.DATE_NANOS : "expected date type; got " + field;
if (buckets.dataType().isWholeNumber()) {
int b = ((Number) buckets.fold(foldContext)).intValue();
long f = foldToLong(foldContext, from);
long t = foldToLong(foldContext, to);
if (min != null && max != null) {
return new DateRoundingPicker(b, f, t).pickRounding().prepare(min, max);
}
return new DateRoundingPicker(b, f, t).pickRounding().prepareForUnknown();
} else {
assert DataType.isTemporalAmount(buckets.dataType()) : "Unexpected span data type [" + buckets.dataType() + "]";
return DateTrunc.createRounding(buckets.fold(foldContext), DEFAULT_TZ);
return DateTrunc.createRounding(buckets.fold(foldContext), DEFAULT_TZ, min, max);
}
}

Expand Down Expand Up @@ -488,4 +509,42 @@ public Expression to() {
public String toString() {
return "Bucket{" + "field=" + field + ", buckets=" + buckets + ", from=" + from + ", to=" + to + '}';
}

@Override
public Expression surrogate() {
return null;
}

@Override
public Expression surrogate(SearchStats searchStats) {
if (field() instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField == false && isDateTime(fa.dataType())) {
// Extract min/max from SearchStats
DataType fieldType = fa.dataType();
FieldAttribute.FieldName fieldName = fa.fieldName();
var min = searchStats.min(fieldName);
var max = searchStats.max(fieldName);
// If min/max is available create rounding with them
if (min instanceof Long minValue && max instanceof Long maxValue && buckets().foldable()) {
Rounding.Prepared rounding = getDateRounding(FoldContext.small(), minValue, maxValue);
long[] roundingPoints = rounding.fixedRoundingPoints();
if (roundingPoints == null) {
logger.trace(
"Fixed rounding point is null for field {}, minValue {} in string format {} and maxValue {} in string format {}",
fieldName,
minValue,
dateWithTypeToString(minValue, fieldType),
maxValue,
dateWithTypeToString(maxValue, fieldType)
);
return null;
}
// Convert to round_to function with the roundings
List<Expression> points = Arrays.stream(roundingPoints)
.mapToObj(l -> new Literal(Source.EMPTY, l, fieldType))
.collect(Collectors.toList());
return new RoundTo(source(), field(), points);
}
}
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,38 +16,53 @@
import org.elasticsearch.compute.ann.Fixed;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField;
import org.elasticsearch.xpack.esql.expression.SurrogateExpression;
import org.elasticsearch.xpack.esql.expression.function.Example;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.stats.SearchStats;

import java.io.IOException;
import java.time.Duration;
import java.time.Period;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS;
import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateWithTypeToString;

public class DateTrunc extends EsqlScalarFunction {
public class DateTrunc extends EsqlScalarFunction implements SurrogateExpression {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Expression.class,
"DateTrunc",
DateTrunc::new
);

private static final Logger logger = LogManager.getLogger(DateTrunc.class);

@FunctionalInterface
public interface DateTruncFactoryProvider {
ExpressionEvaluator.Factory apply(Source source, ExpressionEvaluator.Factory lhs, Rounding.Prepared rounding);
Expand Down Expand Up @@ -163,14 +178,23 @@ static Rounding.Prepared createRounding(final Object interval) {

public static Rounding.Prepared createRounding(final Object interval, final ZoneId timeZone) {
if (interval instanceof Period period) {
return createRounding(period, timeZone);
return createRounding(period, timeZone, null, null);
} else if (interval instanceof Duration duration) {
return createRounding(duration, timeZone);
return createRounding(duration, timeZone, null, null);
}
throw new IllegalArgumentException("Time interval is not supported");
}

private static Rounding.Prepared createRounding(final Period period, final ZoneId timeZone) {
public static Rounding.Prepared createRounding(final Object interval, final ZoneId timeZone, Long min, Long max) {
if (interval instanceof Period period) {
return createRounding(period, timeZone, min, max);
} else if (interval instanceof Duration duration) {
return createRounding(duration, timeZone, min, max);
}
throw new IllegalArgumentException("Time interval is not supported");
}

private static Rounding.Prepared createRounding(final Period period, final ZoneId timeZone, Long min, Long max) {
// Zero or negative intervals are not supported
if (period == null || period.isNegative() || period.isZero()) {
throw new IllegalArgumentException("Zero or negative time interval is not supported");
Expand All @@ -182,6 +206,7 @@ private static Rounding.Prepared createRounding(final Period period, final ZoneI
}

final Rounding.Builder rounding;
boolean tryPrepareWithMinMax = true;
if (period.getDays() == 1) {
rounding = new Rounding.Builder(Rounding.DateTimeUnit.DAY_OF_MONTH);
} else if (period.getDays() == 7) {
Expand All @@ -190,6 +215,7 @@ private static Rounding.Prepared createRounding(final Period period, final ZoneI
rounding = new Rounding.Builder(Rounding.DateTimeUnit.WEEK_OF_WEEKYEAR);
} else if (period.getDays() > 1) {
rounding = new Rounding.Builder(new TimeValue(period.getDays(), TimeUnit.DAYS));
tryPrepareWithMinMax = false;
} else if (period.getMonths() == 3) {
// java.time.Period does not have a QUARTERLY period, so a period of 3 months
// returns a quarterly rounding
Expand All @@ -198,26 +224,36 @@ private static Rounding.Prepared createRounding(final Period period, final ZoneI
rounding = new Rounding.Builder(Rounding.DateTimeUnit.MONTH_OF_YEAR);
} else if (period.getMonths() > 0) {
rounding = new Rounding.Builder(Rounding.DateTimeUnit.MONTHS_OF_YEAR, period.getMonths());
tryPrepareWithMinMax = false;
} else if (period.getYears() == 1) {
rounding = new Rounding.Builder(Rounding.DateTimeUnit.YEAR_OF_CENTURY);
} else if (period.getYears() > 0) {
rounding = new Rounding.Builder(Rounding.DateTimeUnit.YEARS_OF_CENTURY, period.getYears());
tryPrepareWithMinMax = false;
} else {
throw new IllegalArgumentException("Time interval is not supported");
}

rounding.timeZone(timeZone);
if (min != null && max != null && tryPrepareWithMinMax) {
// Multiple quantities calendar interval - day/week/month/quarter/year is not supported by PreparedRounding.maybeUseArray,
// which is called by prepare(min, max), as it may hit an assert. Call prepare(min, max) only for single calendar interval.
return rounding.build().prepare(min, max);
}
return rounding.build().prepareForUnknown();
}

private static Rounding.Prepared createRounding(final Duration duration, final ZoneId timeZone) {
private static Rounding.Prepared createRounding(final Duration duration, final ZoneId timeZone, Long min, Long max) {
// Zero or negative intervals are not supported
if (duration == null || duration.isNegative() || duration.isZero()) {
throw new IllegalArgumentException("Zero or negative time interval is not supported");
}

final Rounding.Builder rounding = new Rounding.Builder(TimeValue.timeValueMillis(duration.toMillis()));
rounding.timeZone(timeZone);
if (min != null && max != null) {
return rounding.build().prepare(min, max);
}
return rounding.build().prepareForUnknown();
}

Expand Down Expand Up @@ -249,4 +285,43 @@ public static ExpressionEvaluator.Factory evaluator(
) {
return evaluatorMap.get(forType).apply(source, fieldEvaluator, rounding);
}

@Override
public Expression surrogate() { // there is no substitute without SearchStats
return null;
}

@Override
public Expression surrogate(SearchStats searchStats) {
if (field() instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField == false && isDateTime(fa.dataType())) {
// Extract min/max from SearchStats
DataType fieldType = fa.dataType();
FieldAttribute.FieldName fieldName = fa.fieldName();
var min = searchStats.min(fieldName);
var max = searchStats.max(fieldName);
// If min/max is available create rounding with them
if (min instanceof Long minValue && max instanceof Long maxValue && interval().foldable()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

min and max here can be null? Can they be something else other than Long? I am assuming yes, and if so why here it only matters if it's long?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, min and max can be null if we don't find statistics of the fields from Lucene. And in this PR, they can only be Long, because:

  • SearchStats only consolidates min and max for date fields, and they are Long in Lucene.
  • We only do the substitution for date fields in DateTrunc and Bucket here.

Object foldedInterval = interval().fold(FoldContext.small() /* TODO remove me */);
Rounding.Prepared rounding = createRounding(foldedInterval, DEFAULT_TZ, minValue, maxValue);
long[] roundingPoints = rounding.fixedRoundingPoints();
if (roundingPoints == null) {
logger.trace(
"Fixed rounding point is null for field {}, minValue {} in string format {} and maxValue {} in string format {}",
fieldName,
minValue,
dateWithTypeToString(minValue, fieldType),
maxValue,
dateWithTypeToString(maxValue, fieldType)
);
return null;
}
// Convert to round_to function with the roundings
List<Expression> points = Arrays.stream(roundingPoints)
.mapToObj(l -> new Literal(Source.EMPTY, l, fieldType))
.collect(Collectors.toList());
return new RoundTo(source(), field(), points);
}
}
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceFieldWithConstantOrNull;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.SubstituteSurrogateExpressionsWithSearchStats;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor;
import org.elasticsearch.xpack.esql.rule.Rule;
Expand Down Expand Up @@ -42,7 +43,8 @@ public class LocalLogicalPlanOptimizer extends ParameterizedRuleExecutor<Logical
new ReplaceTopNWithLimitAndSort(),
new ReplaceFieldWithConstantOrNull(),
new InferIsNotNull(),
new InferNonNullAggConstraint()
new InferNonNullAggConstraint(),
new SubstituteSurrogateExpressionsWithSearchStats()
),
localOperators(),
cleanup()
Expand Down
Loading
Loading