Skip to content

Commit c7170e1

Browse files
ncordonjan-elastic
andauthored
[8.18] Propagates filter() to aggregation functions' surrogates (elastic#134461) (elastic#134725)
--------- Co-authored-by: Jan Kuipers <[email protected]> Co-authored-by: Jan Kuipers <[email protected]>
1 parent e3fdb8c commit c7170e1

File tree

11 files changed

+219
-11
lines changed

11 files changed

+219
-11
lines changed

docs/changelog/134461.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 134461
2+
summary: Propagates filter() to aggregation functions' surrogates
3+
area: Aggregations
4+
type: bug
5+
issues:
6+
- 134380

x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3097,3 +3097,162 @@ ROW a = [1,2,3], b = 5
30973097
STD_DEV(a):double | STD_DEV(b):double
30983098
0.816496580927726 | 0.0
30993099
;
3100+
3101+
sumWithConditions
3102+
required_capability: stats_with_filtered_surrogate_fixed
3103+
3104+
FROM employees
3105+
| STATS sum1 = SUM(1),
3106+
sum2 = SUM(1) WHERE emp_no == 10080,
3107+
sum3 = SUM(1) WHERE emp_no < 10080,
3108+
sum4 = SUM(1) WHERE emp_no >= 10080
3109+
;
3110+
3111+
sum1:long | sum2:long | sum3:long | sum4:long
3112+
100 | 1 | 79 | 21
3113+
;
3114+
3115+
weightedAvgWithConditions
3116+
required_capability: stats_with_filtered_surrogate_fixed
3117+
3118+
ROW x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
3119+
| MV_EXPAND x
3120+
| STATS w_avg1 = WEIGHTED_AVG(x, 1) WHERE x == 5,
3121+
w_avg2 = WEIGHTED_AVG(x, x) WHERE x == 5,
3122+
w_avg3 = WEIGHTED_AVG(x, 2) WHERE x <= 5,
3123+
w_avg4 = WEIGHTED_AVG(x, x) WHERE x > 5,
3124+
w_avg5 = WEIGHTED_AVG([1,2,3], 1),
3125+
w_avg6 = WEIGHTED_AVG([1,2,3], 1) WHERE x == 5
3126+
;
3127+
3128+
w_avg1:double | w_avg2:double | w_avg3:double | w_avg4:double | w_avg5:double | w_avg6:double
3129+
5.0 | 5.0 | 3.0 | 8.25 | 2.0 | 2.0
3130+
;
3131+
3132+
maxWithConditions
3133+
required_capability: stats_with_filtered_surrogate_fixed
3134+
3135+
ROW x = [1, 2, 3, 4, 5]
3136+
| MV_EXPAND x
3137+
| STATS max1 = MAX(x),
3138+
max2 = MAX(x) WHERE x > 3
3139+
;
3140+
3141+
max1:integer | max2:integer
3142+
5 | 5
3143+
;
3144+
3145+
minWithConditions
3146+
required_capability: stats_with_filtered_surrogate_fixed
3147+
3148+
ROW x = [1, 2, 3, 4, 5]
3149+
| MV_EXPAND x
3150+
| STATS min1 = MIN(x),
3151+
min2 = MIN(x) WHERE x > 3
3152+
;
3153+
3154+
min1:integer | min2:integer
3155+
1 | 4
3156+
;
3157+
3158+
countWithConditions
3159+
required_capability: stats_with_filtered_surrogate_fixed
3160+
3161+
ROW x = [1, 2, 3, 4, 5]
3162+
| MV_EXPAND x
3163+
| STATS count1 = COUNT(x) WHERE x >= 3,
3164+
count2 = COUNT(x),
3165+
count3 = COUNT(4) WHERE x >= 3,
3166+
count4 = COUNT(*) WHERE x >= 3,
3167+
count5 = COUNT([1,2,3]) WHERE x >= 3,
3168+
count6 = COUNT([1,2,3])
3169+
;
3170+
3171+
count1:long | count2:long | count3:long | count4:long | count5:long | count6:long
3172+
3 | 5 | 3 | 3 | 9 | 15
3173+
;
3174+
3175+
countDistinctWithConditions
3176+
required_capability: stats_with_filtered_surrogate_fixed
3177+
3178+
ROW x = [1, 2, 3, 4, 5]
3179+
| MV_EXPAND x
3180+
| STATS count1 = COUNT_DISTINCT(x) WHERE x <= 3,
3181+
count2 = COUNT_DISTINCT(x),
3182+
count3 = COUNT_DISTINCT(1) WHERE x <= 3,
3183+
count4 = COUNT_DISTINCT(1)
3184+
;
3185+
3186+
count1:long | count2:long | count3:long | count4:long
3187+
3 | 5 | 1 | 1
3188+
;
3189+
3190+
avgWithConditions
3191+
required_capability: stats_with_filtered_surrogate_fixed
3192+
3193+
ROW x = [1, 2, 3, 4, 5]
3194+
| MV_EXPAND x
3195+
| STATS avg1 = AVG(x) WHERE x <= 3,
3196+
avg2 = AVG(x)
3197+
;
3198+
3199+
avg1:double | avg2:double
3200+
2.0 | 3.0
3201+
;
3202+
3203+
percentileWithConditions
3204+
required_capability: stats_with_filtered_surrogate_fixed
3205+
3206+
ROW x = [1, 2, 3, 4, 5]
3207+
| MV_EXPAND x
3208+
| STATS percentile1 = PERCENTILE(x, 50) WHERE x <= 3,
3209+
percentile2 = PERCENTILE(x, 50)
3210+
;
3211+
3212+
percentile1:double | percentile2:double
3213+
2.0 | 3.0
3214+
;
3215+
3216+
medianWithConditions
3217+
required_capability: stats_with_filtered_surrogate_fixed
3218+
3219+
ROW x = [1, 2, 3, 4, 5]
3220+
| MV_EXPAND x
3221+
| STATS median1 = MEDIAN(x) WHERE x <= 3,
3222+
median2 = MEDIAN(x),
3223+
median3 = MEDIAN([5,6,7,8,9]) WHERE x <= 3,
3224+
median4 = MEDIAN([5,6,7,8,9])
3225+
;
3226+
3227+
median1:double | median2:double | median3:double | median4:double
3228+
2.0 | 3.0 | 7.0 | 7.0
3229+
;
3230+
3231+
medianAbsoluteDeviationWithConditions
3232+
required_capability: stats_with_filtered_surrogate_fixed
3233+
3234+
ROW x = [1, 3, 4, 7, 11, 18]
3235+
| MV_EXPAND x
3236+
| STATS median_dev1 = MEDIAN_ABSOLUTE_DEVIATION(x) WHERE x <= 3,
3237+
median_dev2 = MEDIAN_ABSOLUTE_DEVIATION(x),
3238+
median_dev3 = MEDIAN_ABSOLUTE_DEVIATION([3, 11, 14, 25]) WHERE x <= 3,
3239+
median_dev4 = MEDIAN_ABSOLUTE_DEVIATION([3, 11, 14, 25])
3240+
;
3241+
3242+
median_dev1:double | median_dev2:double | median_dev3:double | median_dev4:double
3243+
1.0 | 3.5 | 5.5 | 5.5
3244+
;
3245+
3246+
topWithConditions
3247+
required_capability: stats_with_filtered_surrogate_fixed
3248+
3249+
FROM employees
3250+
| STATS min1 = TOP(emp_no, 1, "ASC") WHERE emp_no > 10010,
3251+
min2 = TOP(emp_no, 2, "ASC") WHERE emp_no > 10010,
3252+
max1 = TOP(emp_no, 1, "DESC") WHERE emp_no < 10080,
3253+
max2 = TOP(emp_no, 2, "DESC") WHERE emp_no < 10080
3254+
;
3255+
3256+
min1:integer | min2:integer | max1:integer | max2:integer
3257+
10011 | [10011, 10012] | 10079 | [10079, 10078]
3258+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -766,7 +766,15 @@ public enum Cap {
766766
* Support for the mv_expand target attribute should be retained in its original position.
767767
* see <a href="https://github.com/elastic/elasticsearch/issues/129000"> ES|QL: inconsistent column order #129000 </a>
768768
*/
769-
FIX_MV_EXPAND_INCONSISTENT_COLUMN_ORDER;
769+
FIX_MV_EXPAND_INCONSISTENT_COLUMN_ORDER,
770+
771+
/**
772+
* Bugfix for STATS {{expression}} WHERE {{condition}} when the
773+
* expression is replaced by something else on planning
774+
* e.g. STATS SUM(1) WHERE x==3 is replaced by
775+
* STATS MV_SUM(const)*COUNT(*) WHERE x == 3.
776+
*/
777+
STATS_WITH_FILTERED_SURROGATE_FIXED;
770778

771779
private final boolean enabled;
772780

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,11 @@ public Expression surrogate() {
145145
var s = source();
146146
var field = field();
147147
if (field.dataType() == DataType.AGGREGATE_METRIC_DOUBLE) {
148-
return new Sum(s, FromAggregateMetricDouble.withMetric(source(), field, AggregateMetricDoubleBlockBuilder.Metric.COUNT));
148+
return new Sum(
149+
s,
150+
FromAggregateMetricDouble.withMetric(source(), field, AggregateMetricDoubleBlockBuilder.Metric.COUNT),
151+
filter()
152+
);
149153
}
150154

151155
if (field.foldable()) {
@@ -162,7 +166,7 @@ public Expression surrogate() {
162166
return new Mul(
163167
s,
164168
new Coalesce(s, new MvCount(s, field), List.of(new Literal(s, 0, DataType.INTEGER))),
165-
new Count(s, new Literal(s, StringUtils.WILDCARD, DataType.KEYWORD))
169+
new Count(s, new Literal(s, StringUtils.WILDCARD, DataType.KEYWORD), filter())
166170
);
167171
}
168172

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,11 @@ public final AggregatorFunctionSupplier supplier(List<Integer> inputChannels) {
152152
@Override
153153
public Expression surrogate() {
154154
if (field().dataType() == DataType.AGGREGATE_METRIC_DOUBLE) {
155-
return new Max(source(), FromAggregateMetricDouble.withMetric(source(), field(), AggregateMetricDoubleBlockBuilder.Metric.MAX));
155+
return new Max(
156+
source(),
157+
FromAggregateMetricDouble.withMetric(source(), field(), AggregateMetricDoubleBlockBuilder.Metric.MAX),
158+
filter()
159+
);
156160
}
157161
return field().foldable() ? new MvMax(source(), field()) : null;
158162
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,6 @@ public Expression surrogate() {
110110

111111
return field.foldable()
112112
? new MvMedian(s, new ToDouble(s, field))
113-
: new Percentile(source(), field(), new Literal(source(), (int) QuantileStates.MEDIAN, DataType.INTEGER));
113+
: new Percentile(source(), field(), filter(), new Literal(source(), (int) QuantileStates.MEDIAN, DataType.INTEGER));
114114
}
115115
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,11 @@ public final AggregatorFunctionSupplier supplier(List<Integer> inputChannels) {
152152
@Override
153153
public Expression surrogate() {
154154
if (field().dataType() == DataType.AGGREGATE_METRIC_DOUBLE) {
155-
return new Min(source(), FromAggregateMetricDouble.withMetric(source(), field(), AggregateMetricDoubleBlockBuilder.Metric.MIN));
155+
return new Min(
156+
source(),
157+
FromAggregateMetricDouble.withMetric(source(), field(), AggregateMetricDoubleBlockBuilder.Metric.MIN),
158+
filter()
159+
);
156160
}
157161
return field().foldable() ? new MvMin(source(), field()) : null;
158162
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ public Expression surrogate() {
143143

144144
// SUM(const) is equivalent to MV_SUM(const)*COUNT(*).
145145
return field.foldable()
146-
? new Mul(s, new MvSum(s, field), new Count(s, new Literal(s, StringUtils.WILDCARD, DataType.KEYWORD)))
146+
? new Mul(s, new MvSum(s, field), new Count(s, new Literal(s, StringUtils.WILDCARD, DataType.KEYWORD), filter()))
147147
: null;
148148
}
149149
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,9 +217,9 @@ public Expression surrogate() {
217217

218218
if (limitValue() == 1) {
219219
if (orderValue()) {
220-
return new Min(s, field());
220+
return new Min(s, field(), filter());
221221
} else {
222-
return new Max(s, field());
222+
return new Max(s, field(), filter());
223223
}
224224
}
225225

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/WeightedAvg.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,9 @@ public Expression surrogate() {
159159
return new MvAvg(s, field);
160160
}
161161
if (weight.foldable()) {
162-
return new Div(s, new Sum(s, field), new Count(s, field), dataType());
162+
return new Div(s, new Sum(s, field, filter()), new Count(s, field, filter()), dataType());
163163
} else {
164-
return new Div(s, new Sum(s, new Mul(s, field, weight)), new Sum(s, weight), dataType());
164+
return new Div(s, new Sum(s, new Mul(s, field, weight), filter()), new Sum(s, weight, filter()), dataType());
165165
}
166166
}
167167

0 commit comments

Comments
 (0)