Skip to content

Commit ffbf05c

Browse files
authored
ESQL: Fix columns ordering when pruning an INLINE STATS (#136827)
This fixes the incorrect ordering of the columns that can occur when INLINE STATS is pruned entirely, due to its calculated value being discarded (overshadowed). Fixes #136797
1 parent 7e52280 commit ffbf05c

File tree

6 files changed

+173
-24
lines changed

6 files changed

+173
-24
lines changed

docs/changelog/136827.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 136827
2+
summary: Fix columns ordering when pruning an INLINE STATS
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 136797

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query
7171
"cannot be cast to class", // https://github.com/elastic/elasticsearch/issues/133992
7272
"can't find input for", // https://github.com/elastic/elasticsearch/issues/136596
7373
"unexpected byte", // https://github.com/elastic/elasticsearch/issues/136598
74-
"Output has changed from", // https://github.com/elastic/elasticsearch/issues/136797
7574
"out of bounds for length", // https://github.com/elastic/elasticsearch/issues/136851
7675

7776
// Awaiting fixes for correctness

x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1662,6 +1662,111 @@ ROW salary = 12300, emp_no = 5, gender = "F"
16621662
emp_no:integer
16631663
5
16641664
;
1665+
1666+
inlineStatsWithShadowedOutput1
1667+
required_capability: inline_stats_prune_column_fix
1668+
1669+
ROW a = null, b = 0, c = "x"
1670+
| INLINE STATS a = MAX(c) BY b
1671+
| EVAL a = c
1672+
;
1673+
1674+
c:keyword |b:integer |a:keyword
1675+
x |0 |x
1676+
;
1677+
1678+
inlineStatsWithShadowedOutput2
1679+
required_capability: inline_stats_prune_column_fix
1680+
1681+
ROW a = null, b = 0, c = "x", d = 1
1682+
| INLINE STATS a = MAX(c), d = MIN(c) by b
1683+
| EVAL a = c, d = 2
1684+
;
1685+
1686+
c:keyword |b:integer |a:keyword |d:integer
1687+
x |0 |x |2
1688+
;
1689+
1690+
inlineStatsWithShadowedOutput3
1691+
required_capability: inline_stats_prune_column_fix
1692+
1693+
FROM employees
1694+
| KEEP emp_no, salary, birth_date
1695+
| EVAL orig_emp_no = emp_no
1696+
| INLINE STATS emp_no = MAX(birth_date) BY salary
1697+
| EVAL emp_no = birth_date
1698+
| SORT orig_emp_no
1699+
| LIMIT 3
1700+
;
1701+
1702+
birth_date:date |orig_emp_no:integer|salary:integer |emp_no:date
1703+
1953-09-02T00:00:00.000Z|10001 |57305 |1953-09-02T00:00:00.000Z
1704+
1964-06-02T00:00:00.000Z|10002 |56371 |1964-06-02T00:00:00.000Z
1705+
1959-12-03T00:00:00.000Z|10003 |61805 |1959-12-03T00:00:00.000Z
1706+
;
1707+
1708+
inlineStatsWithShadowedOutput4
1709+
required_capability: inline_stats_prune_column_fix
1710+
1711+
ROW a = null, b = 0, c = "x", d = 1
1712+
| INLINE STATS a = MAX(c), d = MIN(c), e = AVG(b) by b
1713+
| EVAL a = c, d = 2, e = a
1714+
;
1715+
1716+
c:keyword |b:integer |a:keyword |d:integer |e:keyword
1717+
x |0 |x |2 |x
1718+
;
1719+
1720+
inlineStatsWithShadowedOutput5
1721+
required_capability: inline_stats_prune_column_fix
1722+
1723+
ROW a = null, b = 0, c = "x", d = 1
1724+
| INLINE STATS a = MAX(c), d = MIN(c), e = AVG(b), a = MIN(b) by b
1725+
| EVAL a = c, d = 2, e = a
1726+
;
1727+
warning:Line 2:16: Field 'a' shadowed by field at line 2:52
1728+
1729+
c:keyword |b:integer |a:keyword |d:integer |e:keyword
1730+
x |0 |x |2 |x
1731+
;
1732+
1733+
inlineStatsWithShadowedOutput6
1734+
required_capability: inline_stats_prune_column_fix
1735+
1736+
ROW a = null, b = 0, c = "x", d = 1
1737+
| INLINE STATS a = MAX(c), d = MIN(c), e = AVG(b), a = MIN(b), f = MAX(b) by b
1738+
| EVAL a = c, d = 2, e = a, f = d
1739+
;
1740+
warning:Line 2:16: Field 'a' shadowed by field at line 2:52
1741+
1742+
c:keyword |b:integer |a:keyword |d:integer |e:keyword |f:integer
1743+
x |0 |x |2 |x |2
1744+
;
1745+
1746+
tripleInlineStatsMultipleAssignmentsGetsPrunedPartially
1747+
required_capability: inline_stats_prune_column_fix
1748+
1749+
FROM employees
1750+
// | SORT languages DESC
1751+
| EVAL salaryK = salary / 10000
1752+
| INLINE STATS count = COUNT(*) BY salaryK
1753+
| EVAL hire_date_string = hire_date::keyword
1754+
| INLINE STATS sum = SUM(languages) BY hire_date_string
1755+
| DISSECT hire_date_string "%{date}"
1756+
| INLINE STATS min = MIN(MV_COUNT(languages)) BY salaryK
1757+
| SORT emp_no
1758+
| KEEP emp_no, salaryK, count, min
1759+
| LIMIT 5
1760+
;
1761+
1762+
emp_no:integer |salaryK:integer|count:long |min:integer
1763+
10001 |5 |17 |1
1764+
10002 |5 |17 |1
1765+
10003 |6 |17 |1
1766+
10004 |3 |27 |1
1767+
10005 |6 |17 |1
1768+
;
1769+
16651770
///////////////////////////
16661771
// INLINE STATS with filters
16671772
///////////////////////////

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1531,6 +1531,11 @@ public enum Cap {
15311531
*/
15321532
GROK_MULTI_PATTERN,
15331533

1534+
/**
1535+
* Fix pruning of columns when shadowed in INLINE STATS
1536+
*/
1537+
INLINE_STATS_PRUNE_COLUMN_FIX(INLINESTATS.enabled),
1538+
15341539
/**
15351540
* Fix double release in inline stats when LocalRelation is reused
15361541
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ private static LogicalPlan pruneColumnsInAggregate(Aggregate aggregate, Attribut
123123
} else {
124124
// not expecting high groups cardinality, nested loops in lists should be fine, no need for a HashSet
125125
if (inlineJoin && aggregate.groupings().containsAll(remaining)) {
126-
// An INLINEJOIN right-hand side aggregation output had everything pruned, except for (some of the) groupings, which are
126+
// An InlineJoin right-hand side aggregation output had everything pruned, except for (some of the) groupings, which are
127127
// already part of the IJ output (from the left-hand side): the agg can just be dropped entirely.
128128
p = emptyLocalRelation(aggregate);
129129
} else { // not an INLINEJOIN or there are actually aggregates to compute
@@ -140,7 +140,12 @@ private static LogicalPlan pruneColumnsInInlineJoinRight(InlineJoin ij, Attribut
140140
used.addAll(ij.references());
141141
var right = pruneColumns(ij.right(), used, true);
142142
if (right.output().isEmpty() || isLocalEmptyRelation(right)) {
143-
p = ij.left();
143+
// InlineJoin updates the order of the output, so even if the computation is dropped, the groups need to be pulled to the end.
144+
// So we keep just the left side of the join (i.e. drop the computations), but place a Project on top to keep the right order.
145+
List<Attribute> newOutput = new ArrayList<>(ij.output());
146+
AttributeSet leftOutputSet = ij.left().outputSet();
147+
newOutput.removeIf(attr -> leftOutputSet.contains(attr) == false);
148+
p = new Project(ij.source(), ij.left(), newOutput);
144149
recheck.set(true);
145150
} else if (right != ij.right()) {
146151
// if the right side has been updated, replace it

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5955,7 +5955,7 @@ public void testDoubleInlineStatsWithEvalGetsPrunedEntirely() {
59555955
}
59565956
var plan = optimizedPlan(query);
59575957

5958-
var project = as(plan, EsqlProject.class);
5958+
var project = as(plan, Project.class);
59595959
assertThat(Expressions.names(project.projections()), is(List.of("emp_no", "count")));
59605960
var topN = as(project.child(), TopN.class);
59615961
assertThat(topN.order().size(), is(1));
@@ -6070,22 +6070,26 @@ public void testTripleInlineStatsGetsPrunedPartially() {
60706070
}
60716071

60726072
/*
6073-
* EsqlProject[[emp_no{f}#864, salaryK{r}#836, count{r}#838, min{r}#852]]
6074-
* \_TopN[[Order[emp_no{f}#864,ASC,LAST]],5[INTEGER]]
6075-
* \_InlineJoin[LEFT,[salaryK{r}#836],[salaryK{r}#836],[salaryK{r}#836]]
6076-
* |_Dissect[hire_date_string{r}#842,Parser[pattern=%{date}, appendSeparator=,
6077-
* parser=org.elasticsearch.dissect.DissectParser@27d57d5e],[date{r}#847]] <-- TODO: Dissect & Eval could/should be dropped
6078-
* | \_Eval[[TOSTRING(hire_date{f}#865) AS hire_date_string#842]]
6079-
* | \_InlineJoin[LEFT,[salaryK{r}#836],[salaryK{r}#836],[salaryK{r}#836]]
6080-
* | |_Eval[[salary{f}#866 / 10000[INTEGER] AS salaryK#836]]
6081-
* | | \_EsRelation[employees][emp_no{f}#864, hire_date{f}#865, languages{f}#860, ..]
6082-
* | \_Aggregate[[salaryK{r}#836],[COUNT(*[KEYWORD],true[BOOLEAN]) AS count#838, salaryK{r}#836]]
6083-
* | \_StubRelation[[emp_no{f}#864, hire_date{f}#865, languages{f}#860, languages.byte{f}#861, languages.long{f}#863,
6084-
* languages.short{f}#862, salary{f}#866, salaryK{r}#836]]
6085-
* \_Aggregate[[salaryK{r}#836],[MIN($$MV_COUNT(langua>$MIN$0{r$}#867,true[BOOLEAN]) AS min#852, salaryK{r}#836]]
6086-
* \_Eval[[MVCOUNT(languages{f}#860) AS $$MV_COUNT(langua>$MIN$0#867]]
6087-
* \_StubRelation[[emp_no{f}#864, hire_date{f}#865, languages{f}#860, languages.byte{f}#861, languages.long{f}#863,
6088-
* languages.short{f}#862, salary{f}#866, count{r}#838, salaryK{r}#836, sum{r}#845, hire_date_string{r}#842, date{r}#847]]
6073+
* EsqlProject[[emp_no{f}#26, salaryK{r}#4, count{r}#6, min{r}#19]]
6074+
* \_TopN[[Order[emp_no{f}#26,ASC,LAST]],5[INTEGER]]
6075+
* \_InlineJoin[LEFT,[salaryK{r}#4],[salaryK{r}#4]]
6076+
* |_EsqlProject[[_meta_field{f}#32, emp_no{f}#26, first_name{f}#27, gender{f}#28, hire_date{f}#33, job{f}#34, job.raw{f}#35,
6077+
* languages{f}#29, last_name{f}#30, long_noidx{f}#36, salary{f}#31, count{r}#6, salaryK{r}#4, hire_date_string{r}#10,
6078+
* date{r}#15]]
6079+
* | \_Dissect[hire_date_string{r}#10,Parser[pattern=%{date}, appendSeparator=,
6080+
* parser=org.elasticsearch.dissect.DissectParser@77d1afc3],[date{r}#15]] <-- TODO: Dissect & Eval could/should be dropped
6081+
* | \_Eval[[TOSTRING(hire_date{f}#33) AS hire_date_string#10]]
6082+
* | \_InlineJoin[LEFT,[salaryK{r}#4],[salaryK{r}#4]]
6083+
* | |_Eval[[salary{f}#31 / 10000[INTEGER] AS salaryK#4]]
6084+
* | | \_EsRelation[test][_meta_field{f}#32, emp_no{f}#26, first_name{f}#27, ..]
6085+
* | \_Aggregate[[salaryK{r}#4],[COUNT(*[KEYWORD],true[BOOLEAN]) AS count#6, salaryK{r}#4]]
6086+
* | \_StubRelation[[_meta_field{f}#32, emp_no{f}#26, first_name{f}#27, gender{f}#28, hire_date{f}#33, job{f}#34,
6087+
* job.raw{f}#35, languages{f}#29, last_name{f}#30, long_noidx{f}#36, salary{f}#31, salaryK{r}#4]]
6088+
* \_Aggregate[[salaryK{r}#4],[MIN($$MV_COUNT(langua>$MIN$0{r$}#37,true[BOOLEAN]) AS min#19, salaryK{r}#4]]
6089+
* \_Eval[[MVCOUNT(languages{f}#29) AS $$MV_COUNT(langua>$MIN$0#37]]
6090+
* \_StubRelation[[_meta_field{f}#32, emp_no{f}#26, first_name{f}#27, gender{f}#28, hire_date{f}#33, job{f}#34, job.raw{f}#35,
6091+
* languages{f}#29, last_name{f}#30, long_noidx{f}#36, salary{f}#31, count{r}#6, salaryK{r}#4, sum{r}#13,
6092+
* hire_date_string{r}#10, date{r}#15, $$MV_COUNT(langua>$MIN$0{r$}#37]]
60896093
*/
60906094
public void testTripleInlineStatsMultipleAssignmentsGetsPrunedPartially() {
60916095
// TODO: reenable 1st sort, pull the 2nd further up when #132417 is in
@@ -6121,7 +6125,7 @@ public void testTripleInlineStatsMultipleAssignmentsGetsPrunedPartially() {
61216125
"salary"
61226126
);
61236127

6124-
var project = as(plan, EsqlProject.class);
6128+
var project = as(plan, Project.class);
61256129
assertThat(Expressions.names(project.projections()), is(List.of("emp_no", "salaryK", "count", "min")));
61266130
var topN = as(project.child(), TopN.class);
61276131
var outerinline = as(topN.child(), InlineJoin.class);
@@ -6130,7 +6134,8 @@ public void testTripleInlineStatsMultipleAssignmentsGetsPrunedPartially() {
61306134
expectedOutterOutput.addAll(List.of("count", "hire_date_string", "date", "min", "salaryK"));
61316135
assertThat(Expressions.names(outerinline.output()), is(expectedOutterOutput));
61326136
// outer left
6133-
var dissect = as(outerinline.left(), Dissect.class);
6137+
var outerProject = as(outerinline.left(), Project.class);
6138+
var dissect = as(outerProject.child(), Dissect.class);
61346139
var eval = as(dissect.child(), Eval.class);
61356140
var innerinline = as(eval.child(), InlineJoin.class);
61366141
var expectedInnerOutput = new ArrayList<>(employeesFields);
@@ -6176,7 +6181,7 @@ public void testTripleInlineStatsMultipleAssignmentsGetsPrunedEntirely() {
61766181
}
61776182
var plan = optimizedPlan(query);
61786183

6179-
var project = as(plan, EsqlProject.class);
6184+
var project = as(plan, Project.class);
61806185
assertThat(Expressions.names(project.projections()), is(List.of("emp_no")));
61816186
var topN = as(project.child(), TopN.class);
61826187
var dissect = as(topN.child(), Dissect.class);
@@ -6425,7 +6430,7 @@ public void testInlineStatsWithRow() {
64256430
}
64266431
var plan = optimizedPlan(query);
64276432

6428-
var esqlProject = as(plan, EsqlProject.class);
6433+
var esqlProject = as(plan, Project.class);
64296434
assertThat(Expressions.names(esqlProject.projections()), is(List.of("emp_no")));
64306435
var limit = asLimit(esqlProject.child(), 1000, false);
64316436
var localRelation = as(limit.child(), LocalRelation.class);
@@ -6514,6 +6519,30 @@ public void testInlineStatsWithLimitAndAgg() {
65146519
var stub = as(agg.child(), StubRelation.class);
65156520
}
65166521

6522+
/*
6523+
* EsqlProject[[c{r}#7, b{r}#5, a{r}#14]]
6524+
* \_Eval[[[KEYWORD] AS a#14]]
6525+
* \_Limit[1000[INTEGER],false]
6526+
* \_LocalRelation[[a{r}#3, b{r}#5, c{r}#7],Page{blocks=[ConstantNullBlock[positions=1], IntVectorBlock[vector=ConstantIntVector[p
6527+
* ositions=1, value=0]], BytesRefVectorBlock[vector=ConstantBytesRefVector[positions=1, value=[]]]]}]
6528+
*/
6529+
public void testInlineStatsWithShadowedOutput() {
6530+
var query = """
6531+
ROW a = null, b = 0, c = ""
6532+
| INLINE STATS a = MAX(c) BY b
6533+
| EVAL a = c
6534+
""";
6535+
if (releaseBuildForInlineStats(query)) {
6536+
return;
6537+
}
6538+
var plan = optimizedPlan(query);
6539+
var project = as(plan, Project.class);
6540+
assertThat(Expressions.names(project.projections()), is(List.of("c", "b", "a")));
6541+
var eval = as(project.child(), Eval.class);
6542+
var limit = asLimit(eval.child(), 1000, false);
6543+
var localRelation = as(limit.child(), LocalRelation.class);
6544+
}
6545+
65176546
/**
65186547
* Expects
65196548
*

0 commit comments

Comments
 (0)