Skip to content

Commit 3a7bef4

Browse files
authored
ESQL: Temporarily forbid LIMIT before INLINE STATS commands (#136752) (#136981)
(cherry picked from commit e2eb709)
1 parent e2c5823 commit 3a7bef4

File tree

9 files changed

+287
-16
lines changed

9 files changed

+287
-16
lines changed

docs/reference/query-languages/esql/_snippets/commands/layout/inlinestats-by.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,5 +103,5 @@ The following example shows how to filter which rows are used for each aggregati
103103
**Limitations**
104104

105105
- The [`CATEGORIZE`](/reference/query-languages/esql/functions-operators/grouping-functions.md#esql-categorize) grouping function is not currently supported.
106-
- `INLINE STATS` cannot yet have an unbounded [`SORT`](/reference/query-languages/esql/commands/sort.md) before it.
107-
You must either move the SORT after it, or add a [`LIMIT`](/reference/query-languages/esql/commands/limit.md) before the [`SORT`](/reference/query-languages/esql/commands/sort.md).
106+
- You cannot currently use [`LIMIT`](/reference/query-languages/esql/commands/limit.md) (explicit or implicit) before `INLINE STATS`, because this can lead to unexpected results.
107+
- You cannot currently use [`FORK`](/reference/query-languages/esql/commands/fork.md) before `INLINE STATS`, because `FORK` adds an implicit [`LIMIT`](/reference/query-languages/esql/commands/limit.md) to each branch, which can lead to unexpected results.

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
*/
3838
public abstract class Node<T extends Node<T>> implements NamedWriteable {
3939
private static final int TO_STRING_MAX_PROP = 10;
40-
private static final int TO_STRING_MAX_WIDTH = 110;
40+
public static final int TO_STRING_MAX_WIDTH = 110;
4141

4242
private final Source source;
4343
private final List<T> children;

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query
5858
"Unbounded SORT not supported yet",
5959
"The field names are too complex to process", // field_caps problem
6060
"must be \\[any type except counter types\\]", // TODO refine the generation of count()
61+
"INLINE STATS cannot be used after an explicit or implicit LIMIT command",
6162

6263
// Awaiting fixes for query failure
6364
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741,

x-pack/plugin/esql/qa/testFixtures/src/main/resources/fork.csv-spec

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,8 @@ emp_no:integer | job_positions:keyword | _fork:keyword
688688
10087 | Junior Developer | fork2
689689
;
690690

691-
forkBeforeInlineStats
691+
forkBeforeInlineStats-Ignore
692+
// temporarily forbid LIMIT before INLINE STATS
692693
required_capability: fork_v9
693694
required_capability: inline_stats
694695

x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,8 @@ emp_no:integer |avg_worked_seconds:long|avg_avg_worked_seconds:double|languages:
345345
10023 |330870342 |3.181719481E8 |null |5 |F
346346
;
347347

348-
three
348+
three-Ignore
349+
// temporarily forbid LIMIT before INLINE STATS
349350
required_capability: inline_stats
350351

351352
FROM employees
@@ -371,6 +372,33 @@ emp_no:integer |avg_worked_seconds:long|avg_avg_worked_seconds:double|languages:
371372
10023 |330870342 |3.181719481E8 |null |5 |3 |5 |F
372373
;
373374

375+
threeTopNAtEnd
376+
required_capability: inline_stats
377+
// same as above but with the final INLINE STATS after before the LIMIT
378+
379+
FROM employees
380+
| KEEP emp_no, languages, avg_worked_seconds, gender
381+
| INLINE STATS avg_avg_worked_seconds = AVG(avg_worked_seconds) BY languages
382+
| WHERE avg_worked_seconds > avg_avg_worked_seconds
383+
| INLINE STATS max_languages = MAX(languages) BY gender
384+
| INLINE STATS min(languages), max(languages) by gender
385+
| SORT emp_no ASC
386+
| LIMIT 10
387+
;
388+
389+
emp_no:integer |avg_worked_seconds:long|avg_avg_worked_seconds:double|languages:integer|max_languages:integer|min(languages):integer|max(languages):integer|gender:keyword
390+
10002 |328922887 |3.133013149047619E8 |5 |5 |1 |5 |F
391+
10006 |372957040 |2.978159518235294E8 |3 |5 |1 |5 |F
392+
10007 |393084805 |2.863684210555556E8 |4 |5 |1 |5 |F
393+
10010 |315236372 |2.863684210555556E8 |4 |5 |1 |5 |null
394+
10012 |365510850 |3.133013149047619E8 |5 |5 |1 |5 |null
395+
10015 |390266432 |3.133013149047619E8 |5 |5 |1 |5 |null
396+
10018 |309604079 |3.0318626831578946E8 |2 |5 |1 |5 |null
397+
10019 |342855721 |2.94833632E8 |1 |5 |1 |5 |null
398+
10020 |373309605 |3.181719481E8 |null |5 |1 |5 |M
399+
10023 |330870342 |3.181719481E8 |null |5 |1 |5 |F
400+
;
401+
374402
// TODO: INLINE STATS unit test needed for this one
375403
// https://github.com/elastic/elasticsearch/issues/113727
376404
pushDownSort_To_LeftSideOnly-Ignore
@@ -2567,7 +2595,8 @@ emp_no:integer | gender:keyword | languages:integer | greater_than:double | less
25672595
10010 | null | 4 | 6.94921 | 7.12723 | 27921.22074 | 10
25682596
;
25692597

2570-
twoConsecutiveInlinestatsWithFalseFilters
2598+
twoConsecutiveInlinestatsWithFalseFilters-Ignore
2599+
// temporarily forbid LIMIT before INLINE STATS
25712600
required_capability: inline_stats
25722601
from employees
25732602
| keep emp_no
@@ -2583,6 +2612,25 @@ from employees
25832612
10003 |0 |0
25842613
;
25852614

2615+
twoConsecutiveInlinestatsWithFalseFilters_LimitAfterInlineStats
2616+
// same as above but with LIMIT after INLINE STATS
2617+
required_capability: inline_stats
2618+
from employees
2619+
| keep emp_no
2620+
| inline stats count = count(*) where false
2621+
| inline stats cc = count_distinct(emp_no) where false
2622+
| sort emp_no
2623+
| limit 3
2624+
;
2625+
2626+
emp_no:i | count:l | cc:l
2627+
10001 |0 |0
2628+
10002 |0 |0
2629+
10003 |0 |0
2630+
;
2631+
2632+
2633+
25862634
////////////////////////////////
25872635
// Union types tests
25882636
////////////////////////////////
@@ -2946,7 +2994,8 @@ mc:l | count:l | event_duration:l
29462994
6 |2 |3450233
29472995
;
29482996

2949-
multiIndexIpStringInlinestats_Inline4
2997+
multiIndexIpStringInlinestats_Inline4-Ignore
2998+
// temporarily forbid LIMIT before INLINE STATS
29502999
required_capability: inline_stats
29513000

29523001
FROM sample_data, sample_data_str
@@ -2967,6 +3016,26 @@ mc:l | count:l | event_duration:l
29673016
5 |2 |3450233
29683017
;
29693018

3019+
multiIndexIpStringInlinestats_Inline4_LimitAfterInlineStats
3020+
// same as above but with LIMIT after INLINE STATS
3021+
required_capability: inline_stats
3022+
3023+
FROM sample_data, sample_data_str
3024+
| INLINE STATS count=count(*) BY client_ip::ip
3025+
| INLINE STATS mc=count(count) BY count
3026+
| SORT mc DESC, count ASC, event_duration
3027+
| LIMIT 5
3028+
| KEEP mc, count, event_duration
3029+
;
3030+
3031+
mc:l | count:l | event_duration:l
3032+
8 |8 |725448
3033+
8 |8 |725448
3034+
8 |8 |1756467
3035+
8 |8 |1756467
3036+
8 |8 |5033755
3037+
;
3038+
29703039
multiIndexWhereIpStringInlinestats
29713040
required_capability: inline_stats
29723041

@@ -4105,8 +4174,9 @@ null | null | null | null
41054174
;
41064175

41074176

4108-
inlineStatsAfterPruningAggregate8
4177+
inlineStatsAfterPruningAggregate8-Ignore
41094178
// https://github.com/elastic/elasticsearch/issues/135679
4179+
// temporarily forbid LIMIT before INLINE STATS
41104180
required_capability: inline_stats_double_release_fix
41114181

41124182
from dense_vector,airport_city_boundaries
@@ -4125,6 +4195,26 @@ zdxdQdVIyFh:long | WbWVJnYJ:long | zdxdQdVIyFh2:integer | uKjfeKTfaH:integer | Q
41254195
1 | 1 | -1579062572 | -1579062572 | -1579062572
41264196
;
41274197

4198+
inlineStatsAfterPruningAggregate8_NoLimit
4199+
// https://github.com/elastic/elasticsearch/issues/135679
4200+
// same as above but without LIMIT before INLINE STATS
4201+
required_capability: inline_stats_double_release_fix
4202+
4203+
from dense_vector,airport_city_boundaries
4204+
| eval kMvNZVQSH = -4048759096317256968, GzAYNHaHuIS = 4262770411405329967, EwPpQFmOPoB = "lkdgnbTrBT", `abbrev` = "LiXI"
4205+
| keep `city`, `kMvNZVQSH`, GzAYNHaHuIS, city_boundary, EwPpQFmOPoB, `city_boundary`, `id`, kMvNZVQSH, id, airport
4206+
| keep `kMvNZVQSH`, EwPpQFmOPoB, city, `id`, kMvNZVQSH
4207+
| stats `EwPpQFmOPoB2` = min(id), `EwPpQFmOPoB3` = count(*), kZiJPhNZ = count(id), ErGhdKhdv = count_distinct(city), EwPpQFmOPoB = min(id)
4208+
| eval ErGhdKhdv = null, fHSyxniFCS = -710891486, ErGhdKhdv = -1104488048, zdxdQdVIyFh = -1579062572, `kZiJPhNZ` = null, SzmOySyUh = null, eWVVRiwP = 1092261898855405071, nUxVhGhcpkiV = true, RvqGioBAAzYs = 169356242
4209+
| eval `kZiJPhNZ` = "gWJHDTcDMjRVSnlzSX", lxLgaagX = 4700531997851493340, `kZiJPhNZ` = false
4210+
| keep `zdxdQdVIyFh`
4211+
| inline stats `zdxdQdVIyFh` = count(*), WbWVJnYJ = count(*), zdxdQdVIyFh2 = max(zdxdQdVIyFh), uKjfeKTfaH = max(zdxdQdVIyFh), QpovVxjMWSjh = min(zdxdQdVIyFh)
4212+
;
4213+
4214+
zdxdQdVIyFh:long | WbWVJnYJ:long | zdxdQdVIyFh2:integer | uKjfeKTfaH:integer | QpovVxjMWSjh:integer
4215+
1 | 1 | -1579062572 | -1579062572 | -1579062572
4216+
;
4217+
41284218

41294219
inlineStatsAfterPruningAggregate9
41304220
required_capability: inline_stats_double_release_fix

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,6 +1625,10 @@ public enum Cap {
16251625
*/
16261626
ATTRIBUTE_EQUALS_RESPECTS_NAME_ID,
16271627

1628+
/**
1629+
* Temporarily forbid the use of an explicit or implicit LIMIT before INLINE STATS.
1630+
*/
1631+
FORBID_LIMIT_BEFORE_INLINE_STATS(INLINE_STATS.enabled),
16281632
// Last capability should still have a comma for fewer merge conflicts when adding new ones :)
16291633
// This comment prevents the semicolon from being on the previous capability when Spotless formats the file.
16301634
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,17 @@
2222
import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison;
2323
import org.elasticsearch.xpack.esql.core.tree.Node;
2424
import org.elasticsearch.xpack.esql.core.type.DataType;
25+
import org.elasticsearch.xpack.esql.core.util.Holder;
2526
import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute;
2627
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg;
2728
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
2829
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison;
2930
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.NotEquals;
3031
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
3132
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
33+
import org.elasticsearch.xpack.esql.plan.logical.InlineStats;
3234
import org.elasticsearch.xpack.esql.plan.logical.Insist;
35+
import org.elasticsearch.xpack.esql.plan.logical.Limit;
3336
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
3437
import org.elasticsearch.xpack.esql.plan.logical.Lookup;
3538
import org.elasticsearch.xpack.esql.plan.logical.Project;
@@ -108,6 +111,7 @@ Collection<Failure> verify(LogicalPlan plan, BitSet partialMetrics) {
108111
checkOperationsOnUnsignedLong(p, failures);
109112
checkBinaryComparison(p, failures);
110113
checkInsist(p, failures);
114+
checkLimitBeforeInlineStats(p, failures);
111115
});
112116

113117
if (failures.hasFailures() == false) {
@@ -256,6 +260,44 @@ private static void checkInsist(LogicalPlan p, Failures failures) {
256260
}
257261
}
258262

263+
/*
264+
* This is a rudimentary check to prevent INLINE STATS after LIMIT. A LIMIT command can be added by other commands by default,
265+
* the best example being FORK. A more robust solution would be to track the commands that add LIMIT and prevent them from doing so
266+
* if INLINE STATS is present in the plan. However, this would require authors of new such commands to be aware of this limitation and
267+
* implement the necessary checks, which is error-prone.
268+
*/
269+
private static void checkLimitBeforeInlineStats(LogicalPlan plan, Failures failures) {
270+
if (plan instanceof InlineStats is) {
271+
Holder<Limit> inlineStatsDescendantLimit = new Holder<>();
272+
is.forEachDownMayReturnEarly((p, breakEarly) -> {
273+
if (p instanceof Limit l) {
274+
inlineStatsDescendantLimit.set(l);
275+
breakEarly.set(true);
276+
return;
277+
}
278+
});
279+
280+
var firstLimit = inlineStatsDescendantLimit.get();
281+
if (firstLimit != null) {
282+
var isString = is.sourceText().length() > Node.TO_STRING_MAX_WIDTH
283+
? is.sourceText().substring(0, Node.TO_STRING_MAX_WIDTH) + "..."
284+
: is.sourceText();
285+
var limitString = firstLimit.sourceText().length() > Node.TO_STRING_MAX_WIDTH
286+
? firstLimit.sourceText().substring(0, Node.TO_STRING_MAX_WIDTH) + "..."
287+
: firstLimit.sourceText();
288+
failures.add(
289+
fail(
290+
is,
291+
"INLINE STATS cannot be used after an explicit or implicit LIMIT command, but was [{}] after [{}] [{}]",
292+
isString,
293+
limitString,
294+
firstLimit.source().source().toString()
295+
)
296+
);
297+
}
298+
}
299+
}
300+
259301
private void licenseCheck(LogicalPlan plan, Failures failures) {
260302
Consumer<Node<?>> licenseCheck = n -> {
261303
if (n instanceof LicenseAware la && la.licenseCheck(licenseState) == false) {

0 commit comments

Comments
 (0)