Skip to content

Commit e678909

Browse files
committed
refactor(esql): Improve field reference tracking in FORK command
- Enhance field reference tracking for FORK command branches - Add branch-specific keep references tracking with `currentBranchKeepRefs` - Refine logic for determining when to project all fields in FORK branches - Update field collection strategy to handle more complex query scenarios - Modify test cases to validate new field reference tracking behavior The changes improve the field reference collection mechanism in ESQL's FORK command, providing more precise field selection and projection logic across different branch scenarios.
1 parent bc5bc54 commit e678909

File tree

2 files changed

+107
-26
lines changed

2 files changed

+107
-26
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean ha
102102
// NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder
103103
// Except in KEEP and DROP.
104104
var keepRefs = AttributeSet.builder();
105+
var currentBranchKeepRefs = new Holder<>(AttributeSet.builder());
105106
var dropWildcardRefs = AttributeSet.builder();
106107
// fields required to request for lookup joins to work
107108
var joinRefs = AttributeSet.builder();
@@ -117,10 +118,17 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean ha
117118
// Early return from forEachDown. We will iterate over the children manually and end the recursion via forEachDown early.
118119
var forkRefsResult = AttributeSet.builder();
119120
forkRefsResult.addAll(referencesBuilder.get());
121+
var parentKeepRefs = AttributeSet.builder();
122+
parentKeepRefs.addAll(keepRefs);
120123

121124
for (var forkBranch : fork.children()) {
125+
// Reset branch-specific state for each fork branch
126+
currentBranchKeepRefs.set(AttributeSet.builder());
127+
currentBranchKeepRefs.get().addAll(parentKeepRefs);
122128
referencesBuilder.set(AttributeSet.builder());
129+
123130
var isNestedFork = forkBranch.forEachDownMayReturnEarly(forEachDownProcessor.get());
131+
124132
// This assert is just for good measure. FORKs within FORKs is yet not supported.
125133
LogicalPlan lastFork = lastSeenFork.get();
126134
if (lastFork != null && fork instanceof UnionAll == false && lastFork instanceof UnionAll == false) {
@@ -130,11 +138,24 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean ha
130138
// TODO consider deferring the nested fork check to Analyzer verifier or LogicalPlanOptimizer verifier.
131139
assert isNestedFork == false : "Nested FORKs are not yet supported";
132140
}
133-
// This is a safety measure for fork where the list of fields returned is empty.
134-
// It can be empty for a branch that does need all the fields. For example "fork (where true) (where a is not null)"
135-
// but it can also be empty for queries where NO fields are needed from ES,
136-
// for example "fork (eval x = 1 | keep x) (eval y = 1 | keep y)" but we cannot establish this yet.
137-
if (referencesBuilder.get().isEmpty()) {
141+
142+
// Determine if this fork branch requires all fields from the index (projectAll = true).
143+
// This happens when a branch has no explicit field selection and no KEEP constraints.
144+
//
145+
// We trigger projectAll when ALL of the following conditions are met:
146+
// 1. No KEEP commands in this branch (currentBranchKeepRefs is empty)
147+
// 2. AND either:
148+
// a) No field references were collected (referencesBuilder is empty), OR
149+
// b) The branch contains no commands that require explicit field collection
150+
// (e.g., no PROJECT or STATS commands that would limit field selection)
151+
//
152+
// Examples:
153+
// - "fork (where true) (where a is not null)" → needs all fields (no KEEP, only filters)
154+
// - "fork (eval x = 1 | keep x) (where true)" → needs all fields (second branch has no KEEP)
155+
// - "fork (eval x = 1 | keep x) (eval y = 2 | keep y)" → specific fields only (both branches have KEEP)
156+
if (currentBranchKeepRefs.get().isEmpty()
157+
&& (referencesBuilder.get().isEmpty()
158+
|| false == forkBranch.anyMatch(forkPlan -> shouldCollectReferencedFields(forkPlan, inlinestatsAggs)))) {
138159
projectAll.set(true);
139160
// Return early, we'll be returning all references no matter what the remainder of the query is.
140161
breakEarly.set(true);
@@ -189,6 +210,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean ha
189210
referencesBuilder.get().add(ua);
190211
if (p instanceof Keep) {
191212
keepRefs.add(ua);
213+
currentBranchKeepRefs.get().add(ua);
192214
} else if (p instanceof Drop) {
193215
dropWildcardRefs.add(ua);
194216
} else {
@@ -197,6 +219,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean ha
197219
});
198220
if (p instanceof Keep) {
199221
keepRefs.addAll(p.references());
222+
currentBranchKeepRefs.get().addAll(p.references());
200223
}
201224
}
202225

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java

Lines changed: 79 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public void testDirectFilter() {
6565
}
6666

6767
public void testForkEval() {
68-
assertFieldNames("FROM employees | fork (eval x = 1 | keep x) (eval y = 2 | keep y) (eval z = 3 | keep z)", Set.of("*"));
68+
assertFieldNames("FROM employees | fork (eval x = 1 | keep x) (eval y = 2 | keep y) (eval z = 3 | keep z)", Set.of("_index"));
6969
}
7070

7171
public void testSort1() {
@@ -2741,6 +2741,19 @@ public void testForkBranchWithKeep() {
27412741
| SORT _fork, language_name""", Set.of("_index", "language_name", "language_code", "language_code.*", "language_name.*"));
27422742
}
27432743

2744+
public void testForkBranchWithKeep2() {
2745+
assertFieldNames("FROM employees | fork (eval x = 1 | keep x) (eval y = 2 | keep y) (eval z = 3)", IndexResolver.ALL_FIELDS);
2746+
}
2747+
2748+
public void testForkBranchWithKeep3() {
2749+
assertFieldNames("""
2750+
FROM employees
2751+
| FORK (EVAL x = 1 | KEEP x)
2752+
(EVAL y = 2 | KEEP y)
2753+
(WHERE emp_no > 10000)
2754+
""", ALL_FIELDS);
2755+
}
2756+
27442757
public void testForkBeforeRename() {
27452758
assertFieldNames("""
27462759
FROM languages
@@ -3144,33 +3157,78 @@ public void testStatsChainingWithTimestampCarriedForwardAsByKey() {
31443157

31453158
public void testSubqueryInFrom() {
31463159
assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());
3147-
// TODO improve FieldNameUtils to process subqueries better, so that we don't call field-caps with "*"
3148-
assertFieldNames("""
3149-
FROM employees, (FROM books | WHERE author:"Faulkner" | KEEP title, author | SORT title | LIMIT 5)
3150-
| WHERE emp_no == 10000 OR author IS NOT NULL
3151-
| KEEP emp_no, first_name, last_name, author, title
3152-
| SORT emp_no, author
3153-
""", Set.of("*"));
3160+
assertFieldNames(
3161+
"""
3162+
FROM employees, (FROM books | WHERE author:"Faulkner" | KEEP title, author | SORT title | LIMIT 5)
3163+
| WHERE emp_no == 10000 OR author IS NOT NULL
3164+
| KEEP emp_no, first_name, last_name, author, title
3165+
| SORT emp_no, author
3166+
""",
3167+
Set.of(
3168+
"title.*",
3169+
"last_name.*",
3170+
"_index",
3171+
"emp_no",
3172+
"author",
3173+
"first_name.*",
3174+
"last_name",
3175+
"title",
3176+
"author.*",
3177+
"first_name",
3178+
"emp_no.*"
3179+
)
3180+
);
31543181
}
31553182

31563183
public void testSubqueryInFromWithFork() {
31573184
assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());
31583185
// nested fork may trigger assertion in FieldNameUtils, defer the check of nested subqueries or subquery with fork
31593186
// to logical plan optimizer.
3160-
// TODO Improve FieldNameUtils to process subqueries better, , so that we don't call field-caps with "*"
3161-
assertFieldNames("""
3162-
FROM employees, (FROM books | FORK (WHERE author:"Faulkner") (WHERE title:"Ring") | KEEP title, author | SORT title | LIMIT 5)
3163-
| WHERE emp_no == 10000 OR author IS NOT NULL
3164-
| KEEP emp_no, first_name, last_name, author, title
3165-
| SORT emp_no, author
3166-
""", Set.of("*"));
3187+
assertFieldNames(
3188+
"""
3189+
FROM employees,
3190+
(FROM books | FORK (WHERE author:"Faulkner")
3191+
(WHERE title:"Ring") | KEEP title, author | SORT title | LIMIT 5)
3192+
| WHERE emp_no == 10000 OR author IS NOT NULL
3193+
| KEEP emp_no, first_name, last_name, author, title
3194+
| SORT emp_no, author
3195+
""",
3196+
Set.of(
3197+
"title.*",
3198+
"last_name.*",
3199+
"_index",
3200+
"emp_no",
3201+
"author",
3202+
"first_name.*",
3203+
"last_name",
3204+
"title",
3205+
"author.*",
3206+
"first_name",
3207+
"emp_no.*"
3208+
)
3209+
);
31673210

3168-
assertFieldNames("""
3169-
FROM books, (FROM employees | WHERE emp_no == 10000)
3170-
| FORK (WHERE author:"Faulkner") (WHERE title:"Ring")
3171-
| KEEP emp_no, first_name, last_name, author, title
3172-
| SORT emp_no, author
3173-
""", Set.of("*"));
3211+
assertFieldNames(
3212+
"""
3213+
FROM books, (FROM employees | WHERE emp_no == 10000)
3214+
| FORK (WHERE author:"Faulkner") (WHERE title:"Ring")
3215+
| KEEP emp_no, first_name, last_name, author, title
3216+
| SORT emp_no, author
3217+
""",
3218+
Set.of(
3219+
"title.*",
3220+
"last_name.*",
3221+
"_index",
3222+
"emp_no",
3223+
"author",
3224+
"first_name.*",
3225+
"last_name",
3226+
"title",
3227+
"author.*",
3228+
"first_name",
3229+
"emp_no.*"
3230+
)
3231+
);
31743232
}
31753233

31763234
private void assertFieldNames(String query, Set<String> expected) {

0 commit comments

Comments
 (0)