49
49
import java .util .HashSet ;
50
50
import java .util .List ;
51
51
import java .util .Set ;
52
+ import java .util .function .BiConsumer ;
52
53
import java .util .stream .Collectors ;
53
54
54
55
import static org .elasticsearch .xpack .esql .core .util .StringUtils .WILDCARD ;
@@ -76,11 +77,6 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso
76
77
return new PreAnalysisResult (enrichResolution , IndexResolver .ALL_FIELDS , Set .of ());
77
78
}
78
79
79
- // TODO: Improve field resolution for FORK - right now we request all fields
80
- if (parsed .anyMatch (p -> p instanceof Fork )) {
81
- return new PreAnalysisResult (enrichResolution , IndexResolver .ALL_FIELDS , Set .of ());
82
- }
83
-
84
80
Holder <Boolean > projectAll = new Holder <>(false );
85
81
parsed .forEachExpressionDown (UnresolvedStar .class , us -> {// explicit "*" fields selection
86
82
if (projectAll .get ()) {
@@ -93,7 +89,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso
93
89
return new PreAnalysisResult (enrichResolution , IndexResolver .ALL_FIELDS , Set .of ());
94
90
}
95
91
96
- var referencesBuilder = AttributeSet .builder ();
92
+ var referencesBuilder = new Holder <>( AttributeSet .builder () );
97
93
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some
98
94
// attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the
99
95
// wildcards.
@@ -110,19 +106,49 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso
110
106
// lookup indices where we request "*" because we may require all their fields
111
107
Set <String > wildcardJoinIndices = new java .util .HashSet <>();
112
108
113
- boolean [] canRemoveAliases = new boolean [] { true };
109
+ var canRemoveAliases = new Holder <>(true );
110
+
111
+ var forEachDownProcessor = new Holder <BiConsumer <LogicalPlan , Holder <Boolean >>>();
112
+ forEachDownProcessor .set ((LogicalPlan p , Holder <Boolean > breakEarly ) -> {// go over each plan top-down
113
+ if (p instanceof Fork fork ) {
114
+ // Early return from forEachDown. We will iterate over the children manually and end the recursion via forEachDown early.
115
+ var forkRefsResult = AttributeSet .builder ();
116
+ forkRefsResult .addAll (referencesBuilder .get ());
117
+
118
+ for (var forkBranch : fork .children ()) {
119
+ referencesBuilder .set (AttributeSet .builder ());
120
+ var isNestedFork = forkBranch .forEachDownMayReturnEarly (forEachDownProcessor .get ());
121
+ // This assert is just for good measure. FORKs within FORKs is yet not supported.
122
+ assert isNestedFork == false : "Nested FORKs are not yet supported" ;
123
+ // This is a safety measure for fork where the list of fields returned is empty.
124
+ // It can be empty for a branch that does need all the fields. For example "fork (where true) (where a is not null)"
125
+ // but it can also be empty for queries where NO fields are needed from ES,
126
+ // for example "fork (eval x = 1 | keep x) (eval y = 1 | keep y)" but we cannot establish this yet.
127
+ if (referencesBuilder .get ().isEmpty ()) {
128
+ projectAll .set (true );
129
+ // Return early, we'll be returning all references no matter what the remainder of the query is.
130
+ breakEarly .set (true );
131
+ return ;
132
+ }
133
+ forkRefsResult .addAll (referencesBuilder .get ());
134
+ }
135
+
136
+ forkRefsResult .removeIf (attr -> attr .name ().equals (Fork .FORK_FIELD ));
137
+ referencesBuilder .set (forkRefsResult );
114
138
115
- parsed .forEachDown (p -> {// go over each plan top-down
116
- if (p instanceof RegexExtract re ) { // for Grok and Dissect
139
+ // Return early, we've already explored all fork branches.
140
+ breakEarly .set (true );
141
+ return ;
142
+ } else if (p instanceof RegexExtract re ) { // for Grok and Dissect
117
143
// keep the inputs needed by Grok/Dissect
118
- referencesBuilder .addAll (re .input ().references ());
144
+ referencesBuilder .get (). addAll (re .input ().references ());
119
145
} else if (p instanceof Enrich enrich ) {
120
146
AttributeSet enrichFieldRefs = Expressions .references (enrich .enrichFields ());
121
147
AttributeSet .Builder enrichRefs = enrichFieldRefs .combine (enrich .matchField ().references ()).asBuilder ();
122
148
// Enrich adds an EmptyAttribute if no match field is specified
123
149
// The exact name of the field will be added later as part of enrichPolicyMatchFields Set
124
150
enrichRefs .removeIf (attr -> attr instanceof EmptyAttribute );
125
- referencesBuilder .addAll (enrichRefs );
151
+ referencesBuilder .get (). addAll (enrichRefs );
126
152
} else if (p instanceof LookupJoin join ) {
127
153
if (join .config ().type () instanceof JoinTypes .UsingJoinType usingJoinType ) {
128
154
joinRefs .addAll (usingJoinType .columns ());
@@ -135,15 +161,15 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso
135
161
joinRefs .addAll (keepRefs );
136
162
}
137
163
} else {
138
- referencesBuilder .addAll (p .references ());
164
+ referencesBuilder .get (). addAll (p .references ());
139
165
if (p instanceof UnresolvedRelation ur && ur .indexMode () == IndexMode .TIME_SERIES ) {
140
166
// METRICS aggs generally rely on @timestamp without the user having to mention it.
141
- referencesBuilder .add (new UnresolvedAttribute (ur .source (), MetadataAttribute .TIMESTAMP_FIELD ));
167
+ referencesBuilder .get (). add (new UnresolvedAttribute (ur .source (), MetadataAttribute .TIMESTAMP_FIELD ));
142
168
}
143
169
// special handling for UnresolvedPattern (which is not an UnresolvedAttribute)
144
170
p .forEachExpression (UnresolvedNamePattern .class , up -> {
145
171
var ua = new UnresolvedAttribute (up .source (), up .name ());
146
- referencesBuilder .add (ua );
172
+ referencesBuilder .get (). add (ua );
147
173
if (p instanceof Keep ) {
148
174
keepRefs .add (ua );
149
175
} else if (p instanceof Drop ) {
@@ -168,10 +194,10 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso
168
194
//
169
195
// and ips_policy enriches the results with the same name ip field),
170
196
// these aliases should be kept in the list of fields.
171
- if (canRemoveAliases [ 0 ] && p .anyMatch (FieldNameUtils ::couldOverrideAliases )) {
172
- canRemoveAliases [ 0 ] = false ;
197
+ if (canRemoveAliases . get () && p .anyMatch (FieldNameUtils ::couldOverrideAliases )) {
198
+ canRemoveAliases . set ( false ) ;
173
199
}
174
- if (canRemoveAliases [ 0 ] ) {
200
+ if (canRemoveAliases . get () ) {
175
201
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
176
202
// for example "from test | eval x = salary | stats max = max(x) by gender"
177
203
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
@@ -187,21 +213,25 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso
187
213
if (fieldNames .contains (ne .name ())) {
188
214
return ;
189
215
}
190
- referencesBuilder .removeIf (
191
- attr -> matchByName (attr , ne .name (), keepRefs .contains (attr ) || dropWildcardRefs .contains (attr ))
192
- );
216
+ referencesBuilder .get ()
217
+ .removeIf (attr -> matchByName (attr , ne .name (), keepRefs .contains (attr ) || dropWildcardRefs .contains (attr )));
193
218
});
194
219
}
195
220
});
221
+ parsed .forEachDownMayReturnEarly (forEachDownProcessor .get ());
222
+
223
+ if (projectAll .get ()) {
224
+ return new PreAnalysisResult (enrichResolution , IndexResolver .ALL_FIELDS , Set .of ());
225
+ }
196
226
197
227
// Add JOIN ON column references afterward to avoid Alias removal
198
- referencesBuilder .addAll (joinRefs );
228
+ referencesBuilder .get (). addAll (joinRefs );
199
229
// If any JOIN commands need wildcard field-caps calls, persist the index names
200
230
201
231
// remove valid metadata attributes because they will be filtered out by the IndexResolver anyway
202
232
// otherwise, in some edge cases, we will fail to ask for "*" (all fields) instead
203
- referencesBuilder .removeIf (a -> a instanceof MetadataAttribute || MetadataAttribute .isSupported (a .name ()));
204
- Set <String > fieldNames = referencesBuilder .build ().names ();
233
+ referencesBuilder .get (). removeIf (a -> a instanceof MetadataAttribute || MetadataAttribute .isSupported (a .name ()));
234
+ Set <String > fieldNames = referencesBuilder .get (). build ().names ();
205
235
206
236
if (fieldNames .isEmpty () && enrichPolicyMatchFields .isEmpty ()) {
207
237
// there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index
0 commit comments