From 9f24a18a31761d7c5321dbe11ed0a8dd6bf6d388 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Tue, 22 Jul 2025 14:36:47 -0400 Subject: [PATCH 01/21] Initial --- .../xpack/esql/session/EsqlSession.java | 31 +++++++++-- .../session/IndexResolverFieldNamesTests.java | 52 ++++++++++++++----- 2 files changed, 64 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index df18051bcf721..f240428eb3c4e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -836,11 +836,6 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy return result.withFieldNames(IndexResolver.ALL_FIELDS); } - // TODO: Improve field resolution for FORK - right now we request all fields - if (parsed.anyMatch(p -> p instanceof Fork)) { - return result.withFieldNames(IndexResolver.ALL_FIELDS); - } - Holder projectAll = new Holder<>(false); parsed.forEachExpressionDown(UnresolvedStar.class, us -> {// explicit "*" fields selection if (projectAll.get()) { @@ -853,6 +848,32 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy return result.withFieldNames(IndexResolver.ALL_FIELDS); } + Holder projectAfterFork = new Holder<>(false); + Holder hasFork = new Holder<>(false); + + parsed.forEachDown(plan -> { + if (projectAll.get()) { + return; + } + + if (hasFork.get() == false && shouldCollectReferencedFields(plan, inlinestatsAggs)) { + projectAfterFork.set(true); + } + + if (plan instanceof Fork fork && projectAfterFork.get() == false) { + hasFork.set(true); + fork.children().forEach(child -> { + if (child.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs)) == false) { + projectAll.set(true); + } + }); + } + }); + + if (projectAll.get()) { + return result.withFieldNames(IndexResolver.ALL_FIELDS); + } + var referencesBuilder = AttributeSet.builder(); // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 4b940b73b6424..1ded858bcafbe 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -2080,7 +2080,7 @@ public void testForkFieldsWithKeepAfterFork() { (WHERE d > 1000 AND e == "aaa" | EVAL c = a + 200) | WHERE x > y | KEEP a, b, c, d, x - """, ALL_FIELDS); + """, Set.of("a", "x", "y", "d", "e", "e.*", "d.*", "y.*", "x.*", "a.*")); } public void testForkFieldsWithKeepBeforeFork() { @@ -2092,7 +2092,7 @@ public void testForkFieldsWithKeepBeforeFork() { | FORK (WHERE c > 1 AND a < 10000 | EVAL d = a + 500) (WHERE d > 1000 AND e == "aaa" | EVAL c = a + 200) | WHERE x > y - """, ALL_FIELDS); + """, Set.of("x", "y", "a", "d", "e", "b", "c", "e.*", "d.*", "y.*", "x.*", "a.*", "c.*", "b.*")); } public void testForkFieldsWithNoProjection() { @@ -2118,17 +2118,41 @@ public void testForkFieldsWithStatsInOneBranch() { } public void testForkFieldsWithEnrichAndLookupJoins() { - assertFieldNames(""" - FROM test - | KEEP a, b, abc, def, z, xyz - | ENRICH enrich_policy ON abc - | EVAL b = a + 100 - | LOOKUP JOIN my_lookup_index ON def - | FORK (WHERE c > 1 AND a < 10000 | EVAL d = a + 500) - (STATS x = count(*), y=min(z)) - | LOOKUP JOIN my_lookup_index ON xyz - | WHERE x > y OR _fork == "fork1" - """, ALL_FIELDS); + assertFieldNames( + """ + FROM test + | KEEP a, b, abc, def, z, xyz + | ENRICH enrich_policy ON abc + | EVAL b = a + 100 + | LOOKUP JOIN my_lookup_index ON def + | FORK (WHERE c > 1 AND a < 10000 | EVAL d = a + 500) + (STATS x = count(*), y=min(z)) + | LOOKUP JOIN my_lookup_index ON xyz + | WHERE x > y OR _fork == "fork1" + """, + Set.of( + "x", + "y", + "_fork", + "a", + "c", + "abc", + "b", + "def", + "z", + "xyz", + "def.*", + "_fork.*", + "y.*", + "x.*", + "xyz.*", + "z.*", + "abc.*", + "a.*", + "c.*", + "b.*" + ) + ); } public void testForkWithStatsInAllBranches() { @@ -2140,7 +2164,7 @@ public void testForkWithStatsInAllBranches() { (EVAL z = a * b | STATS m = max(z)) (STATS x = count(*), y=min(z)) | WHERE x > y - """, ALL_FIELDS); + """, Set.of("c", "a", "z", "z.*", "a.*", "c.*")); } public void testForkWithStatsAndWhere() { From b64ec91460cb455ff38f7b995aff8954cd90eb07 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Thu, 24 Jul 2025 09:21:36 -0400 Subject: [PATCH 02/21] Update docs/changelog/131723.yaml --- docs/changelog/131723.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/131723.yaml diff --git a/docs/changelog/131723.yaml b/docs/changelog/131723.yaml new file mode 100644 index 0000000000000..8f564a897c0e0 --- /dev/null +++ b/docs/changelog/131723.yaml @@ -0,0 +1,6 @@ +pr: 131723 +summary: Tests for FORK's evaluation of field names used in `field_caps` resolve calls +area: Search +type: bug +issues: + - 127208 From 666382c96a67d4d9defe43ea2382bfa2e3a44e31 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Thu, 24 Jul 2025 10:59:12 -0400 Subject: [PATCH 03/21] Update docs/changelog/131723.yaml --- docs/changelog/131723.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/changelog/131723.yaml b/docs/changelog/131723.yaml index 8f564a897c0e0..a537793b08060 100644 --- a/docs/changelog/131723.yaml +++ b/docs/changelog/131723.yaml @@ -2,5 +2,4 @@ pr: 131723 summary: Tests for FORK's evaluation of field names used in `field_caps` resolve calls area: Search type: bug -issues: - - 127208 +issues: [] From 15480b6b7eee9b02fca3183bab818246c0890943 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Thu, 24 Jul 2025 15:54:14 -0400 Subject: [PATCH 04/21] Handle fork references correctly --- .../xpack/esql/core/tree/Node.java | 21 ++++- .../xpack/esql/session/EsqlSession.java | 82 +++++++++---------- .../session/IndexResolverFieldNamesTests.java | 29 ++++++- 3 files changed, 81 insertions(+), 51 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java index 613f5b0ae76c2..850a1f3a5ca65 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java @@ -65,13 +65,28 @@ public List children() { } @SuppressWarnings("unchecked") - public void forEachDown(Consumer action) { - action.accept((T) this); + public boolean forEachDownMayReturnEarly(Function action) { + if (action.apply((T) this) == false) { + // Early return. + return false; + } // please do not refactor it to a for-each loop to avoid // allocating iterator that performs concurrent modification checks and extra stack frames for (int c = 0, size = children.size(); c < size; c++) { - children.get(c).forEachDown(action); + if (children.get(c).forEachDownMayReturnEarly(action) == false) { + return false; + } } + return true; + } + + @SuppressWarnings("unchecked") + public void forEachDown(Consumer action) { + forEachDownMayReturnEarly(p -> { + action.accept(p); + // No early return. + return true; + }); } @SuppressWarnings("unchecked") diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 3f455aec75114..a9faaf783ce1e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -120,6 +120,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -851,33 +852,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy return result.withFieldNames(IndexResolver.ALL_FIELDS); } - Holder projectAfterFork = new Holder<>(false); - Holder hasFork = new Holder<>(false); - - parsed.forEachDown(plan -> { - if (projectAll.get()) { - return; - } - - if (hasFork.get() == false && shouldCollectReferencedFields(plan, inlinestatsAggs)) { - projectAfterFork.set(true); - } - - if (plan instanceof Fork fork && projectAfterFork.get() == false) { - hasFork.set(true); - fork.children().forEach(child -> { - if (child.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs)) == false) { - projectAll.set(true); - } - }); - } - }); - - if (projectAll.get()) { - return result.withFieldNames(IndexResolver.ALL_FIELDS); - } - - var referencesBuilder = AttributeSet.builder(); + var referencesBuilder = new Holder<>(AttributeSet.builder()); // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the // wildcards. @@ -894,19 +869,35 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy // lookup indices where we request "*" because we may require all their fields Set wildcardJoinIndices = new java.util.HashSet<>(); - boolean[] canRemoveAliases = new boolean[] { true }; + var canRemoveAliases = new Holder<>(true); + + var processingLambda = new Holder>(); + processingLambda.set((LogicalPlan p) -> {// go over each plan top-down + if (p instanceof Fork fork) { + // Early return from forEachDown. We will iterate over the children manually. + var forkRefsResult = AttributeSet.builder(); + forkRefsResult.addAll(referencesBuilder.get()); + + for (var child : fork.children()) { + referencesBuilder.set(AttributeSet.builder()); + var return_result = child.forEachDownMayReturnEarly(processingLambda.get()); + // No nested Forks for now... + assert return_result; + forkRefsResult.addAll(referencesBuilder.get()); + } - parsed.forEachDown(p -> {// go over each plan top-down - if (p instanceof RegexExtract re) { // for Grok and Dissect + referencesBuilder.set(forkRefsResult); + return false; + } else if (p instanceof RegexExtract re) { // for Grok and Dissect // keep the inputs needed by Grok/Dissect - referencesBuilder.addAll(re.input().references()); + referencesBuilder.get().addAll(re.input().references()); } else if (p instanceof Enrich enrich) { AttributeSet enrichFieldRefs = Expressions.references(enrich.enrichFields()); AttributeSet.Builder enrichRefs = enrichFieldRefs.combine(enrich.matchField().references()).asBuilder(); // Enrich adds an EmptyAttribute if no match field is specified // The exact name of the field will be added later as part of enrichPolicyMatchFields Set enrichRefs.removeIf(attr -> attr instanceof EmptyAttribute); - referencesBuilder.addAll(enrichRefs); + referencesBuilder.get().addAll(enrichRefs); } else if (p instanceof LookupJoin join) { if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { joinRefs.addAll(usingJoinType.columns()); @@ -919,15 +910,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy joinRefs.addAll(keepRefs); } } else { - referencesBuilder.addAll(p.references()); + referencesBuilder.get().addAll(p.references()); if (p instanceof UnresolvedRelation ur && ur.indexMode() == IndexMode.TIME_SERIES) { // METRICS aggs generally rely on @timestamp without the user having to mention it. - referencesBuilder.add(new UnresolvedAttribute(ur.source(), MetadataAttribute.TIMESTAMP_FIELD)); + referencesBuilder.get().add(new UnresolvedAttribute(ur.source(), MetadataAttribute.TIMESTAMP_FIELD)); } // special handling for UnresolvedPattern (which is not an UnresolvedAttribute) p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); - referencesBuilder.add(ua); + referencesBuilder.get().add(ua); if (p instanceof Keep) { keepRefs.add(ua); } else if (p instanceof Drop) { @@ -952,10 +943,10 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy // // and ips_policy enriches the results with the same name ip field), // these aliases should be kept in the list of fields. - if (canRemoveAliases[0] && p.anyMatch(EsqlSession::couldOverrideAliases)) { - canRemoveAliases[0] = false; + if (canRemoveAliases.get() && p.anyMatch(EsqlSession::couldOverrideAliases)) { + canRemoveAliases.set(false); } - if (canRemoveAliases[0]) { + if (canRemoveAliases.get()) { // remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree // for example "from test | eval x = salary | stats max = max(x) by gender" // remove the UnresolvedAttribute "x", since that is an Alias defined in "eval" @@ -971,15 +962,18 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (fieldNames.contains(ne.name())) { return; } - referencesBuilder.removeIf( - attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr)) - ); + referencesBuilder.get() + .removeIf(attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))); }); } + + // No early return. + return true; }); + parsed.forEachDownMayReturnEarly(processingLambda.get()); // Add JOIN ON column references afterward to avoid Alias removal - referencesBuilder.addAll(joinRefs); + referencesBuilder.get().addAll(joinRefs); // If any JOIN commands need wildcard field-caps calls, persist the index names if (wildcardJoinIndices.isEmpty() == false) { result = result.withWildcardJoinIndices(wildcardJoinIndices); @@ -987,8 +981,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy // remove valid metadata attributes because they will be filtered out by the IndexResolver anyway // otherwise, in some edge cases, we will fail to ask for "*" (all fields) instead - referencesBuilder.removeIf(a -> a instanceof MetadataAttribute || MetadataAttribute.isSupported(a.name())); - Set fieldNames = referencesBuilder.build().names(); + referencesBuilder.get().removeIf(a -> a instanceof MetadataAttribute || MetadataAttribute.isSupported(a.name())); + Set fieldNames = referencesBuilder.get().build().names(); if (fieldNames.isEmpty() && enrichPolicyMatchFields.isEmpty()) { // there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 1ded858bcafbe..638a5ae1423ab 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -2080,7 +2080,7 @@ public void testForkFieldsWithKeepAfterFork() { (WHERE d > 1000 AND e == "aaa" | EVAL c = a + 200) | WHERE x > y | KEEP a, b, c, d, x - """, Set.of("a", "x", "y", "d", "e", "e.*", "d.*", "y.*", "x.*", "a.*")); + """, Set.of("a", "x", "y", "c", "d", "e", "e.*", "d.*", "y.*", "x.*", "a.*", "c.*")); } public void testForkFieldsWithKeepBeforeFork() { @@ -2114,7 +2114,7 @@ public void testForkFieldsWithStatsInOneBranch() { | FORK (WHERE c > 1 AND a < 10000 | EVAL d = a + 500) (STATS x = count(*), y=min(z)) | WHERE x > y - """, ALL_FIELDS); + """, Set.of("x", "y", "a", "c", "z", "y.*", "x.*", "z.*", "a.*", "c.*")); } public void testForkFieldsWithEnrichAndLookupJoins() { @@ -2164,11 +2164,32 @@ public void testForkWithStatsInAllBranches() { (EVAL z = a * b | STATS m = max(z)) (STATS x = count(*), y=min(z)) | WHERE x > y - """, Set.of("c", "a", "z", "z.*", "a.*", "c.*")); + """, Set.of("x", "y", "c", "a", "z", "y.*", "x.*", "z.*", "a.*", "c.*")); + } + + public void testForkWithStatsInAllBranches1() { + assertFieldNames(""" + FROM employees + | FORK + ( STATS x = min(last_name)) + ( EVAL last_name = first_name | STATS y = max(last_name)) + """, Set.of("first_name", "last_name", "first_name.*", "last_name.*")); + } + + public void testForkWithStatsInAllBranches2() { + assertFieldNames(""" + FROM employees + | FORK + ( EVAL last_name = first_name | STATS y = VALUES(last_name)) + ( STATS x = VALUES(last_name)) + """, Set.of("first_name", "last_name", "first_name.*", "last_name.*")); } public void testForkWithStatsAndWhere() { - assertFieldNames(" FROM employees | FORK ( WHERE true | stats min(salary) by gender) ( WHERE true | LIMIT 3 )", ALL_FIELDS); + assertFieldNames( + " FROM employees | FORK ( WHERE true | stats min(salary) by gender) ( WHERE true | LIMIT 3 )", + Set.of("gender", "salary", "gender.*", "salary.*") + ); } private Set fieldNames(String query, Set enrichPolicyMatchFields) { From a20fb5b4a9943ffe972b803cdbbf8e91ab7b3bd7 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Thu, 24 Jul 2025 19:38:55 -0400 Subject: [PATCH 05/21] Remove _fork --- .../xpack/esql/session/EsqlSession.java | 1 + .../session/IndexResolverFieldNamesTests.java | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 8291c18b3c835..aee105666c1ce 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -858,6 +858,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy forkRefsResult.addAll(referencesBuilder.get()); } + forkRefsResult.removeIf(attr -> attr.name().equals(Fork.FORK_FIELD)); referencesBuilder.set(forkRefsResult); return false; } else if (p instanceof RegexExtract re) { // for Grok and Dissect diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 638a5ae1423ab..e3df2a9df18d7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -2133,7 +2133,6 @@ public void testForkFieldsWithEnrichAndLookupJoins() { Set.of( "x", "y", - "_fork", "a", "c", "abc", @@ -2142,7 +2141,6 @@ public void testForkFieldsWithEnrichAndLookupJoins() { "z", "xyz", "def.*", - "_fork.*", "y.*", "x.*", "xyz.*", @@ -2192,6 +2190,23 @@ public void testForkWithStatsAndWhere() { ); } + public void testNullString() { + assertFieldNames(""" + FROM sample_data + | EVAL x = null::string + | STATS COUNT() BY category=CATEGORIZE(x) + | SORT category""", Set.of("_index")); + } + + public void testNullStringWithFork() { + assertFieldNames(""" + FROM sample_data + | EVAL x = null::string + | STATS COUNT() BY category=CATEGORIZE(x) + | SORT category + | FORK (WHERE true) (WHERE true) | WHERE _fork == "fork1" | DROP _fork""", Set.of("_index")); + } + private Set fieldNames(String query, Set enrichPolicyMatchFields) { var preAnalysisResult = new EsqlSession.PreAnalysisResult(null); return EsqlSession.fieldNames(parser.createStatement(query, EsqlTestUtils.TEST_CFG), enrichPolicyMatchFields, preAnalysisResult) From b159bd74ef80010503c153de32fa6573d34fd055 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 24 Jul 2025 23:50:40 +0000 Subject: [PATCH 06/21] [CI] Auto commit changes from spotless --- .../session/IndexResolverFieldNamesTests.java | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index e3df2a9df18d7..dd387b5a4f59e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -2130,26 +2130,7 @@ public void testForkFieldsWithEnrichAndLookupJoins() { | LOOKUP JOIN my_lookup_index ON xyz | WHERE x > y OR _fork == "fork1" """, - Set.of( - "x", - "y", - "a", - "c", - "abc", - "b", - "def", - "z", - "xyz", - "def.*", - "y.*", - "x.*", - "xyz.*", - "z.*", - "abc.*", - "a.*", - "c.*", - "b.*" - ) + Set.of("x", "y", "a", "c", "abc", "b", "def", "z", "xyz", "def.*", "y.*", "x.*", "xyz.*", "z.*", "abc.*", "a.*", "c.*", "b.*") ); } From f9f26d49cc07cfa2cdc28fa3253449ae52f8b128 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Fri, 25 Jul 2025 09:53:50 -0400 Subject: [PATCH 07/21] Fix merge --- .../xpack/esql/session/EsqlSession.java | 217 ------------------ .../xpack/esql/session/FieldNameUtils.java | 62 +++-- .../esql/session/FieldNameUtilsTests.java | 31 +-- 3 files changed, 50 insertions(+), 260 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index d2c5b13d72b47..09ba8142bea7d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -84,7 +84,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -755,222 +754,6 @@ private void resolveInferences( inferenceRunner.resolveInferenceIds(inferencePlans, l.map(preAnalysisResult::withInferenceResolution)); } - static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicyMatchFields, PreAnalysisResult result) { - List inlinestats = parsed.collect(InlineStats.class::isInstance); - Set inlinestatsAggs = new HashSet<>(); - for (var i : inlinestats) { - inlinestatsAggs.add(((InlineStats) i).aggregate()); - } - - if (false == parsed.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs))) { - // no explicit columns selection, for example "from employees" - // also, inlinestats only adds columns to the existent output, its Aggregate shouldn't interfere with potentially using "*" - return result.withFieldNames(IndexResolver.ALL_FIELDS); - } - - Holder projectAll = new Holder<>(false); - parsed.forEachExpressionDown(UnresolvedStar.class, us -> {// explicit "*" fields selection - if (projectAll.get()) { - return; - } - projectAll.set(true); - }); - - if (projectAll.get()) { - return result.withFieldNames(IndexResolver.ALL_FIELDS); - } - - var referencesBuilder = new Holder<>(AttributeSet.builder()); - // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some - // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the - // wildcards. - // - // E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for - // "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for - // - // NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder - // Except in KEEP and DROP. - var keepRefs = AttributeSet.builder(); - var dropWildcardRefs = AttributeSet.builder(); - // fields required to request for lookup joins to work - var joinRefs = AttributeSet.builder(); - // lookup indices where we request "*" because we may require all their fields - Set wildcardJoinIndices = new java.util.HashSet<>(); - - var canRemoveAliases = new Holder<>(true); - - var processingLambda = new Holder>(); - processingLambda.set((LogicalPlan p) -> {// go over each plan top-down - if (p instanceof Fork fork) { - // Early return from forEachDown. We will iterate over the children manually. - var forkRefsResult = AttributeSet.builder(); - forkRefsResult.addAll(referencesBuilder.get()); - - for (var child : fork.children()) { - referencesBuilder.set(AttributeSet.builder()); - var return_result = child.forEachDownMayReturnEarly(processingLambda.get()); - // No nested Forks for now... - assert return_result; - forkRefsResult.addAll(referencesBuilder.get()); - } - - forkRefsResult.removeIf(attr -> attr.name().equals(Fork.FORK_FIELD)); - referencesBuilder.set(forkRefsResult); - return false; - } else if (p instanceof RegexExtract re) { // for Grok and Dissect - // keep the inputs needed by Grok/Dissect - referencesBuilder.get().addAll(re.input().references()); - } else if (p instanceof Enrich enrich) { - AttributeSet enrichFieldRefs = Expressions.references(enrich.enrichFields()); - AttributeSet.Builder enrichRefs = enrichFieldRefs.combine(enrich.matchField().references()).asBuilder(); - // Enrich adds an EmptyAttribute if no match field is specified - // The exact name of the field will be added later as part of enrichPolicyMatchFields Set - enrichRefs.removeIf(attr -> attr instanceof EmptyAttribute); - referencesBuilder.get().addAll(enrichRefs); - } else if (p instanceof LookupJoin join) { - if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { - joinRefs.addAll(usingJoinType.columns()); - } - if (keepRefs.isEmpty()) { - // No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution - wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern()); - } else { - // Keep commands can reference the join columns with names that shadow aliases, so we block their removal - joinRefs.addAll(keepRefs); - } - } else { - referencesBuilder.get().addAll(p.references()); - if (p instanceof UnresolvedRelation ur && ur.indexMode() == IndexMode.TIME_SERIES) { - // METRICS aggs generally rely on @timestamp without the user having to mention it. - referencesBuilder.get().add(new UnresolvedAttribute(ur.source(), MetadataAttribute.TIMESTAMP_FIELD)); - } - // special handling for UnresolvedPattern (which is not an UnresolvedAttribute) - p.forEachExpression(UnresolvedNamePattern.class, up -> { - var ua = new UnresolvedAttribute(up.source(), up.name()); - referencesBuilder.get().add(ua); - if (p instanceof Keep) { - keepRefs.add(ua); - } else if (p instanceof Drop) { - dropWildcardRefs.add(ua); - } else { - throw new IllegalStateException("Only KEEP and DROP should allow wildcards"); - } - }); - if (p instanceof Keep) { - keepRefs.addAll(p.references()); - } - } - - // If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of - // command that we may add in the future which can override already defined Aliases with EVAL - // (for example - // - // from test - // | eval ip = 123 - // | enrich ips_policy ON hostname - // | rename ip AS my_ip - // - // and ips_policy enriches the results with the same name ip field), - // these aliases should be kept in the list of fields. - if (canRemoveAliases.get() && p.anyMatch(EsqlSession::couldOverrideAliases)) { - canRemoveAliases.set(false); - } - if (canRemoveAliases.get()) { - // remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree - // for example "from test | eval x = salary | stats max = max(x) by gender" - // remove the UnresolvedAttribute "x", since that is an Alias defined in "eval" - // also remove other down-the-tree references to the extracted fields from "grok" and "dissect" - AttributeSet planRefs = p.references(); - Set fieldNames = planRefs.names(); - p.forEachExpressionDown(NamedExpression.class, ne -> { - if ((ne instanceof Alias || ne instanceof ReferenceAttribute) == false) { - return; - } - // do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id" - // or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)" - if (fieldNames.contains(ne.name())) { - return; - } - referencesBuilder.get() - .removeIf(attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))); - }); - } - - // No early return. - return true; - }); - parsed.forEachDownMayReturnEarly(processingLambda.get()); - - // Add JOIN ON column references afterward to avoid Alias removal - referencesBuilder.get().addAll(joinRefs); - // If any JOIN commands need wildcard field-caps calls, persist the index names - if (wildcardJoinIndices.isEmpty() == false) { - result = result.withWildcardJoinIndices(wildcardJoinIndices); - } - - // remove valid metadata attributes because they will be filtered out by the IndexResolver anyway - // otherwise, in some edge cases, we will fail to ask for "*" (all fields) instead - referencesBuilder.get().removeIf(a -> a instanceof MetadataAttribute || MetadataAttribute.isSupported(a.name())); - Set fieldNames = referencesBuilder.get().build().names(); - - if (fieldNames.isEmpty() && enrichPolicyMatchFields.isEmpty()) { - // there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index - return result.withFieldNames(IndexResolver.INDEX_METADATA_FIELD); - } else { - fieldNames.addAll(subfields(fieldNames)); - fieldNames.addAll(enrichPolicyMatchFields); - fieldNames.addAll(subfields(enrichPolicyMatchFields)); - return result.withFieldNames(fieldNames); - } - } - - /** - * Indicates whether the given plan gives an exact list of fields that we need to collect from field_caps. - */ - private static boolean shouldCollectReferencedFields(LogicalPlan plan, Set inlinestatsAggs) { - return plan instanceof Project || (plan instanceof Aggregate agg && inlinestatsAggs.contains(agg) == false); - } - - /** - * Could a plan "accidentally" override aliases? - * Examples are JOIN and ENRICH, that _could_ produce fields with the same - * name of an existing alias, based on their index mapping. - * Here we just have to consider commands where this information is not available before index resolution, - * eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know. - */ - private static boolean couldOverrideAliases(LogicalPlan p) { - return (p instanceof Aggregate - || p instanceof Completion - || p instanceof Drop - || p instanceof Eval - || p instanceof Filter - || p instanceof Fork - || p instanceof InlineStats - || p instanceof Insist - || p instanceof Keep - || p instanceof Limit - || p instanceof MvExpand - || p instanceof OrderBy - || p instanceof Project - || p instanceof RegexExtract - || p instanceof Rename - || p instanceof TopN - || p instanceof UnresolvedRelation) == false; - } - - private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) { - boolean isPattern = Regex.isSimpleMatchPattern(attr.name()); - if (skipIfPattern && isPattern) { - return false; - } - var name = attr.name(); - return isPattern ? Regex.simpleMatch(name, other) : name.equals(other); - } - - private static Set subfields(Set names) { - return names.stream().filter(name -> name.endsWith(WILDCARD) == false).map(name -> name + ".*").collect(Collectors.toSet()); - } - private PhysicalPlan logicalPlanToPhysicalPlan(LogicalPlan optimizedPlan, EsqlQueryRequest request) { PhysicalPlan physicalPlan = optimizedPhysicalPlan(optimizedPlan); physicalPlan = physicalPlan.transformUp(FragmentExec.class, f -> { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index a42cff8ce07db..0a833bbb6a42c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -49,6 +49,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD; @@ -76,11 +77,6 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); } - // TODO: Improve field resolution for FORK - right now we request all fields - if (parsed.anyMatch(p -> p instanceof Fork)) { - return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); - } - Holder projectAll = new Holder<>(false); parsed.forEachExpressionDown(UnresolvedStar.class, us -> {// explicit "*" fields selection if (projectAll.get()) { @@ -93,7 +89,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); } - var referencesBuilder = AttributeSet.builder(); + var referencesBuilder = new Holder<>(AttributeSet.builder()); // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the // wildcards. @@ -110,19 +106,36 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso // lookup indices where we request "*" because we may require all their fields Set wildcardJoinIndices = new java.util.HashSet<>(); - boolean[] canRemoveAliases = new boolean[] { true }; + var canRemoveAliases = new Holder<>(true); + + var processingLambda = new Holder>(); + processingLambda.set((LogicalPlan p) -> {// go over each plan top-down + if (p instanceof Fork fork) { + // Early return from forEachDown. We will iterate over the children manually. + var forkRefsResult = AttributeSet.builder(); + forkRefsResult.addAll(referencesBuilder.get()); - parsed.forEachDown(p -> {// go over each plan top-down - if (p instanceof RegexExtract re) { // for Grok and Dissect + for (var child : fork.children()) { + referencesBuilder.set(AttributeSet.builder()); + var return_result = child.forEachDownMayReturnEarly(processingLambda.get()); + // No nested Forks for now... + assert return_result; + forkRefsResult.addAll(referencesBuilder.get()); + } + + forkRefsResult.removeIf(attr -> attr.name().equals(Fork.FORK_FIELD)); + referencesBuilder.set(forkRefsResult); + return false; + } else if (p instanceof RegexExtract re) { // for Grok and Dissect // keep the inputs needed by Grok/Dissect - referencesBuilder.addAll(re.input().references()); + referencesBuilder.get().addAll(re.input().references()); } else if (p instanceof Enrich enrich) { AttributeSet enrichFieldRefs = Expressions.references(enrich.enrichFields()); AttributeSet.Builder enrichRefs = enrichFieldRefs.combine(enrich.matchField().references()).asBuilder(); // Enrich adds an EmptyAttribute if no match field is specified // The exact name of the field will be added later as part of enrichPolicyMatchFields Set enrichRefs.removeIf(attr -> attr instanceof EmptyAttribute); - referencesBuilder.addAll(enrichRefs); + referencesBuilder.get().addAll(enrichRefs); } else if (p instanceof LookupJoin join) { if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { joinRefs.addAll(usingJoinType.columns()); @@ -135,15 +148,15 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso joinRefs.addAll(keepRefs); } } else { - referencesBuilder.addAll(p.references()); + referencesBuilder.get().addAll(p.references()); if (p instanceof UnresolvedRelation ur && ur.indexMode() == IndexMode.TIME_SERIES) { // METRICS aggs generally rely on @timestamp without the user having to mention it. - referencesBuilder.add(new UnresolvedAttribute(ur.source(), MetadataAttribute.TIMESTAMP_FIELD)); + referencesBuilder.get().add(new UnresolvedAttribute(ur.source(), MetadataAttribute.TIMESTAMP_FIELD)); } // special handling for UnresolvedPattern (which is not an UnresolvedAttribute) p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); - referencesBuilder.add(ua); + referencesBuilder.get().add(ua); if (p instanceof Keep) { keepRefs.add(ua); } else if (p instanceof Drop) { @@ -168,10 +181,10 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso // // and ips_policy enriches the results with the same name ip field), // these aliases should be kept in the list of fields. - if (canRemoveAliases[0] && p.anyMatch(FieldNameUtils::couldOverrideAliases)) { - canRemoveAliases[0] = false; + if (canRemoveAliases.get() && p.anyMatch(FieldNameUtils::couldOverrideAliases)) { + canRemoveAliases.set(false); } - if (canRemoveAliases[0]) { + if (canRemoveAliases.get()) { // remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree // for example "from test | eval x = salary | stats max = max(x) by gender" // remove the UnresolvedAttribute "x", since that is an Alias defined in "eval" @@ -187,21 +200,24 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso if (fieldNames.contains(ne.name())) { return; } - referencesBuilder.removeIf( - attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr)) - ); + referencesBuilder.get() + .removeIf(attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))); }); } + + // No early return. + return true; }); + parsed.forEachDownMayReturnEarly(processingLambda.get()); // Add JOIN ON column references afterward to avoid Alias removal - referencesBuilder.addAll(joinRefs); + referencesBuilder.get().addAll(joinRefs); // If any JOIN commands need wildcard field-caps calls, persist the index names // remove valid metadata attributes because they will be filtered out by the IndexResolver anyway // otherwise, in some edge cases, we will fail to ask for "*" (all fields) instead - referencesBuilder.removeIf(a -> a instanceof MetadataAttribute || MetadataAttribute.isSupported(a.name())); - Set fieldNames = referencesBuilder.build().names(); + referencesBuilder.get().removeIf(a -> a instanceof MetadataAttribute || MetadataAttribute.isSupported(a.name())); + Set fieldNames = referencesBuilder.get().build().names(); if (fieldNames.isEmpty() && enrichPolicyMatchFields.isEmpty()) { // there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java index f34190d5e9749..33c30b0cd785c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java @@ -2124,26 +2124,8 @@ public void testForkFieldsWithEnrichAndLookupJoins() { | LOOKUP JOIN my_lookup_index ON xyz | WHERE x > y OR _fork == "fork1" """, - Set.of( - "x", - "y", - "a", - "c", - "abc", - "b", - "def", - "z", - "xyz", - "def.*", - "y.*", - "x.*", - "xyz.*", - "z.*", - "abc.*", - "a.*", - "c.*", - "b.*" - ) + Set.of("x", "y", "a", "c", "abc", "b", "def", "z", "xyz", "def.*", "y.*", "x.*", "xyz.*", "z.*", "abc.*", "a.*", "c.*", "b.*"), + Set.of("my_lookup_index") ); } @@ -2201,6 +2183,15 @@ public void testNullStringWithFork() { | FORK (WHERE true) (WHERE true) | WHERE _fork == "fork1" | DROP _fork""", Set.of("_index")); } + public void testSingleFork() { + assertFieldNames(""" + FROM employees + | FORK + ( STATS x = count(*)) + ( WHERE emp_no == "2" ) + | SORT _fork""", Set.of("emp_no", "emp_no.*")); + } + private void assertFieldNames(String query, Set expected) { assertFieldNames(query, new EnrichResolution(), expected, Set.of()); } From cb412e6b2dfe699ce75e5500b4ebcb2a4b6b9fab Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Fri, 25 Jul 2025 12:02:19 -0400 Subject: [PATCH 08/21] Return all fields --- .../xpack/esql/session/FieldNameUtils.java | 17 +++++++++++------ .../xpack/esql/session/FieldNameUtilsTests.java | 13 ++++++++++--- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 0a833bbb6a42c..7bab8325beb6c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -71,12 +71,6 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso inlinestatsAggs.add(((InlineStats) i).aggregate()); } - if (false == parsed.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs))) { - // no explicit columns selection, for example "from employees" - // also, inlinestats only adds columns to the existent output, its Aggregate shouldn't interfere with potentially using "*" - return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); - } - Holder projectAll = new Holder<>(false); parsed.forEachExpressionDown(UnresolvedStar.class, us -> {// explicit "*" fields selection if (projectAll.get()) { @@ -107,6 +101,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso Set wildcardJoinIndices = new java.util.HashSet<>(); var canRemoveAliases = new Holder<>(true); + var needsAllFields = new Holder<>(parsed.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs)) == false); var processingLambda = new Holder>(); processingLambda.set((LogicalPlan p) -> {// go over each plan top-down @@ -120,6 +115,10 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso var return_result = child.forEachDownMayReturnEarly(processingLambda.get()); // No nested Forks for now... assert return_result; + if (referencesBuilder.get().isEmpty()) { + needsAllFields.set(true); + return true; + } forkRefsResult.addAll(referencesBuilder.get()); } @@ -210,6 +209,12 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso }); parsed.forEachDownMayReturnEarly(processingLambda.get()); + if (needsAllFields.get()) { + // no explicit columns selection, for example "from employees" + // also, inlinestats only adds columns to the existent output, its Aggregate shouldn't interfere with potentially using "*" + return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); + } + // Add JOIN ON column references afterward to avoid Alias removal referencesBuilder.get().addAll(joinRefs); // If any JOIN commands need wildcard field-caps calls, persist the index names diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java index 33c30b0cd785c..2d661165feb70 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java @@ -59,6 +59,13 @@ public void testSimple1() { ); } + public void testSimple2() { + assertFieldNames(""" + FROM employees + | WHERE emp_no == "2" + """, IndexResolver.ALL_FIELDS); + } + public void testDirectFilter() { assertFieldNames( "from employees | sort emp_no | where still_hired | keep emp_no | limit 3", @@ -2162,7 +2169,7 @@ public void testForkWithStatsInAllBranches2() { public void testForkWithStatsAndWhere() { assertFieldNames( " FROM employees | FORK ( WHERE true | stats min(salary) by gender) ( WHERE true | LIMIT 3 )", - Set.of("gender", "salary", "gender.*", "salary.*") + IndexResolver.ALL_FIELDS ); } @@ -2180,7 +2187,7 @@ public void testNullStringWithFork() { | EVAL x = null::string | STATS COUNT() BY category=CATEGORIZE(x) | SORT category - | FORK (WHERE true) (WHERE true) | WHERE _fork == "fork1" | DROP _fork""", Set.of("_index")); + | FORK (WHERE true) (WHERE true) | WHERE _fork == "fork1" | DROP _fork""", IndexResolver.ALL_FIELDS); } public void testSingleFork() { @@ -2189,7 +2196,7 @@ public void testSingleFork() { | FORK ( STATS x = count(*)) ( WHERE emp_no == "2" ) - | SORT _fork""", Set.of("emp_no", "emp_no.*")); + | SORT _fork""", IndexResolver.ALL_FIELDS); } private void assertFieldNames(String query, Set expected) { From 2479bfaaa20e1fb9095885ed7bbf93de3898ee9a Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Tue, 29 Jul 2025 18:49:20 -0400 Subject: [PATCH 09/21] Fix --- .../xpack/esql/session/FieldNameUtils.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 7bab8325beb6c..3a19aadee5e94 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -102,6 +102,11 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso var canRemoveAliases = new Holder<>(true); var needsAllFields = new Holder<>(parsed.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs)) == false); + if (needsAllFields.get()) { + // no explicit columns selection, for example "from employees" + // also, inlinestats only adds columns to the existent output, its Aggregate shouldn't interfere with potentially using "*" + return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); + } var processingLambda = new Holder>(); processingLambda.set((LogicalPlan p) -> {// go over each plan top-down @@ -117,7 +122,8 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso assert return_result; if (referencesBuilder.get().isEmpty()) { needsAllFields.set(true); - return true; + // Early return. + return false; } forkRefsResult.addAll(referencesBuilder.get()); } @@ -210,8 +216,6 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso parsed.forEachDownMayReturnEarly(processingLambda.get()); if (needsAllFields.get()) { - // no explicit columns selection, for example "from employees" - // also, inlinestats only adds columns to the existent output, its Aggregate shouldn't interfere with potentially using "*" return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); } From 482fabe9bddd81c63f168f623dc534102b8cd2fb Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Wed, 30 Jul 2025 11:24:42 -0400 Subject: [PATCH 10/21] tweak --- .../xpack/esql/session/FieldNameUtils.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 3a19aadee5e94..1532001be3e7d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -71,6 +71,12 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso inlinestatsAggs.add(((InlineStats) i).aggregate()); } + if (false == parsed.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs))) { + // no explicit columns selection, for example "from employees" + // also, inlinestats only adds columns to the existent output, its Aggregate shouldn't interfere with potentially using "*" + return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); + } + Holder projectAll = new Holder<>(false); parsed.forEachExpressionDown(UnresolvedStar.class, us -> {// explicit "*" fields selection if (projectAll.get()) { @@ -101,12 +107,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso Set wildcardJoinIndices = new java.util.HashSet<>(); var canRemoveAliases = new Holder<>(true); - var needsAllFields = new Holder<>(parsed.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs)) == false); - if (needsAllFields.get()) { - // no explicit columns selection, for example "from employees" - // also, inlinestats only adds columns to the existent output, its Aggregate shouldn't interfere with potentially using "*" - return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); - } + var needsAllFields = new Holder<>(false); var processingLambda = new Holder>(); processingLambda.set((LogicalPlan p) -> {// go over each plan top-down From 6378f76e84f290910e29492e54aae73fc2d3cfc7 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Wed, 30 Jul 2025 13:48:46 -0400 Subject: [PATCH 11/21] Add tests --- .../esql/session/FieldNameUtilsTests.java | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java index 7c4c2a95d4370..385e91e25f557 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java @@ -2199,6 +2199,34 @@ public void testSingleFork() { | SORT _fork""", IndexResolver.ALL_FIELDS); } + public void testForkRefs1() { + assertFieldNames(""" + FROM employees + | KEEP first_name, last_name + | FORK + ( EVAL x = first_name) + ( EVAL x = last_name) + """, Set.of("first_name", "last_name", "last_name.*", "first_name.*")); + } + + public void testForkRefs2() { + assertFieldNames(""" + FROM employees + | FORK + ( KEEP first_name | EVAL x = first_name) + ( KEEP last_name | EVAL x = last_name) + """, Set.of("first_name", "last_name", "last_name.*", "first_name.*")); + } + + public void testForkRefs3() { + assertFieldNames(""" + FROM employees + | FORK + ( KEEP first_name | EVAL last_name = first_name) + ( KEEP first_name | EVAL x = first_name) + """, Set.of("first_name", "first_name.*")); + } + private void assertFieldNames(String query, Set expected) { assertFieldNames(query, new EnrichResolution(), expected, Set.of()); } From 82f189dd57df1e837e75df9844ad0076873f52de Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Wed, 30 Jul 2025 14:09:28 -0400 Subject: [PATCH 12/21] Add test --- .../xpack/esql/session/FieldNameUtilsTests.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java index 385e91e25f557..054c93fc74701 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java @@ -2227,6 +2227,17 @@ public void testForkRefs3() { """, Set.of("first_name", "first_name.*")); } + public void testForkRef4() { + assertFieldNames(""" + from employees + | sort emp_no + | limit 1 + | FORK + (eval x = to_string(languages) | enrich languages_policy on x | keep language_name) + (eval y = to_string(emp_no) | enrich languages_policy on y | keep emp_no) + """, Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*", "y", "y.*")); + } + private void assertFieldNames(String query, Set expected) { assertFieldNames(query, new EnrichResolution(), expected, Set.of()); } From 199392f63a6c8746892a595179eb0683480bab97 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Thu, 31 Jul 2025 12:41:30 -0400 Subject: [PATCH 13/21] Add more tests --- .../esql/session/FieldNameUtilsTests.java | 718 ++++++++++++++++++ 1 file changed, 718 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java index 054c93fc74701..579ef2bc6a5c3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java @@ -2238,6 +2238,724 @@ public void testForkRef4() { """, Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*", "y", "y.*")); } + public void testRerankerAfterFuse() { + assertFieldNames(""" + FROM books METADATA _id, _index, _score + | FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 ) + ( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 ) + | FUSE + | RERANK "Tolkien" ON title WITH { "inference_id" : "test_reranker" } + | EVAL _score=ROUND(_score, 2) + | SORT _score DESC, book_no ASC + | LIMIT 2 + | KEEP book_no, title, author, _score""", Set.of("book_no", "title", "author", "title.*", "author.*", "book_no.*")); + } + + public void testSimpleFuse() { + assertFieldNames(""" + FROM employees METADATA _id, _index, _score + | FORK ( WHERE emp_no:10001 ) + ( WHERE emp_no:10002 ) + | FUSE + | EVAL _score = round(_score, 4) + | KEEP _score, _fork, emp_no + | SORT _score, _fork, emp_no""", Set.of("emp_no", "emp_no.*")); + } + + public void testFuseWithMatchAndScore() { + assertFieldNames(""" + FROM books METADATA _id, _index, _score + | FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 ) + ( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 ) + | FUSE + | SORT _score DESC, _id, _index + | EVAL _fork = mv_sort(_fork) + | EVAL _score = round(_score, 5) + | KEEP _score, _fork, _id""", Set.of("title", "author", "title.*", "author.*")); + } + + public void testFuseWithDisjunctionAndPostFilter() { + assertFieldNames(""" + FROM books METADATA _id, _index, _score + | FORK ( WHERE title:"Tolkien" OR author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 ) + ( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 ) + | FUSE + | SORT _score DESC, _id, _index + | EVAL _fork = mv_sort(_fork) + | EVAL _score = round(_score, 5) + | KEEP _score, _fork, _id + | WHERE _score > 0.014""", Set.of("title", "author", "title.*", "author.*")); + } + + public void testFuseWithStats() { + assertFieldNames(""" + FROM books METADATA _id, _index, _score + | FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 ) + ( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 ) + ( WHERE author:"Ursula K. Le Guin" AND title:"short stories" | SORT _score, _id DESC | LIMIT 3) + | FUSE + | STATS count_fork=COUNT(*) BY _fork + | SORT _fork""", Set.of("title", "author", "title.*", "author.*")); + } + + public void testFuseWithMultipleForkBranches() { + assertFieldNames(""" + FROM books METADATA _id, _index, _score + | FORK (WHERE author:"Keith Faulkner" AND qstr("author:Rory or author:Beverlie") | SORT _score, _id DESC | LIMIT 3) + (WHERE author:"Ursula K. Le Guin" | SORT _score, _id DESC | LIMIT 3) + (WHERE title:"Tolkien" AND author:"Tolkien" AND year > 2000 AND mv_count(author) == 1 | SORT _score, _id DESC | LIMIT 3) + (WHERE match(author, "Keith Faulkner") AND match(author, "Rory Tyger") | SORT _score, _id DESC | LIMIT 3) + | FUSE + | SORT _score DESC, _id, _index + | EVAL _fork = mv_sort(_fork) + | EVAL _score = round(_score, 4) + | EVAL title = trim(substring(title, 1, 20)) + | KEEP _score, author, title, _fork""", Set.of("author", "title", "year", "title.*", "author.*", "year.*")); + } + + public void testFuseWithSemanticSearch() { + assertFieldNames(""" + FROM semantic_text METADATA _id, _score, _index + | FORK ( WHERE semantic_text_field:"something" | SORT _score DESC | LIMIT 2) + ( WHERE semantic_text_field:"something else" | SORT _score DESC | LIMIT 2) + | FUSE + | SORT _score DESC, _id, _index + | EVAL _score = round(_score, 4) + | EVAL _fork = mv_sort(_fork) + | KEEP _fork, _score, _id, semantic_text_field""", Set.of("semantic_text_field", "semantic_text_field.*")); + } + + public void testSimpleFork() { + assertFieldNames(""" + FROM employees + | FORK ( WHERE emp_no == 10001 ) + ( WHERE emp_no == 10002 ) + | KEEP emp_no, _fork + | SORT emp_no""", Set.of("emp_no", "emp_no.*")); + } + + public void testSimpleForkWithStats() { + assertFieldNames(""" + FROM books METADATA _score + | WHERE author:"Faulkner" + | EVAL score = round(_score, 2) + | FORK (SORT score DESC, author | LIMIT 5 | KEEP author, score) + (STATS total = COUNT(*)) + | SORT _fork, score DESC, author""", Set.of("score", "author", "score.*", "author.*")); + } + + public void testForkWithWhereSortAndLimit() { + assertFieldNames(""" + FROM employees + | FORK ( WHERE hire_date < "1985-03-01T00:00:00Z" | SORT first_name | LIMIT 5 ) + ( WHERE hire_date < "1988-03-01T00:00:00Z" | SORT first_name | LIMIT 5 ) + | KEEP emp_no, first_name, _fork + | SORT emp_no, _fork""", Set.of("emp_no", "first_name", "hire_date", "first_name.*", "hire_date.*", "emp_no.*")); + } + + public void testFiveFork() { + assertFieldNames(""" + FROM employees + | FORK ( WHERE emp_no == 10005 ) + ( WHERE emp_no == 10004 ) + ( WHERE emp_no == 10003 ) + ( WHERE emp_no == 10002 ) + ( WHERE emp_no == 10001 ) + | KEEP _fork, emp_no + | SORT _fork""", Set.of("emp_no", "emp_no.*")); + } + + public void testForkWithWhereSortDescAndLimit() { + assertFieldNames(""" + FROM employees + | FORK ( WHERE hire_date < "1985-03-01T00:00:00Z" | SORT first_name DESC | LIMIT 2 ) + ( WHERE hire_date < "1988-03-01T00:00:00Z" | SORT first_name DESC NULLS LAST | LIMIT 2 ) + | KEEP _fork, emp_no, first_name + | SORT _fork, first_name DESC""", Set.of("first_name", "emp_no", "hire_date", "first_name.*", "hire_date.*", "emp_no.*")); + } + + public void testForkWithCommonPrefilter() { + assertFieldNames(""" + FROM employees + | WHERE emp_no > 10050 + | FORK ( SORT emp_no ASC | LIMIT 2 ) + ( SORT emp_no DESC NULLS LAST | LIMIT 2 ) + | KEEP _fork, emp_no + | SORT _fork, emp_no""", Set.of("emp_no", "emp_no.*")); + } + + public void testForkWithSemanticSearchAndScore() { + assertFieldNames(""" + FROM semantic_text METADATA _id, _score + | FORK ( WHERE semantic_text_field:"something" | SORT _score DESC | LIMIT 2) + ( WHERE semantic_text_field:"something else" | SORT _score DESC | LIMIT 2) + | EVAL _score = round(_score, 4) + | SORT _fork, _score, _id + | KEEP _fork, _score, _id, semantic_text_field""", Set.of("semantic_text_field", "semantic_text_field.*")); + } + + public void testForkWithEvals() { + assertFieldNames(""" + FROM employees + | FORK (WHERE emp_no == 10048 OR emp_no == 10081 | EVAL x = "abc" | EVAL y = 1) + (WHERE emp_no == 10081 OR emp_no == 10087 | EVAL x = "def" | EVAL z = 2) + | KEEP _fork, emp_no, x, y, z + | SORT _fork, emp_no""", Set.of("emp_no", "x", "y", "z", "y.*", "x.*", "z.*", "emp_no.*")); + } + + public void testForkWithStats() { + assertFieldNames(""" + FROM employees + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + (STATS x = COUNT(*), y = MAX(emp_no), z = MIN(emp_no)) + (STATS x = COUNT(*), y = MIN(emp_no)) + | KEEP _fork, emp_no, x, y, z + | SORT _fork, emp_no""", Set.of("emp_no", "x", "y", "z", "y.*", "x.*", "z.*", "emp_no.*")); + } + + public void testForkWithDissect() { + assertFieldNames( + """ + FROM employees + | WHERE emp_no == 10048 OR emp_no == 10081 + | FORK (EVAL a = CONCAT(first_name, " ", emp_no::keyword, " ", last_name) + | DISSECT a "%{x} %{y} %{z}" ) + (EVAL b = CONCAT(last_name, " ", emp_no::keyword, " ", first_name) + | DISSECT b "%{x} %{y} %{w}" ) + | KEEP _fork, emp_no, x, y, z, w + | SORT _fork, emp_no""", + Set.of( + "emp_no", + "x", + "y", + "z", + "w", + "first_name", + "last_name", + "w.*", + "y.*", + "last_name.*", + "x.*", + "z.*", + "first_name.*", + "emp_no.*" + ) + ); + } + + public void testForkWithMixOfCommands() { + assertFieldNames( + """ + FROM employees + | WHERE emp_no == 10048 OR emp_no == 10081 + | FORK ( EVAL a = CONCAT(first_name, " ", emp_no::keyword, " ", last_name) + | DISSECT a "%{x} %{y} %{z}" + | EVAL y = y::keyword ) + ( STATS x = COUNT(*)::keyword, y = MAX(emp_no)::keyword, z = MIN(emp_no)::keyword ) + ( SORT emp_no ASC | LIMIT 2 | EVAL x = last_name ) + ( EVAL x = "abc" | EVAL y = "aaa" ) + | KEEP _fork, emp_no, x, y, z, a + | SORT _fork, emp_no""", + Set.of( + "emp_no", + "x", + "y", + "z", + "a", + "first_name", + "last_name", + "y.*", + "last_name.*", + "x.*", + "z.*", + "first_name.*", + "a.*", + "emp_no.*" + ) + ); + } + + public void testForkWithFiltersOnConstantValues() { + assertFieldNames(""" + FROM employees + | EVAL z = 1 + | WHERE z == 1 + | FORK (WHERE emp_no == 10048 OR emp_no == 10081 | WHERE z - 1 == 0) + (WHERE emp_no == 10081 OR emp_no == 10087 | EVAL a = "x" ) + (STATS x = COUNT(*), y = MAX(emp_no), z = MIN(emp_no) | EVAL a = "y" ) + (STATS x = COUNT(*), y = MIN(emp_no)) + | WHERE _fork == "fork2" OR a == "y" + | KEEP _fork, emp_no, x, y, z + | SORT _fork, emp_no""", Set.of("emp_no", "a", "a.*", "emp_no.*")); + } + + public void testForkWithUnsupportedAttributes() { + assertFieldNames(""" + FROM heights + | FORK (SORT description DESC | LIMIT 1 | EVAL x = length(description) ) + (SORT description ASC | LIMIT 1) + | SORT _fork""", ALL_FIELDS); + } + + public void testForkAfterLookupJoin() { + assertFieldNames( + """ + FROM employees + | EVAL language_code = languages + | LOOKUP JOIN languages_lookup ON language_code + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + (WHERE emp_no == 10081 | EVAL language_name = "Klingon") + | KEEP _fork, emp_no, language_code, language_name + | SORT _fork, emp_no""", + Set.of( + "emp_no", + "language_code", + "language_name", + "languages", + "_fork", + "_fork.*", + "language_code.*", + "language_name.*", + "languages.*", + "emp_no.*" + ) + ); + } + + public void testForkBeforeLookupJoin() { + assertFieldNames( + """ + FROM employees + | EVAL language_code = languages + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + (WHERE emp_no == 10081 | EVAL language_name = "Klingon") + | LOOKUP JOIN languages_lookup ON language_code + | KEEP _fork, emp_no, language_code, language_name + | SORT _fork, emp_no""", + Set.of( + "emp_no", + "language_code", + "language_name", + "languages", + "_fork", + "_fork.*", + "language_code.*", + "language_name.*", + "languages.*", + "emp_no.*" + ) + ); + } + + public void testForkBranchWithLookupJoin() { + assertFieldNames( + """ + FROM employees + | EVAL language_code = languages + | FORK (WHERE emp_no == 10048 OR emp_no == 10081 | LOOKUP JOIN languages_lookup ON language_code) + (WHERE emp_no == 10081 OR emp_no == 10087 | LOOKUP JOIN languages_lookup ON language_code) + (WHERE emp_no == 10081 | EVAL language_name = "Klingon" | LOOKUP JOIN languages_lookup ON language_code) + | KEEP _fork, emp_no, language_code, language_name + | SORT _fork, emp_no""", + Set.of( + "emp_no", + "language_code", + "language_name", + "languages", + "_fork", + "_fork.*", + "language_code.*", + "language_name.*", + "languages.*", + "emp_no.*" + ) + ); + } + + public void testForkBeforeStats() { + assertFieldNames( + """ + FROM employees + | WHERE emp_no == 10048 OR emp_no == 10081 + | FORK ( EVAL a = CONCAT(first_name, " ", emp_no::keyword, " ", last_name) + | DISSECT a "%{x} %{y} %{z}" + | EVAL y = y::keyword ) + ( STATS x = COUNT(*)::keyword, y = MAX(emp_no)::keyword, z = MIN(emp_no)::keyword ) + ( SORT emp_no ASC | LIMIT 2 | EVAL x = last_name ) + ( EVAL x = "abc" | EVAL y = "aaa" ) + | STATS c = count(*), m = max(_fork)""", + Set.of("first_name", "emp_no", "last_name", "last_name.*", "first_name.*", "emp_no.*") + ); + } + + public void testForkBeforeStatsWithWhere() { + assertFieldNames(""" + FROM employees + | WHERE emp_no == 10048 OR emp_no == 10081 + | FORK ( EVAL a = CONCAT(first_name, " ", emp_no::keyword, " ", last_name) + | DISSECT a "%{x} %{y} %{z}" + | EVAL y = y::keyword ) + ( STATS x = COUNT(*)::keyword, y = MAX(emp_no)::keyword, z = MIN(emp_no)::keyword ) + ( SORT emp_no ASC | LIMIT 2 | EVAL x = last_name ) + ( EVAL x = "abc" | EVAL y = "aaa" ) + | STATS a = count(*) WHERE _fork == "fork1", + b = max(_fork)""", Set.of("first_name", "emp_no", "last_name", "last_name.*", "first_name.*", "emp_no.*")); + } + + public void testForkBeforeStatsByWithWhere() { + assertFieldNames(""" + FROM employees + | WHERE emp_no == 10048 OR emp_no == 10081 + | FORK ( EVAL a = CONCAT(first_name, " ", emp_no::keyword, " ", last_name) + | DISSECT a "%{x} %{y} %{z}" + | EVAL y = y::keyword ) + ( STATS x = COUNT(*)::keyword, y = MAX(emp_no)::keyword, z = MIN(emp_no)::keyword ) + ( SORT emp_no ASC | LIMIT 2 | EVAL x = last_name ) + ( EVAL x = "abc" | EVAL y = "aaa" ) + | STATS a = count(*) WHERE emp_no > 10000, + b = max(x) WHERE _fork == "fork1" BY _fork + | SORT _fork""", Set.of("emp_no", "x", "first_name", "last_name", "last_name.*", "x.*", "first_name.*", "emp_no.*")); + } + + public void testForkAfterDrop() { + assertFieldNames(""" + FROM languages + | DROP language_code + | FORK ( WHERE language_name == "English" | EVAL x = 1 ) + ( WHERE language_name != "English" ) + | SORT _fork, language_name""", ALL_FIELDS); + } + + public void testForkBranchWithDrop() { + assertFieldNames(""" + FROM languages + | FORK ( EVAL x = 1 | DROP language_code | WHERE language_name == "English" | DROP x ) + ( WHERE language_name != "English" ) + | SORT _fork, language_name + | KEEP language_name, language_code, _fork""", Set.of("language_name", "language_code", "language_code.*", "language_name.*")); + } + + public void testForkBeforeDrop() { + assertFieldNames(""" + FROM languages + | FORK (WHERE language_code == 1 OR language_code == 2) + (WHERE language_code == 1) + | DROP language_code + | SORT _fork, language_name""", ALL_FIELDS); + } + + public void testForkBranchWithKeep() { + assertFieldNames(""" + FROM languages + | FORK ( WHERE language_name == "English" | KEEP language_name, language_code ) + ( WHERE language_name != "English" ) + | SORT _fork, language_name""", Set.of("language_name", "language_code", "language_code.*", "language_name.*")); + } + + public void testForkBeforeRename() { + assertFieldNames(""" + FROM languages + | FORK (WHERE language_code == 1 OR language_code == 2) + (WHERE language_code == 1) + | RENAME language_code AS code + | SORT _fork, language_name""", ALL_FIELDS); + } + + public void testForkBranchWithRenameAs() { + assertFieldNames(""" + FROM languages + | FORK (RENAME language_code AS code | WHERE code == 1 OR code == 2) + (WHERE language_code == 1 | RENAME language_code AS x) + | SORT _fork, language_name + | KEEP code, language_name, x, _fork""", Set.of("language_name", "language_code", "language_code.*", "language_name.*")); + } + + public void testForkBranchWithRenameEquals() { + assertFieldNames(""" + FROM languages + | FORK (RENAME code = language_code | WHERE code == 1 OR code == 2) + (WHERE language_code == 1 | RENAME x = language_code) + | SORT _fork, language_name + | KEEP code, language_name, x, _fork""", Set.of("language_name", "language_code", "language_code.*", "language_name.*")); + } + + public void testForkAfterRename() { + assertFieldNames(""" + FROM languages + | RENAME language_code AS code + | FORK (WHERE code == 1 OR code == 2) + (WHERE code == 1) + | SORT _fork, language_name""", ALL_FIELDS); + } + + public void testForkBeforeDissect() { + assertFieldNames(""" + FROM employees + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | EVAL x = concat(gender, " foobar") + | DISSECT x "%{a} %{b}" + | SORT _fork, emp_no + | KEEP emp_no, gender, x, a, b, _fork""", Set.of("emp_no", "gender", "gender.*", "emp_no.*")); + } + + public void testForkBranchWithDissect() { + assertFieldNames(""" + FROM employees + | FORK (WHERE emp_no == 10048 OR emp_no == 10081 + | EVAL x = concat(gender, " foobar") + | DISSECT x "%{a} %{b}") + (WHERE emp_no == 10081 OR emp_no == 10087) + | SORT _fork, emp_no + | KEEP emp_no, gender, x, a, b, _fork""", Set.of("emp_no", "gender", "gender.*", "emp_no.*")); + } + + public void testForkAfterDissect() { + assertFieldNames(""" + FROM employees + | EVAL x = concat(gender, " foobar") + | DISSECT x "%{a} %{b}" + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | SORT _fork, emp_no + | KEEP emp_no, gender, x, a, b, _fork""", Set.of("emp_no", "gender", "gender.*", "emp_no.*")); + } + + public void testForkAfterEnrich() { + assertFieldNames( + """ + FROM addresses + | KEEP city.country.continent.planet.name, city.country.name, city.name + | EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") + | ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport + | FORK (WHERE city.name != "Amsterdam") + (WHERE city.country.name == "Japan") + | SORT _fork, city.name""", + Set.of( + "city.name", + "airport", + "city.country.continent.planet.name", + "city.country.name", + "city.country.continent.planet.name.*", + "city.name.*", + "city.country.name.*", + "airport.*" + ) + ); + } + + public void testForkBranchWithEnrich() { + assertFieldNames( + """ + FROM addresses + | KEEP city.country.continent.planet.name, city.country.name, city.name + | EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") + | FORK (ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport) + (ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport) + | SORT _fork, city.name""", + Set.of( + "city.name", + "airport", + "city.country.continent.planet.name", + "city.country.name", + "city.country.continent.planet.name.*", + "city.name.*", + "city.country.name.*", + "airport.*" + ) + ); + } + + public void testForkBeforeEnrich() { + assertFieldNames( + """ + FROM addresses + | KEEP city.country.continent.planet.name, city.country.name, city.name + | EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") + | FORK (WHERE city.country.name == "Netherlands") + (WHERE city.country.name != "Japan") + | ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport + | SORT _fork, city.name""", + Set.of( + "city.name", + "airport", + "city.country.name", + "city.country.continent.planet.name", + "city.country.continent.planet.name.*", + "city.name.*", + "city.country.name.*", + "airport.*" + ) + ); + } + + public void testForkBeforeMvExpand() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, job_positions + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | MV_EXPAND job_positions + | SORT _fork, emp_no, job_positions""", Set.of("emp_no", "job_positions", "job_positions.*", "emp_no.*")); + } + + public void testForkBranchWithMvExpand() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, job_positions + | FORK (WHERE emp_no == 10048 OR emp_no == 10081 | MV_EXPAND job_positions) + (WHERE emp_no == 10081 OR emp_no == 10087) + | SORT _fork, emp_no, job_positions""", Set.of("emp_no", "job_positions", "job_positions.*", "emp_no.*")); + } + + public void testForkAfterMvExpand() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, job_positions + | MV_EXPAND job_positions + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | SORT _fork, emp_no, job_positions""", Set.of("emp_no", "job_positions", "job_positions.*", "emp_no.*")); + } + + public void testForkBeforeInlineStatsIgnore() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, languages, gender + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | INLINESTATS max_lang = MAX(languages) BY gender + | SORT emp_no, gender, _fork + | LIMIT 5""", Set.of("emp_no", "gender", "languages", "gender.*", "languages.*", "emp_no.*")); + } + + public void testForkBranchWithInlineStatsIgnore() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, languages, gender + | FORK (WHERE emp_no == 10048 OR emp_no == 10081 + | INLINESTATS x = MAX(languages) BY gender) + (WHERE emp_no == 10081 OR emp_no == 10087 + | INLINESTATS x = MIN(languages)) + (WHERE emp_no == 10012 OR emp_no == 10012) + | SORT emp_no, gender, _fork""", Set.of("emp_no", "gender", "languages", "gender.*", "languages.*", "emp_no.*")); + } + + public void testForkAfterInlineStatsIgnore() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, languages, gender + | INLINESTATS max_lang = MAX(languages) BY gender + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | SORT emp_no, gender, _fork""", Set.of("emp_no", "gender", "languages", "gender.*", "languages.*", "emp_no.*")); + } + + public void testForkBeforeChangePoint() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, salary + | EVAL salary=CASE(emp_no==10042, 1000000, salary) + | FORK (WHERE emp_no > 10100) + (WHERE emp_no <= 10100) + | CHANGE_POINT salary ON emp_no + | STATS COUNT() by type + | SORT type""", Set.of("type", "emp_no", "salary", "type.*", "salary.*", "emp_no.*")); + } + + public void testForkBranchWithChangePoint() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, salary + | FORK (EVAL salary=CASE(emp_no==10042, 1000000, salary) + | CHANGE_POINT salary ON emp_no) + (EVAL salary=CASE(emp_no==10087, 1000000, salary) + | CHANGE_POINT salary ON emp_no) + | STATS COUNT() by type, _fork + | SORT _fork, type""", Set.of("type", "emp_no", "salary", "type.*", "salary.*", "emp_no.*")); + } + + public void testForkAfterChangePoint() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, salary + | EVAL salary = CASE(emp_no==10042, 1000000, salary) + | CHANGE_POINT salary ON emp_no + | FORK (STATS a = COUNT() by type) + (STATS b = VALUES(type)) + | SORT _fork, a, type, b""", Set.of("a", "type", "b", "emp_no", "salary", "type.*", "a.*", "salary.*", "b.*", "emp_no.*")); + } + + public void testForkBeforeCompletion() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, first_name, last_name + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | COMPLETION x=CONCAT(first_name, " ", last_name) WITH { "inference_id" : "test_completion" } + | SORT _fork, emp_no""", Set.of("emp_no", "first_name", "last_name", "last_name.*", "first_name.*", "emp_no.*")); + } + + public void testForkBranchWithCompletion() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, first_name, last_name + | FORK (WHERE emp_no == 10048 OR emp_no == 10081 + | COMPLETION x=CONCAT(first_name, " ", last_name) WITH { "inference_id" : "test_completion" }) + (WHERE emp_no == 10081 OR emp_no == 10087) + | SORT _fork, emp_no""", Set.of("emp_no", "first_name", "last_name", "last_name.*", "first_name.*", "emp_no.*")); + } + + public void testForkAfterCompletion() { + assertFieldNames(""" + FROM employees + | KEEP emp_no, first_name, last_name + | COMPLETION x=CONCAT(first_name, " ", last_name) WITH { "inference_id" : "test_completion" } + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | SORT _fork, emp_no""", Set.of("emp_no", "first_name", "last_name", "last_name.*", "first_name.*", "emp_no.*")); + } + + public void testForkAfterGrok() { + assertFieldNames(""" + FROM employees + | EVAL x = concat(gender, " foobar") + | GROK x "%{WORD:a} %{WORD:b}" + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | SORT _fork, emp_no + | KEEP emp_no, gender, x, a, b, _fork""", Set.of("emp_no", "gender", "gender.*", "emp_no.*")); + } + + public void testForkBranchWithGrok() { + assertFieldNames( + """ + FROM employees + | WHERE emp_no == 10048 OR emp_no == 10081 + | FORK (EVAL a = CONCAT(first_name, " ", emp_no::keyword, " ", last_name) + | GROK a "%{WORD:x} %{WORD:y} %{WORD:z}" ) + (EVAL b = CONCAT(last_name, " ", emp_no::keyword, " ", first_name) + | GROK b "%{WORD:x} %{WORD:y} %{WORD:z}" ) + | KEEP _fork, emp_no, x, y, z + | SORT _fork, emp_no""", + Set.of("emp_no", "x", "y", "z", "first_name", "last_name", "y.*", "last_name.*", "x.*", "z.*", "first_name.*", "emp_no.*") + ); + } + + public void testForkBeforeGrok() { + assertFieldNames(""" + FROM employees + | FORK (WHERE emp_no == 10048 OR emp_no == 10081) + (WHERE emp_no == 10081 OR emp_no == 10087) + | EVAL x = concat(gender, " foobar") + | GROK x "%{WORD:a} %{WORD:b}" + | SORT _fork, emp_no + | KEEP emp_no, gender, x, a, b, _fork""", Set.of("emp_no", "gender", "gender.*", "emp_no.*")); + } + private void assertFieldNames(String query, Set expected) { assertFieldNames(query, new EnrichResolution(), expected, Set.of()); } From 7bc4a870ae75e3ece0d36f41144a6c67ce271a5e Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Mon, 4 Aug 2025 15:18:03 -0400 Subject: [PATCH 14/21] Address feedback --- .../xpack/esql/core/tree/Node.java | 31 +++++++++------- .../xpack/esql/session/FieldNameUtils.java | 36 ++++++++++--------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java index 850a1f3a5ca65..7bc329f9321da 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java @@ -8,12 +8,14 @@ import org.elasticsearch.common.io.stream.NamedWriteable; import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException; +import org.elasticsearch.xpack.esql.core.util.Holder; import java.util.ArrayList; import java.util.BitSet; import java.util.Iterator; import java.util.List; import java.util.Objects; +import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; @@ -65,28 +67,33 @@ public List children() { } @SuppressWarnings("unchecked") - public boolean forEachDownMayReturnEarly(Function action) { - if (action.apply((T) this) == false) { + void forEachDownMayReturnEarly(BiConsumer> action, Holder breakEarly) { + action.accept((T) this, breakEarly); + if (breakEarly.get()) { // Early return. - return false; + return; } // please do not refactor it to a for-each loop to avoid // allocating iterator that performs concurrent modification checks and extra stack frames for (int c = 0, size = children.size(); c < size; c++) { - if (children.get(c).forEachDownMayReturnEarly(action) == false) { - return false; + children.get(c).forEachDownMayReturnEarly(action, breakEarly); + if (breakEarly.get()) { + // Early return. + return; } } - return true; } - @SuppressWarnings("unchecked") + public boolean forEachDownMayReturnEarly(BiConsumer> action) { + var breakEarly = new Holder<>(false); + forEachDownMayReturnEarly(action, breakEarly); + return breakEarly.get(); + } + public void forEachDown(Consumer action) { - forEachDownMayReturnEarly(p -> { - action.accept(p); - // No early return. - return true; - }); + boolean result = forEachDownMayReturnEarly((p, breakFlag) -> { action.accept(p); }); + // We should be breaking early here... + assert result == false; } @SuppressWarnings("unchecked") diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 1532001be3e7d..2f337c263deb5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -49,7 +49,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.function.Function; +import java.util.function.BiConsumer; import java.util.stream.Collectors; import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD; @@ -107,31 +107,36 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso Set wildcardJoinIndices = new java.util.HashSet<>(); var canRemoveAliases = new Holder<>(true); - var needsAllFields = new Holder<>(false); - var processingLambda = new Holder>(); - processingLambda.set((LogicalPlan p) -> {// go over each plan top-down + var processingLambda = new Holder>>(); + processingLambda.set((LogicalPlan p, Holder breakEarly) -> {// go over each plan top-down if (p instanceof Fork fork) { - // Early return from forEachDown. We will iterate over the children manually. + // Early return from forEachDown. We will iterate over the children manually and end the recursion via forEachDown early. var forkRefsResult = AttributeSet.builder(); forkRefsResult.addAll(referencesBuilder.get()); - for (var child : fork.children()) { + for (var fork_child : fork.children()) { referencesBuilder.set(AttributeSet.builder()); - var return_result = child.forEachDownMayReturnEarly(processingLambda.get()); - // No nested Forks for now... - assert return_result; + var nested_early_return = fork_child.forEachDownMayReturnEarly(processingLambda.get()); + // This assert is just for good measure. FORKs within FORKs is yet not supported. + assert nested_early_return == false; + + // See below, no references, means we should return all fields (*). if (referencesBuilder.get().isEmpty()) { - needsAllFields.set(true); - // Early return. - return false; + projectAll.set(true); + // Return early, we'll be returning all references no matter what the remainder of the query is. + breakEarly.set(true); + return; } forkRefsResult.addAll(referencesBuilder.get()); } forkRefsResult.removeIf(attr -> attr.name().equals(Fork.FORK_FIELD)); referencesBuilder.set(forkRefsResult); - return false; + + // Return early, we've already explored all fork branches. + breakEarly.set(true); + return; } else if (p instanceof RegexExtract re) { // for Grok and Dissect // keep the inputs needed by Grok/Dissect referencesBuilder.get().addAll(re.input().references()); @@ -210,13 +215,10 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso .removeIf(attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))); }); } - - // No early return. - return true; }); parsed.forEachDownMayReturnEarly(processingLambda.get()); - if (needsAllFields.get()) { + if (projectAll.get()) { return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); } From b01732101c435ea557f0025a8b65cddcf6ca0852 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Mon, 4 Aug 2025 15:31:50 -0400 Subject: [PATCH 15/21] not --- .../main/java/org/elasticsearch/xpack/esql/core/tree/Node.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java index 7bc329f9321da..27c9b868ee269 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java @@ -92,7 +92,7 @@ public boolean forEachDownMayReturnEarly(BiConsumer> public void forEachDown(Consumer action) { boolean result = forEachDownMayReturnEarly((p, breakFlag) -> { action.accept(p); }); - // We should be breaking early here... + // We should not be breaking early here... assert result == false; } From d14bf24f8fdf24fbdfb85b58481e2e377428170b Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Wed, 6 Aug 2025 15:09:51 -0400 Subject: [PATCH 16/21] Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java Co-authored-by: Andrei Stefan --- .../xpack/esql/session/FieldNameUtils.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 2f337c263deb5..074833ab4a49b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -117,11 +117,13 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso for (var fork_child : fork.children()) { referencesBuilder.set(AttributeSet.builder()); - var nested_early_return = fork_child.forEachDownMayReturnEarly(processingLambda.get()); + var isNestedFork = forkBranch.forEachDownMayReturnEarly(forEachDownProcessor.get()); // This assert is just for good measure. FORKs within FORKs is yet not supported. - assert nested_early_return == false; - - // See below, no references, means we should return all fields (*). + assert isNestedFork == false : "Nested FORKs are not yet supported"; + // This is a safety measure for fork where the list of fields returned is empty. + // It can be empty for a branch that does need all the fields. For example "fork (where true) (where a is not null)" + // but it can also be empty for queries where NO fields are needed from ES, + // for example "fork (eval x = 1 | keep x) (eval y = 1 | keep y)" but we cannot establish this yet. if (referencesBuilder.get().isEmpty()) { projectAll.set(true); // Return early, we'll be returning all references no matter what the remainder of the query is. From 2931d833c8c0fdbe2fb9d4a82832f7b509e1674d Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Wed, 6 Aug 2025 15:10:03 -0400 Subject: [PATCH 17/21] Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java Co-authored-by: Andrei Stefan --- .../org/elasticsearch/xpack/esql/session/FieldNameUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 074833ab4a49b..87b6ab39d7d94 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -115,7 +115,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso var forkRefsResult = AttributeSet.builder(); forkRefsResult.addAll(referencesBuilder.get()); - for (var fork_child : fork.children()) { + for (var forkBranch : fork.children()) { referencesBuilder.set(AttributeSet.builder()); var isNestedFork = forkBranch.forEachDownMayReturnEarly(forEachDownProcessor.get()); // This assert is just for good measure. FORKs within FORKs is yet not supported. From 82672be93216cff83646105ce97ee7cdfa3e2e72 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Wed, 6 Aug 2025 15:10:12 -0400 Subject: [PATCH 18/21] Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java Co-authored-by: Andrei Stefan --- .../org/elasticsearch/xpack/esql/session/FieldNameUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 87b6ab39d7d94..49ac9d54aa70d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -108,7 +108,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso var canRemoveAliases = new Holder<>(true); - var processingLambda = new Holder>>(); + var forEachDownProcessor = new Holder>>(); processingLambda.set((LogicalPlan p, Holder breakEarly) -> {// go over each plan top-down if (p instanceof Fork fork) { // Early return from forEachDown. We will iterate over the children manually and end the recursion via forEachDown early. From 94da34bb0b1a0e8bb40ad0b36e83b1aa65b5eb58 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Wed, 6 Aug 2025 15:16:26 -0400 Subject: [PATCH 19/21] Update --- .../elasticsearch/xpack/esql/session/FieldNameUtils.java | 4 ++-- .../xpack/esql/session/FieldNameUtilsTests.java | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 49ac9d54aa70d..844f7cb1989df 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -109,7 +109,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso var canRemoveAliases = new Holder<>(true); var forEachDownProcessor = new Holder>>(); - processingLambda.set((LogicalPlan p, Holder breakEarly) -> {// go over each plan top-down + forEachDownProcessor.set((LogicalPlan p, Holder breakEarly) -> {// go over each plan top-down if (p instanceof Fork fork) { // Early return from forEachDown. We will iterate over the children manually and end the recursion via forEachDown early. var forkRefsResult = AttributeSet.builder(); @@ -218,7 +218,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, EnrichReso }); } }); - parsed.forEachDownMayReturnEarly(processingLambda.get()); + parsed.forEachDownMayReturnEarly(forEachDownProcessor.get()); if (projectAll.get()) { return new PreAnalysisResult(enrichResolution, IndexResolver.ALL_FIELDS, Set.of()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java index 579ef2bc6a5c3..2a245ca300cfb 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java @@ -73,6 +73,13 @@ public void testDirectFilter() { ); } + public void testForkEval() { + assertFieldNames( + "FROM employees | fork (eval x = 1 | keep x) (eval y = 2 | keep y) (eval z = 3 | keep z)", + Set.of("*") + ); + } + public void testSort1() { assertFieldNames( "from employees | sort still_hired, emp_no | keep emp_no, still_hired | limit 3", From 9606c04d89e57cf03e7682202775c1f639dd2b43 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 6 Aug 2025 19:29:26 +0000 Subject: [PATCH 20/21] [CI] Auto commit changes from spotless --- .../xpack/esql/session/FieldNameUtilsTests.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java index 2a245ca300cfb..b6f90beb81a21 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java @@ -74,10 +74,7 @@ public void testDirectFilter() { } public void testForkEval() { - assertFieldNames( - "FROM employees | fork (eval x = 1 | keep x) (eval y = 2 | keep y) (eval z = 3 | keep z)", - Set.of("*") - ); + assertFieldNames("FROM employees | fork (eval x = 1 | keep x) (eval y = 2 | keep y) (eval z = 3 | keep z)", Set.of("*")); } public void testSort1() { From 3f55419e282fa4d3ad25c29da26097d099b0b11c Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Thu, 7 Aug 2025 18:07:25 -0400 Subject: [PATCH 21/21] Separate implementation --- .../xpack/esql/core/tree/Node.java | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java index 27c9b868ee269..cedc86dd3b690 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/tree/Node.java @@ -66,6 +66,25 @@ public List children() { return children; } + @SuppressWarnings("unchecked") + public void forEachDown(Consumer action) { + action.accept((T) this); + // please do not refactor it to a for-each loop to avoid + // allocating iterator that performs concurrent modification checks and extra stack frames + for (int c = 0, size = children.size(); c < size; c++) { + children.get(c).forEachDown(action); + } + } + + /** + * Same as forEachDown, but can end the traverse early, by setting the boolean argument in the action. + */ + public boolean forEachDownMayReturnEarly(BiConsumer> action) { + var breakEarly = new Holder<>(false); + forEachDownMayReturnEarly(action, breakEarly); + return breakEarly.get(); + } + @SuppressWarnings("unchecked") void forEachDownMayReturnEarly(BiConsumer> action, Holder breakEarly) { action.accept((T) this, breakEarly); @@ -84,18 +103,6 @@ void forEachDownMayReturnEarly(BiConsumer> action, Ho } } - public boolean forEachDownMayReturnEarly(BiConsumer> action) { - var breakEarly = new Holder<>(false); - forEachDownMayReturnEarly(action, breakEarly); - return breakEarly.get(); - } - - public void forEachDown(Consumer action) { - boolean result = forEachDownMayReturnEarly((p, breakFlag) -> { action.accept(p); }); - // We should not be breaking early here... - assert result == false; - } - @SuppressWarnings("unchecked") public void forEachDown(Class typeToken, Consumer action) { forEachDown(t -> {