From f8f047887e9ef0df7719a7a55a7d58406a048cfd Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Tue, 27 May 2025 11:26:52 +0200 Subject: [PATCH 1/3] Improve field resolution for FORK --- muted-tests.yml | 6 -- .../xpack/esql/session/EsqlSession.java | 60 +++++++++++-------- .../session/IndexResolverFieldNamesTests.java | 52 +++++----------- 3 files changed, 49 insertions(+), 69 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index ba48bf5d1efcf..6122665d7ce43 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -453,12 +453,6 @@ tests: - class: org.elasticsearch.packaging.test.DockerTests method: test121CanUseStackLoggingConfig issue: https://github.com/elastic/elasticsearch/issues/128165 -- class: org.elasticsearch.xpack.esql.session.IndexResolverFieldNamesTests - method: testForkFieldsWithKeepAfterFork - issue: https://github.com/elastic/elasticsearch/issues/128271 -- class: org.elasticsearch.xpack.esql.session.IndexResolverFieldNamesTests - method: testForkWithStatsInAllBranches - issue: https://github.com/elastic/elasticsearch/issues/128272 - class: org.elasticsearch.packaging.test.DockerTests method: test080ConfigurePasswordThroughEnvironmentVariableFile issue: https://github.com/elastic/elasticsearch/issues/128075 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index cc8f77da62f08..b5216039e71bb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -596,31 +596,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy return result.withFieldNames(IndexResolver.ALL_FIELDS); } - Holder projectAfterFork = new Holder<>(false); - Holder hasFork = new Holder<>(false); - - parsed.forEachDown(plan -> { - if (projectAll.get()) { - return; - } - - if (hasFork.get() == false && shouldCollectReferencedFields(plan, inlinestatsAggs)) { - projectAfterFork.set(true); - } - - if (plan instanceof Fork fork && projectAfterFork.get() == false) { - hasFork.set(true); - fork.children().forEach(child -> { - if (child.anyMatch(p -> shouldCollectReferencedFields(p, inlinestatsAggs)) == false) { - projectAll.set(true); - } - }); - } - }); - - if (projectAll.get()) { - return result.withFieldNames(IndexResolver.ALL_FIELDS); - } + boolean hasFork = parsed.anyMatch(p -> p instanceof Fork); + Holder seenFork = new Holder<>(false); var referencesBuilder = AttributeSet.builder(); // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some @@ -637,7 +614,36 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy boolean[] canRemoveAliases = new boolean[] { true }; + PreAnalysisResult initialResult = result; + projectAll.set(false); parsed.forEachDown(p -> {// go over each plan top-down + if (hasFork && seenFork.get() == false && p instanceof Fork == false) { + return; + } + + if (projectAll.get()) { + return; + } + + if (p instanceof Fork fork) { + seenFork.set(true); + + Set names = new HashSet<>(); + for (var subPlan : fork.children()) { + PreAnalysisResult subPlanResult = fieldNames(subPlan, enrichPolicyMatchFields, initialResult); + + if (subPlanResult.fieldNames.equals(IndexResolver.ALL_FIELDS)) { + projectAll.set(true); + return; + } + names.addAll(subPlanResult.fieldNames); + } + + for (var attrName : names) { + referencesBuilder.add(new UnresolvedAttribute(fork.source(), attrName)); + } + } + if (p instanceof RegexExtract re) { // for Grok and Dissect // keep the inputs needed by Grok/Dissect referencesBuilder.addAll(re.input().references()); @@ -711,6 +717,10 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } }); + if (projectAll.get()) { + return result.withFieldNames(IndexResolver.ALL_FIELDS); + } + // Add JOIN ON column references afterward to avoid Alias removal referencesBuilder.addAll(keepJoinRefsBuilder); // If any JOIN commands need wildcard field-caps calls, persist the index names diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 84734dd77e871..66ae8f50a3d83 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -1994,7 +1994,7 @@ public void testForkFieldsWithKeepAfterFork() { (WHERE d > 1000 AND e == "aaa" | EVAL c = a + 200) | WHERE x > y | KEEP a, b, c, d, x - """, Set.of("a", "a.*", "c", "c.*", "d", "d.*", "e", "e.*", "x", "x.*", "y", "y.*")); + """, ALL_FIELDS); } public void testForkFieldsWithKeepBeforeFork() { @@ -2002,7 +2002,7 @@ public void testForkFieldsWithKeepBeforeFork() { assertFieldNames(""" FROM test - | KEEP a, b, c, d, x + | KEEP a, b, c, d, x, y | WHERE a > 2000 | EVAL b = a + 100 | FORK (WHERE c > 1 AND a < 10000 | EVAL d = a + 500) @@ -2041,41 +2041,17 @@ public void testForkFieldsWithEnrichAndLookupJoins() { assumeTrue("FORK available as snapshot only", EsqlCapabilities.Cap.FORK.isEnabled()); assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); - assertFieldNames( - """ - FROM test - | KEEP a, b, abc, def, z, xyz - | ENRICH enrich_policy ON abc - | EVAL b = a + 100 - | LOOKUP JOIN my_lookup_index ON def - | FORK (WHERE c > 1 AND a < 10000 | EVAL d = a + 500) - (STATS x = count(*), y=min(z)) - | LOOKUP JOIN my_lookup_index ON xyz - | WHERE x > y OR _fork == "fork1" - """, - Set.of( - "x", - "y", - "_fork", - "a", - "c", - "abc", - "b", - "def", - "z", - "xyz", - "def.*", - "_fork.*", - "y.*", - "x.*", - "xyz.*", - "z.*", - "abc.*", - "a.*", - "c.*", - "b.*" - ) - ); + assertFieldNames(""" + FROM test + | KEEP a, b, abc, def, z, xyz + | ENRICH enrich_policy ON abc + | EVAL b = a + 100 + | LOOKUP JOIN my_lookup_index ON def + | FORK (WHERE c > 1 AND a < 10000 | EVAL d = a + 500) + (STATS x = count(*), y=min(z)) + | LOOKUP JOIN my_lookup_index ON xyz + | WHERE x > y OR _fork == "fork1" + """, Set.of("a", "c", "abc", "b", "def", "z", "xyz", "def.*", "xyz.*", "z.*", "abc.*", "a.*", "c.*", "b.*")); } public void testForkWithStatsInAllBranches() { @@ -2089,7 +2065,7 @@ public void testForkWithStatsInAllBranches() { (EVAL z = a * b | STATS m = max(z)) (STATS x = count(*), y=min(z)) | WHERE x > y - """, Set.of("a", "a.*", "b", "b.*", "c", "c.*", "z", "z.*")); + """, Set.of("a", "a.*", "c", "c.*", "z", "z.*")); } public void testForkWithStatsAndWhere() { From 1474874a94936dadb50427206bbe0ef968a988ec Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Tue, 27 May 2025 18:04:16 +0200 Subject: [PATCH 2/3] Request all fields when FORK is used --- .../xpack/esql/session/EsqlSession.java | 35 +++---------------- .../session/IndexResolverFieldNamesTests.java | 6 ++-- 2 files changed, 8 insertions(+), 33 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index b5216039e71bb..4d0d8e871ed67 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -584,6 +584,11 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy return result.withFieldNames(IndexResolver.ALL_FIELDS); } + // TODO: Improve field resolution for FORK - right now we request all fields + if (parsed.anyMatch(p -> p instanceof Fork)) { + return result.withFieldNames(IndexResolver.ALL_FIELDS); + } + Holder projectAll = new Holder<>(false); parsed.forEachExpressionDown(UnresolvedStar.class, us -> {// explicit "*" fields selection if (projectAll.get()) { @@ -596,9 +601,6 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy return result.withFieldNames(IndexResolver.ALL_FIELDS); } - boolean hasFork = parsed.anyMatch(p -> p instanceof Fork); - Holder seenFork = new Holder<>(false); - var referencesBuilder = AttributeSet.builder(); // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some // attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields @@ -617,33 +619,6 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy PreAnalysisResult initialResult = result; projectAll.set(false); parsed.forEachDown(p -> {// go over each plan top-down - if (hasFork && seenFork.get() == false && p instanceof Fork == false) { - return; - } - - if (projectAll.get()) { - return; - } - - if (p instanceof Fork fork) { - seenFork.set(true); - - Set names = new HashSet<>(); - for (var subPlan : fork.children()) { - PreAnalysisResult subPlanResult = fieldNames(subPlan, enrichPolicyMatchFields, initialResult); - - if (subPlanResult.fieldNames.equals(IndexResolver.ALL_FIELDS)) { - projectAll.set(true); - return; - } - names.addAll(subPlanResult.fieldNames); - } - - for (var attrName : names) { - referencesBuilder.add(new UnresolvedAttribute(fork.source(), attrName)); - } - } - if (p instanceof RegexExtract re) { // for Grok and Dissect // keep the inputs needed by Grok/Dissect referencesBuilder.addAll(re.input().references()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 66ae8f50a3d83..224abd60a5ff7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -2008,7 +2008,7 @@ public void testForkFieldsWithKeepBeforeFork() { | FORK (WHERE c > 1 AND a < 10000 | EVAL d = a + 500) (WHERE d > 1000 AND e == "aaa" | EVAL c = a + 200) | WHERE x > y - """, Set.of("a", "a.*", "b", "b.*", "c", "c.*", "d", "d.*", "e", "e.*", "x", "x.*", "y", "y.*")); + """, ALL_FIELDS); } public void testForkFieldsWithNoProjection() { @@ -2051,7 +2051,7 @@ public void testForkFieldsWithEnrichAndLookupJoins() { (STATS x = count(*), y=min(z)) | LOOKUP JOIN my_lookup_index ON xyz | WHERE x > y OR _fork == "fork1" - """, Set.of("a", "c", "abc", "b", "def", "z", "xyz", "def.*", "xyz.*", "z.*", "abc.*", "a.*", "c.*", "b.*")); + """, ALL_FIELDS); } public void testForkWithStatsInAllBranches() { @@ -2065,7 +2065,7 @@ public void testForkWithStatsInAllBranches() { (EVAL z = a * b | STATS m = max(z)) (STATS x = count(*), y=min(z)) | WHERE x > y - """, Set.of("a", "a.*", "c", "c.*", "z", "z.*")); + """, ALL_FIELDS); } public void testForkWithStatsAndWhere() { From c2fa62fabf7f4aef9768ed368f481e3604d9a538 Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Wed, 28 May 2025 10:10:59 +0200 Subject: [PATCH 3/3] Remove unused variable --- .../org/elasticsearch/xpack/esql/session/EsqlSession.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 4d0d8e871ed67..ef9d556cc27e0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -616,8 +616,6 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy boolean[] canRemoveAliases = new boolean[] { true }; - PreAnalysisResult initialResult = result; - projectAll.set(false); parsed.forEachDown(p -> {// go over each plan top-down if (p instanceof RegexExtract re) { // for Grok and Dissect // keep the inputs needed by Grok/Dissect @@ -692,10 +690,6 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } }); - if (projectAll.get()) { - return result.withFieldNames(IndexResolver.ALL_FIELDS); - } - // Add JOIN ON column references afterward to avoid Alias removal referencesBuilder.addAll(keepJoinRefsBuilder); // If any JOIN commands need wildcard field-caps calls, persist the index names