diff --git a/docs/changelog/127687.yaml b/docs/changelog/127687.yaml
new file mode 100644
index 0000000000000..e053c4a31ad2e
--- /dev/null
+++ b/docs/changelog/127687.yaml
@@ -0,0 +1,6 @@
+pr: 127687
+summary: "ESQL: Fix alias removal in regex extraction with JOIN"
+area: ES|QL
+type: bug
+issues:
+ - 127467
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec
index b7dc04a46f6e2..63a47424ac314 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec
@@ -1592,3 +1592,72 @@ null | Milky Way | Marunouchi
null | null | null
null | null | null
;
+
+
+joinMaskingRegex
+// https://github.com/elastic/elasticsearch/issues/127467
+required_capability: union_types
+required_capability: join_lookup_v12
+required_capability: fix_join_masking_regex_extract
+from books,message_*,ul*
+| enrich languages_policy on status
+| drop `language_name`, `bytes_out`, `id`, id
+| dissect book_no "%{type}"
+| dissect author.keyword "%{HZicfARaID}"
+| mv_expand `status`
+| sort HZicfARaID, year DESC NULLS LAST, publisher DESC NULLS FIRST, description DESC, type NULLS LAST, message ASC NULLS LAST, title NULLS FIRST, status NULLS LAST
+| enrich languages_policy on book_no
+| grok message "%{WORD:DiLNyZKNDu}"
+| limit 7972
+| rename year as language_code
+| lookup join languages_lookup on language_code
+| limit 13966
+| stats rcyIZnSOb = min(language_code), `ratings` = min(@timestamp), dgDxwMeFYrD = count(`@timestamp`), ifyZfXigqVN = count(*), qTXdrzSpY = min(language_code) by author.keyword
+| rename author.keyword as message
+| lookup join message_types_lookup on message
+| stats `ratings` = count(*) by type
+| stats `type` = count(type), `ratings` = count(*)
+| keep `ratings`, ratings
+;
+
+ratings:long
+1
+;
+
+joinMaskingDissect
+// https://github.com/elastic/elasticsearch/issues/127467
+required_capability: join_lookup_v12
+required_capability: fix_join_masking_regex_extract
+from sample_data
+| dissect message "%{type}"
+| drop type
+| lookup join message_types_lookup on message
+| stats count = count(*) by type
+| keep count
+| sort count
+;
+count:long
+1
+3
+3
+;
+
+
+joinMaskingGrok
+// https://github.com/elastic/elasticsearch/issues/127467
+required_capability: join_lookup_v12
+required_capability: fix_join_masking_regex_extract
+from sample_data
+| grok message "%{WORD:type}"
+| drop type
+| lookup join message_types_lookup on message
+| stats max = max(event_duration) by type
+| keep max
+| sort max
+;
+
+max:long
+1232382
+3450233
+8268153
+;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
index b4210cad2d9d4..f1245c019c9ce 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -895,7 +895,13 @@ public enum Cap {
* Support for keeping `DROP` attributes when resolving field names.
* see ES|QL: no matches for pattern #126418
*/
- DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL;
+ DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL,
+
+ /**
+ * During resolution (pre-analysis) we have to consider that joins can override regex extracted values
+ * see ES|QL: pruning of JOINs leads to missing fields #127467
+ */
+ FIX_JOIN_MASKING_REGEX_EXTRACT;
private final boolean enabled;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java
index 5e139f3a1963c..a77d0852ca311 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java
@@ -41,6 +41,8 @@
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
+import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
+import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute;
import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar;
import org.elasticsearch.xpack.esql.core.util.Holder;
@@ -594,11 +596,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy
parsed.forEachDown(p -> {// go over each plan top-down
if (p instanceof RegexExtract re) { // for Grok and Dissect
- // remove other down-the-tree references to the extracted fields
- for (Attribute extracted : re.extractedFields()) {
- referencesBuilder.removeIf(attr -> matchByName(attr, extracted.name(), false));
- }
- // but keep the inputs needed by Grok/Dissect
+ // keep the inputs needed by Grok/Dissect
referencesBuilder.addAll(re.input().references());
} else if (p instanceof Enrich enrich) {
AttributeSet enrichFieldRefs = Expressions.references(enrich.enrichFields());
@@ -653,15 +651,19 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
// for example "from test | eval x = salary | stats max = max(x) by gender"
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
+ // also remove other down-the-tree references to the extracted fields from "grok" and "dissect"
AttributeSet planRefs = p.references();
Set fieldNames = planRefs.names();
- p.forEachExpressionDown(Alias.class, alias -> {
+ p.forEachExpressionDown(NamedExpression.class, ne -> {
+ if ((ne instanceof Alias || ne instanceof ReferenceAttribute) == false) {
+ return;
+ }
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
- if (fieldNames.contains(alias.name())) {
+ if (fieldNames.contains(ne.name())) {
return;
}
- referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), shadowingRefsBuilder.contains(attr)));
+ referencesBuilder.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefsBuilder.contains(attr)));
});
}
});
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java
index ad132aba74a28..1de69a7a70bde 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java
@@ -1341,6 +1341,53 @@ public void testDissectOverwriteName() {
assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "first_name", "first_name.*")));
}
+ /**
+ * Fix alias removal in regex extraction with JOIN
+ * @see ES|QL: pruning of JOINs leads to missing fields
+ */
+ public void testAvoidGrokAttributesRemoval() {
+ assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
+ Set fieldNames = fieldNames("""
+ from message_types
+ | eval type = 1
+ | lookup join message_types_lookup on message
+ | drop message
+ | grok type "%{WORD:b}"
+ | stats x = max(b)
+ | keep x""", Set.of());
+ assertThat(fieldNames, equalTo(Set.of("message", "x", "x.*", "message.*")));
+ }
+
+ public void testAvoidGrokAttributesRemoval2() {
+ assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
+ Set fieldNames = fieldNames("""
+ from sample_data
+ | dissect message "%{type}"
+ | drop type
+ | lookup join message_types_lookup on message
+ | stats count = count(*) by type
+ | keep count
+ | sort count""", Set.of());
+ assertThat(fieldNames, equalTo(Set.of("type", "message", "count", "message.*", "type.*", "count.*")));
+ }
+
+ public void testAvoidGrokAttributesRemoval3() {
+ assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
+ Set fieldNames = fieldNames("""
+ from sample_data
+ | grok message "%{WORD:type}"
+ | drop type
+ | lookup join message_types_lookup on message
+ | stats max = max(event_duration) by type
+ | keep max
+ | sort max""", Set.of());
+ assertThat(
+ fieldNames,
+ equalTo(Set.of("type", "event_duration", "message", "max", "event_duration.*", "message.*", "type.*", "max.*"))
+ );
+
+ }
+
public void testEnrichOnDefaultField() {
Set fieldNames = fieldNames("""
from employees