Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/126614.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 126614
summary: Fix join masking eval
area: ES|QL
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,13 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
// Awaiting fixes
"estimated row size \\[0\\] wasn't set", // https://github.com/elastic/elasticsearch/issues/121739
"unknown physical plan node \\[OrderExec\\]", // https://github.com/elastic/elasticsearch/issues/120817
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741
//
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741,
"Plan \\[ProjectExec\\[\\[<no-fields>.* optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/125866
"only supports KEYWORD or TEXT values, found expression", // https://github.com/elastic/elasticsearch/issues/126017
"token recognition error at: '``", // https://github.com/elastic/elasticsearch/issues/125870
"Unknown column \\[.*\\]", // https://github.com/elastic/elasticsearch/issues/126026
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
"No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1527,3 +1527,45 @@ from *
salary_change.long:double|foo:long
5.0 |1698069301543123456
;


joinMaskingEval
required_capability: join_lookup_v12
required_capability: fix_join_masking_eval
from languag*
| eval type = null
| rename language_name as message
| lookup join message_types_lookup on message
| rename type as message
| lookup join message_types_lookup on message
| keep `language.name`
;

ignoreOrder:true
language.name:text
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
English
French
Spanish
German
;
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,13 @@ public enum Cap {
* the ownership of that block - but didn't account for the fact that the caller might close it, leading to double releases
* in some union type queries. C.f. https://github.com/elastic/elasticsearch/issues/125850
*/
FIX_DOUBLY_RELEASED_NULL_BLOCKS_IN_VALUESOURCEREADER;
FIX_DOUBLY_RELEASED_NULL_BLOCKS_IN_VALUESOURCEREADER,

/**
* During resolution (pre-analysis) we have to consider that joins or enriches can override EVALuated values
* https://github.com/elastic/elasticsearch/issues/126419
*/
FIX_JOIN_MASKING_EVAL;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,21 @@
import org.elasticsearch.xpack.esql.parser.QueryParams;
import org.elasticsearch.xpack.esql.plan.IndexPattern;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
import org.elasticsearch.xpack.esql.plan.logical.Drop;
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Filter;
import org.elasticsearch.xpack.esql.plan.logical.InlineStats;
import org.elasticsearch.xpack.esql.plan.logical.Insist;
import org.elasticsearch.xpack.esql.plan.logical.Keep;
import org.elasticsearch.xpack.esql.plan.logical.Limit;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.plan.logical.MvExpand;
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
import org.elasticsearch.xpack.esql.plan.logical.Project;
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
import org.elasticsearch.xpack.esql.plan.logical.Rename;
import org.elasticsearch.xpack.esql.plan.logical.TopN;
import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation;
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
Expand Down Expand Up @@ -574,6 +584,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
var keepJoinRefsBuilder = AttributeSet.builder();
Set<String> wildcardJoinIndices = new java.util.HashSet<>();

boolean[] canRemoveAliases = new boolean[] { true };

parsed.forEachDown(p -> {// go over each plan top-down
if (p instanceof RegexExtract re) { // for Grok and Dissect
// remove other down-the-tree references to the extracted fields
Expand Down Expand Up @@ -619,20 +631,37 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
}
}

// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
// for example "from test | eval x = salary | stats max = max(x) by gender"
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
AttributeSet planRefs = p.references();
Set<String> fieldNames = planRefs.names();
p.forEachExpressionDown(Alias.class, alias -> {
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id = id"
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
if (fieldNames.contains(alias.name())) {
return;
}
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
});
// If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of
// command that we may add in the future which can override already defined Aliases with EVAL
// (for example
//
// from test
// | eval ip = 123
// | enrich ips_policy ON hostname
// | rename ip AS my_ip
//
// and ips_policy enriches the results with the same name ip field),
// these aliases should be kept in the list of fields.
if (canRemoveAliases[0] && couldOverrideAliases(p)) {
canRemoveAliases[0] = false;
}
if (canRemoveAliases[0]) {
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
// for example "from test | eval x = salary | stats max = max(x) by gender"
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
AttributeSet planRefs = p.references();
Set<String> fieldNames = planRefs.names();
p.forEachExpressionDown(Alias.class, alias -> {
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
if (fieldNames.contains(alias.name())) {
return;
}
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
});
}
});

// Add JOIN ON column references afterward to avoid Alias removal
referencesBuilder.addAll(keepJoinRefsBuilder);
// If any JOIN commands need wildcard field-caps calls, persist the index names
Expand All @@ -656,6 +685,30 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
}
}

/**
* Could a plan "accidentally" override aliases?
* Examples are JOIN and ENRICH, that _could_ produce fields with the same
* name of an existing alias, based on their index mapping.
* Here we just have to consider commands where this information is not available before index resolution,
* eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know.
*/
private static boolean couldOverrideAliases(LogicalPlan p) {
return (p instanceof Aggregate
|| p instanceof Drop
|| p instanceof Eval
|| p instanceof Filter
|| p instanceof InlineStats
|| p instanceof Insist
|| p instanceof Keep
|| p instanceof Limit
|| p instanceof MvExpand
|| p instanceof OrderBy
|| p instanceof Project
|| p instanceof RegexExtract
|| p instanceof Rename
|| p instanceof TopN) == false;
}

private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {
boolean isPattern = Regex.isSimpleMatchPattern(attr.name());
if (skipIfPattern && isPattern) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,13 +478,16 @@ public void testDropAllColumns_WithStats() {
}

public void testEnrichOn() {
assertFieldNames("""
from employees
| sort emp_no
| limit 1
| eval x = to_string(languages)
| enrich languages_policy on x
| keep emp_no, language_name""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*"));
assertFieldNames(
"""
from employees
| sort emp_no
| limit 1
| eval x = to_string(languages)
| enrich languages_policy on x
| keep emp_no, language_name""",
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
);
}

public void testEnrichOn2() {
Expand All @@ -494,7 +497,7 @@ public void testEnrichOn2() {
| enrich languages_policy on x
| keep emp_no, language_name
| sort emp_no
| limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*"));
| limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*"));
}

public void testUselessEnrich() {
Expand All @@ -512,15 +515,15 @@ public void testSimpleSortLimit() {
| enrich languages_policy on x
| keep emp_no, language_name
| sort emp_no
| limit 1""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*"));
| limit 1""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*"));
}

public void testWith() {
assertFieldNames(
"""
from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 1
| enrich languages_policy on x with language_name""",
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
);
}

Expand All @@ -529,7 +532,7 @@ public void testWithAlias() {
"""
from employees | sort emp_no | limit 3 | eval x = to_string(languages) | keep emp_no, x
| enrich languages_policy on x with lang = language_name""",
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
);
}

Expand All @@ -538,7 +541,7 @@ public void testWithAliasSort() {
"""
from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 3
| enrich languages_policy on x with lang = language_name""",
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
);
}

Expand All @@ -547,7 +550,7 @@ public void testWithAliasAndPlain() {
"""
from employees | sort emp_no desc | limit 3 | eval x = to_string(languages) | keep emp_no, x
| enrich languages_policy on x with lang = language_name, language_name""",
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
);
}

Expand All @@ -556,7 +559,7 @@ public void testWithTwoAliasesSameProp() {
"""
from employees | sort emp_no | limit 1 | eval x = to_string(languages) | keep emp_no, x
| enrich languages_policy on x with lang = language_name, lang2 = language_name""",
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
);
}

Expand All @@ -565,7 +568,7 @@ public void testRedundantWith() {
"""
from employees | sort emp_no | limit 1 | eval x = to_string(languages) | keep emp_no, x
| enrich languages_policy on x with language_name, language_name""",
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
);
}

Expand All @@ -588,28 +591,34 @@ public void testConstantNullInput() {
| eval x = to_string(languages)
| keep emp_no, x
| enrich languages_policy on x with language_name, language_name""",
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*")
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*")
);
}

public void testEnrichEval() {
assertFieldNames("""
from employees
| eval x = to_string(languages)
| enrich languages_policy on x with lang = language_name
| eval language = concat(x, "-", lang)
| keep emp_no, x, lang, language
| sort emp_no desc | limit 3""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*"));
assertFieldNames(
"""
from employees
| eval x = to_string(languages)
| enrich languages_policy on x with lang = language_name
| eval language = concat(x, "-", lang)
| keep emp_no, x, lang, language
| sort emp_no desc | limit 3""",
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*")
);
}

public void testSimple() {
assertFieldNames("""
from employees
| eval x = 1, y = to_string(languages)
| enrich languages_policy on y
| where x > 1
| keep emp_no, language_name
| limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*"));
assertFieldNames(
"""
from employees
| eval x = 1, y = to_string(languages)
| enrich languages_policy on y
| where x > 1
| keep emp_no, language_name
| limit 1""",
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "y", "x.*", "y.*")
);
}

public void testEvalNullSort() {
Expand Down Expand Up @@ -1653,6 +1662,54 @@ public void testInsist_multiFieldMappedMultiIndex() {
);
}

public void testJoinMaskingKeep() {
assertFieldNames(
"""
from languag*
| eval type = null
| rename language_name as message
| lookup join message_types_lookup on message
| rename type as message
| lookup join message_types_lookup on message
| keep `language.name`""",
Set.of("language.name", "type", "language_name", "message", "language_name.*", "message.*", "type.*", "language.name.*")
);
}

public void testJoinMaskingKeep2() {
assertFieldNames("""
from languag*
| eval type = "foo"
| rename type as message
| lookup join message_types_lookup on message
| rename type as message
| lookup join message_types_lookup on message
| keep `language.name`""", Set.of("language.name", "type", "message", "message.*", "type.*", "language.name.*"));
}

public void testEnrichMaskingEvalOn() {
assertFieldNames("""
from employees
| eval language_name = null
| enrich languages_policy on languages
| rename language_name as languages
| eval languages = length(languages)
| enrich languages_policy on languages
| keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*"));
}

public void testEnrichAndJoinMaskingEvalWh() {
assertFieldNames("""
from employees
| eval language_name = null
| enrich languages_policy on languages
| rename language_name as languages
| eval languages = length(languages)
| enrich languages_policy on languages
| lookup join message_types_lookup on language_name
| keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*"));
}

private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();
Expand Down