Skip to content

Commit 375510b

Browse files
ESQL: Fix lookup join filter pushdown to use semantic equality (#136818) (#136999)
Previously, when pushing down filters to the right side of a lookup join, we used simple equality (contains) to check for duplicate filters. This could result in semantically equivalent filters being added multiple times if they were different object instances. This commit changes the duplicate check to use `semanticEquals()` instead, ensuring that filters with the same semantic meaning are properly deduplicated regardless of object identity. Closes #136599 (cherry picked from commit f54987f) # Conflicts: # x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java Co-authored-by: kanoshiou <[email protected]>
1 parent 3a7bef4 commit 375510b

File tree

6 files changed

+115
-1
lines changed

6 files changed

+115
-1
lines changed

docs/changelog/136818.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 136818
2+
summary: "ESQL: Fix lookup join filter pushdown to use semantic equality"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 136599

x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ public MultiClusterSpecIT(
119119
// Lookup join after LIMIT is not supported in CCS yet
120120
"LookupJoinAfterLimitAndRemoteEnrich",
121121
"LookupJoinExpressionAfterLimitAndRemoteEnrich",
122+
"LookupJoinWithSemanticFilterDeduplicationComplex",
122123
// Lookup join after FORK is not support in CCS yet
123124
"ForkBeforeLookupJoin"
124125
);

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5647,3 +5647,46 @@ from sample_data,languages_mixed_numerics,apps
56475647
null |null |null |null |32767.0 |32767.0 |32768.0 |32767 |32767 |32767.0 |32767 |null |null |null |null
56485648
null |null |null |null |128.0 |128.0 |128.0 |128 |128 |128.0 |128 |null |null |null |null
56495649
;
5650+
5651+
5652+
lookupJoinWithSemanticFilterDeduplication
5653+
required_capability: join_lookup_v12
5654+
required_capability: lookup_join_semantic_filter_dedup
5655+
5656+
from languages
5657+
| lookup join languages_lookup on language_name
5658+
| where NOT language_code <= 50 OR language_name == "English"
5659+
| where language_code > 50
5660+
| where language_name == "English"
5661+
;
5662+
5663+
language_name:keyword | language_code:integer
5664+
;
5665+
5666+
5667+
// https://github.com/elastic/elasticsearch/issues/136599
5668+
lookupJoinWithSemanticFilterDeduplicationComplex
5669+
required_capability: join_lookup_v12
5670+
required_capability: lookup_join_semantic_filter_dedup
5671+
5672+
from languages_lookup_non_unique_ke*
5673+
| enrich languages_policy on language_name
5674+
| sort country DESC, language_code ASC NULLS LAST, language_name NULLS FIRST
5675+
| limit 14081
5676+
| keep `language_name`, `country`, *, `country.keyword`
5677+
| mv_expand country.keyword
5678+
| grok language_name "%{WORD:YEcxuCOHsGzb} %{WORD:country.keyword}"
5679+
| rename country.keyword as other1
5680+
| rename language_code as other2
5681+
| lookup join multi_column_joinable_lookup on other1, other2
5682+
| where NOT id_int <= 50 OR NOT false AND NOT other2 == 50
5683+
| where false AND NOT true OR id_int > 50 AND true
5684+
| where NOT false AND other2 == 50
5685+
;
5686+
5687+
warning:Line 13:24: evaluation of [other2 == 50] failed, treating result as null. Only first 20 failures recorded.
5688+
warning:Line 13:24: java.lang.IllegalArgumentException: single-value function encountered multi-value
5689+
5690+
language_name:keyword | country:text | other2:integer | YEcxuCOHsGzb:keyword | other1:keyword |id_int:integer | ip_addr:ip |is_active_bool:boolean | name_str:keyword
5691+
5692+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,6 +1629,14 @@ public enum Cap {
16291629
* Temporarily forbid the use of an explicit or implicit LIMIT before INLINE STATS.
16301630
*/
16311631
FORBID_LIMIT_BEFORE_INLINE_STATS(INLINE_STATS.enabled),
1632+
1633+
/**
1634+
* Fix for lookup join filter pushdown not using semantic equality.
1635+
* This prevents duplicate filters from being pushed down when they are semantically equivalent, causing an infinite loop where
1636+
* BooleanSimplification will simplify the original and duplicate filters, so they'll be pushed down again...
1637+
*/
1638+
LOOKUP_JOIN_SEMANTIC_FILTER_DEDUP,
1639+
16321640
// Last capability should still have a comma for fewer merge conflicts when adding new ones :)
16331641
// This comment prevents the semicolon from being on the previous capability when Spotless formats the file.
16341642
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFilters.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,9 @@ private static LogicalPlan pushDownPastJoin(Filter filter, Join join, FoldContex
171171

172172
List<Expression> existingFilters = new ArrayList<>(Predicates.splitAnd(existingRightFilter.condition()));
173173
int sizeBefore = existingFilters.size();
174-
rightPushableFilters.stream().filter(e -> existingFilters.contains(e) == false).forEach(existingFilters::add);
174+
rightPushableFilters.stream()
175+
.filter(e -> existingFilters.stream().anyMatch(x -> x.semanticEquals(e)) == false)
176+
.forEach(existingFilters::add);
175177
if (sizeBefore != existingFilters.size()) {
176178
right = existingRightFilter.with(Predicates.combineAnd(existingFilters));
177179
join = (Join) join.replaceRight(right);

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
9090
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison;
9191
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan;
92+
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThanOrEqual;
9293
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In;
9394
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.InsensitiveEquals;
9495
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.LessThan;
@@ -9233,4 +9234,57 @@ public void testLookupJoinExpressionSameAttrsDifferentConditions() {
92339234
// Verify the right side of join is EsRelation with LOOKUP
92349235
as(join.right(), EsRelation.class);
92359236
}
9237+
9238+
/**
9239+
* Project[[_meta_field{f}#16, emp_no{f}#10, first_name{f}#11 AS language_name#4, gender{f}#12, hire_date{f}#17, job{f}#1
9240+
* 8, job.raw{f}#19, languages{f}#13, last_name{f}#14, long_noidx{f}#20, salary{f}#15, language_code{f}#21]]
9241+
* \_Limit[1000[INTEGER],false]
9242+
* \_Filter[NOT(language_code{f}#21 >= 50[INTEGER])]
9243+
* \_Join[LEFT,[first_name{f}#11],[language_name{f}#22],null]
9244+
* |_Filter[first_name{f}#11 == [KEYWORD]]
9245+
* | \_EsRelation[test][_meta_field{f}#16, emp_no{f}#10, first_name{f}#11, ..]
9246+
* \_Filter[language_code{f}#21 &lt; 50[INTEGER]]
9247+
* \_EsRelation[languages_lookup][LOOKUP][language_code{f}#21, language_name{f}#22]
9248+
*/
9249+
public void LookupJoinSemanticFilterDeupPushdown() {
9250+
LogicalPlan plan = optimizedPlan("""
9251+
from test
9252+
| rename first_name as language_name
9253+
| lookup join languages_lookup on language_name
9254+
| where NOT language_code >= 50 OR language_name == ""
9255+
| where language_code < 50
9256+
| where language_name == ""
9257+
""");
9258+
9259+
var project = as(plan, Project.class);
9260+
var limit = as(project.child(), Limit.class);
9261+
var filter = as(limit.child(), Filter.class);
9262+
9263+
// Verify the top-level filter is NOT(language_code >= 50)
9264+
var not = as(filter.condition(), Not.class);
9265+
var gte = as(not.field(), GreaterThanOrEqual.class);
9266+
assertThat(Expressions.name(gte.left()), equalTo("language_code"));
9267+
assertThat(gte.right().fold(FoldContext.small()), equalTo(50));
9268+
9269+
// Verify the join structure
9270+
var join = as(filter.child(), Join.class);
9271+
assertThat(join.config().type(), equalTo(JoinTypes.LEFT));
9272+
9273+
// Verify left side has filter for language_name == ""
9274+
var leftFilter = as(join.left(), Filter.class);
9275+
var leftEquals = as(leftFilter.condition(), Equals.class);
9276+
assertThat(Expressions.name(leftEquals.left()), equalTo("first_name"));
9277+
assertThat(leftEquals.right().fold(FoldContext.small()), equalTo(new BytesRef("")));
9278+
9279+
var leftRelation = as(leftFilter.child(), EsRelation.class);
9280+
9281+
// Verify right side has filter for language_code < 50
9282+
var rightFilter = as(join.right(), Filter.class);
9283+
var rightLt = as(rightFilter.condition(), LessThan.class);
9284+
assertThat(Expressions.name(rightLt.left()), equalTo("language_code"));
9285+
assertThat(rightLt.right().fold(FoldContext.small()), equalTo(50));
9286+
9287+
var rightRelation = as(rightFilter.child(), EsRelation.class);
9288+
}
9289+
92369290
}

0 commit comments

Comments
 (0)