Skip to content

Commit 89bb134

Browse files
ESQL: Performance improvements for Lookup Join on Expression (#135036)
Implement two performance improvements for Lookup Join on Expression: (1) Do not send duplicate columns to the lookup node more than once. For example, the column left_id is now sent just once instead of 3 times, even though it is used 3 times in the expression. (2) Use the termQueryList that we build for field-based joins for expression-based joins too, when the expression uses the equals operator. This allows us to take advantage of the optimizations made for field-based joins and apply them to expression-based joins whenever possible. We end up doing less work per row, so we expect this to be faster. Covered by existing UTs: lookup-join-expression.LookupJoinOnSameLeftFieldTwice and multiple UTs on equals. Benchmarks show only a moderate improvement, from 50 sec to 43 sec (14% improvement), in esql_lookup_join_expression_eq_100k_keys_where_few_matching_like_limit_10000.
1 parent d6d2fa8 commit 89bb134

File tree

3 files changed

+49
-11
lines changed

3 files changed

+49
-11
lines changed

docs/changelog/135036.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135036
2+
summary: Performance improvements for Lookup Join on Expression
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/ExpressionQueryList.java

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.xpack.esql.core.expression.Attribute;
2525
import org.elasticsearch.xpack.esql.core.expression.Expression;
2626
import org.elasticsearch.xpack.esql.expression.predicate.Predicates;
27+
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
2728
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison;
2829
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
2930
import org.elasticsearch.xpack.esql.plan.physical.EsSourceExec;
@@ -38,6 +39,7 @@
3839
import java.util.List;
3940

4041
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.LOOKUP_JOIN_ON_BOOLEAN_EXPRESSION;
42+
import static org.elasticsearch.xpack.esql.enrich.AbstractLookupService.termQueryList;
4143
import static org.elasticsearch.xpack.esql.planner.TranslatorHandler.TRANSLATOR_HANDLER;
4244

4345
/**
@@ -154,17 +156,31 @@ private void buildJoinOnForExpressionJoin(
154156
if (right instanceof Attribute rightAttribute) {
155157
MappedFieldType fieldType = context.getFieldType(rightAttribute.name());
156158
if (fieldType != null) {
157-
queryLists.add(
158-
new BinaryComparisonQueryList(
159+
// special handle Equals operator
160+
// TermQuery is faster than BinaryComparisonQueryList, as it does less work per row
161+
// so here we reuse the existing logic from field based join to build a termQueryList for Equals
162+
if (binaryComparison instanceof Equals) {
163+
QueryList termQueryForEquals = termQueryList(
159164
fieldType,
160165
context,
161-
block,
162-
binaryComparison,
163-
clusterService,
164166
aliasFilter,
165-
warnings
166-
)
167-
);
167+
inputPage.getBlock(matchFields.get(i).channel()),
168+
matchFields.get(i).type()
169+
).onlySingleValues(warnings, "LOOKUP JOIN encountered multi-value");
170+
queryLists.add(termQueryForEquals);
171+
} else {
172+
queryLists.add(
173+
new BinaryComparisonQueryList(
174+
fieldType,
175+
context,
176+
block,
177+
binaryComparison,
178+
clusterService,
179+
aliasFilter,
180+
warnings
181+
)
182+
);
183+
}
168184
matched = true;
169185
break;
170186
} else {

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexOperator.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@
3131

3232
import java.io.IOException;
3333
import java.util.ArrayList;
34+
import java.util.HashSet;
3435
import java.util.Iterator;
3536
import java.util.List;
3637
import java.util.Objects;
3738
import java.util.Optional;
39+
import java.util.Set;
3840
import java.util.function.Function;
3941

4042
// TODO rename package
@@ -143,10 +145,11 @@ public LookupFromIndexOperator(
143145

144146
@Override
145147
protected void performAsync(Page inputPage, ActionListener<OngoingJoin> listener) {
146-
Block[] inputBlockArray = new Block[matchFields.size()];
147148
List<MatchConfig> newMatchFields = new ArrayList<>();
148-
for (int i = 0; i < matchFields.size(); i++) {
149-
MatchConfig matchField = matchFields.get(i);
149+
List<MatchConfig> uniqueMatchFields = uniqueMatchFieldsByName(matchFields);
150+
Block[] inputBlockArray = new Block[uniqueMatchFields.size()];
151+
for (int i = 0; i < uniqueMatchFields.size(); i++) {
152+
MatchConfig matchField = uniqueMatchFields.get(i);
150153
int inputChannel = matchField.channel();
151154
final Block inputBlock = inputPage.getBlock(inputChannel);
152155
inputBlockArray[i] = inputBlock;
@@ -176,6 +179,20 @@ protected void performAsync(Page inputPage, ActionListener<OngoingJoin> listener
176179
);
177180
}
178181

182+
private List<MatchConfig> uniqueMatchFieldsByName(List<MatchConfig> matchFields) {
183+
if (joinOnConditions == null) {
184+
return matchFields;
185+
}
186+
List<MatchConfig> uniqueFields = new ArrayList<>();
187+
Set<String> seenFieldNames = new HashSet<>();
188+
for (MatchConfig matchField : matchFields) {
189+
if (seenFieldNames.add(matchField.fieldName())) {
190+
uniqueFields.add(matchField);
191+
}
192+
}
193+
return uniqueFields;
194+
}
195+
179196
@Override
180197
public Page getOutput() {
181198
if (ongoing == null) {

0 commit comments

Comments
 (0)