Skip to content

Commit 6eaa285

Browse files
committed
Initial support for TEXT fields in LOOKUP JOIN condition (elastic#119473)
When the join field on the right-hand side is a TEXT field, we cannot do an exact match. Since ES|QL treats TEXT fields as KEYWORD in all cases, ideally we would like to do the same for JOIN. However, this is achieved on the left-hand index in a way that is not easily achievable on the right-hand side. Comparing filtering and field extraction of left and right:

* `FROM left`
  * FieldExtraction is done using the `field.keyword` subfield if it exists, or from `_source` otherwise
  * Filtering is done by pushing down to Lucene `field.keyword` if it exists, or by not pushing down and filtering the value extracted from `_source` inside the compute engine itself
* `LOOKUP JOIN right`
  * FieldExtraction is done simplistically, with no `_source` extraction
  * Filtering pushdown can be done with `field.keyword` if it exists, but we have no easy solution to filtering otherwise

The decision taken is to disallow joining on TEXT fields, but allow explicit joining on the underlying keyword field (explicit in the query):

| left type | right type | result |
| --- | --- | --- |
| KEYWORD | KEYWORD | ✅ Works |
| TEXT | KEYWORD | ✅ Works |
| KEYWORD | TEXT | ❌ Type mismatch error |
| TEXT | TEXT | ❌ Type mismatch error |

Example queries for each row of the table, in the same order:

```
FROM test | LOOKUP JOIN `test-lookup` ON color.keyword
```

```
FROM test | RENAME color AS x | EVAL color.keyword = x | LOOKUP JOIN `test-lookup` ON color.keyword
```

```
FROM test | EVAL color = color.keyword | LOOKUP JOIN `test-lookup` ON color
```

```
FROM test | LOOKUP JOIN `test-lookup` ON color
```
1 parent fc1182b commit 6eaa285

File tree

6 files changed

+201
-26
lines changed

6 files changed

+201
-26
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ language_code:integer | language_name:keyword | country:text
488488
;
489489

490490
###########################################################################
491-
# nested filed join behavior with languages_nested_fields index
491+
# nested field join behavior with languages_nested_fields index
492492
###########################################################################
493493

494494
joinOnNestedField
@@ -536,6 +536,34 @@ language.id:integer | language.name:text | language.name.keyword:keyword
536536
1 | English | English
537537
;
538538

539+
joinOnNestedNestedFieldRowExplicitKeyword
540+
required_capability: join_lookup_v11
541+
required_capability: lookup_join_text
542+
543+
ROW language.name.keyword = "English"
544+
| LOOKUP JOIN languages_nested_fields ON language.name.keyword
545+
| KEEP language.id, language.name, language.name.keyword
546+
;
547+
548+
language.id:integer | language.name:text | language.name.keyword:keyword
549+
1 | English | English
550+
;
551+
552+
joinOnNestedNestedFieldRowExplicitKeywords
553+
required_capability: join_lookup_v11
554+
required_capability: lookup_join_text
555+
556+
ROW language.name.keyword = ["English", "French"]
557+
| MV_EXPAND language.name.keyword
558+
| LOOKUP JOIN languages_nested_fields ON language.name.keyword
559+
| KEEP language.id, language.name, language.name.keyword, language.code
560+
;
561+
562+
language.id:integer | language.name:text | language.name.keyword:keyword | language.code:keyword
563+
1 | English | English | EN
564+
2 | French | French | FR
565+
;
566+
539567
###############################################
540568
# Tests with clientips_lookup index
541569
###############################################

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,11 @@ public enum Cap {
573573
*/
574574
JOIN_LOOKUP_V11(Build.current().isSnapshot()),
575575

576+
/**
577+
* LOOKUP JOIN with TEXT fields on the right side of the join (#119473)
578+
*/
579+
LOOKUP_JOIN_TEXT(Build.current().isSnapshot()),
580+
576581
/**
577582
* LOOKUP JOIN without MV matching (https://github.com/elastic/elasticsearch/issues/118780)
578583
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,10 @@
1919
import org.elasticsearch.compute.data.Page;
2020
import org.elasticsearch.compute.operator.lookup.QueryList;
2121
import org.elasticsearch.core.Releasables;
22-
import org.elasticsearch.index.mapper.MappedFieldType;
2322
import org.elasticsearch.index.query.SearchExecutionContext;
2423
import org.elasticsearch.index.shard.ShardId;
2524
import org.elasticsearch.tasks.TaskId;
2625
import org.elasticsearch.transport.TransportService;
27-
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
2826
import org.elasticsearch.xpack.esql.action.EsqlQueryAction;
2927
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
3028
import org.elasticsearch.xpack.esql.core.tree.Source;
@@ -79,9 +77,7 @@ protected TransportRequest transportRequest(LookupFromIndexService.Request reque
7977

8078
@Override
8179
protected QueryList queryList(TransportRequest request, SearchExecutionContext context, Block inputBlock, DataType inputDataType) {
82-
MappedFieldType fieldType = context.getFieldType(request.matchField);
83-
validateTypes(request.inputDataType, fieldType);
84-
return termQueryList(fieldType, context, inputBlock, inputDataType).onlySingleValues();
80+
return termQueryList(context.getFieldType(request.matchField), context, inputBlock, inputDataType).onlySingleValues();
8581
}
8682

8783
@Override
@@ -99,15 +95,6 @@ protected String getRequiredPrivilege() {
9995
return null;
10096
}
10197

102-
private static void validateTypes(DataType inputDataType, MappedFieldType fieldType) {
103-
// TODO: consider supporting implicit type conversion as done in ENRICH for some types
104-
if (fieldType.typeName().equals(inputDataType.typeName()) == false) {
105-
throw new EsqlIllegalArgumentException(
106-
"LOOKUP JOIN match and input types are incompatible: match[" + fieldType.typeName() + "], input[" + inputDataType + "]"
107-
);
108-
}
109-
}
110-
11198
public static class Request extends AbstractLookupService.Request {
11299
private final String matchField;
113100

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.Objects;
2929

3030
import static org.elasticsearch.xpack.esql.common.Failure.fail;
31+
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
3132
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes;
3233
import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT;
3334

@@ -216,7 +217,7 @@ public void postAnalysisVerification(Failures failures) {
216217
for (int i = 0; i < config.leftFields().size(); i++) {
217218
Attribute leftField = config.leftFields().get(i);
218219
Attribute rightField = config.rightFields().get(i);
219-
if (leftField.dataType() != rightField.dataType()) {
220+
if (leftField.dataType().noText() != rightField.dataType().noText()) {
220221
failures.add(
221222
fail(
222223
leftField,
@@ -228,6 +229,11 @@ public void postAnalysisVerification(Failures failures) {
228229
)
229230
);
230231
}
232+
if (rightField.dataType().equals(TEXT)) {
233+
failures.add(
234+
fail(leftField, "JOIN with right field [{}] of type [{}] is not supported", rightField.name(), rightField.dataType())
235+
);
236+
}
231237
}
232238
}
233239
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,12 @@
5454
import org.elasticsearch.xpack.esql.core.expression.Attribute;
5555
import org.elasticsearch.xpack.esql.core.expression.Expression;
5656
import org.elasticsearch.xpack.esql.core.expression.Expressions;
57+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
5758
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
5859
import org.elasticsearch.xpack.esql.core.expression.Literal;
5960
import org.elasticsearch.xpack.esql.core.expression.NameId;
6061
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
62+
import org.elasticsearch.xpack.esql.core.expression.TypedAttribute;
6163
import org.elasticsearch.xpack.esql.core.tree.Source;
6264
import org.elasticsearch.xpack.esql.core.type.DataType;
6365
import org.elasticsearch.xpack.esql.core.util.Holder;
@@ -571,35 +573,49 @@ private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlan
571573
throw new IllegalArgumentException("can't plan [" + join + "], found index with mode [" + entry.getValue() + "]");
572574
}
573575
String indexName = entry.getKey();
574-
List<Layout.ChannelAndType> matchFields = new ArrayList<>(join.leftFields().size());
575-
for (Attribute m : join.leftFields()) {
576-
Layout.ChannelAndType t = source.layout.get(m.id());
577-
if (t == null) {
578-
throw new IllegalArgumentException("can't plan [" + join + "][" + m + "]");
576+
if (join.leftFields().size() != join.rightFields().size()) {
577+
throw new IllegalArgumentException("can't plan [" + join + "]: mismatching left and right field count");
578+
}
579+
List<MatchConfig> matchFields = new ArrayList<>(join.leftFields().size());
580+
for (int i = 0; i < join.leftFields().size(); i++) {
581+
TypedAttribute left = (TypedAttribute) join.leftFields().get(i);
582+
FieldAttribute right = (FieldAttribute) join.rightFields().get(i);
583+
Layout.ChannelAndType input = source.layout.get(left.id());
584+
if (input == null) {
585+
throw new IllegalArgumentException("can't plan [" + join + "][" + left + "]");
579586
}
580-
matchFields.add(t);
587+
matchFields.add(new MatchConfig(right, input));
581588
}
582589
if (matchFields.size() != 1) {
583-
throw new IllegalArgumentException("can't plan [" + join + "]");
590+
throw new IllegalArgumentException("can't plan [" + join + "]: multiple join predicates are not supported");
584591
}
592+
// TODO support multiple match fields, and support more than equality predicates
593+
MatchConfig matchConfig = matchFields.get(0);
585594

586595
return source.with(
587596
new LookupFromIndexOperator.Factory(
588597
sessionId,
589598
parentTask,
590599
context.queryPragmas().enrichMaxWorkers(),
591-
matchFields.get(0).channel(),
600+
matchConfig.channel(),
592601
lookupFromIndexService,
593-
matchFields.get(0).type(),
602+
matchConfig.type(),
594603
indexName,
595-
join.leftFields().get(0).name(),
604+
matchConfig.fieldName(),
596605
join.addedFields().stream().map(f -> (NamedExpression) f).toList(),
597606
join.source()
598607
),
599608
layout
600609
);
601610
}
602611

612+
private record MatchConfig(String fieldName, int channel, DataType type) {
613+
private MatchConfig(FieldAttribute match, Layout.ChannelAndType input) {
614+
// Note, this handles TEXT fields with KEYWORD subfields
615+
this(match.exactAttribute().name(), input.channel(), input.type());
616+
}
617+
}
618+
603619
private PhysicalOperation planLocal(LocalSourceExec localSourceExec, LocalExecutionPlannerContext context) {
604620
Layout.Builder layout = new Layout.Builder();
605621
layout.append(localSourceExec.output());
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
---
2+
setup:
3+
- requires:
4+
test_runner_features: [capabilities, contains]
5+
capabilities:
6+
- method: POST
7+
path: /_query
8+
parameters: []
9+
capabilities: [lookup_join_text]
10+
reason: "uses LOOKUP JOIN"
11+
- do:
12+
indices.create:
13+
index: test
14+
body:
15+
mappings:
16+
properties:
17+
color:
18+
type: text
19+
fields:
20+
keyword:
21+
type: keyword
22+
description:
23+
type: text
24+
fields:
25+
keyword:
26+
type: keyword
27+
- do:
28+
indices.create:
29+
index: test-lookup
30+
body:
31+
settings:
32+
index:
33+
mode: lookup
34+
number_of_shards: 1
35+
mappings:
36+
properties:
37+
color:
38+
type: text
39+
fields:
40+
keyword:
41+
type: keyword
42+
description:
43+
type: text
44+
fields:
45+
keyword:
46+
type: keyword
47+
- do:
48+
bulk:
49+
index: "test"
50+
refresh: true
51+
body:
52+
- { "index": { } }
53+
- { "color": "red", "description": "The color Red" }
54+
- { "index": { } }
55+
- { "color": "blue", "description": "The color Blue" }
56+
- { "index": { } }
57+
- { "color": "green", "description": "The color Green" }
58+
- do:
59+
bulk:
60+
index: "test-lookup"
61+
refresh: true
62+
body:
63+
- { "index": { } }
64+
- { "color": "red", "description": "As red as a tomato" }
65+
- { "index": { } }
66+
- { "color": "blue", "description": "As blue as the sky" }
67+
68+
---
69+
keyword-keyword:
70+
- do:
71+
esql.query:
72+
body:
73+
query: 'FROM test | SORT color | LOOKUP JOIN `test-lookup` ON color.keyword | LIMIT 3'
74+
75+
- length: { columns: 4 }
76+
- length: { values: 3 }
77+
- match: {columns.0.name: "color.keyword"}
78+
- match: {columns.0.type: "keyword"}
79+
- match: {columns.1.name: "color"}
80+
- match: {columns.1.type: "text"}
81+
- match: {columns.2.name: "description"}
82+
- match: {columns.2.type: "text"}
83+
- match: {columns.3.name: "description.keyword"}
84+
- match: {columns.3.type: "keyword"}
85+
- match: {values.0: ["blue", "blue", "As blue as the sky", "As blue as the sky"]}
86+
- match: {values.1: ["green", null, null, null]}
87+
- match: {values.2: ["red", "red", "As red as a tomato", "As red as a tomato"]}
88+
89+
---
90+
text-keyword:
91+
- do:
92+
esql.query:
93+
body:
94+
query: 'FROM test | SORT color | RENAME color AS x | EVAL color.keyword = x | LOOKUP JOIN `test-lookup` ON color.keyword | LIMIT 3'
95+
96+
- length: { columns: 5 }
97+
- length: { values: 3 }
98+
- match: {columns.0.name: "x"}
99+
- match: {columns.0.type: "text"}
100+
- match: {columns.1.name: "color.keyword"}
101+
- match: {columns.1.type: "text"}
102+
- match: {columns.2.name: "color"}
103+
- match: {columns.2.type: "text"}
104+
- match: {columns.3.name: "description"}
105+
- match: {columns.3.type: "text"}
106+
- match: {columns.4.name: "description.keyword"}
107+
- match: {columns.4.type: "keyword"}
108+
- match: {values.0: ["blue", "blue", "blue", "As blue as the sky", "As blue as the sky"]}
109+
- match: {values.1: ["green", "green", null, null, null]}
110+
- match: {values.2: ["red", "red", "red", "As red as a tomato", "As red as a tomato"]}
111+
112+
---
113+
text-text:
114+
- do:
115+
esql.query:
116+
body:
117+
query: 'FROM test | SORT color | LOOKUP JOIN `test-lookup` ON color | LIMIT 3'
118+
catch: "bad_request"
119+
120+
- match: { error.type: "verification_exception" }
121+
- contains: { error.reason: "Found 1 problem\nline 1:55: JOIN with right field [color] of type [TEXT] is not supported" }
122+
123+
---
124+
keyword-text:
125+
- do:
126+
esql.query:
127+
body:
128+
query: 'FROM test | SORT color | EVAL color = color.keyword | LOOKUP JOIN `test-lookup` ON color | LIMIT 3'
129+
catch: "bad_request"
130+
131+
- match: { error.type: "verification_exception" }
132+
- contains: { error.reason: "Found 1 problem\nline 1:84: JOIN with right field [color] of type [TEXT] is not supported" }
133+

0 commit comments

Comments
 (0)