Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a4f3b5e
Initial support for TEXT fields in LOOKUP JOIN condition
craigtaverner Jan 2, 2025
c1744c9
Better approach, using KEYWORD subfield
craigtaverner Jan 3, 2025
8eb6554
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
69572e5
Add new capability to block mixed-cluster tests
craigtaverner Jan 3, 2025
417d3df
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
e799ef9
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
772659d
Merge branch 'main' into lookup_join_text
craigtaverner Jan 7, 2025
93779f9
Simplify lookup-join validation and remove keyword subfield validation
craigtaverner Jan 16, 2025
cbcd532
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 16, 2025
0146742
Added yaml tests for failing TEXT lookup and added validation for that
craigtaverner Jan 16, 2025
e327808
Fixed failing tests
craigtaverner Jan 16, 2025
77fc5bf
Missing capability check
craigtaverner Jan 17, 2025
2b2f0dc
Merge branch 'main' into lookup_join_text
craigtaverner Jan 17, 2025
4380656
Merge branch 'main' into lookup_join_text
craigtaverner Jan 17, 2025
6a56279
Merge branch 'main' into lookup_join_text
craigtaverner Jan 20, 2025
3491006
Simplify long comment
craigtaverner Jan 20, 2025
d5d7cee
Use clearer error message when joining with right-side TEXT
craigtaverner Jan 22, 2025
879ecd4
Merge branch 'main' into lookup_join_text
craigtaverner Jan 22, 2025
724153c
Fix failing test
craigtaverner Jan 22, 2025
863cfc9
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 22, 2025
26a76f9
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 23, 2025
f0ce28a
Merge branch 'main' into lookup_join_text
craigtaverner Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ language_code:integer | language_name:keyword | country:text
;

###########################################################################
# nested filed join behavior with languages_nested_fields index
# nested field join behavior with languages_nested_fields index
###########################################################################

joinOnNestedField
Expand Down Expand Up @@ -519,6 +519,34 @@ language.id:integer | language.name:text | language.name.keyword:keyword
1 | English | English
;

joinOnNestedNestedFieldRowExplicitKeyword
required_capability: join_lookup_v11
required_capability: lookup_join_text

ROW language.name.keyword = "English"
| LOOKUP JOIN languages_nested_fields ON language.name.keyword
| KEEP language.id, language.name, language.name.keyword
;

language.id:integer | language.name:text | language.name.keyword:keyword
1 | English | English
;

joinOnNestedNestedFieldRowExplicitKeywords
required_capability: join_lookup_v11
required_capability: lookup_join_text

ROW language.name.keyword = ["English", "French"]
| MV_EXPAND language.name.keyword
| LOOKUP JOIN languages_nested_fields ON language.name.keyword
| KEEP language.id, language.name, language.name.keyword, language.code
;

language.id:integer | language.name:text | language.name.keyword:keyword | language.code:keyword
1 | English | English | EN
2 | French | French | FR
;

###############################################
# Tests with clientips_lookup index
###############################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,11 @@ public enum Cap {
*/
JOIN_LOOKUP_V11(Build.current().isSnapshot()),

/**
* LOOKUP JOIN with TEXT fields on the right (right side of the join)
*/
LOOKUP_JOIN_TEXT(Build.current().isSnapshot()),

/**
* Fix for https://github.com/elastic/elasticsearch/issues/117054
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.lookup.QueryList;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
import org.elasticsearch.xpack.esql.action.EsqlQueryAction;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.tree.Source;
Expand Down Expand Up @@ -79,9 +77,7 @@ protected TransportRequest transportRequest(LookupFromIndexService.Request reque

@Override
protected QueryList queryList(TransportRequest request, SearchExecutionContext context, Block inputBlock, DataType inputDataType) {
MappedFieldType fieldType = context.getFieldType(request.matchField);
validateTypes(request.inputDataType, fieldType);
return termQueryList(fieldType, context, inputBlock, inputDataType);
return termQueryList(context.getFieldType(request.matchField), context, inputBlock, inputDataType);
}

@Override
Expand All @@ -99,15 +95,6 @@ protected String getRequiredPrivilege() {
return null;
}

private static void validateTypes(DataType inputDataType, MappedFieldType fieldType) {
// TODO: consider supporting implicit type conversion as done in ENRICH for some types
if (fieldType.typeName().equals(inputDataType.typeName()) == false) {
throw new EsqlIllegalArgumentException(
"LOOKUP JOIN match and input types are incompatible: match[" + fieldType.typeName() + "], input[" + inputDataType + "]"
);
}
}

public static class Request extends AbstractLookupService.Request {
private final String matchField;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.Objects;

import static org.elasticsearch.xpack.esql.common.Failure.fail;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes;
import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT;

Expand Down Expand Up @@ -216,7 +217,7 @@ public void postAnalysisVerification(Failures failures) {
for (int i = 0; i < config.leftFields().size(); i++) {
Attribute leftField = config.leftFields().get(i);
Attribute rightField = config.rightFields().get(i);
if (leftField.dataType() != rightField.dataType()) {
if (leftField.dataType().noText() != rightField.dataType().noText()) {
failures.add(
fail(
leftField,
Expand All @@ -228,6 +229,11 @@ public void postAnalysisVerification(Failures failures) {
)
);
}
if (rightField.dataType().equals(TEXT)) {
failures.add(
fail(leftField, "JOIN with right field [{}] of type [{}] is not supported", rightField.name(), rightField.dataType())
);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.NameId;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.expression.TypedAttribute;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.util.Holder;
Expand Down Expand Up @@ -571,35 +573,49 @@ private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlan
throw new IllegalArgumentException("can't plan [" + join + "], found index with mode [" + entry.getValue() + "]");
}
String indexName = entry.getKey();
List<Layout.ChannelAndType> matchFields = new ArrayList<>(join.leftFields().size());
for (Attribute m : join.leftFields()) {
Layout.ChannelAndType t = source.layout.get(m.id());
if (t == null) {
throw new IllegalArgumentException("can't plan [" + join + "][" + m + "]");
if (join.leftFields().size() != join.rightFields().size()) {
throw new IllegalArgumentException("can't plan [" + join + "]: mismatching left and right field count");
}
List<MatchConfig> matchFields = new ArrayList<>(join.leftFields().size());
for (int i = 0; i < join.leftFields().size(); i++) {
TypedAttribute left = (TypedAttribute) join.leftFields().get(i);
FieldAttribute right = (FieldAttribute) join.rightFields().get(i);
Layout.ChannelAndType input = source.layout.get(left.id());
if (input == null) {
throw new IllegalArgumentException("can't plan [" + join + "][" + left + "]");
}
matchFields.add(t);
matchFields.add(new MatchConfig(right, input));
}
if (matchFields.size() != 1) {
throw new IllegalArgumentException("can't plan [" + join + "]");
throw new IllegalArgumentException("can't plan [" + join + "]: multiple join predicates are not supported");
}
// TODO support multiple match fields, and support more than equality predicates
MatchConfig matchConfig = matchFields.getFirst();

return source.with(
new LookupFromIndexOperator.Factory(
sessionId,
parentTask,
context.queryPragmas().enrichMaxWorkers(),
matchFields.getFirst().channel(),
matchConfig.channel(),
lookupFromIndexService,
matchFields.getFirst().type(),
matchConfig.type(),
indexName,
join.leftFields().getFirst().name(),
matchConfig.fieldName(),
join.addedFields().stream().map(f -> (NamedExpression) f).toList(),
join.source()
),
layout
);
}

private record MatchConfig(String fieldName, int channel, DataType type) {
private MatchConfig(FieldAttribute match, Layout.ChannelAndType input) {
// Note, this handles TEXT fields with KEYWORD subfields
this(match.exactAttribute().name(), input.channel(), input.type());
}
}

private PhysicalOperation planLocal(LocalSourceExec localSourceExec, LocalExecutionPlannerContext context) {
Layout.Builder layout = new Layout.Builder();
layout.append(localSourceExec.output());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
---
setup:
- requires:
test_runner_features: [capabilities, contains]
capabilities:
- method: POST
path: /_query
parameters: []
capabilities: [lookup_join_text]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uh, I missed it, but this should also declare join_lookup_v11.
This should help skipping this spec if something changes about basic lookup logic (such as grammar :) )

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened a pr for it #120771

reason: "uses LOOKUP JOIN"
- do:
indices.create:
index: test
body:
mappings:
properties:
color:
type: text
fields:
keyword:
type: keyword
description:
type: text
fields:
keyword:
type: keyword
- do:
indices.create:
index: test-lookup
body:
settings:
index:
mode: lookup
number_of_shards: 1
mappings:
properties:
color:
type: text
fields:
keyword:
type: keyword
description:
type: text
fields:
keyword:
type: keyword
- do:
bulk:
index: "test"
refresh: true
body:
- { "index": { } }
- { "color": "red", "description": "The color Red" }
- { "index": { } }
- { "color": "blue", "description": "The color Blue" }
- { "index": { } }
- { "color": "green", "description": "The color Green" }
- do:
bulk:
index: "test-lookup"
refresh: true
body:
- { "index": { } }
- { "color": "red", "description": "As red as a tomato" }
- { "index": { } }
- { "color": "blue", "description": "As blue as the sky" }

---
keyword-keyword:
- do:
esql.query:
body:
query: 'FROM test | SORT color | LOOKUP JOIN `test-lookup` ON color.keyword | LIMIT 3'

- length: { columns: 4 }
- length: { values: 3 }
- match: {columns.0.name: "color.keyword"}
- match: {columns.0.type: "keyword"}
- match: {columns.1.name: "color"}
- match: {columns.1.type: "text"}
- match: {columns.2.name: "description"}
- match: {columns.2.type: "text"}
- match: {columns.3.name: "description.keyword"}
- match: {columns.3.type: "keyword"}
- match: {values.0: ["blue", "blue", "As blue as the sky", "As blue as the sky"]}
- match: {values.1: ["green", null, null, null]}
- match: {values.2: ["red", "red", "As red as a tomato", "As red as a tomato"]}

---
text-keyword:
- do:
esql.query:
body:
query: 'FROM test | SORT color | RENAME color AS x | EVAL color.keyword = x | LOOKUP JOIN `test-lookup` ON color.keyword | LIMIT 3'

- length: { columns: 5 }
- length: { values: 3 }
- match: {columns.0.name: "x"}
- match: {columns.0.type: "text"}
- match: {columns.1.name: "color.keyword"}
- match: {columns.1.type: "text"}
- match: {columns.2.name: "color"}
- match: {columns.2.type: "text"}
- match: {columns.3.name: "description"}
- match: {columns.3.type: "text"}
- match: {columns.4.name: "description.keyword"}
- match: {columns.4.type: "keyword"}
- match: {values.0: ["blue", "blue", "blue", "As blue as the sky", "As blue as the sky"]}
- match: {values.1: ["green", "green", null, null, null]}
- match: {values.2: ["red", "red", "red", "As red as a tomato", "As red as a tomato"]}

---
text-text:
- do:
esql.query:
body:
query: 'FROM test | SORT color | LOOKUP JOIN `test-lookup` ON color | LIMIT 3'
catch: "bad_request"

- match: { error.type: "verification_exception" }
- contains: { error.reason: "Found 1 problem\nline 1:55: JOIN with right field [color] of type [TEXT] is not supported" }

---
keyword-text:
- do:
esql.query:
body:
query: 'FROM test | SORT color | EVAL color = color.keyword | LOOKUP JOIN `test-lookup` ON color | LIMIT 3'
catch: "bad_request"

- match: { error.type: "verification_exception" }
- contains: { error.reason: "Found 1 problem\nline 1:84: JOIN left field [color] of type [KEYWORD] is incompatible with right field [color] of type [TEXT]" }