Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a4f3b5e
Initial support for TEXT fields in LOOKUP JOIN condition
craigtaverner Jan 2, 2025
c1744c9
Better approach, using KEYWORD subfield
craigtaverner Jan 3, 2025
8eb6554
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
69572e5
Add new capability to block mixed-cluster tests
craigtaverner Jan 3, 2025
417d3df
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
e799ef9
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
772659d
Merge branch 'main' into lookup_join_text
craigtaverner Jan 7, 2025
93779f9
Simplify lookup-join validation and remove keyword subfield validation
craigtaverner Jan 16, 2025
cbcd532
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 16, 2025
0146742
Added yaml tests for failing TEXT lookup and added validation for that
craigtaverner Jan 16, 2025
e327808
Fixed failing tests
craigtaverner Jan 16, 2025
77fc5bf
Missing capability check
craigtaverner Jan 17, 2025
2b2f0dc
Merge branch 'main' into lookup_join_text
craigtaverner Jan 17, 2025
4380656
Merge branch 'main' into lookup_join_text
craigtaverner Jan 17, 2025
6a56279
Merge branch 'main' into lookup_join_text
craigtaverner Jan 20, 2025
3491006
Simplify long comment
craigtaverner Jan 20, 2025
d5d7cee
Use clearer error message when joining with right-side TEXT
craigtaverner Jan 22, 2025
879ecd4
Merge branch 'main' into lookup_join_text
craigtaverner Jan 22, 2025
724153c
Fix failing test
craigtaverner Jan 22, 2025
863cfc9
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 22, 2025
26a76f9
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 23, 2025
f0ce28a
Merge branch 'main' into lookup_join_text
craigtaverner Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ language_code:integer | language_name:keyword | country:text
;

###########################################################################
# nested filed join behavior with languages_nested_fields index
# nested field join behavior with languages_nested_fields index
###########################################################################

joinOnNestedField
Expand Down Expand Up @@ -519,6 +519,34 @@ language.id:integer | language.name:text | language.name.keyword:keyword
1 | English | English
;

joinOnNestedNestedFieldRowExplicitKeyword
required_capability: join_lookup_v11
required_capability: lookup_join_text

ROW language.name.keyword = "English"
| LOOKUP JOIN languages_nested_fields ON language.name.keyword
| KEEP language.id, language.name, language.name.keyword
;

language.id:integer | language.name:text | language.name.keyword:keyword
1 | English | English
;

joinOnNestedNestedFieldRowExplicitKeywords
required_capability: join_lookup_v11
required_capability: lookup_join_text

ROW language.name.keyword = ["English", "French"]
| MV_EXPAND language.name.keyword
| LOOKUP JOIN languages_nested_fields ON language.name.keyword
| KEEP language.id, language.name, language.name.keyword, language.code
;

language.id:integer | language.name:text | language.name.keyword:keyword | language.code:keyword
1 | English | English | EN
2 | French | French | FR
;

###############################################
# Tests with clientips_lookup index
###############################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,11 @@ public enum Cap {
*/
JOIN_LOOKUP_V11(Build.current().isSnapshot()),

/**
* LOOKUP JOIN with TEXT fields on the right (right side of the join)
*/
LOOKUP_JOIN_TEXT(Build.current().isSnapshot()),

/**
* Fix for https://github.com/elastic/elasticsearch/issues/117054
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.lookup.QueryList;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
import org.elasticsearch.xpack.esql.action.EsqlQueryAction;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.tree.Source;
Expand Down Expand Up @@ -80,9 +78,7 @@ protected TransportRequest transportRequest(LookupFromIndexService.Request reque

@Override
protected QueryList queryList(TransportRequest request, SearchExecutionContext context, Block inputBlock, DataType inputDataType) {
MappedFieldType fieldType = context.getFieldType(request.matchField);
validateTypes(request.inputDataType, fieldType);
return termQueryList(fieldType, context, inputBlock, inputDataType);
return termQueryList(context.getFieldType(request.matchField), context, inputBlock, inputDataType);
}

@Override
Expand All @@ -100,15 +96,6 @@ protected String getRequiredPrivilege() {
return null;
}

private static void validateTypes(DataType inputDataType, MappedFieldType fieldType) {
// TODO: consider supporting implicit type conversion as done in ENRICH for some types
if (fieldType.typeName().equals(inputDataType.typeName()) == false) {
throw new EsqlIllegalArgumentException(
"LOOKUP JOIN match and input types are incompatible: match[" + fieldType.typeName() + "], input[" + inputDataType + "]"
);
}
}

public static class Request extends AbstractLookupService.Request {
private final String matchField;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.Objects;

import static org.elasticsearch.xpack.esql.common.Failure.fail;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes;
import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT;

Expand Down Expand Up @@ -216,7 +217,7 @@ public void postAnalysisVerification(Failures failures) {
for (int i = 0; i < config.leftFields().size(); i++) {
Attribute leftField = config.leftFields().get(i);
Attribute rightField = config.rightFields().get(i);
if (leftField.dataType() != rightField.dataType()) {
if (leftField.dataType().noText() != rightField.dataType().noText() || rightField.dataType().equals(TEXT)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest treating the TEXT data type on the hand right side of the join as a separate situation with a separate error message. See my comment from 191_lookup_join_text.yml.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Done.

failures.add(
fail(
leftField,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.NameId;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.expression.TypedAttribute;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.util.Holder;
Expand Down Expand Up @@ -571,35 +573,49 @@ private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlan
throw new IllegalArgumentException("can't plan [" + join + "], found index with mode [" + entry.getValue() + "]");
}
String indexName = entry.getKey();
List<Layout.ChannelAndType> matchFields = new ArrayList<>(join.leftFields().size());
for (Attribute m : join.leftFields()) {
Layout.ChannelAndType t = source.layout.get(m.id());
if (t == null) {
throw new IllegalArgumentException("can't plan [" + join + "][" + m + "]");
if (join.leftFields().size() != join.rightFields().size()) {
throw new IllegalArgumentException("can't plan [" + join + "]: mismatching left and right field count");
}
List<MatchConfig> matchFields = new ArrayList<>(join.leftFields().size());
for (int i = 0; i < join.leftFields().size(); i++) {
TypedAttribute left = (TypedAttribute) join.leftFields().get(i);
FieldAttribute right = (FieldAttribute) join.rightFields().get(i);
Layout.ChannelAndType input = source.layout.get(left.id());
if (input == null) {
throw new IllegalArgumentException("can't plan [" + join + "][" + left + "]");
}
matchFields.add(t);
matchFields.add(new MatchConfig(right, input));
}
if (matchFields.size() != 1) {
throw new IllegalArgumentException("can't plan [" + join + "]");
throw new IllegalArgumentException("can't plan [" + join + "]: multiple join predicates are not supported");
}
// TODO support multiple match fields, and support more than equality predicates
MatchConfig matchConfig = matchFields.getFirst();

return source.with(
new LookupFromIndexOperator.Factory(
sessionId,
parentTask,
context.queryPragmas().enrichMaxWorkers(),
matchFields.getFirst().channel(),
matchConfig.channel(),
lookupFromIndexService,
matchFields.getFirst().type(),
matchConfig.type(),
indexName,
join.leftFields().getFirst().name(),
matchConfig.fieldName(),
join.addedFields().stream().map(f -> (NamedExpression) f).toList(),
join.source()
),
layout
);
}

private record MatchConfig(String fieldName, int channel, DataType type) {
private MatchConfig(FieldAttribute match, Layout.ChannelAndType input) {
// Note, this handles TEXT fields with KEYWORD subfields
this(match.exactAttribute().name(), input.channel(), input.type());
}
}

private PhysicalOperation planLocal(LocalSourceExec localSourceExec, LocalExecutionPlannerContext context) {
Layout.Builder layout = new Layout.Builder();
layout.append(localSourceExec.output());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
---
setup:
- requires:
test_runner_features: [capabilities, contains]
capabilities:
- method: POST
path: /_query
parameters: []
capabilities: [lookup_join_text]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uh, I missed it, but this should also declare join_lookup_v11.
This should help skipping this spec if something changes about basic lookup logic (such as grammar :) )

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened a pr for it #120771

reason: "uses LOOKUP JOIN"
- do:
indices.create:
index: test
body:
mappings:
properties:
color:
type: text
fields:
keyword:
type: keyword
description:
type: text
fields:
keyword:
type: keyword
- do:
indices.create:
index: test-lookup
body:
settings:
index:
mode: lookup
number_of_shards: 1
mappings:
properties:
color:
type: text
fields:
keyword:
type: keyword
description:
type: text
fields:
keyword:
type: keyword
- do:
bulk:
index: "test"
refresh: true
body:
- { "index": { } }
- { "color": "red", "description": "The color Red" }
- { "index": { } }
- { "color": "blue", "description": "The color Blue" }
- { "index": { } }
- { "color": "green", "description": "The color Green" }
- do:
bulk:
index: "test-lookup"
refresh: true
body:
- { "index": { } }
- { "color": "red", "description": "As red as a tomato" }
- { "index": { } }
- { "color": "blue", "description": "As blue as the sky" }

---
keyword-keyword:
- do:
esql.query:
body:
query: 'FROM test | SORT color | LOOKUP JOIN `test-lookup` ON color.keyword | LIMIT 3'

- length: { columns: 4 }
- length: { values: 3 }
- match: {columns.0.name: "color.keyword"}
- match: {columns.0.type: "keyword"}
- match: {columns.1.name: "color"}
- match: {columns.1.type: "text"}
- match: {columns.2.name: "description"}
- match: {columns.2.type: "text"}
- match: {columns.3.name: "description.keyword"}
- match: {columns.3.type: "keyword"}
- match: {values.0: ["blue", "blue", "As blue as the sky", "As blue as the sky"]}
- match: {values.1: ["green", null, null, null]}
- match: {values.2: ["red", "red", "As red as a tomato", "As red as a tomato"]}

---
text-keyword:
- do:
esql.query:
body:
query: 'FROM test | SORT color | RENAME color AS x | EVAL color.keyword = x | LOOKUP JOIN `test-lookup` ON color.keyword | LIMIT 3'

- length: { columns: 5 }
- length: { values: 3 }
- match: {columns.0.name: "x"}
- match: {columns.0.type: "text"}
- match: {columns.1.name: "color.keyword"}
- match: {columns.1.type: "text"}
- match: {columns.2.name: "color"}
- match: {columns.2.type: "text"}
- match: {columns.3.name: "description"}
- match: {columns.3.type: "text"}
- match: {columns.4.name: "description.keyword"}
- match: {columns.4.type: "keyword"}
- match: {values.0: ["blue", "blue", "blue", "As blue as the sky", "As blue as the sky"]}
- match: {values.1: ["green", "green", null, null, null]}
- match: {values.2: ["red", "red", "red", "As red as a tomato", "As red as a tomato"]}

---
text-text:
- do:
esql.query:
body:
query: 'FROM test | SORT color | LOOKUP JOIN `test-lookup` ON color | LIMIT 3'
catch: "bad_request"

- match: { error.type: "verification_exception" }
- contains: { error.reason: "Found 1 problem\nline 1:55: JOIN left field [color] of type [TEXT] is incompatible with right field [color] of type [TEXT]" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As an user, I would be confused about this message. As in "both types are the same, they are not unsupported, why is es|ql telling me they are not compatible".

The actual, more useful, error message would be true reason for the incompatibility: "unsupported [TEXT] data type as JOIN right field"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did think about that when coding this, but left the message the same between all scenarios, both where the types mismatch and where the failure is because the right type was TEXT for two reasons:

  • Simple validation code
  • More likely to be future compatible with the case where we decide to support TEXT on the right when it does have an exact KEYWORD subfield (which I understand is actually very common).

However, I personally have no objection to adding a clearer error now, and simply removing it again later if we do support that case).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the end-user perspective it would be less confusion imo; and less explanation from us when someone asks :-)). I don't think it would be difficult to remove this afterwards (whenever that will be). +100 from me to have two different error messages.


---
keyword-text:
- do:
esql.query:
body:
query: 'FROM test | SORT color | EVAL color = color.keyword | LOOKUP JOIN `test-lookup` ON color | LIMIT 3'
catch: "bad_request"

- match: { error.type: "verification_exception" }
- contains: { error.reason: "Found 1 problem\nline 1:84: JOIN left field [color] of type [KEYWORD] is incompatible with right field [color] of type [TEXT]" }