Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a4f3b5e
Initial support for TEXT fields in LOOKUP JOIN condition
craigtaverner Jan 2, 2025
c1744c9
Better approach, using KEYWORD subfield
craigtaverner Jan 3, 2025
8eb6554
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
69572e5
Add new capability to block mixed-cluster tests
craigtaverner Jan 3, 2025
417d3df
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
e799ef9
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
772659d
Merge branch 'main' into lookup_join_text
craigtaverner Jan 7, 2025
93779f9
Simplify lookup-join validation and remove keyword subfield validation
craigtaverner Jan 16, 2025
cbcd532
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 16, 2025
0146742
Added yaml tests for failing TEXT lookup and added validation for that
craigtaverner Jan 16, 2025
e327808
Fixed failing tests
craigtaverner Jan 16, 2025
77fc5bf
Missing capability check
craigtaverner Jan 17, 2025
2b2f0dc
Merge branch 'main' into lookup_join_text
craigtaverner Jan 17, 2025
4380656
Merge branch 'main' into lookup_join_text
craigtaverner Jan 17, 2025
6a56279
Merge branch 'main' into lookup_join_text
craigtaverner Jan 20, 2025
3491006
Simplify long comment
craigtaverner Jan 20, 2025
d5d7cee
Use clearer error message when joining with right-side TEXT
craigtaverner Jan 22, 2025
879ecd4
Merge branch 'main' into lookup_join_text
craigtaverner Jan 22, 2025
724153c
Fix failing test
craigtaverner Jan 22, 2025
863cfc9
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 22, 2025
26a76f9
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 23, 2025
f0ce28a
Merge branch 'main' into lookup_join_text
craigtaverner Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,11 @@ public static QueryList rawTermQueryList(MappedFieldType field, SearchExecutionC
}
case BYTES_REF -> offset -> {
BytesRefBlock bytesRefBlock = (BytesRefBlock) block;
return bytesRefBlock.getBytesRef(offset, new BytesRef());
BytesRef value = bytesRefBlock.getBytesRef(offset, new BytesRef());
if (field.typeName().equals("text")) {
throw new IllegalArgumentException("Cannot perform LOOKUP JOIN on TEXT fields");
}
return value;
};
case DOUBLE -> {
DoubleBlock doubleBlock = ((DoubleBlock) block);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ language_code:integer | language_name:keyword | country:keyword
;

###########################################################################
# nested filed join behavior with languages_nested_fields index
# nested field join behavior with languages_nested_fields index
###########################################################################

joinOnNestedField
Expand Down Expand Up @@ -510,6 +510,34 @@ language.id:integer | language.name:text | language.name.keyword:keyword
1 | English | English
;

joinOnNestedNestedFieldRowImplicitKeyword
required_capability: join_lookup_v10
required_capability: lookup_join_text

ROW language.name = "English"
| LOOKUP JOIN languages_nested_fields ON language.name
| KEEP language.id, language.name, language.name.keyword
;

language.id:integer | language.name:keyword | language.name.keyword:keyword
1 | English | English
;

joinOnNestedNestedFieldRowImplicitKeywords
required_capability: join_lookup_v10
required_capability: lookup_join_text

ROW language.name = ["English", "French"]
| MV_EXPAND language.name
| LOOKUP JOIN languages_nested_fields ON language.name
| KEEP language.id, language.name, language.name.keyword, language.code
;

language.id:integer | language.name:keyword | language.name.keyword:keyword | language.code:keyword
1 | English | English | EN
2 | French | French | FR
;

###############################################
# Tests with clientips_lookup index
###############################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,11 @@ public enum Cap {
*/
JOIN_LOOKUP_V10(Build.current().isSnapshot()),

/**
* LOOKUP JOIN with TEXT fields on the right (right side of the join)
*/
LOOKUP_JOIN_TEXT(Build.current().isSnapshot()),

/**
* Fix for https://github.com/elastic/elasticsearch/issues/117054
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@ private static void checkJoin(LogicalPlan plan, Set<Failure> failures) {
for (int i = 0; i < config.leftFields().size(); i++) {
Attribute leftField = config.leftFields().get(i);
Attribute rightField = config.rightFields().get(i);
if (leftField.dataType() != rightField.dataType()) {
if (leftField.dataType().noText() != rightField.dataType().noText()) {
failures.add(
fail(
leftField,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ protected String getRequiredPrivilege() {

private static void validateTypes(DataType inputDataType, MappedFieldType fieldType) {
// TODO: consider supporting implicit type conversion as done in ENRICH for some types
if (fieldType.typeName().equals(inputDataType.typeName()) == false) {
String typeName = fieldType.typeName().equals("text") ? DataType.KEYWORD.typeName() : fieldType.typeName();
if (typeName.equals(inputDataType.noText().typeName()) == false) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it is worth resolving typeName wirh fromTypeName here to avoid comparing strings?
Something like

Suggested change
String typeName = fieldType.typeName().equals("text") ? DataType.KEYWORD.typeName() : fieldType.typeName();
if (typeName.equals(inputDataType.noText().typeName()) == false) {
if (Objects.equals(DataType.fromTypeName(fieldType.typeName()).noText(), inputDataType.noText()) == false) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm actually thinking of deleting this method entirely, since it is shadowed by the newer validation happening in the Validator. In the current draft I fixed both methods, but in reality we only need the one.

throw new EsqlIllegalArgumentException(
"LOOKUP JOIN match and input types are incompatible: match[" + fieldType.typeName() + "], input[" + inputDataType + "]"
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.NameId;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.expression.TypedAttribute;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.util.Holder;
Expand Down Expand Up @@ -570,35 +572,49 @@ private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlan
throw new IllegalArgumentException("can't plan [" + join + "], found index with mode [" + entry.getValue() + "]");
}
String indexName = entry.getKey();
List<Layout.ChannelAndType> matchFields = new ArrayList<>(join.leftFields().size());
for (Attribute m : join.leftFields()) {
Layout.ChannelAndType t = source.layout.get(m.id());
if (t == null) {
throw new IllegalArgumentException("can't plan [" + join + "][" + m + "]");
if (join.leftFields().size() != join.rightFields().size()) {
throw new IllegalArgumentException("can't plan [" + join + "]: mismatching left and right field count");
}
List<MatchConfig> matchFields = new ArrayList<>(join.leftFields().size());
for (int i = 0; i < join.leftFields().size(); i++) {
TypedAttribute left = (TypedAttribute) join.leftFields().get(i);
FieldAttribute right = (FieldAttribute) join.rightFields().get(i);
Layout.ChannelAndType input = source.layout.get(left.id());
if (input == null) {
throw new IllegalArgumentException("can't plan [" + join + "][" + left + "]");
}
matchFields.add(t);
matchFields.add(new MatchConfig(right, input));
}
if (matchFields.size() != 1) {
throw new IllegalArgumentException("can't plan [" + join + "]");
throw new IllegalArgumentException("can't plan [" + join + "]: multiple join predicates are not supported");
}
// TODO support multiple match fields, and support more than equality predicates
MatchConfig matchConfig = matchFields.getFirst();

return source.with(
new LookupFromIndexOperator.Factory(
sessionId,
parentTask,
context.queryPragmas().enrichMaxWorkers(),
matchFields.getFirst().channel(),
matchConfig.channel(),
lookupFromIndexService,
matchFields.getFirst().type(),
matchConfig.type(),
indexName,
join.leftFields().getFirst().name(),
matchConfig.fieldName(),
join.addedFields().stream().map(f -> (NamedExpression) f).toList(),
join.source()
),
layout
);
}

private record MatchConfig(String fieldName, int channel, DataType type) {
private MatchConfig(FieldAttribute match, Layout.ChannelAndType input) {
// Note, this handles TEXT fields with KEYWORD subfields, and we assume tha has been validated earlier during planning
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Note, this handles TEXT fields with KEYWORD subfields, and we assume tha has been validated earlier during planning
// Note, this handles TEXT field with KEYWORD subfields, and we assume that has been validated earlier during planning

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please clarify "validated" means here? Is it existance of the nested .keyword field? If so could you please help me understand where such validation is happening?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR is still in draft, but I planned to find that location and add such validation myself. It does not yet exist.

this(match.exactAttribute().name(), input.channel(), input.type());
}
}

private ExpressionEvaluator.Factory toEvaluator(Expression exp, Layout layout) {
return EvalMapper.toEvaluator(exp, layout);
}
Expand Down