Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a4f3b5e
Initial support for TEXT fields in LOOKUP JOIN condition
craigtaverner Jan 2, 2025
c1744c9
Better approach, using KEYWORD subfield
craigtaverner Jan 3, 2025
8eb6554
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
69572e5
Add new capability to block mixed-cluster tests
craigtaverner Jan 3, 2025
417d3df
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
e799ef9
Merge branch 'main' into lookup_join_text
craigtaverner Jan 3, 2025
772659d
Merge branch 'main' into lookup_join_text
craigtaverner Jan 7, 2025
93779f9
Simplify lookup-join validation and remove keyword subfield validation
craigtaverner Jan 16, 2025
cbcd532
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 16, 2025
0146742
Added yaml tests for failing TEXT lookup and added validation for that
craigtaverner Jan 16, 2025
e327808
Fixed failing tests
craigtaverner Jan 16, 2025
77fc5bf
Missing capability check
craigtaverner Jan 17, 2025
2b2f0dc
Merge branch 'main' into lookup_join_text
craigtaverner Jan 17, 2025
4380656
Merge branch 'main' into lookup_join_text
craigtaverner Jan 17, 2025
6a56279
Merge branch 'main' into lookup_join_text
craigtaverner Jan 20, 2025
3491006
Simplify long comment
craigtaverner Jan 20, 2025
d5d7cee
Use clearer error message when joining with right-side TEXT
craigtaverner Jan 22, 2025
879ecd4
Merge branch 'main' into lookup_join_text
craigtaverner Jan 22, 2025
724153c
Fix failing test
craigtaverner Jan 22, 2025
863cfc9
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 22, 2025
26a76f9
Merge remote-tracking branch 'origin/main' into lookup_join_text
craigtaverner Jan 23, 2025
f0ce28a
Merge branch 'main' into lookup_join_text
craigtaverner Jan 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.function.IntFunction;

/**
Expand Down Expand Up @@ -70,7 +71,12 @@ public static QueryList rawTermQueryList(MappedFieldType field, SearchExecutionC
}
case BYTES_REF -> offset -> {
BytesRefBlock bytesRefBlock = (BytesRefBlock) block;
return bytesRefBlock.getBytesRef(offset, new BytesRef());
BytesRef value = bytesRefBlock.getBytesRef(offset, new BytesRef());
if (field.typeName().equals("text")) {
// Text fields involve case-insensitive contains queries, we need to use lowercase on the term query
return new BytesRef(value.utf8ToString().toLowerCase(Locale.ROOT));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This mostly exists to show that matching against text fields "doesn't work like you think it does" in the ENRICH infrastructure we've borrowed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've deleted this now (or rather I'm throwing an exception, but might want to delete that too), and instead try get the planner to use an underlying KEYWORD sub-field, if it exists, using fa.exactAttribute().

}
return value;
};
case DOUBLE -> {
DoubleBlock doubleBlock = ((DoubleBlock) block);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ language_code:integer | language_name:keyword | country:keyword
;

###########################################################################
# nested filed join behavior with languages_nested_fields index
# nested field join behavior with languages_nested_fields index
###########################################################################

joinOnNestedField
Expand Down Expand Up @@ -509,6 +509,18 @@ language.id:integer | language.name:text | language.name.keyword:keyword
1 | English | English
;

joinOnNestedNestedFieldRowImplicitKeyword
required_capability: join_lookup_v10

ROW language.name = "English"
| LOOKUP JOIN languages_nested_fields ON language.name
| KEEP language.id, language.name, language.name.keyword
;

language.id:integer | language.name:keyword | language.name.keyword:keyword
1 | English | English
;

###############################################
# Tests with clientips_lookup index
###############################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@ private static void checkJoin(LogicalPlan plan, Set<Failure> failures) {
for (int i = 0; i < config.leftFields().size(); i++) {
Attribute leftField = config.leftFields().get(i);
Attribute rightField = config.rightFields().get(i);
if (leftField.dataType() != rightField.dataType()) {
if (leftField.dataType().noText() != rightField.dataType().noText()) {
failures.add(
fail(
leftField,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ protected String getRequiredPrivilege() {

private static void validateTypes(DataType inputDataType, MappedFieldType fieldType) {
// TODO: consider supporting implicit type conversion as done in ENRICH for some types
if (fieldType.typeName().equals(inputDataType.typeName()) == false) {
String typeName = fieldType.typeName().equals("text") ? DataType.KEYWORD.typeName() : fieldType.typeName();
if (typeName.equals(inputDataType.noText().typeName()) == false) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it is worth resolving typeName wirh fromTypeName here to avoid comparing strings?
Something like

Suggested change
String typeName = fieldType.typeName().equals("text") ? DataType.KEYWORD.typeName() : fieldType.typeName();
if (typeName.equals(inputDataType.noText().typeName()) == false) {
if (Objects.equals(DataType.fromTypeName(fieldType.typeName()).noText(), inputDataType.noText()) == false) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm actually thinking of deleting this method entirely, since it is shadowed by the newer validation happening in the Validator. In the current draft I fixed both methods, but in reality we only need the one.

throw new EsqlIllegalArgumentException(
"LOOKUP JOIN match and input types are incompatible: match[" + fieldType.typeName() + "], input[" + inputDataType + "]"
);
Expand Down