Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
*
* To adequately represent e.g. union types, the name of the attribute can be altered because we may have multiple synthetic field
* attributes that really belong to the same underlying field. For instance, if a multi-typed field is used both as {@code field::string}
* and {@code field::ip}, we'll generate 2 field attributes called {@code $$field$converted_to$string} and {@code $$field$converted_to$ip}
* but still referring to the same underlying field.
* and {@code field::ip}, we'll generate 2 field attributes called {@code $$field$converted_to$keyword} and {@code $$field$converted_to$ip}
* which still refer to the same underlying index field.
*/
public class FieldAttribute extends TypedAttribute {

Expand Down Expand Up @@ -162,6 +162,15 @@ public FieldName fieldName() {
return lazyFieldName;
}

/**
* The name of the attribute. Can deviate from the field name e.g. in case of union types. For the physical field name, use
* {@link FieldAttribute#fieldName()}.
*/
@Override
public String name() {
return super.name();
}

public EsField.Exact getExactInfo() {
return field.getExactInfo();
}
Expand All @@ -175,7 +184,7 @@ public FieldAttribute exactAttribute() {
}

private FieldAttribute innerField(EsField type) {
return new FieldAttribute(source(), name(), name() + "." + type.getName(), type, nullable(), id(), synthetic());
return new FieldAttribute(source(), fieldName().string, name() + "." + type.getName(), type, nullable(), id(), synthetic());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,28 @@ language.id:integer | language.name:text | language.name.keyword:keyword | langu
2 | French | French | FR
;

###############################################
# union type behavior
###############################################

joinOnMultiTypedMatchFieldCastToInteger
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding tests for multi-typed numeric fields as join keys. We still have gaps in test coverage for multi-typed join keys involving string types (keyword, text, ip) and date types (millis, nanos). If these are not within the scope of this PR, they should be covered in a separate one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a more complete list of tests to LookupJoinTypesIT. It's still not exhaustive but IMO covers enough ground for now.

That required a bit of refactoring ^^

required_capability: join_lookup_v12

FROM apps, apps_short METADATA _index
| EVAL language_code = id::integer
| KEEP _index, language_code
| WHERE language_code < 3
| LOOKUP JOIN languages_lookup ON language_code
| SORT _index ASC, language_code ASC
;

_index:keyword | language_code:integer | language_name:keyword
apps | 1 | English
apps | 2 | French
apps_short | 1 | English
apps_short | 2 | French
;

###############################################
# Tests with clientips_lookup index
###############################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Alias;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
Expand Down Expand Up @@ -415,8 +416,13 @@ private static Operator extractFieldsOperator(
) {
List<ValuesSourceReaderOperator.FieldInfo> fields = new ArrayList<>(extractFields.size());
for (NamedExpression extractField : extractFields) {
String fieldName = extractField instanceof FieldAttribute fa ? fa.fieldName().string()
// Cases for Alias and ReferenceAttribute: only required for ENRICH (Alias in case of ENRICH ... WITH x = field)
// (LOOKUP JOIN uses FieldAttributes)
: extractField instanceof Alias a ? ((NamedExpression) a.child()).name()
: extractField.name();
BlockLoader loader = shardContext.blockLoader(
extractField instanceof Alias a ? ((NamedExpression) a.child()).name() : extractField.name(),
fieldName,
extractField.dataType() == DataType.UNSUPPORTED,
MappedFieldType.FieldExtractPreference.NONE
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.core.Releasables;
import org.elasticsearch.tasks.CancellableTask;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
Expand All @@ -45,7 +46,7 @@ public record Factory(
DataType inputDataType,
String lookupIndexPattern,
String lookupIndex,
String matchField,
FieldAttribute.FieldName matchField,
List<NamedExpression> loadFields,
Source source
) implements OperatorFactory {
Expand All @@ -56,7 +57,7 @@ public String describe() {
+ " input_type="
+ inputDataType
+ " match_field="
+ matchField
+ matchField.string()
+ " load_fields="
+ loadFields
+ " inputChannel="
Expand All @@ -76,7 +77,7 @@ public Operator get(DriverContext driverContext) {
inputDataType,
lookupIndexPattern,
lookupIndex,
matchField,
matchField.string(),
loadFields,
source
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,8 @@ protected Map<String, Object> resolvedOptions() throws InvalidArgumentException
return Map.of();
}

// TODO: this should likely be replaced by calls to FieldAttribute#fieldName; the MultiTypeEsField case looks
// wrong if `fieldAttribute` is a subfield, e.g. `parent.child` - multiTypeEsField#getName will just return `child`.
public static String getNameFromFieldAttribute(FieldAttribute fieldAttribute) {
String fieldName = fieldAttribute.name();
if (fieldAttribute.field() instanceof MultiTypeEsField multiTypeEsField) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ public AttributeSet rightReferences() {
return Expressions.references(config().rightFields());
}

/**
* The output fields obtained from the right child.
*/
public List<Attribute> rightOutputFields() {
AttributeSet leftInputs = left().outputSet();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -766,10 +766,10 @@ private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlan
);
}

private record MatchConfig(String fieldName, int channel, DataType type) {
private record MatchConfig(FieldAttribute.FieldName fieldName, int channel, DataType type) {
private MatchConfig(FieldAttribute match, Layout.ChannelAndType input) {
// Note, this handles TEXT fields with KEYWORD subfields
this(match.exactAttribute().name(), input.channel(), input.type());
this(match.exactAttribute().fieldName(), input.channel(), input.type());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not quite obvious to me how the keyword subfield of a text field works with multi-typed field, it will be great if we have a test to exercise this code path, or some comments for when match.exactAttribute().name() is different from match.exactAttribute().fieldName().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I think this line is super redundant. The matchfield is a field from the lookup index. We can't even use TEXT as a lookup key in the lookup index, it's only allowed in the main index. So the dropping down to the exact subfield just doesn't ever apply.

I'll expand the comment. Testing this code path is currently not really possible (at least not in a realistic scenario.)

In any case, using fieldName over name is going to be more correct once the call to exactAttribute stops being redundant.

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import org.elasticsearch.threadpool.TestThreadPool;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
import org.elasticsearch.xpack.esql.core.tree.Source;
Expand Down Expand Up @@ -134,7 +135,7 @@ protected Operator.OperatorFactory simple(SimpleOptions options) {
int inputChannel = 0;
DataType inputDataType = DataType.LONG;
String lookupIndex = "idx";
String matchField = "match";
FieldAttribute.FieldName matchField = new FieldAttribute.FieldName("match");
List<NamedExpression> loadFields = List.of(
new ReferenceAttribute(Source.EMPTY, "lkwd", DataType.KEYWORD),
new ReferenceAttribute(Source.EMPTY, "lint", DataType.INTEGER)
Expand Down