Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128910.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128910
summary: Fix `FieldAttribute` name usage in `InferNonNullAggConstraint`
area: ES|QL
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,29 @@

/**
* Attribute for an ES field.
* To differentiate between the different type of fields this class offers:
* - name - the fully qualified name (foo.bar.tar)
* - path - the path pointing to the field name (foo.bar)
* - parent - the immediate parent of the field; useful for figuring out the type of field (nested vs object)
* - nestedParent - if nested, what's the parent (which might not be the immediate one)
* This class offers:
* - name - the name of the attribute, but not necessarily of the field.
* - The raw EsField representing the field; for parent.child.grandchild this is just grandchild.
* - parentName - the full path to the immediate parent of the field, e.g. parent.child (without .grandchild)
*
* To adequately represent e.g. union types, the name of the attribute can be altered because we may have multiple synthetic field
* attributes that really belong to the same underlying field. For instance, if a multi-typed field is used both as {@code field::string}
* and {@code field::ip}, we'll generate 2 field attributes called {@code $$field$converted_to$string} and {@code $$field$converted_to$ip}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In union-types we do not have multiple names like this, since the name specifies the type we're converting to, not the type we're converting from. So if field is IP and KEYWORD in different mappings, the converted field might be $$field$coverted_to$ip if the query used field::ip to resolve the mapping conflict. A different query might use field::string to resolve the conflict, and in that query the attribute gets name $$field$converted_to$string. So we only ever generate 1 new attribute, not 2, in the same query.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do generate more than 1; I just confirmed this locally with the example given in the javadoc:

Eval[[TOIPLEADINGZEROSREJECTED(?field) AS field1#24, TOSTRING(?field) AS field2#27]] ! Project[[!field, field1{r}#24, field2{r}#27]]
\_UnresolvedRelation[test*]                                                          ! \_Limit[1000[INTEGER],false]
                                                                                     !   \_Eval[[$$field$converted_to$ip{f$}#29 AS field1#24, $$field$converted_to$keyword{f$}#30 AS field2#27]]
                                                                                     !     \_EsRelation[test*][!field, $$field$converted_to$ip{f$}#29, $$field$con..]

* but still referring to the same underlying field.
*/
public class FieldAttribute extends TypedAttribute {

/**
* A field name, as found in the mapping. Includes the whole path from the root of the document.
* Implemented as a wrapper around {@link String} to distinguish from the attribute name (which sometimes differs!) at compile time.
*/
public record FieldName(String name) {
@Override
public String toString() {
return name;
}
};

static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Attribute.class,
"FieldAttribute",
Expand All @@ -43,6 +58,7 @@ public class FieldAttribute extends TypedAttribute {

private final String parentName;
private final EsField field;
protected FieldName lazyFieldName;

public FieldAttribute(Source source, String name, EsField field) {
this(source, null, name, field);
Expand Down Expand Up @@ -136,15 +152,19 @@ public String parentName() {
/**
* The full name of the field in the index, including all parent fields. E.g. {@code parent.subfield.this_field}.
*/
public String fieldName() {
// Before 8.15, the field name was the same as the attribute's name.
// On later versions, the attribute can be renamed when creating synthetic attributes.
// Because until 8.15, we couldn't set `synthetic` to true due to a bug, in that version such FieldAttributes are marked by their
// name starting with `$$`.
if ((synthetic() || name().startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX)) == false) {
return name();
public FieldName fieldName() {
if (lazyFieldName == null) {
// Before 8.15, the field name was the same as the attribute's name.
// On later versions, the attribute can be renamed when creating synthetic attributes.
// Because until 8.15, we couldn't set `synthetic` to true due to a bug, in that version such FieldAttributes are marked by
// their
// name starting with `$$`.
if ((synthetic() || name().startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX)) == false) {
lazyFieldName = new FieldName(name());
}
lazyFieldName = new FieldName(Strings.hasText(parentName) ? parentName + "." + field.getName() : field.getName());
}
return Strings.hasText(parentName) ? parentName + "." + field.getName() : field.getName();
return lazyFieldName;
}

public EsField.Exact getExactInfo() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public String getWriteableName() {
}

/**
* Returns the field path
* Returns the simple name, but not the full field path. The latter requires knowing the path of the parent field.
*/
public String getName() {
return name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute.FieldName;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
Expand Down Expand Up @@ -248,22 +249,22 @@ public static EsRelation relation() {
public static class TestSearchStats implements SearchStats {

@Override
public boolean exists(String field) {
public boolean exists(FieldName field) {
return true;
}

@Override
public boolean isIndexed(String field) {
public boolean isIndexed(FieldName field) {
return exists(field);
}

@Override
public boolean hasDocValues(String field) {
public boolean hasDocValues(FieldName field) {
return exists(field);
}

@Override
public boolean hasExactSubfield(String field) {
public boolean hasExactSubfield(FieldName field) {
return exists(field);
}

Expand All @@ -273,32 +274,32 @@ public long count() {
}

@Override
public long count(String field) {
public long count(FieldName field) {
return exists(field) ? -1 : 0;
}

@Override
public long count(String field, BytesRef value) {
public long count(FieldName field, BytesRef value) {
return exists(field) ? -1 : 0;
}

@Override
public byte[] min(String field, DataType dataType) {
public byte[] min(FieldName field, DataType dataType) {
return null;
}

@Override
public byte[] max(String field, DataType dataType) {
public byte[] max(FieldName field, DataType dataType) {
return null;
}

@Override
public boolean isSingleValue(String field) {
public boolean isSingleValue(FieldName field) {
return false;
}

@Override
public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value) {
public boolean canUseEqualityOnSyntheticSourceDelegate(FieldName name, String value) {
return false;
}
}
Expand Down Expand Up @@ -349,23 +350,23 @@ private boolean isConfigationSet(Config config, String field) {
}

@Override
public boolean exists(String field) {
return isConfigationSet(Config.EXISTS, field);
public boolean exists(FieldName field) {
return isConfigationSet(Config.EXISTS, field.toString());
}

@Override
public boolean isIndexed(String field) {
return isConfigationSet(Config.INDEXED, field);
public boolean isIndexed(FieldName field) {
return isConfigationSet(Config.INDEXED, field.toString());
}

@Override
public boolean hasDocValues(String field) {
return isConfigationSet(Config.DOC_VALUES, field);
public boolean hasDocValues(FieldName field) {
return isConfigationSet(Config.DOC_VALUES, field.toString());
}

@Override
public boolean hasExactSubfield(String field) {
return isConfigationSet(Config.EXACT_SUBFIELD, field);
public boolean hasExactSubfield(FieldName field) {
return isConfigationSet(Config.EXACT_SUBFIELD, field.toString());
}

@Override
Expand Down Expand Up @@ -500,8 +501,8 @@ private static SearchStats fieldMatchingExistOrMissing(boolean exists, String...
private final Set<String> fields = Set.of(names);

@Override
public boolean exists(String field) {
return fields.contains(field) == exists;
public boolean exists(FieldName field) {
return fields.contains(field.toString()) == exists;
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,19 @@ count:long | message:keyword
3 | Connected to 10.1.0.3
;

multiIndexStatsOfMultiTypedField
required_capability: union_types
required_capability: casting_operator
required_capability: union_types_numeric_widening

FROM apps, apps_short
| STATS s = sum(id::integer)
;

s:long
210
;

multiIndexMultiColumnTypesRename
required_capability: union_types
required_capability: index_metadata_field
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,13 @@ public UnsupportedEsField field() {
}

@Override
public String fieldName() {
// The super fieldName uses parents to compute the path; this class ignores parents, so we need to rely on the name instead.
// Using field().getName() would be wrong: for subfields like parent.subfield that would return only the last part, subfield.
return name();
public FieldName fieldName() {
if (lazyFieldName == null) {
// The super fieldName uses parents to compute the path; this class ignores parents, so we need to rely on the name instead.
// Using field().getName() would be wrong: for subfields like parent.subfield that would return only the last part, subfield.
lazyFieldName = new FieldName(name());
}
return lazyFieldName;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

/**
* The vast majority of aggs ignore null entries - this rule adds a pushable filter, as it is cheap
* to execute, to filter this entries out to begin with.
* to execute, to filter these entries out to begin with.
* STATS x = min(a), y = sum(b)
* becomes
* | WHERE a IS NOT NULL OR b IS NOT NULL
Expand Down Expand Up @@ -55,7 +55,7 @@ protected LogicalPlan rule(Aggregate aggregate, LocalLogicalOptimizerContext con
Expression field = af.field();
// ignore literals (e.g. COUNT(1))
// make sure the field exists at the source and is indexed (not runtime)
if (field.foldable() == false && field instanceof FieldAttribute fa && stats.isIndexed(fa.name())) {
if (field.foldable() == false && field instanceof FieldAttribute fa && stats.isIndexed(fa.fieldName())) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, this is the actual fix.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks so familiar. I'm sure I've done that in a few places before, probably during the union-types work. Pity I was not thorough! What I did was replace field.name() with field.field().getName().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Caution!

field.field().getName() is wrong in case of subfields! The EsField doesn't know about the full path, so this is wrong without also using field.parentName().

I hope this gets easier with the updated javadoc. :/

nonNullAggFields.add(field);
} else {
// otherwise bail out since unless disjunction needs to cover _all_ fields, things get filtered out
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,32 +126,36 @@ public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, Stri
};

/**
* If we have access to SearchStats over a collection of shards, we can make more fine-grained decisions about what can be pushed down.
* This should open up more opportunities for lucene pushdown.
* If we have access to {@link SearchStats} over a collection of shards, we can make more fine-grained decisions about what can be
* pushed down. This should open up more opportunities for lucene pushdown.
*/
static LucenePushdownPredicates from(SearchStats stats) {
// TODO: use FieldAttribute#fieldName, otherwise this doesn't apply to field attributes used for union types.
// C.f. https://github.com/elastic/elasticsearch/issues/128905
return new LucenePushdownPredicates() {
@Override
public boolean hasExactSubfield(FieldAttribute attr) {
return stats.hasExactSubfield(attr.name());
return stats.hasExactSubfield(new FieldAttribute.FieldName(attr.name()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we not calling attr.fieldName()?

Copy link
Contributor Author

@alex-spies alex-spies Jun 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fails the union type csv tests - I didn't want to fix it in this PR but opened #128905

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Applies also to the instances below.

}

@Override
public boolean isIndexedAndHasDocValues(FieldAttribute attr) {
// We still consider the value of isAggregatable here, because some fields like ScriptFieldTypes are always aggregatable
// But this could hide issues with fields that are not indexed but are aggregatable
// This is the original behaviour for ES|QL, but is it correct?
return attr.field().isAggregatable() || stats.isIndexed(attr.name()) && stats.hasDocValues(attr.name());
return attr.field().isAggregatable()
|| stats.isIndexed(new FieldAttribute.FieldName(attr.name()))
&& stats.hasDocValues(new FieldAttribute.FieldName(attr.name()));
}

@Override
public boolean isIndexed(FieldAttribute attr) {
return stats.isIndexed(attr.name());
return stats.isIndexed(new FieldAttribute.FieldName(attr.name()));
}

@Override
public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value) {
return stats.canUseEqualityOnSyntheticSourceDelegate(attr.field().getName(), value);
return stats.canUseEqualityOnSyntheticSourceDelegate(attr.fieldName(), value);
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ private Tuple<List<Attribute>, List<EsStatsQueryExec.Stat>> pushableStats(
if (target instanceof FieldAttribute fa) {
var fName = fa.fieldName();
if (context.searchStats().isSingleValue(fName)) {
fieldName = fName;
fieldName = fName.toString();
query = QueryBuilders.existsQuery(fieldName);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi

private static String getFieldName(Attribute attr) {
// Do not use the field attribute name, this can deviate from the field name for union types.
return attr instanceof FieldAttribute fa ? fa.fieldName() : attr.name();
return attr instanceof FieldAttribute fa ? fa.fieldName().toString() : attr.name();
}

private BlockLoader getBlockLoaderFor(int shardId, Attribute attr, MappedFieldType.FieldExtractPreference fieldExtractPreference) {
Expand Down
Loading
Loading