Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
a707bec
Make equals include ids for Alias, TypedAttribute
alex-spies Aug 5, 2025
2048598
Add test case for circular renames
alex-spies Sep 15, 2025
d0a994f
Add tests for ENRICH/LOOKUP JOIN + VALUES agg
alex-spies Sep 15, 2025
d34572a
Use id in hashCode for Alias, TypedAttribute
alex-spies Sep 15, 2025
3e8e919
Fix comment
alex-spies Sep 15, 2025
a2bb5e9
[CI] Update transport version definitions
Sep 15, 2025
a191f30
Make Attribute#equals generally respect the id
alex-spies Sep 17, 2025
f77652c
Merge remote-tracking branch 'upstream/main' into make-attribute-equa…
alex-spies Sep 17, 2025
41a60d3
[CI] Auto commit changes from spotless
Sep 17, 2025
c67035c
Introduce non-semantic equality for attributes
alex-spies Sep 17, 2025
eb07eb6
Fix union type resolution
alex-spies Sep 17, 2025
eeb6c5a
Fix a bunch of serialization tests
alex-spies Sep 18, 2025
4771dba
Fix most csv tests
alex-spies Sep 18, 2025
eb47b87
Merge remote-tracking branch 'upstream/main' into make-attribute-equa…
alex-spies Sep 23, 2025
9db6528
Add comment to semanticEquals
alex-spies Sep 23, 2025
782067b
Fix IncreaseSerializationTests
alex-spies Sep 23, 2025
6b7fa4e
Fix attribute tests
alex-spies Sep 23, 2025
09cd30a
[CI] Update transport version definitions
Sep 23, 2025
8309ca6
Merge remote-tracking branch 'upstream/main' into make-attribute-equa…
alex-spies Sep 25, 2025
5f4cd4f
Actually assert name ids in serialization tests
alex-spies Sep 25, 2025
9dcbd41
WIP: fix serialization tests
alex-spies Sep 25, 2025
6ff0283
[CI] Auto commit changes from spotless
Sep 25, 2025
1e6b100
Simplify de-/serialization tests
alex-spies Sep 26, 2025
4dab56b
Simplify de-/serialization tests some more
alex-spies Sep 26, 2025
5f62f72
Fix analyzer tests
alex-spies Sep 26, 2025
f5add18
Fix equality for failures
alex-spies Sep 26, 2025
9d9ad86
Fix some more tests
alex-spies Sep 26, 2025
c73b45d
Fix some more serialization tests
alex-spies Sep 26, 2025
8800d90
Merge remote-tracking branch 'upstream/main' into make-attribute-equa…
alex-spies Oct 3, 2025
e40f95f
Comment capability formatting
alex-spies Oct 3, 2025
9800b8e
Fix some more serialization tests
alex-spies Oct 3, 2025
53d50e9
Fix some more tests
alex-spies Oct 3, 2025
99dd677
Update x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/…
alex-spies Oct 3, 2025
8d6a0fa
Merge remote-tracking branch 'upstream/main' into make-attribute-equa…
alex-spies Oct 6, 2025
5afab2f
Simplify passing through test id mappers
alex-spies Oct 6, 2025
6c5bd8c
Fix some more tests
alex-spies Oct 6, 2025
44f580d
Fix AttributeMapTests
alex-spies Oct 6, 2025
c6b4c24
Merge remote-tracking branch 'upstream/main' into make-attribute-equa…
alex-spies Oct 20, 2025
4443fcd
Simplify NamedExpression#equals and #hashCode
alex-spies Oct 20, 2025
349c3fe
NonSemanticAttribute -> IdIgnoringWrapper
alex-spies Oct 20, 2025
b00b639
Improve javadoc for Attribute
alex-spies Oct 20, 2025
af30e47
Add tests + simplify some more
alex-spies Oct 20, 2025
6253ef0
Update docs/changelog/132455.yaml
alex-spies Oct 20, 2025
f3acb8f
Expand test of equality for id mutation
alex-spies Oct 20, 2025
826d4b6
Merge remote-tracking branch 'upstream/main' into make-attribute-equa…
alex-spies Oct 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changelog/132455.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 132455
summary: "Make equals include ids for Alias, `TypedAttribute`"
area: ES|QL
type: bug
issues:
- 131509
- 132634
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@

/**
* An {@code Alias} is a {@code NamedExpression} that gets renamed to something else through the Alias.
*
* <p>
* For example, in the statement {@code 5 + 2 AS x}, {@code x} is an alias which is points to {@code ADD(5, 2)}.
*
* And in {@code SELECT col AS x} "col" is a named expression that gets renamed to "x" through an alias.
*
* <p>
* Note on equality: The id is respected in {@link #equals(Object)} for Alias because the {@link ReferenceAttribute}
* created by {@link #toAttribute()} uses it.
*/
public final class Alias extends NamedExpression {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(NamedExpression.class, "Alias", Alias::new);
Expand Down Expand Up @@ -71,6 +72,10 @@ public Alias(StreamInput in) throws IOException {
);
}

public Alias withId(NameId id) {
return new Alias(source(), name(), child(), id, synthetic());
}

@Override
public void writeTo(StreamOutput out) throws IOException {
Source.EMPTY.writeTo(out);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,55 @@
/**
* {@link Expression}s that can be materialized and describe properties of the derived table.
* In other words, an attribute represent a column in the results of a query.
*
* <p>
* In the statement {@code SELECT ABS(foo), A, B+C FROM ...} the three named
* expressions {@code ABS(foo), A, B+C} get converted to attributes and the user can
* only see Attributes.
*
* <p>
* In the statement {@code SELECT foo FROM TABLE WHERE foo > 10 + 1} only {@code foo} inside the SELECT
* is a named expression (an {@code Alias} will be created automatically for it).
* The rest are not as they are not part of the projection and thus are not part of the derived table.
* <p>
* Note on equality: Because the name alone is not sufficient to identify an attribute
* (two different relations can have the same attribute name), attributes get a unique {@link #id()}
* assigned at creation which is respected in equality checks and hashing.
* <p>
* Caution! {@link #semanticEquals(Expression)} and {@link #semanticHash()} rely solely on the id.
* But this doesn't extend to expressions containing attributes as children.
*/
public abstract class Attribute extends NamedExpression {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The attribute hierarchy is getting scarier for me.
This is probably outside of this change
I wonder if it is worth converting it to a flat record with a type field and a number of factories?

Something like:

Attribute.EMPTY
Attribute.unresolved(..)
Attribute.unsupported(..)
Attribute.metadata(..)
Attribute.field(..)
...

this will have the super set of all fields needed for everything but will be much easier to check equality and type (opposed to instanceof). Updating serialization accordingly might be a chllenge.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, maybe that's possible, but will mess with many instanceOf checks that we rely on. Let's consider this for another time.

/**
* A wrapper class where equality of the contained attribute ignores the {@link Attribute#id()}. Useful when we want to create new
* attributes and want to avoid duplicates. Because we assign unique ids on creation, a normal equality check would always fail when
* we create the "same" attribute again.
* <p>
* C.f. {@link AttributeMap} (and its contained wrapper class) which does the exact opposite: ignores everything but the id by
* using {@link Attribute#semanticEquals(Expression)}.
*/
public record IdIgnoringWrapper(Attribute inner) {
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}

Attribute otherAttribute = ((IdIgnoringWrapper) o).inner();
return inner().equals(otherAttribute, true);
}

@Override
public int hashCode() {
return inner().hashCode(true);
}
}

public IdIgnoringWrapper ignoreId() {
return new IdIgnoringWrapper(this);
}

/**
* Changing this will break bwc with 8.15, see {@link FieldAttribute#fieldName()}.
*/
Expand Down Expand Up @@ -152,7 +191,7 @@ public int semanticHash() {

@Override
public boolean semanticEquals(Expression other) {
return other instanceof Attribute ? id().equals(((Attribute) other).id()) : false;
return other instanceof Attribute otherAttr && id().equals(otherAttr.id());
}

@Override
Expand All @@ -161,15 +200,16 @@ protected Expression canonicalize() {
}

@Override
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
public int hashCode() {
return Objects.hash(super.hashCode(), qualifier, nullability);
protected int innerHashCode(boolean ignoreIds) {
return Objects.hash(super.innerHashCode(ignoreIds), qualifier, nullability);
}

@Override
protected boolean innerEquals(Object o) {
protected boolean innerEquals(Object o, boolean ignoreIds) {
var other = (Attribute) o;
return super.innerEquals(other) && Objects.equals(qualifier, other.qualifier) && Objects.equals(nullability, other.nullability);
return super.innerEquals(other, ignoreIds)
&& Objects.equals(qualifier, other.qualifier)
&& Objects.equals(nullability, other.nullability);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@

/**
* Marker for optional attributes. Acting as a dummy placeholder to avoid using null
* in the tree (which is not allowed).
* in the tree (which is not allowed). All empty attributes are considered equal.
*/
public class EmptyAttribute extends Attribute {
// TODO: Could be a singleton - all instances are already considered equal.
public EmptyAttribute(Source source) {
super(source, StringUtils.EMPTY, null);
}
Expand Down Expand Up @@ -78,13 +79,22 @@ protected NodeInfo<? extends Expression> info() {
}

@Override
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
public int hashCode() {
protected int innerHashCode(boolean ignoreIds) {
return EmptyAttribute.class.hashCode();
}

@Override
protected boolean innerEquals(Object o) {
protected boolean innerEquals(Object o, boolean ignoreIds) {
return true;
}

@Override
public int semanticHash() {
return EmptyAttribute.class.hashCode();
}

@Override
public boolean semanticEquals(Expression other) {
return other instanceof EmptyAttribute;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,26 @@ protected Expression canonicalize() {
return replaceChildrenSameSize(canonicalChildren);
}

/**
* Whether this expression means the same as {@code other}, even if they are not exactly equal.
* For example, {@code a + b} and {@code b + a} are not equal, but they are semantically equal.
* <p>
* If two expressions are equal, they are also semantically equal, but the reverse is generally not true.
* <p>
* Caution! {@link Attribute#semanticEquals(Expression)} is especially lenient, as it considers two attributes
* with the same {@link NameId} to be semantically equal, even if they have different data types or are represented using different
* classes.
* <p>
* But this doesn't extend to expressions containing attributes as children, which is pretty inconsistent.
* We have to revisit this before using {@link #semanticEquals} in more places.
*/
public boolean semanticEquals(Expression other) {
return canonical().equals(other.canonical());
}

/**
* A hash code that is consistent with {@link #semanticEquals}.
*/
public int semanticHash() {
return canonical().hashCode();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,15 +231,14 @@ public Attribute withDataType(DataType type) {
}

@Override
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
public int hashCode() {
return Objects.hash(super.hashCode(), parentName, field);
protected int innerHashCode(boolean ignoreIds) {
return Objects.hash(super.innerHashCode(ignoreIds), parentName, field);
}

@Override
protected boolean innerEquals(Object o) {
protected boolean innerEquals(Object o, boolean ignoreIds) {
var other = (FieldAttribute) o;
return super.innerEquals(other) && Objects.equals(parentName, other.parentName) && Objects.equals(field, other.field);
return super.innerEquals(other, ignoreIds) && Objects.equals(parentName, other.parentName) && Objects.equals(field, other.field);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,13 @@ public static boolean isScoreAttribute(Expression a) {
}

@Override
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
public int hashCode() {
return Objects.hash(super.hashCode(), searchable);
protected int innerHashCode(boolean ignoreIds) {
return Objects.hash(super.innerHashCode(ignoreIds), searchable);
}

@Override
protected boolean innerEquals(Object o) {
protected boolean innerEquals(Object o, boolean ignoreIds) {
var other = (MetadataAttribute) o;
return super.innerEquals(other) && searchable == other.searchable;
return super.innerEquals(other, ignoreIds) && searchable == other.searchable;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ public NameId() {
this.id = COUNTER.incrementAndGet();
}

/**
* Absolutely only intended for tests, esp. to deal with serialization. Never use in production as it breaks the
* uniqueness guarantee.
*/
@Deprecated
public NameId(long id) {
this.id = id;
}

@Override
public int hashCode() {
return Long.hashCode(id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ public NamedExpression(Source source, String name, List<Expression> children, @N
this(source, name, children, id, false);
}

/**
* Assigns a new id if null is passed for {@code id}.
*/
public NamedExpression(Source source, String name, List<Expression> children, @Nullable NameId id, boolean synthetic) {
super(source, children);
this.name = name;
Expand Down Expand Up @@ -57,36 +60,53 @@ public boolean synthetic() {
public abstract Attribute toAttribute();

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), name, synthetic);
public final int hashCode() {
return hashCode(false);
}

public final int hashCode(boolean ignoreIds) {
return innerHashCode(ignoreIds);
}

protected int innerHashCode(boolean ignoreIds) {
return ignoreIds ? Objects.hash(super.hashCode(), name, synthetic) : Objects.hash(super.hashCode(), id, name, synthetic);
}

/**
* Polymorphic equality is a pain and are likely slower than a regular ones.
* This equals shortcuts `this == o` and type checks (important when we expect only a few non-equal objects).
* Here equals is final to ensure we are not duplicating those checks.
* For actual equality check override `innerEquals` instead.
*/
@Override
public final boolean equals(Object o) {
return equals(o, false);
}

/**
* Polymorphic equality is a pain and can be slow.
* This shortcuts {@code this == o} and class checks (important when we expect only a few non-equal objects).
* <p>
* For the actual equality check override {@link #innerEquals(Object, boolean)} instead.
* <p>
* We also provide the option to ignore NameIds in the equality check, which helps e.g. when creating named expressions
* while avoiding duplicates, or when attaching failures to unresolved attributes (see Failure.equals).
* Some classes will always ignore ids, irrespective of the parameter passed here.
*/
public final boolean equals(Object o, boolean ignoreIds) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
return innerEquals(o);
return innerEquals(o, ignoreIds);
}

protected boolean innerEquals(Object o) {
/**
* The actual equality check, after shortcutting {@code this == o} and class checks.
*/
protected boolean innerEquals(Object o, boolean ignoreIds) {
var other = (NamedExpression) o;
return synthetic == other.synthetic
/*
* It is important that the line below be `name`
* and not `name()` because subclasses might override
* `name()` in ways that are not compatible with
* equality. Specifically the `Unresolved` subclasses.
*/
return (ignoreIds || Objects.equals(id, other.id)) && synthetic == other.synthetic
// It is important that the line below be `name`
// and not `name()` because subclasses might override
// `name()` in ways that are not compatible with
// equality. Specifically the `Unresolved` subclasses.
&& Objects.equals(name, other.name)
&& Objects.equals(children(), other.children());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@

import java.util.Objects;

/**
* A fully resolved attribute - we know its type. For example, if it references data directly from Lucene, this will be a
* {@link FieldAttribute}. If it references the results of another calculation it will be {@link ReferenceAttribute}s.
*/
public abstract class TypedAttribute extends Attribute {

private final DataType dataType;
Expand Down Expand Up @@ -46,14 +50,13 @@ public DataType dataType() {
}

@Override
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
public int hashCode() {
return Objects.hash(super.hashCode(), dataType);
protected int innerHashCode(boolean ignoreIds) {
return Objects.hash(super.innerHashCode(ignoreIds), dataType);
}

@Override
protected boolean innerEquals(Object o) {
protected boolean innerEquals(Object o, boolean ignoreIds) {
var other = (TypedAttribute) o;
return super.innerEquals(other) && dataType == other.dataType;
return super.innerEquals(other, ignoreIds) && dataType == other.dataType;
}
}
Loading