Skip to content

Commit f7d4422

Browse files
authored
ESQL: Make equals include ids for Alias, TypedAttribute (#132455)
Fix #131509 Fix #132634 Make `Attribute#equals` and `Alias#equals` respect the `NameId`s, so that plan transformations that happen to replace an attribute/alias by one that only differs by id, we still actually update the plan rather than keeping the old plan object. This requires a bunch of boilerplate-y test updates. I'll point to the non-boilerplate changes in comments below.
1 parent bc1a04c commit f7d4422

File tree

65 files changed

+1126
-669
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+1126
-669
lines changed

docs/changelog/132455.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
pr: 132455
2+
summary: "Make equals include ids for Alias, `TypedAttribute`"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 131509
7+
- 132634

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,12 @@
2222

2323
/**
2424
* An {@code Alias} is a {@code NamedExpression} that gets renamed to something else through the Alias.
25-
*
25+
* <p>
2626
* For example, in the statement {@code 5 + 2 AS x}, {@code x} is an alias which is points to {@code ADD(5, 2)}.
27-
*
2827
* And in {@code SELECT col AS x} "col" is a named expression that gets renamed to "x" through an alias.
29-
*
28+
* <p>
29+
* Note on equality: The id is respected in {@link #equals(Object)} for Alias because the {@link ReferenceAttribute}
30+
* created by {@link #toAttribute()} uses it.
3031
*/
3132
public final class Alias extends NamedExpression {
3233
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(NamedExpression.class, "Alias", Alias::new);
@@ -71,6 +72,10 @@ public Alias(StreamInput in) throws IOException {
7172
);
7273
}
7374

75+
public Alias withId(NameId id) {
76+
return new Alias(source(), name(), child(), id, synthetic());
77+
}
78+
7479
@Override
7580
public void writeTo(StreamOutput out) throws IOException {
7681
Source.EMPTY.writeTo(out);

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,55 @@
2222
/**
2323
* {@link Expression}s that can be materialized and describe properties of the derived table.
2424
* In other words, an attribute represent a column in the results of a query.
25-
*
25+
* <p>
2626
* In the statement {@code SELECT ABS(foo), A, B+C FROM ...} the three named
2727
* expressions {@code ABS(foo), A, B+C} get converted to attributes and the user can
2828
* only see Attributes.
29-
*
29+
* <p>
3030
* In the statement {@code SELECT foo FROM TABLE WHERE foo > 10 + 1} only {@code foo} inside the SELECT
3131
* is a named expression (an {@code Alias} will be created automatically for it).
3232
* The rest are not as they are not part of the projection and thus are not part of the derived table.
33+
* <p>
34+
* Note on equality: Because the name alone is not sufficient to identify an attribute
35+
* (two different relations can have the same attribute name), attributes get a unique {@link #id()}
36+
* assigned at creation which is respected in equality checks and hashing.
37+
* <p>
38+
* Caution! {@link #semanticEquals(Expression)} and {@link #semanticHash()} rely solely on the id.
39+
* But this doesn't extend to expressions containing attributes as children.
3340
*/
3441
public abstract class Attribute extends NamedExpression {
42+
/**
43+
* A wrapper class where equality of the contained attribute ignores the {@link Attribute#id()}. Useful when we want to create new
44+
* attributes and want to avoid duplicates. Because we assign unique ids on creation, a normal equality check would always fail when
45+
* we create the "same" attribute again.
46+
* <p>
47+
* C.f. {@link AttributeMap} (and its contained wrapper class) which does the exact opposite: ignores everything but the id by
48+
* using {@link Attribute#semanticEquals(Expression)}.
49+
*/
50+
public record IdIgnoringWrapper(Attribute inner) {
51+
@Override
52+
public boolean equals(Object o) {
53+
if (this == o) {
54+
return true;
55+
}
56+
if (o == null || getClass() != o.getClass()) {
57+
return false;
58+
}
59+
60+
Attribute otherAttribute = ((IdIgnoringWrapper) o).inner();
61+
return inner().equals(otherAttribute, true);
62+
}
63+
64+
@Override
65+
public int hashCode() {
66+
return inner().hashCode(true);
67+
}
68+
}
69+
70+
public IdIgnoringWrapper ignoreId() {
71+
return new IdIgnoringWrapper(this);
72+
}
73+
3574
/**
3675
* Changing this will break bwc with 8.15, see {@link FieldAttribute#fieldName()}.
3776
*/
@@ -152,7 +191,7 @@ public int semanticHash() {
152191

153192
@Override
154193
public boolean semanticEquals(Expression other) {
155-
return other instanceof Attribute ? id().equals(((Attribute) other).id()) : false;
194+
return other instanceof Attribute otherAttr && id().equals(otherAttr.id());
156195
}
157196

158197
@Override
@@ -161,15 +200,16 @@ protected Expression canonicalize() {
161200
}
162201

163202
@Override
164-
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
165-
public int hashCode() {
166-
return Objects.hash(super.hashCode(), qualifier, nullability);
203+
protected int innerHashCode(boolean ignoreIds) {
204+
return Objects.hash(super.innerHashCode(ignoreIds), qualifier, nullability);
167205
}
168206

169207
@Override
170-
protected boolean innerEquals(Object o) {
208+
protected boolean innerEquals(Object o, boolean ignoreIds) {
171209
var other = (Attribute) o;
172-
return super.innerEquals(other) && Objects.equals(qualifier, other.qualifier) && Objects.equals(nullability, other.nullability);
210+
return super.innerEquals(other, ignoreIds)
211+
&& Objects.equals(qualifier, other.qualifier)
212+
&& Objects.equals(nullability, other.nullability);
173213
}
174214

175215
@Override

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/EmptyAttribute.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@
1717

1818
/**
1919
* Marker for optional attributes. Acting as a dummy placeholder to avoid using null
20-
* in the tree (which is not allowed).
20+
* in the tree (which is not allowed). All empty attributes are considered equal.
2121
*/
2222
public class EmptyAttribute extends Attribute {
23+
// TODO: Could be a singleton - all instances are already considered equal.
2324
public EmptyAttribute(Source source) {
2425
super(source, StringUtils.EMPTY, null);
2526
}
@@ -78,13 +79,22 @@ protected NodeInfo<? extends Expression> info() {
7879
}
7980

8081
@Override
81-
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
82-
public int hashCode() {
82+
protected int innerHashCode(boolean ignoreIds) {
8383
return EmptyAttribute.class.hashCode();
8484
}
8585

8686
@Override
87-
protected boolean innerEquals(Object o) {
87+
protected boolean innerEquals(Object o, boolean ignoreIds) {
8888
return true;
8989
}
90+
91+
@Override
92+
public int semanticHash() {
93+
return EmptyAttribute.class.hashCode();
94+
}
95+
96+
@Override
97+
public boolean semanticEquals(Expression other) {
98+
return other instanceof EmptyAttribute;
99+
}
90100
}

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expression.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,26 @@ protected Expression canonicalize() {
180180
return replaceChildrenSameSize(canonicalChildren);
181181
}
182182

183+
/**
184+
* Whether this expression means the same as {@code other}, even if they are not exactly equal.
185+
* For example, {@code a + b} and {@code b + a} are not equal, but they are semantically equal.
186+
* <p>
187+
* If two expressions are equal, they are also semantically equal, but the reverse is generally not true.
188+
* <p>
189+
* Caution! {@link Attribute#semanticEquals(Expression)} is especially lenient, as it considers two attributes
190+
* with the same {@link NameId} to be semantically equal, even if they have different data types or are represented using different
191+
* classes.
192+
* <p>
193+
* But this doesn't extend to expressions containing attributes as children, which is pretty inconsistent.
194+
* We have to revisit this before using {@link #semanticEquals} in more places.
195+
*/
183196
public boolean semanticEquals(Expression other) {
184197
return canonical().equals(other.canonical());
185198
}
186199

200+
/**
201+
* A hash code that is consistent with {@link #semanticEquals}.
202+
*/
187203
public int semanticHash() {
188204
return canonical().hashCode();
189205
}

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -231,15 +231,14 @@ public Attribute withDataType(DataType type) {
231231
}
232232

233233
@Override
234-
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
235-
public int hashCode() {
236-
return Objects.hash(super.hashCode(), parentName, field);
234+
protected int innerHashCode(boolean ignoreIds) {
235+
return Objects.hash(super.innerHashCode(ignoreIds), parentName, field);
237236
}
238237

239238
@Override
240-
protected boolean innerEquals(Object o) {
239+
protected boolean innerEquals(Object o, boolean ignoreIds) {
241240
var other = (FieldAttribute) o;
242-
return super.innerEquals(other) && Objects.equals(parentName, other.parentName) && Objects.equals(field, other.field);
241+
return super.innerEquals(other, ignoreIds) && Objects.equals(parentName, other.parentName) && Objects.equals(field, other.field);
243242
}
244243

245244
@Override

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -170,14 +170,13 @@ public static boolean isScoreAttribute(Expression a) {
170170
}
171171

172172
@Override
173-
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
174-
public int hashCode() {
175-
return Objects.hash(super.hashCode(), searchable);
173+
protected int innerHashCode(boolean ignoreIds) {
174+
return Objects.hash(super.innerHashCode(ignoreIds), searchable);
176175
}
177176

178177
@Override
179-
protected boolean innerEquals(Object o) {
178+
protected boolean innerEquals(Object o, boolean ignoreIds) {
180179
var other = (MetadataAttribute) o;
181-
return super.innerEquals(other) && searchable == other.searchable;
180+
return super.innerEquals(other, ignoreIds) && searchable == other.searchable;
182181
}
183182
}

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/NameId.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,15 @@ public NameId() {
3131
this.id = COUNTER.incrementAndGet();
3232
}
3333

34+
/**
35+
* Absolutely only intended for tests, esp. to deal with serialization. Never use in production as it breaks the
36+
* uniqueness guarantee.
37+
*/
38+
@Deprecated
39+
public NameId(long id) {
40+
this.id = id;
41+
}
42+
3443
@Override
3544
public int hashCode() {
3645
return Long.hashCode(id);

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/NamedExpression.java

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ public NamedExpression(Source source, String name, List<Expression> children, @N
2727
this(source, name, children, id, false);
2828
}
2929

30+
/**
31+
* Assigns a new id if null is passed for {@code id}.
32+
*/
3033
public NamedExpression(Source source, String name, List<Expression> children, @Nullable NameId id, boolean synthetic) {
3134
super(source, children);
3235
this.name = name;
@@ -57,36 +60,53 @@ public boolean synthetic() {
5760
public abstract Attribute toAttribute();
5861

5962
@Override
60-
public int hashCode() {
61-
return Objects.hash(super.hashCode(), name, synthetic);
63+
public final int hashCode() {
64+
return hashCode(false);
65+
}
66+
67+
public final int hashCode(boolean ignoreIds) {
68+
return innerHashCode(ignoreIds);
69+
}
70+
71+
protected int innerHashCode(boolean ignoreIds) {
72+
return ignoreIds ? Objects.hash(super.hashCode(), name, synthetic) : Objects.hash(super.hashCode(), id, name, synthetic);
6273
}
6374

64-
/**
65-
* Polymorphic equality is a pain and are likely slower than a regular ones.
66-
* This equals shortcuts `this == o` and type checks (important when we expect only a few non-equal objects).
67-
* Here equals is final to ensure we are not duplicating those checks.
68-
* For actual equality check override `innerEquals` instead.
69-
*/
7075
@Override
7176
public final boolean equals(Object o) {
77+
return equals(o, false);
78+
}
79+
80+
/**
81+
* Polymorphic equality is a pain and can be slow.
82+
* This shortcuts {@code this == o} and class checks (important when we expect only a few non-equal objects).
83+
* <p>
84+
* For the actual equality check override {@link #innerEquals(Object, boolean)} instead.
85+
* <p>
86+
* We also provide the option to ignore NameIds in the equality check, which helps e.g. when creating named expressions
87+
* while avoiding duplicates, or when attaching failures to unresolved attributes (see Failure.equals).
88+
* Some classes will always ignore ids, irrespective of the parameter passed here.
89+
*/
90+
public final boolean equals(Object o, boolean ignoreIds) {
7291
if (this == o) {
7392
return true;
7493
}
7594
if (o == null || getClass() != o.getClass()) {
7695
return false;
7796
}
78-
return innerEquals(o);
97+
return innerEquals(o, ignoreIds);
7998
}
8099

81-
protected boolean innerEquals(Object o) {
100+
/**
101+
* The actual equality check, after shortcutting {@code this == o} and class checks.
102+
*/
103+
protected boolean innerEquals(Object o, boolean ignoreIds) {
82104
var other = (NamedExpression) o;
83-
return synthetic == other.synthetic
84-
/*
85-
* It is important that the line below be `name`
86-
* and not `name()` because subclasses might override
87-
* `name()` in ways that are not compatible with
88-
* equality. Specifically the `Unresolved` subclasses.
89-
*/
105+
return (ignoreIds || Objects.equals(id, other.id)) && synthetic == other.synthetic
106+
// It is important that the line below be `name`
107+
// and not `name()` because subclasses might override
108+
// `name()` in ways that are not compatible with
109+
// equality. Specifically the `Unresolved` subclasses.
90110
&& Objects.equals(name, other.name)
91111
&& Objects.equals(children(), other.children());
92112
}

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypedAttribute.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212

1313
import java.util.Objects;
1414

15+
/**
16+
* A fully resolved attribute - we know its type. For example, if it references data directly from Lucene, this will be a
17+
* {@link FieldAttribute}. If it references the results of another calculation it will be {@link ReferenceAttribute}s.
18+
*/
1519
public abstract class TypedAttribute extends Attribute {
1620

1721
private final DataType dataType;
@@ -46,14 +50,13 @@ public DataType dataType() {
4650
}
4751

4852
@Override
49-
@SuppressWarnings("checkstyle:EqualsHashCode")// equals is implemented in parent. See innerEquals instead
50-
public int hashCode() {
51-
return Objects.hash(super.hashCode(), dataType);
53+
protected int innerHashCode(boolean ignoreIds) {
54+
return Objects.hash(super.innerHashCode(ignoreIds), dataType);
5255
}
5356

5457
@Override
55-
protected boolean innerEquals(Object o) {
58+
protected boolean innerEquals(Object o, boolean ignoreIds) {
5659
var other = (TypedAttribute) o;
57-
return super.innerEquals(other) && dataType == other.dataType;
60+
return super.innerEquals(other, ignoreIds) && dataType == other.dataType;
5861
}
5962
}

0 commit comments

Comments
 (0)