Skip to content

Commit c778b3b

Browse files
committed
Keep the last value in the top while using TreeSet, for performance
1 parent 13900f6 commit c778b3b

File tree

1 file changed

+38
-7
lines changed
  • x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash

1 file changed

+38
-7
lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/LongTopNBlockHash.java

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@ public final class LongTopNBlockHash extends BlockHash {
3838
private final int limit;
3939
private final LongHash hash;
4040
private final TreeSet<Long> topValues;
41+
/**
42+
* Helper field to keep track of the last top value.
43+
* <p>
44+
* Used temporarily as TreeSet#last() is O(log(n)) and would slow every insertion.
45+
* </p>
46+
*/
47+
private long lastTopValue;
4148

4249
/**
4350
* Have we seen any {@code null} values?
@@ -57,6 +64,7 @@ public LongTopNBlockHash(int channel, boolean asc, boolean nullsFirst, int limit
5764
this.limit = limit;
5865
this.hash = new LongHash(1, blockFactory.bigArrays());
5966
this.topValues = new TreeSet<>(asc ? Comparator.<Long>naturalOrder() : Comparator.<Long>reverseOrder());
67+
this.lastTopValue = asc ? Long.MAX_VALUE : Long.MIN_VALUE;
6068

6169
assert limit > 0 : "LongTopNBlockHash requires a limit greater than 0";
6270
}
@@ -98,6 +106,11 @@ private boolean acceptNull() {
98106
hasNull = true;
99107
if (topValues.size() == limit) {
100108
topValues.remove(topValues.last());
109+
if (topValues.isEmpty()) {
110+
lastTopValue = asc ? Long.MAX_VALUE : Long.MIN_VALUE;
111+
} else {
112+
lastTopValue = topValues.last();
113+
}
101114
}
102115
return true;
103116
}
@@ -118,19 +131,37 @@ private boolean acceptValue(long value) {
118131
return false;
119132
}
120133

121-
topValues.add(value);
134+
if (hash.find(value) >= 0) {
135+
// Already in the hash, no need to add it again
136+
return true;
137+
}
122138

123-
if (topValues.size() > limit - (hasNull ? 1 : 0)) {
124-
if (hasNull && nullsFirst == false) {
125-
hasNull = false;
126-
} else {
127-
topValues.remove(topValues.last());
139+
if (topValues.add(value)) {
140+
if (isBetterThan(lastTopValue, value) || topValues.size() == 1) {
141+
lastTopValue = value;
142+
}
143+
144+
if (topValues.size() > limit - (hasNull ? 1 : 0)) {
145+
if (hasNull && nullsFirst == false) {
146+
hasNull = false;
147+
} else {
148+
topValues.remove(topValues.last());
149+
if (topValues.isEmpty()) {
150+
lastTopValue = asc ? Long.MAX_VALUE : Long.MIN_VALUE;
151+
} else {
152+
lastTopValue = topValues.last();
153+
}
154+
}
128155
}
129156
}
130157

131158
return true;
132159
}
133160

161+
private boolean isBetterThan(long value, long other) {
162+
return asc ? value < other : value > other;
163+
}
164+
134165
/**
135166
* Returns true if the value is in, or can be added to the top; false otherwise.
136167
*/
@@ -145,7 +176,7 @@ private boolean isAcceptable(long value) {
145176
* </p>
146177
*/
147178
private boolean isInTop(long value) {
148-
return asc ? value <= topValues.last() : value >= topValues.last();
179+
return asc ? value <= lastTopValue : value >= lastTopValue;
149180
}
150181

151182
/**

0 commit comments

Comments
 (0)