Skip to content

Commit 76291a6

Browse files
committed
Renamed topN set and added tests
1 parent 40b7682 commit 76291a6

File tree

4 files changed

+285
-9
lines changed

4 files changed

+285
-9
lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/LongTopNBlockHash.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import org.elasticsearch.compute.data.LongBlock;
2020
import org.elasticsearch.compute.data.LongVector;
2121
import org.elasticsearch.compute.data.Page;
22-
import org.elasticsearch.compute.data.sort.LongTopNUniqueSort;
22+
import org.elasticsearch.compute.data.sort.LongTopNSet;
2323
import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupe;
2424
import org.elasticsearch.compute.operator.mvdedupe.TopNMultivalueDedupeLong;
2525
import org.elasticsearch.core.ReleasableIterator;
@@ -37,7 +37,7 @@ final class LongTopNBlockHash extends BlockHash {
3737
private final boolean nullsFirst;
3838
private final int limit;
3939
private final LongHash hash;
40-
private final LongTopNUniqueSort topValues;
40+
private final LongTopNSet topValues;
4141

4242
/**
4343
* Have we seen any {@code null} values?
@@ -59,7 +59,7 @@ final class LongTopNBlockHash extends BlockHash {
5959
boolean success = false;
6060
try {
6161
this.hash = new LongHash(1, blockFactory.bigArrays());
62-
this.topValues = new LongTopNUniqueSort(blockFactory.bigArrays(), asc ? SortOrder.ASC : SortOrder.DESC, limit);
62+
this.topValues = new LongTopNSet(blockFactory.bigArrays(), asc ? SortOrder.ASC : SortOrder.DESC, limit);
6363
} finally {
6464
if (success == false) {
6565
close();
Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@
1212
import org.elasticsearch.common.util.LongArray;
1313
import org.elasticsearch.core.Releasable;
1414
import org.elasticsearch.core.Releasables;
15-
import org.elasticsearch.search.sort.BucketedSort;
1615
import org.elasticsearch.search.sort.SortOrder;
1716

1817
/**
19-
* Aggregates the top N long values per bucket.
20-
* See {@link BucketedSort} for more information.
21-
* This class is generated. Edit @{code X-BucketedSort.java.st} instead of this file.
18+
* Aggregates the top N collected values, and keeps them sorted.
19+
* <p>
20+
* Collection is O(1) for values outside the current top N. For values better than the worst value, it's O(log(n)).
21+
* </p>
2222
*/
23-
public class LongTopNUniqueSort implements Releasable {
23+
public class LongTopNSet implements Releasable {
2424

2525
private final SortOrder order;
2626
private int limit;
@@ -30,14 +30,17 @@ public class LongTopNUniqueSort implements Releasable {
3030

3131
private int count;
3232

33-
public LongTopNUniqueSort(BigArrays bigArrays, SortOrder order, int limit) {
33+
public LongTopNSet(BigArrays bigArrays, SortOrder order, int limit) {
3434
this.order = order;
3535
this.limit = limit;
3636
this.count = 0;
3737
this.values = bigArrays.newLongArray(limit, false);
3838
this.searcher = new LongBinarySearcher(values, order);
3939
}
4040

41+
/**
42+
* Adds the value to the top N, as long as it is "better" than the worst value, or the top isn't full yet.
43+
*/
4144
public boolean collect(long value) {
4245
if (limit == 0) {
4346
return false;
@@ -92,6 +95,12 @@ public void reduceLimitByOne() {
9295
count = Math.min(count, limit);
9396
}
9497

98+
/**
99+
* Returns the worst value in the top N.
100+
* <p>
101+
* The worst is the greatest value for {@link SortOrder#ASC}, and the lowest value for {@link SortOrder#DESC}.
102+
* </p>
103+
*/
95104
public long getWorstValue() {
96105
assert count > 0;
97106
return values.get(count - 1);
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.compute.data.sort;
9+
10+
import org.elasticsearch.common.util.BigArrays;
11+
import org.elasticsearch.search.sort.SortOrder;
12+
13+
import java.util.List;
14+
15+
public class LongTopNSetTests extends TopNSetTestCase<LongTopNSet, Long> {
16+
17+
@Override
18+
protected LongTopNSet build(BigArrays bigArrays, SortOrder sortOrder, int limit) {
19+
return new LongTopNSet(bigArrays, sortOrder, limit);
20+
}
21+
22+
@Override
23+
protected Long randomValue() {
24+
return randomLong();
25+
}
26+
27+
@Override
28+
protected List<Long> threeSortedValues() {
29+
return List.of(Long.MIN_VALUE, randomLong(), Long.MAX_VALUE);
30+
}
31+
32+
@Override
33+
protected void collect(LongTopNSet sort, Long value) {
34+
sort.collect(value);
35+
}
36+
37+
@Override
38+
protected void reduceLimitByOne(LongTopNSet sort) {
39+
sort.reduceLimitByOne();
40+
}
41+
42+
@Override
43+
protected Long getWorstValue(LongTopNSet sort) {
44+
return sort.getWorstValue();
45+
}
46+
47+
@Override
48+
protected int getCount(LongTopNSet sort) {
49+
return sort.getCount();
50+
}
51+
52+
}
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.compute.data.sort;
9+
10+
import org.elasticsearch.common.breaker.CircuitBreaker;
11+
import org.elasticsearch.common.breaker.CircuitBreakingException;
12+
import org.elasticsearch.common.settings.Settings;
13+
import org.elasticsearch.common.util.BigArrays;
14+
import org.elasticsearch.common.util.MockBigArrays;
15+
import org.elasticsearch.common.util.MockPageCacheRecycler;
16+
import org.elasticsearch.common.util.PageCacheRecycler;
17+
import org.elasticsearch.core.Releasable;
18+
import org.elasticsearch.indices.CrankyCircuitBreakerService;
19+
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
20+
import org.elasticsearch.search.sort.SortOrder;
21+
import org.elasticsearch.test.ESTestCase;
22+
23+
import java.util.ArrayList;
24+
import java.util.Comparator;
25+
import java.util.List;
26+
27+
import static org.hamcrest.Matchers.equalTo;
28+
29+
public abstract class TopNSetTestCase<T extends Releasable, V extends Comparable<V>> extends ESTestCase {
30+
/**
31+
* Build a {@link T} to test. Sorts built by this method shouldn't need scores.
32+
*/
33+
protected abstract T build(BigArrays bigArrays, SortOrder sortOrder, int limit);
34+
35+
private T build(SortOrder sortOrder, int limit) {
36+
return build(bigArrays(), sortOrder, limit);
37+
}
38+
39+
/**
40+
* A random value for testing, with the appropriate precision for the type we're testing.
41+
*/
42+
protected abstract V randomValue();
43+
44+
/**
45+
* Returns a list of 3 values, in ascending order.
46+
*/
47+
protected abstract List<V> threeSortedValues();
48+
49+
/**
50+
* Collect a value into the top.
51+
*
52+
* @param value value to collect, always sent as double just to have
53+
* a number to test. Subclasses should cast to their favorite types
54+
*/
55+
protected abstract void collect(T sort, V value);
56+
57+
protected abstract void reduceLimitByOne(T sort);
58+
59+
protected abstract V getWorstValue(T sort);
60+
61+
protected abstract int getCount(T sort);
62+
63+
public final void testNeverCalled() {
64+
SortOrder sortOrder = randomFrom(SortOrder.values());
65+
int limit = randomIntBetween(0, 10);
66+
try (T sort = build(sortOrder, limit)) {
67+
assertResults(sort, sortOrder, limit, List.of());
68+
}
69+
}
70+
71+
public final void testLimit0() {
72+
SortOrder sortOrder = randomFrom(SortOrder.values());
73+
int limit = 0;
74+
try (T sort = build(sortOrder, limit)) {
75+
var values = threeSortedValues();
76+
77+
collect(sort, values.get(0));
78+
collect(sort, values.get(1));
79+
80+
assertResults(sort, sortOrder, limit, List.of());
81+
}
82+
}
83+
84+
public final void testSingleValue() {
85+
SortOrder sortOrder = randomFrom(SortOrder.values());
86+
int limit = 1;
87+
try (T sort = build(sortOrder, limit)) {
88+
var values = threeSortedValues();
89+
90+
collect(sort, values.get(0));
91+
92+
assertResults(sort, sortOrder, limit, List.of(values.get(0)));
93+
}
94+
}
95+
96+
public final void testNonCompetitive() {
97+
SortOrder sortOrder = SortOrder.DESC;
98+
int limit = 1;
99+
try (T sort = build(sortOrder, limit)) {
100+
var values = threeSortedValues();
101+
102+
collect(sort, values.get(1));
103+
collect(sort, values.get(0));
104+
105+
assertResults(sort, sortOrder, limit, List.of(values.get(1)));
106+
}
107+
}
108+
109+
public final void testCompetitive() {
110+
SortOrder sortOrder = SortOrder.DESC;
111+
int limit = 1;
112+
try (T sort = build(sortOrder, limit)) {
113+
var values = threeSortedValues();
114+
115+
collect(sort, values.get(0));
116+
collect(sort, values.get(1));
117+
118+
assertResults(sort, sortOrder, limit, List.of(values.get(1)));
119+
}
120+
}
121+
122+
public final void testTwoHitsDesc() {
123+
SortOrder sortOrder = SortOrder.DESC;
124+
int limit = 2;
125+
try (T sort = build(sortOrder, limit)) {
126+
var values = threeSortedValues();
127+
128+
collect(sort, values.get(0));
129+
collect(sort, values.get(1));
130+
collect(sort, values.get(2));
131+
132+
assertResults(sort, sortOrder, limit, List.of(values.get(2), values.get(1)));
133+
}
134+
}
135+
136+
public final void testTwoHitsAsc() {
137+
SortOrder sortOrder = SortOrder.ASC;
138+
int limit = 2;
139+
try (T sort = build(sortOrder, limit)) {
140+
var values = threeSortedValues();
141+
142+
collect(sort, values.get(0));
143+
collect(sort, values.get(1));
144+
collect(sort, values.get(2));
145+
146+
assertResults(sort, sortOrder, limit, List.of(values.get(0), values.get(1)));
147+
}
148+
}
149+
150+
public final void testReduceLimit() {
151+
SortOrder sortOrder = randomFrom(SortOrder.values());
152+
int limit = 3;
153+
try (T sort = build(sortOrder, limit)) {
154+
var values = threeSortedValues();
155+
156+
collect(sort, values.get(0));
157+
collect(sort, values.get(1));
158+
collect(sort, values.get(2));
159+
160+
assertResults(sort, sortOrder, limit, values);
161+
162+
reduceLimitByOne(sort);
163+
collect(sort, values.get(2));
164+
165+
assertResults(sort, sortOrder, limit - 1, values);
166+
}
167+
}
168+
169+
public final void testCrankyBreaker() {
170+
BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, new CrankyCircuitBreakerService());
171+
SortOrder sortOrder = randomFrom(SortOrder.values());
172+
int limit = randomIntBetween(0, 3);
173+
174+
try (T sort = build(bigArrays, sortOrder, limit)) {
175+
List<V> values = new ArrayList<>();
176+
177+
for (int i = 0; i < randomIntBetween(0, 4); i++) {
178+
V value = randomValue();
179+
values.add(value);
180+
collect(sort, value);
181+
}
182+
183+
if (randomBoolean() && limit > 0) {
184+
reduceLimitByOne(sort);
185+
limit--;
186+
187+
V value = randomValue();
188+
values.add(value);
189+
collect(sort, value);
190+
}
191+
192+
assertResults(sort, sortOrder, limit - 1, values);
193+
} catch (CircuitBreakingException e) {
194+
assertThat(e.getMessage(), equalTo(CrankyCircuitBreakerService.ERROR_MESSAGE));
195+
}
196+
assertThat(bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST).getUsed(), equalTo(0L));
197+
}
198+
199+
protected void assertResults(T sort, SortOrder sortOrder, int limit, List<V> values) {
200+
var sortedUniqueValues = values.stream()
201+
.distinct()
202+
.sorted(sortOrder == SortOrder.ASC ? Comparator.naturalOrder() : Comparator.reverseOrder())
203+
.limit(limit)
204+
.toList();
205+
206+
assertEquals(sortedUniqueValues.size(), getCount(sort));
207+
if (sortedUniqueValues.isEmpty() == false) {
208+
assertEquals(sortedUniqueValues.getLast(), getWorstValue(sort));
209+
}
210+
}
211+
212+
private BigArrays bigArrays() {
213+
return new MockBigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService());
214+
}
215+
}

0 commit comments

Comments
 (0)