Skip to content

Commit 422eb1a

Browse files
authored
Remove bucketOrd field from InternalTerms and friends (#118044)
The field bucketOrd is only used for building the aggregation but has no use after that.
1 parent fdb1b2b commit 422eb1a

File tree

14 files changed

+355
-282
lines changed

14 files changed

+355
-282
lines changed

server/src/main/java/org/elasticsearch/search/aggregations/BucketOrder.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.common.io.stream.StreamOutput;
1313
import org.elasticsearch.common.io.stream.Writeable;
1414
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket;
15+
import org.elasticsearch.search.aggregations.bucket.terms.BucketAndOrd;
1516
import org.elasticsearch.search.aggregations.support.AggregationPath;
1617
import org.elasticsearch.xcontent.ToXContentObject;
1718

@@ -20,13 +21,12 @@
2021
import java.util.Comparator;
2122
import java.util.List;
2223
import java.util.function.BiFunction;
23-
import java.util.function.ToLongFunction;
2424

2525
/**
2626
* {@link Bucket} ordering strategy. Buckets can be order either as
2727
* "complete" buckets using {@link #comparator()} or against a combination
2828
* of the buckets internals with its ordinal with
29-
* {@link #partiallyBuiltBucketComparator(ToLongFunction, Aggregator)}.
29+
* {@link #partiallyBuiltBucketComparator(Aggregator)}.
3030
*/
3131
public abstract class BucketOrder implements ToXContentObject, Writeable {
3232
/**
@@ -102,7 +102,7 @@ public final void validate(Aggregator aggregator) throws AggregationExecutionExc
102102
* to validate this order because doing so checks all of the appropriate
103103
* paths.
104104
*/
105-
partiallyBuiltBucketComparator(null, aggregator);
105+
partiallyBuiltBucketComparator(aggregator);
106106
}
107107

108108
/**
@@ -121,7 +121,7 @@ public final void validate(Aggregator aggregator) throws AggregationExecutionExc
121121
* with it all the time.
122122
* </p>
123123
*/
124-
public abstract <T extends Bucket> Comparator<T> partiallyBuiltBucketComparator(ToLongFunction<T> ordinalReader, Aggregator aggregator);
124+
public abstract <T extends Bucket> Comparator<BucketAndOrd<T>> partiallyBuiltBucketComparator(Aggregator aggregator);
125125

126126
/**
127127
* Build a comparator for fully built buckets.

server/src/main/java/org/elasticsearch/search/aggregations/InternalOrder.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.elasticsearch.common.logging.DeprecationLogger;
1616
import org.elasticsearch.search.aggregations.Aggregator.BucketComparator;
1717
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket;
18+
import org.elasticsearch.search.aggregations.bucket.terms.BucketAndOrd;
1819
import org.elasticsearch.search.aggregations.support.AggregationPath;
1920
import org.elasticsearch.search.sort.SortOrder;
2021
import org.elasticsearch.search.sort.SortValue;
@@ -30,7 +31,6 @@
3031
import java.util.List;
3132
import java.util.Objects;
3233
import java.util.function.BiFunction;
33-
import java.util.function.ToLongFunction;
3434

3535
/**
3636
* Implementations for {@link Bucket} ordering strategies.
@@ -63,10 +63,10 @@ public AggregationPath path() {
6363
}
6464

6565
@Override
66-
public <T extends Bucket> Comparator<T> partiallyBuiltBucketComparator(ToLongFunction<T> ordinalReader, Aggregator aggregator) {
66+
public <T extends Bucket> Comparator<BucketAndOrd<T>> partiallyBuiltBucketComparator(Aggregator aggregator) {
6767
try {
6868
BucketComparator bucketComparator = path.bucketComparator(aggregator, order);
69-
return (lhs, rhs) -> bucketComparator.compare(ordinalReader.applyAsLong(lhs), ordinalReader.applyAsLong(rhs));
69+
return (lhs, rhs) -> bucketComparator.compare(lhs.ord, rhs.ord);
7070
} catch (IllegalArgumentException e) {
7171
throw new AggregationExecutionException.InvalidPath("Invalid aggregation order path [" + path + "]. " + e.getMessage(), e);
7272
}
@@ -188,12 +188,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
188188
}
189189

190190
@Override
191-
public <T extends Bucket> Comparator<T> partiallyBuiltBucketComparator(ToLongFunction<T> ordinalReader, Aggregator aggregator) {
192-
List<Comparator<T>> comparators = orderElements.stream()
193-
.map(oe -> oe.partiallyBuiltBucketComparator(ordinalReader, aggregator))
194-
.toList();
191+
public <T extends Bucket> Comparator<BucketAndOrd<T>> partiallyBuiltBucketComparator(Aggregator aggregator) {
192+
List<Comparator<BucketAndOrd<T>>> comparators = new ArrayList<>(orderElements.size());
193+
for (BucketOrder order : orderElements) {
194+
comparators.add(order.partiallyBuiltBucketComparator(aggregator));
195+
}
195196
return (lhs, rhs) -> {
196-
for (Comparator<T> c : comparators) {
197+
for (Comparator<BucketAndOrd<T>> c : comparators) {
197198
int result = c.compare(lhs, rhs);
198199
if (result != 0) {
199200
return result;
@@ -299,9 +300,9 @@ byte id() {
299300
}
300301

301302
@Override
302-
public <T extends Bucket> Comparator<T> partiallyBuiltBucketComparator(ToLongFunction<T> ordinalReader, Aggregator aggregator) {
303+
public <T extends Bucket> Comparator<BucketAndOrd<T>> partiallyBuiltBucketComparator(Aggregator aggregator) {
303304
Comparator<Bucket> comparator = comparator();
304-
return comparator::compare;
305+
return (lhs, rhs) -> comparator.compare(lhs.bucket, rhs.bucket);
305306
}
306307

307308
@Override

server/src/main/java/org/elasticsearch/search/aggregations/bucket/countedterms/CountedTermsAggregator.java

Lines changed: 48 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.apache.lucene.index.SortedDocValues;
1414
import org.apache.lucene.index.SortedSetDocValues;
1515
import org.apache.lucene.util.BytesRef;
16+
import org.elasticsearch.common.util.IntArray;
1617
import org.elasticsearch.common.util.LongArray;
1718
import org.elasticsearch.common.util.ObjectArray;
1819
import org.elasticsearch.core.Releasables;
@@ -26,6 +27,7 @@
2627
import org.elasticsearch.search.aggregations.InternalOrder;
2728
import org.elasticsearch.search.aggregations.LeafBucketCollector;
2829
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
30+
import org.elasticsearch.search.aggregations.bucket.terms.BucketAndOrd;
2931
import org.elasticsearch.search.aggregations.bucket.terms.BucketPriorityQueue;
3032
import org.elasticsearch.search.aggregations.bucket.terms.BytesKeyedBucketOrds;
3133
import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms;
@@ -38,7 +40,6 @@
3840
import java.util.Arrays;
3941
import java.util.Map;
4042
import java.util.function.BiConsumer;
41-
import java.util.function.Supplier;
4243

4344
import static java.util.Collections.emptyList;
4445
import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder;
@@ -115,51 +116,57 @@ public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throw
115116
LongArray otherDocCounts = bigArrays().newLongArray(owningBucketOrds.size());
116117
ObjectArray<StringTerms.Bucket[]> topBucketsPerOrd = bigArrays().newObjectArray(owningBucketOrds.size())
117118
) {
118-
for (long ordIdx = 0; ordIdx < topBucketsPerOrd.size(); ordIdx++) {
119-
int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
120-
121-
// as users can't control sort order, in practice we'll always sort by doc count descending
122-
try (
123-
BucketPriorityQueue<StringTerms.Bucket> ordered = new BucketPriorityQueue<>(
124-
size,
125-
bigArrays(),
126-
partiallyBuiltBucketComparator
127-
)
128-
) {
129-
StringTerms.Bucket spare = null;
130-
BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds.get(ordIdx));
131-
Supplier<StringTerms.Bucket> emptyBucketBuilder = () -> new StringTerms.Bucket(
132-
new BytesRef(),
133-
0,
134-
null,
135-
false,
136-
0,
137-
format
138-
);
139-
while (ordsEnum.next()) {
140-
long docCount = bucketDocCount(ordsEnum.ord());
141-
otherDocCounts.increment(ordIdx, docCount);
142-
if (spare == null) {
143-
checkRealMemoryCBForInternalBucket();
144-
spare = emptyBucketBuilder.get();
119+
try (IntArray bucketsToCollect = bigArrays().newIntArray(owningBucketOrds.size())) {
120+
// find how many buckets we are going to collect
121+
long ordsToCollect = 0;
122+
for (long ordIdx = 0; ordIdx < owningBucketOrds.size(); ordIdx++) {
123+
int size = (int) Math.min(bucketOrds.bucketsInOrd(owningBucketOrds.get(ordIdx)), bucketCountThresholds.getShardSize());
124+
bucketsToCollect.set(ordIdx, size);
125+
ordsToCollect += size;
126+
}
127+
try (LongArray ordsArray = bigArrays().newLongArray(ordsToCollect)) {
128+
long ordsCollected = 0;
129+
for (long ordIdx = 0; ordIdx < owningBucketOrds.size(); ordIdx++) {
130+
// as users can't control sort order, in practice we'll always sort by doc count descending
131+
try (
132+
BucketPriorityQueue<StringTerms.Bucket> ordered = new BucketPriorityQueue<>(
133+
bucketsToCollect.get(ordIdx),
134+
bigArrays(),
135+
order.partiallyBuiltBucketComparator(this)
136+
)
137+
) {
138+
BucketAndOrd<StringTerms.Bucket> spare = null;
139+
BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds.get(ordIdx));
140+
while (ordsEnum.next()) {
141+
long docCount = bucketDocCount(ordsEnum.ord());
142+
otherDocCounts.increment(ordIdx, docCount);
143+
if (spare == null) {
144+
checkRealMemoryCBForInternalBucket();
145+
spare = new BucketAndOrd<>(new StringTerms.Bucket(new BytesRef(), 0, null, false, 0, format));
146+
}
147+
ordsEnum.readValue(spare.bucket.getTermBytes());
148+
spare.bucket.setDocCount(docCount);
149+
spare.ord = ordsEnum.ord();
150+
spare = ordered.insertWithOverflow(spare);
151+
}
152+
final int orderedSize = (int) ordered.size();
153+
final StringTerms.Bucket[] buckets = new StringTerms.Bucket[orderedSize];
154+
for (int i = orderedSize - 1; i >= 0; --i) {
155+
BucketAndOrd<StringTerms.Bucket> bucketAndOrd = ordered.pop();
156+
buckets[i] = bucketAndOrd.bucket;
157+
ordsArray.set(ordsCollected + i, bucketAndOrd.ord);
158+
otherDocCounts.increment(ordIdx, -bucketAndOrd.bucket.getDocCount());
159+
bucketAndOrd.bucket.setTermBytes(BytesRef.deepCopyOf(bucketAndOrd.bucket.getTermBytes()));
160+
}
161+
topBucketsPerOrd.set(ordIdx, buckets);
162+
ordsCollected += orderedSize;
145163
}
146-
ordsEnum.readValue(spare.getTermBytes());
147-
spare.setDocCount(docCount);
148-
spare.setBucketOrd(ordsEnum.ord());
149-
spare = ordered.insertWithOverflow(spare);
150-
}
151-
152-
topBucketsPerOrd.set(ordIdx, new StringTerms.Bucket[(int) ordered.size()]);
153-
for (int i = (int) ordered.size() - 1; i >= 0; --i) {
154-
topBucketsPerOrd.get(ordIdx)[i] = ordered.pop();
155-
otherDocCounts.increment(ordIdx, -topBucketsPerOrd.get(ordIdx)[i].getDocCount());
156-
topBucketsPerOrd.get(ordIdx)[i].setTermBytes(BytesRef.deepCopyOf(topBucketsPerOrd.get(ordIdx)[i].getTermBytes()));
157164
}
165+
assert ordsCollected == ordsArray.size();
166+
buildSubAggsForAllBuckets(topBucketsPerOrd, ordsArray, InternalTerms.Bucket::setAggregations);
158167
}
159168
}
160169

161-
buildSubAggsForAllBuckets(topBucketsPerOrd, InternalTerms.Bucket::getBucketOrd, InternalTerms.Bucket::setAggregations);
162-
163170
return buildAggregations(Math.toIntExact(owningBucketOrds.size()), ordIdx -> {
164171
final BucketOrder reduceOrder;
165172
if (isKeyOrder(order) == false) {

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/BucketPriorityQueue.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@
1313

1414
import java.util.Comparator;
1515

16-
public class BucketPriorityQueue<B> extends ObjectArrayPriorityQueue<B> {
16+
public class BucketPriorityQueue<B> extends ObjectArrayPriorityQueue<BucketAndOrd<B>> {
1717

18-
private final Comparator<? super B> comparator;
18+
private final Comparator<BucketAndOrd<B>> comparator;
1919

20-
public BucketPriorityQueue(int size, BigArrays bigArrays, Comparator<? super B> comparator) {
20+
public BucketPriorityQueue(int size, BigArrays bigArrays, Comparator<BucketAndOrd<B>> comparator) {
2121
super(size, bigArrays);
2222
this.comparator = comparator;
2323
}
2424

2525
@Override
26-
protected boolean lessThan(B a, B b) {
26+
protected boolean lessThan(BucketAndOrd<B> a, BucketAndOrd<B> b) {
2727
return comparator.compare(a, b) > 0; // reverse, since we reverse again when adding to a list
2828
}
2929
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/BucketSignificancePriorityQueue.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212
import org.elasticsearch.common.util.BigArrays;
1313
import org.elasticsearch.common.util.ObjectArrayPriorityQueue;
1414

15-
public class BucketSignificancePriorityQueue<B extends SignificantTerms.Bucket> extends ObjectArrayPriorityQueue<B> {
15+
public class BucketSignificancePriorityQueue<B extends SignificantTerms.Bucket> extends ObjectArrayPriorityQueue<BucketAndOrd<B>> {
1616

1717
public BucketSignificancePriorityQueue(int size, BigArrays bigArrays) {
1818
super(size, bigArrays);
1919
}
2020

2121
@Override
22-
protected boolean lessThan(SignificantTerms.Bucket o1, SignificantTerms.Bucket o2) {
23-
return o1.getSignificanceScore() < o2.getSignificanceScore();
22+
protected boolean lessThan(BucketAndOrd<B> o1, BucketAndOrd<B> o2) {
23+
return o1.bucket.getSignificanceScore() < o2.bucket.getSignificanceScore();
2424
}
2525
}

0 commit comments

Comments
 (0)