Skip to content

Commit 854f882

Browse files
authored
Leveraging segment-global ordinal mapping for efficient terms aggregation (opensearch-project#20683)
Signed-off-by: Ankit Jain <jainankitk@apache.org>
1 parent 7dfce8d commit 854f882

File tree

2 files changed

+18
-25
lines changed

2 files changed

+18
-25
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3131
- Service does not start on Windows with OpenJDK ([#20615](https://github.com/opensearch-project/OpenSearch/pull/20615))
3232
- Update RemoteClusterStateCleanupManager to perform batched deletions of stale ClusterMetadataManifests and address deletion timeout issues ([#20566](https://github.com/opensearch-project/OpenSearch/pull/20566))
3333
- Fix the regression of terms agg optimization at high cardinality ([#20623](https://github.com/opensearch-project/OpenSearch/pull/20623))
34+
- Leveraging segment-global ordinal mapping for efficient terms aggregation ([#20624](https://github.com/opensearch-project/OpenSearch/pull/20624))
3435
- Support Docker distribution builds for ppc64le, arm64 and s390x ([#20678](https://github.com/opensearch-project/OpenSearch/pull/20678))
3536
- Harden detection of HTTP/3 support by ensuring Quic native libraries are available for the target platform ([#20680](https://github.com/opensearch-project/OpenSearch/pull/20680))
3637
- Fallback to netty client if AWS Crt client is not available on the target platform / architecture ([#20698](https://github.com/opensearch-project/OpenSearch/pull/20698))

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -207,28 +207,21 @@ boolean tryCollectFromTermFrequencies(LeafReaderContext ctx, BiConsumer<Long, In
207207
return false;
208208
}
209209

210-
TermsEnum indexTermsEnum = segmentTerms.iterator();
211-
BytesRef indexTerm = indexTermsEnum.next();
212-
final SortedSetDocValues globalOrds = this.getGlobalOrds(ctx);
213-
TermsEnum globalOrdinalTermsEnum = globalOrds.termsEnum();
214-
BytesRef ordinalTerm = globalOrdinalTermsEnum.next();
215-
216-
// Iterate over the terms in the segment, look for matches in the global ordinal terms,
217-
// and increment bucket count when segment terms match global ordinal terms.
218-
while (indexTerm != null && ordinalTerm != null) {
219-
int compare = indexTerm.compareTo(ordinalTerm);
220-
if (compare == 0) {
221-
if (acceptedGlobalOrdinals.test(globalOrdinalTermsEnum.ord())) {
222-
ordCountConsumer.accept(globalOrdinalTermsEnum.ord(), indexTermsEnum.docFreq());
223-
}
224-
indexTerm = indexTermsEnum.next();
225-
ordinalTerm = globalOrdinalTermsEnum.next();
226-
} else if (compare < 0) {
227-
indexTerm = indexTermsEnum.next();
228-
} else {
229-
ordinalTerm = globalOrdinalTermsEnum.next();
210+
final TermsEnum segmentTermsEnum = segmentTerms.iterator();
211+
final LongUnaryOperator globalOrdsMapping = valuesSource.globalOrdinalsMapping(ctx);
212+
213+
// Iterate over the ordinals in the segment, look for matches in the global ordinal,
214+
// and increment bucket count when segment ordinal is contained in global ordinals.
215+
for (long segmentOrd = 0; segmentOrd < termCount; segmentOrd++) {
216+
segmentTermsEnum.next();
217+
long globalOrd = globalOrdsMapping.applyAsLong(segmentOrd);
218+
if (acceptedGlobalOrdinals.test(globalOrd)) {
219+
ordCountConsumer.accept(globalOrd, segmentTermsEnum.docFreq());
230220
}
231221
}
222+
223+
// Segment terms enum should be fully exhausted after iterating over all the ordinals
224+
assert segmentTermsEnum.next() == null : "Segment terms enum must be fully exhausted";
232225
return true;
233226
}
234227

@@ -565,11 +558,10 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
565558
@Override
566559
protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws IOException {
567560
if (subAggregators.length == 0) {
568-
if (mapping != null) {
569-
mapSegmentCountsToGlobalCounts(mapping);
570-
}
571-
mapping = valuesSource.globalOrdinalsMapping(ctx);
572-
return tryCollectFromTermFrequencies(ctx, (ord, docCount) -> incrementBucketDocCount(mapping.applyAsLong(ord), docCount));
561+
return tryCollectFromTermFrequencies(
562+
ctx,
563+
(globalOrd, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, globalOrd), docCount)
564+
);
573565
}
574566
return tryStarTreePrecompute(ctx);
575567
}

0 commit comments

Comments
 (0)