Skip to content
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
99d1db6
WIP: pluggable competitiveDISIBuilder and secondary sort-based iterator
romseygeek Oct 29, 2025
45882c8
wip
romseygeek Oct 29, 2025
f72677c
Force use of competitive comparators
romseygeek Oct 30, 2025
4e704f1
tests
romseygeek Oct 30, 2025
ecea998
Add competitive sort tests to MapperTestCase
romseygeek Oct 31, 2025
842d14c
iter
romseygeek Oct 31, 2025
3e1c2d2
Merge branch 'sort/base-fieldmapper-sort-tests' into bug/sort-still-slow
romseygeek Oct 31, 2025
b04481e
Add competitive iterator check for logsdb-style timestamp field
romseygeek Oct 31, 2025
63b9329
Merge remote-tracking branch 'origin/main' into bug/sort-still-slow
romseygeek Nov 3, 2025
2fd86b5
NumericComparator: immediately check whether a segment is comparative…
martijnvg Nov 3, 2025
f9f8191
Add indexSort method to IndexFieldData
romseygeek Nov 3, 2025
08cea96
Merge remote-tracking branch 'romseygeek/bug/sort-still-slow' into bu…
romseygeek Nov 3, 2025
1eb03cd
Merge remote-tracking branch 'origin/main' into bug/sort-still-slow
romseygeek Nov 3, 2025
8603d58
Update docs/changelog/137533.yaml
romseygeek Nov 3, 2025
0595fff
[CI] Auto commit changes from spotless
Nov 3, 2025
3a933b4
spotless
romseygeek Nov 3, 2025
1548a2f
Merge remote-tracking branch 'romseygeek/bug/sort-still-slow' into bu…
romseygeek Nov 3, 2025
79bf9b2
tests
romseygeek Nov 4, 2025
32201c0
Merge remote-tracking branch 'origin/main' into bug/sort-still-slow
romseygeek Nov 4, 2025
c3b270c
[CI] Auto commit changes from spotless
Nov 4, 2025
dd1f994
need to add iterators back in for pruning - follow up
romseygeek Nov 4, 2025
0b79c60
Merge remote-tracking branch 'romseygeek/bug/sort-still-slow' into bu…
romseygeek Nov 4, 2025
4fa6f8e
dont' advance past maxdoc
romseygeek Nov 4, 2025
1249286
compilation
romseygeek Nov 4, 2025
0e0e392
added logic from lucene pr
martijnvg Nov 5, 2025
cb9a7f5
fencepost
romseygeek Nov 5, 2025
e32f535
Merge remote-tracking branch 'romseygeek/bug/sort-still-slow' into bu…
romseygeek Nov 5, 2025
d0a2218
only run accelerator on dense fields
romseygeek Nov 5, 2025
e603627
Merge remote-tracking branch 'origin/main' into bug/sort-still-slow
romseygeek Nov 6, 2025
2dc921e
Revert subclassing nonsense
romseygeek Nov 6, 2025
25dbeb4
Add unit test for SSI
romseygeek Nov 6, 2025
fe98352
Merge branch 'main' into bug/sort-still-slow
romseygeek Nov 7, 2025
8c495a5
Merge branch 'main' into bug/sort-still-slow
romseygeek Nov 7, 2025
7485e03
Merge remote-tracking branch 'origin/main' into bug/sort-still-slow
romseygeek Nov 7, 2025
3b6d308
conflicts
romseygeek Nov 7, 2025
8460a14
Merge remote-tracking branch 'romseygeek/bug/sort-still-slow' into bu…
romseygeek Nov 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/137533.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 137533
summary: Speed up sorts on secondary sort fields
area: Search
type: enhancement
issues: []
11 changes: 2 additions & 9 deletions server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
Original file line number Diff line number Diff line change
Expand Up @@ -634,12 +634,8 @@ private static SortField rewriteMergeSortField(SortField sortField) {
SortField newSortField = new SortField(sortField.getField(), SortField.Type.STRING, sortField.getReverse());
newSortField.setMissingValue(sortField.getMissingValue());
return newSortField;
} else if (sortField.getClass() == SortedNumericSortField.class) {
SortField newSortField = new SortField(
sortField.getField(),
((SortedNumericSortField) sortField).getNumericType(),
sortField.getReverse()
);
} else if (sortField instanceof SortedNumericSortField snsf) {
SortField newSortField = new SortField(sortField.getField(), snsf.getNumericType(), sortField.getReverse());
newSortField.setMissingValue(sortField.getMissingValue());
return newSortField;
} else if (sortField.getClass() == ShardDocSortField.class) {
Expand All @@ -651,9 +647,6 @@ private static SortField rewriteMergeSortField(SortField sortField) {

static void writeSortField(StreamOutput out, SortField sortField) throws IOException {
sortField = rewriteMergeSortField(sortField);
if (sortField.getClass() != SortField.class) {
throw new IllegalArgumentException("Cannot serialize SortField impl [" + sortField + "]");
}
out.writeOptionalString(sortField.getField());
if (sortField.getComparatorSource() != null) {
IndexFieldData.XFieldComparatorSource comparatorSource = (IndexFieldData.XFieldComparatorSource) sortField
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning e
final double dMissingValue = (Double) missingObject(missingValue, reversed);
// NOTE: it's important to pass null as a missing value in the constructor so that
// the comparator doesn't check docsWithField since we replace missing values in select()
return new DoubleComparator(numHits, null, null, reversed, Pruning.NONE) {
// TODO we can re-enable pruning here if we allow NumericDoubleValues to expose an iterator
return new DoubleComparator(numHits, fieldname, null, reversed, Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new DoubleLeafComparator(context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning e
final float fMissingValue = (Float) missingObject(missingValue, reversed);
// NOTE: it's important to pass null as a missing value in the constructor so that
// the comparator doesn't check docsWithField since we replace missing values in select()
return new FloatComparator(numHits, null, null, reversed, Pruning.NONE) {
// TODO we can re-enable pruning here if we allow NumericDoubleValues to expose an iterator
return new FloatComparator(numHits, fieldname, null, reversed, Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new FloatLeafComparator(context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning e
final float fMissingValue = (Float) missingObject(missingValue, reversed);
// NOTE: it's important to pass null as a missing value in the constructor so that
// the comparator doesn't check docsWithField since we replace missing values in select()
return new HalfFloatComparator(numHits, fieldname, null, reversed, enableSkipping) {
// TODO we can re-enable pruning here if we allow NumericDoubleValues to expose an iterator
return new HalfFloatComparator(numHits, fieldname, null, reversed, Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new HalfFloatLeafComparator(context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws I
return new IntLeafComparator(context) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
return wrap(getLongValues(context, iMissingValue));
return wrap(getLongValues(context, iMissingValue), context.reader().maxDoc());
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
*/
package org.elasticsearch.index.fielddata.fieldcomparator;

import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.comparators.LongComparator;
import org.apache.lucene.util.BitSet;
import org.elasticsearch.common.time.DateUtils;
import org.elasticsearch.common.util.BigArrays;
Expand All @@ -28,6 +29,8 @@
import org.elasticsearch.index.fielddata.LeafNumericFieldData;
import org.elasticsearch.index.fielddata.SortedNumericLongValues;
import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData;
import org.elasticsearch.lucene.comparators.XLongComparator;
import org.elasticsearch.lucene.comparators.XNumericComparator;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.sort.BucketedSort;
Expand Down Expand Up @@ -103,13 +106,48 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning e
final long lMissingValue = (Long) missingObject(missingValue, reversed);
// NOTE: it's important to pass null as a missing value in the constructor so that
// the comparator doesn't check docsWithField since we replace missing values in select()
return new LongComparator(numHits, null, null, reversed, Pruning.NONE) {
return new XLongComparator(numHits, fieldname, null, reversed, enableSkipping) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
final int maxDoc = context.reader().maxDoc();
return new LongLeafComparator(context) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
return wrap(getLongValues(context, lMissingValue));
return wrap(getLongValues(context, lMissingValue), maxDoc);
}

@Override
protected XNumericComparator<Long>.CompetitiveDISIBuilder buildCompetitiveDISIBuilder(LeafReaderContext context)
throws IOException {
Sort indexSort = context.reader().getMetaData().sort();
if (indexSort == null) {
return super.buildCompetitiveDISIBuilder(context);
}
SortField[] sortFields = indexSort.getSort();
if (sortFields.length != 2) {
return super.buildCompetitiveDISIBuilder(context);
}
if (sortFields[1].getField().equals(field) == false) {
return super.buildCompetitiveDISIBuilder(context);
}
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
DocValuesSkipper primaryFieldSkipper = context.reader().getDocValuesSkipper(sortFields[0].getField());
if (primaryFieldSkipper == null || skipper.docCount() != maxDoc || primaryFieldSkipper.docCount() != maxDoc) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case where primaryFieldSkipper is null, the secondary sort field can be treated as the primary sort and we can go faster (like in SortedNumericDocValuesRangeQuery#getDocIdSetIteratorOrNullForPrimarySort). But I don't think this happens now, because super.buildCompetitiveDISIBuilder(context) will not detect this?

The same applies if primary sort field has just one value.

If this is true, let's address it in a followup?

return super.buildCompetitiveDISIBuilder(context);
}
return new CompetitiveDISIBuilder(this) {
@Override
protected int docCount() {
return skipper.docCount();
}

@Override
protected void doUpdateCompetitiveIterator() {
competitiveIterator.update(
new SecondarySortIterator(docValues, skipper, primaryFieldSkipper, minValueAsLong, maxValueAsLong)
);
}
};
}
};
}
Expand Down Expand Up @@ -163,31 +201,43 @@ public Object missingObject(Object missingValue, boolean reversed) {
return super.missingObject(missingValue, reversed);
}

protected static NumericDocValues wrap(LongValues longValues) {
protected static NumericDocValues wrap(LongValues longValues, int maxDoc) {
return new NumericDocValues() {

int doc = -1;

@Override
public long longValue() throws IOException {
return longValues.longValue();
}

@Override
public boolean advanceExact(int target) throws IOException {
doc = target;
return longValues.advanceExact(target);
}

@Override
public int docID() {
throw new UnsupportedOperationException();
return doc;
}

@Override
public int nextDoc() throws IOException {
throw new UnsupportedOperationException();
return advance(doc + 1);
}

@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
if (target >= maxDoc) {
return doc = NO_MORE_DOCS;
}
// All documents are guaranteed to have a value, as all invocations of getLongValues
// always return `true` from `advanceExact()`
boolean hasValue = longValues.advanceExact(target);
assert hasValue : "LongValuesComparatorSource#wrap called with a LongValues that has missing values";
doc = target;
return target;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.fielddata.fieldcomparator;

import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;

import java.io.IOException;

/**
* A competitive DocIdSetIterator that examines the values of a secondary
* sort field and tries to exclude documents with values outside a given
* range, using DocValuesSkipper instances on the primary sort field to advance rapidly
* to the next block of values.
*/
class SecondarySortIterator extends DocIdSetIterator {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a unit test for this iterator? Maybe we can do a test duel against DocValuesRangeIterator wrapped in an iterator?


final NumericDocValues values;

final DocValuesSkipper valueSkipper;
final DocValuesSkipper primaryFieldSkipper;
final long minValue;
final long maxValue;

int docID = -1;
boolean skipperMatch;
int primaryFieldUpTo = -1;
int valueFieldUpTo = -1;

/**
 * Builds an iterator over {@code values} that only returns documents whose value
 * lies within {@code [minValue, maxValue]}.
 *
 * @param values              doc values of the secondary sort field; drives iteration
 * @param valueSkipper        skipper over the secondary sort field
 * @param primaryFieldSkipper skipper over the primary sort field
 * @param minValue            inclusive lower bound of the accepted range
 * @param maxValue            inclusive upper bound of the accepted range
 */
SecondarySortIterator(
    NumericDocValues values,
    DocValuesSkipper valueSkipper,
    DocValuesSkipper primaryFieldSkipper,
    long minValue,
    long maxValue
) {
    // Accepted value range.
    this.minValue = minValue;
    this.maxValue = maxValue;

    // Data sources.
    this.values = values;
    this.valueSkipper = valueSkipper;
    this.primaryFieldSkipper = primaryFieldSkipper;

    // Seed the "examined up to" markers from wherever the skippers currently stand.
    this.valueFieldUpTo = valueSkipper.maxDocID(0);
    this.primaryFieldUpTo = primaryFieldSkipper.maxDocID(0);
}

/** Returns the document this iterator is currently positioned on. */
@Override
public int docID() {
    return this.docID;
}

/** Moves to the next matching document by delegating to {@link #advance(int)}. */
@Override
public int nextDoc() throws IOException {
    final int following = docID + 1;
    return advance(following);
}

/**
 * Advances to the first document at or after {@code target} that is accepted, i.e. either
 * its own value lies in {@code [this.minValue, this.maxValue]} or it is the entry point of a
 * skipper block whose value bounds lie entirely inside that range.
 *
 * @param target the lowest doc id the caller is interested in
 * @return the doc id the iterator is now positioned on, or {@code NO_MORE_DOCS} once
 *         {@code values} is exhausted
 * @throws IOException if reading the doc values or skippers fails
 */
@Override
public int advance(int target) throws IOException {
// A block-level match only holds for the position it was found at; clear it on every move.
skipperMatch = false;
target = values.advance(target);
if (target == DocIdSetIterator.NO_MORE_DOCS) {
return docID = target;
}
while (true) {
// Only consult the value skipper once we have moved past the range it last described.
if (target > valueFieldUpTo) {
valueSkipper.advance(target);
valueFieldUpTo = valueSkipper.maxDocID(0);
// These locals shadow the fields: they are the bounds of the current skipper block,
// while this.minValue / this.maxValue are the bounds of the accepted range.
long minValue = valueSkipper.minValue(0);
long maxValue = valueSkipper.maxValue(0);
if (minValue > this.maxValue || maxValue < this.minValue) {
// outside the desired range, skip forward
// Climb to coarser levels for as long as they are also disjoint from the accepted range,
// extending valueFieldUpTo to the end of the widest such level.
for (int level = 1; level < valueSkipper.numLevels(); level++) {
minValue = valueSkipper.minValue(level);
maxValue = valueSkipper.maxValue(level);
if (minValue > this.maxValue || maxValue < this.minValue) {
valueFieldUpTo = valueSkipper.maxDocID(level);
} else {
break;
}
}

int upTo = valueFieldUpTo;
// NOTE(review): maxValue here is the bound of the last level inspected by the loop above,
// which may be the level that triggered the break rather than the level skipped.
if (maxValue < this.minValue) {
// We've moved past the end of the valid values in the secondary sort field
// for this primary value. Advance the primary skipper to find the starting point
// for the next primary value, where the secondary field values will have reset
primaryFieldSkipper.advance(target);
primaryFieldUpTo = primaryFieldSkipper.maxDocID(0);
// min == max means every document in the primary block has the same primary value;
// widen to coarser levels while that still holds.
if (primaryFieldSkipper.minValue(0) == primaryFieldSkipper.maxValue(0)) {
for (int level = 1; level < primaryFieldSkipper.numLevels(); level++) {
if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {
primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);
} else {
break;
}
}
}
// Jump to whichever of the two skippers allows the longer skip.
if (primaryFieldUpTo > upTo) {
upTo = primaryFieldUpTo;
}
}

target = values.advance(upTo + 1);
if (target == DocIdSetIterator.NO_MORE_DOCS) {
return docID = target;
}
} else if (minValue >= this.minValue && maxValue <= this.maxValue) {
// The whole level-0 block lies inside the accepted range. The assert checks the block is
// dense (one value per doc id in its span); docIDRunEnd() uses skipperMatch to report the run.
assert valueSkipper.docCount(0) == valueSkipper.maxDocID(0) - valueSkipper.minDocID(0) + 1;
skipperMatch = true;
return docID = target;
}
}

// No block-level decision was possible for this position: test the document's own value.
// From here on minValue / maxValue are the fields again (the locals above are out of scope).
long value = values.longValue();
if (value < minValue && target > primaryFieldUpTo) {
// NOTE(review): skipping the rest of a single-valued primary block on a too-small value
// presumably relies on secondary values descending within one primary value - confirm with caller.
primaryFieldSkipper.advance(target);
primaryFieldUpTo = primaryFieldSkipper.maxDocID(0);
if (primaryFieldSkipper.minValue(0) == primaryFieldSkipper.maxValue(0)) {
for (int level = 1; level < primaryFieldSkipper.numLevels(); level++) {
if (primaryFieldSkipper.minValue(level) == primaryFieldSkipper.maxValue(level)) {
primaryFieldUpTo = primaryFieldSkipper.maxDocID(level);
} else {
break;
}
}
target = values.advance(primaryFieldUpTo + 1);
if (target == DocIdSetIterator.NO_MORE_DOCS) {
return docID = target;
}
} else {
// The primary block mixes several primary values, so nothing can be skipped wholesale.
target = values.nextDoc();
if (target == DocIdSetIterator.NO_MORE_DOCS) {
return docID = target;
}
}
} else if (value >= minValue && value <= maxValue) {
return docID = target;
} else {
// Value is outside the accepted range and no skip applies: step to the next document.
target = values.nextDoc();
if (target == DocIdSetIterator.NO_MORE_DOCS) {
return docID = target;
}
}
}
}

/**
 * Reports the end (exclusive) of the run of consecutive matching documents starting at the
 * current position. After a block-level match in {@code advance} the run extends through
 * {@code valueFieldUpTo}; otherwise the superclass answer is used.
 */
@Override
public int docIDRunEnd() throws IOException {
    return skipperMatch ? valueFieldUpTo + 1 : super.docIDRunEnd();
}

/** Returns the cost reported by the wrapped doc values iterator. */
@Override
public long cost() {
    final long wrappedCost = values.cost();
    return wrappedCost;
}

}
Loading