Skip to content

Commit a039d90

Browse files
authored
Implement IndexedDISI#docIDRunEnd (#14753)
1 parent 9afcfdb commit a039d90

File tree

4 files changed

+115
-0
lines changed

4 files changed

+115
-0
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ Optimizations
144144

145145
* GITHUB#14720: Cache high-order bits of hashcode to speed up BytesRefHash. (Pan Guixin)
146146

147+
* GITHUB#14753: Implement IndexedDISI#docIDRunEnd. (Ge Song)
148+
147149
Bug Fixes
148150
---------------------
149151
* GITHUB#14654: ValueSource.fromDoubleValuesSource(dvs).getSortField() would throw errors when

lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,11 @@ public long cost() {
568568
return cost;
569569
}
570570

571+
/**
 * Returns the exclusive end of the run of consecutive doc IDs that contains the current doc.
 *
 * <p>The computation is delegated to the current {@code method}, which receives this iterator
 * so it can read its state.
 */
@Override
572+
public int docIDRunEnd() throws IOException {
573+
return method.docIDRunEnd(this);
574+
}
575+
571576
enum Method {
572577
SPARSE {
573578
@Override
@@ -634,6 +639,11 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
634639
}
635640
return false;
636641
}
642+
643+
// SPARSE: no attempt is made to detect neighbouring docs; the run reported is just the
// current doc, i.e. [doc, doc + 1).
@Override
644+
int docIDRunEnd(IndexedDISI disi) throws IOException {
645+
return disi.doc + 1;
646+
}
637647
},
638648
DENSE {
639649
@Override
@@ -730,6 +740,14 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
730740
return advanceWithinBlock(disi, upTo);
731741
}
732742
}
743+
744+
// DENSE: when the current word has all 64 bits set (-1L), the run is extended to the next
// multiple of 64 above the current doc; otherwise only the current doc is reported.
// NOTE(review): presumably disi.word is the 64-bit bitmap word covering disi.doc — confirm
// against advanceWithinBlock.
@Override
745+
int docIDRunEnd(IndexedDISI disi) throws IOException {
746+
if (disi.word == -1L) {
747+
// (doc | 0x3F) is the last doc ID sharing the same 64-aligned group as doc.
return (disi.doc | 0x3F) + 1;
748+
}
749+
return disi.doc + 1;
750+
}
733751
},
734752
ALL {
735753
@Override
@@ -757,6 +775,11 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
757775
return false;
758776
}
759777
}
778+
779+
// ALL: the run is extended to the next multiple of 65536 (0x10000) above the current doc.
// NOTE(review): presumably an ALL block holds every doc of a 65536-doc block — confirm.
@Override
780+
int docIDRunEnd(IndexedDISI disi) throws IOException {
781+
return (disi.doc | 0xFFFF) + 1;
782+
}
760783
};
761784

762785
/**
@@ -783,6 +806,8 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
783806
*/
784807
abstract boolean intoBitSetWithinBlock(
785808
IndexedDISI disi, int upTo, FixedBitSet bitSet, int offset) throws IOException;
809+
810+
/**
 * Returns the exclusive end of the run of consecutive doc IDs containing {@code disi.doc}, as
 * computed by this method's encoding. Implementations may return {@code disi.doc + 1} when
 * they do not look for a longer run.
 */
abstract int docIDRunEnd(IndexedDISI disi) throws IOException;
786811
}
787812

788813
/**

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,11 @@ public boolean advanceExact(int target) {
465465
public long cost() {
466466
return maxDoc;
467467
}
468+
469+
// Reports maxDoc as the end of the current run, i.e. everything from the current doc onward
// is treated as one run. NOTE(review): presumably valid because this is the dense variant
// (cost() just above also returns maxDoc) — confirm.
@Override
470+
public int docIDRunEnd() throws IOException {
471+
return maxDoc;
472+
}
468473
}
469474

470475
private abstract static class SparseNumericDocValues extends NumericDocValues {
@@ -504,6 +509,11 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
504509
public long cost() {
505510
return disi.cost();
506511
}
512+
513+
// Run end is whatever the underlying disi reports (cost() above delegates to it as well).
@Override
514+
public int docIDRunEnd() throws IOException {
515+
return disi.docIDRunEnd();
516+
}
507517
}
508518

509519
private LongValues getDirectReaderInstance(
@@ -747,6 +757,11 @@ public boolean advanceExact(int target) throws IOException {
747757
doc = target;
748758
return true;
749759
}
760+
761+
// Reports maxDoc as the end of the current run. The advanceExact above accepts every target
// unconditionally, which suggests all docs have a value here — TODO confirm.
@Override
762+
public int docIDRunEnd() throws IOException {
763+
return maxDoc;
764+
}
750765
}
751766

752767
private abstract static class SparseBinaryDocValues extends BinaryDocValues {
@@ -786,6 +801,11 @@ public boolean advanceExact(int target) throws IOException {
786801
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
787802
disi.intoBitSet(upTo, bitSet, offset);
788803
}
804+
805+
// Run end is whatever the underlying disi reports (intoBitSet above delegates to it too).
@Override
806+
public int docIDRunEnd() throws IOException {
807+
return disi.docIDRunEnd();
808+
}
789809
}
790810

791811
@Override
@@ -954,6 +974,11 @@ public int advance(int target) throws IOException {
954974
public long cost() {
955975
return maxDoc;
956976
}
977+
978+
// Reports maxDoc as the end of the current run. NOTE(review): the following branch is
// labelled "sparse but non-empty", so this one is presumably the dense case — confirm.
@Override
979+
public int docIDRunEnd() throws IOException {
980+
return maxDoc;
981+
}
957982
};
958983
} else if (ordsEntry.docsWithFieldOffset >= 0) { // sparse but non-empty
959984
final IndexedDISI disi =
@@ -1001,6 +1026,11 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
10011026
public long cost() {
10021027
return disi.cost();
10031028
}
1029+
1030+
// Run end is whatever the underlying disi reports (cost() above delegates to it as well).
@Override
1031+
public int docIDRunEnd() throws IOException {
1032+
return disi.docIDRunEnd();
1033+
}
10041034
};
10051035
}
10061036
}
@@ -1037,6 +1067,11 @@ public int advance(int target) throws IOException {
10371067
public long cost() {
10381068
return ords.cost();
10391069
}
1070+
1071+
// Run end is forwarded from the wrapped ords iterator, like cost() above.
@Override
1072+
public int docIDRunEnd() throws IOException {
1073+
return ords.docIDRunEnd();
1074+
}
10401075
};
10411076
}
10421077

@@ -1444,6 +1479,11 @@ public long nextValue() throws IOException {
14441479
public int docValueCount() {
14451480
return count;
14461481
}
1482+
1483+
// Reports maxDoc as the end of the current run. NOTE(review): the else-branch that follows
// is labelled "sparse", so this one is presumably the dense case — confirm.
@Override
1484+
public int docIDRunEnd() throws IOException {
1485+
return maxDoc;
1486+
}
14471487
};
14481488
} else {
14491489
// sparse
@@ -1516,6 +1556,11 @@ private void set() {
15161556
set = true;
15171557
}
15181558
}
1559+
1560+
// Run end is whatever the underlying disi reports.
@Override
1561+
public int docIDRunEnd() throws IOException {
1562+
return disi.docIDRunEnd();
1563+
}
15191564
};
15201565
}
15211566
}
@@ -1605,6 +1650,11 @@ public int advance(int target) throws IOException {
16051650
public long cost() {
16061651
return maxDoc;
16071652
}
1653+
1654+
// Reports maxDoc as the end of the current run. NOTE(review): the following branch is
// labelled "sparse but non-empty", so this one is presumably the dense case — confirm.
@Override
1655+
public int docIDRunEnd() throws IOException {
1656+
return maxDoc;
1657+
}
16081658
};
16091659
} else if (ordsEntry.docsWithFieldOffset >= 0) { // sparse but non-empty
16101660
final IndexedDISI disi =
@@ -1677,6 +1727,11 @@ private void set() {
16771727
set = true;
16781728
}
16791729
}
1730+
1731+
// Run end is whatever the underlying disi reports.
@Override
1732+
public int docIDRunEnd() throws IOException {
1733+
return disi.docIDRunEnd();
1734+
}
16801735
};
16811736
}
16821737
}
@@ -1718,6 +1773,11 @@ public int advance(int target) throws IOException {
17181773
public long cost() {
17191774
return ords.cost();
17201775
}
1776+
1777+
// Run end is forwarded from the wrapped ords iterator, like cost() above.
@Override
1778+
public int docIDRunEnd() throws IOException {
1779+
return ords.docIDRunEnd();
1780+
}
17211781
};
17221782
}
17231783

lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestIndexedDISI.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,16 @@ private void doTest(BitSet set, Directory dir) throws IOException {
536536
}
537537
}
538538

539+
for (int step : new int[] {100, 1000, 10000, 100000}) {
540+
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
541+
IndexedDISI disi =
542+
new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
543+
BitSetIterator disi2 = new BitSetIterator(set, cardinality);
544+
int disi2length = set.length();
545+
assertDocIDRunEndRandomized(disi, disi2, disi2length, step);
546+
}
547+
}
548+
539549
dir.deleteFile("foo");
540550
}
541551

@@ -614,6 +624,24 @@ private void assertIntoBitsetRandomized(
614624
}
615625
}
616626

627+
/**
 * Checks {@code disi.docIDRunEnd()} against a reference iterator at randomly chosen positions.
 *
 * <p>Both iterators are advanced to the same random target; the run end reported by {@code
 * disi} is then verified by stepping both iterators with {@code nextDoc()} and asserting that
 * every doc ID up to (but excluding) the reported end is returned consecutively.
 *
 * @param disi the iterator under test
 * @param disi2 the reference iterator, kept in lock-step with {@code disi}
 * @param disi2length upper bound for the random targets
 * @param step maximum random increment applied to the target on each iteration
 */
private void assertDocIDRunEndRandomized(
628+
IndexedDISI disi, BitSetIterator disi2, int disi2length, int step) throws IOException {
629+
for (int target = 0; target < disi2length; ) {
630+
target += TestUtil.nextInt(random(), 0, step);
631+
// Only advance when the target is ahead of the iterator; walking a run below may already
// have moved past it.
if (disi.docID() < target) {
632+
disi.advance(target);
633+
disi2.advance(target);
634+
assertEquals(disi2.docID(), disi.docID());
635+
int end = disi.docIDRunEnd();
636+
assertNotEquals(0, end);
637+
// Every doc in [docID, end) must be matched by both iterators, one after another.
for (int it = disi.docID(); it != DocIdSetIterator.NO_MORE_DOCS && it + 1 < end; it++) {
638+
assertEquals(it + 1, disi.nextDoc());
639+
assertEquals(it + 1, disi2.nextDoc());
640+
}
641+
}
642+
}
643+
}
644+
617645
private void assertSingleStepEquality(IndexedDISI disi, BitSetIterator disi2) throws IOException {
618646
int i = 0;
619647
for (int doc = disi2.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi2.nextDoc()) {

0 commit comments

Comments
 (0)