Skip to content

Commit d176a42

Browse files
HUSTERGS authored and gf2121 committed
Implement IndexedDISI#docIDRunEnd (#14753)
1 parent d03071b commit d176a42

File tree

4 files changed

+115
-0
lines changed

4 files changed

+115
-0
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,8 @@ Optimizations
7575

7676
* GITHUB#14720: Cache high-order bits of hashcode to speed up BytesRefHash. (Pan Guixin)
7777

78+
* GITHUB#14753: Implement IndexedDISI#docIDRunEnd. (Ge Song)
79+
7880
Bug Fixes
7981
---------------------
8082
* GITHUB#14654: ValueSource.fromDoubleValuesSource(dvs).getSortField() would throw errors when

lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -568,6 +568,11 @@ public long cost() {
568568
return cost;
569569
}
570570

571+
@Override
572+
public int docIDRunEnd() throws IOException {
573+
return method.docIDRunEnd(this);
574+
}
575+
571576
enum Method {
572577
SPARSE {
573578
@Override
@@ -634,6 +639,11 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
634639
}
635640
return false;
636641
}
642+
643+
@Override
644+
int docIDRunEnd(IndexedDISI disi) throws IOException {
645+
return disi.doc + 1;
646+
}
637647
},
638648
DENSE {
639649
@Override
@@ -730,6 +740,14 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
730740
return advanceWithinBlock(disi, upTo);
731741
}
732742
}
743+
744+
@Override
745+
int docIDRunEnd(IndexedDISI disi) throws IOException {
746+
if (disi.word == -1L) {
747+
return (disi.doc | 0x3F) + 1;
748+
}
749+
return disi.doc + 1;
750+
}
733751
},
734752
ALL {
735753
@Override
@@ -757,6 +775,11 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
757775
return false;
758776
}
759777
}
778+
779+
@Override
780+
int docIDRunEnd(IndexedDISI disi) throws IOException {
781+
return (disi.doc | 0xFFFF) + 1;
782+
}
760783
};
761784

762785
/**
@@ -783,6 +806,8 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
783806
*/
784807
abstract boolean intoBitSetWithinBlock(
785808
IndexedDISI disi, int upTo, FixedBitSet bitSet, int offset) throws IOException;
809+
810+
abstract int docIDRunEnd(IndexedDISI disi) throws IOException;
786811
}
787812

788813
/**

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -469,6 +469,11 @@ public boolean advanceExact(int target) {
469469
public long cost() {
470470
return maxDoc;
471471
}
472+
473+
@Override
474+
public int docIDRunEnd() throws IOException {
475+
return maxDoc;
476+
}
472477
}
473478

474479
private abstract static class SparseNumericDocValues extends NumericDocValues {
@@ -508,6 +513,11 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
508513
public long cost() {
509514
return disi.cost();
510515
}
516+
517+
@Override
518+
public int docIDRunEnd() throws IOException {
519+
return disi.docIDRunEnd();
520+
}
511521
}
512522

513523
private LongValues getDirectReaderInstance(
@@ -751,6 +761,11 @@ public boolean advanceExact(int target) throws IOException {
751761
doc = target;
752762
return true;
753763
}
764+
765+
@Override
766+
public int docIDRunEnd() throws IOException {
767+
return maxDoc;
768+
}
754769
}
755770

756771
private abstract static class SparseBinaryDocValues extends BinaryDocValues {
@@ -790,6 +805,11 @@ public boolean advanceExact(int target) throws IOException {
790805
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
791806
disi.intoBitSet(upTo, bitSet, offset);
792807
}
808+
809+
@Override
810+
public int docIDRunEnd() throws IOException {
811+
return disi.docIDRunEnd();
812+
}
793813
}
794814

795815
@Override
@@ -958,6 +978,11 @@ public int advance(int target) throws IOException {
958978
public long cost() {
959979
return maxDoc;
960980
}
981+
982+
@Override
983+
public int docIDRunEnd() throws IOException {
984+
return maxDoc;
985+
}
961986
};
962987
} else if (ordsEntry.docsWithFieldOffset >= 0) { // sparse but non-empty
963988
final IndexedDISI disi =
@@ -1005,6 +1030,11 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
10051030
public long cost() {
10061031
return disi.cost();
10071032
}
1033+
1034+
@Override
1035+
public int docIDRunEnd() throws IOException {
1036+
return disi.docIDRunEnd();
1037+
}
10081038
};
10091039
}
10101040
}
@@ -1041,6 +1071,11 @@ public int advance(int target) throws IOException {
10411071
public long cost() {
10421072
return ords.cost();
10431073
}
1074+
1075+
@Override
1076+
public int docIDRunEnd() throws IOException {
1077+
return ords.docIDRunEnd();
1078+
}
10441079
};
10451080
}
10461081

@@ -1448,6 +1483,11 @@ public long nextValue() throws IOException {
14481483
public int docValueCount() {
14491484
return count;
14501485
}
1486+
1487+
@Override
1488+
public int docIDRunEnd() throws IOException {
1489+
return maxDoc;
1490+
}
14511491
};
14521492
} else {
14531493
// sparse
@@ -1520,6 +1560,11 @@ private void set() {
15201560
set = true;
15211561
}
15221562
}
1563+
1564+
@Override
1565+
public int docIDRunEnd() throws IOException {
1566+
return disi.docIDRunEnd();
1567+
}
15231568
};
15241569
}
15251570
}
@@ -1609,6 +1654,11 @@ public int advance(int target) throws IOException {
16091654
public long cost() {
16101655
return maxDoc;
16111656
}
1657+
1658+
@Override
1659+
public int docIDRunEnd() throws IOException {
1660+
return maxDoc;
1661+
}
16121662
};
16131663
} else if (ordsEntry.docsWithFieldOffset >= 0) { // sparse but non-empty
16141664
final IndexedDISI disi =
@@ -1681,6 +1731,11 @@ private void set() {
16811731
set = true;
16821732
}
16831733
}
1734+
1735+
@Override
1736+
public int docIDRunEnd() throws IOException {
1737+
return disi.docIDRunEnd();
1738+
}
16841739
};
16851740
}
16861741
}
@@ -1722,6 +1777,11 @@ public int advance(int target) throws IOException {
17221777
public long cost() {
17231778
return ords.cost();
17241779
}
1780+
1781+
@Override
1782+
public int docIDRunEnd() throws IOException {
1783+
return ords.docIDRunEnd();
1784+
}
17251785
};
17261786
}
17271787

lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestIndexedDISI.java

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -536,6 +536,16 @@ private void doTest(BitSet set, Directory dir) throws IOException {
536536
}
537537
}
538538

539+
for (int step : new int[] {100, 1000, 10000, 100000}) {
540+
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
541+
IndexedDISI disi =
542+
new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
543+
BitSetIterator disi2 = new BitSetIterator(set, cardinality);
544+
int disi2length = set.length();
545+
assertDocIDRunEndRandomized(disi, disi2, disi2length, step);
546+
}
547+
}
548+
539549
dir.deleteFile("foo");
540550
}
541551

@@ -614,6 +624,24 @@ private void assertIntoBitsetRandomized(
614624
}
615625
}
616626

627+
private void assertDocIDRunEndRandomized(
628+
IndexedDISI disi, BitSetIterator disi2, int disi2length, int step) throws IOException {
629+
for (int target = 0; target < disi2length; ) {
630+
target += TestUtil.nextInt(random(), 0, step);
631+
if (disi.docID() < target) {
632+
disi.advance(target);
633+
disi2.advance(target);
634+
assertEquals(disi2.docID(), disi.docID());
635+
int end = disi.docIDRunEnd();
636+
assertNotEquals(0, end);
637+
for (int it = disi.docID(); it != DocIdSetIterator.NO_MORE_DOCS && it + 1 < end; it++) {
638+
assertEquals(it + 1, disi.nextDoc());
639+
assertEquals(it + 1, disi2.nextDoc());
640+
}
641+
}
642+
}
643+
}
644+
617645
private void assertSingleStepEquality(IndexedDISI disi, BitSetIterator disi2) throws IOException {
618646
int i = 0;
619647
for (int doc = disi2.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi2.nextDoc()) {

0 commit comments

Comments
 (0)