Commit 95d4147

optimize prefix sums

1 parent 9d8685f commit 95d4147

File tree

8 files changed: +95 −86 lines
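
The substantive change in every file below is the same rewrite (the last file also picks up some unrelated line-wrapping churn): instead of accumulating in place with arr[i] += arr[i - 1], which makes each iteration reload the element written by the previous one, the running sum is carried in a local variable, presumably so the loop-carried dependency stays in a register and only the store touches memory. A minimal before/after sketch of the pattern (class and method names are illustrative, not from the commit):

// Sketch of the pattern this commit applies; names are hypothetical.
class PrefixSumSketch {
  // Before: the carried value round-trips through the array every iteration.
  static void prefixSumOld(long[] buffer, int count, long base) {
    buffer[0] += base;
    for (int i = 1; i < count; ++i) {
      buffer[i] += buffer[i - 1];
    }
  }

  // After: the running sum lives in a local; each iteration loads the delta,
  // adds, and stores the absolute value.
  static void prefixSumNew(long[] buffer, int count, long base) {
    long sum = base;
    for (int i = 0; i < count; ++i) {
      sum += buffer[i];
      buffer[i] = sum;
    }
  }
}

Both variants produce identical results for count >= 1; the difference is only in the dependency chain the JIT sees.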

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/ForDeltaUtil.java

Lines changed: 4 additions & 3 deletions

@@ -104,9 +104,10 @@ private static void prefixSum16(int[] arr, int base) {
   }
 
   private static void prefixSum32(int[] arr, int base) {
-    arr[0] += base;
-    for (int i = 1; i < BLOCK_SIZE; ++i) {
-      arr[i] += arr[i - 1];
+    int sum = base;
+    for (int i = 0; i < BLOCK_SIZE; ++i) {
+      sum += arr[i];
+      arr[i] = sum;
     }
   }
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java

Lines changed: 7 additions & 4 deletions

@@ -186,9 +186,10 @@ public void init(IndexInput termsIn, SegmentReadState state) throws IOException
   }
 
   static void prefixSum(int[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    int sum = (int) base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
   }

@@ -606,8 +607,10 @@ private void refillFullBlock() throws IOException {
       for (int i = 0; i < numLongs - 1; ++i) {
         docCumulativeWordPopCounts[i] = Long.bitCount(docBitSet.getBits()[i]);
       }
+      int sum = docCumulativeWordPopCounts[0];
       for (int i = 1; i < numLongs - 1; ++i) {
-        docCumulativeWordPopCounts[i] += docCumulativeWordPopCounts[i - 1];
+        sum += docCumulativeWordPopCounts[i];
+        docCumulativeWordPopCounts[i] = sum;
       }
       docCumulativeWordPopCounts[numLongs - 1] = BLOCK_SIZE;
       assert docCumulativeWordPopCounts[numLongs - 2]
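
The refillFullBlock hunk is the same rewrite seeded from element 0 rather than an external base. A cumulative-popcount table such as docCumulativeWordPopCounts is typically used to map a set-bit ordinal to the bitset word that contains it; a rough sketch of that kind of lookup (hypothetical helper, not part of this commit):

// Hypothetical sketch, not from the commit: with cumulative[i] holding the
// popcount of words 0..i, return the index of the word containing set bit
// number n (0-based). Assumes n is less than the total popcount.
class CumulativePopCountSketch {
  static int wordContaining(int[] cumulative, int n) {
    int w = 0;
    while (cumulative[w] <= n) { // first word whose running count exceeds n
      ++w;
    }
    return w;
  }
}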

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java

Lines changed: 6 additions & 2 deletions

@@ -547,8 +547,10 @@ public Fields get(int doc) throws IOException {
         // delta-decode start offsets and patch lengths using term lengths
         final int termLength = fPrefixLengths[j] + fSuffixLengths[j];
         lengths[i][positionIndex[i][j]] += termLength;
+        int sum = fStartOffsets[positionIndex[i][j]];
         for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) {
-          fStartOffsets[k] += fStartOffsets[k - 1];
+          sum += fStartOffsets[k];
+          fStartOffsets[k] = sum;
           fLengths[k] += termLength;
         }
       }

@@ -565,8 +567,10 @@ public Fields get(int doc) throws IOException {
       if (fPositions != null) {
         for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
           // delta-decode start offsets
+          int sum = fPositions[fpositionIndex[j]];
           for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) {
-            fPositions[k] += fPositions[k - 1];
+            sum += fPositions[k];
+            fPositions[k] = sum;
           }
         }
       }

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84PostingsReader.java

Lines changed: 4 additions & 3 deletions

@@ -164,9 +164,10 @@ static void readVIntBlock(
   }
 
   static void prefixSum(long[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    long sum = base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
   }

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsReader.java

Lines changed: 4 additions & 3 deletions

@@ -162,9 +162,10 @@ static void readVIntBlock(
   }
 
   static void prefixSum(long[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    long sum = base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
   }

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/Lucene912PostingsReader.java

Lines changed: 4 additions & 3 deletions

@@ -196,9 +196,10 @@ public void init(IndexInput termsIn, SegmentReadState state) throws IOException
   }
 
   static void prefixSum(long[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    long sum = base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
   }

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/Lucene99PostingsReader.java

Lines changed: 4 additions & 3 deletions

@@ -145,9 +145,10 @@ public void init(IndexInput termsIn, SegmentReadState state) throws IOException
   }
 
   static void prefixSum(long[] buffer, int count, long base) {
-    buffer[0] += base;
-    for (int i = 1; i < count; ++i) {
-      buffer[i] += buffer[i - 1];
+    long sum = base;
+    for (int i = 0; i < count; ++i) {
+      sum += buffer[i];
+      buffer[i] = sum;
     }
  }

lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java

Lines changed: 62 additions & 65 deletions

@@ -98,9 +98,12 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
   private final long numDirtyDocs; // cumulative number of docs in incomplete chunks
   private final long maxPointer; // end of the data section
   private BlockState blockState = new BlockState(-1, -1, 0);
-  // Cache of recently prefetched block IDs. This helps reduce chances of prefetching the same block
-  // multiple times, which is otherwise likely due to index sorting or recursive graph bisection
-  // clustering similar documents together. NOTE: this cache must be small since it's fully scanned.
+  // Cache of recently prefetched block IDs. This helps reduce chances of
+  // prefetching the same block
+  // multiple times, which is otherwise likely due to index sorting or recursive
+  // graph bisection
+  // clustering similar documents together. NOTE: this cache must be small since
+  // it's fully scanned.
   private final long[] prefetchedBlockIDCache;
   private int prefetchedBlockIDCacheIndex;
 

@@ -114,8 +117,7 @@ private Lucene90CompressingTermVectorsReader(Lucene90CompressingTermVectorsReade
     this.decompressor = reader.decompressor.clone();
     this.chunkSize = reader.chunkSize;
     this.numDocs = reader.numDocs;
-    this.reader =
-        new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
+    this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
     this.version = reader.version;
     this.numChunks = reader.numChunks;
     this.numDirtyChunks = reader.numDirtyChunks;

@@ -144,18 +146,13 @@ public Lucene90CompressingTermVectorsReader(
     ChecksumIndexInput metaIn = null;
     try {
       // Open the data file
-      final String vectorsStreamFN =
-          IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
-      vectorsStream =
-          d.openInput(vectorsStreamFN, context.withHints(FileTypeHint.DATA, DataAccessHint.RANDOM));
-      version =
-          CodecUtil.checkIndexHeader(
-              vectorsStream, formatName, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
-      assert CodecUtil.indexHeaderLength(formatName, segmentSuffix)
-          == vectorsStream.getFilePointer();
-
-      final String metaStreamFN =
-          IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_META_EXTENSION);
+      final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
+      vectorsStream = d.openInput(vectorsStreamFN, context.withHints(FileTypeHint.DATA, DataAccessHint.RANDOM));
+      version = CodecUtil.checkIndexHeader(
+          vectorsStream, formatName, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
+      assert CodecUtil.indexHeaderLength(formatName, segmentSuffix) == vectorsStream.getFilePointer();
+
+      final String metaStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_META_EXTENSION);
       metaIn = d.openChecksumInput(metaStreamFN);
       CodecUtil.checkIndexHeader(
           metaIn,

@@ -169,21 +166,22 @@ public Lucene90CompressingTermVectorsReader(
       chunkSize = metaIn.readVInt();
 
       // NOTE: data file is too costly to verify checksum against all the bytes on open,
-      // but for now we at least verify proper structure of the checksum footer: which looks
-      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // but for now we at least verify proper structure of the checksum footer: which
+      // looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of
+      // corruption
       // such as file truncation.
       CodecUtil.retrieveChecksum(vectorsStream);
 
-      FieldsIndexReader fieldsIndexReader =
-          new FieldsIndexReader(
-              d,
-              si.name,
-              segmentSuffix,
-              VECTORS_INDEX_EXTENSION,
-              VECTORS_INDEX_CODEC_NAME,
-              si.getId(),
-              metaIn,
-              context);
+      FieldsIndexReader fieldsIndexReader = new FieldsIndexReader(
+          d,
+          si.name,
+          segmentSuffix,
+          VECTORS_INDEX_EXTENSION,
+          VECTORS_INDEX_CODEC_NAME,
+          si.getId(),
+          metaIn,
+          context);
 
       this.indexReader = fieldsIndexReader;
       this.maxPointer = fieldsIndexReader.getMaxPointer();

@@ -218,8 +216,7 @@ public Lucene90CompressingTermVectorsReader(
       }
 
       decompressor = compressionMode.newDecompressor();
-      this.reader =
-          new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
+      this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
 
       CodecUtil.checkFooter(metaIn, null);
       metaIn.close();

@@ -338,7 +335,8 @@ boolean isLoaded(int docID) {
     return blockState.docBase <= docID && docID < blockState.docBase + blockState.chunkDocs;
   }
 
-  private record BlockState(long startPointer, int docBase, int chunkDocs) {}
+  private record BlockState(long startPointer, int docBase, int chunkDocs) {
+  }
 
   @Override
   public void prefetch(int docID) throws IOException {

@@ -418,14 +416,13 @@ public Fields get(int doc) throws IOException {
       totalDistinctFields += vectorsStream.readVInt();
     }
     ++totalDistinctFields;
-    final PackedInts.ReaderIterator it =
-        PackedInts.getReaderIteratorNoHeader(
-            vectorsStream,
-            PackedInts.Format.PACKED,
-            packedIntsVersion,
-            totalDistinctFields,
-            bitsPerFieldNum,
-            1);
+    final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(
+        vectorsStream,
+        PackedInts.Format.PACKED,
+        packedIntsVersion,
+        totalDistinctFields,
+        bitsPerFieldNum,
+        1);
     fieldNums = new int[totalDistinctFields];
     for (int i = 0; i < totalDistinctFields; ++i) {
       fieldNums[i] = (int) it.next();

@@ -493,7 +490,7 @@ public Fields get(int doc) throws IOException {
       final int termCount = (int) numTerms.get(skip + i);
       final int[] fieldPrefixLengths = new int[termCount];
       prefixLengths[i] = fieldPrefixLengths;
-      for (int j = 0; j < termCount; ) {
+      for (int j = 0; j < termCount;) {
         final LongsRef next = reader.next(termCount - j);
         for (int k = 0; k < next.length; ++k) {
           fieldPrefixLengths[j++] = (int) next.longs[next.offset + k];

@@ -514,7 +511,7 @@ public Fields get(int doc) throws IOException {
       final int termCount = (int) numTerms.get(skip + i);
       final int[] fieldSuffixLengths = new int[termCount];
       suffixLengths[i] = fieldSuffixLengths;
-      for (int j = 0; j < termCount; ) {
+      for (int j = 0; j < termCount;) {
         final LongsRef next = reader.next(termCount - j);
         for (int k = 0; k < next.length; ++k) {
           fieldSuffixLengths[j++] = (int) next.longs[next.offset + k];

@@ -535,7 +532,7 @@ public Fields get(int doc) throws IOException {
     final int[] termFreqs = new int[totalTerms];
     {
       reader.reset(vectorsStream, totalTerms);
-      for (int i = 0; i < totalTerms; ) {
+      for (int i = 0; i < totalTerms;) {
         final LongsRef next = reader.next(totalTerms - i);
         for (int k = 0; k < next.length; ++k) {
           termFreqs[i++] = 1 + (int) next.longs[next.offset + k];

@@ -566,16 +563,15 @@ public Fields get(int doc) throws IOException {
     final int[][] positionIndex = positionIndex(skip, numFields, numTerms, termFreqs);
     final int[][] positions, startOffsets, lengths;
     if (totalPositions > 0) {
-      positions =
-          readPositions(
-              skip,
-              numFields,
-              flags,
-              numTerms,
-              termFreqs,
-              POSITIONS,
-              totalPositions,
-              positionIndex);
+      positions = readPositions(
+          skip,
+          numFields,
+          flags,
+          numTerms,
+          termFreqs,
+          POSITIONS,
+          totalPositions,
+          positionIndex);
     } else {
       positions = new int[numFields][];
     }

@@ -586,12 +582,10 @@ public Fields get(int doc) throws IOException {
     for (int i = 0; i < charsPerTerm.length; ++i) {
       charsPerTerm[i] = Float.intBitsToFloat(vectorsStream.readInt());
     }
-    startOffsets =
-        readPositions(
-            skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
-    lengths =
-        readPositions(
-            skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
+    startOffsets = readPositions(
+        skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
+    lengths = readPositions(
+        skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
 
     for (int i = 0; i < numFields; ++i) {
       final int[] fStartOffsets = startOffsets[i];

@@ -608,11 +602,13 @@ public Fields get(int doc) throws IOException {
       final int[] fSuffixLengths = suffixLengths[i];
       final int[] fLengths = lengths[i];
       for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
-          // delta-decode start offsets and patch lengths using term lengths
+        // delta-decode start offsets and patch lengths using term lengths
         final int termLength = fPrefixLengths[j] + fSuffixLengths[j];
         lengths[i][positionIndex[i][j]] += termLength;
+        int sum = fStartOffsets[positionIndex[i][j]];
         for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) {
-          fStartOffsets[k] += fStartOffsets[k - 1];
+          sum += fStartOffsets[k];
+          fStartOffsets[k] = sum;
           fLengths[k] += termLength;
         }
       }

@@ -629,8 +625,10 @@ public Fields get(int doc) throws IOException {
      if (fPositions != null) {
         for (int j = 0, end = (int) numTerms.get(skip + i); j < end; ++j) {
           // delta-decode start offsets
+          int sum = fPositions[fpositionIndex[j]];
           for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) {
-            fPositions[k] += fPositions[k - 1];
+            sum += fPositions[k];
+            fPositions[k] = sum;
           }
         }
       }

@@ -709,8 +707,7 @@ public Fields get(int doc) throws IOException {
           docLen + payloadLen,
           suffixBytes);
       suffixBytes.length = docLen;
-      final BytesRef payloadBytes =
-          new BytesRef(suffixBytes.bytes, suffixBytes.offset + docLen, payloadLen);
+      final BytesRef payloadBytes = new BytesRef(suffixBytes.bytes, suffixBytes.offset + docLen, payloadLen);
 
       final int[] fieldFlags = new int[numFields];
       for (int i = 0; i < numFields; ++i) {

@@ -812,7 +809,7 @@ private int[][] readPositions(
       final int totalFreq = positionIndex[i][termCount];
       final int[] fieldPositions = new int[totalFreq];
       positions[i] = fieldPositions;
-      for (int j = 0; j < totalFreq; ) {
+      for (int j = 0; j < totalFreq;) {
        final LongsRef nextPositions = reader.next(totalFreq - j);
        for (int k = 0; k < nextPositions.length; ++k) {
          fieldPositions[j++] = (int) nextPositions.longs[nextPositions.offset + k];
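
A harness along the following lines could compare the two loop shapes. This is a hypothetical JMH sketch, not part of the commit; it assumes JMH is on the classpath and that 128-element blocks (the postings BLOCK_SIZE) are representative:

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

@State(Scope.Thread)
public class PrefixSumBench {
  private final long[] buffer = new long[128]; // assumed block size

  @Setup(Level.Invocation)
  public void fill() {
    for (int i = 0; i < buffer.length; ++i) {
      buffer[i] = i & 7; // arbitrary small deltas
    }
  }

  @Benchmark
  public void inPlace(Blackhole bh) {
    // old shape: accumulate through the array
    buffer[0] += 100;
    for (int i = 1; i < buffer.length; ++i) {
      buffer[i] += buffer[i - 1];
    }
    bh.consume(buffer);
  }

  @Benchmark
  public void runningSum(Blackhole bh) {
    // new shape: accumulate in a local
    long sum = 100;
    for (int i = 0; i < buffer.length; ++i) {
      sum += buffer[i];
      buffer[i] = sum;
    }
    bh.consume(buffer);
  }
}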
