Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ Improvements

Optimizations
---------------------
* GITHUB#13782: Replace handwritten comparison loops with Arrays.compareUnsigned in TermsEnum and TermsEnumFrame classes. (Zhou Hui)

* GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina)
* GITHUB#14022: Optimize DFS marking of connected components in HNSW by reducing stack depth, improving performance and reducing allocations. (Viswanath Kuchibhotla)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
import org.apache.lucene.index.BaseTermsEnum;
Expand Down Expand Up @@ -183,19 +184,19 @@ OrdsSegmentTermsEnumFrame pushFrame(FST.Arc<Output> arc, long fp, int length, lo
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
if (f.prefix > targetBeforeCurrentLength) {
if (f.prefixLength > targetBeforeCurrentLength) {
// System.out.println(" do rewind!");
f.rewind();
} else {
// if (DEBUG) {
// System.out.println(" skip rewind!");
// }
}
assert length == f.prefix;
assert length == f.prefixLength;
assert termOrd == f.termOrdOrig;
} else {
f.nextEnt = -1;
f.prefix = length;
f.prefixLength = length;
f.state.termBlockOrd = 0;
f.termOrdOrig = termOrd;
// System.out.println("set termOrdOrig=" + termOrd);
Expand Down Expand Up @@ -308,31 +309,18 @@ public boolean seekExact(final BytesRef target) throws IOException {
}

if (cmp == 0) {
final int targetUptoMid = targetUpto;

// Second compare the rest of the term, but
// don't save arc/output/frame; we only do this
// to find out if the target term is before,
// equal or after the current term
final int targetLimit2 = Math.min(target.length, term.length());
while (targetUpto < targetLimit2) {
cmp =
(term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" +
// targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset +
// targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
// }
if (cmp != 0) {
break;
}
targetUpto++;
}

if (cmp == 0) {
cmp = term.length() - target.length;
}
targetUpto = targetUptoMid;
cmp =
Arrays.compareUnsigned(
term.bytes(),
targetUpto,
term.length(),
target.bytes,
target.offset + targetUpto,
target.offset + target.length);
}

if (cmp < 0) {
Expand Down Expand Up @@ -424,7 +412,7 @@ public boolean seekExact(final BytesRef target) throws IOException {
// toHex(targetLabel));
// }

validIndexPrefix = currentFrame.prefix;
validIndexPrefix = currentFrame.prefixLength;
// validIndexPrefix = targetUpto;

currentFrame.scanToFloorFrame(target);
Expand Down Expand Up @@ -484,7 +472,7 @@ public boolean seekExact(final BytesRef target) throws IOException {
}

// validIndexPrefix = targetUpto;
validIndexPrefix = currentFrame.prefix;
validIndexPrefix = currentFrame.prefixLength;

currentFrame.scanToFloorFrame(target);

Expand Down Expand Up @@ -600,28 +588,16 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException {
}

if (cmp == 0) {
final int targetUptoMid = targetUpto;
// Second compare the rest of the term, but
// don't save arc/output/frame:
final int targetLimit2 = Math.min(target.length, term.length());
while (targetUpto < targetLimit2) {
cmp =
(term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit
// + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto])
// + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
// }
if (cmp != 0) {
break;
}
targetUpto++;
}

if (cmp == 0) {
cmp = term.length() - target.length;
}
targetUpto = targetUptoMid;
cmp =
Arrays.compareUnsigned(
term.bytes(),
targetUpto,
term.length(),
target.bytes,
target.offset + targetUpto,
target.offset + target.length);
}

if (cmp < 0) {
Expand Down Expand Up @@ -710,7 +686,7 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException {
// toHex(targetLabel));
// }

validIndexPrefix = currentFrame.prefix;
validIndexPrefix = currentFrame.prefixLength;
// validIndexPrefix = targetUpto;

currentFrame.scanToFloorFrame(target);
Expand Down Expand Up @@ -771,7 +747,7 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException {
}

// validIndexPrefix = targetUpto;
validIndexPrefix = currentFrame.prefix;
validIndexPrefix = currentFrame.prefixLength;

currentFrame.scanToFloorFrame(target);

Expand Down Expand Up @@ -809,7 +785,7 @@ private void printSeekState(PrintStream out) throws IOException {
while (true) {
OrdsSegmentTermsEnumFrame f = getFrame(ord);
assert f != null;
final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefixLength);
if (f.nextEnt == -1) {
out.println(
" frame "
Expand All @@ -820,7 +796,7 @@ private void printSeekState(PrintStream out) throws IOException {
+ f.fp
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
+ " prefixLen="
+ f.prefix
+ f.prefixLength
+ " prefix="
+ ToStringUtils.bytesRefToString(prefix)
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
Expand Down Expand Up @@ -850,7 +826,7 @@ private void printSeekState(PrintStream out) throws IOException {
+ f.fp
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
+ " prefixLen="
+ f.prefix
+ f.prefixLength
+ " prefix="
+ ToStringUtils.bytesRefToString(prefix)
+ " nextEnt="
Expand All @@ -877,12 +853,14 @@ private void printSeekState(PrintStream out) throws IOException {
}
if (fr.index != null) {
assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix - 1) & 0xFF)) {
if (f.prefixLength > 0
&& isSeekFrame
&& f.arc.label() != (term.byteAt(f.prefixLength - 1) & 0xFF)) {
out.println(
" broken seek state: arc.label="
+ (char) f.arc.label()
+ " vs term byte="
+ (char) (term.byteAt(f.prefix - 1) & 0xFF));
+ (char) (term.byteAt(f.prefixLength - 1) & 0xFF));
throw new RuntimeException("seek state is broken");
}
Output output = Util.get(fr.index, prefix);
Expand Down Expand Up @@ -911,7 +889,7 @@ private void printSeekState(PrintStream out) throws IOException {
if (f == currentFrame) {
break;
}
if (f.prefix == validIndexPrefix) {
if (f.prefixLength == validIndexPrefix) {
isSeekFrame = false;
}
ord++;
Expand Down Expand Up @@ -993,7 +971,7 @@ public BytesRef next() throws IOException {

// Note that the seek state (last seek) has been
// invalidated beyond this depth
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefixLength);
// if (DEBUG) {
// System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
// }
Expand Down
Loading