Skip to content

Commit 201c279

Browse files
authored
Use Arrays.compareUnsigned in IDVersionSegmentTermsEnum and OrdsSegmentTermsEnum. (#13782)
Replace handwritten byte-comparison loops with Arrays.compareUnsigned in the TermsEnum and TermsEnumFrame classes.
1 parent eb3f27b commit 201c279

File tree

5 files changed

+247
-394
lines changed

5 files changed

+247
-394
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ Improvements
8989

9090
Optimizations
9191
---------------------
92+
* GITHUB#13782: Replace handwritten byte-comparison loops with Arrays.compareUnsigned in the TermsEnum and TermsEnumFrame classes. (Zhou Hui)
93+
9294
* GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina)
9395
* GITHUB#14022: Optimize DFS marking of connected components in HNSW by reducing stack depth, improving performance and reducing allocations. (Viswanath Kuchibhotla)
9496

lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java

Lines changed: 33 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.io.IOException;
2020
import java.io.PrintStream;
21+
import java.util.Arrays;
2122
import org.apache.lucene.codecs.BlockTermState;
2223
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
2324
import org.apache.lucene.index.BaseTermsEnum;
@@ -183,19 +184,19 @@ OrdsSegmentTermsEnumFrame pushFrame(FST.Arc<Output> arc, long fp, int length, lo
183184
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
184185
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
185186
// term.length + " vs prefix=" + f.prefix);
186-
if (f.prefix > targetBeforeCurrentLength) {
187+
if (f.prefixLength > targetBeforeCurrentLength) {
187188
// System.out.println(" do rewind!");
188189
f.rewind();
189190
} else {
190191
// if (DEBUG) {
191192
// System.out.println(" skip rewind!");
192193
// }
193194
}
194-
assert length == f.prefix;
195+
assert length == f.prefixLength;
195196
assert termOrd == f.termOrdOrig;
196197
} else {
197198
f.nextEnt = -1;
198-
f.prefix = length;
199+
f.prefixLength = length;
199200
f.state.termBlockOrd = 0;
200201
f.termOrdOrig = termOrd;
201202
// System.out.println("set termOrdOrig=" + termOrd);
@@ -308,31 +309,18 @@ public boolean seekExact(final BytesRef target) throws IOException {
308309
}
309310

310311
if (cmp == 0) {
311-
final int targetUptoMid = targetUpto;
312-
313312
// Second compare the rest of the term, but
314313
// don't save arc/output/frame; we only do this
315314
// to find out if the target term is before,
316315
// equal or after the current term
317-
final int targetLimit2 = Math.min(target.length, term.length());
318-
while (targetUpto < targetLimit2) {
319-
cmp =
320-
(term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
321-
// if (DEBUG) {
322-
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" +
323-
// targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset +
324-
// targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
325-
// }
326-
if (cmp != 0) {
327-
break;
328-
}
329-
targetUpto++;
330-
}
331-
332-
if (cmp == 0) {
333-
cmp = term.length() - target.length;
334-
}
335-
targetUpto = targetUptoMid;
316+
cmp =
317+
Arrays.compareUnsigned(
318+
term.bytes(),
319+
targetUpto,
320+
term.length(),
321+
target.bytes,
322+
target.offset + targetUpto,
323+
target.offset + target.length);
336324
}
337325

338326
if (cmp < 0) {
@@ -424,7 +412,7 @@ public boolean seekExact(final BytesRef target) throws IOException {
424412
// toHex(targetLabel));
425413
// }
426414

427-
validIndexPrefix = currentFrame.prefix;
415+
validIndexPrefix = currentFrame.prefixLength;
428416
// validIndexPrefix = targetUpto;
429417

430418
currentFrame.scanToFloorFrame(target);
@@ -484,7 +472,7 @@ public boolean seekExact(final BytesRef target) throws IOException {
484472
}
485473

486474
// validIndexPrefix = targetUpto;
487-
validIndexPrefix = currentFrame.prefix;
475+
validIndexPrefix = currentFrame.prefixLength;
488476

489477
currentFrame.scanToFloorFrame(target);
490478

@@ -600,28 +588,16 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException {
600588
}
601589

602590
if (cmp == 0) {
603-
final int targetUptoMid = targetUpto;
604591
// Second compare the rest of the term, but
605592
// don't save arc/output/frame:
606-
final int targetLimit2 = Math.min(target.length, term.length());
607-
while (targetUpto < targetLimit2) {
608-
cmp =
609-
(term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
610-
// if (DEBUG) {
611-
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit
612-
// + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto])
613-
// + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
614-
// }
615-
if (cmp != 0) {
616-
break;
617-
}
618-
targetUpto++;
619-
}
620-
621-
if (cmp == 0) {
622-
cmp = term.length() - target.length;
623-
}
624-
targetUpto = targetUptoMid;
593+
cmp =
594+
Arrays.compareUnsigned(
595+
term.bytes(),
596+
targetUpto,
597+
term.length(),
598+
target.bytes,
599+
target.offset + targetUpto,
600+
target.offset + target.length);
625601
}
626602

627603
if (cmp < 0) {
@@ -710,7 +686,7 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException {
710686
// toHex(targetLabel));
711687
// }
712688

713-
validIndexPrefix = currentFrame.prefix;
689+
validIndexPrefix = currentFrame.prefixLength;
714690
// validIndexPrefix = targetUpto;
715691

716692
currentFrame.scanToFloorFrame(target);
@@ -771,7 +747,7 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException {
771747
}
772748

773749
// validIndexPrefix = targetUpto;
774-
validIndexPrefix = currentFrame.prefix;
750+
validIndexPrefix = currentFrame.prefixLength;
775751

776752
currentFrame.scanToFloorFrame(target);
777753

@@ -809,7 +785,7 @@ private void printSeekState(PrintStream out) throws IOException {
809785
while (true) {
810786
OrdsSegmentTermsEnumFrame f = getFrame(ord);
811787
assert f != null;
812-
final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
788+
final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefixLength);
813789
if (f.nextEnt == -1) {
814790
out.println(
815791
" frame "
@@ -820,7 +796,7 @@ private void printSeekState(PrintStream out) throws IOException {
820796
+ f.fp
821797
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
822798
+ " prefixLen="
823-
+ f.prefix
799+
+ f.prefixLength
824800
+ " prefix="
825801
+ ToStringUtils.bytesRefToString(prefix)
826802
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
@@ -850,7 +826,7 @@ private void printSeekState(PrintStream out) throws IOException {
850826
+ f.fp
851827
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
852828
+ " prefixLen="
853-
+ f.prefix
829+
+ f.prefixLength
854830
+ " prefix="
855831
+ ToStringUtils.bytesRefToString(prefix)
856832
+ " nextEnt="
@@ -877,12 +853,14 @@ private void printSeekState(PrintStream out) throws IOException {
877853
}
878854
if (fr.index != null) {
879855
assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
880-
if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix - 1) & 0xFF)) {
856+
if (f.prefixLength > 0
857+
&& isSeekFrame
858+
&& f.arc.label() != (term.byteAt(f.prefixLength - 1) & 0xFF)) {
881859
out.println(
882860
" broken seek state: arc.label="
883861
+ (char) f.arc.label()
884862
+ " vs term byte="
885-
+ (char) (term.byteAt(f.prefix - 1) & 0xFF));
863+
+ (char) (term.byteAt(f.prefixLength - 1) & 0xFF));
886864
throw new RuntimeException("seek state is broken");
887865
}
888866
Output output = Util.get(fr.index, prefix);
@@ -911,7 +889,7 @@ private void printSeekState(PrintStream out) throws IOException {
911889
if (f == currentFrame) {
912890
break;
913891
}
914-
if (f.prefix == validIndexPrefix) {
892+
if (f.prefixLength == validIndexPrefix) {
915893
isSeekFrame = false;
916894
}
917895
ord++;
@@ -993,7 +971,7 @@ public BytesRef next() throws IOException {
993971

994972
// Note that the seek state (last seek) has been
995973
// invalidated beyond this depth
996-
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
974+
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefixLength);
997975
// if (DEBUG) {
998976
// System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
999977
// }

0 commit comments

Comments
 (0)