Skip to content

Commit 48ff29c

Browse files
authored
LUCENE-9983: Stop sorting determinize powersets unnecessarily (#163)
* LUCENE-9983: Stop sorting determinize powersets unnecessarily
1 parent 1d5d458 commit 48ff29c

File tree

11 files changed: +1938 -216 lines changed

11 files changed: +1938 -216 lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -368,6 +368,8 @@ Improvements
368368
cases like det(rev(regexp("(.*a){2000}"))) that spend lots of effort but
369369
result in smallish eventual returned automata. (Robert Muir, Mike McCandless)
370370

371+
* LUCENE-9983: Stop sorting determinize powersets unnecessarily. (Patrick Zhai)
372+
371373
Optimizations
372374
---------------------
373375
* LUCENE-9996: Improved memory efficiency of IndexWriter's RAM buffer, in

lucene/core/src/java/org/apache/lucene/util/automaton/FrozenIntSet.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,13 +20,13 @@
2020

2121
final class FrozenIntSet extends IntSet {
2222
final int[] values;
23-
final int hashCode;
2423
final int state;
24+
final long hashCode;
2525

26-
FrozenIntSet(int[] values, int hashCode, int state) {
26+
FrozenIntSet(int[] values, long hashCode, int state) {
2727
this.values = values;
28-
this.hashCode = hashCode;
2928
this.state = state;
29+
this.hashCode = hashCode;
3030
}
3131

3232
@Override
@@ -40,7 +40,7 @@ int size() {
4040
}
4141

4242
@Override
43-
public int hashCode() {
43+
long longHashCode() {
4444
return hashCode;
4545
}
4646

lucene/core/src/java/org/apache/lucene/util/automaton/IntSet.java

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,15 +36,19 @@ abstract class IntSet {
3636
*/
3737
abstract int size();
3838

39+
abstract long longHashCode();
40+
3941
@Override
40-
public abstract int hashCode();
42+
public int hashCode() {
43+
return Long.hashCode(longHashCode());
44+
}
4145

4246
@Override
4347
public boolean equals(Object o) {
4448
if (this == o) return true;
4549
if (!(o instanceof IntSet)) return false;
4650
IntSet that = (IntSet) o;
47-
return hashCode() == that.hashCode()
51+
return longHashCode() == that.longHashCode()
4852
&& Arrays.equals(getArray(), 0, size(), that.getArray(), 0, that.size());
4953
}
5054
}

lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@
4747
import org.apache.lucene.util.IntsRef;
4848
import org.apache.lucene.util.IntsRefBuilder;
4949
import org.apache.lucene.util.RamUsageEstimator;
50+
import org.apache.lucene.util.hppc.BitMixer;
5051

5152
/**
5253
* Automata operations.
@@ -690,7 +691,7 @@ public static Automaton determinize(Automaton a, int workLimit) {
690691
// a.writeDot("/l/la/lucene/core/detin.dot");
691692

692693
// Same initial values and state will always have the same hashCode
693-
FrozenIntSet initialset = new FrozenIntSet(new int[] {0}, 683, 0);
694+
FrozenIntSet initialset = new FrozenIntSet(new int[] {0}, BitMixer.mix(0) + 1, 0);
694695

695696
// Create state 0:
696697
b.createState();
@@ -706,8 +707,8 @@ public static Automaton determinize(Automaton a, int workLimit) {
706707
// like Set<Integer,PointTransitions>
707708
final PointTransitionSet points = new PointTransitionSet();
708709

709-
// like SortedMap<Integer,Integer>
710-
final SortedIntSet statesSet = new SortedIntSet(5);
710+
// like HashMap<Integer,Integer>, maps state to its count
711+
final StateSet statesSet = new StateSet(5);
711712

712713
Transition t = new Transition();
713714

@@ -759,11 +760,9 @@ public static Automaton determinize(Automaton a, int workLimit) {
759760

760761
final int point = points.points[i].point;
761762

762-
if (statesSet.upto > 0) {
763+
if (statesSet.size() > 0) {
763764
assert lastPoint != -1;
764765

765-
statesSet.computeHash();
766-
767766
Integer q = newstate.get(statesSet);
768767
if (q == null) {
769768
q = b.createState();
@@ -812,7 +811,7 @@ public static Automaton determinize(Automaton a, int workLimit) {
812811
points.points[i].starts.next = 0;
813812
}
814813
points.reset();
815-
assert statesSet.upto == 0 : "upto=" + statesSet.upto;
814+
assert statesSet.size() == 0 : "size=" + statesSet.size();
816815
}
817816

818817
Automaton result = b.finish();

lucene/core/src/java/org/apache/lucene/util/automaton/SortedIntSet.java

Lines changed: 0 additions & 187 deletions
This file was deleted.

0 commit comments

Comments
 (0)