Skip to content

Commit d020359

Browse files
dungba88mikemccand
authored andcommitted
Consolidate FSTStore and BytesStore in FST (#12709)
* Remove direct dependency of NodeHash to FST * Fix index out of bounds when writing FST to different metaOut (#12697) * Tidify code * Update CHANGES.txt * Re-add assertion * Remove direct dependency of NodeHash to FST * Hold off the FSTTraversal changes * Rename variable * Add Javadoc * Add @OverRide * tidy * tidy * Change to FSTReader * Update CHANGES.txt
1 parent 196800c commit d020359

File tree

8 files changed

+87
-65
lines changed

8 files changed

+87
-65
lines changed

lucene/CHANGES.txt

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,6 @@ API Changes
1818
* GITHUB#12592: Add RandomAccessInput#length method to the RandomAccessInput interface. In addition deprecate
1919
ByteBuffersDataInput#size in favour of this new method. (Ignacio Vera)
2020

21-
* GITHUB#12646: Move FST#addNode to FSTCompiler to avoid a circular dependency between FST and FSTCompiler
22-
(Anh Dung Bui via Alan Woodward)
23-
2421
* GITHUB#12718: Make IndexSearcher#getSlices final as it is not expected to be overridden (Luca Cavanna)
2522

2623
* GITHUB#12427: Automata#makeStringUnion #makeBinaryStringUnion now accept Iterable<BytesRef> instead of
@@ -32,6 +29,12 @@ API Changes
3229

3330
* GITHUB#12816: Add HumanReadableQuery which takes a description parameter for debugging purposes. (Jakub Slowinski)
3431

32+
* GITHUB#12646, GITHUB#12690: Move FST#addNode to FSTCompiler to avoid a circular dependency
33+
between FST and FSTCompiler (Anh Dung Bui)
34+
35+
* GITHUB#12709 Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
36+
of the two (Anh Dung Bui)
37+
3538
New Features
3639
---------------------
3740

@@ -108,7 +111,7 @@ Optimizations
108111

109112
* GITHUB#12382: Faster top-level conjunctions on term queries when sorting by
110113
descending score. (Adrien Grand)
111-
114+
112115
* GITHUB#12591: Use stable radix sort to speed up the sorting of update terms. (Guo Feng)
113116

114117
* GITHUB#12587: Use radix sort to speed up the sorting of terms in TermInSetQuery. (Guo Feng)
@@ -123,7 +126,7 @@ Optimizations
123126

124127
* GITHUB#12668: ImpactsEnums now decode frequencies lazily like PostingsEnums.
125128
(Adrien Grand)
126-
129+
127130
* GITHUB#12651: Use 2d array for OnHeapHnswGraph representation. (Patrick Zhai)
128131

129132
* GITHUB#12653: Optimize computing number of levels in MultiLevelSkipListWriter#bufferSkip. (Shubham Chaudhary)

lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,12 @@
2121
import java.util.List;
2222
import org.apache.lucene.store.DataInput;
2323
import org.apache.lucene.store.DataOutput;
24-
import org.apache.lucene.util.Accountable;
2524
import org.apache.lucene.util.RamUsageEstimator;
2625

2726
// TODO: merge with PagedBytes, except PagedBytes doesn't
2827
// let you read while writing which FST needs
2928

30-
class BytesStore extends DataOutput implements Accountable {
29+
class BytesStore extends DataOutput implements FSTReader {
3130

3231
private static final long BASE_RAM_BYTES_USED =
3332
RamUsageEstimator.shallowSizeOfInstance(BytesStore.class)
@@ -333,6 +332,11 @@ public long getPosition() {
333332
return ((long) blocks.size() - 1) * blockSize + nextWrite;
334333
}
335334

335+
@Override
336+
public long size() {
337+
return getPosition();
338+
}
339+
336340
/**
337341
* Pos must be less than the max position written so far! Ie, you cannot "grow" the file with
338342
* this!
@@ -365,6 +369,7 @@ public void finish() {
365369
}
366370

367371
/** Writes all of our bytes to the target {@link DataOutput}. */
372+
@Override
368373
public void writeTo(DataOutput out) throws IOException {
369374
for (byte[] block : blocks) {
370375
out.writeBytes(block, 0, block.length);
@@ -437,7 +442,8 @@ public boolean reversed() {
437442
};
438443
}
439444

440-
public FST.BytesReader getReverseReader() {
445+
@Override
446+
public FST.BytesReader getReverseBytesReader() {
441447
return getReverseReader(true);
442448
}
443449

lucene/core/src/java/org/apache/lucene/util/fst/FST.java

Lines changed: 12 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,7 @@ public enum INPUT_TYPE {
123123
* A {@link BytesStore}, used during building, or during reading when the FST is very large (more
124124
* than 1 GB). If the FST is less than 1 GB then bytesArray is set instead.
125125
*/
126-
final BytesStore bytes;
127-
128-
private final FSTStore fstStore;
126+
private final FSTReader fstReader;
129127

130128
private long startNode = -1;
131129

@@ -398,15 +396,11 @@ private static boolean flag(int flags, int bit) {
398396
}
399397

400398
// make a new empty FST, for building; Builder invokes this
401-
FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
399+
FST(INPUT_TYPE inputType, Outputs<T> outputs, FSTReader fstReader) {
402400
this.inputType = inputType;
403401
this.outputs = outputs;
404-
fstStore = null;
405-
bytes = new BytesStore(bytesPageBits);
406-
// pad: ensure no node gets address 0 which is reserved to mean
407-
// the stop state w/ no arcs
408-
bytes.writeByte((byte) 0);
409402
emptyOutput = null;
403+
this.fstReader = fstReader;
410404
this.version = VERSION_CURRENT;
411405
}
412406

@@ -423,8 +417,6 @@ public FST(DataInput metaIn, DataInput in, Outputs<T> outputs) throws IOExceptio
423417
*/
424418
public FST(DataInput metaIn, DataInput in, Outputs<T> outputs, FSTStore fstStore)
425419
throws IOException {
426-
bytes = null;
427-
this.fstStore = fstStore;
428420
this.outputs = outputs;
429421

430422
// NOTE: only reads formats VERSION_START up to VERSION_CURRENT; we don't have
@@ -438,7 +430,7 @@ public FST(DataInput metaIn, DataInput in, Outputs<T> outputs, FSTStore fstStore
438430
emptyBytes.copyBytes(metaIn, numBytes);
439431

440432
// De-serialize empty-string output:
441-
BytesReader reader = emptyBytes.getReverseReader();
433+
BytesReader reader = emptyBytes.getReverseBytesReader();
442434
// NoOutputs uses 0 bytes when writing its output,
443435
// so we have to check here else BytesStore gets
444436
// angry:
@@ -466,19 +458,13 @@ public FST(DataInput metaIn, DataInput in, Outputs<T> outputs, FSTStore fstStore
466458
startNode = metaIn.readVLong();
467459

468460
long numBytes = metaIn.readVLong();
469-
this.fstStore.init(in, numBytes);
461+
fstStore.init(in, numBytes);
462+
this.fstReader = fstStore;
470463
}
471464

472465
@Override
473466
public long ramBytesUsed() {
474-
long size = BASE_RAM_BYTES_USED;
475-
if (this.fstStore != null) {
476-
size += this.fstStore.ramBytesUsed();
477-
} else {
478-
size += bytes.ramBytesUsed();
479-
}
480-
481-
return size;
467+
return BASE_RAM_BYTES_USED + fstReader.ramBytesUsed();
482468
}
483469

484470
@Override
@@ -487,19 +473,18 @@ public String toString() {
487473
}
488474

489475
void finish(long newStartNode) throws IOException {
490-
assert newStartNode <= bytes.getPosition();
476+
assert newStartNode <= fstReader.size();
491477
if (startNode != -1) {
492478
throw new IllegalStateException("already finished");
493479
}
494480
if (newStartNode == FINAL_END_NODE && emptyOutput != null) {
495481
newStartNode = 0;
496482
}
497483
startNode = newStartNode;
498-
bytes.finish();
499484
}
500485

501486
public long numBytes() {
502-
return bytes.getPosition();
487+
return fstReader.size();
503488
}
504489

505490
public T getEmptyOutput() {
@@ -555,16 +540,8 @@ public void save(DataOutput metaOut, DataOutput out) throws IOException {
555540
}
556541
metaOut.writeByte(t);
557542
metaOut.writeVLong(startNode);
558-
if (bytes != null) {
559-
long numBytes = bytes.getPosition();
560-
metaOut.writeVLong(numBytes);
561-
bytes.writeTo(out);
562-
} else {
563-
assert fstStore != null;
564-
long numBytes = fstStore.size();
565-
metaOut.writeVLong(numBytes);
566-
fstStore.writeTo(out);
567-
}
543+
metaOut.writeVLong(numBytes());
544+
fstReader.writeTo(out);
568545
}
569546

570547
/** Writes an automaton to a file. */
@@ -1141,11 +1118,7 @@ private void seekToNextNode(BytesReader in) throws IOException {
11411118

11421119
/** Returns a {@link BytesReader} for this FST, positioned at position 0. */
11431120
public BytesReader getBytesReader() {
1144-
if (this.fstStore != null) {
1145-
return this.fstStore.getReverseBytesReader();
1146-
} else {
1147-
return bytes.getReverseReader();
1148-
}
1121+
return fstReader.getReverseBytesReader();
11491122
}
11501123

11511124
/** Reads bytes stored in an FST. */

lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,11 @@ private FSTCompiler(
137137
float directAddressingMaxOversizingFactor) {
138138
this.allowFixedLengthArcs = allowFixedLengthArcs;
139139
this.directAddressingMaxOversizingFactor = directAddressingMaxOversizingFactor;
140-
fst = new FST<>(inputType, outputs, bytesPageBits);
141-
bytes = fst.bytes;
142-
assert bytes != null;
140+
bytes = new BytesStore(bytesPageBits);
141+
// pad: ensure no node gets address 0 which is reserved to mean
142+
// the stop state w/ no arcs
143+
bytes.writeByte((byte) 0);
144+
fst = new FST<>(inputType, outputs, bytes);
143145
if (suffixRAMLimitMB < 0) {
144146
throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB);
145147
} else if (suffixRAMLimitMB > 0) {
@@ -317,8 +319,6 @@ private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throw
317319
// serializes new node by appending its bytes to the end
318320
// of the current byte[]
319321
long addNode(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
320-
T NO_OUTPUT = fst.outputs.getNoOutput();
321-
322322
// System.out.println("FST.addNode pos=" + bytes.getPosition() + " numArcs=" + nodeIn.numArcs);
323323
if (nodeIn.numArcs == 0) {
324324
if (nodeIn.isFinal) {
@@ -859,6 +859,7 @@ public FST<T> compile() throws IOException {
859859
// if (DEBUG) System.out.println(" builder.finish root.isFinal=" + root.isFinal + "
860860
// root.output=" + root.output);
861861
fst.finish(compileNode(root, lastInput.length()).node);
862+
bytes.finish();
862863

863864
return fst;
864865
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.util.fst;
18+
19+
import java.io.IOException;
20+
import org.apache.lucene.store.DataOutput;
21+
import org.apache.lucene.util.Accountable;
22+
23+
/** Abstraction for reading bytes necessary for FST. */
24+
public interface FSTReader extends Accountable {
25+
26+
/**
27+
* The raw size in bytes of the FST
28+
*
29+
* @return the FST size
30+
*/
31+
long size();
32+
33+
/**
34+
* Get the reverse BytesReader for this FST
35+
*
36+
* @return the reverse BytesReader
37+
*/
38+
FST.BytesReader getReverseBytesReader();
39+
40+
/**
41+
* Write this FST to another DataOutput
42+
*
43+
* @param out the DataOutput
44+
* @throws IOException if exception occurred during writing
45+
*/
46+
void writeTo(DataOutput out) throws IOException;
47+
}

lucene/core/src/java/org/apache/lucene/util/fst/FSTStore.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,8 @@
1818

1919
import java.io.IOException;
2020
import org.apache.lucene.store.DataInput;
21-
import org.apache.lucene.store.DataOutput;
22-
import org.apache.lucene.util.Accountable;
2321

24-
/** Abstraction for reading/writing bytes necessary for FST. */
25-
public interface FSTStore extends Accountable {
22+
/** A type of {@link FSTReader} which needs data to be initialized before use */
23+
public interface FSTStore extends FSTReader {
2624
void init(DataInput in, long numBytes) throws IOException;
27-
28-
long size();
29-
30-
FST.BytesReader getReverseBytesReader();
31-
32-
void writeTo(DataOutput out) throws IOException;
3325
}

lucene/core/src/java/org/apache/lucene/util/fst/OnHeapFSTStore.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public FST.BytesReader getReverseBytesReader() {
8888
if (bytesArray != null) {
8989
return new ReverseBytesReader(bytesArray);
9090
} else {
91-
return bytes.getReverseReader();
91+
return bytes.getReverseBytesReader();
9292
}
9393
}
9494

lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ private void verify(BytesStore bytes, byte[] expected, int totalLength) throws E
271271
System.out.println(" bulk: reversed");
272272
}
273273
// reversed
274-
FST.BytesReader r = bytes.getReverseReader();
274+
FST.BytesReader r = bytes.getReverseBytesReader();
275275
assertTrue(r.reversed());
276276
r.setPosition(totalLength - 1);
277277
r.readBytes(actual, 0, actual.length);
@@ -306,7 +306,7 @@ private void verify(BytesStore bytes, byte[] expected, int totalLength) throws E
306306
if (VERBOSE) {
307307
System.out.println(" ops: reversed");
308308
}
309-
r = bytes.getReverseReader();
309+
r = bytes.getReverseBytesReader();
310310
} else {
311311
if (VERBOSE) {
312312
System.out.println(" ops: forward");

0 commit comments

Comments
 (0)