Skip to content

Commit 34e8ec0

Browse files
committed
Split FSTReader and FSTWriter
2 parents b381a34 + 6b8c43e commit 34e8ec0

File tree

6 files changed

+81
-42
lines changed

6 files changed

+81
-42
lines changed

lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
// TODO: merge with PagedBytes, except PagedBytes doesn't
2727
// let you read while writing which FST needs
2828

29-
class BytesStore extends DataOutput implements FSTWriter {
29+
class BytesStore extends DataOutput implements FSTWriter, FSTReader {
3030

3131
private static final long BASE_RAM_BYTES_USED =
3232
RamUsageEstimator.shallowSizeOfInstance(BytesStore.class)
@@ -337,10 +337,7 @@ public long size() {
337337
return getPosition();
338338
}
339339

340-
/**
341-
* Similar to {@link #truncate(long)} with newLen=0 but keep the first block
342-
* to reduce GC.
343-
*/
340+
/** Similar to {@link #truncate(long)} with newLen=0 but keep the first block to reduce GC. */
344341
public void reset() {
345342
if (blocks.isEmpty()) {
346343
return;
@@ -382,6 +379,11 @@ public void finish() {
382379
}
383380
}
384381

382+
@Override
383+
public FSTReader asReader() {
384+
return this;
385+
}
386+
385387
/** Writes all of our bytes to the target {@link FSTWriter}. */
386388
public void writeTo(FSTWriter out) throws IOException {
387389
reverse(0, getPosition() - 1);

lucene/core/src/java/org/apache/lucene/util/fst/DataOutputFSTWriter.java

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,8 @@
2626
* An {@link FSTWriter} which writes to a {@link DataOutput}. It only supports writing to the {@link
2727
* DataOutput} and not reading from it. To read, you must construct a corresponding {@link
2828
* org.apache.lucene.store.DataInput} and use the {@link FSTStore} to read
29-
*
30-
* @lucene.experimental
3129
*/
32-
public class DataOutputFSTWriter implements FSTWriter {
30+
public class DataOutputFSTWriter implements FSTWriter, FSTReader {
3331

3432
private static final long BASE_RAM_BYTES_USED =
3533
RamUsageEstimator.shallowSizeOfInstance(DataOutputFSTWriter.class);
@@ -47,20 +45,6 @@ public DataOutputFSTWriter(DataOutput dataOutput) {
4745
this.dataOutput = dataOutput;
4846
}
4947

50-
@Override
51-
public long ramBytesUsed() {
52-
long size = BASE_RAM_BYTES_USED;
53-
if (dataOutput instanceof Accountable) {
54-
size += ((Accountable) dataOutput).ramBytesUsed();
55-
}
56-
return size;
57-
}
58-
59-
@Override
60-
public long size() {
61-
return size;
62-
}
63-
6448
@Override
6549
public void writeByte(byte b) throws IOException {
6650
size++;
@@ -80,6 +64,11 @@ public void finish() throws IOException {
8064
}
8165
}
8266

67+
@Override
68+
public FSTReader asReader() {
69+
return this;
70+
}
71+
8372
@Override
8473
public void writeTo(DataOutput out) throws IOException {
8574
throw new UnsupportedOperationException(
@@ -90,4 +79,18 @@ public void writeTo(DataOutput out) throws IOException {
9079
public FST.BytesReader getReverseBytesReader() {
9180
return null;
9281
}
82+
83+
@Override
84+
public long ramBytesUsed() {
85+
long size = BASE_RAM_BYTES_USED;
86+
if (dataOutput instanceof Accountable) {
87+
size += ((Accountable) dataOutput).ramBytesUsed();
88+
}
89+
return size;
90+
}
91+
92+
@Override
93+
public long size() {
94+
return size;
95+
}
9396
}

lucene/core/src/java/org/apache/lucene/util/fst/FST.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -408,21 +408,29 @@ public FST(DataInput metaIn, DataInput in, Outputs<T> outputs, FSTStore fstStore
408408
}
409409

410410
/**
411-
* This constructor is private as it only acts as a transition from the DataInput metadata
412-
* constructor to the FSTMetadata one
411+
* Load a previously saved FST with a metadata object and a FSTStore. If using {@link
412+
* OnHeapFSTStore}, setting maxBlockBits allows you to control the size of the byte[] pages used
413+
* to hold the FST bytes.
413414
*/
414415
public FST(FSTMetadata<T> metadata, DataInput in, Outputs<T> outputs, FSTStore fstStore)
415416
throws IOException {
416417
this(metadata, outputs, fstStore.init(in, metadata.numBytes));
417418
}
418419

419-
/** Load a previously saved FST with a metadata object and a FSTReader. */
420+
/** Create the FST with a metadata object and a FSTReader. */
420421
FST(FSTMetadata<T> metadata, Outputs<T> outputs, FSTReader fstReader) {
421422
this.metadata = metadata;
422423
this.outputs = outputs;
423424
this.fstReader = fstReader;
424425
}
425426

427+
/**
428+
* @return true if and only if this FST is readable (i.e. has a reverse BytesReader)
429+
*/
430+
public boolean isReadable() {
431+
return fstReader.getReverseBytesReader() != null;
432+
}
433+
426434
/**
427435
* Read the FST metadata from DataInput
428436
*
@@ -1122,7 +1130,10 @@ private void seekToNextNode(BytesReader in) throws IOException {
11221130
}
11231131
}
11241132

1125-
/** Returns a {@link BytesReader} for this FST, positioned at position 0. */
1133+
/**
1134+
* Returns a {@link BytesReader} for this FST, positioned at position 0. You should check if
1135+
* {@link #isReadable()} is true before calling this method; otherwise this returns null.
1136+
*/
11261137
public BytesReader getBytesReader() {
11271138
return fstReader.getReverseBytesReader();
11281139
}

lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,10 @@ public class FSTCompiler<T> {
123123
final float directAddressingMaxOversizingFactor;
124124
long directAddressingExpansionCredit;
125125

126+
// writer for frozen nodes
126127
final FSTWriter fstWriter;
128+
// reader for the frozen nodes
129+
final FSTReader fstReader;
127130

128131
// buffer to store the scratch bytes before writing to the fstWriter
129132
final BytesStore bytes = new BytesStore(DEFAULT_SCRATCH_PAGE_BITS);
@@ -154,9 +157,10 @@ private FSTCompiler(
154157
throw new RuntimeException(e);
155158
}
156159
this.fstWriter = fstWriter;
160+
this.fstReader = fstWriter.asReader();
157161
fst =
158162
new FST<>(
159-
new FST.FSTMetadata<>(inputType, null, -1, VERSION_CURRENT, 0), outputs, fstWriter);
163+
new FST.FSTMetadata<>(inputType, null, -1, VERSION_CURRENT, 0), outputs, fstReader);
160164
if (suffixRAMLimitMB < 0) {
161165
throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB);
162166
} else if (suffixRAMLimitMB > 0) {
@@ -316,7 +320,7 @@ public long getMappedStateCount() {
316320

317321
private CompiledNode compileNode(UnCompiledNode<T> nodeIn) throws IOException {
318322
final long node;
319-
long bytesPosStart = fstWriter.size();
323+
long bytesPosStart = fstReader.size();
320324
if (dedupHash != null) {
321325
if (nodeIn.numArcs == 0) {
322326
node = addNode(nodeIn);
@@ -330,7 +334,7 @@ private CompiledNode compileNode(UnCompiledNode<T> nodeIn) throws IOException {
330334

331335
assert node != -2;
332336

333-
long bytesPosEnd = fstWriter.size();
337+
long bytesPosEnd = fstReader.size();
334338
if (bytesPosEnd != bytesPosStart) {
335339
// The FST added a new node:
336340
assert bytesPosEnd > bytesPosStart;
@@ -492,7 +496,7 @@ long addNode(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
492496
bytes.writeTo(fstWriter);
493497

494498
nodeCount++;
495-
return fstWriter.size() - 1;
499+
return fstReader.size() - 1;
496500
}
497501

498502
private void writeLabel(DataOutput out, int v) throws IOException {
@@ -864,15 +868,15 @@ void setEmptyOutput(T v) {
864868
}
865869

866870
void finish(long newStartNode) {
867-
assert newStartNode <= fstWriter.size();
871+
assert newStartNode <= fstReader.size();
868872
if (fst.metadata.startNode != -1) {
869873
throw new IllegalStateException("already finished");
870874
}
871875
if (newStartNode == FINAL_END_NODE && fst.metadata.emptyOutput != null) {
872876
newStartNode = 0;
873877
}
874878
fst.metadata.startNode = newStartNode;
875-
fst.metadata.numBytes = fstWriter.size();
879+
fst.metadata.numBytes = fstReader.size();
876880
}
877881

878882
private boolean validOutput(T output) {

lucene/core/src/java/org/apache/lucene/util/fst/FSTWriter.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import java.io.IOException;
2020

2121
/** Interface which provides low-level functionality to write to a FST */
22-
public interface FSTWriter extends FSTReader {
22+
public interface FSTWriter {
2323

2424
/**
2525
* Write a single byte to the end of this FSTWriter
@@ -46,4 +46,11 @@ public interface FSTWriter extends FSTReader {
4646
default void finish() throws IOException {
4747
// do nothing by default
4848
}
49+
50+
/**
51+
* Get the corresponding {@link FSTReader} of this writer
52+
*
53+
* @return the {@link FSTReader} corresponding to this writer
54+
*/
55+
FSTReader asReader();
4956
}

lucene/core/src/test/org/apache/lucene/util/fst/TestDataOutputFSTWriter.java

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ public void testBasicFSA() throws IOException {
119119
"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation"
120120
};
121121
IntsRef[] terms2 = new IntsRef[strings2.length];
122+
// we will also test writing multiple FSTs to a single byte array
123+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
122124
for (int inputMode = 0; inputMode < 2; inputMode++) {
123125
if (VERBOSE) {
124126
System.out.println("TEST: inputMode=" + inputModeToString(inputMode));
@@ -141,7 +143,7 @@ public void testBasicFSA() throws IOException {
141143
pairs.add(new FSTTester.InputOutput<>(term, NO_OUTPUT));
142144
}
143145
FSTTester<Object> tester =
144-
new DataOutputFSTTester<>(random(), dir, inputMode, pairs, outputs);
146+
new DataOutputFSTTester<>(random(), dir, inputMode, pairs, outputs, baos);
145147
FST<Object> fst = tester.doTest();
146148
assertNotNull(fst);
147149
assertEquals(22, tester.nodeCount);
@@ -155,7 +157,8 @@ public void testBasicFSA() throws IOException {
155157
for (int idx = 0; idx < terms2.length; idx++) {
156158
pairs.add(new FSTTester.InputOutput<>(terms2[idx], (long) idx));
157159
}
158-
FSTTester<Long> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
160+
FSTTester<Long> tester =
161+
new DataOutputFSTTester<>(random(), dir, inputMode, pairs, outputs, baos);
159162
final FST<Long> fst = tester.doTest();
160163
assertNotNull(fst);
161164
assertEquals(22, tester.nodeCount);
@@ -170,7 +173,8 @@ public void testBasicFSA() throws IOException {
170173
final BytesRef output = newBytesRef(Integer.toString(idx));
171174
pairs.add(new FSTTester.InputOutput<>(terms2[idx], output));
172175
}
173-
FSTTester<BytesRef> tester = new FSTTester<>(random(), dir, inputMode, pairs, outputs);
176+
FSTTester<BytesRef> tester =
177+
new DataOutputFSTTester<>(random(), dir, inputMode, pairs, outputs, baos);
174178
final FST<BytesRef> fst = tester.doTest();
175179
assertNotNull(fst);
176180
assertEquals(24, tester.nodeCount);
@@ -181,32 +185,40 @@ public void testBasicFSA() throws IOException {
181185

182186
class DataOutputFSTTester<T> extends FSTTester<T> {
183187

184-
private ByteArrayOutputStream baos = new ByteArrayOutputStream();
188+
private final ByteArrayOutputStream baos;
189+
private int previousOffset;
185190

186191
public DataOutputFSTTester(
187192
Random random,
188193
Directory dir,
189194
int inputMode,
190195
List<InputOutput<T>> pairs,
191-
Outputs<T> outputs) {
196+
Outputs<T> outputs,
197+
ByteArrayOutputStream baos) {
192198
super(random, dir, inputMode, pairs, outputs);
199+
this.baos = baos;
193200
}
194201

195202
@Override
196203
protected FSTCompiler.Builder<T> getFSTBuilder() {
204+
// as the byte array could already contain another FST's bytes, we should get the current offset
205+
// to know where to start reading from
206+
this.previousOffset = baos.size();
197207
return super.getFSTBuilder()
198208
.fstWriter(new DataOutputFSTWriter(new OutputStreamDataOutput(baos)));
199209
}
200210

201211
@Override
202212
protected FST<T> compile(FSTCompiler<T> fstCompiler) throws IOException {
203213
FST<T> fst = fstCompiler.compile();
214+
assertFalse(fst.isReadable());
204215

205216
// the returned FST is not readable thus we need to reconstruct one with FSTStore
206-
DataInput dataIn = new InputStreamDataInput(new ByteArrayInputStream(baos.toByteArray()));
207-
FST.FSTMetadata<T> metadata = fst.getMetadata();
208-
OnHeapFSTStore fstStore = new OnHeapFSTStore(5);
209-
return new FST<>(metadata, outputs, fstStore.init(dataIn, metadata.numBytes));
217+
DataInput dataIn =
218+
new InputStreamDataInput(
219+
new ByteArrayInputStream(
220+
baos.toByteArray(), previousOffset, baos.size() - previousOffset));
221+
return new FST<>(fst.getMetadata(), dataIn, outputs, new OnHeapFSTStore(5));
210222
}
211223
}
212224

0 commit comments

Comments
 (0)