Skip to content

Commit 90534ee

Browse files
Anh Dung Bui (dungba88)
authored and committed
Allow FST builder to use different writer (#12543)
1 parent 05d26ac commit 90534ee

File tree

4 files changed

+197
-50
lines changed

4 files changed

+197
-50
lines changed

lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,13 +21,12 @@
2121
import java.util.List;
2222
import org.apache.lucene.store.DataInput;
2323
import org.apache.lucene.store.DataOutput;
24-
import org.apache.lucene.util.Accountable;
2524
import org.apache.lucene.util.RamUsageEstimator;
2625

2726
// TODO: merge with PagedBytes, except PagedBytes doesn't
2827
// let you read while writing which FST needs
2928

30-
class BytesStore extends DataOutput implements Accountable {
29+
class BytesStore extends DataOutput implements FSTWriter {
3130

3231
private static final long BASE_RAM_BYTES_USED =
3332
RamUsageEstimator.shallowSizeOfInstance(BytesStore.class)
@@ -329,6 +328,7 @@ public void skipBytes(int len) {
329328
}
330329
}
331330

331+
@Override
332332
public long getPosition() {
333333
return ((long) blocks.size() - 1) * blockSize + nextWrite;
334334
}
@@ -355,6 +355,7 @@ public void truncate(long newLen) {
355355
assert newLen == getPosition();
356356
}
357357

358+
@Override
358359
public void finish() {
359360
if (current != null) {
360361
byte[] lastBuffer = new byte[nextWrite];
@@ -364,7 +365,15 @@ public void finish() {
364365
}
365366
}
366367

368+
/** Writes all of our bytes to the target {@link FSTWriter}. */
369+
public void writeTo(FSTWriter out) throws IOException {
370+
for (byte[] block : blocks) {
371+
out.writeBytes(block, 0, block.length);
372+
}
373+
}
374+
367375
/** Writes all of our bytes to the target {@link DataOutput}. */
376+
@Override
368377
public void writeTo(DataOutput out) throws IOException {
369378
for (byte[] block : blocks) {
370379
out.writeBytes(block, 0, block.length);
@@ -437,10 +446,16 @@ public boolean reversed() {
437446
};
438447
}
439448

449+
@Override
440450
public FST.BytesReader getReverseReader() {
441451
return getReverseReader(true);
442452
}
443453

454+
@Override
455+
public FST.BytesReader getReverseReaderForSuffixSharing() {
456+
return getReverseReader(false);
457+
}
458+
444459
FST.BytesReader getReverseReader(boolean allowSingle) {
445460
if (allowSingle && blocks.size() == 1) {
446461
return new ReverseBytesReader(blocks.get(0));

lucene/core/src/java/org/apache/lucene/util/fst/FST.java

Lines changed: 31 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ public enum INPUT_TYPE {
120120
* A {@link BytesStore}, used during building, or during reading when the FST is very large (more
121121
* than 1 GB). If the FST is less than 1 GB then bytesArray is set instead.
122122
*/
123-
final BytesStore bytes;
123+
final FSTWriter fstWriter;
124124

125125
private final FSTStore fstStore;
126126

@@ -395,14 +395,18 @@ private static boolean flag(int flags, int bit) {
395395
}
396396

397397
// make a new empty FST, for building; Builder invokes this
398-
FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
398+
FST(INPUT_TYPE inputType, Outputs<T> outputs, FSTWriter fstWriter) {
399399
this.inputType = inputType;
400400
this.outputs = outputs;
401401
fstStore = null;
402-
bytes = new BytesStore(bytesPageBits);
402+
this.fstWriter = fstWriter;
403403
// pad: ensure no node gets address 0 which is reserved to mean
404404
// the stop state w/ no arcs
405-
bytes.writeByte((byte) 0);
405+
try {
406+
this.fstWriter.writeByte((byte) 0);
407+
} catch (IOException e) {
408+
throw new RuntimeException(e);
409+
}
406410
emptyOutput = null;
407411
this.version = VERSION_CURRENT;
408412
}
@@ -420,7 +424,7 @@ public FST(DataInput metaIn, DataInput in, Outputs<T> outputs) throws IOExceptio
420424
*/
421425
public FST(DataInput metaIn, DataInput in, Outputs<T> outputs, FSTStore fstStore)
422426
throws IOException {
423-
bytes = null;
427+
fstWriter = null;
424428
this.fstStore = fstStore;
425429
this.outputs = outputs;
426430

@@ -472,7 +476,7 @@ public long ramBytesUsed() {
472476
if (this.fstStore != null) {
473477
size += this.fstStore.ramBytesUsed();
474478
} else {
475-
size += bytes.ramBytesUsed();
479+
size += this.fstWriter.ramBytesUsed();
476480
}
477481

478482
return size;
@@ -484,19 +488,19 @@ public String toString() {
484488
}
485489

486490
void finish(long newStartNode) throws IOException {
487-
assert newStartNode <= bytes.getPosition();
491+
assert newStartNode <= fstWriter.getPosition();
488492
if (startNode != -1) {
489493
throw new IllegalStateException("already finished");
490494
}
491495
if (newStartNode == FINAL_END_NODE && emptyOutput != null) {
492496
newStartNode = 0;
493497
}
494498
startNode = newStartNode;
495-
bytes.finish();
499+
fstWriter.finish();
496500
}
497501

498502
public long numBytes() {
499-
return bytes.getPosition();
503+
return fstWriter.getPosition();
500504
}
501505

502506
public T getEmptyOutput() {
@@ -512,6 +516,21 @@ void setEmptyOutput(T v) {
512516
}
513517

514518
public void save(DataOutput metaOut, DataOutput out) throws IOException {
519+
saveMetadata(metaOut);
520+
if (fstWriter != null) {
521+
fstWriter.writeTo(out);
522+
} else {
523+
assert fstStore != null;
524+
fstStore.writeTo(out);
525+
}
526+
}
527+
528+
/**
529+
* Save the metadata to a DataOutput
530+
*
531+
* @param metaOut the DataOutput to save
532+
*/
533+
public void saveMetadata(DataOutput metaOut) throws IOException {
515534
if (startNode == -1) {
516535
throw new IllegalStateException("call finish first");
517536
}
@@ -552,13 +571,9 @@ public void save(DataOutput metaOut, DataOutput out) throws IOException {
552571
}
553572
metaOut.writeByte(t);
554573
metaOut.writeVLong(startNode);
555-
if (bytes != null) {
556-
long numBytes = bytes.getPosition();
574+
if (fstWriter != null) {
575+
long numBytes = fstWriter.getPosition();
557576
metaOut.writeVLong(numBytes);
558-
bytes.writeTo(out);
559-
} else {
560-
assert fstStore != null;
561-
fstStore.writeTo(out);
562577
}
563578
}
564579

@@ -1131,7 +1146,7 @@ public BytesReader getBytesReader() {
11311146
if (this.fstStore != null) {
11321147
return this.fstStore.getReverseBytesReader();
11331148
} else {
1134-
return bytes.getReverseReader();
1149+
return fstWriter.getReverseReader();
11351150
}
11361151
}
11371152

0 commit comments

Comments
 (0)