Skip to content

Commit 5b05fe6

Browse files
committed
Allow FST builder to use different writer
1 parent 4e2ce76 commit 5b05fe6

File tree

10 files changed

+529
-53
lines changed

10 files changed

+529
-53
lines changed

lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
// TODO: merge with PagedBytes, except PagedBytes doesn't
2727
// let you read while writing which FST needs
2828

29-
class BytesStore extends DataOutput implements FSTReader {
29+
// TODO: Separate the scratch writer and reader functionality
30+
class BytesStore extends DataOutput implements FSTReader, Freezeable {
3031

3132
private static final long BASE_RAM_BYTES_USED =
3233
RamUsageEstimator.shallowSizeOfInstance(BytesStore.class)
@@ -40,6 +41,7 @@ class BytesStore extends DataOutput implements FSTReader {
4041

4142
private byte[] current;
4243
private int nextWrite;
44+
private boolean frozen;
4345

4446
public BytesStore(int blockBits) {
4547
this.blockBits = blockBits;
@@ -50,13 +52,15 @@ public BytesStore(int blockBits) {
5052

5153
/** Writes a single byte at the absolute position dest; you must ensure dest is < max position written so far. */
5254
public void writeByte(long dest, byte b) {
55+
assert frozen == false;
5356
int blockIndex = (int) (dest >> blockBits);
5457
byte[] block = blocks.get(blockIndex);
5558
block[(int) (dest & blockMask)] = b;
5659
}
5760

5861
@Override
5962
public void writeByte(byte b) {
63+
assert frozen == false;
6064
if (nextWrite == blockSize) {
6165
current = new byte[blockSize];
6266
blocks.add(current);
@@ -67,6 +71,7 @@ public void writeByte(byte b) {
6771

6872
@Override
6973
public void writeBytes(byte[] b, int offset, int len) {
74+
assert frozen == false;
7075
while (len > 0) {
7176
int chunk = blockSize - nextWrite;
7277
if (len <= chunk) {
@@ -97,6 +102,7 @@ int getBlockBits() {
97102
* so you must only call it on already written parts.
98103
*/
99104
void writeBytes(long dest, byte[] b, int offset, int len) {
105+
assert frozen == false;
100106
// System.out.println(" BS.writeBytes dest=" + dest + " offset=" + offset + " len=" + len);
101107
assert dest + len <= getPosition() : "dest=" + dest + " pos=" + getPosition() + " len=" + len;
102108

@@ -156,6 +162,7 @@ void writeBytes(long dest, byte[] b, int offset, int len) {
156162

157163
@Override
158164
public void copyBytes(DataInput input, long numBytes) throws IOException {
165+
assert frozen == false;
159166
assert numBytes >= 0 : "numBytes=" + numBytes;
160167
assert input != null;
161168
long len = numBytes;
@@ -180,6 +187,7 @@ public void copyBytes(DataInput input, long numBytes) throws IOException {
180187
* bytes, so must only call it on already written parts.
181188
*/
182189
public void copyBytes(long src, long dest, int len) {
190+
assert frozen == false;
183191
// System.out.println("BS.copyBytes src=" + src + " dest=" + dest + " len=" + len);
184192
assert src < dest;
185193

@@ -238,6 +246,7 @@ public void copyBytes(long src, long dest, int len) {
238246

239247
/** Copies bytes from this store to a target byte array. */
240248
public void copyBytes(long src, byte[] dest, int offset, int len) {
249+
assert frozen == false;
241250
int blockIndex = (int) (src >> blockBits);
242251
int upto = (int) (src & blockMask);
243252
byte[] block = blocks.get(blockIndex);
@@ -259,6 +268,7 @@ public void copyBytes(long src, byte[] dest, int offset, int len) {
259268

260269
/** Writes an int at the absolute position without changing the current pointer. */
261270
public void writeInt(long pos, int value) {
271+
assert frozen == false;
262272
int blockIndex = (int) (pos >> blockBits);
263273
int upto = (int) (pos & blockMask);
264274
byte[] block = blocks.get(blockIndex);
@@ -276,6 +286,7 @@ public void writeInt(long pos, int value) {
276286

277287
/** Reverses the bytes from srcPos, inclusive, to destPos, inclusive. */
278288
public void reverse(long srcPos, long destPos) {
289+
assert frozen == false;
279290
assert srcPos < destPos;
280291
assert destPos < getPosition();
281292
// System.out.println("reverse src=" + srcPos + " dest=" + destPos);
@@ -314,6 +325,7 @@ public void reverse(long srcPos, long destPos) {
314325
}
315326

316327
public void skipBytes(int len) {
328+
assert frozen == false;
317329
while (len > 0) {
318330
int chunk = blockSize - nextWrite;
319331
if (len <= chunk) {
@@ -337,11 +349,23 @@ public long size() {
337349
return getPosition();
338350
}
339351

352+
/** Similar to {@link #truncate(long)} with newLen=0 but keeps the first block to reduce GC. */
353+
public void reset() {
354+
assert frozen == false;
355+
if (blocks.isEmpty()) {
356+
return;
357+
}
358+
nextWrite = 0;
359+
current = blocks.get(0);
360+
blocks.subList(1, blocks.size()).clear();
361+
}
362+
340363
/**
341364
* newLen must be less than or equal to the max position written so far! I.e., you cannot "grow" the file with
342365
* this!
343366
*/
344367
public void truncate(long newLen) {
368+
assert frozen == false;
345369
assert newLen <= getPosition();
346370
assert newLen >= 0;
347371
int blockIndex = (int) (newLen >> blockBits);
@@ -359,7 +383,9 @@ public void truncate(long newLen) {
359383
assert newLen == getPosition();
360384
}
361385

362-
public void finish() {
386+
@Override
387+
public void freeze() {
388+
this.frozen = true;
363389
if (current != null) {
364390
byte[] lastBuffer = new byte[nextWrite];
365391
System.arraycopy(current, 0, lastBuffer, 0, nextWrite);
@@ -368,9 +394,22 @@ public void finish() {
368394
}
369395
}
370396

397+
/** Reverses all of our bytes in place, then writes them to the target {@link FSTDataOutputWriter}. */
398+
public void writeTo(FSTDataOutputWriter out) throws IOException {
399+
reverse(0, getPosition() - 1);
400+
for (byte[] block : blocks) {
401+
if (block == current) { // last block
402+
out.writeBytes(block, 0, nextWrite);
403+
} else {
404+
out.writeBytes(block, 0, block.length);
405+
}
406+
}
407+
}
408+
371409
/** Writes all of our bytes to the target {@link DataOutput}. */
372410
@Override
373411
public void writeTo(DataOutput out) throws IOException {
412+
assert frozen;
374413
for (byte[] block : blocks) {
375414
out.writeBytes(block, 0, block.length);
376415
}

lucene/core/src/java/org/apache/lucene/util/fst/FST.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,13 @@ public FST(FSTMetadata<T> metadata, DataInput in, Outputs<T> outputs, FSTStore f
435435
this.fstReader = fstReader;
436436
}
437437

438+
/**
439+
* @return true if and only if this FST is readable (i.e. has a reverse BytesReader)
440+
*/
441+
public boolean hasReverseBytesReader() {
442+
return fstReader.getReverseBytesReader() != null;
443+
}
444+
438445
/**
439446
* Read the FST metadata from DataInput
440447
*
@@ -1181,7 +1188,11 @@ private void seekToNextNode(BytesReader in) throws IOException {
11811188
}
11821189
}
11831190

1184-
/** Returns a {@link BytesReader} for this FST, positioned at position 0. */
1191+
/**
1192+
* Returns a {@link BytesReader} for this FST, positioned at position 0.
1193+
*
1194+
* @see #hasReverseBytesReader()
1195+
*/
11851196
public BytesReader getBytesReader() {
11861197
return fstReader.getReverseBytesReader();
11871198
}

0 commit comments

Comments
 (0)