Skip to content

Commit cea2933

Browse files
committed
Decouple the reader functionality from BytesStore
1 parent ebc138c commit cea2933

File tree

5 files changed

+56
-14
lines changed

5 files changed

+56
-14
lines changed
lucene/core/src/java/org/apache/lucene/util/fst/ByteBuffersFSTReader.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package org.apache.lucene.util.fst;
2+
3+
import java.io.IOException;
4+
import org.apache.lucene.store.ByteBuffersDataOutput;
5+
import org.apache.lucene.store.DataOutput;
6+
7+
/**
 * An {@link FSTReader} which is backed by a {@link ByteBuffersDataOutput}.
 *
 * <p>The class is also a {@link DataOutput}: both write methods forward to the wrapped output, and
 * the {@link FSTReader} methods ({@link #size()}, {@link #getReverseBytesReader()}, {@link
 * #writeTo(DataOutput)}) are answered from that same wrapped output.
 */
8+
final class ByteBuffersFSTReader extends DataOutput implements FSTReader {
9+
10+
/** The wrapped output; every method of this class delegates to it. */
private final ByteBuffersDataOutput dataOutput;
11+
12+
/**
 * Wraps the given output. The instance is stored as-is (no copy is made here).
 *
 * @param dataOutput the output that receives all writes and serves all reads
 */
public ByteBuffersFSTReader(ByteBuffersDataOutput dataOutput) {
13+
this.dataOutput = dataOutput;
14+
}
15+
16+
/** Forwards the single byte to the wrapped {@link ByteBuffersDataOutput}. */
@Override
17+
public void writeByte(byte b) {
18+
dataOutput.writeByte(b);
19+
}
20+
21+
/** Forwards the byte range {@code b[offset, offset + length)} to the wrapped output. */
@Override
22+
public void writeBytes(byte[] b, int offset, int length) {
23+
dataOutput.writeBytes(b, offset, length);
24+
}
25+
26+
/** Returns the RAM usage reported by the wrapped output; this wrapper adds nothing of its own. */
@Override
27+
public long ramBytesUsed() {
28+
return dataOutput.ramBytesUsed();
29+
}
30+
31+
/** Returns {@code size()} of the wrapped output. */
@Override
32+
public long size() {
33+
return dataOutput.size();
34+
}
35+
36+
/**
 * Builds a new {@link ReverseRandomAccessReader} over {@code dataOutput.toDataInput()} on every
 * call; nothing is cached by this class.
 *
 * <p>NOTE(review): presumably the returned reader only sees the bytes written before this call --
 * confirm against {@link ByteBuffersDataOutput#toDataInput()}.
 */
@Override
37+
public FST.BytesReader getReverseBytesReader() {
38+
return new ReverseRandomAccessReader(dataOutput.toDataInput());
39+
}
40+
41+
/**
 * Copies the content of the wrapped output to {@code out} via {@link
 * ByteBuffersDataOutput#copyTo(DataOutput)}.
 *
 * @param out the target to copy to
 * @throws IOException declared by this method; any exception from the copy propagates unchanged
 */
@Override
42+
public void writeTo(DataOutput out) throws IOException {
43+
dataOutput.copyTo(out);
44+
}
45+
}

lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@
2121
import java.util.List;
2222
import org.apache.lucene.store.DataInput;
2323
import org.apache.lucene.store.DataOutput;
24+
import org.apache.lucene.util.Accountable;
2425
import org.apache.lucene.util.RamUsageEstimator;
2526

2627
// TODO: merge with PagedBytes, except PagedBytes doesn't
2728
// let you read while writing which FST needs
2829

29-
// TODO: Separate the scratch writer and reader functionality
30-
class BytesStore extends DataOutput implements FSTReader {
30+
class BytesStore extends DataOutput implements Accountable {
3131

3232
private static final long BASE_RAM_BYTES_USED =
3333
RamUsageEstimator.shallowSizeOfInstance(BytesStore.class)
@@ -333,11 +333,6 @@ public long getPosition() {
333333
return ((long) blocks.size() - 1) * blockSize + nextWrite;
334334
}
335335

336-
@Override
337-
public long size() {
338-
return getPosition();
339-
}
340-
341336
/** Similar to {@link #truncate(long)} with newLen=0 but keep the first block to reduce GC. */
342337
public void reset() {
343338
if (blocks.isEmpty()) {
@@ -371,7 +366,6 @@ public void truncate(long newLen) {
371366
}
372367

373368
/** Writes all of our bytes to the target {@link DataOutput}. */
374-
@Override
375369
public void writeTo(DataOutput out) throws IOException {
376370
for (byte[] block : blocks) {
377371
if (block == current) { // last block
@@ -443,7 +437,6 @@ public void setPosition(long pos) {
443437
};
444438
}
445439

446-
@Override
447440
public FST.BytesReader getReverseBytesReader() {
448441
if (blocks.size() == 1) {
449442
return new ReverseBytesReader(blocks.get(0));

lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
*/
1717
package org.apache.lucene.util.fst;
1818

19+
import static org.apache.lucene.store.ByteBuffersDataOutput.ALLOCATE_BB_ON_HEAP;
20+
import static org.apache.lucene.store.ByteBuffersDataOutput.NO_REUSE;
1921
import static org.apache.lucene.util.fst.FST.ARCS_FOR_BINARY_SEARCH;
2022
import static org.apache.lucene.util.fst.FST.ARCS_FOR_CONTINUOUS;
2123
import static org.apache.lucene.util.fst.FST.ARCS_FOR_DIRECT_ADDRESSING;
@@ -32,6 +34,7 @@
3234

3335
import java.io.IOException;
3436
import org.apache.lucene.store.ByteArrayDataOutput;
37+
import org.apache.lucene.store.ByteBuffersDataOutput;
3538
import org.apache.lucene.store.DataOutput;
3639
import org.apache.lucene.util.Accountable;
3740
import org.apache.lucene.util.ArrayUtil;
@@ -140,7 +143,8 @@ public FSTCompiler(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
140143
}
141144

142145
static DataOutput getLegacyDataOutput(int blockBits) {
143-
return new BytesStore(blockBits);
146+
return new ByteBuffersFSTReader(
147+
new ByteBuffersDataOutput(blockBits, blockBits, ALLOCATE_BB_ON_HEAP, NO_REUSE));
144148
}
145149

146150
private FSTCompiler(
@@ -539,9 +543,9 @@ long addNode(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
539543
}
540544
}
541545

542-
scratchBytes.reverse(0, scratchBytes.size() - 1);
546+
scratchBytes.reverse(0, scratchBytes.getPosition() - 1);
543547
scratchBytes.writeTo(dataOutput);
544-
numBytesWritten += scratchBytes.size();
548+
numBytesWritten += scratchBytes.getPosition();
545549

546550
nodeCount++;
547551
return numBytesWritten - 1;

lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ public long add(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
138138
// we use 0 as empty marker in hash table, so it better be impossible to get a frozen node
139139
// at 0:
140140
assert nodeAddress != FST.FINAL_END_NODE && nodeAddress != FST.NON_FINAL_END_NODE;
141-
byte[] buf = new byte[Math.toIntExact(fstCompiler.scratchBytes.size())];
141+
byte[] buf = new byte[Math.toIntExact(fstCompiler.scratchBytes.getPosition())];
142142
fstCompiler.scratchBytes.copyBytes(0, buf, 0, buf.length);
143143

144144
primaryTable.setNode(hashSlot, nodeAddress, buf);

lucene/core/src/test/org/apache/lucene/util/fst/TestFSTDirectAddressing.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public void testDeDupTails() throws Exception {
6565
}
6666
entries.add(new BytesRef(b));
6767
}
68-
long size = buildFST(entries).ramBytesUsed();
68+
long size = buildFST(entries).numBytes();
6969
// Size is 1648 when we use only list-encoding. We were previously failing to ever de-dup
7070
// direct addressing, which led this case to blow up.
7171
// This test will fail if there is more than 1% size increase with direct addressing.

0 commit comments

Comments
 (0)