Skip to content

Commit b5a1be9

Browse files
committed
Use List<byte[]> instead of ByteBlockPool
1 parent 11ab1fa commit b5a1be9

File tree

5 files changed

+54
-70
lines changed

5 files changed

+54
-70
lines changed

lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -256,18 +256,6 @@ public void readBytes(final long offset, final byte[] bytes, int bytesOffset, in
256256
}
257257
}
258258

259-
/**
260-
* Read a single byte at the given offset
261-
*
262-
* @param offset the offset to read
263-
* @return the byte
264-
*/
265-
public byte readByte(final long offset) {
266-
int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
267-
int pos = (int) (offset & BYTE_BLOCK_MASK);
268-
return buffers[bufferIndex][pos];
269-
}
270-
271259
@Override
272260
public long ramBytesUsed() {
273261
long size = BASE_RAM_BYTES;

lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -292,13 +292,13 @@ private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throw
292292
long bytesPosStart = bytes.getPosition();
293293
if (dedupHash != null) {
294294
if (nodeIn.numArcs == 0) {
295-
node = addNode(nodeIn).nodeAddress;
295+
node = addNode(nodeIn, false).nodeAddress;
296296
lastFrozenNode = node;
297297
} else {
298298
node = dedupHash.add(nodeIn);
299299
}
300300
} else {
301-
node = addNode(nodeIn).nodeAddress;
301+
node = addNode(nodeIn, false).nodeAddress;
302302
}
303303
assert node != -2;
304304

@@ -318,7 +318,7 @@ private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throw
318318

319319
// serializes new node by appending its bytes to the end
320320
// of the current byte[]
321-
NodeAndBuffer addNode(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
321+
NodeAndBuffer addNode(FSTCompiler.UnCompiledNode<T> nodeIn, boolean needCopy) throws IOException {
322322
// System.out.println("FST.addNode pos=" + bytes.getPosition() + " numArcs=" + nodeIn.numArcs);
323323
if (nodeIn.numArcs == 0) {
324324
if (nodeIn.isFinal) {
@@ -461,13 +461,24 @@ NodeAndBuffer addNode(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
461461
final long thisNodeAddress = bytes.getPosition() - 1;
462462
bytes.reverse(startAddress, thisNodeAddress);
463463
nodeCount++;
464-
// nocommit: this is non-optimal, we should write the BytesStore to the ByteBlockPool directly
465-
byte[] buf = new byte[Math.toIntExact(thisNodeAddress - startAddress + 1)];
466-
bytes.copyBytes(startAddress, buf, 0, buf.length);
467-
return new NodeAndBuffer(thisNodeAddress, buf);
464+
if (needCopy) {
465+
byte[] buf = new byte[Math.toIntExact(thisNodeAddress - startAddress + 1)];
466+
bytes.copyBytes(startAddress, buf, 0, buf.length);
467+
return new NodeAndBuffer(thisNodeAddress, buf);
468+
}
469+
return new NodeAndBuffer(thisNodeAddress, null);
468470
}
469471

470-
record NodeAndBuffer(long nodeAddress, byte[] bytes) {}
472+
class NodeAndBuffer {
473+
474+
final long nodeAddress;
475+
final byte[] bytes;
476+
477+
NodeAndBuffer(long nodeAddress, byte[] bytes) {
478+
this.nodeAddress = nodeAddress;
479+
this.bytes = bytes;
480+
}
481+
}
471482

472483
private void writeLabel(DataOutput out, int v) throws IOException {
473484
assert v >= 0 : "v=" + v;

lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717
package org.apache.lucene.util.fst;
1818

1919
import java.io.IOException;
20+
import java.util.ArrayList;
2021
import java.util.HashMap;
22+
import java.util.List;
2123
import java.util.Map;
22-
import org.apache.lucene.util.ByteBlockPool;
23-
import org.apache.lucene.util.BytesRef;
2424
import org.apache.lucene.util.packed.PackedInts;
2525
import org.apache.lucene.util.packed.PagedGrowableWriter;
2626

@@ -117,14 +117,14 @@ public long add(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
117117
// not in fallback either -- freeze & add the incoming node
118118

119119
// freeze & add
120-
FSTCompiler.NodeAndBuffer nodeAndBuffer = fstCompiler.addNode(nodeIn);
121-
node = nodeAndBuffer.nodeAddress();
120+
FSTCompiler.NodeAndBuffer nodeAndBuffer = fstCompiler.addNode(nodeIn, true);
121+
node = nodeAndBuffer.nodeAddress;
122122

123123
// we use 0 as empty marker in hash table, so it better be impossible to get a frozen node
124124
// at 0:
125125
assert node != 0;
126126

127-
primaryTable.set(pos, node, nodeAndBuffer.bytes());
127+
primaryTable.set(pos, node, nodeAndBuffer.bytes);
128128

129129
// confirm frozen hash and unfrozen hash are the same
130130
assert hash(node) == hash : "mismatch frozenHash=" + hash(node) + " vs hash=" + hash;
@@ -192,9 +192,7 @@ private long hash(FSTCompiler.UnCompiledNode<T> node) {
192192
// hash code for a frozen node. this must precisely match the hash computation of an unfrozen
193193
// node!
194194
private long hash(long node) throws IOException {
195-
long offset = primaryTable.copiedOffsets.get(node).offset;
196-
FST.BytesReader in =
197-
new ByteBlockPoolReverseBytesReader(primaryTable.copiedNodes, node - offset);
195+
FST.BytesReader in = getBytesReader(primaryTable, node);
198196

199197
final int PRIME = 31;
200198

@@ -224,8 +222,7 @@ private long hash(long node) throws IOException {
224222
private boolean nodesEqual(
225223
PagedGrowableHash table, FSTCompiler.UnCompiledNode<T> node, long address)
226224
throws IOException {
227-
long offset = table.copiedOffsets.get(address).offset;
228-
FST.BytesReader in = new ByteBlockPoolReverseBytesReader(table.copiedNodes, address - offset);
225+
FST.BytesReader in = getBytesReader(table, address);
229226
fstCompiler.fst.readFirstRealTargetArc(address, scratchArc, in);
230227

231228
// fail fast for a node with fixed length arcs
@@ -280,20 +277,22 @@ private boolean nodesEqual(
280277
return false;
281278
}
282279

283-
// nocommit: change this to just offset and somehow find the length
284-
record OffsetAndLength(long offset, int length) {}
280+
private static <T> FST.BytesReader getBytesReader(
281+
NodeHash<T>.PagedGrowableHash table, long address) {
282+
byte[] bytes = table.getBytes(address);
283+
return new RelativeReverseBytesReader(bytes, address - bytes.length + 1);
284+
}
285285

286286
/** Inner class because it needs access to hash function and FST bytes. */
287287
private class PagedGrowableHash {
288288
private PagedGrowableWriter entries;
289289
// nocommit: use PagedGrowableWriter? there was some size overflow issue with
290290
// PagedGrowableWriter
291291
// mapping from FST real address to copiedNodes offsets & length
292-
private Map<Long, OffsetAndLength> copiedOffsets;
292+
private Map<Long, Integer> copiedOffsets;
293293
long count;
294-
long currentOffsets = -1;
295294
private long mask;
296-
private final ByteBlockPool copiedNodes;
295+
private final List<byte[]> copiedNodes;
297296

298297
// 256K blocks, but note that the final block is sized only as needed so it won't use the full
299298
// block size when just a few elements were written to it
@@ -303,7 +302,7 @@ public PagedGrowableHash() {
303302
entries = new PagedGrowableWriter(16, BLOCK_SIZE_BYTES, 8, PackedInts.COMPACT);
304303
copiedOffsets = new HashMap<>();
305304
mask = 15;
306-
copiedNodes = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
305+
copiedNodes = new ArrayList<>();
307306
}
308307

309308
public PagedGrowableHash(long lastNodeAddress, long size) {
@@ -313,19 +312,11 @@ public PagedGrowableHash(long lastNodeAddress, long size) {
313312
copiedOffsets = new HashMap<>();
314313
mask = size - 1;
315314
assert (mask & size) == 0 : "size must be a power-of-2; got size=" + size + " mask=" + mask;
316-
copiedNodes = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
315+
copiedNodes = new ArrayList<>();
317316
}
318317

319-
public byte[] getBytes(long index) {
320-
// nocommit: find a more efficient way to copy from fallback table to primary table
321-
// here we need double copying
322-
OffsetAndLength offsetAndLength = copiedOffsets.get(index);
323-
byte[] bytes = new byte[offsetAndLength.length];
324-
// offset is the last offset of the node, hence subtract by (length - 1) to get the first
325-
// offset
326-
copiedNodes.readBytes(
327-
offsetAndLength.offset - offsetAndLength.length + 1, bytes, 0, offsetAndLength.length);
328-
return bytes;
318+
public byte[] getBytes(long node) {
319+
return copiedNodes.get(copiedOffsets.get(node));
329320
}
330321

331322
public long get(long index) {
@@ -334,11 +325,9 @@ public long get(long index) {
334325

335326
public void set(long index, long pointer, byte[] bytes) {
336327
entries.set(index, pointer);
337-
copiedNodes.append(new BytesRef(bytes));
338-
count += 3;
339-
currentOffsets += bytes.length;
340-
// offsets is the last offset of the node, as we are reading in backward
341-
copiedOffsets.put(pointer, new OffsetAndLength(currentOffsets, bytes.length));
328+
copiedNodes.add(bytes);
329+
copiedOffsets.put(pointer, copiedNodes.size() - 1);
330+
count += 2;
342331
}
343332

344333
private void rehash(long lastNodeAddress) throws IOException {
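lucene/core/src/java/org/apache/lucene/util/fst/ByteBlockPoolReverseBytesReader.java → lucene/core/src/java/org/apache/lucene/util/fst/RelativeReverseBytesReader.java

Lines changed: 11 additions & 15 deletions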
Original file line numberDiff line numberDiff line change
@@ -17,45 +17,41 @@
1717
package org.apache.lucene.util.fst;
1818

1919
import java.io.IOException;
20-
import org.apache.lucene.util.ByteBlockPool;
2120

22-
/** Reads in reverse from a ByteBlockPool. */
23-
final class ByteBlockPoolReverseBytesReader extends FST.BytesReader {
21+
/** Same as ReverseBytesReader but acts upon a relative position */
22+
final class RelativeReverseBytesReader extends FST.BytesReader {
2423

25-
private final ByteBlockPool buf;
24+
private final FST.BytesReader reader;
2625
private final long relativePos;
27-
private long pos;
2826

29-
public ByteBlockPoolReverseBytesReader(ByteBlockPool buf, long relativePos) {
30-
this.buf = buf;
27+
public RelativeReverseBytesReader(byte[] bytes, long relativePos) {
28+
this.reader = new ReverseBytesReader(bytes);
3129
this.relativePos = relativePos;
3230
}
3331

3432
@Override
3533
public byte readByte() throws IOException {
36-
return buf.readByte(pos--);
34+
return reader.readByte();
3735
}
3836

3937
@Override
4038
public void readBytes(byte[] b, int offset, int len) throws IOException {
41-
for (int i = 0; i < len; i++) {
42-
b[offset + i] = readByte();
43-
}
39+
reader.readBytes(b, offset, len);
4440
}
4541

4642
@Override
47-
public void skipBytes(long numBytes) throws IOException {
48-
pos -= numBytes;
43+
public void skipBytes(long count) throws IOException {
44+
reader.skipBytes(count);
4945
}
5046

5147
@Override
5248
public long getPosition() {
53-
return pos + relativePos;
49+
return reader.getPosition() + relativePos;
5450
}
5551

5652
@Override
5753
public void setPosition(long pos) {
58-
this.pos = pos - relativePos;
54+
reader.setPosition(pos - relativePos);
5955
}
6056

6157
@Override

lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,7 +1257,7 @@ public void testNonFinalStopNode() throws Exception {
12571257
node.isFinal = true;
12581258
rootNode.addArc('a', node);
12591259
final FSTCompiler.CompiledNode frozen = new FSTCompiler.CompiledNode();
1260-
frozen.node = fstCompiler.addNode(node).nodeAddress();
1260+
frozen.node = fstCompiler.addNode(node, false).nodeAddress;
12611261
rootNode.arcs[0].nextFinalOutput = 17L;
12621262
rootNode.arcs[0].isFinal = true;
12631263
rootNode.arcs[0].output = nothing;
@@ -1270,13 +1270,13 @@ public void testNonFinalStopNode() throws Exception {
12701270
new FSTCompiler.UnCompiledNode<>(fstCompiler, 0);
12711271
rootNode.addArc('b', node);
12721272
final FSTCompiler.CompiledNode frozen = new FSTCompiler.CompiledNode();
1273-
frozen.node = fstCompiler.addNode(node).nodeAddress();
1273+
frozen.node = fstCompiler.addNode(node, false).nodeAddress;
12741274
rootNode.arcs[1].nextFinalOutput = nothing;
12751275
rootNode.arcs[1].output = 42L;
12761276
rootNode.arcs[1].target = frozen;
12771277
}
12781278

1279-
fst.finish(fstCompiler.addNode(rootNode).nodeAddress());
1279+
fst.finish(fstCompiler.addNode(rootNode, false).nodeAddress);
12801280

12811281
StringWriter w = new StringWriter();
12821282
// Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp3/out.dot"));

0 commit comments

Comments
 (0)