Commit 3180fe2

Fix the RAM usage measurement
1 parent dd7cf64 commit 3180fe2

File tree: 1 file changed (+8 -1 lines)


lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java

Lines changed: 8 additions & 1 deletion
@@ -131,7 +131,12 @@ public long add(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
     }
 
     // how many bytes would be used if we had "perfect" hashing:
-    long ramBytesUsed = primaryTable.count * PackedInts.bitsRequired(node) / 8;
+    // x2 since we have two tables: entries for the node hash to node address and copiedOffsets
+    // for node address to copiedNodes index
+    // note that some of the copiedNodes are shared between fallback and primary tables so this
+    // computation is pessimistic
+    long ramBytesUsed =
+        primaryTable.count * 2 * PackedInts.bitsRequired(node) / 8 + primaryTable.copiedBytes;
 
     // NOTE: we could instead use the more precise RAM used, but this leads to unpredictable
     // quantized behavior due to 2X rehashing where for large ranges of the RAM limit, the
@@ -217,6 +222,7 @@ private long hash(long node) throws IOException {
 
   /** Inner class because it needs access to hash function and FST bytes. */
   private class PagedGrowableHash {
+    public long copiedBytes;
     private PagedGrowableWriter entries;
     // nocommit: use PagedGrowableWriter? there was some size overflow issue with
     // PagedGrowableWriter
@@ -260,6 +266,7 @@ public void set(long index, long pointer, byte[] bytes) {
       copiedNodes.add(bytes);
       copiedOffsets.put(pointer, copiedNodes.size() - 1);
       count++;
+      copiedBytes += bytes.length;
     }
 
     private void rehash(long lastNodeAddress) throws IOException {
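
To see what the new formula measures, here is a minimal standalone sketch: the primary table keeps two packed structures (entries, mapping node hash to node address, and copiedOffsets, mapping node address to copiedNodes index), each holding count values of PackedInts.bitsRequired(node) bits, plus the raw bytes of the copied nodes. The bitsRequired helper below mirrors PackedInts.bitsRequired so the snippet runs without Lucene; the class name and input values are hypothetical, not taken from the commit.

// Standalone illustration of the new RAM estimate; class name and values
// are hypothetical, not part of the Lucene commit.
public class RamEstimateSketch {

  // Mirrors PackedInts.bitsRequired: how many bits a packed writer needs
  // per value to store values up to maxValue.
  static int bitsRequired(long maxValue) {
    return Math.max(1, 64 - Long.numberOfLeadingZeros(maxValue));
  }

  public static void main(String[] args) {
    long count = 1_000_000;       // entries in the primary table (made up)
    long node = 1L << 30;         // largest node address so far (made up)
    long copiedBytes = 8_000_000; // raw bytes held by copiedNodes (made up)

    // Old estimate: a single packed table of node addresses.
    long oldEstimate = count * bitsRequired(node) / 8;

    // New estimate: x2 for the two tables (node hash -> node address, node
    // address -> copiedNodes index), plus the copied node bytes. Pessimistic,
    // since some copied nodes are shared by the fallback and primary tables.
    long newEstimate = count * 2 * bitsRequired(node) / 8 + copiedBytes;

    System.out.println("old = " + oldEstimate + " bytes, new = " + newEstimate + " bytes");
  }
}

With these made-up inputs (bitsRequired(1L << 30) is 31), the old formula yields 3,875,000 bytes while the new one yields 15,750,000: the second table and the copied node bytes are the footprint the old measurement left out, which is what the commit message means by fixing the RAM usage measurement.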

0 commit comments
