@@ -131,7 +131,12 @@ public long add(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
     }

     // how many bytes would be used if we had "perfect" hashing:
-    long ramBytesUsed = primaryTable.count * PackedInts.bitsRequired(node) / 8;
+    // x2 since we have two tables: entries, mapping node hash to node address, and
+    // copiedOffsets, mapping node address to copiedNodes index.
+    // Note that some copiedNodes are shared between the fallback and primary tables, so this
+    // computation is pessimistic.
+    long ramBytesUsed =
+        primaryTable.count * 2 * PackedInts.bitsRequired(node) / 8 + primaryTable.copiedBytes;

     // NOTE: we could instead use the more precise RAM used, but this leads to unpredictable
     // quantized behavior due to 2X rehashing where for large ranges of the RAM limit, the
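
For intuition about the new estimate, here is a standalone sketch of the arithmetic; the class name, `estimateBytes`, and its parameters are illustrative assumptions rather than code from this change:

```java
import org.apache.lucene.util.packed.PackedInts;

public class RamEstimateSketch {
  // Pessimistic "perfect hashing" estimate: two packed tables (entries and
  // copiedOffsets) each hold `count` values of bitsRequired(maxNodeAddress)
  // bits, plus the raw bytes of the copied node payloads.
  static long estimateBytes(long count, long maxNodeAddress, long copiedBytes) {
    return count * 2 * PackedInts.bitsRequired(maxNodeAddress) / 8 + copiedBytes;
  }

  public static void main(String[] args) {
    // 1M nodes with 30-bit addresses and 8 MiB of copied bytes:
    // 1M * 2 * 30 / 8 + 8 MiB ≈ 15.9 MB
    System.out.println(estimateBytes(1_000_000, (1L << 30) - 1, 8L << 20));
  }
}
```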
@@ -217,6 +222,7 @@ private long hash(long node) throws IOException {

   /** Inner class because it needs access to hash function and FST bytes. */
   private class PagedGrowableHash {
+    public long copiedBytes;
     private PagedGrowableWriter entries;
     // nocommit: use PagedGrowableWriter? there was some size overflow issue with
     // PagedGrowableWriter
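
The `entries` field above is a `PagedGrowableWriter`, a packed-ints table whose per-value bit width grows on demand, which is why `bitsRequired` appears in the estimate. A hedged demo of that behavior, with illustrative sizing arguments rather than the PR's actual ones:

```java
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedGrowableWriter;

public class PackedTableDemo {
  public static void main(String[] args) {
    PagedGrowableWriter table =
        new PagedGrowableWriter(
            /* size */ 16, /* pageSize */ 1 << 10, /* startBitsPerValue */ 8, PackedInts.COMPACT);
    table.set(0, 42); // fits in the initial 8 bits per value
    table.set(1, 1L << 40); // forces the page to grow its bit width
    System.out.println(table.get(0) + " " + table.get(1)); // 42 1099511627776
  }
}
```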
@@ -260,6 +266,7 @@ public void set(long index, long pointer, byte[] bytes) {
       copiedNodes.add(bytes);
       copiedOffsets.put(pointer, copiedNodes.size() - 1);
       count++;
+      copiedBytes += bytes.length;
     }

     private void rehash(long lastNodeAddress) throws IOException {
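
To see the bookkeeping in one place, here is a self-contained sketch of `set()` using plain JDK collections in place of the packed structures; it mirrors the intent of the change, not its actual data structures:

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class NodeCopySketch {
  final Map<Long, Long> entries = new HashMap<>(); // node hash -> node address
  final Map<Long, Integer> copiedOffsets = new HashMap<>(); // node address -> copiedNodes index
  final List<byte[]> copiedNodes = new ArrayList<>();
  long count;
  long copiedBytes; // running total of all copied payload lengths

  void set(long index, long pointer, byte[] bytes) {
    entries.put(index, pointer);
    copiedNodes.add(bytes);
    copiedOffsets.put(pointer, copiedNodes.size() - 1);
    count++;
    copiedBytes += bytes.length; // the accounting this change adds
  }
}
```

Keeping `copiedBytes` as a running total means `add()` can read the RAM estimate in O(1) on every frozen node, instead of re-summing the lengths in `copiedNodes` each time.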