@@ -130,13 +130,14 @@ public long add(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
130130 }
131131
132132 // how many bytes would be used if we had "perfect" hashing:
133- // x3 since we have two tables:
134- // - x1 for entries for the node hash to node address
135- // - x2 for copiedOffsets for node address to copiedNodes index
133+ // x5 since we have two tables:
134+ // - x2 for entries for the node hash to node address
135+ // - x3 for copiedOffsets for node address to copiedNodes index
136+ // each accounts for approximate hash table overhead halfway between 33.3% and 66.7% occupancy = 50%
136137 // note that some of the copiedNodes are shared between fallback and primary tables so this
137138 // computation is pessimistic
138139 long ramBytesUsed =
139- primaryTable .count * 3 * PackedInts .bitsRequired (node ) / 8 + primaryTable .copiedBytes ;
140+ primaryTable .count * 5 * PackedInts .bitsRequired (node ) / 8 + primaryTable .copiedBytes ;
140141
141142 // NOTE: we could instead use the more precise RAM used, but this leads to unpredictable
142143 // quantized behavior due to 2X rehashing where for large ranges of the RAM limit, the
@@ -146,9 +147,7 @@ public long add(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
146147 // in smaller FSTs, even if the precise RAM used is not always under the limit.
147148
148149 // divide limit by 2 because fallback gets half the RAM and primary gets the other half
149- // divide by 2 again to account for approximate hash table overhead halfway between 33.3%
150- // and 66.7% occupancy = 50%
151- if (ramBytesUsed >= ramLimitBytes / (2 * 2 )) {
150+ if (ramBytesUsed >= ramLimitBytes / 2 ) {
152151 // time to fallback -- fallback is now used read-only to promote a node (suffix) to
153152 // primary if we encounter it again
154153 fallbackTable = primaryTable ;
0 commit comments