@@ -51,6 +51,8 @@ final class NodeHash<T> {
5151
5252 private final FSTCompiler <T > fstCompiler ;
5353 private final FST .Arc <T > scratchArc = new FST .Arc <>();
54+ // length of the node last matched by getFallback(); meaningful only after a non-zero return
55+ private int lastFallbackNodeLength ;
5456
5557 /**
5658 * ramLimitMB is the max RAM we can use for recording suffixes. If we hit this limit, the least
@@ -73,24 +75,25 @@ public NodeHash(FSTCompiler<T> fstCompiler, double ramLimitMB) {
7375 this .fstCompiler = fstCompiler ;
7476 }
7577
76- private NodeAddressAndLength getFallback (FSTCompiler .UnCompiledNode <T > nodeIn , long hash )
77- throws IOException {
78+ private long getFallback (FSTCompiler .UnCompiledNode <T > nodeIn , long hash ) throws IOException {
7879 if (fallbackTable == null ) {
7980 // no fallback yet (primary table is not yet large enough to swap)
80- return null ;
81+ return 0 ;
8182 }
8283 long pos = hash & fallbackTable .mask ;
8384 int c = 0 ;
8485 while (true ) {
8586 long node = fallbackTable .get (pos );
8687 if (node == 0 ) {
8788 // not found
88- return null ;
89+ return 0 ;
8990 } else {
9091 int length = fallbackTable .getMatchedNodeLength (nodeIn , node , pos );
9192 if (length != -1 ) {
93+ // store the node length for further use
94+ this .lastFallbackNodeLength = length ;
9295 // frozen version of this node is already here
93- return new NodeAddressAndLength ( node , length ) ;
96+ return node ;
9497 }
9598 }
9699
@@ -99,16 +102,6 @@ private NodeAddressAndLength getFallback(FSTCompiler.UnCompiledNode<T> nodeIn, l
99102 }
100103 }
101104
102- static class NodeAddressAndLength {
103- private final long address ;
104- private final int length ;
105-
106- NodeAddressAndLength (long address , int length ) {
107- this .address = address ;
108- this .length = length ;
109- }
110- }
111-
112105 public long add (FSTCompiler .UnCompiledNode <T > nodeIn ) throws IOException {
113106
114107 long hash = hash (nodeIn );
@@ -121,12 +114,11 @@ public long add(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
121114 long node = primaryTable .get (pos );
122115 if (node == 0 ) {
123116 // node is not in primary table; is it in fallback table?
124- NodeAddressAndLength addressAndLength = getFallback (nodeIn , hash );
125- if (addressAndLength != null ) {
126- node = addressAndLength .address ;
117+ node = getFallback (nodeIn , hash );
118+ if (node != 0 ) {
127119 // it was already in fallback -- promote to primary
128120 // TODO: Copy directly between 2 ByteBlockPool to avoid double-copy
129- primaryTable .set (pos , node , fallbackTable .getBytes (pos , addressAndLength . length ));
121+ primaryTable .set (pos , node , fallbackTable .getBytes (pos , lastFallbackNodeLength ));
130122 } else {
131123 // not in fallback either -- freeze & add the incoming node
132124
@@ -151,7 +143,7 @@ public long add(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
151143 // how many bytes would be used if we had "perfect" hashing:
152144 // - x2 for fstNodeAddress for FST node address
153145 // - x2 for copiedNodeAddress for copied node address
154- // each account for approximate hash table overhead halfway between 33.3%
146+ // each account for approximate hash table overhead halfway between 33.3% and 66.6%
155147 // note that some of the copiedNodes are shared between fallback and primary tables so this
156148 // computation is pessimistic
157149 long ramBytesUsed =
@@ -266,8 +258,8 @@ public void set(long index, long pointer, byte[] bytes) {
266258 count ++;
267259 copiedNodes .append (new BytesRef (bytes ));
268260 copiedBytes += bytes .length ;
269- copiedNodeAddress . set (
270- index , copiedBytes - 1 ); // write the offset, which is the last offset of the node
261+ // write the offset, which is the last offset of the node
262+ copiedNodeAddress . set ( index , copiedBytes - 1 );
271263 }
272264
273265 private void rehash (long lastNodeAddress ) throws IOException {
@@ -334,8 +326,7 @@ private long hash(long node, long pos) throws IOException {
334326
335327 /**
336328 * Compares an unfrozen node (UnCompiledNode) with a frozen node at byte location address
337- * (long), returning the local copiedNodes start address if the two nodes are matched, or -1
338- * otherwise
329+ * (long), returning the node length if the two nodes match, or -1 otherwise
339330 */
340331 private int getMatchedNodeLength (FSTCompiler .UnCompiledNode <T > node , long address , long pos )
341332 throws IOException {
0 commit comments