 package org.apache.lucene.util.fst;

 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
-import org.apache.lucene.util.ByteBlockPool;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PagedGrowableWriter;

@@ -117,14 +117,14 @@ public long add(FSTCompiler.UnCompiledNode<T> nodeIn) throws IOException {
           // not in fallback either -- freeze & add the incoming node

           // freeze & add
-          FSTCompiler.NodeAndBuffer nodeAndBuffer = fstCompiler.addNode(nodeIn);
-          node = nodeAndBuffer.nodeAddress();
+          FSTCompiler.NodeAndBuffer nodeAndBuffer = fstCompiler.addNode(nodeIn, true);
+          node = nodeAndBuffer.nodeAddress;

           // we use 0 as empty marker in hash table, so it better be impossible to get a frozen node
           // at 0:
           assert node != 0;

-          primaryTable.set(pos, node, nodeAndBuffer.bytes());
+          primaryTable.set(pos, node, nodeAndBuffer.bytes);

           // confirm frozen hash and unfrozen hash are the same
           assert hash(node) == hash : "mismatch frozenHash=" + hash(node) + " vs hash=" + hash;
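
The switch from nodeAndBuffer.nodeAddress() / nodeAndBuffer.bytes() to plain field reads suggests FSTCompiler.NodeAndBuffer is no longer a record; that side of the change is outside this diff. A minimal sketch of the assumed holder, with the new boolean on addNode presumably meaning "also copy out the node's bytes":

    // Hypothetical sketch (lives in FSTCompiler, not shown in this diff): pairs
    // the frozen node's address in the FST with a copy of its serialized bytes.
    static final class NodeAndBuffer {
      final long nodeAddress; // address of the frozen node in the FST
      final byte[] bytes; // the node's bytes, copied out at freeze time

      NodeAndBuffer(long nodeAddress, byte[] bytes) {
        this.nodeAddress = nodeAddress;
        this.bytes = bytes;
      }
    }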
@@ -192,9 +192,7 @@ private long hash(FSTCompiler.UnCompiledNode<T> node) {
   // hash code for a frozen node. this must precisely match the hash computation of an unfrozen
   // node!
   private long hash(long node) throws IOException {
-    long offset = primaryTable.copiedOffsets.get(node).offset;
-    FST.BytesReader in =
-        new ByteBlockPoolReverseBytesReader(primaryTable.copiedNodes, node - offset);
+    FST.BytesReader in = getBytesReader(primaryTable, node);

     final int PRIME = 31;

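
The rest of this method is unchanged and elided by the hunk. For context, it walks the frozen node's arcs through the reader and must mix in exactly the same per-arc state as the unfrozen-node hash above it; roughly (the details below are an assumption, not part of this diff):

    // Assumed shape of the elided loop: combine each arc's label, target, and
    // outputs into h, mirroring hash(FSTCompiler.UnCompiledNode<T>).
    long h = 0;
    fstCompiler.fst.readFirstRealTargetArc(node, scratchArc, in);
    while (true) {
      h = PRIME * h + scratchArc.label();
      h = PRIME * h + (int) (scratchArc.target() ^ (scratchArc.target() >> 32));
      h = PRIME * h + scratchArc.output().hashCode();
      h = PRIME * h + scratchArc.nextFinalOutput().hashCode();
      if (scratchArc.isFinal()) {
        h += 17;
      }
      if (scratchArc.isLast()) {
        break;
      }
      fstCompiler.fst.readNextRealArc(scratchArc, in);
    }
    return h & Long.MAX_VALUE; // keep the hash non-negative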
@@ -224,8 +222,7 @@ private long hash(long node) throws IOException {
   private boolean nodesEqual(
       PagedGrowableHash table, FSTCompiler.UnCompiledNode<T> node, long address)
       throws IOException {
-    long offset = table.copiedOffsets.get(address).offset;
-    FST.BytesReader in = new ByteBlockPoolReverseBytesReader(table.copiedNodes, address - offset);
+    FST.BytesReader in = getBytesReader(table, address);
     fstCompiler.fst.readFirstRealTargetArc(address, scratchArc, in);

     // fail fast for a node with fixed length arcs
@@ -280,20 +277,22 @@ private boolean nodesEqual(
     return false;
   }

-  // nocommit: change this to just offset and somehow find the length
-  record OffsetAndLength(long offset, int length) {}
+  private static <T> FST.BytesReader getBytesReader(
+      NodeHash<T>.PagedGrowableHash table, long address) {
+    byte[] bytes = table.getBytes(address);
+    return new RelativeReverseBytesReader(bytes, address - bytes.length + 1);
+  }

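RelativeReverseBytesReader is referenced here but defined outside this diff. Since a node's address points at its last byte (FST node bytes are read backward), the helper passes address - bytes.length + 1, the absolute address of the copy's first byte. A minimal sketch of such a reader, assuming it mirrors the ByteBlockPoolReverseBytesReader it replaces:

    // Hypothetical sketch (defined elsewhere in this package): reads a single
    // copied node's byte[] in reverse, translating absolute FST addresses into
    // array offsets. startAddress is the absolute address of bytes[0].
    final class RelativeReverseBytesReader extends FST.BytesReader {
      private final byte[] bytes;
      private final long startAddress;
      private long pos;

      RelativeReverseBytesReader(byte[] bytes, long startAddress) {
        this.bytes = bytes;
        this.startAddress = startAddress;
      }

      @Override
      public byte readByte() {
        // consume one byte, then step backward
        return bytes[(int) (pos-- - startAddress)];
      }

      @Override
      public void readBytes(byte[] b, int offset, int len) {
        for (int i = 0; i < len; i++) {
          b[offset + i] = readByte();
        }
      }

      @Override
      public void skipBytes(long count) {
        pos -= count;
      }

      @Override
      public long getPosition() {
        return pos;
      }

      @Override
      public void setPosition(long pos) {
        this.pos = pos;
      }

      @Override
      public boolean reversed() {
        return true;
      }
    }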
   /** Inner class because it needs access to hash function and FST bytes. */
   private class PagedGrowableHash {
     private PagedGrowableWriter entries;
     // nocommit: use PagedGrowableWriter? there was some size overflow issue with
     // PagedGrowableWriter
     // mapping from FST real address to copiedNodes offsets & length
-    private Map<Long, OffsetAndLength> copiedOffsets;
+    private Map<Long, Integer> copiedOffsets;
     long count;
-    long currentOffsets = -1;
     private long mask;
-    private final ByteBlockPool copiedNodes;
+    private final List<byte[]> copiedNodes;

     // 256K blocks, but note that the final block is sized only as needed so it won't use the full
     // block size when just a few elements were written to it
@@ -303,7 +302,7 @@ public PagedGrowableHash() {
       entries = new PagedGrowableWriter(16, BLOCK_SIZE_BYTES, 8, PackedInts.COMPACT);
       copiedOffsets = new HashMap<>();
       mask = 15;
-      copiedNodes = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
+      copiedNodes = new ArrayList<>();
     }

     public PagedGrowableHash(long lastNodeAddress, long size) {
@@ -313,19 +312,11 @@ public PagedGrowableHash(long lastNodeAddress, long size) {
       copiedOffsets = new HashMap<>();
       mask = size - 1;
       assert (mask & size) == 0 : "size must be a power-of-2; got size=" + size + " mask=" + mask;
-      copiedNodes = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
+      copiedNodes = new ArrayList<>();
     }

-    public byte[] getBytes(long index) {
-      // nocommit: find a more efficient way to copy from fallback table to primary table
-      // here we need double copying
-      OffsetAndLength offsetAndLength = copiedOffsets.get(index);
-      byte[] bytes = new byte[offsetAndLength.length];
-      // offset is the last offset of the node, hence subtract by (length - 1) to get the first
-      // offset
-      copiedNodes.readBytes(
-          offsetAndLength.offset - offsetAndLength.length + 1, bytes, 0, offsetAndLength.length);
-      return bytes;
+    public byte[] getBytes(long node) {
+      return copiedNodes.get(copiedOffsets.get(node));
     }

     public long get(long index) {
@@ -334,11 +325,9 @@ public long get(long index) {

     public void set(long index, long pointer, byte[] bytes) {
       entries.set(index, pointer);
-      copiedNodes.append(new BytesRef(bytes));
-      count += 3;
-      currentOffsets += bytes.length;
-      // offsets is the last offset of the node, as we are reading in backward
-      copiedOffsets.put(pointer, new OffsetAndLength(currentOffsets, bytes.length));
+      copiedNodes.add(bytes);
+      copiedOffsets.put(pointer, copiedNodes.size() - 1);
+      count += 2;
     }

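With this change the bookkeeping is a simple parallel structure: each frozen node's copied bytes are appended to copiedNodes, and copiedOffsets maps the node's FST address to its index in that list, so getBytes above is two O(1) lookups and returns the stored array without re-copying. A toy round trip (table h, address addr, and bytes b are hypothetical):

    // Illustration only, not code from this change:
    // h.set(pos, addr, b);            // entries[pos] = addr; copiedNodes gains b
    // byte[] copy = h.getBytes(addr); // copiedNodes.get(copiedOffsets.get(addr))
    // assert copy == b;               // same array reference -- no double copy
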
     private void rehash(long lastNodeAddress) throws IOException {