diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a3e7cb8ed277..6a2ab1a14bb3 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -227,6 +227,9 @@ Optimizations * GITHUB#12712: Speed up sorting postings file with an offline radix sorter in BPIndexReader. (Guo Feng) +* GITHUB#12702: Disable suffix sharing for block tree index, making writing the terms dictionary index faster + and less RAM hungry, while making the index a bit larger (~1.X% for the terms index file on Wikipedia). (Guo Feng, Mike McCandless) + Changes in runtime behavior --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java index 0dded44d6c46..acd75092022f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java @@ -521,7 +521,12 @@ public void compileIndex( final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); final FSTCompiler fstCompiler = - new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).bytesPageBits(pageBits).build(); + new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs) + // Disable suffixes sharing for block tree index because suffixes are mostly dropped + // from the FST index and left in the term blocks. + .suffixRAMLimitMB(0d) + .bytesPageBits(pageBits) + .build(); // if (DEBUG) { // System.out.println(" compile index for prefix=" + prefix); // }