
Commit 2ca906d

Bound the RAM used by the NodeHash (sharing common suffixes) during FST compilation (#12633)
* tweak comments; change if to switch
* remove old SOPs, minor comment styling, fixed silly performance bug on rehash using the wrong bitsRequired (count vs node)
* first raw cut; some nocommits added; some tests fail
* tests pass!
* fix silly fallback hash bug
* remove SOPs; add some temporary debugging metrics
* add temporary tool to test FST performance across differing NodeHash sizes
* remove (now deleted) shouldShareNonSingletonNodes call from Lucene90BlockTreeTermsWriter
* add simple tool to render results table to GitHub MD
* add simple temporary tool to iterate all terms from a provided luceneutil wikipedia index and build an FST from them
* first cut at using packed ints for hash table again
* add some nocommits; tweak test_all_sizes.py to new RAM usage approach; when half of the double barrel is full, allocate new primary hash at full size to save cost of continuously rehashing for a large FST (see the sketch below)
* switch to limit suffix hash by RAM usage not count (more intuitive for users); clean up some stale nocommits
* switch to more intuitive approximate RAM (mb) limit for allowed size of NodeHash
* nuke a few nocommits; a few more remain
* remove DO_PRINT_HASH_RAM
* no more FST pruning
* remove final nocommit: randomly change allowed NodeHash suffix RAM size in TestFSTs.testRealTerms
* remove SOP
* tidy
* delete temp utility tools
* remove dead (FST pruning) code
* add CHANGES entry; fix one missed fst.addNode -> fstCompiler.addNode during merge conflict resolution
* remove a mal-formed nocommit
* fold PR feedback
* fold feedback
* add gradle help test details on how to specify heap size for the test JVM; fix bogus assert (uncovered by Test2BFST); add TODO to Test2BFST anticipating building massive FSTs in small bounded RAM
* suppress sysout checks for Test2BFSTs; add helpful comment showing how to run it directly
* tidy
1 parent 13d1a19 commit 2ca906d

14 files changed: +390 / -624 lines
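The "double barrel" hashing mentioned in the commit message is roughly this idea: keep two generations of the suffix hash and retire the older one when the newer fills up, so RAM stays bounded at the cost of occasionally re-adding a suffix node that was forgotten. The following is a minimal conceptual sketch only, not the actual org.apache.lucene.util.fst.NodeHash code (which uses open-addressed packed-int tables); the class and method names here are made up for illustration:

import java.util.HashMap;
import java.util.Map;

/** Hypothetical illustration of a two-generation ("double barrel") cache with bounded size. */
class DoubleBarrelCache<K, V> {
  private final int maxPrimarySize; // stand-in for the approximate RAM budget
  private Map<K, V> primary = new HashMap<>();
  private Map<K, V> fallback = new HashMap<>();

  DoubleBarrelCache(int maxPrimarySize) {
    this.maxPrimarySize = maxPrimarySize;
  }

  V get(K key) {
    V value = primary.get(key);
    if (value != null) {
      return value;
    }
    value = fallback.get(key);
    if (value != null) {
      // Promote a recently seen entry so it survives the next generation swap.
      primary.put(key, value);
    }
    return value;
  }

  void put(K key, V value) {
    primary.put(key, value);
    if (primary.size() >= maxPrimarySize) {
      // Retire the old generation; suffixes not seen recently are forgotten, which is
      // why the resulting FST is only approximately minimal under a RAM limit.
      fallback = primary;
      primary = new HashMap<>();
    }
  }
}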

help/tests.txt

Lines changed: 9 additions & 0 deletions
@@ -133,6 +133,15 @@ specifying the project and test task or a fully qualified task path. Example:

 gradlew -p lucene/core test -Ptests.verbose=true --tests "TestDemo"


+Larger heap size
+--------------------------
+
+By default tests run with a 512 MB max heap. But some tests (monster/nightly)
+need more heap. Use "-Dtests.heapsize" for this:
+
+gradlew -p lucene/core test --tests "Test2BFST" -Dtests.heapsize=32g
+
+
 Run GUI tests headlessly with Xvfb (Linux only)
 -----------------------------------------------
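As a hedged aside on the new help text above: monster/nightly tests are normally gated behind their own gradle properties, so if Test2BFST is annotated as a monster test the heap option would be combined with -Dtests.monster (an assumption here; if the test is not gated, that flag is simply unnecessary):

gradlew -p lucene/core test --tests "Test2BFST" -Dtests.monster=true -Dtests.heapsize=32g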

lucene/CHANGES.txt

Lines changed: 7 additions & 0 deletions
@@ -93,6 +93,13 @@ Improvements

 * GITHUB#11277, LUCENE-10241: Upgrade OpenNLP to 1.9.4. (Jeff Zemerick)

+* GITHUB#12542: FSTCompiler can now approximately limit how much RAM it uses to share
+  suffixes during FST construction using the suffixRAMLimitMB method. Larger values
+  result in a more minimal FST (more common suffixes are shared). Pass
+  Double.POSITIVE_INFINITY to use as much RAM as is needed to create a purely
+  minimal FST. Inspired by this Rust FST implementation:
+  https://blog.burntsushi.net/transducers (Mike McCandless)
+
 Optimizations
 ---------------------
 * GITHUB#12183: Make TermStates#build concurrent. (Shubham Chaudhary)
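To make the new option concrete, here is a small usage sketch based on the CHANGES entry above and on the FSTCompiler.Builder calls visible in the diffs below; it assumes the suffixRAMLimitMB setter lives on the Builder like the other options shown here, and the 32 MB budget is an arbitrary illustrative value:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;

class SuffixLimitExample {
  static FSTCompiler<BytesRef> newCompiler() {
    ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
    return new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
        // Cap the suffix-sharing NodeHash at roughly 32 MB of RAM; pass
        // Double.POSITIVE_INFINITY instead to build a fully minimal FST.
        .suffixRAMLimitMB(32)
        .build();
  }
}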

lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java

Lines changed: 1 addition & 3 deletions
@@ -478,9 +478,7 @@ public void compileIndex(

     final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
     final FSTCompiler<BytesRef> fstCompiler =
-        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
-            .shouldShareNonSingletonNodes(false)
-            .build();
+        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
     // if (DEBUG) {
     //   System.out.println("  compile index for prefix=" + prefix);
     // }

lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java

Lines changed: 1 addition & 3 deletions
@@ -395,9 +395,7 @@ public void compileIndex(
     }

     final FSTCompiler<Output> fstCompiler =
-        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, FST_OUTPUTS)
-            .shouldShareNonSingletonNodes(false)
-            .build();
+        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, FST_OUTPUTS).build();
     // if (DEBUG) {
     //   System.out.println("  compile index for prefix=" + prefix);
     // }

lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java

Lines changed: 1 addition & 6 deletions
@@ -521,12 +521,7 @@ public void compileIndex(

     final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
     final FSTCompiler<BytesRef> fstCompiler =
-        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
-            // Disable suffixes sharing for block tree index because suffixes are mostly dropped
-            // from the FST index and left in the term blocks.
-            .shouldShareSuffix(false)
-            .bytesPageBits(pageBits)
-            .build();
+        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).bytesPageBits(pageBits).build();
     // if (DEBUG) {
     //   System.out.println("  compile index for prefix=" + prefix);
     // }

lucene/core/src/java/org/apache/lucene/util/fst/FST.java

Lines changed: 18 additions & 19 deletions
@@ -83,13 +83,16 @@ public enum INPUT_TYPE {

   static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5;

-  /** Value of the arc flags to declare a node with fixed length arcs designed for binary search. */
+  /**
+   * Value of the arc flags to declare a node with fixed length (sparse) arcs designed for binary
+   * search.
+   */
   // We use this as a marker because this one flag is illegal by itself.
   public static final byte ARCS_FOR_BINARY_SEARCH = BIT_ARC_HAS_FINAL_OUTPUT;

   /**
-   * Value of the arc flags to declare a node with fixed length arcs and bit table designed for
-   * direct addressing.
+   * Value of the arc flags to declare a node with fixed length dense arcs and bit table designed
+   * for direct addressing.
    */
   static final byte ARCS_FOR_DIRECT_ADDRESSING = 1 << 6;

@@ -751,11 +754,9 @@ public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
   private void readFirstArcInfo(long nodeAddress, Arc<T> arc, final BytesReader in)
       throws IOException {
     in.setPosition(nodeAddress);
-    // System.out.println(" flags=" + arc.flags);

     byte flags = arc.nodeFlags = in.readByte();
     if (flags == ARCS_FOR_BINARY_SEARCH || flags == ARCS_FOR_DIRECT_ADDRESSING) {
-      // System.out.println(" fixed length arc");
       // Special arc which is actually a node header for fixed length arcs.
       arc.numArcs = in.readVInt();
       arc.bytesPerArc = in.readVInt();

@@ -766,8 +767,6 @@ private void readFirstArcInfo(long nodeAddress, Arc<T> arc, final BytesReader in
         arc.presenceIndex = -1;
       }
       arc.posArcsStart = in.getPosition();
-      // System.out.println(" bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + "
-      // arcsStart=" + pos);
     } else {
       arc.nextArc = nodeAddress;
       arc.bytesPerArc = 0;

@@ -830,27 +829,27 @@ int readNextArcLabel(Arc<T> arc, BytesReader in) throws IOException {
        }
      }
    } else {
-      if (arc.bytesPerArc() != 0) {
-        // System.out.println(" nextArc real array");
-        // Arcs have fixed length.
-        if (arc.nodeFlags() == ARCS_FOR_BINARY_SEARCH) {
+      switch (arc.nodeFlags()) {
+        case ARCS_FOR_BINARY_SEARCH:
           // Point to next arc, -1 to skip arc flags.
           in.setPosition(arc.posArcsStart() - (1 + arc.arcIdx()) * (long) arc.bytesPerArc() - 1);
-        } else {
-          assert arc.nodeFlags() == ARCS_FOR_DIRECT_ADDRESSING;
+          break;
+        case ARCS_FOR_DIRECT_ADDRESSING:
           // Direct addressing node. The label is not stored but rather inferred
           // based on first label and arc index in the range.
           assert BitTable.assertIsValid(arc, in);
           assert BitTable.isBitSet(arc.arcIdx(), arc, in);
           int nextIndex = BitTable.nextBitSet(arc.arcIdx(), arc, in);
           assert nextIndex != -1;
           return arc.firstLabel() + nextIndex;
-        }
-      } else {
-        // Arcs have variable length.
-        // System.out.println(" nextArc real list");
-        // Position to next arc, -1 to skip flags.
-        in.setPosition(arc.nextArc() - 1);
+        default:
+          // Variable length arcs - linear search.
+          assert arc.bytesPerArc() == 0;
+          // Arcs have variable length.
+          // System.out.println(" nextArc real list");
+          // Position to next arc, -1 to skip flags.
+          in.setPosition(arc.nextArc() - 1);
+          break;
       }
     }
     return readLabel(in);
