12 changes: 6 additions & 6 deletions lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
@@ -98,8 +98,8 @@ public class FSTCompiler<T> {
   // it will throw exceptions if attempt to call getReverseBytesReader() or writeTo(DataOutput)
   private static final FSTReader NULL_FST_READER = new NullFSTReader();

-  private final NodeHash<T> dedupHash;
-  // a temporary FST used during building for NodeHash cache
+  private final FSTSuffixNodeCache<T> suffixDedupCache;
+  // a temporary FST used during building for FSTSuffixNodeCache cache
   final FST<T> fst;
   private final T NO_OUTPUT;

@@ -178,9 +178,9 @@ private FSTCompiler(
     if (suffixRAMLimitMB < 0) {
       throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB);
     } else if (suffixRAMLimitMB > 0) {
-      dedupHash = new NodeHash<>(this, suffixRAMLimitMB);
+      suffixDedupCache = new FSTSuffixNodeCache<>(this, suffixRAMLimitMB);
     } else {
-      dedupHash = null;
+      suffixDedupCache = null;
     }
     NO_OUTPUT = outputs.getNoOutput();

@@ -379,12 +379,12 @@ public long getArcCount() {
   private CompiledNode compileNode(UnCompiledNode<T> nodeIn) throws IOException {
     final long node;
     long bytesPosStart = numBytesWritten;
-    if (dedupHash != null) {
+    if (suffixDedupCache != null) {
       if (nodeIn.numArcs == 0) {
         node = addNode(nodeIn);
         lastFrozenNode = node;
       } else {
-        node = dedupHash.add(nodeIn);
+        node = suffixDedupCache.add(nodeIn);
       }
     } else {
       node = addNode(nodeIn);
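Taken together, these hunks show the cache is only engaged when suffixRAMLimitMB > 0; with a limit of 0 the suffixDedupCache branch is skipped and every node is frozen via addNode, trading FST size for build-time RAM. For context, here is a minimal sketch of driving FSTCompiler with suffix sharing enabled. It assumes the Lucene 9.8-era builder API (FSTCompiler.Builder.suffixRAMLimitMB matches the ramLimitMB checks above); the exact build()/compile() signatures vary across 9.x releases:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class SuffixDedupExample {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    // suffixRAMLimitMB > 0 enables the suffix dedup cache (the suffixDedupCache
    // branch above); 0 disables sharing and yields a larger, non-minimal FST
    FSTCompiler<Long> fstCompiler =
        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).suffixRAMLimitMB(16).build();

    IntsRefBuilder scratch = new IntsRefBuilder();
    // inputs must arrive in sorted order; "nation" and "station" share the
    // suffix "ation", whose frozen nodes the cache deduplicates
    fstCompiler.add(Util.toIntsRef(new BytesRef("nation"), scratch), 17L);
    fstCompiler.add(Util.toIntsRef(new BytesRef("station"), scratch), 42L);

    FST<Long> fst = fstCompiler.compile(); // freeze the FST (signature varies by release)
    System.out.println(Util.get(fst, Util.toIntsRef(new BytesRef("station"), scratch))); // 42
  }
}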
lucene/core/src/java/org/apache/lucene/util/fst/{NodeHash.java → FSTSuffixNodeCache.java}

@@ -31,8 +31,24 @@
 // TODO: couldn't we prune naturally back until we see a transition with an output? it's highly
 // unlikely (mostly impossible) such suffixes can be shared?

-// Used to dedup states (lookup already-frozen states)
-final class NodeHash<T> {
+/**
+ * This is essentially an LRU cache to maintain and look up node suffixes. An un-compiled node can
+ * be added into the cache, and if a similar node already exists we return its address in the FST.
+ * A node counts as similar if it has the same label, arcs, outputs & other properties that
+ * identify a node.
+ *
+ * <p>The total size of the cache is controlled through the constructor parameter <code>ramLimitMB
+ * </code>. Implementation-wise, we maintain two lookup tables: a primary table where nodes are
+ * looked up first, and a fallback lookup table in case the primary lookup fails. Nodes from the
+ * fallback table can also be promoted to the primary table when that happens. When the primary
+ * table is full, we swap it with the fallback table and clear out the primary table.
+ *
+ * <p>To look up a node address, we build a special hash table which maps from the node hash value
+ * to the node address in the FST, called <code>PagedGrowableHash</code>. Internally it uses {@link
+ * PagedGrowableWriter} to store the mapping, which allows efficient packing of the hash & address
+ * long values, and uses {@link ByteBlockPool} to store the actual node content (arcs & outputs).
+ */
+final class FSTSuffixNodeCache<T> {

   // primary table -- we add nodes into this until it reaches the requested tableSizeLimit/2, then
   // we move it to fallback
@@ -60,7 +76,7 @@ final class NodeHash<T> {
    * recently used suffixes are discarded, and the FST is no longer minimal. Still, larger
    * ramLimitMB will make the FST smaller (closer to minimal).
    */
-  public NodeHash(FSTCompiler<T> fstCompiler, double ramLimitMB) {
+  public FSTSuffixNodeCache(FSTCompiler<T> fstCompiler, double ramLimitMB) {
     if (ramLimitMB <= 0) {
       throw new IllegalArgumentException("ramLimitMB must be > 0; got: " + ramLimitMB);
     }
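The promote/swap behavior described in the new javadoc is easier to see in miniature. Below is a hypothetical sketch of the two-table flow, with plain HashMaps standing in for PagedGrowableHash and its ByteBlockPool-backed node storage; the names TwoTableCacheSketch and lookupOrAdd are illustrative only, not Lucene API:

import java.util.HashMap;

// Hypothetical illustration of the primary/fallback scheme; the real
// FSTSuffixNodeCache uses PagedGrowableWriter + ByteBlockPool, not HashMap.
class TwoTableCacheSketch {
  private HashMap<Long, Long> primary = new HashMap<>(); // node hash -> FST address
  private HashMap<Long, Long> fallback = new HashMap<>();
  private final int limit; // stand-in for the ramLimitMB-derived size limit

  TwoTableCacheSketch(int limit) {
    this.limit = limit;
  }

  /** Returns the cached FST address for this node hash, inserting freshAddress on a miss. */
  long lookupOrAdd(long nodeHash, long freshAddress) {
    Long addr = primary.get(nodeHash);
    if (addr != null) {
      return addr; // primary hit: suffix already frozen, reuse its address
    }
    addr = fallback.get(nodeHash);
    if (addr == null) {
      addr = freshAddress; // true miss: caller freezes the node at freshAddress
    }
    primary.put(nodeHash, addr); // promote fallback hits (and insert misses) into primary
    if (primary.size() >= limit) {
      // primary is full: demote it to fallback and start a fresh primary, so
      // entries unseen for two generations are discarded (the LRU-ish part)
      fallback = primary;
      primary = new HashMap<>();
    }
    return addr;
  }
}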
lucene/core/src/test/org/apache/lucene/util/fst/{TestNodeHash.java → TestFSTSuffixNodeCache.java}

@@ -19,14 +19,16 @@
 import com.carrotsearch.randomizedtesting.generators.RandomBytes;
 import org.apache.lucene.tests.util.LuceneTestCase;

-public class TestNodeHash extends LuceneTestCase {
+public class TestFSTSuffixNodeCache extends LuceneTestCase {

   public void testCopyFallbackNodeBytes() {
     // we don't need the FSTCompiler in this test
-    NodeHash<Object> nodeHash = new NodeHash<>(null, 1);
+    FSTSuffixNodeCache<Object> suffixCache = new FSTSuffixNodeCache<>(null, 1);

-    NodeHash<Object>.PagedGrowableHash primaryHashTable = nodeHash.new PagedGrowableHash();
-    NodeHash<Object>.PagedGrowableHash fallbackHashTable = nodeHash.new PagedGrowableHash();
+    FSTSuffixNodeCache<Object>.PagedGrowableHash primaryHashTable =
+        suffixCache.new PagedGrowableHash();
+    FSTSuffixNodeCache<Object>.PagedGrowableHash fallbackHashTable =
+        suffixCache.new PagedGrowableHash();
     int nodeLength = atLeast(500);
     long fallbackHashSlot = 1;
     byte[] fallbackBytes = RandomBytes.randomBytesOfLength(random(), nodeLength);