diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 16e289c71bff..a398f9d9a46c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -147,6 +147,8 @@ Optimizations * GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X) +* GITHUB#12985: Make Lucene90BlockTreePostingsFormat build FST off-heap. (Anh Dung Bui) + Bug Fixes --------------------- diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/Lucene90RWPostingsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/Lucene90RWPostingsFormat.java index 4360b90f2370..16016ae746f0 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/Lucene90RWPostingsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/Lucene90RWPostingsFormat.java @@ -48,19 +48,23 @@ public final class Lucene90RWPostingsFormat extends PostingsFormat { private final int minTermBlockSize; private final int maxTermBlockSize; + private final long blockHeapSizeLimitBytes; /** Creates {@code Lucene90RWPostingsFormat} with default settings. 
*/ public Lucene90RWPostingsFormat() { this( Lucene90BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, - Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE); + Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE, + Lucene90BlockTreeTermsWriter.DEFAULT_BLOCK_HEAP_LIMIT_BYTES); } - public Lucene90RWPostingsFormat(int minTermBlockSize, int maxTermBlockSize) { + public Lucene90RWPostingsFormat( + int minTermBlockSize, int maxTermBlockSize, long blockHeapSizeLimitBytes) { super("Lucene90"); Lucene90BlockTreeTermsWriter.validateSettings(minTermBlockSize, maxTermBlockSize); this.minTermBlockSize = minTermBlockSize; this.maxTermBlockSize = maxTermBlockSize; + this.blockHeapSizeLimitBytes = blockHeapSizeLimitBytes; } @Override @@ -79,7 +83,8 @@ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException postingsWriter, minTermBlockSize, maxTermBlockSize, - Lucene90BlockTreeTermsReader.VERSION_START); + Lucene90BlockTreeTermsReader.VERSION_START, + blockHeapSizeLimitBytes); success = true; return ret; } finally { diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90PostingsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90PostingsFormat.java index 7965bc9c7780..00500c1b8770 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90PostingsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90PostingsFormat.java @@ -26,6 +26,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.CompetitiveImpactAccumulator; import org.apache.lucene.codecs.lucene90.blocktree.FieldReader; +import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter; import org.apache.lucene.codecs.lucene90.blocktree.Stats; import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat; import org.apache.lucene.codecs.lucene99.Lucene99SkipWriter; @@ -45,7 +46,27 @@ 
import org.apache.lucene.tests.util.TestUtil; public class TestLucene90PostingsFormat extends BasePostingsFormatTestCase { - private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene90RWPostingsFormat()); + private final Codec codec = + TestUtil.alwaysPostingsFormat( + new Lucene90RWPostingsFormat( + Lucene90BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, + Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE, + getBlockHeapSizeLimitBytes())); + + private static long getBlockHeapSizeLimitBytes() { + // randomize the block heap max size between 3 states: + // - 0, effectively disable on-heap FST and always use off-heap + // - DEFAULT_BLOCK_HEAP_LIMIT_BYTES + // - a random number between 0 and DEFAULT_BLOCK_HEAP_LIMIT_BYTES + int r = random().nextInt(3); + if (r == 0) { + return 0; + } + if (r == 1) { + return Lucene90BlockTreeTermsWriter.DEFAULT_BLOCK_HEAP_LIMIT_BYTES; + } + return random().nextLong(Lucene90BlockTreeTermsWriter.DEFAULT_BLOCK_HEAP_LIMIT_BYTES); + } @Override protected Codec getCodec() { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java index 90b34750463d..5fd694704db6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java @@ -39,6 +39,7 @@ import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -54,6 +55,7 @@ import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FSTCompiler; +import 
org.apache.lucene.util.fst.OffHeapFSTStore; import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.packed.PackedInts; @@ -228,6 +230,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer { */ public static final int DEFAULT_MAX_BLOCK_SIZE = 48; + /** Suggested default value for the {@code blockHeapSizeLimitBytes} parameter. */ + public static final long DEFAULT_BLOCK_HEAP_LIMIT_BYTES = 512 * 1024; // 512KB + // public static boolean DEBUG = false; // public static boolean DEBUG2 = false; @@ -246,6 +251,18 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer { private final List fields = new ArrayList<>(); + // keep track of the temp IndexInput to close them + private final List tempIndexInputs = new ArrayList<>(); + + // keep track of the temp IndexInput name to delete them + private final List tempInputNames = new ArrayList<>(); + + // if the {@link PendingBlock} size is more than this, we will use off-heap FST + // setting to 0 means we will always use off-heap FST + private final long blockHeapSizeLimitBytes; + + private final SegmentWriteState state; + /** * Create a new writer. The number of items (terms or sub-blocks) per block will aim to be between * minItemsPerBlock and maxItemsPerBlock, though in some cases the blocks may be smaller than the @@ -273,10 +290,32 @@ public Lucene90BlockTreeTermsWriter( int maxItemsInBlock, int version) throws IOException { + this( + state, + postingsWriter, + minItemsInBlock, + maxItemsInBlock, + version, + DEFAULT_BLOCK_HEAP_LIMIT_BYTES); + } + + /** + * Expert constructor that allows configuring the version, used for bw tests. 
It also allows + * configuring of block heap max size + */ + public Lucene90BlockTreeTermsWriter( + SegmentWriteState state, + PostingsWriterBase postingsWriter, + int minItemsInBlock, + int maxItemsInBlock, + int version, + long blockHeapSizeLimitBytes) + throws IOException { validateSettings(minItemsInBlock, maxItemsInBlock); this.minItemsInBlock = minItemsInBlock; this.maxItemsInBlock = maxItemsInBlock; + this.blockHeapSizeLimitBytes = blockHeapSizeLimitBytes; if (version < Lucene90BlockTreeTermsReader.VERSION_START || version > Lucene90BlockTreeTermsReader.VERSION_CURRENT) { throw new IllegalArgumentException( @@ -288,6 +327,7 @@ public Lucene90BlockTreeTermsWriter( + version); } this.version = version; + this.state = state; this.maxDoc = state.segmentInfo.maxDoc(); this.fieldInfos = state.fieldInfos; @@ -438,6 +478,26 @@ public String toString() { } } + /** + * Create temp IndexOutput to write FST off-heap. The file name will be tracked to clean up later. + */ + private IndexOutput createTempOutput(String prefix) throws IOException { + IndexOutput tempOut = state.directory.createTempOutput(prefix, "temp", state.context); + tempInputNames.add(tempOut.getName()); + return tempOut; + } + + /** + * Close the temp IndexOutput and open IndexInput to read from. The IndexInput will be tracked to + * clean up later. + */ + private IndexInput openTempInput(IndexOutput fstDataOutput) throws IOException { + IOUtils.close(fstDataOutput); // we need to close the DataOutput before reading from DataInput + IndexInput tempIn = state.directory.openInput(fstDataOutput.getName(), state.context); + tempIndexInputs.add(tempIn); + return tempIn; + } + /** * Encodes long value to variable length byte[], in MSB order. Use {@link * FieldReader#readMSBVLong} to decode. 
@@ -460,7 +520,10 @@ static void writeMSBVLong(long l, DataOutput scratchBytes) throws IOException { private final class PendingBlock extends PendingEntry { public final BytesRef prefix; public final long fp; - public FST index; + // the index's FST + public FST indexFST; + // the index's FST metadata + public FST.FSTMetadata indexMetadata; public List> subIndices; public final boolean hasTerms; public final boolean isFloor; @@ -490,7 +553,8 @@ public String toString() { public void compileIndex( List blocks, ByteBuffersDataOutput scratchBytes, - IntsRefBuilder scratchIntsRef) + IntsRefBuilder scratchIntsRef, + DataOutput fstDataOutput) throws IOException { assert (isFloor && blocks.size() > 1) || (isFloor == false && blocks.size() == 1) @@ -520,17 +584,6 @@ public void compileIndex( } } - long estimateSize = prefix.length; - for (PendingBlock block : blocks) { - if (block.subIndices != null) { - for (FST subIndex : block.subIndices) { - estimateSize += subIndex.numBytes(); - } - } - } - int estimateBitsRequired = PackedInts.bitsRequired(estimateSize); - int pageBits = Math.min(15, Math.max(6, estimateBitsRequired)); - final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); final int fstVersion; if (version >= Lucene90BlockTreeTermsReader.VERSION_CURRENT) { @@ -543,7 +596,7 @@ public void compileIndex( // Disable suffixes sharing for block tree index because suffixes are mostly dropped // from the FST index and left in the term blocks. 
.suffixRAMLimitMB(0d) - .dataOutput(getOnHeapReaderWriter(pageBits)) + .dataOutput(fstDataOutput) .setVersion(fstVersion) .build(); // if (DEBUG) { @@ -565,7 +618,19 @@ public void compileIndex( } } - index = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); + indexMetadata = fstCompiler.compile(); + + if (fstDataOutput == indexOut) { + // this is the root block, we don't need to read from it and Lucene doesn't allow to read + // from still-writing DataOutput either, hence we only store the FST metadata to save it + // later + } else if (fstDataOutput instanceof IndexOutput) { + // if we write to IndexOutput then we should open and read from IndexInput + IndexInput indexInput = openTempInput((IndexOutput) fstDataOutput); + indexFST = new FST<>(indexMetadata, indexInput, new OffHeapFSTStore()); + } else { + indexFST = FST.fromFSTReader(indexMetadata, fstCompiler.getFSTReader()); + } assert subIndices == null; @@ -598,8 +663,6 @@ private void append( private final ByteBuffersDataOutput scratchBytes = ByteBuffersDataOutput.newResettableInstance(); private final IntsRefBuilder scratchIntsRef = new IntsRefBuilder(); - static final BytesRef EMPTY_BYTES_REF = new BytesRef(); - private static class StatsWriter { private final DataOutput out; @@ -659,8 +722,13 @@ class TermsWriter { private PendingTerm firstPendingTerm; private PendingTerm lastPendingTerm; - /** Writes the top count entries in pending, using prevTerm to compute the prefix. */ - void writeBlocks(int prefixLength, int count) throws IOException { + /** + * Writes the top count entries in pending, using prevTerm to compute the prefix. + * + *

For root block, we will write the FST directly to the IndexOutput, for others they will + * use on-heap FST + */ + void writeBlocks(int prefixLength, int count, boolean isRootBlock) throws IOException { assert count > 0; @@ -773,7 +841,12 @@ void writeBlocks(int prefixLength, int count) throws IOException { assert firstBlock.isFloor || newBlocks.size() == 1; - firstBlock.compileIndex(newBlocks, scratchBytes, scratchIntsRef); + // Create a proper DataOutput for the FST. For root block, we will write to the IndexOut + // directly. For sub blocks, if the size is smaller than blockHeapSizeLimitBytes then we + // will use the on-heap ReadWriteDataOutput, otherwise create a temp output + DataOutput fstDataOutput = getFSTDataOutput(newBlocks, firstBlock.prefix.length, isRootBlock); + + firstBlock.compileIndex(newBlocks, scratchBytes, scratchIntsRef, fstDataOutput); // Remove slice from the top of the pending stack, that we just wrote: pending.subList(pending.size() - count, pending.size()).clear(); @@ -784,6 +857,31 @@ void writeBlocks(int prefixLength, int count) throws IOException { newBlocks.clear(); } + private DataOutput getFSTDataOutput( + List blocks, int prefixLength, boolean isRootBlock) throws IOException { + if (isRootBlock) { + return indexOut; + } + long estimateSize = prefixLength; + for (PendingBlock block : blocks) { + if (block.subIndices != null) { + for (FST subIndex : block.subIndices) { + estimateSize += subIndex.numBytes(); + } + } + } + + // the size is larger than heap size limit, use off-heap writing instead + if (estimateSize > blockHeapSizeLimitBytes) { + return createTempOutput(fieldInfo.getName()); + } + + int estimateBitsRequired = PackedInts.bitsRequired(estimateSize); + int pageBits = Math.min(15, Math.max(6, estimateBitsRequired)); + + return getOnHeapReaderWriter(pageBits); + } + private boolean allEqual(byte[] b, int startOffset, int endOffset, byte value) { Objects.checkFromToIndex(startOffset, endOffset, b.length); for (int i = 
startOffset; i < endOffset; ++i) { @@ -965,7 +1063,7 @@ private PendingBlock writeBlock( assert block.fp < startFP; suffixLengthsWriter.writeVLong(startFP - block.fp); - subIndices.add(block.index); + subIndices.add(block.indexFST); } } statsWriter.finish(); @@ -1128,9 +1226,9 @@ private void pushTerm(BytesRef text) throws IOException { // we are closing: int prefixTopSize = pending.size() - prefixStarts[i]; if (prefixTopSize >= minItemsInBlock) { - // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + - // " minItemsInBlock=" + minItemsInBlock); - writeBlocks(i + 1, prefixTopSize); + // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " + // minItemsInBlock=" + minItemsInBlock); + writeBlocks(i + 1, prefixTopSize, false); prefixStarts[i] -= prefixTopSize - 1; } } @@ -1160,14 +1258,14 @@ public void finish() throws IOException { // we can save writing a "degenerate" root block, but we have to // fix all the places that assume the root block's prefix is the empty string: pushTerm(new BytesRef()); - writeBlocks(0, pending.size()); + writeBlocks(0, pending.size(), true); // We better have one final "root" block: assert pending.size() == 1 && !pending.get(0).isTerm : "pending.size()=" + pending.size() + " pending=" + pending; final PendingBlock root = (PendingBlock) pending.get(0); assert root.prefix.length == 0; - final BytesRef rootCode = root.index.getEmptyOutput(); + final BytesRef rootCode = root.indexMetadata.getEmptyOutput(); assert rootCode != null; ByteBuffersDataOutput metaOut = new ByteBuffersDataOutput(); @@ -1185,9 +1283,11 @@ public void finish() throws IOException { metaOut.writeVInt(docsSeen.cardinality()); writeBytesRef(metaOut, new BytesRef(firstPendingTerm.termBytes)); writeBytesRef(metaOut, new BytesRef(lastPendingTerm.termBytes)); - metaOut.writeVLong(indexOut.getFilePointer()); + // Write the address to the beginning of the FST. 
Note that the FST is already written to + // indexOut by this point + metaOut.writeVLong(indexOut.getFilePointer() - root.indexMetadata.getNumBytes()); // Write FST to index - root.index.save(metaOut, indexOut); + root.indexMetadata.save(metaOut); // System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name); /* @@ -1242,9 +1342,12 @@ public void close() throws IOException { } finally { if (success) { IOUtils.close(metaOut, termsOut, indexOut, postingsWriter); + IOUtils.close(tempIndexInputs); } else { IOUtils.closeWhileHandlingException(metaOut, termsOut, indexOut, postingsWriter); + IOUtils.closeWhileHandlingException(tempIndexInputs); } + IOUtils.deleteFilesIgnoringExceptions(state.directory, tempInputNames); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java index 6bb5718d5c75..f2b54e5d7aaa 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java @@ -537,56 +537,10 @@ public FSTMetadata getMetadata() { * @param out the DataOutput to write the FST bytes to */ public void save(DataOutput metaOut, DataOutput out) throws IOException { - saveMetadata(metaOut); + metadata.save(metaOut); fstReader.writeTo(out); } - /** - * Save the metadata to a DataOutput - * - * @param metaOut the DataOutput to write the metadata to - */ - public void saveMetadata(DataOutput metaOut) throws IOException { - CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT); - // TODO: really we should encode this as an arc, arriving - // to the root node, instead of special casing here: - if (metadata.emptyOutput != null) { - // Accepts empty string - metaOut.writeByte((byte) 1); - - // Serialize empty-string output: - ByteBuffersDataOutput ros = new ByteBuffersDataOutput(); - outputs.writeFinalOutput(metadata.emptyOutput, ros); - byte[] emptyOutputBytes = ros.toArrayCopy(); - int emptyLen = 
emptyOutputBytes.length; - - // reverse - final int stopAt = emptyLen / 2; - int upto = 0; - while (upto < stopAt) { - final byte b = emptyOutputBytes[upto]; - emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1]; - emptyOutputBytes[emptyLen - upto - 1] = b; - upto++; - } - metaOut.writeVInt(emptyLen); - metaOut.writeBytes(emptyOutputBytes, 0, emptyLen); - } else { - metaOut.writeByte((byte) 0); - } - final byte t; - if (metadata.inputType == INPUT_TYPE.BYTE1) { - t = 0; - } else if (metadata.inputType == INPUT_TYPE.BYTE2) { - t = 1; - } else { - t = 2; - } - metaOut.writeByte(t); - metaOut.writeVLong(metadata.startNode); - metaOut.writeVLong(numBytes()); - } - /** Writes an automaton to a file. */ public void save(final Path path) throws IOException { try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) { @@ -1258,5 +1212,59 @@ public FSTMetadata( public int getVersion() { return version; } + + public T getEmptyOutput() { + return emptyOutput; + } + + public long getNumBytes() { + return numBytes; + } + + /** + * Save the metadata to a DataOutput + * + * @param metaOut the DataOutput to write the metadata to + */ + public void save(DataOutput metaOut) throws IOException { + CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT); + // TODO: really we should encode this as an arc, arriving + // to the root node, instead of special casing here: + if (emptyOutput != null) { + // Accepts empty string + metaOut.writeByte((byte) 1); + + // Serialize empty-string output: + ByteBuffersDataOutput ros = new ByteBuffersDataOutput(); + outputs.writeFinalOutput(emptyOutput, ros); + byte[] emptyOutputBytes = ros.toArrayCopy(); + int emptyLen = emptyOutputBytes.length; + + // reverse + final int stopAt = emptyLen / 2; + int upto = 0; + while (upto < stopAt) { + final byte b = emptyOutputBytes[upto]; + emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1]; + emptyOutputBytes[emptyLen - upto - 1] = b; + upto++; + } + 
metaOut.writeVInt(emptyLen); + metaOut.writeBytes(emptyOutputBytes, 0, emptyLen); + } else { + metaOut.writeByte((byte) 0); + } + final byte t; + if (inputType == INPUT_TYPE.BYTE1) { + t = 0; + } else if (inputType == INPUT_TYPE.BYTE2) { + t = 1; + } else { + t = 2; + } + metaOut.writeByte(t); + metaOut.writeVLong(startNode); + metaOut.writeVLong(numBytes); + } } }