diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/ES812PostingsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/postings/ES812PostingsFormat.java index 6ccfaba7853f2..1d7f01be1cfc6 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/postings/ES812PostingsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/ES812PostingsFormat.java @@ -27,7 +27,6 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsReader; import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentReadState; @@ -37,6 +36,8 @@ import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.core.IOUtils; import org.elasticsearch.index.codec.ForUtil; +import org.elasticsearch.index.codec.postings.terms.Lucene90BlockTreeTermsReader; +import org.elasticsearch.index.codec.postings.terms.NonForkedHelper; import java.io.IOException; @@ -414,7 +415,12 @@ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException PostingsReaderBase postingsReader = new ES812PostingsReader(state); boolean success = false; try { - FieldsProducer ret = new Lucene90BlockTreeTermsReader(postingsReader, state); + FieldsProducer ret; + if (NonForkedHelper.USE_FORKED_TERMS_READER.isEnabled()) { + ret = new Lucene90BlockTreeTermsReader(postingsReader, state); + } else { + ret = new org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsReader(postingsReader, state); + } success = true; return ret; } finally { diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/CompressionAlgorithm.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/CompressionAlgorithm.java new file mode 100644 index 0000000000000..17d524a346cd3 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/CompressionAlgorithm.java @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ +package org.elasticsearch.index.codec.postings.terms; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.util.compress.LowercaseAsciiCompression; + +import java.io.IOException; + +/** Compression algorithm used for suffixes of a block of terms. 
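+ * Three algorithms are defined, identified by a 2-bit on-disk code: {@code 0x00} (no compression), + * {@code 0x01} (lowercase-ASCII compression) and {@code 0x02} (LZ4); {@link #byCode} maps a code back + * to its algorithm.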
*/ +enum CompressionAlgorithm { + NO_COMPRESSION(0x00) { + + @Override + void read(DataInput in, byte[] out, int len) throws IOException { + in.readBytes(out, 0, len); + } + }, + + LOWERCASE_ASCII(0x01) { + + @Override + void read(DataInput in, byte[] out, int len) throws IOException { + LowercaseAsciiCompression.decompress(in, out, len); + } + }, + + LZ4(0x02) { + + @Override + void read(DataInput in, byte[] out, int len) throws IOException { + org.apache.lucene.util.compress.LZ4.decompress(in, len, out, 0); + } + }; + + private static final CompressionAlgorithm[] BY_CODE = new CompressionAlgorithm[3]; + + static { + for (CompressionAlgorithm alg : CompressionAlgorithm.values()) { + BY_CODE[alg.code] = alg; + } + } + + /** Look up a {@link CompressionAlgorithm} by its {@link CompressionAlgorithm#code}. */ + static CompressionAlgorithm byCode(int code) { + if (code < 0 || code >= BY_CODE.length) { + throw new IllegalArgumentException("Illegal code for a compression algorithm: " + code); + } + return BY_CODE[code]; + } + + public final int code; + + CompressionAlgorithm(int code) { + this.code = code; + } + + abstract void read(DataInput in, byte[] out, int len) throws IOException; +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/FieldReader.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/FieldReader.java new file mode 100644 index 0000000000000..baed15e315d85 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/FieldReader.java @@ -0,0 +1,245 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ +package org.elasticsearch.index.codec.postings.terms; + +import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.CompiledAutomaton; +import org.apache.lucene.util.fst.ByteSequenceOutputs; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.fst.OffHeapFSTStore; + +import java.io.IOException; + +import static org.elasticsearch.index.codec.postings.terms.Lucene90BlockTreeTermsReader.VERSION_MSB_VLONG_OUTPUT; + +/** + * BlockTree's implementation of {@link Terms}. 
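+ * + * Exposes the per-field statistics (term count, sum of doc and total term frequencies, doc count) read + * from the terms metadata, and navigates terms through an FST index that is always loaded off-heap.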
+ */ +public final class FieldReader extends Terms { + + // private final boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + final long numTerms; + final FieldInfo fieldInfo; + final long sumTotalTermFreq; + final long sumDocFreq; + final int docCount; + final long rootBlockFP; + final BytesRef rootCode; + final BytesRef minTerm; + final BytesRef maxTerm; + final Lucene90BlockTreeTermsReader parent; + + final FST index; + + // private boolean DEBUG; + + FieldReader( + Lucene90BlockTreeTermsReader parent, + FieldInfo fieldInfo, + long numTerms, + BytesRef rootCode, + long sumTotalTermFreq, + long sumDocFreq, + int docCount, + long indexStartFP, + IndexInput metaIn, + IndexInput indexIn, + BytesRef minTerm, + BytesRef maxTerm + ) throws IOException { + assert numTerms > 0; + this.fieldInfo = fieldInfo; + // DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id"); + this.parent = parent; + this.numTerms = numTerms; + this.sumTotalTermFreq = sumTotalTermFreq; + this.sumDocFreq = sumDocFreq; + this.docCount = docCount; + this.minTerm = minTerm; + this.maxTerm = maxTerm; + // if (DEBUG) { + // System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + // + rootCode + " divisor=" + indexDivisor); + // } + rootBlockFP = readVLongOutput(new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)) + >>> Lucene90BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS; + // Initialize FST always off-heap. + var metadata = FST.readMetadata(metaIn, ByteSequenceOutputs.getSingleton()); + index = FST.fromFSTReader(metadata, new OffHeapFSTStore(indexIn, indexStartFP, metadata)); + /* + if (false) { + final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; + Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); + Util.toDot(index, w, false, false); + System.out.println("FST INDEX: SAVED to " + dotFileName); + w.close(); + } + */ + BytesRef emptyOutput = metadata.getEmptyOutput(); + if (rootCode.equals(emptyOutput) == false) { + // TODO: this branch is never taken + assert false; + this.rootCode = rootCode; + } else { + this.rootCode = emptyOutput; + } + } + + long readVLongOutput(DataInput in) throws IOException { + if (parent.version >= VERSION_MSB_VLONG_OUTPUT) { + return readMSBVLong(in); + } else { + return in.readVLong(); + } + } + + /** + * Decodes a variable length byte[] in MSB order back to long, as written by {@link + * Lucene90BlockTreeTermsWriter#writeMSBVLong}. + * + *
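For example, the two bytes {@code 0x81 0x00} decode to {@code 128}: the high bit of each byte is a + * continuation flag, and the 7 payload bits of every byte arrive most-significant first. + + *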
<p>
Package private for testing. + */ + static long readMSBVLong(DataInput in) throws IOException { + long l = 0L; + while (true) { + byte b = in.readByte(); + l = (l << 7) | (b & 0x7FL); + if ((b & 0x80) == 0) { + break; + } + } + return l; + } + + @Override + public BytesRef getMin() throws IOException { + if (minTerm == null) { + // Older index that didn't store min/maxTerm + return super.getMin(); + } else { + return minTerm; + } + } + + @Override + public BytesRef getMax() throws IOException { + if (maxTerm == null) { + // Older index that didn't store min/maxTerm + return super.getMax(); + } else { + return maxTerm; + } + } + + /** For debugging -- used by CheckIndex too */ + @Override + public Stats getStats() throws IOException { + return new SegmentTermsEnum(this).computeBlockStats(); + } + + @Override + public boolean hasFreqs() { + return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + } + + @Override + public boolean hasOffsets() { + return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + } + + @Override + public boolean hasPositions() { + return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + } + + @Override + public boolean hasPayloads() { + return fieldInfo.hasPayloads(); + } + + @Override + public TermsEnum iterator() throws IOException { + return new SegmentTermsEnum(this); + } + + @Override + public long size() { + return numTerms; + } + + @Override + public long getSumTotalTermFreq() { + return sumTotalTermFreq; + } + + @Override + public long getSumDocFreq() { + return sumDocFreq; + } + + @Override + public int getDocCount() { + return docCount; + } + + @Override + public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { + // if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" + + // ToStringUtils.bytesRefToString(startTerm)); + // System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton); + // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum? + // can we optimize knowing that...? + if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { + throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead"); + } + return new IntersectTermsEnum( + this, + compiled.getTransitionAccessor(), + compiled.getByteRunnable(), + compiled.commonSuffixRef, + startTerm + ); + } + + @Override + public String toString() { + return "BlockTreeTerms(seg=" + + parent.segment + + " terms=" + + numTerms + + ",postings=" + + sumDocFreq + + ",positions=" + + sumTotalTermFreq + + ",docs=" + + docCount + + ")"; + } + + // CHANGES: + + public BytesRef getMinTerm() { + return minTerm; + } + + public BytesRef getMaxTerm() { + return maxTerm; + } + + // END CHANGES: +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/IntersectTermsEnum.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/IntersectTermsEnum.java new file mode 100644 index 0000000000000..dbaac8d1226b9 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/IntersectTermsEnum.java @@ -0,0 +1,558 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ +package org.elasticsearch.index.codec.postings.terms; + +import org.apache.lucene.index.BaseTermsEnum; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.automaton.ByteRunnable; +import org.apache.lucene.util.automaton.Transition; +import org.apache.lucene.util.automaton.TransitionAccessor; +import org.apache.lucene.util.fst.FST; + +import java.io.IOException; + +/** + * This is used to implement efficient {@link Terms#intersect} for block-tree. Note that it cannot + * seek, except for the initial term on init. It just "nexts" through the intersection of the + * automaton and the terms. It does not use the terms index at all: on init, it loads the root + * block, and scans its way to the initial term. Likewise, in next it scans until it finds a term + * that matches the current automaton transition. + */ +final class IntersectTermsEnum extends BaseTermsEnum { + + // static boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + final IndexInput in; + + IntersectTermsEnumFrame[] stack; + + @SuppressWarnings({ "rawtypes", "unchecked" }) + private FST.Arc[] arcs = new FST.Arc[5]; + + final ByteRunnable runAutomaton; + final TransitionAccessor automaton; + final BytesRef commonSuffix; + + private IntersectTermsEnumFrame currentFrame; + private Transition currentTransition; + + private final BytesRef term = new BytesRef(); + + private final FST.BytesReader fstReader; + + final FieldReader fr; + + private BytesRef savedStartTerm; + + private final SegmentTermsEnum.OutputAccumulator outputAccumulator = new SegmentTermsEnum.OutputAccumulator(); + + // TODO: in some cases we can filter by length? eg + // regexp foo*bar must be at least length 6 bytes + IntersectTermsEnum(FieldReader fr, TransitionAccessor automaton, ByteRunnable runAutomaton, BytesRef commonSuffix, BytesRef startTerm) + throws IOException { + this.fr = fr; + + assert automaton != null; + assert runAutomaton != null; + + this.runAutomaton = runAutomaton; + this.automaton = automaton; + this.commonSuffix = commonSuffix; + + in = fr.parent.termsIn.clone(); + stack = new IntersectTermsEnumFrame[5]; + for (int idx = 0; idx < stack.length; idx++) { + stack[idx] = new IntersectTermsEnumFrame(this, idx); + } + for (int arcIdx = 0; arcIdx < arcs.length; arcIdx++) { + arcs[arcIdx] = new FST.Arc<>(); + } + + fstReader = fr.index.getBytesReader(); + + // TODO: if the automaton is "smallish" we really + // should use the terms index to seek at least to + // the initial term and likely to subsequent terms + // (or, maybe just fallback to ATE for such cases). + // Else the seek cost of loading the frames will be + // too costly. + + final FST.Arc arc = fr.index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! 
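+ // (That empty-string output encodes the root block's file pointer, see FieldReader#rootBlockFP, + // which is why this first arc must be final.)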
+ assert arc.isFinal(); + + // Special pushFrame since it's the first one: + final IntersectTermsEnumFrame f = stack[0]; + f.fp = f.fpOrig = fr.rootBlockFP; + f.prefix = 0; + f.setState(0); + f.arc = arc; + f.load(fr.rootCode); + + // for assert: + assert setSavedStartTerm(startTerm); + + currentFrame = f; + outputAccumulator.push(currentFrame.arc.output()); + + if (startTerm != null) { + seekToStartTerm(startTerm); + } + currentTransition = currentFrame.transition; + } + + // only for assert: + private boolean setSavedStartTerm(BytesRef startTerm) { + savedStartTerm = startTerm == null ? null : BytesRef.deepCopyOf(startTerm); + return true; + } + + @Override + public TermState termState() throws IOException { + currentFrame.decodeMetaData(); + return currentFrame.termState.clone(); + } + + private IntersectTermsEnumFrame getFrame(int ord) throws IOException { + if (ord >= stack.length) { + final IntersectTermsEnumFrame[] next = new IntersectTermsEnumFrame[ArrayUtil.oversize( + 1 + ord, + RamUsageEstimator.NUM_BYTES_OBJECT_REF + )]; + System.arraycopy(stack, 0, next, 0, stack.length); + for (int stackOrd = stack.length; stackOrd < next.length; stackOrd++) { + next[stackOrd] = new IntersectTermsEnumFrame(this, stackOrd); + } + stack = next; + } + assert stack[ord].ord == ord; + return stack[ord]; + } + + private FST.Arc getArc(int ord) { + if (ord >= arcs.length) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + final FST.Arc[] next = new FST.Arc[ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(arcs, 0, next, 0, arcs.length); + for (int arcOrd = arcs.length; arcOrd < next.length; arcOrd++) { + next[arcOrd] = new FST.Arc<>(); + } + arcs = next; + } + return arcs[ord]; + } + + private IntersectTermsEnumFrame pushFrame(int state) throws IOException { + assert currentFrame != null; + + final IntersectTermsEnumFrame f = getFrame(currentFrame == null ? 
0 : 1 + currentFrame.ord); + + f.fp = f.fpOrig = currentFrame.lastSubFP; + f.prefix = currentFrame.prefix + currentFrame.suffix; + f.setState(state); + + // Walk the arc through the index -- we only + // "bother" with this so we can get the floor data + // from the index and skip floor blocks when + // possible: + FST.Arc arc = currentFrame.arc; + int idx = currentFrame.prefix; + assert currentFrame.suffix > 0; + + int initOutputCount = outputAccumulator.outputCount(); + while (idx < f.prefix) { + final int target = term.bytes[idx] & 0xff; + // TODO: we could be more efficient for the next() + // case by using current arc as starting point, + // passed to findTargetArc + arc = fr.index.findTargetArc(target, arc, getArc(1 + idx), fstReader); + assert arc != null; + outputAccumulator.push(arc.output()); + idx++; + } + + f.arc = arc; + f.outputNum = outputAccumulator.outputCount() - initOutputCount; + assert arc.isFinal(); + outputAccumulator.push(arc.nextFinalOutput()); + f.load(outputAccumulator); + outputAccumulator.pop(arc.nextFinalOutput()); + return f; + } + + @Override + public BytesRef term() { + return term; + } + + @Override + public int docFreq() throws IOException { + currentFrame.decodeMetaData(); + return currentFrame.termState.docFreq; + } + + @Override + public long totalTermFreq() throws IOException { + currentFrame.decodeMetaData(); + return currentFrame.termState.totalTermFreq; + } + + @Override + public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { + currentFrame.decodeMetaData(); + return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.termState, reuse, flags); + } + + @Override + public ImpactsEnum impacts(int flags) throws IOException { + currentFrame.decodeMetaData(); + return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, flags); + } + + private int getState() { + int state = currentFrame.state; + for (int idx = 0; idx < currentFrame.suffix; idx++) { + state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos + idx] & 0xff); + assert state != -1; + } + return state; + } + + // NOTE: specialized to only doing the first-time + // seek, but we could generalize it to allow + // arbitrary seekExact/Ceil. Note that this is a + // seekFloor! 
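+ // E.g. with terms {foo, fop}, seeking to "foo1" leaves the enum positioned on "foo", and the + // first call to next() then returns "fop", the first term after the requested one.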
+ private void seekToStartTerm(BytesRef target) throws IOException { + assert currentFrame.ord == 0; + if (term.length < target.length) { + term.bytes = ArrayUtil.grow(term.bytes, target.length); + } + FST.Arc arc = arcs[0]; + assert arc == currentFrame.arc; + + for (int idx = 0; idx <= target.length; idx++) { + + while (true) { + final int savNextEnt = currentFrame.nextEnt; + final int savePos = currentFrame.suffixesReader.getPosition(); + final int saveLengthPos = currentFrame.suffixLengthsReader.getPosition(); + final int saveStartBytePos = currentFrame.startBytePos; + final int saveSuffix = currentFrame.suffix; + final long saveLastSubFP = currentFrame.lastSubFP; + final int saveTermBlockOrd = currentFrame.termState.termBlockOrd; + + final boolean isSubBlock = currentFrame.next(); + + term.length = currentFrame.prefix + currentFrame.suffix; + if (term.bytes.length < term.length) { + term.bytes = ArrayUtil.grow(term.bytes, term.length); + } + System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix); + + if (isSubBlock && StringHelper.startsWith(target, term)) { + // Recurse + currentFrame = pushFrame(getState()); + break; + } else { + final int cmp = term.compareTo(target); + if (cmp < 0) { + if (currentFrame.nextEnt == currentFrame.entCount) { + if (currentFrame.isLastInFloor == false) { + // Advance to next floor block + currentFrame.loadNextFloorBlock(); + continue; + } else { + return; + } + } + continue; + } else if (cmp == 0) { + return; + } else { + // Fallback to prior entry: the semantics of + // this method is that the first call to + // next() will return the term after the + // requested term + currentFrame.nextEnt = savNextEnt; + currentFrame.lastSubFP = saveLastSubFP; + currentFrame.startBytePos = saveStartBytePos; + currentFrame.suffix = saveSuffix; + currentFrame.suffixesReader.setPosition(savePos); + currentFrame.suffixLengthsReader.setPosition(saveLengthPos); + currentFrame.termState.termBlockOrd = saveTermBlockOrd; + System.arraycopy( + currentFrame.suffixBytes, + currentFrame.startBytePos, + term.bytes, + currentFrame.prefix, + currentFrame.suffix + ); + term.length = currentFrame.prefix + currentFrame.suffix; + // If the last entry was a block we don't + // need to bother recursing and pushing to + // the last term under it because the first + // next() will simply skip the frame anyway + return; + } + } + } + } + + assert false; + } + + private boolean popPushNext() throws IOException { + // Pop finished frames + while (currentFrame.nextEnt == currentFrame.entCount) { + if (currentFrame.isLastInFloor == false) { + // Advance to next floor block + currentFrame.loadNextFloorBlock(); + break; + } else { + if (currentFrame.ord == 0) { + throw NoMoreTermsException.INSTANCE; + } + final long lastFP = currentFrame.fpOrig; + outputAccumulator.pop(currentFrame.outputNum); + currentFrame = stack[currentFrame.ord - 1]; + currentTransition = currentFrame.transition; + assert currentFrame.lastSubFP == lastFP; + } + } + + return currentFrame.next(); + } + + // Only used internally when there are no more terms in next(): + private static final class NoMoreTermsException extends RuntimeException { + + // Only used internally when there are no more terms in next(): + public static final NoMoreTermsException INSTANCE = new NoMoreTermsException(); + + private NoMoreTermsException() {} + + @Override + public Throwable fillInStackTrace() { + // Do nothing: + return this; + } + } + + @Override + public BytesRef next() 
throws IOException { + try { + return _next(); + } catch (@SuppressWarnings("unused") NoMoreTermsException eoi) { + // Provoke NPE if we are (illegally!) called again: + currentFrame = null; + return null; + } + } + + private BytesRef _next() throws IOException { + + boolean isSubBlock = popPushNext(); + + nextTerm: while (true) { + assert currentFrame.transition == currentTransition; + + int state; + int lastState; + + // NOTE: suffix == 0 can only happen on the first term in a block, when + // there is a term exactly matching a prefix in the index. If we + // could somehow re-org the code so we only checked this case immediately + // after pushing a frame... + if (currentFrame.suffix != 0) { + + final byte[] suffixBytes = currentFrame.suffixBytes; + + // This is the first byte of the suffix of the term we are now on: + final int label = suffixBytes[currentFrame.startBytePos] & 0xff; + + if (label < currentTransition.min) { + // Common case: we are scanning terms in this block to "catch up" to + // current transition in the automaton: + int minTrans = currentTransition.min; + while (currentFrame.nextEnt < currentFrame.entCount) { + isSubBlock = currentFrame.next(); + if ((suffixBytes[currentFrame.startBytePos] & 0xff) >= minTrans) { + continue nextTerm; + } + } + + // End of frame: + isSubBlock = popPushNext(); + continue nextTerm; + } + + // Advance where we are in the automaton to match this label: + + while (label > currentTransition.max) { + if (currentFrame.transitionIndex >= currentFrame.transitionCount - 1) { + // Pop this frame: no further matches are possible because + // we've moved beyond what the max transition will allow + if (currentFrame.ord == 0) { + // Provoke NPE if we are (illegally!) called again: + currentFrame = null; + return null; + } + outputAccumulator.pop(currentFrame.outputNum); + currentFrame = stack[currentFrame.ord - 1]; + currentTransition = currentFrame.transition; + isSubBlock = popPushNext(); + continue nextTerm; + } + currentFrame.transitionIndex++; + automaton.getNextTransition(currentTransition); + + if (label < currentTransition.min) { + int minTrans = currentTransition.min; + while (currentFrame.nextEnt < currentFrame.entCount) { + isSubBlock = currentFrame.next(); + if ((suffixBytes[currentFrame.startBytePos] & 0xff) >= minTrans) { + continue nextTerm; + } + } + + // End of frame: + isSubBlock = popPushNext(); + continue nextTerm; + } + } + + if (commonSuffix != null && isSubBlock == false) { + final int termLen = currentFrame.prefix + currentFrame.suffix; + if (termLen < commonSuffix.length) { + // No match + isSubBlock = popPushNext(); + continue nextTerm; + } + + final byte[] commonSuffixBytes = commonSuffix.bytes; + + final int lenInPrefix = commonSuffix.length - currentFrame.suffix; + assert commonSuffix.offset == 0; + int suffixBytesPos; + int commonSuffixBytesPos = 0; + + if (lenInPrefix > 0) { + // A prefix of the common suffix overlaps with + // the suffix of the block prefix so we first + // test whether the prefix part matches: + final byte[] termBytes = term.bytes; + int termBytesPos = currentFrame.prefix - lenInPrefix; + assert termBytesPos >= 0; + final int termBytesPosEnd = currentFrame.prefix; + while (termBytesPos < termBytesPosEnd) { + if (termBytes[termBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) { + isSubBlock = popPushNext(); + continue nextTerm; + } + } + suffixBytesPos = currentFrame.startBytePos; + } else { + suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - commonSuffix.length; + } + + // Test 
overlapping suffix part: + final int commonSuffixBytesPosEnd = commonSuffix.length; + while (commonSuffixBytesPos < commonSuffixBytesPosEnd) { + if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) { + isSubBlock = popPushNext(); + continue nextTerm; + } + } + } + + // TODO: maybe we should do the same linear test + // that AutomatonTermsEnum does, so that if we + // reach a part of the automaton where .* is + // "temporarily" accepted, we just blindly .next() + // until the limit + + // See if the term suffix matches the automaton: + + // We know from above that the first byte in our suffix (label) matches + // the current transition, so we step from the 2nd byte + // in the suffix: + lastState = currentFrame.state; + state = currentTransition.dest; + + int end = currentFrame.startBytePos + currentFrame.suffix; + for (int idx = currentFrame.startBytePos + 1; idx < end; idx++) { + lastState = state; + state = runAutomaton.step(state, suffixBytes[idx] & 0xff); + if (state == -1) { + // No match + isSubBlock = popPushNext(); + continue nextTerm; + } + } + } else { + state = currentFrame.state; + lastState = currentFrame.lastState; + } + + if (isSubBlock) { + // Match! Recurse: + copyTerm(); + currentFrame = pushFrame(state); + currentTransition = currentFrame.transition; + currentFrame.lastState = lastState; + } else if (runAutomaton.isAccept(state)) { + copyTerm(); + assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0 + : "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString(); + return term; + } else { + // This term is a prefix of a term accepted by the automaton, but is not itself accepted + } + + isSubBlock = popPushNext(); + } + } + + private void copyTerm() { + final int len = currentFrame.prefix + currentFrame.suffix; + if (term.bytes.length < len) { + term.bytes = ArrayUtil.grow(term.bytes, len); + } + System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix); + term.length = len; + } + + @Override + public boolean seekExact(BytesRef text) { + throw new UnsupportedOperationException(); + } + + @Override + public void seekExact(long ord) { + throw new UnsupportedOperationException(); + } + + @Override + public long ord() { + throw new UnsupportedOperationException(); + } + + @Override + public SeekStatus seekCeil(BytesRef text) { + throw new UnsupportedOperationException(); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/IntersectTermsEnumFrame.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/IntersectTermsEnumFrame.java new file mode 100644 index 0000000000000..452d13973b973 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/IntersectTermsEnumFrame.java @@ -0,0 +1,326 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ +package org.elasticsearch.index.codec.postings.terms; + +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Transition; +import org.apache.lucene.util.fst.FST; + +import java.io.IOException; +import java.util.Arrays; + +// TODO: can we share this with the frame in STE? +final class IntersectTermsEnumFrame { + final int ord; + long fp; + long fpOrig; + long fpEnd; + long lastSubFP; + + // private static boolean DEBUG = IntersectTermsEnum.DEBUG; + + // State in automaton + int state; + + // State just before the last label + int lastState; + + int metaDataUpto; + + byte[] suffixBytes = new byte[128]; + final ByteArrayDataInput suffixesReader = new ByteArrayDataInput(); + + byte[] suffixLengthBytes; + final ByteArrayDataInput suffixLengthsReader; + + byte[] statBytes = new byte[64]; + int statsSingletonRunLength = 0; + final ByteArrayDataInput statsReader = new ByteArrayDataInput(); + + final ByteArrayDataInput floorDataReader = new ByteArrayDataInput(); + + // Length of prefix shared by all terms in this block + int prefix; + + // Number of entries (term or sub-block) in this block + int entCount; + + // Which term we will next read + int nextEnt; + + // True if this block is either not a floor block, + // or, it's the last sub-block of a floor block + boolean isLastInFloor; + + // True if all entries are terms + boolean isLeafBlock; + + int numFollowFloorBlocks; + int nextFloorLabel; + + final Transition transition = new Transition(); + int transitionIndex; + int transitionCount; + + FST.Arc arc; + + final BlockTermState termState; + + // metadata buffer + byte[] bytes = new byte[32]; + + final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + + int outputNum; + + int startBytePos; + int suffix; + + private final IntersectTermsEnum ite; + + IntersectTermsEnumFrame(IntersectTermsEnum ite, int ord) throws IOException { + this.ite = ite; + this.ord = ord; + this.termState = ite.fr.parent.postingsReader.newTermState(); + this.termState.totalTermFreq = -1; + suffixLengthBytes = new byte[32]; + suffixLengthsReader = new ByteArrayDataInput(); + } + + void loadNextFloorBlock() throws IOException { + assert numFollowFloorBlocks > 0 : "nextFloorLabel=" + nextFloorLabel; + + do { + fp = fpOrig + (floorDataReader.readVLong() >>> 1); + numFollowFloorBlocks--; + if (numFollowFloorBlocks != 0) { + nextFloorLabel = floorDataReader.readByte() & 0xff; + } else { + nextFloorLabel = 256; + } + } while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min); + + load((Long) null); + } + + public void setState(int state) { + this.state = state; + transitionIndex = 0; + transitionCount = ite.automaton.getNumTransitions(state); + if (transitionCount != 0) { + ite.automaton.initTransition(state, transition); + ite.automaton.getNextTransition(transition); + } else { + + // Must set min to -1 so the "label < min" check never falsely triggers: + transition.min = -1; + + // Must set max to -1 so we immediately realize we need to step to the next transition and + // then pop this frame: + transition.max = -1; + } + } + + void load(BytesRef frameIndexData) throws IOException { + floorDataReader.reset(frameIndexData.bytes, frameIndexData.offset, frameIndexData.length); + load(ite.fr.readVLongOutput(floorDataReader)); + } + + 
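+ // On-disk layout of the block header decoded by load(Long) below: a vint holding + // (entCount << 1) | (isLastInFloor ? 1 : 0), followed by a vlong holding + // (numSuffixBytes << 3) | (isLeafBlock ? 0x04 : 0) | compressionAlgorithmCode (low 2 bits). +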
void load(SegmentTermsEnum.OutputAccumulator outputAccumulator) throws IOException { + outputAccumulator.prepareRead(); + long code = ite.fr.readVLongOutput(outputAccumulator); + outputAccumulator.setFloorData(floorDataReader); + load(code); + } + + void load(Long blockCode) throws IOException { + if (blockCode != null) { + // This block is the first one in a possible sequence of floor blocks corresponding to a + // single seek point from the FST terms index + if ((blockCode & Lucene90BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0) { + // Floor frame + numFollowFloorBlocks = floorDataReader.readVInt(); + nextFloorLabel = floorDataReader.readByte() & 0xff; + + // If current state is not accept, and has transitions, we must process + // first block in case it has empty suffix: + if (ite.runAutomaton.isAccept(state) == false && transitionCount != 0) { + // Maybe skip floor blocks: + assert transitionIndex == 0 : "transitionIndex=" + transitionIndex; + while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) { + fp = fpOrig + (floorDataReader.readVLong() >>> 1); + numFollowFloorBlocks--; + if (numFollowFloorBlocks != 0) { + nextFloorLabel = floorDataReader.readByte() & 0xff; + } else { + nextFloorLabel = 256; + } + } + } + } + } + + ite.in.seek(fp); + int code = ite.in.readVInt(); + entCount = code >>> 1; + assert entCount > 0; + isLastInFloor = (code & 1) != 0; + + // term suffixes: + final long codeL = ite.in.readVLong(); + isLeafBlock = (codeL & 0x04) != 0; + final int numSuffixBytes = (int) (codeL >>> 3); + if (suffixBytes.length < numSuffixBytes) { + suffixBytes = new byte[ArrayUtil.oversize(numSuffixBytes, 1)]; + } + final CompressionAlgorithm compressionAlg; + try { + compressionAlg = CompressionAlgorithm.byCode((int) codeL & 0x03); + } catch (IllegalArgumentException e) { + throw new CorruptIndexException(e.getMessage(), ite.in, e); + } + compressionAlg.read(ite.in, suffixBytes, numSuffixBytes); + suffixesReader.reset(suffixBytes, 0, numSuffixBytes); + + int numSuffixLengthBytes = ite.in.readVInt(); + final boolean allEqual = (numSuffixLengthBytes & 0x01) != 0; + numSuffixLengthBytes >>>= 1; + if (suffixLengthBytes.length < numSuffixLengthBytes) { + suffixLengthBytes = new byte[ArrayUtil.oversize(numSuffixLengthBytes, 1)]; + } + if (allEqual) { + Arrays.fill(suffixLengthBytes, 0, numSuffixLengthBytes, ite.in.readByte()); + } else { + ite.in.readBytes(suffixLengthBytes, 0, numSuffixLengthBytes); + } + suffixLengthsReader.reset(suffixLengthBytes, 0, numSuffixLengthBytes); + + // stats + int numBytes = ite.in.readVInt(); + if (statBytes.length < numBytes) { + statBytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ite.in.readBytes(statBytes, 0, numBytes); + statsReader.reset(statBytes, 0, numBytes); + statsSingletonRunLength = 0; + metaDataUpto = 0; + + termState.termBlockOrd = 0; + nextEnt = 0; + + // metadata + numBytes = ite.in.readVInt(); + if (bytes.length < numBytes) { + bytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ite.in.readBytes(bytes, 0, numBytes); + bytesReader.reset(bytes, 0, numBytes); + + if (isLastInFloor == false) { + // Sub-blocks of a single floor block are always + // written one after another -- tail recurse: + fpEnd = ite.in.getFilePointer(); + } + } + + // TODO: maybe add scanToLabel; should give perf boost + + // Decodes next entry; returns true if it's a sub-block + public boolean next() { + if (isLeafBlock) { + nextLeaf(); + return false; + } else { + return nextNonLeaf(); + } + } + + public void nextLeaf() { + assert nextEnt != -1 
&& nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; + nextEnt++; + suffix = suffixLengthsReader.readVInt(); + startBytePos = suffixesReader.getPosition(); + suffixesReader.skipBytes(suffix); + } + + public boolean nextNonLeaf() { + assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; + nextEnt++; + final int code = suffixLengthsReader.readVInt(); + suffix = code >>> 1; + startBytePos = suffixesReader.getPosition(); + suffixesReader.skipBytes(suffix); + if ((code & 1) == 0) { + // A normal term + termState.termBlockOrd++; + return false; + } else { + // A sub-block; make sub-FP absolute: + lastSubFP = fp - suffixLengthsReader.readVLong(); + return true; + } + } + + public int getTermBlockOrd() { + return isLeafBlock ? nextEnt : termState.termBlockOrd; + } + + public void decodeMetaData() throws IOException { + + // lazily catch up on metadata decode: + final int limit = getTermBlockOrd(); + boolean absolute = metaDataUpto == 0; + assert limit > 0; + + // TODO: better API would be "jump straight to term=N"??? + while (metaDataUpto < limit) { + + // TODO: we could make "tiers" of metadata, ie, + // decode docFreq/totalTF but don't decode postings + // metadata; this way caller could get + // docFreq/totalTF w/o paying decode cost for + // postings + + // TODO: if docFreq were bulk decoded we could + // just skipN here: + + // stats + if (statsSingletonRunLength > 0) { + termState.docFreq = 1; + termState.totalTermFreq = 1; + statsSingletonRunLength--; + } else { + int token = statsReader.readVInt(); + if ((token & 1) == 1) { + termState.docFreq = 1; + termState.totalTermFreq = 1; + statsSingletonRunLength = token >>> 1; + } else { + termState.docFreq = token >>> 1; + if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + termState.totalTermFreq = termState.docFreq; + } else { + termState.totalTermFreq = termState.docFreq + statsReader.readVLong(); + } + } + } + // metadata + ite.fr.parent.postingsReader.decodeTerm(bytesReader, ite.fr.fieldInfo, termState, absolute); + + metaDataUpto++; + absolute = false; + } + termState.termBlockOrd = metaDataUpto; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/Lucene90BlockTreeTermsReader.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/Lucene90BlockTreeTermsReader.java new file mode 100644 index 0000000000000..2bbf9b308420b --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/Lucene90BlockTreeTermsReader.java @@ -0,0 +1,328 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ +package org.elasticsearch.index.codec.postings.terms; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.internal.hppc.IntCursor; +import org.apache.lucene.internal.hppc.IntObjectHashMap; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.ReadAdvice; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.fst.ByteSequenceOutputs; +import org.apache.lucene.util.fst.Outputs; +import org.elasticsearch.core.SuppressForbidden; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * A block-based terms index and dictionary that assigns terms to variable length blocks according + * to how they share prefixes. The terms index is a prefix trie whose leaves are term blocks. The + * advantage of this approach is that seekExact is often able to determine a term cannot exist + * without doing any IO, and intersection with Automata is very fast. Note that this terms + * dictionary has its own fixed terms index (ie, it does not support a pluggable terms index + * implementation). + * + *
<p>NOTE: this terms dictionary supports min/maxItemsPerBlock during indexing to control + * how much memory the terms index uses. + + *
<p>The data structure used by this implementation is very similar to a burst trie + * (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499), but with added logic to break + * up too-large blocks of all terms sharing a given prefix into smaller ones. + + *
<p>Use {@link org.apache.lucene.index.CheckIndex} with the -verbose option to see + * summary statistics on the blocks in the dictionary. + + *
<p>
See {@link Lucene90BlockTreeTermsWriter}. + */ +public final class Lucene90BlockTreeTermsReader extends FieldsProducer { + + static final Outputs FST_OUTPUTS = ByteSequenceOutputs.getSingleton(); + + static final BytesRef NO_OUTPUT = FST_OUTPUTS.getNoOutput(); + + static final int OUTPUT_FLAGS_NUM_BITS = 2; + static final int OUTPUT_FLAGS_MASK = 0x3; + static final int OUTPUT_FLAG_IS_FLOOR = 0x1; + static final int OUTPUT_FLAG_HAS_TERMS = 0x2; + + /** Extension of terms file */ + static final String TERMS_EXTENSION = "tim"; + + static final String TERMS_CODEC_NAME = "BlockTreeTermsDict"; + + /** Initial terms format. */ + public static final int VERSION_START = 0; + + /** + * Version that encode output as MSB VLong for better outputs sharing in FST, see GITHUB#12620. + */ + public static final int VERSION_MSB_VLONG_OUTPUT = 1; + + /** The version that specialize arc store for continuous label in FST. */ + public static final int VERSION_FST_CONTINUOUS_ARCS = 2; + + /** Current terms format. */ + public static final int VERSION_CURRENT = VERSION_FST_CONTINUOUS_ARCS; + + /** Extension of terms index file */ + static final String TERMS_INDEX_EXTENSION = "tip"; + + static final String TERMS_INDEX_CODEC_NAME = "BlockTreeTermsIndex"; + + /** Extension of terms meta file */ + static final String TERMS_META_EXTENSION = "tmd"; + + static final String TERMS_META_CODEC_NAME = "BlockTreeTermsMeta"; + + // Open input to the main terms dict file (_X.tib) + final IndexInput termsIn; + // Open input to the terms index file (_X.tip) + final IndexInput indexIn; + + // private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + // Reads the terms dict entries, to gather state to + // produce DocsEnum on demand + final PostingsReaderBase postingsReader; + + private final FieldInfos fieldInfos; + private final IntObjectHashMap fieldMap; + private final List fieldList; + + final String segment; + + final int version; + + /** Sole constructor. 
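+ * Opens the {@code .tim}, {@code .tip} and {@code .tmd} files for the segment, validates their + * codec headers, and builds one {@link FieldReader} per field from the decoded metadata.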
*/ + @SuppressForbidden(reason = "require usage of Lucene's IOUtils") + public Lucene90BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException { + boolean success = false; + + this.postingsReader = postingsReader; + this.segment = state.segmentInfo.name; + + try { + String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION); + termsIn = state.directory.openInput(termsName, state.context); + version = CodecUtil.checkIndexHeader( + termsIn, + TERMS_CODEC_NAME, + VERSION_START, + VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + + String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION); + indexIn = state.directory.openInput(indexName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD)); + CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix); + + // Read per-field details + String metaName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_META_EXTENSION); + IntObjectHashMap fieldMap = null; + Throwable priorE = null; + long indexLength = -1, termsLength = -1; + try (ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaName)) { + try { + CodecUtil.checkIndexHeader( + metaIn, + TERMS_META_CODEC_NAME, + version, + version, + state.segmentInfo.getId(), + state.segmentSuffix + ); + postingsReader.init(metaIn, state); + + final int numFields = metaIn.readVInt(); + if (numFields < 0) { + throw new CorruptIndexException("invalid numFields: " + numFields, metaIn); + } + fieldMap = new IntObjectHashMap<>(numFields); + for (int i = 0; i < numFields; ++i) { + final int field = metaIn.readVInt(); + final long numTerms = metaIn.readVLong(); + if (numTerms <= 0) { + throw new CorruptIndexException("Illegal numTerms for field number: " + field, metaIn); + } + final BytesRef rootCode = readBytesRef(metaIn); + final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); + if (fieldInfo == null) { + throw new CorruptIndexException("invalid field number: " + field, metaIn); + } + final long sumTotalTermFreq = metaIn.readVLong(); + // when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is + // written. + final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? 
sumTotalTermFreq : metaIn.readVLong(); + final int docCount = metaIn.readVInt(); + BytesRef minTerm = readBytesRef(metaIn); + BytesRef maxTerm = readBytesRef(metaIn); + if (numTerms == 1) { + assert maxTerm.equals(minTerm); + // save heap for edge case of a single term only so min == max + maxTerm = minTerm; + } + // START CHANGE: + minTerm = NonForkedHelper.shouldKeepMinOrMaxTerm(minTerm); + maxTerm = NonForkedHelper.shouldKeepMinOrMaxTerm(maxTerm); + // END CHANGE + if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs + throw new CorruptIndexException( + "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), + metaIn + ); + } + if (sumDocFreq < docCount) { // #postings must be >= #docs with field + throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, metaIn); + } + if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings + throw new CorruptIndexException( + "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, + metaIn + ); + } + final long indexStartFP = metaIn.readVLong(); + FieldReader previous = fieldMap.put( + fieldInfo.number, + new FieldReader( + this, + fieldInfo, + numTerms, + rootCode, + sumTotalTermFreq, + sumDocFreq, + docCount, + indexStartFP, + metaIn, + indexIn, + minTerm, + maxTerm + ) + ); + if (previous != null) { + throw new CorruptIndexException("duplicate field: " + fieldInfo.name, metaIn); + } + } + indexLength = metaIn.readLong(); + termsLength = metaIn.readLong(); + } catch (Throwable exception) { + priorE = exception; + } finally { + if (metaIn != null) { + CodecUtil.checkFooter(metaIn, priorE); + } else if (priorE != null) { + IOUtils.rethrowAlways(priorE); + } + } + } + // At this point the checksum of the meta file has been verified so the lengths are likely + // correct + CodecUtil.retrieveChecksum(indexIn, indexLength); + CodecUtil.retrieveChecksum(termsIn, termsLength); + fieldInfos = state.fieldInfos; + this.fieldMap = fieldMap; + this.fieldList = sortFieldNames(fieldMap, state.fieldInfos); + success = true; + } finally { + if (success == false) { + // this.close() will close in: + IOUtils.closeWhileHandlingException(this); + } + } + } + + private static BytesRef readBytesRef(IndexInput in) throws IOException { + int numBytes = in.readVInt(); + if (numBytes < 0) { + throw new CorruptIndexException("invalid bytes length: " + numBytes, in); + } + + BytesRef bytes = new BytesRef(numBytes); + bytes.length = numBytes; + in.readBytes(bytes.bytes, 0, numBytes); + + return bytes; + } + + private static List sortFieldNames(IntObjectHashMap fieldMap, FieldInfos fieldInfos) { + List fieldNames = new ArrayList<>(fieldMap.size()); + for (IntCursor fieldNumber : fieldMap.keys()) { + fieldNames.add(fieldInfos.fieldInfo(fieldNumber.value).name); + } + fieldNames.sort(null); + return Collections.unmodifiableList(fieldNames); + } + + // for debugging + // private static String toHex(int v) { + // return "0x" + Integer.toHexString(v); + // } + + @Override + @SuppressForbidden(reason = "require usage of Lucene's IOUtils") + public void close() throws IOException { + try { + IOUtils.close(indexIn, termsIn, postingsReader); + } finally { + // Clear so refs to terms index is GCable even if + // app hangs onto us: + fieldMap.clear(); + } + } + + @Override + public Iterator iterator() { + return fieldList.iterator(); + } + + @Override + public Terms terms(String field) throws IOException { + assert field != null; + FieldInfo 
fieldInfo = fieldInfos.fieldInfo(field); + return fieldInfo == null ? null : fieldMap.get(fieldInfo.number); + } + + @Override + public int size() { + return fieldMap.size(); + } + + @Override + public void checkIntegrity() throws IOException { + // terms index + CodecUtil.checksumEntireFile(indexIn); + + // term dictionary + CodecUtil.checksumEntireFile(termsIn); + + // postings + postingsReader.checkIntegrity(); + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(fields=" + fieldMap.size() + ",delegate=" + postingsReader + ")"; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/NonForkedHelper.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/NonForkedHelper.java new file mode 100644 index 0000000000000..2e14322a3bc12 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/NonForkedHelper.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.postings.terms; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.util.FeatureFlag; + +/** + * This class is not forked from Lucene and is a place to keep non-forked code. + */ +public final class NonForkedHelper { + + public static final FeatureFlag USE_FORKED_TERMS_READER = new FeatureFlag("use_forked_terms_reader"); + + /** + * Returns the term if its length is smaller than or equal to 512 bytes; otherwise null is returned. + * + * Returning null means the min or max term will be read from disk each time {@link FieldReader#getMin()} + * or {@link FieldReader#getMax()} is invoked. + * + * @return the term if its length is smaller than or equal to 512 bytes; otherwise null + */ + public static BytesRef shouldKeepMinOrMaxTerm(BytesRef term) { + if (term.length <= 512) { + return term; + } else { + return null; + } + } + + private NonForkedHelper() {} +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/SegmentTermsEnum.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/SegmentTermsEnum.java new file mode 100644 index 0000000000000..e2d9fc41ccfb0 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/SegmentTermsEnum.java @@ -0,0 +1,1214 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ +package org.elasticsearch.index.codec.postings.terms; + +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.index.BaseTermsEnum; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.TermState; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.IOBooleanSupplier; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.fst.Util; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.Arrays; + +/** Iterates through terms in this field. */ +final class SegmentTermsEnum extends BaseTermsEnum { + + // Lazy init: + IndexInput in; + + private SegmentTermsEnumFrame[] stack; + private final SegmentTermsEnumFrame staticFrame; + SegmentTermsEnumFrame currentFrame; + boolean termExists; + final FieldReader fr; + + private int targetBeforeCurrentLength; + + // static boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + private final OutputAccumulator outputAccumulator = new OutputAccumulator(); + + // What prefix of the current term was present in the index; when we only next() through the + // index, this stays at 0. It's only set when + // we seekCeil/Exact: + private int validIndexPrefix; + + // assert only: + private boolean eof; + + final BytesRefBuilder term = new BytesRefBuilder(); + private final FST.BytesReader fstReader; + + @SuppressWarnings({ "rawtypes", "unchecked" }) + private FST.Arc[] arcs = new FST.Arc[1]; + + SegmentTermsEnum(FieldReader fr) throws IOException { + this.fr = fr; + + // if (DEBUG) { + // System.out.println("BTTR.init seg=" + fr.parent.segment); + // } + stack = new SegmentTermsEnumFrame[0]; + + // Used to hold seek by TermState, or cached seek + staticFrame = new SegmentTermsEnumFrame(this, -1); + + if (fr.index == null) { + fstReader = null; + } else { + fstReader = fr.index.getBytesReader(); + } + + // Init w/ root block; don't use index since it may + // not (and need not) have been loaded + for (int arcIdx = 0; arcIdx < arcs.length; arcIdx++) { + arcs[arcIdx] = new FST.Arc<>(); + } + + currentFrame = staticFrame; + final FST.Arc arc; + if (fr.index != null) { + arc = fr.index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! + assert arc.isFinal(); + } else { + arc = null; + } + // currentFrame = pushFrame(arc, rootCode, 0); + // currentFrame.loadBlock(); + validIndexPrefix = 0; + // if (DEBUG) { + // System.out.println("init frame state " + currentFrame.ord); + // printSeekState(); + // } + + // System.out.println(); + // computeBlockStats().print(System.out); + } + + // Not private to avoid synthetic access$NNN methods + void initIndexInput() { + if (this.in == null) { + this.in = fr.parent.termsIn.clone(); + } + } + + /** Runs next() through the entire terms dict, computing aggregate statistics. */ + public Stats computeBlockStats() throws IOException { + + Stats stats = new Stats(fr.parent.segment, fr.fieldInfo.name); + if (fr.index != null) { + stats.indexNumBytes = fr.index.ramBytesUsed(); + } + + currentFrame = staticFrame; + FST.Arc arc; + if (fr.index != null) { + arc = fr.index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! 
+ assert arc.isFinal(); + } else { + arc = null; + } + + // Empty string prefix must have an output in the + // index! + currentFrame = pushFrame(arc, fr.rootCode, 0); + currentFrame.fpOrig = currentFrame.fp; + currentFrame.loadBlock(); + validIndexPrefix = 0; + + stats.startBlock(currentFrame, currentFrame.isLastInFloor == false); + + allTerms: while (true) { + + // Pop finished blocks + while (currentFrame.nextEnt == currentFrame.entCount) { + stats.endBlock(currentFrame); + if (currentFrame.isLastInFloor == false) { + // Advance to next floor block + currentFrame.loadNextFloorBlock(); + stats.startBlock(currentFrame, true); + break; + } else { + if (currentFrame.ord == 0) { + break allTerms; + } + final long lastFP = currentFrame.fpOrig; + currentFrame = stack[currentFrame.ord - 1]; + assert lastFP == currentFrame.lastSubFP; + // if (DEBUG) { + // System.out.println(" reset validIndexPrefix=" + validIndexPrefix); + // } + } + } + + while (true) { + if (currentFrame.next()) { + // Push to new block: + currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length()); + currentFrame.fpOrig = currentFrame.fp; + // This is a "next" frame -- even if it's + // floor'd we must pretend it isn't so we don't + // try to scan to the right floor frame: + currentFrame.loadBlock(); + stats.startBlock(currentFrame, currentFrame.isLastInFloor == false); + } else { + stats.term(term.get()); + break; + } + } + } + + stats.finish(); + + // Put root frame back: + currentFrame = staticFrame; + if (fr.index != null) { + arc = fr.index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! + assert arc.isFinal(); + } else { + arc = null; + } + currentFrame = pushFrame(arc, fr.rootCode, 0); + currentFrame.rewind(); + currentFrame.loadBlock(); + validIndexPrefix = 0; + term.clear(); + + return stats; + } + + private SegmentTermsEnumFrame getFrame(int ord) throws IOException { + if (ord >= stack.length) { + final SegmentTermsEnumFrame[] next = new SegmentTermsEnumFrame[ArrayUtil.oversize( + 1 + ord, + RamUsageEstimator.NUM_BYTES_OBJECT_REF + )]; + System.arraycopy(stack, 0, next, 0, stack.length); + for (int stackOrd = stack.length; stackOrd < next.length; stackOrd++) { + next[stackOrd] = new SegmentTermsEnumFrame(this, stackOrd); + } + stack = next; + } + assert stack[ord].ord == ord; + return stack[ord]; + } + + private FST.Arc getArc(int ord) { + if (ord >= arcs.length) { + @SuppressWarnings({ "rawtypes", "unchecked" }) + final FST.Arc[] next = new FST.Arc[ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(arcs, 0, next, 0, arcs.length); + for (int arcOrd = arcs.length; arcOrd < next.length; arcOrd++) { + next[arcOrd] = new FST.Arc<>(); + } + arcs = next; + } + return arcs[ord]; + } + + SegmentTermsEnumFrame pushFrame(FST.Arc arc, BytesRef frameData, int length) throws IOException { + outputAccumulator.reset(); + outputAccumulator.push(frameData); + return pushFrame(arc, length); + } + + // Pushes a frame we seek'd to + SegmentTermsEnumFrame pushFrame(FST.Arc arc, int length) throws IOException { + outputAccumulator.prepareRead(); + final long code = fr.readVLongOutput(outputAccumulator); + final long fpSeek = code >>> Lucene90BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS; + final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); + f.hasTerms = (code & Lucene90BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS) != 0; + f.hasTermsOrig = f.hasTerms; + f.isFloor = (code & Lucene90BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0; + if (f.isFloor) { + 
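+ // Floor data: this block was split because it grew too large; the extra bytes record the + // follow-on floor blocks and the suffix label at which each one starts.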
+ f.setFloorData(outputAccumulator);
+ }
+ pushFrame(arc, fpSeek, length);
+
+ return f;
+ }
+
+ // Pushes next'd frame or seek'd frame; we later
+ // lazy-load the frame only when needed
+ SegmentTermsEnumFrame pushFrame(FST.Arc<BytesRef> arc, long fp, int length) throws IOException {
+ final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
+ f.arc = arc;
+ if (f.fpOrig == fp && f.nextEnt != -1) {
+ // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
+ // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
+ // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
+ // term.length + " vs prefix=" + f.prefix);
+ // if (f.prefix > targetBeforeCurrentLength) {
+ if (f.ord > targetBeforeCurrentLength) {
+ f.rewind();
+ } else {
+ // if (DEBUG) {
+ // System.out.println(" skip rewind!");
+ // }
+ }
+ assert length == f.prefixLength;
+ } else {
+ f.nextEnt = -1;
+ f.prefixLength = length;
+ f.state.termBlockOrd = 0;
+ f.fpOrig = f.fp = fp;
+ f.lastSubFP = -1;
+ // if (DEBUG) {
+ // final int sav = term.length;
+ // term.length = length;
+ // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
+ // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
+ // term.length = sav;
+ // }
+ }
+
+ return f;
+ }
+
+ // asserts only
+ private boolean clearEOF() {
+ eof = false;
+ return true;
+ }
+
+ // asserts only
+ private boolean setEOF() {
+ eof = true;
+ return true;
+ }
+
+ private IOBooleanSupplier prepareSeekExact(BytesRef target, boolean prefetch) throws IOException {
+ if (fr.index == null) {
+ throw new IllegalStateException("terms index was not loaded");
+ }
+
+ if (fr.size() > 0 && (target.compareTo(fr.getMin()) < 0 || target.compareTo(fr.getMax()) > 0)) {
+ return null;
+ }
+
+ term.grow(1 + target.length);
+
+ assert clearEOF();
+
+ // if (DEBUG) {
+ // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
+ // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
+ // ToStringUtils.bytesRefToString(term) +
+ // " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
+ // printSeekState(System.out);
+ // }
+
+ FST.Arc<BytesRef> arc;
+ int targetUpto;
+
+ targetBeforeCurrentLength = currentFrame.ord;
+ outputAccumulator.reset();
+
+ if (currentFrame != staticFrame) {
+
+ // We are already seek'd; find the common
+ // prefix of new seek term vs current term and
+ // re-use the corresponding seek state. For
+ // example, if app first seeks to foobar, then
+ // seeks to foobaz, we can re-use the seek state
+ // for the first 5 bytes.
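+ // Only the first validIndexPrefix bytes of the current term are still covered by
+ // valid index frames, so only that much of the previous seek state can be trusted here.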
+ + // if (DEBUG) { + // System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix); + // } + + arc = arcs[0]; + assert arc.isFinal(); + outputAccumulator.push(arc.output()); + targetUpto = 0; + + SegmentTermsEnumFrame lastFrame = stack[0]; + assert validIndexPrefix <= term.length(); + + final int targetLimit = Math.min(target.length, validIndexPrefix); + + int cmp = 0; + + // First compare up to valid seek frames: + while (targetUpto < targetLimit) { + cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); + // if (DEBUG) { + // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + + // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + // + " output=" + output); + // } + if (cmp != 0) { + break; + } + arc = arcs[1 + targetUpto]; + assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF) + : "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF); + outputAccumulator.push(arc.output()); + + if (arc.isFinal()) { + lastFrame = stack[1 + lastFrame.ord]; + } + targetUpto++; + } + + if (cmp == 0) { + // Second compare the rest of the term, but + // don't save arc/output/frame; we only do this + // to find out if the target term is before, + // equal or after the current term + cmp = Arrays.compareUnsigned( + term.bytes(), + targetUpto, + term.length(), + target.bytes, + target.offset + targetUpto, + target.offset + target.length + ); + } + + if (cmp < 0) { + // Common case: target term is after current + // term, ie, app is seeking multiple terms + // in sorted order + // if (DEBUG) { + // System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); + // frame.ord=" + lastFrame.ord); + // } + currentFrame = lastFrame; + + } else if (cmp > 0) { + // Uncommon case: target term + // is before current term; this means we can + // keep the currentFrame but we must rewind it + // (so we scan from the start) + targetBeforeCurrentLength = lastFrame.ord; + // if (DEBUG) { + // System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); + // rewind frame ord=" + lastFrame.ord); + // } + currentFrame = lastFrame; + currentFrame.rewind(); + } else { + // Target is exactly the same as current term + assert term.length() == target.length; + if (termExists) { + // if (DEBUG) { + // System.out.println(" target is same as current; return true"); + // } + return () -> true; + } else { + // if (DEBUG) { + // System.out.println(" target is same as current but term doesn't exist"); + // } + } + // validIndexPrefix = currentFrame.depth; + // term.length = target.length; + // return termExists; + } + + } else { + + targetBeforeCurrentLength = -1; + arc = fr.index.getFirstArc(arcs[0]); + + // Empty string prefix must have an output (block) in the index! 
+ assert arc.isFinal();
+ assert arc.output() != null;
+
+ // if (DEBUG) {
+ // System.out.println(" no seek state; push root frame");
+ // }
+
+ outputAccumulator.push(arc.output());
+
+ currentFrame = staticFrame;
+
+ // term.length = 0;
+ targetUpto = 0;
+ outputAccumulator.push(arc.nextFinalOutput());
+ currentFrame = pushFrame(arc, 0);
+ outputAccumulator.pop(arc.nextFinalOutput());
+ }
+
+ // if (DEBUG) {
+ // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
+ // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+ // targetBeforeCurrentLength);
+ // }
+
+ // We are done sharing the common prefix with the incoming target and where we are currently
+ // seek'd; now continue walking the index:
+ while (targetUpto < target.length) {
+
+ final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
+
+ final FST.Arc<BytesRef> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader);
+
+ if (nextArc == null) {
+
+ // Index is exhausted
+ // if (DEBUG) {
+ // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " +
+ // toHex(targetLabel));
+ // }
+
+ validIndexPrefix = currentFrame.prefixLength;
+ // validIndexPrefix = targetUpto;
+
+ currentFrame.scanToFloorFrame(target);
+
+ if (currentFrame.hasTerms == false) {
+ termExists = false;
+ term.setByteAt(targetUpto, (byte) targetLabel);
+ term.setLength(1 + targetUpto);
+ // if (DEBUG) {
+ // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
+ // }
+ return null;
+ }
+
+ if (prefetch) {
+ currentFrame.prefetchBlock();
+ }
+
+ return () -> {
+ currentFrame.loadBlock();
+
+ final SeekStatus result = currentFrame.scanToTerm(target, true);
+ if (result == SeekStatus.FOUND) {
+ // if (DEBUG) {
+ // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
+ // }
+ return true;
+ } else {
+ // if (DEBUG) {
+ // System.out.println(" got " + result + "; return NOT_FOUND term=" +
+ // ToStringUtils.bytesRefToString(term));
+ // }
+ return false;
+ }
+ };
+ } else {
+ // Follow this arc
+ arc = nextArc;
+ term.setByteAt(targetUpto, (byte) targetLabel);
+ // Aggregate output as we go:
+ assert arc.output() != null;
+ outputAccumulator.push(arc.output());
+
+ // if (DEBUG) {
+ // System.out.println(" index: follow label=" + toHex(target.bytes[target.offset +
+ // targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+ // }
+ targetUpto++;
+
+ if (arc.isFinal()) {
+ // if (DEBUG) System.out.println(" arc is final!");
+ outputAccumulator.push(arc.nextFinalOutput());
+ currentFrame = pushFrame(arc, targetUpto);
+ outputAccumulator.pop(arc.nextFinalOutput());
+ // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" +
+ // currentFrame.hasTerms);
+ }
+ }
+ }
+
+ // validIndexPrefix = targetUpto;
+ validIndexPrefix = currentFrame.prefixLength;
+
+ currentFrame.scanToFloorFrame(target);
+
+ // Target term is entirely contained in the index:
+ if (currentFrame.hasTerms == false) {
+ termExists = false;
+ term.setLength(targetUpto);
+ // if (DEBUG) {
+ // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
+ // }
+ return null;
+ }
+
+ if (prefetch) {
+ currentFrame.prefetchBlock();
+ }
+
+ return () -> {
+ currentFrame.loadBlock();
+
+ final SeekStatus result = currentFrame.scanToTerm(target, true);
+ if (result == SeekStatus.FOUND) {
+ // if (DEBUG) {
+ // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
+ // }
+ return true;
+ } else {
+ // if (DEBUG) {
+ // System.out.println(" got result " + result + "; return NOT_FOUND term=" +
+ // term.utf8ToString());
+ // }
+
+ return false;
+ }
+ };
+ }
+
+ @Override
+ public IOBooleanSupplier prepareSeekExact(BytesRef target) throws IOException {
+ return prepareSeekExact(target, true);
+ }
+
+ @Override
+ public boolean seekExact(BytesRef target) throws IOException {
+ IOBooleanSupplier termExistsSupplier = prepareSeekExact(target, false);
+ return termExistsSupplier != null && termExistsSupplier.get();
+ }
+
+ @Override
+ public SeekStatus seekCeil(BytesRef target) throws IOException {
+
+ if (fr.index == null) {
+ throw new IllegalStateException("terms index was not loaded");
+ }
+
+ term.grow(1 + target.length);
+
+ assert clearEOF();
+
+ // if (DEBUG) {
+ // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
+ // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
+ // ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
+ // ") validIndexPrefix= " + validIndexPrefix);
+ // printSeekState(System.out);
+ // }
+
+ FST.Arc<BytesRef> arc;
+ int targetUpto;
+
+ targetBeforeCurrentLength = currentFrame.ord;
+ outputAccumulator.reset();
+
+ if (currentFrame != staticFrame) {
+
+ // We are already seek'd; find the common
+ // prefix of new seek term vs current term and
+ // re-use the corresponding seek state. For
+ // example, if app first seeks to foobar, then
+ // seeks to foobaz, we can re-use the seek state
+ // for the first 5 bytes.
+
+ // if (DEBUG) {
+ // System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
+ // }
+
+ arc = arcs[0];
+ assert arc.isFinal();
+ outputAccumulator.push(arc.output());
+ targetUpto = 0;
+
+ SegmentTermsEnumFrame lastFrame = stack[0];
+ assert validIndexPrefix <= term.length();
+
+ final int targetLimit = Math.min(target.length, validIndexPrefix);
+
+ int cmp = 0;
+
+ // First compare up to valid seek frames:
+ while (targetUpto < targetLimit) {
+ cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
+ // if (DEBUG) {
+ // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
+ // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
+ // " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
+ // + " output=" + output);
+ // }
+ if (cmp != 0) {
+ break;
+ }
+ arc = arcs[1 + targetUpto];
+ assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF)
+ : "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
+
+ outputAccumulator.push(arc.output());
+ if (arc.isFinal()) {
+ lastFrame = stack[1 + lastFrame.ord];
+ }
+ targetUpto++;
+ }
+
+ if (cmp == 0) {
+ // Second compare the rest of the term, but
+ // don't save arc/output/frame:
+ cmp = Arrays.compareUnsigned(
+ term.bytes(),
+ targetUpto,
+ term.length(),
+ target.bytes,
+ target.offset + targetUpto,
+ target.offset + target.length
+ );
+ }
+
+ if (cmp < 0) {
+ // Common case: target term is after current
+ // term, ie, app is seeking multiple terms
+ // in sorted order
+ // if (DEBUG) {
+ // System.out.println(" target is after current (shares prefixLen=" + targetUpto + ");
+ // clear frame.scanned ord=" + lastFrame.ord);
+ // }
+ currentFrame = lastFrame;
+
+ } else if (cmp > 0) {
+ // Uncommon case: target term
+ // is before current term; this means we can
+ // keep the currentFrame but we must rewind it
+ // (so we scan from the start)
+ targetBeforeCurrentLength = 0;
+ // if (DEBUG) {
+ // System.out.println(" target is before current (shares prefixLen=" + targetUpto + ");
+ // rewind frame ord=" + lastFrame.ord);
+ // }
+ currentFrame = lastFrame;
+ currentFrame.rewind();
+ } else {
+ // Target is exactly the same as current term
+ assert term.length() == target.length;
+ if (termExists) {
+ // if (DEBUG) {
+ // System.out.println(" target is same as current; return FOUND");
+ // }
+ return SeekStatus.FOUND;
+ } else {
+ // if (DEBUG) {
+ // System.out.println(" target is same as current but term doesn't exist");
+ // }
+ }
+ }
+
+ } else {
+
+ targetBeforeCurrentLength = -1;
+ arc = fr.index.getFirstArc(arcs[0]);
+
+ // Empty string prefix must have an output (block) in the index!
+ assert arc.isFinal();
+ assert arc.output() != null;
+
+ // if (DEBUG) {
+ // System.out.println(" no seek state; push root frame");
+ // }
+
+ outputAccumulator.push(arc.output());
+
+ currentFrame = staticFrame;
+
+ // term.length = 0;
+ targetUpto = 0;
+ outputAccumulator.push(arc.nextFinalOutput());
+ currentFrame = pushFrame(arc, 0);
+ outputAccumulator.pop(arc.nextFinalOutput());
+ }
+
+ // if (DEBUG) {
+ // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
+ // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
+ // targetBeforeCurrentLength);
+ // }
+
+ // We are done sharing the common prefix with the incoming target and where we are currently
+ // seek'd; now continue walking the index:
+ while (targetUpto < target.length) {
+
+ final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
+
+ final FST.Arc<BytesRef> nextArc = fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader);
+
+ if (nextArc == null) {
+
+ // Index is exhausted
+ // if (DEBUG) {
+ // System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " +
+ // targetLabel);
+ // }
+
+ validIndexPrefix = currentFrame.prefixLength;
+ // validIndexPrefix = targetUpto;
+
+ currentFrame.scanToFloorFrame(target);
+
+ currentFrame.loadBlock();
+
+ // if (DEBUG) System.out.println(" now scanToTerm");
+ final SeekStatus result = currentFrame.scanToTerm(target, false);
+ if (result == SeekStatus.END) {
+ term.copyBytes(target);
+ termExists = false;
+
+ if (next() != null) {
+ // if (DEBUG) {
+ // System.out.println(" return NOT_FOUND term=" +
+ // ToStringUtils.bytesRefToString(term));
+ // }
+ return SeekStatus.NOT_FOUND;
+ } else {
+ // if (DEBUG) {
+ // System.out.println(" return END");
+ // }
+ return SeekStatus.END;
+ }
+ } else {
+ // if (DEBUG) {
+ // System.out.println(" return " + result + " term=" +
+ // ToStringUtils.bytesRefToString(term));
+ // }
+ return result;
+ }
+ } else {
+ // Follow this arc
+ term.setByteAt(targetUpto, (byte) targetLabel);
+ arc = nextArc;
+ // Aggregate output as we go:
+ assert arc.output() != null;
+ outputAccumulator.push(arc.output());
+
+ // if (DEBUG) {
+ // System.out.println(" index: follow label=" + (target.bytes[target.offset +
+ // targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+ // }
+ targetUpto++;
+
+ if (arc.isFinal()) {
+ // if (DEBUG) System.out.println(" arc is final!");
+ outputAccumulator.push(arc.nextFinalOutput());
+ currentFrame = pushFrame(arc, targetUpto);
+ outputAccumulator.pop(arc.nextFinalOutput());
+ // if (DEBUG) System.out.println(" curFrame.ord=" +
currentFrame.ord + " hasTerms=" + + // currentFrame.hasTerms); + } + } + } + + // validIndexPrefix = targetUpto; + validIndexPrefix = currentFrame.prefixLength; + + currentFrame.scanToFloorFrame(target); + + currentFrame.loadBlock(); + + final SeekStatus result = currentFrame.scanToTerm(target, false); + + if (result == SeekStatus.END) { + term.copyBytes(target); + termExists = false; + if (next() != null) { + // if (DEBUG) { + // System.out.println(" return NOT_FOUND term=" + term.get().utf8ToString() + " " + term); + // } + return SeekStatus.NOT_FOUND; + } else { + // if (DEBUG) { + // System.out.println(" return END"); + // } + return SeekStatus.END; + } + } else { + return result; + } + } + + @SuppressWarnings("unused") + private void printSeekState(PrintStream out) throws IOException { + if (currentFrame == staticFrame) { + out.println(" no prior seek"); + } else { + out.println(" prior seek state:"); + int ord = 0; + boolean isSeekFrame = true; + while (true) { + SegmentTermsEnumFrame f = getFrame(ord); + assert f != null; + final BytesRef prefix = new BytesRef(term.get().bytes, 0, f.prefixLength); + if (f.nextEnt == -1) { + out.println( + " frame " + + (isSeekFrame ? "(seek)" : "(next)") + + " ord=" + + ord + + " fp=" + + f.fp + + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + + " prefixLen=" + + f.prefixLength + + " prefix=" + + prefix + + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + + " hasTerms=" + + f.hasTerms + + " isFloor=" + + f.isFloor + + " code=" + + ((f.fp << Lucene90BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms + ? Lucene90BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS + : 0) + (f.isFloor ? Lucene90BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0)) + + " isLastInFloor=" + + f.isLastInFloor + + " mdUpto=" + + f.metaDataUpto + + " tbOrd=" + + f.getTermBlockOrd() + ); + } else { + out.println( + " frame " + + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + + " ord=" + + ord + + " fp=" + + f.fp + + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + + " prefixLen=" + + f.prefixLength + + " prefix=" + + prefix + + " nextEnt=" + + f.nextEnt + + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + + " hasTerms=" + + f.hasTerms + + " isFloor=" + + f.isFloor + + " code=" + + ((f.fp << Lucene90BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms + ? Lucene90BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS + : 0) + (f.isFloor ? 
Lucene90BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0))
+ + " lastSubFP="
+ + f.lastSubFP
+ + " isLastInFloor="
+ + f.isLastInFloor
+ + " mdUpto="
+ + f.metaDataUpto
+ + " tbOrd="
+ + f.getTermBlockOrd()
+ );
+ }
+ if (fr.index != null) {
+ assert isSeekFrame == false || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
+ if (f.prefixLength > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefixLength - 1) & 0xFF)) {
+ out.println(
+ " broken seek state: arc.label="
+ + (char) f.arc.label()
+ + " vs term byte="
+ + (char) (term.byteAt(f.prefixLength - 1) & 0xFF)
+ );
+ throw new RuntimeException("seek state is broken");
+ }
+ BytesRef output = Util.get(fr.index, prefix);
+ if (output == null) {
+ out.println(" broken seek state: prefix is not final in index");
+ throw new RuntimeException("seek state is broken");
+ } else if (isSeekFrame && f.isFloor == false) {
+ final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes, output.offset, output.length);
+ final long codeOrig = fr.readVLongOutput(reader);
+ final long code = (f.fp << Lucene90BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms
+ ? Lucene90BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS
+ : 0) | (f.isFloor ? Lucene90BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
+ if (codeOrig != code) {
+ out.println(" broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
+ throw new RuntimeException("seek state is broken");
+ }
+ }
+ }
+ if (f == currentFrame) {
+ break;
+ }
+ if (f.prefixLength == validIndexPrefix) {
+ isSeekFrame = false;
+ }
+ ord++;
+ }
+ }
+ }
+
+ /* Decodes only the term bytes of the next term. If caller then asks for
+ metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily)
+ decode all metadata up to the current term. */
+ @Override
+ public BytesRef next() throws IOException {
+ if (in == null) {
+ // Fresh TermsEnum; seek to first term:
+ final FST.Arc<BytesRef> arc;
+ if (fr.index != null) {
+ arc = fr.index.getFirstArc(arcs[0]);
+ // Empty string prefix must have an output in the index!
+ assert arc.isFinal();
+ } else {
+ arc = null;
+ }
+ currentFrame = pushFrame(arc, fr.rootCode, 0);
+ currentFrame.loadBlock();
+ }
+
+ targetBeforeCurrentLength = currentFrame.ord;
+
+ assert eof == false;
+ // if (DEBUG) {
+ // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
+ // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
+ // fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
+ // " validIndexPrefix=" + validIndexPrefix);
+ // printSeekState(System.out);
+ // }
+
+ if (currentFrame == staticFrame) {
+ // If seek was previously called and the term was
+ // cached, or seek(TermState) was called, usually
+ // caller is just going to pull a D/&PEnum or get
+ // docFreq, etc.
But, if they then call next(), + // this method catches up all internal state so next() + // works properly: + // if (DEBUG) System.out.println(" re-seek to pending term=" + term.utf8ToString() + " " + + // term); + final boolean result = seekExact(term.get()); + assert result; + } + + // Pop finished blocks + while (currentFrame.nextEnt == currentFrame.entCount) { + if (currentFrame.isLastInFloor == false) { + // Advance to next floor block + currentFrame.loadNextFloorBlock(); + break; + } else { + // if (DEBUG) System.out.println(" pop frame"); + if (currentFrame.ord == 0) { + // if (DEBUG) System.out.println(" return null"); + assert setEOF(); + term.clear(); + validIndexPrefix = 0; + currentFrame.rewind(); + termExists = false; + return null; + } + final long lastFP = currentFrame.fpOrig; + currentFrame = stack[currentFrame.ord - 1]; + + if (currentFrame.nextEnt == -1 || currentFrame.lastSubFP != lastFP) { + // We popped into a frame that's not loaded + // yet or not scan'd to the right entry + currentFrame.scanToFloorFrame(term.get()); + currentFrame.loadBlock(); + currentFrame.scanToSubBlock(lastFP); + } + + // Note that the seek state (last seek) has been + // invalidated beyond this depth + validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefixLength); + // if (DEBUG) { + // System.out.println(" reset validIndexPrefix=" + validIndexPrefix); + // } + } + } + + while (true) { + if (currentFrame.next()) { + // Push to new block: + // if (DEBUG) System.out.println(" push frame"); + currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length()); + // This is a "next" frame -- even if it's + // floor'd we must pretend it isn't so we don't + // try to scan to the right floor frame: + currentFrame.loadBlock(); + } else { + // if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) + + // " currentFrame.ord=" + currentFrame.ord); + return term.get(); + } + } + } + + @Override + public BytesRef term() { + assert eof == false; + return term.get(); + } + + @Override + public int docFreq() throws IOException { + assert eof == false; + // if (DEBUG) System.out.println("BTR.docFreq"); + currentFrame.decodeMetaData(); + // if (DEBUG) System.out.println(" return " + currentFrame.state.docFreq); + return currentFrame.state.docFreq; + } + + @Override + public long totalTermFreq() throws IOException { + assert eof == false; + currentFrame.decodeMetaData(); + return currentFrame.state.totalTermFreq; + } + + @Override + public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { + assert eof == false; + // if (DEBUG) { + // System.out.println("BTTR.docs seg=" + segment); + // } + currentFrame.decodeMetaData(); + // if (DEBUG) { + // System.out.println(" state=" + currentFrame.state); + // } + return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.state, reuse, flags); + } + + @Override + public ImpactsEnum impacts(int flags) throws IOException { + assert eof == false; + // if (DEBUG) { + // System.out.println("BTTR.docs seg=" + segment); + // } + currentFrame.decodeMetaData(); + // if (DEBUG) { + // System.out.println(" state=" + currentFrame.state); + // } + return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, flags); + } + + @Override + public void seekExact(BytesRef target, TermState otherState) { + // if (DEBUG) { + // System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + + // target.utf8ToString() + " " + target + " state=" + otherState); + // } + assert clearEOF(); + if 
(target.compareTo(term.get()) != 0 || termExists == false) { + assert otherState != null && otherState instanceof BlockTermState; + currentFrame = staticFrame; + currentFrame.state.copyFrom(otherState); + term.copyBytes(target); + currentFrame.metaDataUpto = currentFrame.getTermBlockOrd(); + assert currentFrame.metaDataUpto > 0; + validIndexPrefix = 0; + } else { + // if (DEBUG) { + // System.out.println(" skip seek: already on target state=" + currentFrame.state); + // } + } + } + + @Override + public TermState termState() throws IOException { + assert eof == false; + currentFrame.decodeMetaData(); + TermState ts = currentFrame.state.clone(); + // if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts); + return ts; + } + + @Override + public void seekExact(long ord) { + throw new UnsupportedOperationException(); + } + + @Override + public long ord() { + throw new UnsupportedOperationException(); + } + + static class OutputAccumulator extends DataInput { + + BytesRef[] outputs = new BytesRef[16]; + BytesRef current; + int num; + int outputIndex; + int index; + + void push(BytesRef output) { + if (output != Lucene90BlockTreeTermsReader.NO_OUTPUT) { + assert output.length > 0; + outputs = ArrayUtil.grow(outputs, num + 1); + outputs[num++] = output; + } + } + + void pop(BytesRef output) { + if (output != Lucene90BlockTreeTermsReader.NO_OUTPUT) { + assert num > 0; + assert outputs[num - 1] == output; + num--; + } + } + + void pop(int cnt) { + assert num >= cnt; + num -= cnt; + } + + int outputCount() { + return num; + } + + void reset() { + num = 0; + } + + void prepareRead() { + index = 0; + outputIndex = 0; + current = outputs[0]; + } + + /** + * Set the last arc as the source of the floorData. This won't change the reading position of + * this {@link OutputAccumulator} + */ + void setFloorData(ByteArrayDataInput floorData) { + assert outputIndex == num - 1 : "floor data should be stored in last arc, get outputIndex: " + outputIndex + ", num: " + num; + BytesRef output = outputs[outputIndex]; + floorData.reset(output.bytes, output.offset + index, output.length - index); + } + + @Override + public byte readByte() throws IOException { + if (index >= current.length) { + current = outputs[++outputIndex]; + index = 0; + } + return current.bytes[current.offset + index++]; + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void skipBytes(long numBytes) throws IOException { + throw new UnsupportedOperationException(); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/SegmentTermsEnumFrame.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/SegmentTermsEnumFrame.java new file mode 100644 index 0000000000000..84742be721b90 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/SegmentTermsEnumFrame.java @@ -0,0 +1,856 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */
+package org.elasticsearch.index.codec.postings.terms;
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.fst.FST;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+final class SegmentTermsEnumFrame {
+ // Our index in stack[]:
+ final int ord;
+
+ boolean hasTerms;
+ boolean hasTermsOrig;
+ boolean isFloor;
+
+ FST.Arc<BytesRef> arc;
+
+ // static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+
+ // File pointer where this block was loaded from
+ long fp;
+ long fpOrig;
+ long fpEnd;
+ long totalSuffixBytes; // for stats
+
+ byte[] suffixBytes = new byte[128];
+ final ByteArrayDataInput suffixesReader = new ByteArrayDataInput();
+
+ byte[] suffixLengthBytes;
+ final ByteArrayDataInput suffixLengthsReader;
+
+ byte[] statBytes = new byte[64];
+ int statsSingletonRunLength = 0;
+ final ByteArrayDataInput statsReader = new ByteArrayDataInput();
+
+ int rewindPos;
+ final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
+
+ // Length of prefix shared by all terms in this block
+ int prefixLength;
+
+ // Number of entries (term or sub-block) in this block
+ int entCount;
+
+ // Which term we will next read, or -1 if the block
+ // isn't loaded yet
+ int nextEnt;
+
+ // True if this block is either not a floor block,
+ // or, it's the last sub-block of a floor block
+ boolean isLastInFloor;
+
+ // True if all entries are terms
+ boolean isLeafBlock;
+
+ // True if all entries have the same length.
+ boolean allEqual;
+
+ long lastSubFP;
+
+ int nextFloorLabel;
+ int numFollowFloorBlocks;
+
+ // Next term to decode metaData; we decode metaData
+ // lazily so that scanning to find the matching term is
+ // fast and only if you find a match and app wants the
+ // stats or docs/positions enums, will we decode the
+ // metaData
+ int metaDataUpto;
+
+ final BlockTermState state;
+
+ // metadata buffer
+ byte[] bytes = new byte[32];
+ final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
+
+ private final SegmentTermsEnum ste;
+
+ SegmentTermsEnumFrame(SegmentTermsEnum ste, int ord) throws IOException {
+ this.ste = ste;
+ this.ord = ord;
+ this.state = ste.fr.parent.postingsReader.newTermState();
+ this.state.totalTermFreq = -1;
+ suffixLengthBytes = new byte[32];
+ suffixLengthsReader = new ByteArrayDataInput();
+ }
+
+ public void setFloorData(SegmentTermsEnum.OutputAccumulator outputAccumulator) {
+ outputAccumulator.setFloorData(floorDataReader);
+ rewindPos = floorDataReader.getPosition();
+ numFollowFloorBlocks = floorDataReader.readVInt();
+ nextFloorLabel = floorDataReader.readByte() & 0xff;
+ // if (DEBUG) {
+ // System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new
+ // BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks="
+ // + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
+ // }
+ }
+
+ public int getTermBlockOrd() {
+ return isLeafBlock ?
nextEnt : state.termBlockOrd; + } + + void loadNextFloorBlock() throws IOException { + // if (DEBUG) { + // System.out.println(" loadNextFloorBlock fp=" + fp + " fpEnd=" + fpEnd); + // } + assert arc == null || isFloor : "arc=" + arc + " isFloor=" + isFloor; + fp = fpEnd; + nextEnt = -1; + loadBlock(); + } + + void prefetchBlock() throws IOException { + if (nextEnt != -1) { + // Already loaded + return; + } + + // Clone the IndexInput lazily, so that consumers + // that just pull a TermsEnum to + // seekExact(TermState) don't pay this cost: + ste.initIndexInput(); + + // TODO: Could we know the number of bytes to prefetch? + ste.in.prefetch(fp, 1); + } + + /* Does initial decode of next block of terms; this + doesn't actually decode the docFreq, totalTermFreq, + postings details (frq/prx offset, etc.) metadata; + it just loads them as byte[] blobs which are then + decoded on-demand if the metadata is ever requested + for any term in this block. This enables terms-only + intensive consumes (eg certain MTQs, respelling) to + not pay the price of decoding metadata they won't + use. */ + void loadBlock() throws IOException { + + // Clone the IndexInput lazily, so that consumers + // that just pull a TermsEnum to + // seekExact(TermState) don't pay this cost: + ste.initIndexInput(); + + if (nextEnt != -1) { + // Already loaded + return; + } + // System.out.println("blc=" + blockLoadCount); + + ste.in.seek(fp); + int code = ste.in.readVInt(); + entCount = code >>> 1; + assert entCount > 0; + isLastInFloor = (code & 1) != 0; + + assert arc == null || (isLastInFloor || isFloor) + : "fp=" + fp + " arc=" + arc + " isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor; + + // TODO: if suffixes were stored in random-access + // array structure, then we could do binary search + // instead of linear scan to find target term; eg + // we could have simple array of offsets + + final long startSuffixFP = ste.in.getFilePointer(); + // term suffixes: + final long codeL = ste.in.readVLong(); + isLeafBlock = (codeL & 0x04) != 0; + final int numSuffixBytes = (int) (codeL >>> 3); + if (suffixBytes.length < numSuffixBytes) { + suffixBytes = new byte[ArrayUtil.oversize(numSuffixBytes, 1)]; + } + try { + compressionAlg = CompressionAlgorithm.byCode((int) codeL & 0x03); + } catch (IllegalArgumentException e) { + throw new CorruptIndexException(e.getMessage(), ste.in, e); + } + compressionAlg.read(ste.in, suffixBytes, numSuffixBytes); + suffixesReader.reset(suffixBytes, 0, numSuffixBytes); + + int numSuffixLengthBytes = ste.in.readVInt(); + allEqual = (numSuffixLengthBytes & 0x01) != 0; + numSuffixLengthBytes >>>= 1; + if (suffixLengthBytes.length < numSuffixLengthBytes) { + suffixLengthBytes = new byte[ArrayUtil.oversize(numSuffixLengthBytes, 1)]; + } + if (allEqual) { + Arrays.fill(suffixLengthBytes, 0, numSuffixLengthBytes, ste.in.readByte()); + } else { + ste.in.readBytes(suffixLengthBytes, 0, numSuffixLengthBytes); + } + suffixLengthsReader.reset(suffixLengthBytes, 0, numSuffixLengthBytes); + totalSuffixBytes = ste.in.getFilePointer() - startSuffixFP; + + // stats + int numBytes = ste.in.readVInt(); + if (statBytes.length < numBytes) { + statBytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ste.in.readBytes(statBytes, 0, numBytes); + statsReader.reset(statBytes, 0, numBytes); + statsSingletonRunLength = 0; + metaDataUpto = 0; + + state.termBlockOrd = 0; + nextEnt = 0; + lastSubFP = -1; + + // TODO: we could skip this if !hasTerms; but + // that's rare so won't help much + // metadata + numBytes = 
ste.in.readVInt(); + if (bytes.length < numBytes) { + bytes = new byte[ArrayUtil.oversize(numBytes, 1)]; + } + ste.in.readBytes(bytes, 0, numBytes); + bytesReader.reset(bytes, 0, numBytes); + + // Sub-blocks of a single floor block are always + // written one after another -- tail recurse: + fpEnd = ste.in.getFilePointer(); + // if (DEBUG) { + // System.out.println(" fpEnd=" + fpEnd); + // } + } + + void rewind() { + + // Force reload: + fp = fpOrig; + nextEnt = -1; + hasTerms = hasTermsOrig; + if (isFloor) { + floorDataReader.setPosition(rewindPos); + numFollowFloorBlocks = floorDataReader.readVInt(); + assert numFollowFloorBlocks > 0; + nextFloorLabel = floorDataReader.readByte() & 0xff; + } + + /* + //System.out.println("rewind"); + // Keeps the block loaded, but rewinds its state: + if (nextEnt > 0 || fp != fpOrig) { + if (DEBUG) { + System.out.println(" rewind frame ord=" + ord + " fpOrig=" + fpOrig + " fp=" + fp + + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " nextEnt=" + nextEnt + " prefixLen=" + prefix); + } + if (fp != fpOrig) { + fp = fpOrig; + nextEnt = -1; + } else { + nextEnt = 0; + } + hasTerms = hasTermsOrig; + if (isFloor) { + floorDataReader.rewind(); + numFollowFloorBlocks = floorDataReader.readVInt(); + nextFloorLabel = floorDataReader.readByte() & 0xff; + } + assert suffixBytes != null; + suffixesReader.rewind(); + assert statBytes != null; + statsReader.rewind(); + metaDataUpto = 0; + state.termBlockOrd = 0; + // TODO: skip this if !hasTerms? Then postings + // impl wouldn't have to write useless 0 byte + postingsReader.resetTermsBlock(fieldInfo, state); + lastSubFP = -1; + } else if (DEBUG) { + System.out.println(" skip rewind fp=" + fp + " fpOrig=" + fpOrig + " nextEnt=" + nextEnt + " ord=" + ord); + } + */ + } + + // Decodes next entry; returns true if it's a sub-block + public boolean next() throws IOException { + if (isLeafBlock) { + nextLeaf(); + return false; + } else { + return nextNonLeaf(); + } + } + + public void nextLeaf() { + // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + + // " entCount=" + entCount); + assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; + nextEnt++; + suffixLength = suffixLengthsReader.readVInt(); + startBytePos = suffixesReader.getPosition(); + ste.term.setLength(prefixLength + suffixLength); + ste.term.grow(ste.term.length()); + suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength); + ste.termExists = true; + } + + public boolean nextNonLeaf() throws IOException { + // if (DEBUG) System.out.println(" stef.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + // + entCount + " fp=" + suffixesReader.getPosition()); + while (true) { + if (nextEnt == entCount) { + assert arc == null || (isFloor && isLastInFloor == false) : "isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor; + loadNextFloorBlock(); + if (isLeafBlock) { + nextLeaf(); + return false; + } else { + continue; + } + } + + assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; + nextEnt++; + final int code = suffixLengthsReader.readVInt(); + suffixLength = code >>> 1; + startBytePos = suffixesReader.getPosition(); + ste.term.setLength(prefixLength + suffixLength); + ste.term.grow(ste.term.length()); + suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength); + if ((code & 1) == 0) { + // A normal term + ste.termExists = true; + subCode = 0; + state.termBlockOrd++; + return false; + } else { 
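+ // Sub-block file pointers are stored as vLong deltas against this block's fp, which
+ // keeps the encoded values small.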
+ // A sub-block; make sub-FP absolute: + ste.termExists = false; + subCode = suffixLengthsReader.readVLong(); + lastSubFP = fp - subCode; + // if (DEBUG) { + // System.out.println(" lastSubFP=" + lastSubFP); + // } + return true; + } + } + } + + // TODO: make this array'd so we can do bin search? + // likely not worth it? need to measure how many + // floor blocks we "typically" get + public void scanToFloorFrame(BytesRef target) { + + if (isFloor == false || target.length <= prefixLength) { + // if (DEBUG) { + // System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + + // target.length + " vs prefix=" + prefix); + // } + return; + } + + final int targetLabel = target.bytes[target.offset + prefixLength] & 0xFF; + + // if (DEBUG) { + // System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + + // toHex(targetLabel) + " vs nextFloorLabel=" + toHex(nextFloorLabel) + " numFollowFloorBlocks=" + // + numFollowFloorBlocks); + // } + + if (targetLabel < nextFloorLabel) { + // if (DEBUG) { + // System.out.println(" already on correct block"); + // } + return; + } + + assert numFollowFloorBlocks != 0; + + long newFP = fpOrig; + while (true) { + final long code = floorDataReader.readVLong(); + newFP = fpOrig + (code >>> 1); + hasTerms = (code & 1) != 0; + // if (DEBUG) { + // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + + // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); + // } + + isLastInFloor = numFollowFloorBlocks == 1; + numFollowFloorBlocks--; + + if (isLastInFloor) { + nextFloorLabel = 256; + // if (DEBUG) { + // System.out.println(" stop! last block nextFloorLabel=" + + // toHex(nextFloorLabel)); + // } + break; + } else { + nextFloorLabel = floorDataReader.readByte() & 0xff; + if (targetLabel < nextFloorLabel) { + // if (DEBUG) { + // System.out.println(" stop! nextFloorLabel=" + toHex(nextFloorLabel)); + // } + break; + } + } + } + + if (newFP != fp) { + // Force re-load of the block: + // if (DEBUG) { + // System.out.println(" force switch to fp=" + newFP + " oldFP=" + fp); + // } + nextEnt = -1; + fp = newFP; + } else { + // if (DEBUG) { + // System.out.println(" stay on same fp=" + newFP); + // } + } + } + + public void decodeMetaData() throws IOException { + + // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + + // metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd); + + // lazily catch up on metadata decode: + final int limit = getTermBlockOrd(); + boolean absolute = metaDataUpto == 0; + assert limit > 0; + + // TODO: better API would be "jump straight to term=N"??? 
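+ // Each stats token either starts a run of singleton terms (LSB set: docFreq and
+ // totalTermFreq are both 1 for the next token>>>1 terms) or encodes docFreq directly
+ // (token>>>1), followed by a vLong totalTermFreq delta unless the field is DOCS-only.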
+ while (metaDataUpto < limit) { + + // TODO: we could make "tiers" of metadata, ie, + // decode docFreq/totalTF but don't decode postings + // metadata; this way caller could get + // docFreq/totalTF w/o paying decode cost for + // postings + + // TODO: if docFreq were bulk decoded we could + // just skipN here: + if (statsSingletonRunLength > 0) { + state.docFreq = 1; + state.totalTermFreq = 1; + statsSingletonRunLength--; + } else { + int token = statsReader.readVInt(); + if ((token & 1) == 1) { + state.docFreq = 1; + state.totalTermFreq = 1; + statsSingletonRunLength = token >>> 1; + } else { + state.docFreq = token >>> 1; + if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + state.totalTermFreq = state.docFreq; + } else { + state.totalTermFreq = state.docFreq + statsReader.readVLong(); + } + } + } + + // metadata + ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute); + + metaDataUpto++; + absolute = false; + } + state.termBlockOrd = metaDataUpto; + } + + // Used only by assert + private boolean prefixMatches(BytesRef target) { + for (int bytePos = 0; bytePos < prefixLength; bytePos++) { + if (target.bytes[target.offset + bytePos] != ste.term.byteAt(bytePos)) { + return false; + } + } + + return true; + } + + // Scans to sub-block that has this target fp; only + // called by next(); NOTE: does not set + // startBytePos/suffix as a side effect + public void scanToSubBlock(long subFP) { + assert isLeafBlock == false; + // if (DEBUG) System.out.println(" scanToSubBlock fp=" + fp + " subFP=" + subFP + " entCount=" + // + entCount + " lastSubFP=" + lastSubFP); + // assert nextEnt == 0; + if (lastSubFP == subFP) { + // if (DEBUG) System.out.println(" already positioned"); + return; + } + assert subFP < fp : "fp=" + fp + " subFP=" + subFP; + final long targetSubCode = fp - subFP; + // if (DEBUG) System.out.println(" targetSubCode=" + targetSubCode); + while (true) { + assert nextEnt < entCount; + nextEnt++; + final int code = suffixLengthsReader.readVInt(); + suffixesReader.skipBytes(code >>> 1); + if ((code & 1) != 0) { + final long subCode = suffixLengthsReader.readVLong(); + if (targetSubCode == subCode) { + // if (DEBUG) System.out.println(" match!"); + lastSubFP = subFP; + return; + } + } else { + state.termBlockOrd++; + } + } + } + + // NOTE: sets startBytePos/suffix as a side effect + public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOException { + if (isLeafBlock) { + if (allEqual) { + return binarySearchTermLeaf(target, exactOnly); + } else { + return scanToTermLeaf(target, exactOnly); + } + } else { + return scanToTermNonLeaf(target, exactOnly); + } + } + + private int startBytePos; + private int suffixLength; + private long subCode; + CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION; + + // Target's prefix matches this block's prefix; we + // scan the entries to check if the suffix matches. 
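+ // (scanToTerm only dispatches here when suffix lengths within the block vary; leaf
+ // blocks whose suffixes all share one length are handled by binarySearchTermLeaf.)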
+ public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException { + + // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + + // ToStringUtils.bytesRefToString(target) + + // " term=" + ToStringUtils.bytesRefToString(term)); + + assert nextEnt != -1; + + ste.termExists = true; + subCode = 0; + + if (nextEnt == entCount) { + if (exactOnly) { + fillTerm(); + } + return SeekStatus.END; + } + + assert prefixMatches(target); + + // Loop over each entry (term or sub-block) in this block: + do { + nextEnt++; + + suffixLength = suffixLengthsReader.readVInt(); + + // if (DEBUG) { + // BytesRef suffixBytesRef = new BytesRef(); + // suffixBytesRef.bytes = suffixBytes; + // suffixBytesRef.offset = suffixesReader.getPosition(); + // suffixBytesRef.length = suffix; + // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + // + ToStringUtils.bytesRefToString(suffixBytesRef)); + // } + + startBytePos = suffixesReader.getPosition(); + suffixesReader.skipBytes(suffixLength); + + // Compare suffix and target. + final int cmp = Arrays.compareUnsigned( + suffixBytes, + startBytePos, + startBytePos + suffixLength, + target.bytes, + target.offset + prefixLength, + target.offset + target.length + ); + + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); + + // if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! + + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: + + fillTerm(); + // if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; + } + } while (nextEnt < entCount); + + // It is possible (and OK) that terms index pointed us + // at this block, but, we scanned the entire block and + // did not find the term to position to. This happens + // when the target is after the last term in the block + // (but, before the next term in the index). EG + // target could be foozzz, and terms index pointed us + // to the foo* block, but the last term in this block + // was fooz (and, eg, first term in the next block will + // bee fop). + // if (DEBUG) System.out.println(" block end"); + if (exactOnly) { + fillTerm(); + } + + // TODO: not consistent that in the + // not-exact case we don't next() into the next + // frame here + return SeekStatus.END; + } + + // Target's prefix matches this block's prefix; + // And all suffixes have the same length in this block, + // we binary search the entries to check if the suffix matches. + public SeekStatus binarySearchTermLeaf(BytesRef target, boolean exactOnly) throws IOException { + // if (DEBUG) System.out.println(" binarySearchTermLeaf: block fp=" + fp + " prefix=" + + // prefix + " + // nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + + // brToString(term)); + + assert nextEnt != -1; + + ste.termExists = true; + subCode = 0; + + if (nextEnt == entCount) { + if (exactOnly) { + fillTerm(); + } + return SeekStatus.END; + } + + assert prefixMatches(target); + + suffixLength = suffixLengthsReader.readVInt(); + // TODO early terminate when target length unequals suffix + prefix. + // But we need to keep the same status with scanToTermLeaf. 
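+ // All suffixes in this block have the same length, so entry i's suffix starts at byte
+ // offset i * suffixLength in suffixBytes and can be addressed directly by the search.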
+ int start = nextEnt; + int end = entCount - 1; + // Binary search the entries (terms) in this leaf block: + int cmp = 0; + while (start <= end) { + int mid = (start + end) >>> 1; + nextEnt = mid + 1; + startBytePos = mid * suffixLength; + + // Compare suffix and target. + cmp = Arrays.compareUnsigned( + suffixBytes, + startBytePos, + startBytePos + suffixLength, + target.bytes, + target.offset + prefixLength, + target.offset + target.length + ); + if (cmp < 0) { + start = mid + 1; + } else if (cmp > 0) { + end = mid - 1; + } else { + // Exact match! + suffixesReader.setPosition(startBytePos + suffixLength); + fillTerm(); + // if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; + } + } + + // It is possible (and OK) that terms index pointed us + // at this block, but, we searched the entire block and + // did not find the term to position to. This happens + // when the target is after the last term in the block + // (but, before the next term in the index). EG + // target could be foozzz, and terms index pointed us + // to the foo* block, but the last term in this block + // was fooz (and, eg, first term in the next block will + // bee fop). + // if (DEBUG) System.out.println(" block end"); + SeekStatus seekStatus; + if (end < entCount - 1) { + seekStatus = SeekStatus.NOT_FOUND; + // If binary search ended at the less term, and greater term exists. + // We need to advance to the greater term. + if (cmp < 0) { + startBytePos += suffixLength; + nextEnt++; + } + suffixesReader.setPosition(startBytePos + suffixLength); + fillTerm(); + } else { + seekStatus = SeekStatus.END; + suffixesReader.setPosition(startBytePos + suffixLength); + if (exactOnly) { + fillTerm(); + } + } + // TODO: not consistent that in the + // not-exact case we don't next() into the next + // frame here + return seekStatus; + } + + // Target's prefix matches this block's prefix; we + // scan the entries to check if the suffix matches. + public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException { + + // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + + // ToStringUtils.bytesRefToString(target) + + // " term=" + ToStringUtils.bytesRefToString(term)); + + assert nextEnt != -1; + + if (nextEnt == entCount) { + if (exactOnly) { + fillTerm(); + ste.termExists = subCode == 0; + } + return SeekStatus.END; + } + + assert prefixMatches(target); + + // Loop over each entry (term or sub-block) in this block: + while (nextEnt < entCount) { + + nextEnt++; + + final int code = suffixLengthsReader.readVInt(); + suffixLength = code >>> 1; + + // if (DEBUG) { + // BytesRef suffixBytesRef = new BytesRef(); + // suffixBytesRef.bytes = suffixBytes; + // suffixBytesRef.offset = suffixesReader.getPosition(); + // suffixBytesRef.length = suffix; + // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + + // (nextEnt-1) + " (of " + entCount + ") suffix=" + + // ToStringUtils.bytesRefToString(suffixBytesRef)); + // } + + startBytePos = suffixesReader.getPosition(); + suffixesReader.skipBytes(suffixLength); + ste.termExists = (code & 1) == 0; + if (ste.termExists) { + state.termBlockOrd++; + subCode = 0; + } else { + subCode = suffixLengthsReader.readVLong(); + lastSubFP = fp - subCode; + } + + // Compare suffix and target. 
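+ // (Unsigned byte-wise comparison, matching the natural ordering of BytesRef.)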
+ final int cmp = Arrays.compareUnsigned( + suffixBytes, + startBytePos, + startBytePos + suffixLength, + target.bytes, + target.offset + prefixLength, + target.offset + target.length + ); + + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); + + // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + + // " ste.termExists=" + ste.termExists); + + if (exactOnly == false && ste.termExists == false) { + // System.out.println(" now pushFrame"); + // TODO this + // We are on a sub-block, and caller wants + // us to position to the next term after + // the target, so we must recurse into the + // sub-frame(s): + ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, prefixLength + suffixLength); + ste.currentFrame.loadBlock(); + while (ste.currentFrame.next()) { + ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length()); + ste.currentFrame.loadBlock(); + } + } + + // if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! + + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: + + assert ste.termExists; + fillTerm(); + // if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; + } + } + + // It is possible (and OK) that terms index pointed us + // at this block, but, we scanned the entire block and + // did not find the term to position to. This happens + // when the target is after the last term in the block + // (but, before the next term in the index). EG + // target could be foozzz, and terms index pointed us + // to the foo* block, but the last term in this block + // was fooz (and, eg, first term in the next block will + // bee fop). + // if (DEBUG) System.out.println(" block end"); + if (exactOnly) { + fillTerm(); + } + + // TODO: not consistent that in the + // not-exact case we don't next() into the next + // frame here + return SeekStatus.END; + } + + private void fillTerm() { + final int termLength = prefixLength + suffixLength; + ste.term.setLength(termLength); + ste.term.grow(termLength); + System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefixLength, suffixLength); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/Stats.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/Stats.java new file mode 100644 index 0000000000000..a675b7199f62e --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/Stats.java @@ -0,0 +1,255 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */
+package org.elasticsearch.index.codec.postings.terms;
+
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.util.Locale;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+/**
+ * BlockTree statistics for a single field returned by {@link FieldReader#getStats()}.
+ */
+public class Stats {
+    /** Byte size of the index. */
+    public long indexNumBytes;
+
+    /** Total number of terms in the field. */
+    public long totalTermCount;
+
+    /** Total number of bytes (sum of term lengths) across all terms in the field. */
+    public long totalTermBytes;
+
+    /** The number of normal (non-floor) blocks in the terms file. */
+    public int nonFloorBlockCount;
+
+    /**
+     * The number of floor blocks (meta-blocks larger than the allowed {@code maxItemsPerBlock}) in
+     * the terms file.
+     */
+    public int floorBlockCount;
+
+    /** The number of sub-blocks within the floor blocks. */
+    public int floorSubBlockCount;
+
+    /** The number of "internal" blocks (that have both terms and sub-blocks). */
+    public int mixedBlockCount;
+
+    /** The number of "leaf" blocks (blocks that have only terms). */
+    public int termsOnlyBlockCount;
+
+    /** The number of "internal" blocks that do not contain terms (have only sub-blocks). */
+    public int subBlocksOnlyBlockCount;
+
+    /** Total number of blocks. */
+    public int totalBlockCount;
+
+    /** Number of blocks at each prefix depth. */
+    public int[] blockCountByPrefixLen = new int[10];
+
+    private int startBlockCount;
+    private int endBlockCount;
+
+    /** Total number of bytes used to store term suffixes. */
+    public long totalBlockSuffixBytes;
+
+    /**
+     * Number of times each compression method has been used:
+     * 0 = uncompressed, 1 = lowercase_ascii, 2 = LZ4.
+     */
+    public final long[] compressionAlgorithms = new long[3];
+
+    /** Total number of suffix bytes before compression. */
+    public long totalUncompressedBlockSuffixBytes;
+
+    /**
+     * Total number of bytes used to store term stats (not including what the {@link
+     * PostingsReaderBase} stores).
+     */
+    public long totalBlockStatsBytes;
+
+    /**
+     * Total bytes stored by the {@link PostingsReaderBase}, plus the other few vInts stored in the
+     * frame.
+     */
+    public long totalBlockOtherBytes;
+
+    /** Segment name. */
+    public final String segment;
+
+    /** Field name. */
+    public final String field;
+
+    Stats(String segment, String field) {
+        this.segment = segment;
+        this.field = field;
+    }
+
+    void startBlock(SegmentTermsEnumFrame frame, boolean isFloor) {
+        totalBlockCount++;
+        if (isFloor) {
+            if (frame.fp == frame.fpOrig) {
+                floorBlockCount++;
+            }
+            floorSubBlockCount++;
+        } else {
+            nonFloorBlockCount++;
+        }
+
+        if (blockCountByPrefixLen.length <= frame.prefixLength) {
+            blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1 + frame.prefixLength);
+        }
+        blockCountByPrefixLen[frame.prefixLength]++;
+        startBlockCount++;
+        totalBlockSuffixBytes += frame.totalSuffixBytes;
+        totalUncompressedBlockSuffixBytes += frame.suffixesReader.length();
+        if (frame.suffixesReader != frame.suffixLengthsReader) {
+            totalUncompressedBlockSuffixBytes += frame.suffixLengthsReader.length();
+        }
+        totalBlockStatsBytes += frame.statsReader.length();
+        compressionAlgorithms[frame.compressionAlg.code]++;
+    }
+
+    void endBlock(SegmentTermsEnumFrame frame) {
+        final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
+        final int subBlockCount = frame.entCount - termCount;
+        totalTermCount += termCount;
+        if (termCount != 0 && subBlockCount != 0) {
+            mixedBlockCount++;
+        } else if (termCount != 0) {
+            termsOnlyBlockCount++;
+        } else if (subBlockCount != 0) {
+            subBlocksOnlyBlockCount++;
+        } else {
+            throw new IllegalStateException();
+        }
+        endBlockCount++;
+        final long otherBytes = frame.fpEnd - frame.fp - frame.totalSuffixBytes - frame.statsReader.length();
+        assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
+        totalBlockOtherBytes += otherBytes;
+    }
+
+    void term(BytesRef term) {
+        totalTermBytes += term.length;
+    }
+
+    void finish() {
+        assert startBlockCount == endBlockCount : "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
+        assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount
+            : "floorSubBlockCount="
+                + floorSubBlockCount
+                + " nonFloorBlockCount="
+                + nonFloorBlockCount
+                + " totalBlockCount="
+                + totalBlockCount;
+        assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount
+            : "totalBlockCount="
+                + totalBlockCount
+                + " mixedBlockCount="
+                + mixedBlockCount
+                + " subBlocksOnlyBlockCount="
+                + subBlocksOnlyBlockCount
+                + " termsOnlyBlockCount="
+                + termsOnlyBlockCount;
+    }
+
+    @Override
+    public String toString() {
+        final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+        PrintStream out = new PrintStream(bos, false, UTF_8);
+
+        out.println("  index FST:");
+        out.println("    " + indexNumBytes + " bytes");
+        out.println("  terms:");
+        out.println("    " + totalTermCount + " terms");
+        out.println(
+            "    "
+                + totalTermBytes
+                + " bytes"
+                + (totalTermCount != 0
+                    ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes) / totalTermCount) + " bytes/term)"
+                    : "")
+        );
+        out.println("  blocks:");
+        out.println("    " + totalBlockCount + " blocks");
+        out.println("    " + termsOnlyBlockCount + " terms-only blocks");
+        out.println("    " + subBlocksOnlyBlockCount + " sub-block-only blocks");
+        out.println("    " + mixedBlockCount + " mixed blocks");
+        out.println("    " + floorBlockCount + " floor blocks");
+        out.println("    " + (totalBlockCount - floorSubBlockCount) + " non-floor blocks");
+        out.println("    " + floorSubBlockCount + " floor sub-blocks");
+        out.println(
+            "    "
+                + totalUncompressedBlockSuffixBytes
+                + " term suffix bytes before compression"
+                + (totalBlockCount != 0
+                    ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes) / totalBlockCount) + " suffix-bytes/block)"
+                    : "")
+        );
+        StringBuilder compressionCounts = new StringBuilder();
+        for (int code = 0; code < compressionAlgorithms.length; ++code) {
+            if (compressionAlgorithms[code] == 0) {
+                continue;
+            }
+            if (compressionCounts.length() > 0) {
+                compressionCounts.append(", ");
+            }
+            compressionCounts.append(CompressionAlgorithm.byCode(code));
+            compressionCounts.append(": ");
+            compressionCounts.append(compressionAlgorithms[code]);
+        }
+        out.println(
+            "    "
+                + totalBlockSuffixBytes
+                + " compressed term suffix bytes"
+                + (totalBlockCount != 0
+                    ? " ("
+                        + String.format(Locale.ROOT, "%.2f", ((double) totalBlockSuffixBytes) / totalUncompressedBlockSuffixBytes)
+                        + " compression ratio - compression count by algorithm: "
+                        + compressionCounts
+                        + ")"
+                    : "")
+        );
+        out.println(
+            "    "
+                + totalBlockStatsBytes
+                + " term stats bytes"
+                + (totalBlockCount != 0
" (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes) / totalBlockCount) + " stats-bytes/block)" + : "") + ); + out.println( + " " + + totalBlockOtherBytes + + " other bytes" + + (totalBlockCount != 0 + ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes) / totalBlockCount) + " other-bytes/block)" + : "") + ); + if (totalBlockCount != 0) { + out.println(" by prefix length:"); + int total = 0; + for (int prefix = 0; prefix < blockCountByPrefixLen.length; prefix++) { + final int blockCount = blockCountByPrefixLen[prefix]; + total += blockCount; + if (blockCount != 0) { + out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount); + } + } + assert totalBlockCount == total; + } + + return bos.toString(UTF_8); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/terms/package-info.java b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/package-info.java new file mode 100644 index 0000000000000..18d7c7fc6d573 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/terms/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +/** + * All this code in this package is forked from Lucene and originates from: org.apache.lucene.codecs.lucene90.blocktree package. + * The reason this is forked is to avoid loading very large {@link org.elasticsearch.index.codec.postings.terms.FieldReader#minTerm} and + * {@link org.elasticsearch.index.codec.postings.terms.FieldReader#maxTerm} into jvm heap. The size of these terms is unbounded, and + * at scale this can consume significant jvm heap. + * + * + */ +package org.elasticsearch.index.codec.postings.terms; diff --git a/server/src/test/java/org/elasticsearch/index/codec/postings/ES812PostingsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/postings/ES812PostingsFormatTests.java index b11ab47102288..9fffbe711a9e8 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/postings/ES812PostingsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/postings/ES812PostingsFormatTests.java @@ -21,8 +21,6 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.CompetitiveImpactAccumulator; -import org.apache.lucene.codecs.lucene90.blocktree.FieldReader; -import org.apache.lucene.codecs.lucene90.blocktree.Stats; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; @@ -37,6 +35,8 @@ import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.index.BasePostingsFormatTestCase; import org.apache.lucene.tests.util.TestUtil; +import org.elasticsearch.index.codec.postings.terms.FieldReader; +import org.elasticsearch.index.codec.postings.terms.Stats; import java.io.IOException; import java.util.Arrays;