diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index ff875b4ef1c8a..6d709279902b4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -345,6 +345,10 @@ public SortedDocValues getSorted(FieldInfo field) throws IOException { } private SortedDocValues getSorted(SortedEntry entry, boolean valuesSorted) throws IOException { + if (entry.ordsEntry.docsWithFieldOffset == -2) { + return DocValues.emptySorted(); + } + final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize); return new BaseSortedDocValues(entry) { @@ -380,7 +384,25 @@ public long cost() { @Override public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { - if (valuesSorted && ords instanceof BaseDenseNumericValues denseOrds) { + if (ords instanceof BaseDenseNumericValues denseOrds) { + var block = tryReadAHead(factory, docs, offset); + if (block != null) { + return block; + } + // Falling back to tryRead(...) is safe here, given that current block index wasn't altered by looking ahead. + try (var builder = factory.singletonOrdinalsBuilder(this, docs.count() - offset, true)) { + BlockLoader.SingletonLongBuilder delegate = new SingletonLongToSingletonOrdinalDelegate(builder); + var result = denseOrds.tryRead(delegate, docs, offset); + if (result != null) { + return result; + } + } + } + return null; + } + + BlockLoader.Block tryReadAHead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { + if (ords instanceof BaseDenseNumericValues denseOrds && (valuesSorted || entry.termsDictEntry.termsDictSize == 1)) { int firstDoc = docs.get(offset); denseOrds.advanceExact(firstDoc); long startValue = denseOrds.longValue(); @@ -438,10 +460,18 @@ public TermsEnum termsEnum() throws IOException { public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { return null; } + + BlockLoader.Block tryReadAHead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { + return null; + } } abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader { abstract long lookAheadValueAt(int targetDoc) throws IOException; + + BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader.Docs docs, int offset) throws IOException { + return null; + } } abstract static class BaseSortedSetDocValues extends SortedSetDocValues { @@ -1256,41 +1286,49 @@ public long longValue() throws IOException { @Override public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { - assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]"; + try (BlockLoader.SingletonLongBuilder builder = factory.singletonLongs(docs.count() - offset)) { + return tryRead(builder, docs, offset); + } + } + + @Override + BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader.Docs docs, int offset) throws IOException { final int docsCount = docs.count(); doc = docs.get(docsCount - 1); - try (BlockLoader.SingletonLongBuilder builder = factory.singletonLongs(docs.count() - offset)) { - for (int i = offset; i < docsCount;) { - int index = docs.get(i); - final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; - final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; - if (blockIndex != currentBlockIndex) { - assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex; - // no need to seek if the loading block is the next block - if (currentBlockIndex + 1 != blockIndex) { - valuesData.seek(indexReader.get(blockIndex)); - } - currentBlockIndex = blockIndex; + for (int i = offset; i < docsCount;) { + int index = docs.get(i); + final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; + final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; + if (blockIndex != currentBlockIndex) { + assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex; + // no need to seek if the loading block is the next block + if (currentBlockIndex + 1 != blockIndex) { + valuesData.seek(indexReader.get(blockIndex)); + } + currentBlockIndex = blockIndex; + if (bitsPerOrd == -1) { decoder.decode(valuesData, currentBlock); + } else { + decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); } + } - // Try to append more than just one value: - // Instead of iterating over docs and find the max length, take an optimistic approach to avoid as - // many comparisons as there are remaining docs and instead do at most 7 comparisons: - int length = 1; - int remainingBlockLength = Math.min(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - blockInIndex, docsCount - i); - for (int newLength = remainingBlockLength; newLength > 1; newLength = newLength >> 1) { - int lastIndex = i + newLength - 1; - if (isDense(index, docs.get(lastIndex), newLength)) { - length = newLength; - break; - } + // Try to append more than just one value: + // Instead of iterating over docs and find the max length, take an optimistic approach to avoid as + // many comparisons as there are remaining docs and instead do at most 7 comparisons: + int length = 1; + int remainingBlockLength = Math.min(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - blockInIndex, docsCount - i); + for (int newLength = remainingBlockLength; newLength > 1; newLength = newLength >> 1) { + int lastIndex = i + newLength - 1; + if (isDense(index, docs.get(lastIndex), newLength)) { + length = newLength; + break; } - builder.appendLongs(currentBlock, blockInIndex, length); - i += length; } - return builder.build(); + builder.appendLongs(currentBlock, blockInIndex, length); + i += length; } + return builder.build(); } @Override @@ -1624,4 +1662,59 @@ private static class TermsDictEntry { int maxBlockLength; } + static final class SingletonLongToSingletonOrdinalDelegate implements BlockLoader.SingletonLongBuilder { + private final BlockLoader.SingletonOrdinalsBuilder builder; + + SingletonLongToSingletonOrdinalDelegate(BlockLoader.SingletonOrdinalsBuilder builder) { + this.builder = builder; + } + + @Override + public BlockLoader.SingletonLongBuilder appendLong(long value) { + throw new UnsupportedOperationException(); + } + + @Override + public BlockLoader.SingletonLongBuilder appendLongs(long[] values, int from, int length) { + // Unfortunately, no array copy here... + // Since we need to loop here, let's also keep track of min/max. + int minOrd = Integer.MAX_VALUE; + int maxOrd = Integer.MIN_VALUE; + int counter = 0; + int[] convertedOrds = new int[length]; + int end = from + length; + for (int j = from; j < end; j++) { + int ord = Math.toIntExact(values[j]); + convertedOrds[counter++] = ord; + minOrd = Math.min(minOrd, ord); + maxOrd = Math.max(maxOrd, ord); + } + builder.appendOrds(convertedOrds, 0, length, minOrd, maxOrd); + return this; + } + + @Override + public BlockLoader.Block build() { + return builder.build(); + } + + @Override + public BlockLoader.Builder appendNull() { + throw new UnsupportedOperationException(); + } + + @Override + public BlockLoader.Builder beginPositionEntry() { + throw new UnsupportedOperationException(); + } + + @Override + public BlockLoader.Builder endPositionEntry() { + throw new UnsupportedOperationException(); + } + + @Override + public void close() {} + } + } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 64d54cc47fdb1..74182e10f64d4 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -756,7 +756,7 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw return block; } } - try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset)) { + try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset, false)) { for (int i = offset; i < docs.count(); i++) { int doc = docs.get(i); if (doc < ordinals.docID()) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java index 601379c37823e..75ef902e82b27 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java @@ -447,7 +447,7 @@ interface BlockFactory { /** * Build a reader for reading {@link SortedDocValues} */ - SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count); + SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count, boolean isDense); /** * Build a reader for reading {@link SortedSetDocValues} @@ -548,6 +548,8 @@ interface SingletonOrdinalsBuilder extends Builder { * Appends an ordinal to the builder. */ SingletonOrdinalsBuilder appendOrd(int value); + + SingletonOrdinalsBuilder appendOrds(int[] values, int from, int length, int minOrd, int maxOrd); } interface SortedSetOrdinalsBuilder extends Builder { diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index 28a7f08bb8d27..d21f9b0af7e3c 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -40,7 +40,10 @@ import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; +import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues; +import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues; import org.elasticsearch.index.mapper.BlockLoader; +import org.elasticsearch.index.mapper.BlockLoader.OptionalColumnAtATimeReader; import org.elasticsearch.index.mapper.TestBlock; import org.elasticsearch.test.ESTestCase; @@ -717,8 +720,9 @@ public DocValuesFormat getDocValuesFormatForField(String field) { } } - public void testBulkLoading() throws Exception { + public void testOptionalColumnAtATimeReader() throws Exception { final String counterField = "counter"; + final String counterFieldAsString = "counter_as_string"; final String timestampField = "@timestamp"; final String gaugeField = "gauge"; long currentTimestamp = 1704067200000L; @@ -735,6 +739,7 @@ public void testBulkLoading() throws Exception { // Index sorting doesn't work with NumericDocValuesField: d.add(SortedNumericDocValuesField.indexedField(timestampField, timestamp)); d.add(new SortedNumericDocValuesField(counterField, currentCounter)); + d.add(new SortedSetDocValuesField(counterFieldAsString, new BytesRef(Long.toString(currentCounter)))); d.add(new SortedNumericDocValuesField(gaugeField, gauge1Values[i % gauge1Values.length])); iw.addDocument(d); @@ -753,9 +758,10 @@ public void testBulkLoading() throws Exception { try (var reader = DirectoryReader.open(iw)) { int gaugeIndex = numDocs; for (var leaf : reader.leaves()) { - var timestampDV = getColumnAtTimeReader(leaf.reader(), timestampField); - var counterDV = getColumnAtTimeReader(leaf.reader(), counterField); - var gaugeDV = getColumnAtTimeReader(leaf.reader(), gaugeField); + var timestampDV = getBaseDenseNumericValues(leaf.reader(), timestampField); + var counterDV = getBaseDenseNumericValues(leaf.reader(), counterField); + var gaugeDV = getBaseDenseNumericValues(leaf.reader(), gaugeField); + var stringCounterDV = getBaseSortedDocValues(leaf.reader(), counterFieldAsString); int maxDoc = leaf.reader().maxDoc(); for (int i = 0; i < maxDoc;) { int size = Math.max(1, random().nextInt(0, maxDoc - i)); @@ -778,10 +784,18 @@ public void testBulkLoading() throws Exception { var block = (TestBlock) counterDV.tryRead(factory, docs, 0); assertNotNull(block); assertEquals(size, block.size()); + var stringBlock = (TestBlock) stringCounterDV.tryRead(factory, docs, 0); + assertNotNull(stringBlock); + assertEquals(size, stringBlock.size()); for (int j = 0; j < block.size(); j++) { - long actualCounter = (long) block.get(j); long expectedCounter = currentCounter; + long actualCounter = (long) block.get(j); assertEquals(expectedCounter, actualCounter); + + var expectedStringCounter = Long.toString(actualCounter); + var actualStringCounter = ((BytesRef) stringBlock.get(j)).utf8ToString(); + assertEquals(expectedStringCounter, actualStringCounter); + currentCounter--; } } @@ -816,9 +830,10 @@ public void testBulkLoading() throws Exception { int size = maxDoc - randomOffset; int gaugeIndex = size; - var timestampDV = getColumnAtTimeReader(leafReader, timestampField); - var counterDV = getColumnAtTimeReader(leafReader, counterField); - var gaugeDV = getColumnAtTimeReader(leafReader, gaugeField); + var timestampDV = getBaseDenseNumericValues(leafReader, timestampField); + var counterDV = getBaseDenseNumericValues(leafReader, counterField); + var gaugeDV = getBaseDenseNumericValues(leafReader, gaugeField); + var stringCounterDV = getBaseSortedDocValues(leafReader, counterFieldAsString); var docs = TestBlock.docs(IntStream.range(0, maxDoc).toArray()); @@ -839,10 +854,20 @@ public void testBulkLoading() throws Exception { var block = (TestBlock) counterDV.tryRead(factory, docs, randomOffset); assertNotNull(block); assertEquals(size, block.size()); + + var stringBlock = (TestBlock) stringCounterDV.tryRead(factory, docs, randomOffset); + assertNotNull(stringBlock); + assertEquals(size, stringBlock.size()); + for (int j = 0; j < block.size(); j++) { long actualCounter = (long) block.get(j); long expectedCounter = currentCounter; assertEquals(expectedCounter, actualCounter); + + var expectedStringCounter = Long.toString(actualCounter); + var actualStringCounter = ((BytesRef) stringBlock.get(j)).utf8ToString(); + assertEquals(expectedStringCounter, actualStringCounter); + currentCounter--; } } @@ -863,30 +888,41 @@ public void testBulkLoading() throws Exception { size = docs.count(); // Test against values loaded using normal doc value apis: long[] expectedCounters = new long[size]; - counterDV = getColumnAtTimeReader(leafReader, counterField); + counterDV = getBaseDenseNumericValues(leafReader, counterField); for (int i = 0; i < docs.count(); i++) { int docId = docs.get(i); counterDV.advanceExact(docId); expectedCounters[i] = counterDV.longValue(); } - counterDV = getColumnAtTimeReader(leafReader, counterField); + counterDV = getBaseDenseNumericValues(leafReader, counterField); + stringCounterDV = getBaseSortedDocValues(leafReader, counterFieldAsString); { // bulk loading counter field: var block = (TestBlock) counterDV.tryRead(factory, docs, 0); assertNotNull(block); assertEquals(size, block.size()); + + var stringBlock = (TestBlock) stringCounterDV.tryRead(factory, docs, 0); + assertNotNull(stringBlock); + assertEquals(size, stringBlock.size()); + for (int j = 0; j < block.size(); j++) { long actualCounter = (long) block.get(j); long expectedCounter = expectedCounters[j]; assertEquals(expectedCounter, actualCounter); + + var expectedStringCounter = Long.toString(actualCounter); + var actualStringCounter = ((BytesRef) stringBlock.get(j)).utf8ToString(); + assertEquals(expectedStringCounter, actualStringCounter); } } } } } - public void testBulkLoadingWithSparseDocs() throws Exception { + public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception { final String counterField = "counter"; + final String counterAsStringField = "counter_as_string"; final String timestampField = "@timestamp"; String queryField = "query_field"; long currentTimestamp = 1704067200000L; @@ -904,6 +940,7 @@ public void testBulkLoadingWithSparseDocs() throws Exception { // Index sorting doesn't work with NumericDocValuesField: d.add(SortedNumericDocValuesField.indexedField(timestampField, timestamp)); d.add(new SortedNumericDocValuesField(counterField, currentCounter)); + d.add(new SortedDocValuesField(counterAsStringField, new BytesRef(Long.toString(currentCounter)))); d.add(new SortedNumericDocValuesField(queryField, q)); if (i % 120 == 0) { q++; @@ -937,10 +974,12 @@ public void testBulkLoadingWithSparseDocs() throws Exception { false ); assertEquals(numDocsPerQValue, topDocs.totalHits.value()); - var timestampDV = getColumnAtTimeReader(leafReader, timestampField); + var timestampDV = getBaseDenseNumericValues(leafReader, timestampField); long[] expectedTimestamps = new long[numDocsPerQValue]; - var counterDV = getColumnAtTimeReader(leafReader, counterField); + var counterDV = getBaseDenseNumericValues(leafReader, counterField); long[] expectedCounters = new long[numDocsPerQValue]; + var counterAsStringDV = getBaseSortedDocValues(leafReader, counterAsStringField); + String[] expectedCounterAsStrings = new String[numDocsPerQValue]; int[] docIds = new int[numDocsPerQValue]; for (int i = 0; i < topDocs.scoreDocs.length; i++) { var scoreDoc = topDocs.scoreDocs[i]; @@ -951,11 +990,13 @@ public void testBulkLoadingWithSparseDocs() throws Exception { assertTrue(counterDV.advanceExact(scoreDoc.doc)); expectedCounters[i] = counterDV.longValue(); + assertTrue(counterAsStringDV.advanceExact(scoreDoc.doc)); + expectedCounterAsStrings[i] = counterAsStringDV.lookupOrd(counterAsStringDV.ordValue()).utf8ToString(); } var docs = TestBlock.docs(docIds); { - timestampDV = getColumnAtTimeReader(leafReader, timestampField); + timestampDV = getBaseDenseNumericValues(leafReader, timestampField); var block = (TestBlock) timestampDV.tryRead(factory, docs, 0); assertNotNull(block); assertEquals(numDocsPerQValue, block.size()); @@ -966,7 +1007,7 @@ public void testBulkLoadingWithSparseDocs() throws Exception { } } { - counterDV = getColumnAtTimeReader(leafReader, counterField); + counterDV = getBaseDenseNumericValues(leafReader, counterField); var block = (TestBlock) counterDV.tryRead(factory, docs, 0); assertNotNull(block); assertEquals(numDocsPerQValue, block.size()); @@ -976,6 +1017,17 @@ public void testBulkLoadingWithSparseDocs() throws Exception { assertEquals(expectedCounter, actualCounter); } } + { + counterAsStringDV = getBaseSortedDocValues(leafReader, counterAsStringField); + var block = (TestBlock) counterAsStringDV.tryRead(factory, docs, 0); + assertNotNull(block); + assertEquals(numDocsPerQValue, block.size()); + for (int j = 0; j < block.size(); j++) { + var actualCounter = ((BytesRef) block.get(j)).utf8ToString(); + var expectedCounter = expectedCounterAsStrings[j]; + assertEquals(expectedCounter, actualCounter); + } + } } } } @@ -1029,31 +1081,64 @@ public int get(int i) { return i; } }; - var idReader = ESTestCase.asInstanceOf( - BlockLoader.OptionalColumnAtATimeReader.class, - leaf.reader().getNumericDocValues("id") - ); + var idReader = ESTestCase.asInstanceOf(OptionalColumnAtATimeReader.class, leaf.reader().getNumericDocValues("id")); TestBlock idBlock = (TestBlock) idReader.tryRead(factory, docs, 0); assertNotNull(idBlock); - var reader2 = ESTestCase.asInstanceOf( - BlockLoader.OptionalColumnAtATimeReader.class, - leaf.reader().getSortedDocValues(secondField) - ); - assertNull(reader2.tryRead(factory, docs, 0)); - var reader3 = ESTestCase.asInstanceOf( - BlockLoader.OptionalColumnAtATimeReader.class, - leaf.reader().getSortedDocValues(unsortedField) - ); - assertNull(reader3.tryRead(factory, docs, 0)); + + { + var reader2 = (BaseSortedDocValues) ESTestCase.asInstanceOf( + OptionalColumnAtATimeReader.class, + leaf.reader().getSortedDocValues(secondField) + ); + int randomOffset = ESTestCase.between(0, docs.count() - 1); + TestBlock block; + if (reader2.getValueCount() == 1) { + block = (TestBlock) reader2.tryReadAHead(factory, docs, randomOffset); + } else { + assertNull(reader2.tryReadAHead(factory, docs, randomOffset)); + block = (TestBlock) reader2.tryRead(factory, docs, randomOffset); + } + assertNotNull(block); + assertThat(block.size(), equalTo(docs.count() - randomOffset)); + for (int i = 0; i < block.size(); i++) { + String actualHostName = BytesRefs.toString(block.get(i)); + int id = ((Number) idBlock.get(i + randomOffset)).intValue(); + String expectedHostName = hostnames.get(id); + assertEquals(expectedHostName, actualHostName); + } + } + { + var reader3 = (BaseSortedDocValues) ESTestCase.asInstanceOf( + OptionalColumnAtATimeReader.class, + leaf.reader().getSortedDocValues(unsortedField) + ); + int randomOffset = ESTestCase.between(0, docs.count() - 1); + TestBlock block; + if (reader3.getValueCount() == 1) { + block = (TestBlock) reader3.tryReadAHead(factory, docs, randomOffset); + } else { + assertNull(reader3.tryReadAHead(factory, docs, randomOffset)); + block = (TestBlock) reader3.tryRead(factory, docs, randomOffset); + } + assertNotNull(reader3); + assertNotNull(block); + assertThat(block.size(), equalTo(docs.count() - randomOffset)); + for (int i = 0; i < block.size(); i++) { + String actualHostName = BytesRefs.toString(block.get(i)); + int id = ((Number) idBlock.get(i + randomOffset)).intValue(); + String expectedHostName = hostnames.get(id); + assertEquals(expectedHostName, actualHostName); + } + } for (int offset = 0; offset < idBlock.size(); offset += ESTestCase.between(1, numDocs)) { int start = offset; - var reader1 = ESTestCase.asInstanceOf( - BlockLoader.OptionalColumnAtATimeReader.class, + var reader1 = (BaseSortedDocValues) ESTestCase.asInstanceOf( + OptionalColumnAtATimeReader.class, leaf.reader().getSortedDocValues(primaryField) ); while (start < idBlock.size()) { int end = start + random().nextInt(idBlock.size() - start); - TestBlock hostBlock = (TestBlock) reader1.tryRead(factory, new BlockLoader.Docs() { + TestBlock hostBlock = (TestBlock) reader1.tryReadAHead(factory, new BlockLoader.Docs() { @Override public int count() { return end + 1; @@ -1092,11 +1177,16 @@ public int get(int docId) { } } - private static ES819TSDBDocValuesProducer.BaseDenseNumericValues getColumnAtTimeReader(LeafReader leafReader, String counterField) - throws IOException { - return (ES819TSDBDocValuesProducer.BaseDenseNumericValues) DocValues.unwrapSingleton( - leafReader.getSortedNumericDocValues(counterField) - ); + private static BaseDenseNumericValues getBaseDenseNumericValues(LeafReader leafReader, String field) throws IOException { + return (BaseDenseNumericValues) DocValues.unwrapSingleton(leafReader.getSortedNumericDocValues(field)); + } + + private static BaseSortedDocValues getBaseSortedDocValues(LeafReader leafReader, String field) throws IOException { + var sortedDocValues = leafReader.getSortedDocValues(field); + if (sortedDocValues == null) { + sortedDocValues = DocValues.unwrapSingleton(leafReader.getSortedSetDocValues(field)); + } + return (BaseSortedDocValues) sortedDocValues; } private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField) { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java index f28aa7af3d228..09d7a4147563b 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java @@ -268,7 +268,11 @@ public BlockLoader.Block constantBytes(BytesRef value, int count) { } @Override - public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int expectedCount) { + public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder( + SortedDocValues ordinals, + int expectedCount, + boolean isDense + ) { class SingletonOrdsBuilder extends TestBlock.Builder implements BlockLoader.SingletonOrdinalsBuilder { private SingletonOrdsBuilder() { super(expectedCount); @@ -277,12 +281,20 @@ private SingletonOrdsBuilder() { @Override public SingletonOrdsBuilder appendOrd(int value) { try { - add(ordinals.lookupOrd(value)); + add(BytesRef.deepCopyOf(ordinals.lookupOrd(value))); return this; } catch (IOException e) { throw new UncheckedIOException(e); } } + + @Override + public BlockLoader.SingletonOrdinalsBuilder appendOrds(int[] values, int from, int length, int minOrd, int maxOrd) { + for (int i = from; i < from + length; i++) { + appendOrd(values[i]); + } + return this; + } } return new SingletonOrdsBuilder(); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java index 814a5c1a5c8a3..b23114292680d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java @@ -114,8 +114,8 @@ public BlockLoader.Builder nulls(int expectedCount) { } @Override - public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) { - return new SingletonOrdinalsBuilder(factory, ordinals, count); + public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count, boolean isDense) { + return new SingletonOrdinalsBuilder(factory, ordinals, count, isDense); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilder.java index 3effb3b476c9e..af308000d5ebd 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilder.java @@ -33,16 +33,19 @@ public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBu private int maxOrd = Integer.MIN_VALUE; private final int[] ords; private int count; + private final boolean isDense; - public SingletonOrdinalsBuilder(BlockFactory blockFactory, SortedDocValues docValues, int count) { + public SingletonOrdinalsBuilder(BlockFactory blockFactory, SortedDocValues docValues, int count, boolean isDense) { this.blockFactory = blockFactory; this.docValues = docValues; blockFactory.adjustBreaker(ordsSize(count)); this.ords = new int[count]; + this.isDense = isDense; } @Override public SingletonOrdinalsBuilder appendNull() { + assert isDense == false; ords[count++] = -1; // real ords can't be < 0, so we use -1 as null return this; } @@ -55,6 +58,15 @@ public SingletonOrdinalsBuilder appendOrd(int ord) { return this; } + @Override + public BlockLoader.SingletonOrdinalsBuilder appendOrds(int[] values, int from, int length, int minOrd, int maxOrd) { + System.arraycopy(values, from, ords, count, length); + this.count += length; + this.minOrd = Math.min(this.minOrd, minOrd); + this.maxOrd = Math.max(this.maxOrd, maxOrd); + return this; + } + @Override public SingletonOrdinalsBuilder beginPositionEntry() { throw new UnsupportedOperationException("should only have one value per doc"); @@ -69,9 +81,11 @@ private BytesRefBlock tryBuildConstantBlock() { if (minOrd != maxOrd) { return null; } - for (int ord : ords) { - if (ord == -1) { - return null; + if (isDense == false) { + for (int ord : ords) { + if (ord == -1) { + return null; + } } } final BytesRef v; @@ -107,33 +121,61 @@ BytesRefBlock buildOrdinal() { try { int[] newOrds = new int[valueCount]; Arrays.fill(newOrds, -1); - for (int ord : ords) { - if (ord != -1) { + // Re-mapping ordinals to be more space-efficient: + if (isDense) { + for (int ord : ords) { newOrds[ord - minOrd] = 0; } + } else { + for (int ord : ords) { + if (ord != -1) { + newOrds[ord - minOrd] = 0; + } + } } // resolve the ordinals and remaps the ordinals - int nextOrd = -1; - try (BytesRefVector.Builder bytesBuilder = blockFactory.newBytesRefVectorBuilder(Math.min(valueCount, ords.length))) { - for (int i = 0; i < newOrds.length; i++) { - if (newOrds[i] != -1) { - newOrds[i] = ++nextOrd; - bytesBuilder.appendBytesRef(docValues.lookupOrd(i + minOrd)); + try { + int nextOrd = -1; + BytesRef firstTerm = minOrd != Integer.MAX_VALUE ? docValues.lookupOrd(minOrd) : null; + int estimatedSize; + if (firstTerm != null) { + estimatedSize = Math.min(valueCount, ords.length) * firstTerm.length; + } else { + estimatedSize = Math.min(valueCount, ords.length); + } + try (BytesRefVector.Builder bytesBuilder = blockFactory.newBytesRefVectorBuilder(estimatedSize)) { + if (firstTerm != null) { + newOrds[0] = ++nextOrd; + bytesBuilder.appendBytesRef(firstTerm); } + for (int i = firstTerm != null ? 1 : 0; i < newOrds.length; i++) { + if (newOrds[i] != -1) { + newOrds[i] = ++nextOrd; + bytesBuilder.appendBytesRef(docValues.lookupOrd(i + minOrd)); + } + } + bytesVector = bytesBuilder.build(); } - bytesVector = bytesBuilder.build(); } catch (IOException e) { throw new UncheckedIOException("error resolving ordinals", e); } - try (IntBlock.Builder ordinalsBuilder = blockFactory.newIntBlockBuilder(ords.length)) { - for (int ord : ords) { - if (ord == -1) { - ordinalsBuilder.appendNull(); - } else { - ordinalsBuilder.appendInt(newOrds[ord - minOrd]); + if (isDense) { + // Reusing ords array and overwrite all slots with re-mapped ordinals + for (int i = 0; i < ords.length; i++) { + ords[i] = newOrds[ords[i] - minOrd]; + } + ordinalBlock = blockFactory.newIntArrayVector(ords, ords.length).asBlock(); + } else { + try (IntBlock.Builder ordinalsBuilder = blockFactory.newIntBlockBuilder(ords.length)) { + for (int ord : ords) { + if (ord == -1) { + ordinalsBuilder.appendNull(); + } else { + ordinalsBuilder.appendInt(newOrds[ord - minOrd]); + } } + ordinalBlock = ordinalsBuilder.build(); } - ordinalBlock = ordinalsBuilder.build(); } final OrdinalBytesRefBlock result = new OrdinalBytesRefBlock(ordinalBlock, bytesVector); bytesVector = null; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/TimeSeriesExtractFieldOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/TimeSeriesExtractFieldOperator.java index 26a4966bb8e34..f6712bf651da1 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/TimeSeriesExtractFieldOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/TimeSeriesExtractFieldOperator.java @@ -202,7 +202,7 @@ public BlockLoader.Block constantNulls(int count) { } @Override - public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) { + public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count, boolean isDense) { throw new UnsupportedOperationException("must not be used by column readers"); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilderTests.java index c89811242845e..a1155b4291ec7 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/SingletonOrdinalsBuilderTests.java @@ -150,7 +150,9 @@ public void testAllNull() throws IOException { try (IndexReader reader = indexWriter.getReader()) { for (LeafReaderContext ctx : reader.leaves()) { SortedDocValues docValues = ctx.reader().getSortedDocValues("f"); - try (SingletonOrdinalsBuilder builder = new SingletonOrdinalsBuilder(factory, docValues, ctx.reader().numDocs())) { + try ( + SingletonOrdinalsBuilder builder = new SingletonOrdinalsBuilder(factory, docValues, ctx.reader().numDocs(), false); + ) { for (int i = 0; i < ctx.reader().maxDoc(); i++) { if (ctx.reader().getLiveDocs() == null || ctx.reader().getLiveDocs().get(i)) { assertThat(docValues.advanceExact(i), equalTo(true)); @@ -185,8 +187,8 @@ public void testEmitOrdinalForHighCardinality() throws IOException { int batchSize = between(40, 100); int ord = random().nextInt(numOrds); try ( - var b1 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize); - var b2 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize) + var b1 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize, randomBoolean()); + var b2 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize, randomBoolean()) ) { for (int i = 0; i < batchSize; i++) { b1.appendOrd(ord);