diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java index 30f8409356d9c..eb5748d3647fc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java @@ -13,6 +13,7 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreakingException; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; import org.elasticsearch.index.mapper.blockloader.docvalues.BlockDocValuesReader; @@ -377,6 +378,13 @@ interface Docs { * also a test implementation, but there may be no more other implementations. */ interface BlockFactory { + /** + * Adjust the circuit breaker with the given delta, if the delta is negative, the breaker will + * be adjusted without tripping. + * @throws CircuitBreakingException if the breaker was put above its limit + */ + void adjustBreaker(long delta) throws CircuitBreakingException; + /** * Build a builder to load booleans as loaded from doc values. Doc values * load booleans in sorted order. @@ -486,6 +494,12 @@ interface BlockFactory { */ Block constantBytes(BytesRef value, int count); + /** + * Build a block that contains {@code value} repeated + * {@code count} times. + */ + Block constantInt(int value, int count); + /** * Build a reader for reading {@link SortedDocValues} */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/Utf8CodePointsFromOrdsBlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/Utf8CodePointsFromOrdsBlockLoader.java new file mode 100644 index 0000000000000..730ef2a58a6f3 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/Utf8CodePointsFromOrdsBlockLoader.java @@ -0,0 +1,466 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.blockloader.docvalues; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.UnicodeUtil; + +import java.io.IOException; +import java.util.Arrays; + +import static org.elasticsearch.index.mapper.blockloader.docvalues.Warnings.registerSingleValueWarning; + +/** + * A count of utf-8 code points for {@code keyword} style fields that are stored as a lookup table. + */ +public class Utf8CodePointsFromOrdsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader { + /** + * When there are fewer than this many unique values we use much more efficient "low cardinality" + * loaders. This must be fairly small because we build an untracked int[] with at most this many + * entries for a cache. 
+ */ + static final int LOW_CARDINALITY = 1024; + + private final Warnings warnings; + + private final String fieldName; + + public Utf8CodePointsFromOrdsBlockLoader(Warnings warnings, String fieldName) { + this.fieldName = fieldName; + this.warnings = warnings; + } + + @Override + public IntBuilder builder(BlockFactory factory, int expectedCount) { + return factory.ints(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + SortedSetDocValues docValues = context.reader().getSortedSetDocValues(fieldName); + if (docValues != null) { + if (docValues.getValueCount() > LOW_CARDINALITY) { + return new ImmediateOrdinals(warnings, docValues); + } + SortedDocValues singleton = DocValues.unwrapSingleton(docValues); + if (singleton != null) { + return new SingletonOrdinals(singleton); + } + return new Ordinals(warnings, docValues); + } + SortedDocValues singleton = context.reader().getSortedDocValues(fieldName); + if (singleton != null) { + if (singleton.getValueCount() > LOW_CARDINALITY) { + return new ImmediateOrdinals(warnings, DocValues.singleton(singleton)); + } + return new SingletonOrdinals(singleton); + } + return new ConstantNullsReader(); + } + + @Override + public String toString() { + return "Utf8CodePointsFromOrds[" + fieldName + "]"; + } + + /** + * Loads low cardinality singleton ordinals in using a cache of code point counts. + *
+ * If we haven't cached the counts for all ordinals then the process looks like:
+ * <ol>
+ *     <li>Build an int[] containing the ordinals</li>
+ *     <li>Sort a copy of the int[] and load the cache for each ordinal. The sorting
+ *         is important here because ordinals are faster to resolve in ascending order.</li>
+ *     <li>Walk the int[] in order, reading from the cache to build the page</li>
+ * </ol>
+ * <p>
+ * If we have cached the counts for all ordinals we load the
+ * ordinals and look them up in the cache immediately.
+ */ + private static class SingletonOrdinals extends BlockDocValuesReader { + private final SortedDocValues ordinals; + private final int[] cache; + + private int cacheEntriesFilled; + + SingletonOrdinals(SortedDocValues ordinals) { + this.ordinals = ordinals; + + // TODO track this memory. we can't yet because this isn't Closeable + this.cache = new int[ordinals.getValueCount()]; + Arrays.fill(this.cache, -1); + } + + @Override + public Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException { + if (docs.count() - offset == 1) { + return blockForSingleDoc(factory, docs.get(offset)); + } + + if (cacheEntriesFilled == cache.length) { + return buildFromFilledCache(factory, docs, offset); + } + + int[] ords = readOrds(factory, docs, offset); + try { + fillCache(factory, ords); + return buildFromCache(factory, cache, ords); + } finally { + factory.adjustBreaker(-RamUsageEstimator.sizeOf(ords)); + } + } + + @Override + public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { + if (ordinals.advanceExact(docId)) { + ((IntBuilder) builder).appendInt(codePointsAtOrd(ordinals.ordValue())); + } else { + builder.appendNull(); + } + } + + @Override + public int docId() { + return ordinals.docID(); + } + + @Override + public String toString() { + return "Utf8CodePointsFromOrds.SingletonOrdinals"; + } + + private Block blockForSingleDoc(BlockFactory factory, int docId) throws IOException { + if (ordinals.advanceExact(docId)) { + return factory.constantInt(codePointsAtOrd(ordinals.ordValue()), 1); + } else { + return factory.constantNulls(1); + } + } + + private int[] readOrds(BlockFactory factory, Docs docs, int offset) throws IOException { + int count = docs.count() - offset; + long size = sizeOfArray(count); + factory.adjustBreaker(size); + int[] ords = null; + try { + ords = new int[count]; + for (int i = offset; i < docs.count(); i++) { + int doc = docs.get(i); + if (ordinals.advanceExact(doc) == false) { + ords[i] = -1; + continue; + } + ords[i] = ordinals.ordValue(); + } + int[] result = ords; + ords = null; + return result; + } finally { + if (ords != null) { + factory.adjustBreaker(-size); + } + } + } + + private void fillCache(BlockFactory factory, int[] ords) throws IOException { + factory.adjustBreaker(RamUsageEstimator.sizeOf(ords)); + try { + int[] sortedOrds = ords.clone(); + Arrays.sort(sortedOrds); + int i = 0; + while (i < sortedOrds.length && sortedOrds[i] < 0) { + i++; + } + while (i < sortedOrds.length) { + // Fill the cache. Duplicates will noop quickly. + codePointsAtOrd(sortedOrds[i++]); + } + } finally { + factory.adjustBreaker(-RamUsageEstimator.sizeOf(ords)); + } + } + + private Block buildFromFilledCache(BlockFactory factory, Docs docs, int offset) throws IOException { + int count = docs.count() - offset; + try (IntBuilder builder = factory.ints(count)) { + for (int i = offset; i < docs.count(); i++) { + int doc = docs.get(i); + if (ordinals.advanceExact(doc) == false) { + builder.appendNull(); + continue; + } + builder.appendInt(cache[ordinals.ordValue()]); + } + return builder.build(); + } + } + + private int codePointsAtOrd(int ord) throws IOException { + if (cache[ord] >= 0) { + return cache[ord]; + } + BytesRef v = ordinals.lookupOrd(ord); + int count = UnicodeUtil.codePointCount(v); + cache[ord] = count; + cacheEntriesFilled++; + return count; + } + } + + /** + * Loads low cardinality non-singleton ordinals in using a cache of code point counts. 
+ * See {@link SingletonOrdinals} for the process + */ + private static class Ordinals extends BlockDocValuesReader { + private final Warnings warnings; + private final SortedSetDocValues ordinals; + private final int[] cache; + + private int cacheEntriesFilled; + + Ordinals(Warnings warnings, SortedSetDocValues ordinals) { + this.warnings = warnings; + this.ordinals = ordinals; + + // TODO track this memory. we can't yet because this isn't Releasable + this.cache = new int[Math.toIntExact(ordinals.getValueCount())]; + Arrays.fill(this.cache, -1); + } + + @Override + public Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException { + if (docs.count() - offset == 1) { + return blockForSingleDoc(factory, docs.get(offset)); + } + + if (cacheEntriesFilled == cache.length) { + return buildFromFilledCache(factory, docs, offset); + } + + int[] ords = readOrds(factory, docs, offset); + try { + fillCache(factory, ords); + return buildFromCache(factory, cache, ords); + } finally { + factory.adjustBreaker(-RamUsageEstimator.shallowSizeOf(ords)); + } + } + + @Override + public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { + if (ordinals.advanceExact(docId) == false) { + builder.appendNull(); + return; + } + if (ordinals.docValueCount() != 1) { + registerSingleValueWarning(warnings); + builder.appendNull(); + return; + } + ((IntBuilder) builder).appendInt(codePointsAtOrd(Math.toIntExact(ordinals.nextOrd()))); + } + + @Override + public int docId() { + return ordinals.docID(); + } + + @Override + public String toString() { + return "Utf8CodePointsFromOrds.Ordinals"; + } + + private Block blockForSingleDoc(BlockFactory factory, int docId) throws IOException { + if (ordinals.advanceExact(docId) == false) { + return factory.constantNulls(1); + } + if (ordinals.docValueCount() == 1) { + return factory.constantInt(codePointsAtOrd(Math.toIntExact(ordinals.nextOrd())), 1); + } + registerSingleValueWarning(warnings); + return factory.constantNulls(1); + } + + private int[] readOrds(BlockFactory factory, Docs docs, int offset) throws IOException { + int count = docs.count() - offset; + long size = sizeOfArray(count); + factory.adjustBreaker(size); + int[] ords = null; + try { + ords = new int[docs.count() - offset]; + for (int i = offset; i < docs.count(); i++) { + int doc = docs.get(i); + if (ordinals.advanceExact(doc) == false) { + ords[i] = -1; + continue; + } + if (ordinals.docValueCount() != 1) { + registerSingleValueWarning(warnings); + ords[i] = -1; + continue; + } + ords[i] = Math.toIntExact(ordinals.nextOrd()); + } + int[] result = ords; + ords = null; + return result; + } finally { + if (ords != null) { + factory.adjustBreaker(-size); + } + } + } + + private void fillCache(BlockFactory factory, int[] ords) throws IOException { + factory.adjustBreaker(RamUsageEstimator.sizeOf(ords)); + try { + int[] sortedOrds = ords.clone(); + Arrays.sort(sortedOrds); + int i = 0; + while (i < sortedOrds.length && sortedOrds[i] < 0) { + i++; + } + while (i < sortedOrds.length) { + // Fill the cache. Duplicates will noop quickly. 
+ codePointsAtOrd(sortedOrds[i++]); + } + } finally { + factory.adjustBreaker(-RamUsageEstimator.sizeOf(ords)); + } + } + + private Block buildFromFilledCache(BlockFactory factory, Docs docs, int offset) throws IOException { + int count = docs.count() - offset; + try (IntBuilder builder = factory.ints(count)) { + for (int i = offset; i < docs.count(); i++) { + int doc = docs.get(i); + if (ordinals.advanceExact(doc) == false) { + builder.appendNull(); + continue; + } + if (ordinals.docValueCount() != 1) { + registerSingleValueWarning(warnings); + builder.appendNull(); + continue; + } + builder.appendInt(cache[Math.toIntExact(ordinals.nextOrd())]); + } + return builder.build(); + } + } + + private int codePointsAtOrd(int ord) throws IOException { + if (cache[ord] >= 0) { + return cache[ord]; + } + BytesRef v = ordinals.lookupOrd(ord); + int count = UnicodeUtil.codePointCount(v); + cache[ord] = count; + cacheEntriesFilled++; + return count; + } + } + + /** + * Loads a count of utf-8 code points for each ordinal doc by doc, without a cache. We use this when there + * are many unique doc values and the cache hit rate is unlikely to be high. + */ + private static class ImmediateOrdinals extends BlockDocValuesReader { + private final Warnings warnings; + private final SortedSetDocValues ordinals; + + ImmediateOrdinals(Warnings warnings, SortedSetDocValues ordinals) { + this.ordinals = ordinals; + this.warnings = warnings; + } + + @Override + public Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException { + if (docs.count() - offset == 1) { + return blockForSingleDoc(factory, docs.get(offset)); + } + try (IntBuilder builder = factory.ints(docs.count() - offset)) { + for (int i = offset; i < docs.count(); i++) { + read(docs.get(i), builder); + } + return builder.build(); + } + } + + @Override + public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { + read(docId, (IntBuilder) builder); + } + + private void read(int docId, IntBuilder builder) throws IOException { + if (ordinals.advanceExact(docId) == false) { + builder.appendNull(); + return; + } + if (ordinals.docValueCount() != 1) { + registerSingleValueWarning(warnings); + builder.appendNull(); + return; + } + builder.appendInt(codePointsAtOrd(ordinals.nextOrd())); + } + + @Override + public int docId() { + return ordinals.docID(); + } + + @Override + public String toString() { + return "Utf8CodePointsFromOrds.Immediate"; + } + + private Block blockForSingleDoc(BlockFactory factory, int docId) throws IOException { + if (ordinals.advanceExact(docId) == false) { + return factory.constantNulls(1); + } + if (ordinals.docValueCount() == 1) { + return factory.constantInt(codePointsAtOrd(ordinals.nextOrd()), 1); + } + registerSingleValueWarning(warnings); + return factory.constantNulls(1); + } + + private int codePointsAtOrd(long ord) throws IOException { + return UnicodeUtil.codePointCount(ordinals.lookupOrd(ord)); + } + } + + private static Block buildFromCache(BlockFactory factory, int[] cache, int[] ords) { + try (IntBuilder builder = factory.ints(ords.length)) { + for (int ord : ords) { + if (ord >= 0) { + builder.appendInt(cache[ord]); + } else { + builder.appendNull(); + } + } + return builder.build(); + } + } + + private static long sizeOfArray(int count) { + return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) Integer.BYTES * (long) count); + } +} diff --git 
a/server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/Warnings.java b/server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/Warnings.java new file mode 100644 index 0000000000000..67fcb543d48c7 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/Warnings.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.blockloader.docvalues; + +/** + * Warnings returned when loading values for ESQL. These are returned as HTTP 299 headers like so: + *
 {@code
+ *  < Warning: 299 Elasticsearch-${ver} "No limit defined, adding default limit of [1000]"
+ *  < Warning: 299 Elasticsearch-${ver} "Line 1:27: evaluation of [a + 1] failed, treating result as null. Only first 20 failures recorded."
+ *  < Warning: 299 Elasticsearch-${ver} "Line 1:27: java.lang.IllegalArgumentException: single-value function encountered multi-value"
+ * }
+ */ +interface Warnings { + /** + * Register a warning. ESQL deduplicates and limits the number of warnings returned so it should + * be fine to blast as many warnings into this as you encounter. + * @param exceptionClass The class of exception. Pick the same exception you'd use if you were + * throwing it back over HTTP. + * @param message The message to the called. ESQL's warnings machinery attaches the location + * in the original query and the text so there isn't any need to describe the + * function in this message. + */ + void registerException(Class exceptionClass, String message); + + /** + * Register the canonical warning for when a single-value only function encounters + * a multivalued field. + */ + static void registerSingleValueWarning(Warnings warnings) { + warnings.registerException(IllegalArgumentException.class, "single-value function encountered multi-value"); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/ForceDocAtATime.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/ForceDocAtATime.java new file mode 100644 index 0000000000000..0a8ffc7ba2941 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/ForceDocAtATime.java @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.blockloader.docvalues; + +import org.elasticsearch.index.mapper.BlockLoader; + +import java.io.IOException; +import java.util.function.Supplier; + +public class ForceDocAtATime implements BlockLoader.AllReader { + private final Supplier builder; + private final BlockLoader.AllReader delegate; + + public ForceDocAtATime(Supplier builder, BlockLoader.AllReader delegate) { + this.builder = builder; + this.delegate = delegate; + } + + @Override + public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset, boolean nullsFiltered) + throws IOException { + try (BlockLoader.Builder builder = this.builder.get()) { + for (int i = 0; i < docs.count(); i++) { + delegate.read(docs.get(i), null, builder); + } + return builder.build(); + } + } + + @Override + public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException { + delegate.read(docId, storedFields, builder); + } + + @Override + public boolean canReuse(int startingDocID) { + return delegate.canReuse(startingDocID); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/MockWarnings.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/MockWarnings.java new file mode 100644 index 0000000000000..cfb9ffc93fc76 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/MockWarnings.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.blockloader.docvalues; + +import java.util.ArrayList; +import java.util.List; + +// TODO move me once we have more of these loaders +class MockWarnings implements Warnings { + record MockWarning(Class exceptionClass, String message) {} + + private final List warnings = new ArrayList<>(); + + @Override + public void registerException(Class exceptionClass, String message) { + warnings.add(new MockWarning(exceptionClass, message)); + } + + public List warnings() { + return warnings; + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/Utf8CodePointsFromOrdsBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/Utf8CodePointsFromOrdsBlockLoaderTests.java new file mode 100644 index 0000000000000..5613abd02f4e5 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/Utf8CodePointsFromOrdsBlockLoaderTests.java @@ -0,0 +1,166 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.mapper.blockloader.docvalues; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.BlockLoader; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.TestBlock; +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.Matcher; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.elasticsearch.index.mapper.blockloader.docvalues.Utf8CodePointsFromOrdsBlockLoader.LOW_CARDINALITY; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasToString; +import static org.hamcrest.Matchers.nullValue; + +public class Utf8CodePointsFromOrdsBlockLoaderTests extends ESTestCase { + @ParametersFactory(argumentFormatting = "blockAtATime=%s, lowCardinality=%s, multiValues=%s, missingValues=%s") + public static List parameters() throws IOException { + List parameters = new ArrayList<>(); + for (boolean blockAtATime : new boolean[] { true, false }) { + for (boolean lowCardinality : new boolean[] { true, false }) { + for (boolean multiValues : new boolean[] { true, false }) { + for (boolean missingValues : new boolean[] { true, false }) { + parameters.add(new Object[] { blockAtATime, lowCardinality, multiValues, missingValues }); + } + } + } + } + return parameters; + } + + private final boolean blockAtATime; + private final boolean lowCardinality; + private final boolean multiValues; + private final boolean missingValues; + + public Utf8CodePointsFromOrdsBlockLoaderTests( + boolean blockAtATime, + boolean highCardinality, + boolean multiValues, + boolean missingValues + ) { + this.blockAtATime = blockAtATime; + this.lowCardinality = highCardinality; + this.multiValues = multiValues; + this.missingValues = missingValues; + } + + public void test() throws IOException { + List expectedWarnings = new ArrayList<>(); + try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) { + int docCount = 10_000; + int cardinality = lowCardinality ? 
between(1, LOW_CARDINALITY) : between(LOW_CARDINALITY + 1, LOW_CARDINALITY * 2); + for (int i = 0; i < docCount; i++) { + List doc = new ArrayList<>(2); + doc.add(field(i % cardinality)); + if (multiValues && i % cardinality == 0) { + doc.add(field((i % cardinality) + 1)); + expectedWarnings.add( + new MockWarnings.MockWarning(IllegalArgumentException.class, "single-value function encountered multi-value") + ); + } + iw.addDocument(doc); + } + if (missingValues) { + iw.addDocument(List.of()); + } + iw.forceMerge(1); + try (DirectoryReader dr = iw.getReader()) { + LeafReaderContext ctx = getOnlyLeafReader(dr).getContext(); + + var warnings = new MockWarnings(); + var stringsLoader = new BytesRefsFromOrdsBlockLoader("field"); + var codePointsLoader = new Utf8CodePointsFromOrdsBlockLoader(warnings, "field"); + + var stringsReader = stringsLoader.reader(ctx); + var codePointsReader = codePointsLoader.reader(ctx); + assertThat(codePointsReader, readerMatcher()); + BlockLoader.Docs docs = TestBlock.docs(ctx); + try ( + TestBlock strings = read(stringsLoader, stringsReader, ctx, docs); + TestBlock codePoints = read(codePointsLoader, codePointsReader, ctx, docs); + ) { + checkBlocks(strings, codePoints); + } + assertThat(warnings.warnings(), equalTo(expectedWarnings)); + warnings.warnings().clear(); + + stringsReader = stringsLoader.reader(ctx); + codePointsReader = codePointsLoader.reader(ctx); + for (int i = 0; i < ctx.reader().numDocs(); i += 10) { + int[] docsArray = new int[Math.min(10, ctx.reader().numDocs() - i)]; + for (int d = 0; d < docsArray.length; d++) { + docsArray[d] = i + d; + } + docs = TestBlock.docs(docsArray); + try ( + TestBlock strings = read(stringsLoader, stringsReader, ctx, docs); + TestBlock codePoints = read(codePointsLoader, codePointsReader, ctx, docs); + ) { + checkBlocks(strings, codePoints); + } + } + assertThat(warnings.warnings(), equalTo(expectedWarnings)); + } + } + } + + private TestBlock read(BlockLoader loader, BlockLoader.AllReader reader, LeafReaderContext ctx, BlockLoader.Docs docs) + throws IOException { + BlockLoader.AllReader toUse = blockAtATime + ? 
reader + : new ForceDocAtATime(() -> loader.builder(TestBlock.factory(), docs.count()), reader); + + return (TestBlock) toUse.read(TestBlock.factory(), docs, 0, false); + } + + private Matcher readerMatcher() { + if (lowCardinality == false) { + return hasToString("Utf8CodePointsFromOrds.Immediate"); + } + if (multiValues) { + return hasToString("Utf8CodePointsFromOrds.Ordinals"); + } + return hasToString("Utf8CodePointsFromOrds.SingletonOrdinals"); + } + + private static KeywordFieldMapper.KeywordField field(int codePointCount) { + return new KeywordFieldMapper.KeywordField( + "field", + new BytesRef("a".repeat(codePointCount)), + KeywordFieldMapper.Defaults.FIELD_TYPE + ); + } + + private void checkBlocks(TestBlock strings, TestBlock codePoints) { + for (int i = 0; i < strings.size(); i++) { + Object str = strings.get(i); + if (str instanceof List || str == null) { + assertThat(codePoints.get(i), nullValue()); + continue; + } + BytesRef bytes = (BytesRef) strings.get(i); + assertThat(codePoints.get(i), equalTo(bytes.length)); + } + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java index 060165decab78..7b19a809df5aa 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java @@ -13,6 +13,7 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreakingException; import org.elasticsearch.index.mapper.blockloader.docvalues.BlockDocValuesReader; import org.hamcrest.Matcher; @@ -33,6 +34,11 @@ public class TestBlock implements BlockLoader.Block { public static BlockLoader.BlockFactory factory() { return new BlockLoader.BlockFactory() { + @Override + public void adjustBreaker(long delta) throws CircuitBreakingException { + // Intentionally NOOP + } + @Override public BlockLoader.BooleanBuilder booleansFromDocValues(int expectedCount) { return booleans(expectedCount); @@ -368,6 +374,15 @@ public BlockLoader.Block constantBytes(BytesRef value, int count) { return builder.build(); } + @Override + public BlockLoader.Block constantInt(int value, int count) { + BlockLoader.IntBuilder builder = ints(count); + for (int i = 0; i < count; i++) { + builder.appendInt(value); + } + return builder.build(); + } + @Override public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder( SortedDocValues ordinals, @@ -481,6 +496,11 @@ public void close() { // TODO assert that we close the test blocks } + @Override + public String toString() { + return "TestBlock" + values; + } + private abstract static class Builder implements BlockLoader.Builder { private final List values = new ArrayList<>(); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java index 9e5f045b965ee..78d0e3fc04060 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java @@ -10,6 +10,7 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import 
org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreakingException; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.BytesRefBlock; @@ -27,6 +28,11 @@ protected DelegatingBlockLoaderFactory(BlockFactory factory) { this.factory = factory; } + @Override + public void adjustBreaker(long delta) throws CircuitBreakingException { + factory.adjustBreaker(delta); + } + @Override public BlockLoader.BooleanBuilder booleansFromDocValues(int expectedCount) { return factory.newBooleanBlockBuilder(expectedCount).mvOrdering(Block.MvOrdering.SORTED_ASCENDING); @@ -68,6 +74,11 @@ public BytesRefBlock constantBytes(BytesRef value, int count) { } } + @Override + public BlockLoader.Block constantInt(int value, int count) { + return factory.newConstantIntVector(value, count).asBlock(); + } + @Override public BlockLoader.DoubleBuilder doublesFromDocValues(int expectedCount) { return factory.newDoubleBlockBuilder(expectedCount).mvOrdering(Block.MvOrdering.SORTED_ASCENDING);
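
The new loaders combine two ideas from the javadoc above: resolve each distinct ordinal's UTF-8 code point count once through a small per-ordinal cache (sorting the ordinals first so terms are looked up in ascending order), and charge temporary scratch arrays to the circuit breaker through the new `BlockFactory#adjustBreaker` hook, releasing the reservation in a `finally` block. Below is a minimal, self-contained sketch of that pattern under assumed names — `Breaker` and `CodePointCacheSketch` are hypothetical stand-ins for illustration, not the PR's API:

```java
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;

import java.util.Arrays;

class CodePointCacheSketch {
    /** Hypothetical stand-in for {@code BlockLoader.BlockFactory#adjustBreaker(long)}. */
    interface Breaker {
        void adjust(long delta); // a positive delta may throw if it pushes the breaker over its limit
    }

    private final BytesRef[] terms; // stand-in for SortedDocValues#lookupOrd
    private final int[] cache;      // code point count per ordinal, -1 = not cached yet

    CodePointCacheSketch(BytesRef[] terms) {
        this.terms = terms;
        this.cache = new int[terms.length];
        Arrays.fill(cache, -1);
    }

    /** Resolve code point counts for a batch of ordinals; -1 marks a missing value. */
    int[] resolve(Breaker breaker, int[] ords) {
        long scratch = RamUsageEstimator.sizeOf(ords);
        breaker.adjust(scratch);                 // reserve memory for the sorted copy up front
        try {
            int[] sorted = ords.clone();
            Arrays.sort(sorted);                 // ascending order makes ordinal lookups cheaper
            for (int ord : sorted) {
                if (ord >= 0 && cache[ord] < 0) { // duplicates and misses are cheap noops
                    cache[ord] = UnicodeUtil.codePointCount(terms[ord]);
                }
            }
        } finally {
            breaker.adjust(-scratch);            // always give the reservation back
        }
        int[] counts = new int[ords.length];
        for (int i = 0; i < ords.length; i++) {
            counts[i] = ords[i] >= 0 ? cache[ords[i]] : -1;
        }
        return counts;
    }
}
```

Note that a code point count equals the byte length only for ASCII terms; the test above can compare the loaded counts against `bytes.length` because its values are repeated `"a"` characters, while multi-byte UTF-8 terms would yield a smaller count from `UnicodeUtil.codePointCount`.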