diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedQuery.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDvConfirmedQuery.java similarity index 80% rename from x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedQuery.java rename to server/src/main/java/org/elasticsearch/index/mapper/BinaryDvConfirmedQuery.java index cfd5a141a5128..fcf7c8c579783 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/BinaryDvConfirmedQuery.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDvConfirmedQuery.java @@ -1,11 +1,13 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.xpack.wildcard.mapper; +package org.elasticsearch.index.mapper; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; @@ -35,7 +37,7 @@ * match a provided approximation query which is key to getting good performance). 
*/ -abstract class BinaryDvConfirmedQuery extends Query { +public abstract class BinaryDvConfirmedQuery extends Query { protected final String field; protected final Query approxQuery; @@ -53,6 +55,10 @@ public static Query fromAutomaton(Query approximation, String field, String matc return new BinaryDvConfirmedAutomatonQuery(approximation, field, matchPattern, automaton); } + public static Query fromAutomatonSingleValue(Query approximation, String field, String matchPattern, Automaton automaton) { + return new SingleValueBinaryDvConfirmedAutomatonQuery(approximation, field, matchPattern, automaton); + } + /** * Returns a query that checks for equality of at leat one of the provided terms across * all binary doc values (but only for docs that also match a provided approximation query which @@ -63,6 +69,11 @@ public static Query fromTerms(Query approximation, String field, BytesRef... ter return new BinaryDvConfirmedTermsQuery(approximation, field, terms); } + public static Query fromTermsSingleValue(Query approximation, String field, BytesRef... 
terms) { + Arrays.sort(terms, BytesRef::compareTo); + return new SingleValueBinaryDvConfirmedTermsQuery(approximation, field, terms); + } + protected abstract boolean matchesBinaryDV(ByteArrayStreamInput bytes, BytesRef bytesRef, BytesRef scratch) throws IOException; protected abstract Query rewrite(Query approxRewrite) throws IOException; @@ -146,7 +157,7 @@ public int hashCode() { return Objects.hash(classHash(), field, approxQuery); } - Query getApproximationQuery() { + public Query getApproximationQuery() { return approxQuery; } @@ -159,7 +170,7 @@ public void visit(QueryVisitor visitor) { private static class BinaryDvConfirmedAutomatonQuery extends BinaryDvConfirmedQuery { - private final ByteRunAutomaton byteRunAutomaton; + protected final ByteRunAutomaton byteRunAutomaton; private final String matchPattern; private BinaryDvConfirmedAutomatonQuery(Query approximation, String field, String matchPattern, Automaton automaton) { @@ -209,9 +220,20 @@ public int hashCode() { } } + private static class SingleValueBinaryDvConfirmedAutomatonQuery extends BinaryDvConfirmedAutomatonQuery { + private SingleValueBinaryDvConfirmedAutomatonQuery(Query approximation, String field, String matchPattern, Automaton automaton) { + super(approximation, field, matchPattern, automaton); + } + + @Override + protected boolean matchesBinaryDV(ByteArrayStreamInput bytes, BytesRef bytesRef, BytesRef scratch) { + return byteRunAutomaton.run(bytesRef.bytes, bytesRef.offset, bytesRef.length); + } + } + private static class BinaryDvConfirmedTermsQuery extends BinaryDvConfirmedQuery { - private final BytesRef[] terms; + protected final BytesRef[] terms; private BinaryDvConfirmedTermsQuery(Query approximation, String field, BytesRef[] terms) { super(approximation, field); @@ -275,4 +297,24 @@ public int hashCode() { return Objects.hash(super.hashCode(), Arrays.hashCode(terms)); } } + + static class SingleValueBinaryDvConfirmedTermsQuery extends BinaryDvConfirmedTermsQuery { + 
SingleValueBinaryDvConfirmedTermsQuery(Query approximation, String field, BytesRef[] terms) { + super(approximation, field, terms); + } + + @Override + protected boolean matchesBinaryDV(ByteArrayStreamInput bytes, BytesRef bytesRef, BytesRef scratch) { + if (terms.length == 1) { + return terms[0].bytesEquals(bytesRef); + } else { + final int pos = Arrays.binarySearch(terms, bytesRef, BytesRef::compareTo); + if (pos >= 0) { + assert terms[pos].bytesEquals(bytesRef) : "Expected term at position " + pos + " to match bytesRef, but it did not."; + return true; + } + return false; + } + } + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 457c90383b5d2..a2ec76a18dc1b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -707,6 +707,33 @@ public String toString() { } } + public static class BytesRefsFromSingletonBinaryBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + + public BytesRefsFromSingletonBinaryBlockLoader(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public BytesRefBuilder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName); + if (docValues != null) { + return new BytesRefsFromSingletonBinary(docValues); + } + return new ConstantNullsReader(); + } + + @Override + public String toString() { + return "BytesRefsFromSingletonBinaryBlockLoader[" + fieldName + "]"; + } + } + public static class BytesRefsFromOrdsBlockLoader extends DocValuesBlockLoader { private final String fieldName; @@ -1005,8 +1032,8 @@ public String toString() { * Read BinaryDocValues 
with no additional structure in the BytesRefs. * Each BytesRef from the doc values maps directly to a value in the block loader. */ - public static class BytesRefsFromBinary extends AbstractBytesRefsFromBinary { - public BytesRefsFromBinary(BinaryDocValues docValues) { + public static class BytesRefsFromSingletonBinary extends AbstractBytesRefsFromBinary { + public BytesRefsFromSingletonBinary(BinaryDocValues docValues) { super(docValues); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index cf3fad86812f5..503482957837f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -13,11 +13,12 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.InvertableType; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; @@ -27,8 +28,10 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; @@ -47,18 +50,16 @@ import 
org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; -import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; import org.elasticsearch.index.query.AutomatonQueryWithDescription; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; +import org.elasticsearch.script.BinaryDocValuesSingleStringFieldScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptCompiler; -import org.elasticsearch.script.SortedSetDocValuesStringFieldScript; import org.elasticsearch.script.StringFieldScript; import org.elasticsearch.script.field.KeywordDocValuesField; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; @@ -113,7 +114,7 @@ public static class Defaults { ft.setTokenized(false); ft.setOmitNorms(true); ft.setIndexOptions(IndexOptions.DOCS); - ft.setDocValuesType(DocValuesType.SORTED_SET); + ft.setDocValuesType(DocValuesType.BINARY); FIELD_TYPE = freezeAndDeduplicateFieldType(ft); } @@ -122,7 +123,7 @@ public static class Defaults { ft.setTokenized(false); ft.setOmitNorms(true); ft.setIndexOptions(IndexOptions.NONE); - ft.setDocValuesType(DocValuesType.SORTED_SET); + ft.setDocValuesType(DocValuesType.BINARY); ft.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE); FIELD_TYPE_WITH_SKIP_DOC_VALUES = freezeAndDeduplicateFieldType(ft); } @@ -465,7 +466,11 @@ public KeywordFieldMapper build(MapperBuilderContext context) { ); fieldtype.setOmitNorms(this.hasNorms.getValue() == false); 
fieldtype.setStored(this.stored.getValue()); - fieldtype.setDocValuesType(this.hasDocValues.getValue() ? DocValuesType.SORTED_SET : DocValuesType.NONE); + + // The keyword may have doc_values, but the Field used to create the index should not add doc_values. + // Instead, we insert a separate binary doc value field explicitly, so that we can throw if a value has already been inserted. + fieldtype.setDocValuesType(DocValuesType.NONE); + if (fieldtype.equals(Defaults.FIELD_TYPE_WITH_SKIP_DOC_VALUES) == false) { // NOTE: override index options only if we are not using a sparse doc values index (and we use an inverted index) fieldtype.setIndexOptions(TextParams.toIndexOptions(this.indexed.getValue(), this.indexOptions.getValue())); @@ -652,7 +657,7 @@ public Query termQuery(Object value, SearchExecutionContext context) { if (isIndexed()) { return super.termQuery(value, context); } else { - return SortedSetDocValuesField.newSlowExactQuery(name(), indexedValueForSearch(value)); + return BinaryDvConfirmedQuery.fromTermsSingleValue(new MatchAllDocsQuery(), name(), indexedValueForSearch(value)); } } @@ -663,7 +668,8 @@ public Query termsQuery(Collection values, SearchExecutionContext context) { return super.termsQuery(values, context); } else { Collection bytesRefs = values.stream().map(this::indexedValueForSearch).toList(); - return SortedSetDocValuesField.newSlowSetQuery(name(), bytesRefs); + BytesRef[] terms = bytesRefs.toArray(BytesRef[]::new); + return BinaryDvConfirmedQuery.fromTermsSingleValue(new MatchAllDocsQuery(), name(), terms); } } @@ -679,13 +685,10 @@ public Query rangeQuery( if (isIndexed()) { return super.rangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, context); } else { - return SortedSetDocValuesField.newSlowRangeQuery( - name(), - lowerTerm == null ? null : indexedValueForSearch(lowerTerm), - upperTerm == null ? null : indexedValueForSearch(upperTerm), - includeLower, - includeUpper - ); + BytesRef lower = lowerTerm == null ? 
null : BytesRefs.toBytesRef(lowerTerm); + BytesRef upper = upperTerm == null ? null : BytesRefs.toBytesRef(upperTerm); + Automaton automaton = TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper); + return BinaryDvConfirmedQuery.fromAutomatonSingleValue(new MatchAllDocsQuery(), name(), lower + "-" + upper, automaton); } } @@ -705,7 +708,7 @@ public Query fuzzyQuery( } else { return StringScriptFieldFuzzyQuery.build( new Script(""), - ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), + ctx -> new BinaryDocValuesSingleStringFieldScript(name(), context.lookup(), ctx), name(), indexedValueForSearch(value).utf8ToString(), fuzziness.asDistance(BytesRefs.toString(value)), @@ -728,7 +731,7 @@ public Query prefixQuery( } else { return new StringScriptFieldPrefixQuery( new Script(""), - ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), + ctx -> new BinaryDocValuesSingleStringFieldScript(name(), context.lookup(), ctx), name(), indexedValueForSearch(value).utf8ToString(), caseInsensitive @@ -744,7 +747,7 @@ public Query termQueryCaseInsensitive(Object value, SearchExecutionContext conte } else { return new StringScriptFieldTermQuery( new Script(""), - ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), + ctx -> new BinaryDocValuesSingleStringFieldScript(name(), context.lookup(), ctx), name(), indexedValueForSearch(value).utf8ToString(), true @@ -758,7 +761,8 @@ public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensi if (isIndexed()) { terms = MultiTerms.getTerms(reader, name()); } else if (hasDocValues()) { - terms = SortedSetDocValuesTerms.getTerms(reader, name()); + // TODO there is no efficient way to get terms when not using sorted set doc values + terms = null; } if (terms == null) { // Field does not exist on this shard. 
@@ -800,7 +804,7 @@ NamedAnalyzer normalizer() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) { - return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name()); + return new BlockDocValuesReader.BytesRefsFromSingletonBinaryBlockLoader(name()); } if (isStored()) { return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name()); @@ -914,12 +918,8 @@ protected BytesRef storedToBytesRef(Object stored) { ); } - private SortedSetOrdinalsIndexFieldData.Builder fieldDataFromDocValues() { - return new SortedSetOrdinalsIndexFieldData.Builder( - name(), - CoreValuesSourceType.KEYWORD, - (dv, n) -> new KeywordDocValuesField(FieldData.toString(dv), n) - ); + private IndexFieldData.Builder fieldDataFromDocValues() { + return new SingletonBinaryDocValuesFieldData.Builder(name()); } @Override @@ -999,7 +999,7 @@ public Query wildcardQuery( } return new StringScriptFieldWildcardQuery( new Script(""), - ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), + ctx -> new BinaryDocValuesSingleStringFieldScript(name(), context.lookup(), ctx), name(), value, caseInsensitive @@ -1020,7 +1020,7 @@ public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod } return new StringScriptFieldWildcardQuery( new Script(""), - ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), + ctx -> new BinaryDocValuesSingleStringFieldScript(name(), context.lookup(), ctx), name(), value, false @@ -1046,7 +1046,7 @@ public Query regexpQuery( } return new StringScriptFieldRegexpQuery( new Script(""), - ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), + ctx -> new BinaryDocValuesSingleStringFieldScript(name(), context.lookup(), ctx), name(), indexedValueForSearch(value).utf8ToString(), syntaxFlags, @@ -1256,7 +1256,18 @@ private boolean 
indexValue(DocumentParserContext context, XContentString value) throw new IllegalArgumentException(msg); } - Field field = buildKeywordField(binaryValue); + if (hasDocValues) { + BinaryDocValuesField dvField = (BinaryDocValuesField) context.doc().getByKey(fieldType().name()); + if (dvField != null) { + throw new IllegalArgumentException("field [" + fieldType().name() + "] already has a value for this document."); + } + dvField = new BinaryDocValuesField(fieldType().name(), binaryValue); + context.doc().addWithKey(fieldType().name(), dvField); + } + + // the main field no longer carries doc values, because holding multiple values would need an internal structure + Field field = new KeywordField(fieldType().name(), binaryValue, fieldType); + context.doc().add(field); if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { @@ -1365,23 +1376,13 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { } }); } else if (hasDocValues) { - if (offsetsFieldName != null) { - layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName)); - } else { - layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { - - @Override - protected BytesRef convert(BytesRef value) { - return value; - } - - @Override - protected BytesRef preserve(BytesRef value) { - // Preserve must make a deep copy because convert gets a shallow copy from the iterator - return BytesRef.deepCopyOf(value); - } - }); - } + layers.add(new SingletonBinaryDocValuesSyntheticFieldLoaderLayer(fullPath(), (leafReader -> { + try { + return DocValues.getBinary(leafReader, fullFieldName); + } catch (IOException e) { + throw new RuntimeException(e); + } + }))); } // if ignore_above is set, then there is a chance that this field will be ignored. 
In such cases, we save an diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SingletonBinaryDocValuesField.java b/server/src/main/java/org/elasticsearch/index/mapper/SingletonBinaryDocValuesField.java new file mode 100644 index 0000000000000..992f57c30aa29 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/SingletonBinaryDocValuesField.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.util.BytesRef; + +public class SingletonBinaryDocValuesField extends CustomDocValuesField { + + private final BytesRef value; + + public SingletonBinaryDocValuesField(String name, BytesRef value) { + super(name); + this.value = value; + } + + @Override + public BytesRef binaryValue() { + return value; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SingletonBinaryDocValuesFieldData.java b/server/src/main/java/org/elasticsearch/index/mapper/SingletonBinaryDocValuesFieldData.java new file mode 100644 index 0000000000000..3b8ad7a35e5c1 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/SingletonBinaryDocValuesFieldData.java @@ -0,0 +1,123 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.SortField; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.fielddata.FieldData; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.index.fielddata.LeafFieldData; +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.script.field.DocValuesScriptFieldFactory; +import org.elasticsearch.script.field.KeywordDocValuesField; +import org.elasticsearch.script.field.ToScriptFieldFactory; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.MultiValueMode; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; +import org.elasticsearch.search.sort.BucketedSort; +import org.elasticsearch.search.sort.SortOrder; + +import java.io.IOException; +import java.io.UncheckedIOException; + +public class SingletonBinaryDocValuesFieldData implements IndexFieldData { + + private final String fieldName; + + static class Builder implements IndexFieldData.Builder { + + final String fieldName; + + Builder(String fieldName) { + this.fieldName = fieldName; + } + + public SingletonBinaryDocValuesFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) { + return new SingletonBinaryDocValuesFieldData(fieldName); + } + } + + SingletonBinaryDocValuesFieldData(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public String getFieldName() { + return fieldName; + } + + @Override + public ValuesSourceType getValuesSourceType() { + return null; 
+ } + + @Override + public LeafFieldData load(LeafReaderContext context) { + try { + return loadDirect(context); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public LeafFieldData loadDirect(LeafReaderContext context) throws IOException { + LeafReader leafReader = context.reader(); + var values = DocValues.getBinary(leafReader, fieldName); + return new LeafFieldData() { + final ToScriptFieldFactory factory = KeywordDocValuesField::new; + + @Override + public DocValuesScriptFieldFactory getScriptFieldFactory(String name) { + return factory.getScriptFieldFactory(getBytesValues(), name); + } + + @Override + public SortedBinaryDocValues getBytesValues() { + return FieldData.singleton(values); + } + + @Override + public long ramBytesUsed() { + return 1L; + } + }; + } + + @Override + public SortField sortField( + @Nullable Object missingValue, + MultiValueMode sortMode, + XFieldComparatorSource.Nested nested, + boolean reverse + ) { + XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested); + return new SortField(getFieldName(), source, reverse); + } + + @Override + public BucketedSort newBucketedSort( + BigArrays bigArrays, + Object missingValue, + MultiValueMode sortMode, + XFieldComparatorSource.Nested nested, + SortOrder sortOrder, + DocValueFormat format, + int bucketSize, + BucketedSort.ExtraData extra + ) { + throw new IllegalArgumentException("only supported on numeric fields"); + } +} diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/SingletonBinaryDocValuesSyntheticFieldLoaderLayer.java similarity index 59% rename from x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextSyntheticFieldLoaderLayer.java rename to 
server/src/main/java/org/elasticsearch/index/mapper/SingletonBinaryDocValuesSyntheticFieldLoaderLayer.java index a8bfae78b0611..7938c4b577599 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextSyntheticFieldLoaderLayer.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SingletonBinaryDocValuesSyntheticFieldLoaderLayer.java @@ -1,27 +1,29 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
*/ -package org.elasticsearch.xpack.logsdb.patterntext; +package org.elasticsearch.index.mapper; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.LeafReader; import org.apache.lucene.search.DocIdSetIterator; -import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.function.Function; -class PatternTextSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer { +public class SingletonBinaryDocValuesSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer { - private PatternTextSyntheticFieldLoader loader; + private SingletonBinaryDocValuesSyntheticFieldLoader loader; private final String name; - private final PatternTextFieldMapper.DocValuesSupplier docValuesSupplier; + private final Function docValuesSupplier; - PatternTextSyntheticFieldLoaderLayer(String name, PatternTextFieldMapper.DocValuesSupplier docValuesSupplier) { + public SingletonBinaryDocValuesSyntheticFieldLoaderLayer(String name, Function docValuesSupplier) { this.name = name; this.docValuesSupplier = docValuesSupplier; } @@ -33,11 +35,11 @@ public long valueCount() { @Override public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { - var docValues = docValuesSupplier.get(leafReader); + var docValues = docValuesSupplier.apply(leafReader); if (docValues == null) { return null; } - loader = new PatternTextSyntheticFieldLoader(docValues); + loader = new SingletonBinaryDocValuesSyntheticFieldLoader(docValues); return loader; } @@ -58,11 +60,11 @@ public String fieldName() { return name; } - private static class PatternTextSyntheticFieldLoader implements DocValuesLoader { + private static class SingletonBinaryDocValuesSyntheticFieldLoader implements DocValuesLoader { private final BinaryDocValues docValues; private boolean hasValue = false; - 
PatternTextSyntheticFieldLoader(BinaryDocValues docValues) { + SingletonBinaryDocValuesSyntheticFieldLoader(BinaryDocValues docValues) { this.docValues = docValues; } diff --git a/server/src/main/java/org/elasticsearch/script/BinaryDocValuesSingleStringFieldScript.java b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesSingleStringFieldScript.java new file mode 100644 index 0000000000000..768a37a66b2ff --- /dev/null +++ b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesSingleStringFieldScript.java @@ -0,0 +1,56 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.script; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.OnScriptError; +import org.elasticsearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.Map; + +public class BinaryDocValuesSingleStringFieldScript extends StringFieldScript { + private final BinaryDocValues values; + + boolean hasValue = false; + + public BinaryDocValuesSingleStringFieldScript(String fieldName, SearchLookup searchLookup, LeafReaderContext ctx) { + super(fieldName, Map.of(), searchLookup, OnScriptError.FAIL, ctx); + try { + values = DocValues.getBinary(ctx.reader(), fieldName); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } + + @Override + public void setDocument(int docID) { + try { + hasValue = values.advanceExact(docID); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } + + @Override + public void execute() { + try { + if (hasValue) { + BytesRef bytesRef = values.binaryValue(); + emit(bytesRef.utf8ToString()); + } + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index f199fcaabd29b..af3b6aae0ef61 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -1324,7 +1324,7 @@ private static void writeTestDoc(MappedFieldType fieldType, String fieldName, Ra doc.add(new BinaryFieldMapper.CustomBinaryDocValuesField(fieldName, new BytesRef("a").bytes)); json = "{ \"" + 
fieldName + "\" : \"a\" }"; } else { - doc.add(new SortedSetDocValuesField(fieldName, new BytesRef("a"))); + doc.add(new BinaryDocValuesField(fieldName, new BytesRef("a"))); json = "{ \"" + fieldName + "\" : \"a\" }"; } } else if (vst.equals(CoreValuesSourceType.DATE)) { diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/normalize/NormalizeAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/normalize/NormalizeAggregatorTests.java index d6bd96600176a..1cf2e8f679b20 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/normalize/NormalizeAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/normalize/NormalizeAggregatorTests.java @@ -7,10 +7,10 @@ package org.elasticsearch.xpack.analytics.normalize; +import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; @@ -148,7 +148,7 @@ private void testCase(ValuesSourceAggregationBuilder aggBuilder, Consumer) dh -> { @@ -445,7 +445,7 @@ public void testWithComposite() throws IOException { "2010-03-11T01:07:45", new NumericDocValuesField("val", 1), new IntPoint("val", 1), - new SortedSetDocValuesField("term", new BytesRef("a")) + new BinaryDocValuesField("term", new BytesRef("a")) ) ); iw.addDocument( @@ -453,7 +453,7 @@ public void testWithComposite() throws IOException { "2010-03-12T01:07:45", new NumericDocValuesField("val", 2), new IntPoint("val", 2), - new SortedSetDocValuesField("term", new BytesRef("a")) + new BinaryDocValuesField("term", new BytesRef("a")) ) ); iw.addDocument( @@ -461,7 +461,7 @@ public void 
testWithComposite() throws IOException { "2010-04-01T03:43:34", new NumericDocValuesField("val", 3), new IntPoint("val", 3), - new SortedSetDocValuesField("term", new BytesRef("a")) + new BinaryDocValuesField("term", new BytesRef("a")) ) ); iw.addDocument( @@ -469,7 +469,7 @@ public void testWithComposite() throws IOException { "2010-04-27T03:43:34", new NumericDocValuesField("val", 4), new IntPoint("val", 4), - new SortedSetDocValuesField("term", new BytesRef("b")) + new BinaryDocValuesField("term", new BytesRef("b")) ) ); }, (Consumer) composite -> { @@ -523,7 +523,7 @@ public void testUnsupportedKeywordSandwich() throws IOException { "2010-03-11T01:07:45", new NumericDocValuesField("val", 1), new IntPoint("val", 1), - new SortedSetDocValuesField("term", new BytesRef("a")) + new BinaryDocValuesField("term", new BytesRef("a")) ) ); iw.addDocument( @@ -531,7 +531,7 @@ public void testUnsupportedKeywordSandwich() throws IOException { "2010-03-12T01:07:45", new NumericDocValuesField("val", 2), new IntPoint("val", 2), - new SortedSetDocValuesField("term", new BytesRef("a")) + new BinaryDocValuesField("term", new BytesRef("a")) ) ); iw.addDocument( @@ -539,7 +539,7 @@ public void testUnsupportedKeywordSandwich() throws IOException { "2010-04-01T03:43:34", new NumericDocValuesField("val", 3), new IntPoint("val", 3), - new SortedSetDocValuesField("term", new BytesRef("a")) + new BinaryDocValuesField("term", new BytesRef("a")) ) ); iw.addDocument( @@ -547,7 +547,7 @@ public void testUnsupportedKeywordSandwich() throws IOException { "2010-04-27T03:43:34", new NumericDocValuesField("val", 4), new IntPoint("val", 4), - new SortedSetDocValuesField("term", new BytesRef("b")) + new BinaryDocValuesField("term", new BytesRef("b")) ) ); }, @@ -597,22 +597,22 @@ public void testKeywordSandwichWithSorting() throws IOException { testCase(iw -> { iw.addDocument( - doc("2020-11-02T01:07:45", new NumericDocValuesField("val", 1), new SortedSetDocValuesField("term", new BytesRef("a"))) + 
doc("2020-11-02T01:07:45", new NumericDocValuesField("val", 1), new BinaryDocValuesField("term", new BytesRef("a"))) ); iw.addDocument( - doc("2020-11-03T01:07:45", new NumericDocValuesField("val", 2), new SortedSetDocValuesField("term", new BytesRef("a"))) + doc("2020-11-03T01:07:45", new NumericDocValuesField("val", 2), new BinaryDocValuesField("term", new BytesRef("a"))) ); iw.addDocument( - doc("2020-11-04T03:43:34", new NumericDocValuesField("val", 4), new SortedSetDocValuesField("term", new BytesRef("b"))) + doc("2020-11-04T03:43:34", new NumericDocValuesField("val", 4), new BinaryDocValuesField("term", new BytesRef("b"))) ); iw.addDocument( - doc("2020-11-09T03:43:34", new NumericDocValuesField("val", 30), new SortedSetDocValuesField("term", new BytesRef("a"))) + doc("2020-11-09T03:43:34", new NumericDocValuesField("val", 30), new BinaryDocValuesField("term", new BytesRef("a"))) ); iw.addDocument( - doc("2020-11-10T03:43:34", new NumericDocValuesField("val", 4), new SortedSetDocValuesField("term", new BytesRef("b"))) + doc("2020-11-10T03:43:34", new NumericDocValuesField("val", 4), new BinaryDocValuesField("term", new BytesRef("b"))) ); iw.addDocument( - doc("2020-11-11T03:43:34", new NumericDocValuesField("val", 4), new SortedSetDocValuesField("term", new BytesRef("b"))) + doc("2020-11-11T03:43:34", new NumericDocValuesField("val", 4), new BinaryDocValuesField("term", new BytesRef("b"))) ); }, (Consumer) dh -> { assertThat(dh.getBuckets(), hasSize(2)); diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextBlockLoader.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextBlockLoader.java index b6dc2e7b20aa9..56a45c84eea7c 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextBlockLoader.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextBlockLoader.java @@ -31,6 +31,6 @@ 
public AllReader reader(LeafReaderContext context) throws IOException { if (docValues == null) { return new ConstantNullsReader(); } - return new BlockDocValuesReader.BytesRefsFromBinary(docValues); + return new BlockDocValuesReader.BytesRefsFromSingletonBinary(docValues); } } diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapper.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapper.java index 30eeb073e382f..e7736757af45f 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapper.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapper.java @@ -29,6 +29,7 @@ import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MappingParserContext; +import org.elasticsearch.index.mapper.SingletonBinaryDocValuesSyntheticFieldLoaderLayer; import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader; @@ -346,10 +347,13 @@ protected void write(XContentBuilder b, Object value) throws IOException { return new CompositeSyntheticFieldLoader( leafName(), fullPath(), - new PatternTextSyntheticFieldLoaderLayer( - fieldType().name(), - leafReader -> PatternTextCompositeValues.from(leafReader, fieldType()) - ) + new SingletonBinaryDocValuesSyntheticFieldLoaderLayer(fieldType().name(), leafReader -> { + try { + return PatternTextCompositeValues.from(leafReader, fieldType()); + } catch (IOException e) { + throw new RuntimeException(e); + } + }) ); } diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIndexFieldData.java 
b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIndexFieldData.java index 9bfc6f1654ce9..532043830e106 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIndexFieldData.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIndexFieldData.java @@ -10,8 +10,8 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.SortField; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexFieldDataCache; import org.elasticsearch.index.fielddata.LeafFieldData; @@ -84,22 +84,7 @@ public DocValuesScriptFieldFactory getScriptFieldFactory(String name) { @Override public SortedBinaryDocValues getBytesValues() { - return new SortedBinaryDocValues() { - @Override - public boolean advanceExact(int doc) throws IOException { - return values.advanceExact(doc); - } - - @Override - public int docValueCount() { - return 1; - } - - @Override - public BytesRef nextValue() throws IOException { - return values.binaryValue(); - } - }; + return FieldData.singleton(values); } @Override diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index 636f856e78f89..1b4287c6fb864 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -60,6 +60,7 @@ import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import 
org.elasticsearch.index.fielddata.plain.StringBinaryIndexFieldData; +import org.elasticsearch.index.mapper.BinaryDvConfirmedQuery; import org.elasticsearch.index.mapper.BinaryFieldMapper.CustomBinaryDocValuesField; import org.elasticsearch.index.mapper.BlockDocValuesReader; import org.elasticsearch.index.mapper.BlockLoader; diff --git a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java index 31f228ae6bd7e..74db6c0e0e359 100644 --- a/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java +++ b/x-pack/plugin/wildcard/src/test/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapperTests.java @@ -56,6 +56,7 @@ import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.index.mapper.BinaryDvConfirmedQuery; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument;