Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/138548.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 138548
summary: Store high-cardinality keyword fields in binary doc values
area: Mapping
type: feature
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.elasticsearch.index.mapper.TestDocumentParserContext;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.script.field.BinaryDocValuesField;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.test.ESTestCase;
Expand Down Expand Up @@ -88,7 +89,7 @@ public void testStoringQueryBuilders() throws IOException {
when(searchExecutionContext.getWriteableRegistry()).thenReturn(writableRegistry());
when(searchExecutionContext.getParserConfig()).thenReturn(parserConfig());
when(searchExecutionContext.getForField(fieldMapper.fieldType(), fielddataOperation)).thenReturn(
new BytesBinaryIndexFieldData(fieldMapper.fullPath(), CoreValuesSourceType.KEYWORD)
new BytesBinaryIndexFieldData(fieldMapper.fullPath(), CoreValuesSourceType.KEYWORD, BinaryDocValuesField::new)
);
when(searchExecutionContext.getFieldType(Mockito.anyString())).thenAnswer(invocation -> {
final String fieldName = (String) invocation.getArguments()[0];
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
---
setup:
- requires:
cluster_features: ["mapper.keyword.store_high_cardinality_in_binary_doc_values"]
reason: "testing binary doc values search"

- do:
indices.create:
index: test
body:
mappings:
dynamic: false
properties:
keyword:
type: keyword
index: false
doc_values:
cardinality: high

- do:
index:
index: test
id: "1"
body:
keyword: "key1"

- do:
index:
index: test
id: "2"
body:
keyword: "key2"
- do:
indices.refresh: {}

---
"Test match query on keyword field where only binary doc values are enabled":

- do:
search:
index: test
body: { query: { match: { keyword: { query: "key1" } } } }
- length: { hits.hits: 1 }

---
"Test terms query on keyword field where only binary doc values are enabled":

- do:
search:
index: test
body: { query: { terms: { keyword: [ "key1", "key2" ] } } }
- length: { hits.hits: 2 }

---
"Test range query on keyword field where only binary doc values are enabled":

# Both indexed values ("key1" and "key2") are >= "key1", so both documents are expected.
- do:
search:
index: test
body: { query: { range: { keyword: { gte: "key1" } } } }
- length: { hits.hits: 2 }

---
"Test fuzzy query on keyword field where only binary doc values are enabled":

# "kay1" is one edit away from "key1" but two edits away from "key2",
# so with fuzziness 1 only the first document is expected to match.
- do:
search:
index: test
body: { query: { fuzzy: { keyword: { value: "kay1", fuzziness: 1 } } } }
- length: { hits.hits: 1 }

---
"Test prefix query on keyword field where only binary doc values are enabled":

- do:
search:
index: test
body: { query: { prefix: { keyword: { value: "key" } } } }
- length: { hits.hits: 2 }

---
"Test case insensitive term query on keyword field where only binary doc values are enabled":

- do:
search:
index: test
body: { query: { term: { keyword: { value: "KeY1", case_insensitive: true } } } }
- length: { hits.hits: 1 }

---
"Test wildcard query on keyword field where only binary doc values are enabled":

- do:
search:
index: test
body: { query: { wildcard: { keyword: { value: "k*1" } } } }
- length: { hits.hits: 1 }

---
"Test case insensitive wildcard query on keyword field where only binary doc values are enabled":

- do:
search:
index: test
body: { query: { wildcard: { keyword: { value: "K*1", case_insensitive: true } } } }
- length: { hits.hits: 1 }

---
"Test regexp query on keyword field where only binary doc values are enabled":

- do:
search:
index: test
body: { query: { regexp: { keyword: { value: "k.*1" } } } }
- length: { hits.hits: 1 }
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.fielddata;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;

import java.io.IOException;

/**
 * Exposes a {@link BinaryDocValues} instance as {@link SortedBinaryDocValues} by decoding the multivalued
 * encoding used by {@link org.elasticsearch.index.mapper.BinaryFieldMapper.CustomBinaryDocValuesField}.
 * <p>
 * As decoded here, each per-document payload begins with a vInt holding the number of values, followed,
 * for every value, by a vInt length and then that many bytes.
 * <p>
 * The {@link BytesRef} handed out by {@link #nextValue()} is a single reused instance that points into the
 * byte array of the current document's payload; it is overwritten by the next call.
 */
public class MultiValuedSortedBinaryDocValues extends SortedBinaryDocValues {

    BinaryDocValues values;
    int count;
    final ByteArrayStreamInput in = new ByteArrayStreamInput();
    final BytesRef scratch = new BytesRef();

    /**
     * @param values the underlying binary doc values holding the encoded payloads
     */
    public MultiValuedSortedBinaryDocValues(BinaryDocValues values) {
        this.values = values;
    }

    /**
     * Positions this instance on {@code doc} and, if the document has a payload, reads its value count.
     *
     * @param doc the document to advance to
     * @return {@code true} if the underlying doc values have an entry for {@code doc}, {@code false} otherwise
     * @throws IOException if reading from the underlying doc values or the payload fails
     */
    @Override
    public boolean advanceExact(int doc) throws IOException {
        if (values.advanceExact(doc) == false) {
            return false;
        }
        final BytesRef encoded = values.binaryValue();
        assert encoded.length > 0;
        // Point the stream at the payload; the leading vInt is the number of values that follow.
        in.reset(encoded.bytes, encoded.offset, encoded.length);
        count = in.readVInt();
        // Values are returned as slices of the payload's own array rather than copies.
        scratch.bytes = encoded.bytes;
        return true;
    }

    /**
     * @return the value count read by the most recent successful {@link #advanceExact(int)}
     */
    @Override
    public int docValueCount() {
        return count;
    }

    /**
     * Decodes the next value of the current document.
     *
     * @return the shared scratch {@link BytesRef}, repositioned onto the next value
     * @throws IOException if the length prefix cannot be read
     */
    @Override
    public BytesRef nextValue() throws IOException {
        final int valueLength = in.readVInt();
        scratch.length = valueLength;
        final int valueStart = in.getPosition();
        scratch.offset = valueStart;
        // Skip over the value's bytes so the stream sits on the next length prefix.
        in.setPosition(valueStart + valueLength);
        return scratch;
    }
}

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.script.field.ToScriptFieldFactory;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
Expand All @@ -26,14 +28,20 @@

import java.io.IOException;

public class BytesBinaryIndexFieldData implements IndexFieldData<BytesBinaryDVLeafFieldData> {
public class BytesBinaryIndexFieldData implements IndexFieldData<MultiValuedBinaryDVLeafFieldData> {

protected final String fieldName;
protected final ValuesSourceType valuesSourceType;
protected final ToScriptFieldFactory<SortedBinaryDocValues> toScriptFieldFactory;

public BytesBinaryIndexFieldData(String fieldName, ValuesSourceType valuesSourceType) {
public BytesBinaryIndexFieldData(
String fieldName,
ValuesSourceType valuesSourceType,
ToScriptFieldFactory<SortedBinaryDocValues> toScriptFieldFactory
) {
this.fieldName = fieldName;
this.valuesSourceType = valuesSourceType;
this.toScriptFieldFactory = toScriptFieldFactory;
}

@Override
Expand Down Expand Up @@ -66,32 +74,34 @@ public BucketedSort newBucketedSort(
}

@Override
public BytesBinaryDVLeafFieldData load(LeafReaderContext context) {
public MultiValuedBinaryDVLeafFieldData load(LeafReaderContext context) {
try {
return new BytesBinaryDVLeafFieldData(DocValues.getBinary(context.reader(), fieldName));
return new MultiValuedBinaryDVLeafFieldData(DocValues.getBinary(context.reader(), fieldName), toScriptFieldFactory);
} catch (IOException e) {
throw new IllegalStateException("Cannot load doc values", e);
}
}

@Override
public BytesBinaryDVLeafFieldData loadDirect(LeafReaderContext context) {
public MultiValuedBinaryDVLeafFieldData loadDirect(LeafReaderContext context) {
return load(context);
}

public static class Builder implements IndexFieldData.Builder {
private final String name;
private final ToScriptFieldFactory<SortedBinaryDocValues> toScriptFieldFactory;
private final ValuesSourceType valuesSourceType;

public Builder(String name, ValuesSourceType valuesSourceType) {
public Builder(String name, ValuesSourceType valuesSourceType, ToScriptFieldFactory<SortedBinaryDocValues> toScriptFieldFactory) {
this.name = name;
this.valuesSourceType = valuesSourceType;
this.toScriptFieldFactory = toScriptFieldFactory;
}

@Override
public IndexFieldData<?> build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
// Ignore breaker
return new BytesBinaryIndexFieldData(name, valuesSourceType);
return new BytesBinaryIndexFieldData(name, valuesSourceType, toScriptFieldFactory);
}
}
}
Loading