Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/122999.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 122999
summary: Store arrays offsets for ip fields natively with synthetic source
area: Mapping
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import org.apache.lucene.util.BitUtil;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -23,9 +25,10 @@

public class FieldArrayContext {

private static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";
private final Map<String, Offsets> offsetsPerField = new HashMap<>();

void recordOffset(String field, String value) {
void recordOffset(String field, Comparable<?> value) {
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
int nextOffset = arrayOffsets.currentOffset++;
var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2));
Expand Down Expand Up @@ -79,13 +82,41 @@ static int[] parseOffsetArray(StreamInput in) throws IOException {
return offsetToOrd;
}

/**
 * Computes the name of the hidden ".offsets" field used to reconstruct leaf arrays from doc values
 * with synthetic source, or returns {@code null} when offsets should not be tracked for this field.
 *
 * @param context             the builder context, used to detect synthetic source and build the full field name
 * @param indexSourceKeepMode the index-level default source keep mode, used when the field defines none
 * @param hasDocValues        whether the field has doc values (required, since arrays are rebuilt from them)
 * @param isStored            whether the field is stored (if so, synthesis uses stored fields and offsets are pointless)
 * @param fieldMapperBuilder  the field's builder, consulted for keep mode, copy_to and multi-fields
 * @param indexCreatedVersion the index creation version; offsets require a recent enough index
 * @return the full name of the offsets field, or {@code null} if offsets are not applicable
 */
static String getOffsetsFieldName(
    MapperBuilderContext context,
    Mapper.SourceKeepMode indexSourceKeepMode,
    boolean hasDocValues,
    boolean isStored,
    FieldMapper.Builder fieldMapperBuilder,
    IndexVersion indexCreatedVersion
) {
    var sourceKeepMode = fieldMapperBuilder.sourceKeepMode.orElse(indexSourceKeepMode);
    boolean trackOffsets = context.isSourceSynthetic()
        && sourceKeepMode == Mapper.SourceKeepMode.ARRAYS
        && hasDocValues
        // Skip stored: we will be synthesizing from stored fields, no point keeping track of the offsets.
        && isStored == false
        // Skip copy_to and multi fields: supporting them requires more work. However, copy_to usage is
        // rare in metrics and logging use cases.
        && fieldMapperBuilder.copyTo.copyToFields().isEmpty()
        && fieldMapperBuilder.multiFieldsBuilder.hasMultiFields() == false
        && indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD);
    if (trackOffsets == false) {
        return null;
    }
    // Keep track of value offsets so that arrays can be reconstructed from doc values in the order
    // in which they were specified during indexing.
    return context.buildFullName(fieldMapperBuilder.leafName() + FieldArrayContext.OFFSETS_FIELD_NAME_SUFFIX);
}

private static class Offsets {

int currentOffset;
// Need to use a TreeMap here so that values (with their offsets) are kept in sorted order, which is the
// same order in which the values get stored in SortedSetDocValues. This way the offsets we record line up
// with the doc-values ordinals at synthesis time.
final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();
final Map<Comparable<?>, List<Integer>> valueToOffsets = new TreeMap<>();
final List<Integer> nullValueOffsets = new ArrayList<>(2);

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.Objects;
import java.util.function.BiFunction;

import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;
import static org.elasticsearch.index.mapper.IpPrefixAutomatonUtil.buildIpPrefixAutomaton;

/**
Expand Down Expand Up @@ -92,8 +93,15 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
private final boolean ignoreMalformedByDefault;
private final IndexVersion indexCreatedVersion;
private final ScriptCompiler scriptCompiler;
private final SourceKeepMode indexSourceKeepMode;

public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalformedByDefault, IndexVersion indexCreatedVersion) {
public Builder(
String name,
ScriptCompiler scriptCompiler,
boolean ignoreMalformedByDefault,
IndexVersion indexCreatedVersion,
SourceKeepMode indexSourceKeepMode
) {
super(name);
this.scriptCompiler = Objects.requireNonNull(scriptCompiler);
this.ignoreMalformedByDefault = ignoreMalformedByDefault;
Expand All @@ -114,6 +122,7 @@ public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalform
);
}
});
this.indexSourceKeepMode = indexSourceKeepMode;
}

Builder nullValue(String nullValue) {
Expand Down Expand Up @@ -184,6 +193,15 @@ public IpFieldMapper build(MapperBuilderContext context) {
}
hasScript = script.get() != null;
onScriptError = onScriptErrorParam.getValue();

String offsetsFieldName = getOffsetsFieldName(
context,
indexSourceKeepMode,
hasDocValues.getValue(),
stored.getValue(),
this,
indexCreatedVersion
);
return new IpFieldMapper(
leafName(),
new IpFieldType(
Expand All @@ -198,7 +216,8 @@ public IpFieldMapper build(MapperBuilderContext context) {
),
builderParams(this, context),
context.isSourceSynthetic(),
this
this,
offsetsFieldName
);
}

Expand All @@ -208,7 +227,7 @@ public IpFieldMapper build(MapperBuilderContext context) {

public static final TypeParser PARSER = new TypeParser((n, c) -> {
boolean ignoreMalformedByDefault = IGNORE_MALFORMED_SETTING.get(c.getSettings());
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated());
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated(), c.getIndexSettings().sourceKeepMode());
}, MINIMUM_COMPATIBILITY_VERSION);

public static final class IpFieldType extends SimpleMappedFieldType {
Expand Down Expand Up @@ -503,13 +522,16 @@ public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensi
private final Script script;
private final FieldValues<InetAddress> scriptValues;
private final ScriptCompiler scriptCompiler;
private final SourceKeepMode indexSourceKeepMode;
private final String offsetsFieldName;

private IpFieldMapper(
String simpleName,
MappedFieldType mappedFieldType,
BuilderParams builderParams,
boolean storeIgnored,
Builder builder
Builder builder,
String offsetsFieldName
) {
super(simpleName, mappedFieldType, builderParams);
this.ignoreMalformedByDefault = builder.ignoreMalformedByDefault;
Expand All @@ -525,6 +547,8 @@ private IpFieldMapper(
this.scriptCompiler = builder.scriptCompiler;
this.dimension = builder.dimension.getValue();
this.storeIgnored = storeIgnored;
this.indexSourceKeepMode = builder.indexSourceKeepMode;
this.offsetsFieldName = offsetsFieldName;
}

@Override
Expand Down Expand Up @@ -563,6 +587,14 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
if (address != null) {
indexValue(context, address);
}
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.canAddIgnoredField()) {
if (address != null) {
BytesRef sortableValue = new BytesRef(InetAddressPoint.encode(address));
context.getOffSetContext().recordOffset(offsetsFieldName, sortableValue);
} else {
context.getOffSetContext().recordNull(offsetsFieldName);
}
}
}

private void indexValue(DocumentParserContext context, InetAddress address) {
Expand Down Expand Up @@ -595,7 +627,9 @@ protected void indexScriptValues(

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion).dimension(dimension).init(this);
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion, indexSourceKeepMode).dimension(
dimension
).init(this);
}

@Override
Expand All @@ -612,19 +646,24 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
if (hasDocValues) {
return new SyntheticSourceSupport.Native(() -> {
var layers = new ArrayList<CompositeSyntheticFieldLoader.Layer>();
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
@Override
protected BytesRef convert(BytesRef value) {
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes)));
}

@Override
protected BytesRef preserve(BytesRef value) {
// No need to copy because convert has made a deep copy
return value;
}
});
if (offsetsFieldName != null) {
layers.add(
new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName, IpFieldMapper::convert)
);
} else {
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
@Override
protected BytesRef convert(BytesRef value) {
return IpFieldMapper.convert(value);
}

@Override
protected BytesRef preserve(BytesRef value) {
// No need to copy because convert has made a deep copy
return value;
}
});
}

if (ignoreMalformed) {
layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()));
Expand All @@ -635,4 +674,14 @@ protected BytesRef preserve(BytesRef value) {

return super.syntheticSourceSupport();
}

/**
 * Converts the binary doc-values representation of an IP address into its formatted string form,
 * so it can be serialized as a utf8 value during synthetic source reconstruction.
 *
 * @param value the raw encoded IP bytes as stored in doc values
 * @return a new {@link BytesRef} holding the formatted address string
 */
static BytesRef convert(BytesRef value) {
    // Copy out the exact slice first; decode operates on a standalone byte array.
    byte[] encoded = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
    InetAddress address = InetAddressPoint.decode(encoded);
    return new BytesRef(NetworkAddress.format(address));
}

@Override
public String getOffsetFieldName() {
    // Null when array offsets are not tracked for this field (e.g. source is not synthetic,
    // or the preconditions in FieldArrayContext.getOffsetsFieldName were not met).
    return offsetsFieldName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
import org.elasticsearch.core.Nullable;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldData;
Expand Down Expand Up @@ -83,6 +82,7 @@
import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH;
import static org.elasticsearch.core.Strings.format;
import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING;
import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;

/**
* A field mapper for keywords. This mapper accepts strings and indexes them as-is.
Expand All @@ -95,7 +95,6 @@ public final class KeywordFieldMapper extends FieldMapper {

static final NodeFeature KEYWORD_DIMENSION_IGNORE_ABOVE = new NodeFeature("mapper.keyword_dimension_ignore_above", true);
static final NodeFeature KEYWORD_NORMALIZER_SYNTHETIC_SOURCE = new NodeFeature("mapper.keyword_normalizer_synthetic_source", true);
public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";

public static class Defaults {
public static final FieldType FIELD_TYPE;
Expand Down Expand Up @@ -384,26 +383,14 @@ public KeywordFieldMapper build(MapperBuilderContext context) {
super.hasScript = script.get() != null;
super.onScriptError = onScriptError.getValue();

var sourceKeepMode = this.sourceKeepMode.orElse(indexSourceKeepMode);
String offsetsFieldName;
if (context.isSourceSynthetic()
&& sourceKeepMode == SourceKeepMode.ARRAYS
&& hasDocValues()
&& fieldtype.stored() == false
&& copyTo.copyToFields().isEmpty()
&& multiFieldsBuilder.hasMultiFields() == false
&& indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)) {
// Skip stored, we will be synthesizing from stored fields, no point to keep track of the offsets
// Skip copy_to and multi fields, supporting that requires more work. However, copy_to usage is rare in metrics and
// logging use cases

// keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing
// (if field is stored then there is no point of doing this)
offsetsFieldName = context.buildFullName(leafName() + OFFSETS_FIELD_NAME_SUFFIX);
} else {
offsetsFieldName = null;
}

String offsetsFieldName = getOffsetsFieldName(
context,
indexSourceKeepMode,
hasDocValues.getValue(),
stored.getValue(),
this,
indexCreatedVersion
);
return new KeywordFieldMapper(
leafName(),
fieldtype,
Expand Down Expand Up @@ -1003,7 +990,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
}

boolean indexed = indexValue(context, value);
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) {
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.canAddIgnoredField()) {
if (indexed) {
context.getOffSetContext().recordOffset(offsetsFieldName, value);
} else if (value == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.IOException;
import java.util.Objects;
import java.util.function.Function;

/**
* Load {@code _source} fields from {@link SortedSetDocValues} and associated {@link BinaryDocValues}. The former contains the unique values
Expand All @@ -30,11 +31,29 @@ final class SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer implements Co

private final String name;
private final String offsetsFieldName;
private final Function<BytesRef, BytesRef> converter;
private DocValuesWithOffsetsLoader docValues;

/**
 * @param name The name of the field to synthesize
 * @param offsetsFieldName The related offset field used to correctly synthesize the field if it is a leaf array
 */
SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName) {
    // Identity conversion: the doc values are already serializable as utf8 strings (e.g. keyword fields).
    this(name, offsetsFieldName, Function.identity());
}

/**
 * @param name The name of the field to synthesize
 * @param offsetsFieldName The related offset field used to correctly synthesize the field if it is a leaf array
 * @param converter This field value loader layer synthesizes the values read from doc values as a utf8 string.
 *                  If the doc values aren't serializable as a utf8 string then it is the responsibility of the
 *                  converter to convert them into a format that can be serialized as a utf8 string. For example,
 *                  IP field mapper doc values can't directly be serialized as a utf8 string.
 */
SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName, Function<BytesRef, BytesRef> converter) {
    this.name = Objects.requireNonNull(name);
    this.offsetsFieldName = Objects.requireNonNull(offsetsFieldName);
    this.converter = Objects.requireNonNull(converter);
}

@Override
Expand All @@ -47,7 +66,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
SortedSetDocValues valueDocValues = DocValues.getSortedSet(leafReader, name);
SortedDocValues offsetDocValues = DocValues.getSorted(leafReader, offsetsFieldName);

return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues);
return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues, converter);
}

@Override
Expand Down Expand Up @@ -78,15 +97,21 @@ public void write(XContentBuilder b) throws IOException {
static final class DocValuesWithOffsetsLoader implements DocValuesLoader {
private final SortedDocValues offsetDocValues;
private final SortedSetDocValues valueDocValues;
private final Function<BytesRef, BytesRef> converter;
private final ByteArrayStreamInput scratch = new ByteArrayStreamInput();

private boolean hasValue;
private boolean hasOffset;
private int[] offsetToOrd;

DocValuesWithOffsetsLoader(SortedSetDocValues valueDocValues, SortedDocValues offsetDocValues) {
DocValuesWithOffsetsLoader(
    SortedSetDocValues valueDocValues,
    SortedDocValues offsetDocValues,
    Function<BytesRef, BytesRef> converter
) {
    // converter transforms raw doc-values bytes into a utf8-serializable form before writing
    // (identity for keyword-like fields; a decoding step for e.g. IP fields).
    this.valueDocValues = valueDocValues;
    this.offsetDocValues = offsetDocValues;
    this.converter = converter;
}

@Override
Expand Down Expand Up @@ -146,7 +171,7 @@ public void write(XContentBuilder b) throws IOException {

long ord = ords[offset];
BytesRef c = valueDocValues.lookupOrd(ord);
// This is keyword specific and needs to be updated once support is added for other field types:
c = converter.apply(c);
b.utf8Value(c.bytes, c.offset, c.length);
}
} else if (offsetToOrd != null) {
Expand All @@ -158,6 +183,7 @@ public void write(XContentBuilder b) throws IOException {
} else {
for (int i = 0; i < valueDocValues.docValueCount(); i++) {
BytesRef c = valueDocValues.lookupOrd(valueDocValues.nextOrd());
c = converter.apply(c);
b.utf8Value(c.bytes, c.offset, c.length);
}
}
Expand Down
Loading