Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/122999.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 122999
summary: Store arrays offsets for ip fields natively with synthetic source
area: Mapping
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import org.apache.lucene.util.BitUtil;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -23,9 +25,10 @@

public class FieldArrayContext {

private static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";
private final Map<String, Offsets> offsetsPerField = new HashMap<>();

void recordOffset(String field, String value) {
void recordOffset(String field, Comparable<?> value) {
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
int nextOffset = arrayOffsets.currentOffset++;
var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2));
Expand Down Expand Up @@ -79,13 +82,49 @@ static int[] parseOffsetArray(StreamInput in) throws IOException {
return offsetToOrd;
}

static String getOffsetsFieldName(
MapperBuilderContext context,
Mapper.SourceKeepMode indexSourceKeepMode,
boolean hasDocValues,
boolean isStored,
IndexVersion indexCreatedVersion,
FieldMapper.Builder fieldMapperBuilder
) {
var sourceKeepMode = fieldMapperBuilder.sourceKeepMode.orElse(indexSourceKeepMode);
if (context.isSourceSynthetic()
&& sourceKeepMode == Mapper.SourceKeepMode.ARRAYS
&& hasDocValues
&& isStored == false
&& fieldMapperBuilder.copyTo.copyToFields().isEmpty()
&& fieldMapperBuilder.multiFieldsBuilder.hasMultiFields() == false
&& indexVersionSupportStoringArraysNatively(indexCreatedVersion)) {
// Skip stored, we will be synthesizing from stored fields, no point to keep track of the offsets
// Skip copy_to and multi fields, supporting that requires more work. However, copy_to usage is rare in metrics and
// logging use cases

// keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing
// (if field is stored then there is no point of doing this)
return context.buildFullName(fieldMapperBuilder.leafName() + FieldArrayContext.OFFSETS_FIELD_NAME_SUFFIX);
} else {
return null;
}
}

private static boolean indexVersionSupportStoringArraysNatively(IndexVersion indexCreatedVersion) {
return indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)
|| indexCreatedVersion.between(
IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD_BACKPORT_8_X,
IndexVersions.UPGRADE_TO_LUCENE_10_0_0
);
}

private static class Offsets {

int currentOffset;
// Need to use TreeMap here, so that we maintain the order in which each value (with offset) stored inserted,
// (which is in the same order the document gets parsed) so we store offsets in right order. This is the same
// order in what the values get stored in SortedSetDocValues.
final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();
final Map<Comparable<?>, List<Integer>> valueToOffsets = new TreeMap<>();
final List<Integer> nullValueOffsets = new ArrayList<>(2);

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.Objects;
import java.util.function.BiFunction;

import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;
import static org.elasticsearch.index.mapper.IpPrefixAutomatonUtil.buildIpPrefixAutomaton;

/**
Expand Down Expand Up @@ -92,8 +93,15 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
private final boolean ignoreMalformedByDefault;
private final IndexVersion indexCreatedVersion;
private final ScriptCompiler scriptCompiler;
private final SourceKeepMode indexSourceKeepMode;

public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalformedByDefault, IndexVersion indexCreatedVersion) {
public Builder(
String name,
ScriptCompiler scriptCompiler,
boolean ignoreMalformedByDefault,
IndexVersion indexCreatedVersion,
SourceKeepMode indexSourceKeepMode
) {
super(name);
this.scriptCompiler = Objects.requireNonNull(scriptCompiler);
this.ignoreMalformedByDefault = ignoreMalformedByDefault;
Expand All @@ -114,6 +122,7 @@ public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalform
);
}
});
this.indexSourceKeepMode = indexSourceKeepMode;
}

Builder nullValue(String nullValue) {
Expand Down Expand Up @@ -184,6 +193,15 @@ public IpFieldMapper build(MapperBuilderContext context) {
}
hasScript = script.get() != null;
onScriptError = onScriptErrorParam.getValue();

String offsetsFieldName = getOffsetsFieldName(
context,
indexSourceKeepMode,
hasDocValues.getValue(),
stored.getValue(),
indexCreatedVersion,
this
);
return new IpFieldMapper(
leafName(),
new IpFieldType(
Expand All @@ -198,15 +216,16 @@ public IpFieldMapper build(MapperBuilderContext context) {
),
builderParams(this, context),
context.isSourceSynthetic(),
this
this,
offsetsFieldName
);
}

}

public static final TypeParser PARSER = createTypeParserWithLegacySupport((n, c) -> {
boolean ignoreMalformedByDefault = IGNORE_MALFORMED_SETTING.get(c.getSettings());
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated());
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated(), c.getIndexSettings().sourceKeepMode());
});

public static final class IpFieldType extends SimpleMappedFieldType {
Expand Down Expand Up @@ -501,13 +520,16 @@ public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensi
private final Script script;
private final FieldValues<InetAddress> scriptValues;
private final ScriptCompiler scriptCompiler;
private final SourceKeepMode indexSourceKeepMode;
private final String offsetsFieldName;

private IpFieldMapper(
String simpleName,
MappedFieldType mappedFieldType,
BuilderParams builderParams,
boolean storeIgnored,
Builder builder
Builder builder,
String offsetsFieldName
) {
super(simpleName, mappedFieldType, builderParams);
this.ignoreMalformedByDefault = builder.ignoreMalformedByDefault;
Expand All @@ -523,6 +545,8 @@ private IpFieldMapper(
this.scriptCompiler = builder.scriptCompiler;
this.dimension = builder.dimension.getValue();
this.storeIgnored = storeIgnored;
this.indexSourceKeepMode = builder.indexSourceKeepMode;
this.offsetsFieldName = offsetsFieldName;
}

@Override
Expand Down Expand Up @@ -561,6 +585,14 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
if (address != null) {
Copy link
Contributor

@kkrik-es kkrik-es Feb 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is address set to null, ever? It's set to nullValue or value, iiuc.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nullValue is null by default, so address can be set to null.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But then, should we be comparing to nullValue here and below?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point: 77c30b1

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually that change fails the build. If we only check nullValue then also value needs to be checked...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pushed another commit

indexValue(context, address);
}
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be using context.canAddIgnoredField instead of context.getRecordedSource. We had this comment in the previous PR, let's update KeywordFieldMapper too.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done: 2a72a77

if (address != null) {
BytesRef sortableValue = new BytesRef(InetAddressPoint.encode(address));
context.getOffSetContext().recordOffset(offsetsFieldName, sortableValue);
} else {
context.getOffSetContext().recordNull(offsetsFieldName);
}
}
}

private void indexValue(DocumentParserContext context, InetAddress address) {
Expand Down Expand Up @@ -593,7 +625,9 @@ protected void indexScriptValues(

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion).dimension(dimension).init(this);
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion, indexSourceKeepMode).dimension(
dimension
).init(this);
}

@Override
Expand All @@ -610,19 +644,24 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
if (hasDocValues) {
return new SyntheticSourceSupport.Native(() -> {
var layers = new ArrayList<CompositeSyntheticFieldLoader.Layer>();
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
@Override
protected BytesRef convert(BytesRef value) {
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes)));
}

@Override
protected BytesRef preserve(BytesRef value) {
// No need to copy because convert has made a deep copy
return value;
}
});
if (offsetsFieldName != null) {
layers.add(
new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName, IpFieldMapper::convert)
);
} else {
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
@Override
protected BytesRef convert(BytesRef value) {
return IpFieldMapper.convert(value);
}

@Override
protected BytesRef preserve(BytesRef value) {
// No need to copy because convert has made a deep copy
return value;
}
});
}

if (ignoreMalformed) {
layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()));
Expand All @@ -633,4 +672,14 @@ protected BytesRef preserve(BytesRef value) {

return super.syntheticSourceSupport();
}

static BytesRef convert(BytesRef value) {
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes)));
}

@Override
public String getOffsetFieldName() {
return offsetsFieldName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
import static org.elasticsearch.core.Strings.format;
import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING;
import static org.elasticsearch.index.IndexSettings.USE_DOC_VALUES_SKIPPER;
import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;

/**
* A field mapper for keywords. This mapper accepts strings and indexes them as-is.
Expand All @@ -95,7 +96,6 @@ public final class KeywordFieldMapper extends FieldMapper {

public static final String CONTENT_TYPE = "keyword";
private static final String HOST_NAME = "host.name";
public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";

public static class Defaults {
public static final FieldType FIELD_TYPE;
Expand Down Expand Up @@ -439,26 +439,14 @@ public KeywordFieldMapper build(MapperBuilderContext context) {
super.hasScript = script.get() != null;
super.onScriptError = onScriptError.getValue();

var sourceKeepMode = this.sourceKeepMode.orElse(indexSourceKeepMode);
String offsetsFieldName;
if (context.isSourceSynthetic()
&& sourceKeepMode == SourceKeepMode.ARRAYS
&& hasDocValues()
&& fieldtype.stored() == false
&& copyTo.copyToFields().isEmpty()
&& multiFieldsBuilder.hasMultiFields() == false
&& indexVersionSupportStoringArraysNatively()) {
// Skip stored, we will be synthesizing from stored fields, no point to keep track of the offsets
// Skip copy_to and multi fields, supporting that requires more work. However, copy_to usage is rare in metrics and
// logging use cases

// keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing
// (if field is stored then there is no point of doing this)
offsetsFieldName = context.buildFullName(leafName() + OFFSETS_FIELD_NAME_SUFFIX);
} else {
offsetsFieldName = null;
}

String offsetsFieldName = getOffsetsFieldName(
context,
indexSourceKeepMode,
hasDocValues.getValue(),
stored.getValue(),
indexCreatedVersion,
this
);
return new KeywordFieldMapper(
leafName(),
fieldtype,
Expand All @@ -472,14 +460,6 @@ && indexVersionSupportStoringArraysNatively()) {
);
}

private boolean indexVersionSupportStoringArraysNatively() {
return indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)
|| indexCreatedVersion.between(
IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD_BACKPORT_8_X,
IndexVersions.UPGRADE_TO_LUCENE_10_0_0
);
}

private FieldType resolveFieldType(
final boolean useDocValuesSkipper,
final IndexVersion indexCreatedVersion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.IOException;
import java.util.Objects;
import java.util.function.Function;

/**
* Load {@code _source} fields from {@link SortedSetDocValues} and associated {@link BinaryDocValues}. The former contains the unique values
Expand All @@ -30,11 +31,17 @@ final class SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer implements Co

private final String name;
private final String offsetsFieldName;
private final Function<BytesRef, BytesRef> converter;
private DocValuesWithOffsetsLoader docValues;

SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName) {
this(name, offsetsFieldName, Function.identity());
}

SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName, Function<BytesRef, BytesRef> converter) {
this.name = Objects.requireNonNull(name);
this.offsetsFieldName = Objects.requireNonNull(offsetsFieldName);
this.converter = Objects.requireNonNull(converter);
}

@Override
Expand All @@ -47,7 +54,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
SortedSetDocValues valueDocValues = DocValues.getSortedSet(leafReader, name);
SortedDocValues offsetDocValues = DocValues.getSorted(leafReader, offsetsFieldName);

return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues);
return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues, converter);
}

@Override
Expand Down Expand Up @@ -78,15 +85,21 @@ public void write(XContentBuilder b) throws IOException {
static final class DocValuesWithOffsetsLoader implements DocValuesLoader {
private final SortedDocValues offsetDocValues;
private final SortedSetDocValues valueDocValues;
private final Function<BytesRef, BytesRef> converter;
private final ByteArrayStreamInput scratch = new ByteArrayStreamInput();

private boolean hasValue;
private boolean hasOffset;
private int[] offsetToOrd;

DocValuesWithOffsetsLoader(SortedSetDocValues valueDocValues, SortedDocValues offsetDocValues) {
DocValuesWithOffsetsLoader(
SortedSetDocValues valueDocValues,
SortedDocValues offsetDocValues,
Function<BytesRef, BytesRef> converter
) {
this.valueDocValues = valueDocValues;
this.offsetDocValues = offsetDocValues;
this.converter = converter;
}

@Override
Expand Down Expand Up @@ -146,7 +159,7 @@ public void write(XContentBuilder b) throws IOException {

long ord = ords[offset];
BytesRef c = valueDocValues.lookupOrd(ord);
// This is keyword specific and needs to be updated once support is added for other field types:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need a comment, as the value is written as utf8. Maybe something about what the converter is expected to return, in the constructor?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done: 86a1aa0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we enforce utf8 if this goes to xcontentbuilder anyway that has a generic value method that takes anything?

c = converter.apply(c);
b.utf8Value(c.bytes, c.offset, c.length);
}
} else if (offsetToOrd != null) {
Expand All @@ -158,6 +171,7 @@ public void write(XContentBuilder b) throws IOException {
} else {
for (int i = 0; i < valueDocValues.docValueCount(); i++) {
BytesRef c = valueDocValues.lookupOrd(valueDocValues.nextOrd());
c = converter.apply(c);
b.utf8Value(c.bytes, c.offset, c.length);
}
}
Expand Down
Loading