Skip to content

Commit 6c55099

Browse files
authored
Store arrays offsets for ip fields natively with synthetic source (#122999)
Follow-up to #113757 that adds support for natively storing array offsets for ip fields instead of falling back to ignored source.
1 parent 2f0e1da commit 6c55099

14 files changed

+859
-537
lines changed

docs/changelog/122999.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 122999
2+
summary: Store arrays offsets for ip fields natively with synthetic source
3+
area: Mapping
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ private static Version parseUnchecked(String version) {
150150
public static final IndexVersion TIMESTAMP_DOC_VALUES_SPARSE_INDEX = def(9_011_0_00, Version.LUCENE_10_1_0);
151151
public static final IndexVersion TIME_SERIES_ID_DOC_VALUES_SPARSE_INDEX = def(9_012_0_00, Version.LUCENE_10_1_0);
152152
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD = def(9_013_0_00, Version.LUCENE_10_1_0);
153+
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_IP = def(9_014_0_00, Version.LUCENE_10_1_0);
153154
/*
154155
* STOP! READ THIS FIRST! No, really,
155156
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/main/java/org/elasticsearch/index/mapper/FieldArrayContext.java

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import org.apache.lucene.util.BitUtil;
1414
import org.elasticsearch.common.io.stream.BytesStreamOutput;
1515
import org.elasticsearch.common.io.stream.StreamInput;
16+
import org.elasticsearch.index.IndexVersion;
17+
import org.elasticsearch.index.IndexVersions;
1618

1719
import java.io.IOException;
1820
import java.util.ArrayList;
@@ -23,9 +25,10 @@
2325

2426
public class FieldArrayContext {
2527

28+
private static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";
2629
private final Map<String, Offsets> offsetsPerField = new HashMap<>();
2730

28-
void recordOffset(String field, String value) {
31+
void recordOffset(String field, Comparable<?> value) {
2932
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
3033
int nextOffset = arrayOffsets.currentOffset++;
3134
var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2));
@@ -79,13 +82,53 @@ static int[] parseOffsetArray(StreamInput in) throws IOException {
7982
return offsetToOrd;
8083
}
8184

85+
static String getOffsetsFieldName(
86+
MapperBuilderContext context,
87+
Mapper.SourceKeepMode indexSourceKeepMode,
88+
boolean hasDocValues,
89+
boolean isStored,
90+
FieldMapper.Builder fieldMapperBuilder,
91+
IndexVersion indexCreatedVersion,
92+
IndexVersion minSupportedVersionMain
93+
) {
94+
var sourceKeepMode = fieldMapperBuilder.sourceKeepMode.orElse(indexSourceKeepMode);
95+
if (context.isSourceSynthetic()
96+
&& sourceKeepMode == Mapper.SourceKeepMode.ARRAYS
97+
&& hasDocValues
98+
&& isStored == false
99+
&& fieldMapperBuilder.copyTo.copyToFields().isEmpty()
100+
&& fieldMapperBuilder.multiFieldsBuilder.hasMultiFields() == false
101+
&& indexVersionSupportStoringArraysNatively(indexCreatedVersion, minSupportedVersionMain)) {
102+
// Skip stored, we will be synthesizing from stored fields, no point to keep track of the offsets
103+
// Skip copy_to and multi fields, supporting that requires more work. However, copy_to usage is rare in metrics and
104+
// logging use cases
105+
106+
// keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing
107+
// (if field is stored then there is no point of doing this)
108+
return context.buildFullName(fieldMapperBuilder.leafName() + FieldArrayContext.OFFSETS_FIELD_NAME_SUFFIX);
109+
} else {
110+
return null;
111+
}
112+
}
113+
114+
private static boolean indexVersionSupportStoringArraysNatively(
115+
IndexVersion indexCreatedVersion,
116+
IndexVersion minSupportedVersionMain
117+
) {
118+
return indexCreatedVersion.onOrAfter(minSupportedVersionMain)
119+
|| indexCreatedVersion.between(
120+
IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD_BACKPORT_8_X,
121+
IndexVersions.UPGRADE_TO_LUCENE_10_0_0
122+
);
123+
}
124+
82125
private static class Offsets {
83126

84127
int currentOffset;
85128
// Need to use TreeMap here, so that we maintain the order in which each value (with offset) gets inserted,
86129
// (which is in the same order the document gets parsed) so we store offsets in the right order. This is the same
87130
// order in which the values get stored in SortedSetDocValues.
88-
final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();
131+
final Map<Comparable<?>, List<Integer>> valueToOffsets = new TreeMap<>();
89132
final List<Integer> nullValueOffsets = new ArrayList<>(2);
90133

91134
}

server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java

Lines changed: 68 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import java.util.Objects;
5656
import java.util.function.BiFunction;
5757

58+
import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;
5859
import static org.elasticsearch.index.mapper.IpPrefixAutomatonUtil.buildIpPrefixAutomaton;
5960

6061
/**
@@ -92,8 +93,15 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
9293
private final boolean ignoreMalformedByDefault;
9394
private final IndexVersion indexCreatedVersion;
9495
private final ScriptCompiler scriptCompiler;
96+
private final SourceKeepMode indexSourceKeepMode;
9597

96-
public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalformedByDefault, IndexVersion indexCreatedVersion) {
98+
public Builder(
99+
String name,
100+
ScriptCompiler scriptCompiler,
101+
boolean ignoreMalformedByDefault,
102+
IndexVersion indexCreatedVersion,
103+
SourceKeepMode indexSourceKeepMode
104+
) {
97105
super(name);
98106
this.scriptCompiler = Objects.requireNonNull(scriptCompiler);
99107
this.ignoreMalformedByDefault = ignoreMalformedByDefault;
@@ -114,6 +122,7 @@ public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalform
114122
);
115123
}
116124
});
125+
this.indexSourceKeepMode = indexSourceKeepMode;
117126
}
118127

119128
Builder nullValue(String nullValue) {
@@ -184,6 +193,16 @@ public IpFieldMapper build(MapperBuilderContext context) {
184193
}
185194
hasScript = script.get() != null;
186195
onScriptError = onScriptErrorParam.getValue();
196+
197+
String offsetsFieldName = getOffsetsFieldName(
198+
context,
199+
indexSourceKeepMode,
200+
hasDocValues.getValue(),
201+
stored.getValue(),
202+
this,
203+
indexCreatedVersion,
204+
IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_IP
205+
);
187206
return new IpFieldMapper(
188207
leafName(),
189208
new IpFieldType(
@@ -198,15 +217,16 @@ public IpFieldMapper build(MapperBuilderContext context) {
198217
),
199218
builderParams(this, context),
200219
context.isSourceSynthetic(),
201-
this
220+
this,
221+
offsetsFieldName
202222
);
203223
}
204224

205225
}
206226

207227
public static final TypeParser PARSER = createTypeParserWithLegacySupport((n, c) -> {
208228
boolean ignoreMalformedByDefault = IGNORE_MALFORMED_SETTING.get(c.getSettings());
209-
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated());
229+
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated(), c.getIndexSettings().sourceKeepMode());
210230
});
211231

212232
public static final class IpFieldType extends SimpleMappedFieldType {
@@ -501,13 +521,16 @@ public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensi
501521
private final Script script;
502522
private final FieldValues<InetAddress> scriptValues;
503523
private final ScriptCompiler scriptCompiler;
524+
private final SourceKeepMode indexSourceKeepMode;
525+
private final String offsetsFieldName;
504526

505527
private IpFieldMapper(
506528
String simpleName,
507529
MappedFieldType mappedFieldType,
508530
BuilderParams builderParams,
509531
boolean storeIgnored,
510-
Builder builder
532+
Builder builder,
533+
String offsetsFieldName
511534
) {
512535
super(simpleName, mappedFieldType, builderParams);
513536
this.ignoreMalformedByDefault = builder.ignoreMalformedByDefault;
@@ -523,6 +546,8 @@ private IpFieldMapper(
523546
this.scriptCompiler = builder.scriptCompiler;
524547
this.dimension = builder.dimension.getValue();
525548
this.storeIgnored = storeIgnored;
549+
this.indexSourceKeepMode = builder.indexSourceKeepMode;
550+
this.offsetsFieldName = offsetsFieldName;
526551
}
527552

528553
@Override
@@ -561,6 +586,14 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
561586
if (address != null) {
562587
indexValue(context, address);
563588
}
589+
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.canAddIgnoredField()) {
590+
if (address != null) {
591+
BytesRef sortableValue = new BytesRef(InetAddressPoint.encode(address));
592+
context.getOffSetContext().recordOffset(offsetsFieldName, sortableValue);
593+
} else {
594+
context.getOffSetContext().recordNull(offsetsFieldName);
595+
}
596+
}
564597
}
565598

566599
private void indexValue(DocumentParserContext context, InetAddress address) {
@@ -593,7 +626,9 @@ protected void indexScriptValues(
593626

594627
@Override
595628
public FieldMapper.Builder getMergeBuilder() {
596-
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion).dimension(dimension).init(this);
629+
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion, indexSourceKeepMode).dimension(
630+
dimension
631+
).init(this);
597632
}
598633

599634
@Override
@@ -610,19 +645,24 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
610645
if (hasDocValues) {
611646
return new SyntheticSourceSupport.Native(() -> {
612647
var layers = new ArrayList<CompositeSyntheticFieldLoader.Layer>();
613-
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
614-
@Override
615-
protected BytesRef convert(BytesRef value) {
616-
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
617-
return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes)));
618-
}
619-
620-
@Override
621-
protected BytesRef preserve(BytesRef value) {
622-
// No need to copy because convert has made a deep copy
623-
return value;
624-
}
625-
});
648+
if (offsetsFieldName != null) {
649+
layers.add(
650+
new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName, IpFieldMapper::convert)
651+
);
652+
} else {
653+
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
654+
@Override
655+
protected BytesRef convert(BytesRef value) {
656+
return IpFieldMapper.convert(value);
657+
}
658+
659+
@Override
660+
protected BytesRef preserve(BytesRef value) {
661+
// No need to copy because convert has made a deep copy
662+
return value;
663+
}
664+
});
665+
}
626666

627667
if (ignoreMalformed) {
628668
layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()));
@@ -633,4 +673,14 @@ protected BytesRef preserve(BytesRef value) {
633673

634674
return super.syntheticSourceSupport();
635675
}
676+
677+
static BytesRef convert(BytesRef value) {
678+
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
679+
return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes)));
680+
}
681+
682+
@Override
683+
public String getOffsetFieldName() {
684+
return offsetsFieldName;
685+
}
636686
}

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
import static org.elasticsearch.core.Strings.format;
8686
import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING;
8787
import static org.elasticsearch.index.IndexSettings.USE_DOC_VALUES_SKIPPER;
88+
import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;
8889

8990
/**
9091
* A field mapper for keywords. This mapper accepts strings and indexes them as-is.
@@ -95,7 +96,6 @@ public final class KeywordFieldMapper extends FieldMapper {
9596

9697
public static final String CONTENT_TYPE = "keyword";
9798
private static final String HOST_NAME = "host.name";
98-
public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";
9999

100100
public static class Defaults {
101101
public static final FieldType FIELD_TYPE;
@@ -439,26 +439,15 @@ public KeywordFieldMapper build(MapperBuilderContext context) {
439439
super.hasScript = script.get() != null;
440440
super.onScriptError = onScriptError.getValue();
441441

442-
var sourceKeepMode = this.sourceKeepMode.orElse(indexSourceKeepMode);
443-
String offsetsFieldName;
444-
if (context.isSourceSynthetic()
445-
&& sourceKeepMode == SourceKeepMode.ARRAYS
446-
&& hasDocValues()
447-
&& fieldtype.stored() == false
448-
&& copyTo.copyToFields().isEmpty()
449-
&& multiFieldsBuilder.hasMultiFields() == false
450-
&& indexVersionSupportStoringArraysNatively()) {
451-
// Skip stored, we will be synthesizing from stored fields, no point to keep track of the offsets
452-
// Skip copy_to and multi fields, supporting that requires more work. However, copy_to usage is rare in metrics and
453-
// logging use cases
454-
455-
// keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing
456-
// (if field is stored then there is no point of doing this)
457-
offsetsFieldName = context.buildFullName(leafName() + OFFSETS_FIELD_NAME_SUFFIX);
458-
} else {
459-
offsetsFieldName = null;
460-
}
461-
442+
String offsetsFieldName = getOffsetsFieldName(
443+
context,
444+
indexSourceKeepMode,
445+
hasDocValues.getValue(),
446+
stored.getValue(),
447+
this,
448+
indexCreatedVersion,
449+
IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD
450+
);
462451
return new KeywordFieldMapper(
463452
leafName(),
464453
fieldtype,
@@ -472,14 +461,6 @@ && indexVersionSupportStoringArraysNatively()) {
472461
);
473462
}
474463

475-
private boolean indexVersionSupportStoringArraysNatively() {
476-
return indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)
477-
|| indexCreatedVersion.between(
478-
IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD_BACKPORT_8_X,
479-
IndexVersions.UPGRADE_TO_LUCENE_10_0_0
480-
);
481-
}
482-
483464
private FieldType resolveFieldType(
484465
final boolean useDocValuesSkipper,
485466
final IndexVersion indexCreatedVersion,
@@ -1127,7 +1108,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
11271108
}
11281109

11291110
boolean indexed = indexValue(context, value);
1130-
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) {
1111+
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.canAddIgnoredField()) {
11311112
if (indexed) {
11321113
context.getOffSetContext().recordOffset(offsetsFieldName, value);
11331114
} else if (value == null) {

0 commit comments

Comments
 (0)