Skip to content

Commit 7c4fde6

Browse files
martijnvg and elasticsearchmachine authored
[8.x] Store arrays offsets for ip fields natively with synthetic source (#123405)
* [8.x] Store arrays offsets for ip fields natively with synthetic source Backporting #122999 to 8.x branch. Follow up of #113757 and adds support to natively store array offsets for ip fields instead of falling back to ignored source. * [CI] Auto commit changes from spotless --------- Co-authored-by: elasticsearchmachine <[email protected]>
1 parent f3bf8ed commit 7c4fde6

13 files changed

+844
-530
lines changed

docs/changelog/122999.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 122999
2+
summary: Store arrays offsets for ip fields natively with synthetic source
3+
area: Mapping
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/FieldArrayContext.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import org.apache.lucene.util.BitUtil;
1414
import org.elasticsearch.common.io.stream.BytesStreamOutput;
1515
import org.elasticsearch.common.io.stream.StreamInput;
16+
import org.elasticsearch.index.IndexVersion;
17+
import org.elasticsearch.index.IndexVersions;
1618

1719
import java.io.IOException;
1820
import java.util.ArrayList;
@@ -23,9 +25,10 @@
2325

2426
public class FieldArrayContext {
2527

28+
private static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";
2629
private final Map<String, Offsets> offsetsPerField = new HashMap<>();
2730

28-
void recordOffset(String field, String value) {
31+
void recordOffset(String field, Comparable<?> value) {
2932
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
3033
int nextOffset = arrayOffsets.currentOffset++;
3134
var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2));
@@ -79,13 +82,41 @@ static int[] parseOffsetArray(StreamInput in) throws IOException {
7982
return offsetToOrd;
8083
}
8184

85+
static String getOffsetsFieldName(
86+
MapperBuilderContext context,
87+
Mapper.SourceKeepMode indexSourceKeepMode,
88+
boolean hasDocValues,
89+
boolean isStored,
90+
FieldMapper.Builder fieldMapperBuilder,
91+
IndexVersion indexCreatedVersion
92+
) {
93+
var sourceKeepMode = fieldMapperBuilder.sourceKeepMode.orElse(indexSourceKeepMode);
94+
if (context.isSourceSynthetic()
95+
&& sourceKeepMode == Mapper.SourceKeepMode.ARRAYS
96+
&& hasDocValues
97+
&& isStored == false
98+
&& fieldMapperBuilder.copyTo.copyToFields().isEmpty()
99+
&& fieldMapperBuilder.multiFieldsBuilder.hasMultiFields() == false
100+
&& indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)) {
101+
// Skip stored, we will be synthesizing from stored fields, so there is no point in keeping track of the offsets
102+
// Skip copy_to and multi fields, supporting that requires more work. However, copy_to usage is rare in metrics and
103+
// logging use cases
104+
105+
// keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing
106+
// (if the field is stored then there is no point in doing this)
107+
return context.buildFullName(fieldMapperBuilder.leafName() + FieldArrayContext.OFFSETS_FIELD_NAME_SUFFIX);
108+
} else {
109+
return null;
110+
}
111+
}
112+
82113
private static class Offsets {
83114

84115
int currentOffset;
85116
// Need to use TreeMap here, so that we maintain the order in which each value (with offset) gets inserted,
86117
// (which is in the same order the document gets parsed) so we store offsets in right order. This is the same
87118
// order in which the values get stored in SortedSetDocValues.
88-
final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();
119+
final Map<Comparable<?>, List<Integer>> valueToOffsets = new TreeMap<>();
89120
final List<Integer> nullValueOffsets = new ArrayList<>(2);
90121

91122
}

server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java

Lines changed: 67 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import java.util.Objects;
5656
import java.util.function.BiFunction;
5757

58+
import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;
5859
import static org.elasticsearch.index.mapper.IpPrefixAutomatonUtil.buildIpPrefixAutomaton;
5960

6061
/**
@@ -92,8 +93,15 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
9293
private final boolean ignoreMalformedByDefault;
9394
private final IndexVersion indexCreatedVersion;
9495
private final ScriptCompiler scriptCompiler;
96+
private final SourceKeepMode indexSourceKeepMode;
9597

96-
public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalformedByDefault, IndexVersion indexCreatedVersion) {
98+
public Builder(
99+
String name,
100+
ScriptCompiler scriptCompiler,
101+
boolean ignoreMalformedByDefault,
102+
IndexVersion indexCreatedVersion,
103+
SourceKeepMode indexSourceKeepMode
104+
) {
97105
super(name);
98106
this.scriptCompiler = Objects.requireNonNull(scriptCompiler);
99107
this.ignoreMalformedByDefault = ignoreMalformedByDefault;
@@ -114,6 +122,7 @@ public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalform
114122
);
115123
}
116124
});
125+
this.indexSourceKeepMode = indexSourceKeepMode;
117126
}
118127

119128
Builder nullValue(String nullValue) {
@@ -184,6 +193,15 @@ public IpFieldMapper build(MapperBuilderContext context) {
184193
}
185194
hasScript = script.get() != null;
186195
onScriptError = onScriptErrorParam.getValue();
196+
197+
String offsetsFieldName = getOffsetsFieldName(
198+
context,
199+
indexSourceKeepMode,
200+
hasDocValues.getValue(),
201+
stored.getValue(),
202+
this,
203+
indexCreatedVersion
204+
);
187205
return new IpFieldMapper(
188206
leafName(),
189207
new IpFieldType(
@@ -198,7 +216,8 @@ public IpFieldMapper build(MapperBuilderContext context) {
198216
),
199217
builderParams(this, context),
200218
context.isSourceSynthetic(),
201-
this
219+
this,
220+
offsetsFieldName
202221
);
203222
}
204223

@@ -208,7 +227,7 @@ public IpFieldMapper build(MapperBuilderContext context) {
208227

209228
public static final TypeParser PARSER = new TypeParser((n, c) -> {
210229
boolean ignoreMalformedByDefault = IGNORE_MALFORMED_SETTING.get(c.getSettings());
211-
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated());
230+
return new Builder(n, c.scriptCompiler(), ignoreMalformedByDefault, c.indexVersionCreated(), c.getIndexSettings().sourceKeepMode());
212231
}, MINIMUM_COMPATIBILITY_VERSION);
213232

214233
public static final class IpFieldType extends SimpleMappedFieldType {
@@ -503,13 +522,16 @@ public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensi
503522
private final Script script;
504523
private final FieldValues<InetAddress> scriptValues;
505524
private final ScriptCompiler scriptCompiler;
525+
private final SourceKeepMode indexSourceKeepMode;
526+
private final String offsetsFieldName;
506527

507528
private IpFieldMapper(
508529
String simpleName,
509530
MappedFieldType mappedFieldType,
510531
BuilderParams builderParams,
511532
boolean storeIgnored,
512-
Builder builder
533+
Builder builder,
534+
String offsetsFieldName
513535
) {
514536
super(simpleName, mappedFieldType, builderParams);
515537
this.ignoreMalformedByDefault = builder.ignoreMalformedByDefault;
@@ -525,6 +547,8 @@ private IpFieldMapper(
525547
this.scriptCompiler = builder.scriptCompiler;
526548
this.dimension = builder.dimension.getValue();
527549
this.storeIgnored = storeIgnored;
550+
this.indexSourceKeepMode = builder.indexSourceKeepMode;
551+
this.offsetsFieldName = offsetsFieldName;
528552
}
529553

530554
@Override
@@ -563,6 +587,14 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
563587
if (address != null) {
564588
indexValue(context, address);
565589
}
590+
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.canAddIgnoredField()) {
591+
if (address != null) {
592+
BytesRef sortableValue = new BytesRef(InetAddressPoint.encode(address));
593+
context.getOffSetContext().recordOffset(offsetsFieldName, sortableValue);
594+
} else {
595+
context.getOffSetContext().recordNull(offsetsFieldName);
596+
}
597+
}
566598
}
567599

568600
private void indexValue(DocumentParserContext context, InetAddress address) {
@@ -595,7 +627,9 @@ protected void indexScriptValues(
595627

596628
@Override
597629
public FieldMapper.Builder getMergeBuilder() {
598-
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion).dimension(dimension).init(this);
630+
return new Builder(leafName(), scriptCompiler, ignoreMalformedByDefault, indexCreatedVersion, indexSourceKeepMode).dimension(
631+
dimension
632+
).init(this);
599633
}
600634

601635
@Override
@@ -612,19 +646,24 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
612646
if (hasDocValues) {
613647
return new SyntheticSourceSupport.Native(() -> {
614648
var layers = new ArrayList<CompositeSyntheticFieldLoader.Layer>();
615-
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
616-
@Override
617-
protected BytesRef convert(BytesRef value) {
618-
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
619-
return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes)));
620-
}
621-
622-
@Override
623-
protected BytesRef preserve(BytesRef value) {
624-
// No need to copy because convert has made a deep copy
625-
return value;
626-
}
627-
});
649+
if (offsetsFieldName != null) {
650+
layers.add(
651+
new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName, IpFieldMapper::convert)
652+
);
653+
} else {
654+
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
655+
@Override
656+
protected BytesRef convert(BytesRef value) {
657+
return IpFieldMapper.convert(value);
658+
}
659+
660+
@Override
661+
protected BytesRef preserve(BytesRef value) {
662+
// No need to copy because convert has made a deep copy
663+
return value;
664+
}
665+
});
666+
}
628667

629668
if (ignoreMalformed) {
630669
layers.add(new CompositeSyntheticFieldLoader.MalformedValuesLayer(fullPath()));
@@ -635,4 +674,14 @@ protected BytesRef preserve(BytesRef value) {
635674

636675
return super.syntheticSourceSupport();
637676
}
677+
678+
static BytesRef convert(BytesRef value) {
679+
byte[] bytes = Arrays.copyOfRange(value.bytes, value.offset, value.offset + value.length);
680+
return new BytesRef(NetworkAddress.format(InetAddressPoint.decode(bytes)));
681+
}
682+
683+
@Override
684+
public String getOffsetFieldName() {
685+
return offsetsFieldName;
686+
}
638687
}

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
import org.elasticsearch.core.Nullable;
4242
import org.elasticsearch.features.NodeFeature;
4343
import org.elasticsearch.index.IndexVersion;
44-
import org.elasticsearch.index.IndexVersions;
4544
import org.elasticsearch.index.analysis.IndexAnalyzers;
4645
import org.elasticsearch.index.analysis.NamedAnalyzer;
4746
import org.elasticsearch.index.fielddata.FieldData;
@@ -83,6 +82,7 @@
8382
import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH;
8483
import static org.elasticsearch.core.Strings.format;
8584
import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING;
85+
import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;
8686

8787
/**
8888
* A field mapper for keywords. This mapper accepts strings and indexes them as-is.
@@ -95,7 +95,6 @@ public final class KeywordFieldMapper extends FieldMapper {
9595

9696
static final NodeFeature KEYWORD_DIMENSION_IGNORE_ABOVE = new NodeFeature("mapper.keyword_dimension_ignore_above", true);
9797
static final NodeFeature KEYWORD_NORMALIZER_SYNTHETIC_SOURCE = new NodeFeature("mapper.keyword_normalizer_synthetic_source", true);
98-
public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";
9998

10099
public static class Defaults {
101100
public static final FieldType FIELD_TYPE;
@@ -384,26 +383,14 @@ public KeywordFieldMapper build(MapperBuilderContext context) {
384383
super.hasScript = script.get() != null;
385384
super.onScriptError = onScriptError.getValue();
386385

387-
var sourceKeepMode = this.sourceKeepMode.orElse(indexSourceKeepMode);
388-
String offsetsFieldName;
389-
if (context.isSourceSynthetic()
390-
&& sourceKeepMode == SourceKeepMode.ARRAYS
391-
&& hasDocValues()
392-
&& fieldtype.stored() == false
393-
&& copyTo.copyToFields().isEmpty()
394-
&& multiFieldsBuilder.hasMultiFields() == false
395-
&& indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)) {
396-
// Skip stored, we will be synthesizing from stored fields, no point to keep track of the offsets
397-
// Skip copy_to and multi fields, supporting that requires more work. However, copy_to usage is rare in metrics and
398-
// logging use cases
399-
400-
// keep track of value offsets so that we can reconstruct arrays from doc values in order as was specified during indexing
401-
// (if field is stored then there is no point of doing this)
402-
offsetsFieldName = context.buildFullName(leafName() + OFFSETS_FIELD_NAME_SUFFIX);
403-
} else {
404-
offsetsFieldName = null;
405-
}
406-
386+
String offsetsFieldName = getOffsetsFieldName(
387+
context,
388+
indexSourceKeepMode,
389+
hasDocValues.getValue(),
390+
stored.getValue(),
391+
this,
392+
indexCreatedVersion
393+
);
407394
return new KeywordFieldMapper(
408395
leafName(),
409396
fieldtype,
@@ -1003,7 +990,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
1003990
}
1004991

1005992
boolean indexed = indexValue(context, value);
1006-
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) {
993+
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.canAddIgnoredField()) {
1007994
if (indexed) {
1008995
context.getOffSetContext().recordOffset(offsetsFieldName, value);
1009996
} else if (value == null) {

server/src/main/java/org/elasticsearch/index/mapper/SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import java.io.IOException;
2222
import java.util.Objects;
23+
import java.util.function.Function;
2324

2425
/**
2526
* Load {@code _source} fields from {@link SortedSetDocValues} and associated {@link BinaryDocValues}. The former contains the unique values
@@ -30,11 +31,29 @@ final class SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer implements Co
3031

3132
private final String name;
3233
private final String offsetsFieldName;
34+
private final Function<BytesRef, BytesRef> converter;
3335
private DocValuesWithOffsetsLoader docValues;
3436

37+
/**
38+
* @param name The name of the field to synthesize
39+
* @param offsetsFieldName The related offset field used to correctly synthesize the field if it is a leaf array
40+
*/
3541
SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName) {
42+
this(name, offsetsFieldName, Function.identity());
43+
}
44+
45+
/**
46+
* @param name The name of the field to synthesize
47+
* @param offsetsFieldName The related offset field used to correctly synthesize the field if it is a leaf array
48+
* @param converter This field value loader layer synthesizes the values read from doc values as utf8 string. If the doc value
49+
* values aren't serializable as utf8 string then it is the responsibility of the converter to convert into a
50+
* format that can be serialized as utf8 string. For example IP field mapper doc values can't directly be
51+
* serialized as utf8 string.
52+
*/
53+
SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName, Function<BytesRef, BytesRef> converter) {
3654
this.name = Objects.requireNonNull(name);
3755
this.offsetsFieldName = Objects.requireNonNull(offsetsFieldName);
56+
this.converter = Objects.requireNonNull(converter);
3857
}
3958

4059
@Override
@@ -47,7 +66,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
4766
SortedSetDocValues valueDocValues = DocValues.getSortedSet(leafReader, name);
4867
SortedDocValues offsetDocValues = DocValues.getSorted(leafReader, offsetsFieldName);
4968

50-
return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues);
69+
return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues, converter);
5170
}
5271

5372
@Override
@@ -78,15 +97,21 @@ public void write(XContentBuilder b) throws IOException {
7897
static final class DocValuesWithOffsetsLoader implements DocValuesLoader {
7998
private final SortedDocValues offsetDocValues;
8099
private final SortedSetDocValues valueDocValues;
100+
private final Function<BytesRef, BytesRef> converter;
81101
private final ByteArrayStreamInput scratch = new ByteArrayStreamInput();
82102

83103
private boolean hasValue;
84104
private boolean hasOffset;
85105
private int[] offsetToOrd;
86106

87-
DocValuesWithOffsetsLoader(SortedSetDocValues valueDocValues, SortedDocValues offsetDocValues) {
107+
DocValuesWithOffsetsLoader(
108+
SortedSetDocValues valueDocValues,
109+
SortedDocValues offsetDocValues,
110+
Function<BytesRef, BytesRef> converter
111+
) {
88112
this.valueDocValues = valueDocValues;
89113
this.offsetDocValues = offsetDocValues;
114+
this.converter = converter;
90115
}
91116

92117
@Override
@@ -146,7 +171,7 @@ public void write(XContentBuilder b) throws IOException {
146171

147172
long ord = ords[offset];
148173
BytesRef c = valueDocValues.lookupOrd(ord);
149-
// This is keyword specific and needs to be updated once support is added for other field types:
174+
c = converter.apply(c);
150175
b.utf8Value(c.bytes, c.offset, c.length);
151176
}
152177
} else if (offsetToOrd != null) {
@@ -158,6 +183,7 @@ public void write(XContentBuilder b) throws IOException {
158183
} else {
159184
for (int i = 0; i < valueDocValues.docValueCount(); i++) {
160185
BytesRef c = valueDocValues.lookupOrd(valueDocValues.nextOrd());
186+
c = converter.apply(c);
161187
b.utf8Value(c.bytes, c.offset, c.length);
162188
}
163189
}

0 commit comments

Comments
 (0)