5 changes: 5 additions & 0 deletions docs/changelog/113757.yaml
@@ -0,0 +1,5 @@
pr: 113757
summary: Store array offsets for keyword fields natively with synthetic source instead of falling back to ignored source.
area: Mapping
type: enhancement
issues: []
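To make the enhancement concrete: with synthetic source, a keyword field's values live in SortedSetDocValues as sorted unique terms, so rebuilding the original leaf array also needs each slot's ordinal. Below is a minimal sketch of the reconstruction idea, assuming a stored offset-to-ordinal array as introduced in this PR; the helper names are illustrative, not from the Elasticsearch codebase.

import java.util.ArrayList;
import java.util.List;

// Illustrative only: doc values keep the sorted unique terms {"a", "b"};
// the stored offsets [1, -1, 0, 1] map each original slot to an ordinal,
// with -1 standing in for null, so ["b", null, "a", "b"] can be rebuilt.
public final class SyntheticArraySketch {

    static List<String> rebuild(String[] sortedTerms, int[] offsetToOrd) {
        List<String> original = new ArrayList<>(offsetToOrd.length);
        for (int ord : offsetToOrd) {
            original.add(ord == -1 ? null : sortedTerms[ord]);
        }
        return original;
    }

    public static void main(String[] args) {
        // Prints [b, null, a, b]
        System.out.println(rebuild(new String[] { "a", "b" }, new int[] { 1, -1, 0, 1 }));
    }
}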
4 changes: 4 additions & 0 deletions rest-api-spec/build.gradle
@@ -276,4 +276,8 @@ tasks.named("yamlRestTestV7CompatTransform").configure({ task ->
task.skipTest("search.vectors/130_knn_query_nested_search/nested kNN search inner_hits size > 1", "waiting for #118774 backport")
task.skipTest("search.vectors/110_knn_query_with_filter/PRE_FILTER: pre-filter across multiple aliases", "waiting for #118774 backport")
task.skipTest("search.vectors/160_knn_query_missing_params/kNN search in a dis_max query - missing num_candidates", "waiting for #118774 backport")
task.skipTest("logsdb/10_settings/routing path allowed in logs mode with routing on sort fields", "Unknown feature routing.logsb_route_on_sort_fields")
task.skipTest("indices.create/21_synthetic_source_stored/index param - field ordering", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep nested array", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively")
})
@@ -982,7 +982,7 @@ subobjects auto:
- match: { hits.hits.0._source.foo: 10 }
- match: { hits.hits.0._source.foo\.bar: 100 }
- match: { hits.hits.0._source.regular.span.id: "1" }
- match: { hits.hits.0._source.regular.trace.id: [ "a", "b" ] }
- match: { hits.hits.0._source.regular.trace.id: ["a", "b" ] }
- match: { hits.hits.1._source.id: 2 }
- match: { hits.hits.1._source.foo: 20 }
- match: { hits.hits.1._source.foo\.bar: 200 }
@@ -1091,7 +1091,7 @@ index param - field ordering:
index: test

- length: { hits.hits.0._source: 4 }
- match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": [30, 20, 10], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } }
- match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": ["30", "20", "10"], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } }


---
@@ -127,6 +127,7 @@ private static IndexVersion def(int id, Version luceneVersion) {
public static final IndexVersion INFERENCE_METADATA_FIELDS_BACKPORT = def(8_524_0_00, Version.LUCENE_9_12_1);
public static final IndexVersion LOGSB_OPTIONAL_SORTING_ON_HOST_NAME_BACKPORT = def(8_525_0_00, Version.LUCENE_9_12_1);
public static final IndexVersion USE_SYNTHETIC_SOURCE_FOR_RECOVERY_BY_DEFAULT_BACKPORT = def(8_526_0_00, Version.LUCENE_9_12_1);
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD = def(8_527_0_00, Version.LUCENE_9_12_1);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
@@ -154,6 +154,7 @@ private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers,

executeIndexTimeScripts(context);

context.processArrayOffsets(context);
for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) {
metadataMapper.postParse(context);
}
@@ -519,6 +520,7 @@ private static void throwOnCopyToOnObject(Mapper mapper, List<String> copyToFiel

private static void parseObject(final DocumentParserContext context, String currentFieldName) throws IOException {
assert currentFieldName != null;
context.setImmediateXContentParent(context.parser().currentToken());
Mapper objectMapper = context.getMapper(currentFieldName);
if (objectMapper != null) {
doParseObject(context, currentFieldName, objectMapper);
@@ -611,6 +613,12 @@ private static void throwOnCreateDynamicNestedViaCopyTo(Mapper dynamicObjectMapp
}

private static void parseArray(DocumentParserContext context, String lastFieldName) throws IOException {
// Record the previous immediate parent, so that it can be restored after the array has been parsed.
// This is for recording array offsets with synthetic source; the offsets can only be accounted for
// accurately when the immediate parent is an array.
var prev = context.getImmediateXContentParent();
context.setImmediateXContentParent(context.parser().currentToken());

Mapper mapper = getLeafMapper(context, lastFieldName);
if (mapper != null) {
// There is a concrete mapper for this field already. Need to check if the mapper
@@ -624,6 +632,8 @@
} else {
parseArrayDynamic(context, lastFieldName);
}
// Restore the previous immediate parent
context.setImmediateXContentParent(prev);
}

private static void parseArrayDynamic(DocumentParserContext context, String currentFieldName) throws IOException {
@@ -688,11 +698,12 @@ private static void parseNonDynamicArray(
final String lastFieldName,
String arrayFieldName
) throws IOException {
boolean supportStoringArrayOffsets = mapper != null && mapper.supportStoringArrayOffsets();
String fullPath = context.path().pathAsText(arrayFieldName);

// Check if we need to record the array source. This only applies to synthetic source.
boolean canRemoveSingleLeafElement = false;
if (context.canAddIgnoredField()) {
if (context.canAddIgnoredField() && supportStoringArrayOffsets == false) {
Mapper.SourceKeepMode mode = Mapper.SourceKeepMode.NONE;
boolean objectWithFallbackSyntheticSource = false;
if (mapper instanceof ObjectMapper objectMapper) {
Expand Down Expand Up @@ -736,6 +747,7 @@ private static void parseNonDynamicArray(

XContentParser parser = context.parser();
XContentParser.Token token;
XContentParser.Token previousToken = parser.currentToken();
int elements = 0;
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token == XContentParser.Token.START_OBJECT) {
@@ -754,6 +766,14 @@
elements++;
parseValue(context, lastFieldName);
}
previousToken = token;
}
if (mapper != null
&& context.canAddIgnoredField()
&& mapper.supportStoringArrayOffsets()
&& previousToken == XContentParser.Token.START_ARRAY
&& context.isImmediateParentAnArray()) {
context.getOffSetContext().maybeRecordEmptyArray(mapper.getOffsetFieldName());
}
if (elements <= 1 && canRemoveSingleLeafElement) {
context.removeLastIgnoredField(fullPath);
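The previousToken == START_ARRAY guard above detects empty leaf arrays: when END_ARRAY is reached and the previous token is still the opening START_ARRAY, no elements were parsed. A standalone sketch of that token-adjacency check follows, using Jackson's streaming parser purely as an illustration; Elasticsearch's XContentParser exposes a similar token stream, and this is not the actual implementation.

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;

import java.io.IOException;

// Flags a field as an empty array when END_ARRAY immediately follows START_ARRAY.
public final class EmptyArraySketch {

    public static void main(String[] args) throws IOException {
        JsonParser parser = new JsonFactory().createParser("{\"f\": [], \"g\": [\"a\"]}");
        String lastFieldName = null;
        JsonToken previous = null;
        for (JsonToken token = parser.nextToken(); token != null; token = parser.nextToken()) {
            if (token == JsonToken.FIELD_NAME) {
                lastFieldName = parser.getCurrentName();
            } else if (token == JsonToken.END_ARRAY && previous == JsonToken.START_ARRAY) {
                System.out.println(lastFieldName + " is an empty array"); // prints: f is an empty array
            }
            previous = token;
        }
    }
}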
@@ -91,6 +91,31 @@ public LuceneDocument doc() {
protected void addDoc(LuceneDocument doc) {
in.addDoc(doc);
}

@Override
public void processArrayOffsets(DocumentParserContext context) throws IOException {
in.processArrayOffsets(context);
}

@Override
public FieldArrayContext getOffSetContext() {
return in.getOffSetContext();
}

@Override
public void setImmediateXContentParent(XContentParser.Token token) {
in.setImmediateXContentParent(token);
}

@Override
public XContentParser.Token getImmediateXContentParent() {
return in.getImmediateXContentParent();
}

@Override
public boolean isImmediateParentAnArray() {
return in.isImmediateParentAnArray();
}
}

/**
@@ -141,6 +166,8 @@ private enum Scope {
private final SeqNoFieldMapper.SequenceIDFields seqID;
private final Set<String> fieldsAppliedFromTemplates;

private FieldArrayContext fieldArrayContext;

/**
* Fields that are copied from values of other fields via copy_to.
* This per-document state is needed since it is possible
@@ -460,6 +487,33 @@ public boolean isCopyToDestinationField(String name) {
return copyToFields.contains(name);
}

public void processArrayOffsets(DocumentParserContext context) throws IOException {
if (fieldArrayContext != null) {
fieldArrayContext.addToLuceneDocument(context);
}
}

public FieldArrayContext getOffSetContext() {
if (fieldArrayContext == null) {
fieldArrayContext = new FieldArrayContext();
}
return fieldArrayContext;
}

private XContentParser.Token lastSetToken;

public void setImmediateXContentParent(XContentParser.Token token) {
this.lastSetToken = token;
}

public XContentParser.Token getImmediateXContentParent() {
return lastSetToken;
}

public boolean isImmediateParentAnArray() {
return lastSetToken == XContentParser.Token.START_ARRAY;
}

/**
* Add a new mapper dynamically created while parsing.
*
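Before the new FieldArrayContext below, a worked example of the bookkeeping it implements may help. This is a standalone sketch, not the class itself: values are numbered in parse order, unique values are kept sorted so that ordinal assignment matches SortedSetDocValues, and nulls map to -1.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

// For the leaf array ["b", null, "a", "b"]: offsets are parse positions 0..3,
// sorted unique values give ords a=0, b=1, so offsetToOrd = [1, -1, 0, 1].
public final class OffsetToOrdSketch {

    public static void main(String[] args) {
        String[] leafArray = { "b", null, "a", "b" };

        Map<String, List<Integer>> valueToOffsets = new TreeMap<>(); // sorted, like SortedSetDocValues
        List<Integer> nullOffsets = new ArrayList<>();
        int offset = 0;
        for (String value : leafArray) {
            if (value == null) {
                nullOffsets.add(offset++);
            } else {
                valueToOffsets.computeIfAbsent(value, v -> new ArrayList<>()).add(offset++);
            }
        }

        int[] offsetToOrd = new int[offset];
        int ord = 0;
        for (List<Integer> offsets : valueToOffsets.values()) {
            for (int o : offsets) {
                offsetToOrd[o] = ord;
            }
            ord++;
        }
        for (int o : nullOffsets) {
            offsetToOrd[o] = -1;
        }

        System.out.println(Arrays.toString(offsetToOrd)); // [1, -1, 0, 1]
    }
}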
@@ -0,0 +1,93 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.util.BitUtil;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class FieldArrayContext {

private final Map<String, Offsets> offsetsPerField = new HashMap<>();

void recordOffset(String field, String value) {
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
int nextOffset = arrayOffsets.currentOffset++;
var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2));
offsets.add(nextOffset);
}

void recordNull(String field) {
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
int nextOffset = arrayOffsets.currentOffset++;
arrayOffsets.nullValueOffsets.add(nextOffset);
}

void maybeRecordEmptyArray(String field) {
offsetsPerField.computeIfAbsent(field, k -> new Offsets());
}

void addToLuceneDocument(DocumentParserContext context) throws IOException {
for (var entry : offsetsPerField.entrySet()) {
var fieldName = entry.getKey();
var offset = entry.getValue();

int currentOrd = 0;
// This array retains the original ordering of elements in leaf arrays, including duplicates.
int[] offsetToOrd = new int[offset.currentOffset];
for (var offsetEntry : offset.valueToOffsets.entrySet()) {
for (var offsetAndLevel : offsetEntry.getValue()) {
offsetToOrd[offsetAndLevel] = currentOrd;
}
currentOrd++;
}
for (var nullOffset : offset.nullValueOffsets) {
offsetToOrd[nullOffset] = -1;
}

try (var streamOutput = new BytesStreamOutput()) {
// Could just use a vint for the array length, but zigzag encoding also allows decoding my_field: null as -1
streamOutput.writeVInt(BitUtil.zigZagEncode(offsetToOrd.length));
for (int ord : offsetToOrd) {
streamOutput.writeVInt(BitUtil.zigZagEncode(ord));
}
context.doc().add(new SortedDocValuesField(fieldName, streamOutput.bytes().toBytesRef()));
}
}
}

static int[] parseOffsetArray(StreamInput in) throws IOException {
int[] offsetToOrd = new int[BitUtil.zigZagDecode(in.readVInt())];
for (int i = 0; i < offsetToOrd.length; i++) {
offsetToOrd[i] = BitUtil.zigZagDecode(in.readVInt());
}
return offsetToOrd;
}

private static class Offsets {

int currentOffset;
// A TreeMap is used here so that values iterate in sorted order, which is the same order in which
// the values get stored in SortedSetDocValues. This way the ordinals assigned from these offsets
// match the ordinals of the values in doc values.
final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();
final List<Integer> nullValueOffsets = new ArrayList<>(2);

}

}
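The encoding above boils down to one zigzag vint per slot. Here is a self-contained round-trip sketch of that wire format; the hand-rolled vint and zigzag helpers stand in for Lucene's BitUtil and Elasticsearch's stream classes, and are assumptions for illustration rather than the actual implementations.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

// Round-trips an offset-to-ord array: zigzag keeps the -1 null sentinel
// (and a possible -1 length) a single vint byte.
public final class OffsetEncodingSketch {

    static int zigZagEncode(int i) { return (i >> 31) ^ (i << 1); }

    static int zigZagDecode(int i) { return (i >>> 1) ^ -(i & 1); }

    static void writeVInt(DataOutputStream out, int i) throws IOException {
        while ((i & ~0x7F) != 0) {
            out.writeByte((i & 0x7F) | 0x80);
            i >>>= 7;
        }
        out.writeByte(i);
    }

    static int readVInt(DataInputStream in) throws IOException {
        int value = 0, shift = 0, b;
        do {
            b = in.readUnsignedByte();
            value |= (b & 0x7F) << shift;
            shift += 7;
        } while ((b & 0x80) != 0);
        return value;
    }

    static byte[] encode(int[] offsetToOrd) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
            writeVInt(out, zigZagEncode(offsetToOrd.length));
            for (int ord : offsetToOrd) {
                writeVInt(out, zigZagEncode(ord));
            }
        }
        return bytes.toByteArray();
    }

    static int[] decode(byte[] encoded) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(encoded));
        int[] offsetToOrd = new int[zigZagDecode(readVInt(in))];
        for (int i = 0; i < offsetToOrd.length; i++) {
            offsetToOrd[i] = zigZagDecode(readVInt(in));
        }
        return offsetToOrd;
    }

    public static void main(String[] args) throws IOException {
        int[] offsetToOrd = { 1, -1, 0, 1 };
        System.out.println(Arrays.equals(offsetToOrd, decode(encode(offsetToOrd)))); // true
    }
}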
@@ -201,15 +201,15 @@ public void parse(DocumentParserContext context) throws IOException {
}
}

private void doParseMultiFields(DocumentParserContext context) throws IOException {
protected void doParseMultiFields(DocumentParserContext context) throws IOException {
context.path().add(leafName());
for (FieldMapper mapper : builderParams.multiFields.mappers) {
mapper.parse(context);
}
context.path().remove();
}

private static void throwIndexingWithScriptParam() {
protected static void throwIndexingWithScriptParam() {
throw new IllegalArgumentException("Cannot index data directly into a field with a [script] parameter");
}
