5 changes: 5 additions & 0 deletions docs/changelog/120835.yaml
@@ -0,0 +1,5 @@
pr: 120835
summary: Synthetic source doc values arrays encoding experiment 2
area: Mapping
type: enhancement
issues: []
3 changes: 3 additions & 0 deletions rest-api-spec/build.gradle
@@ -98,4 +98,7 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
task.skipTest("index/91_metrics_no_subobjects/Metrics object indexing with synthetic source", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("index/91_metrics_no_subobjects/Root without subobjects with synthetic source", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("logsdb/10_settings/routing path allowed in logs mode with routing on sort fields", "Unknown feature routing.logsb_route_on_sort_fields")
task.skipTest("indices.create/21_synthetic_source_stored/index param - field ordering", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep nested array", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively")
})
@@ -1024,7 +1024,7 @@ index param - field ordering:
index: test

- length: { hits.hits.0._source: 4 }
- match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": [30, 20, 10], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } }
- match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": ["30", "20", "10"], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } }


---
@@ -9,10 +9,12 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.Nullable;
@@ -24,6 +26,8 @@
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.plugins.internal.XContentMeteringParserDecorator;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.Source;
@@ -36,6 +40,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
@@ -53,6 +58,8 @@
*/
public final class DocumentParser {

private static final Logger LOGGER = LogManager.getLogger(DocumentParser.class);

public static final IndexVersion DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION = IndexVersions.FIRST_DETACHED_INDEX_VERSION;
static final NodeFeature FIX_PARSING_SUBOBJECTS_FALSE_DYNAMIC_FALSE = new NodeFeature(
"mapper.fix_parsing_subobjects_false_dynamic_false"
@@ -148,7 +155,7 @@ private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers,
}

executeIndexTimeScripts(context);

processArrayOffsets(context);
for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) {
metadataMapper.postParse(context);
}
@@ -157,6 +164,41 @@ private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers,
}
}

private static void processArrayOffsets(DocumentParserContext context) throws IOException {
var offsets = context.getOffSetsByField();
for (var entry : offsets.entrySet()) {
var fieldName = entry.getKey();
var offset = entry.getValue();
if (offset.valueToOffsets.isEmpty()) {
continue;
}

if (offset.currentOffset == 1 && offset.inArray == false) {
continue;
}

int ord = 0;
int[] offsetToOrd = new int[offset.currentOffset];
for (var offsetEntry : offset.valueToOffsets.entrySet()) {
for (var offsetAndLevel : offsetEntry.getValue()) {
offsetToOrd[offsetAndLevel] = ord;
}
ord++;
}

// TODO: remove later
LOGGER.info("values=" + offset.valueToOffsets);
LOGGER.info("offsetToOrd=" + Arrays.toString(offsetToOrd));

try (var streamOutput = new BytesStreamOutput()) {
// TODO: optimize
// This array allows us to retain the original ordering of the leaf array, including duplicate values.
streamOutput.writeVIntArray(offsetToOrd);
context.doc().add(new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()));
}
}
}

private static void executeIndexTimeScripts(DocumentParserContext context) {
List<FieldMapper> indexTimeScriptMappers = context.mappingLookup().indexTimeScriptMappers();
if (indexTimeScriptMappers.isEmpty()) {
@@ -687,7 +729,7 @@ private static void parseNonDynamicArray(

// Check if we need to record the array source. This only applies to synthetic source.
boolean canRemoveSingleLeafElement = false;
if (context.canAddIgnoredField()) {
if (context.canAddIgnoredField() && (mapper != null && mapper.supportsStoringArraysNatively() == false)) {
Mapper.SourceKeepMode mode = Mapper.SourceKeepMode.NONE;
boolean objectWithFallbackSyntheticSource = false;
if (mapper instanceof ObjectMapper objectMapper) {
@@ -725,10 +767,13 @@ private static void parseNonDynamicArray(
// In synthetic source, if any array element requires storing its source as-is, it takes precedence over
// elements from regular source loading that are then skipped from the synthesized array source.
// To prevent this, we track that parsing sub-context is within array scope.
context = context.maybeCloneForArray(mapper);
if (mapper != null && mapper.supportsStoringArraysNatively() == false) {
context = context.maybeCloneForArray(mapper);
}

XContentParser parser = context.parser();
XContentParser.Token token;
context.setInArray(true);
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token == XContentParser.Token.START_OBJECT) {
parseObject(context, lastFieldName);
Expand All @@ -743,6 +788,7 @@ private static void parseNonDynamicArray(
parseValue(context, lastFieldName);
}
}
context.setInArray(false);
postProcessDynamicArrayMapping(context, lastFieldName);
}

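To make the encoding concrete, here is a minimal standalone sketch of what recordOffset and processArrayOffsets compute together, assuming a document whose leaf array is ["b", "a", "b", "c"] (the class name and sample values are illustrative, not part of this change):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class OffsetEncodingSketch {
    public static void main(String[] args) {
        // While parsing the array, recordOffset builds this state: each distinct
        // value maps to the list of offsets at which it occurred.
        Map<String, List<Integer>> valueToOffsets = new TreeMap<>();
        int currentOffset = 0;
        for (String value : new String[] { "b", "a", "b", "c" }) {
            valueToOffsets.computeIfAbsent(value, v -> new ArrayList<>()).add(currentOffset++);
        }
        // valueToOffsets is now {a=[1], b=[0, 2], c=[3]}; the TreeMap keeps keys
        // sorted, matching the ordinals of the field's sorted-set doc values.

        // processArrayOffsets then inverts this map into offset -> ordinal:
        int ord = 0;
        int[] offsetToOrd = new int[currentOffset];
        for (List<Integer> offsets : valueToOffsets.values()) {
            for (int offset : offsets) {
                offsetToOrd[offset] = ord;
            }
            ord++;
        }
        // Prints [1, 0, 1, 2]: enough to rebuild the original array, duplicates
        // included, from the sorted distinct values {a, b, c} in doc values.
        System.out.println(Arrays.toString(offsetToOrd));
    }
}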
@@ -33,6 +33,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

/**
* Context used when parsing incoming documents. Holds everything that is needed to parse a document as well as
@@ -84,6 +85,21 @@ public LuceneDocument doc() {
protected void addDoc(LuceneDocument doc) {
in.addDoc(doc);
}

@Override
public Map<String, Offsets> getOffSetsByField() {
return in.getOffSetsByField();
}

@Override
void recordOffset(String field, String value) {
in.recordOffset(field, value);
}

@Override
public void setInArray(boolean inArray) {
in.setInArray(inArray);
}
}

/**
@@ -134,6 +150,9 @@ private enum Scope {
private final SeqNoFieldMapper.SequenceIDFields seqID;
private final Set<String> fieldsAppliedFromTemplates;

private final Map<String, Offsets> offsetsPerField = new HashMap<>();
private boolean inArray;

/**
* Fields that are copied from values of other fields via copy_to.
* This per-document state is needed since it is possible
@@ -470,6 +489,30 @@ public Set<String> getCopyToFields() {
return copyToFields;
}

public static class Offsets {

// Number of values recorded for this field in the current document so far.
public int currentOffset;
// Whether the most recent value for this field was parsed within an array scope.
public boolean inArray;
// Maps each distinct value to the offsets at which it occurred; the TreeMap keeps
// keys sorted, so iteration order matches the field's sorted-set doc values ordinals.
public final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();

}

public Map<String, Offsets> getOffSetsByField() {
return offsetsPerField;
}

void recordOffset(String field, String value) {
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
int nextOffset = arrayOffsets.currentOffset++;
var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>());
offsets.add(nextOffset);
arrayOffsets.inArray = inArray;
}

public void setInArray(boolean inArray) {
this.inArray = inArray;
}

/**
* Add a new mapper dynamically created while parsing.
*
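A short self-contained trace of this bookkeeping for two fields, one parsed as an array and one as a lone scalar (the class, method, and field names here are illustrative; Offsets mirrors the class above):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class OffsetsTrace {
    // Mirrors DocumentParserContext.Offsets.
    static class Offsets {
        int currentOffset;
        boolean inArray;
        final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();

        void recordValue(String value, boolean inArrayScope) {
            int nextOffset = currentOffset++;
            valueToOffsets.computeIfAbsent(value, v -> new ArrayList<>()).add(nextOffset);
            this.inArray = inArrayScope;
        }

        // processArrayOffsets skips a field holding a single value outside any
        // array: there is no ordering to preserve for it.
        boolean needsEncoding() {
            return (currentOffset == 1 && inArray == false) == false;
        }
    }

    public static void main(String[] args) {
        Offsets tags = new Offsets(); // e.g. "tags": ["b", "a", "b"]
        tags.recordValue("b", true);
        tags.recordValue("a", true);
        tags.recordValue("b", true);

        Offsets host = new Offsets(); // e.g. "host": "h1"
        host.recordValue("h1", false);

        System.out.println("tags " + tags.valueToOffsets + " encode=" + tags.needsEncoding()); // encode=true
        System.out.println("host " + host.valueToOffsets + " encode=" + host.needsEncoding()); // encode=false
    }
}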
@@ -87,6 +87,7 @@ public final class KeywordFieldMapper extends FieldMapper {
private static final Logger logger = LogManager.getLogger(KeywordFieldMapper.class);

public static final String CONTENT_TYPE = "keyword";
public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";

public static class Defaults {
public static final FieldType FIELD_TYPE;
@@ -182,14 +183,16 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
private final IndexAnalyzers indexAnalyzers;
private final ScriptCompiler scriptCompiler;
private final IndexVersion indexCreatedVersion;
private final SourceKeepMode indexSourceKeepMode;

public Builder(final String name, final MappingParserContext mappingParserContext) {
this(
name,
mappingParserContext.getIndexAnalyzers(),
mappingParserContext.scriptCompiler(),
IGNORE_ABOVE_SETTING.get(mappingParserContext.getSettings()),
mappingParserContext.getIndexSettings().getIndexVersionCreated()
mappingParserContext.getIndexSettings().getIndexVersionCreated(),
mappingParserContext.getIndexSettings().sourceKeepMode()
);
}

@@ -198,7 +201,8 @@ public Builder(final String name, final MappingParserContext
IndexAnalyzers indexAnalyzers,
ScriptCompiler scriptCompiler,
int ignoreAboveDefault,
IndexVersion indexCreatedVersion
IndexVersion indexCreatedVersion,
SourceKeepMode indexSourceKeepMode
) {
super(name);
this.indexAnalyzers = indexAnalyzers;
@@ -233,10 +237,11 @@ public Builder(final String name, final MappingParserContext
throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]");
}
});
this.indexSourceKeepMode = indexSourceKeepMode;
}

public Builder(String name, IndexVersion indexCreatedVersion) {
this(name, null, ScriptCompiler.NONE, Integer.MAX_VALUE, indexCreatedVersion);
this(name, null, ScriptCompiler.NONE, Integer.MAX_VALUE, indexCreatedVersion, SourceKeepMode.NONE);
}

public Builder ignoreAbove(int ignoreAbove) {
@@ -370,13 +375,36 @@ public KeywordFieldMapper build(MapperBuilderContext context) {
}
super.hasScript = script.get() != null;
super.onScriptError = onScriptError.getValue();

var sourceKeepMode = this.sourceKeepMode.orElse(indexSourceKeepMode);
BinaryFieldMapper offsetsFieldMapper;
if (context.isSourceSynthetic()
&& sourceKeepMode == SourceKeepMode.ARRAYS
&& fieldtype.stored() == false
&& copyTo.copyToFields().isEmpty()
&& multiFieldsBuilder.hasMultiFields() == false) {
// Skip stored fields: the value will be synthesized from stored fields anyway, so there is no point in tracking offsets.
// Skip copy_to: supporting it requires more work, and copy_to usage is rare in metrics and logging use cases.

// Keep track of value offsets so that arrays can be reconstructed from doc values in the order in which
// the values were specified at index time (if the field is stored, there is no point in doing this).
offsetsFieldMapper = new BinaryFieldMapper.Builder(
context.buildFullName(leafName() + OFFSETS_FIELD_NAME_SUFFIX),
context.isSourceSynthetic()
).docValues(true).build(context);
} else {
offsetsFieldMapper = null;
}

return new KeywordFieldMapper(
leafName(),
fieldtype,
buildFieldType(context, fieldtype),
builderParams(this, context),
context.isSourceSynthetic(),
this
this,
offsetsFieldMapper,
indexSourceKeepMode
);
}
}
@@ -867,14 +895,18 @@ public boolean hasNormalizer() {
private final IndexAnalyzers indexAnalyzers;
private final int ignoreAboveDefault;
private final int ignoreAbove;
private final BinaryFieldMapper offsetsFieldMapper;
private final SourceKeepMode indexSourceKeepMode;

private KeywordFieldMapper(
String simpleName,
FieldType fieldType,
KeywordFieldType mappedFieldType,
BuilderParams builderParams,
boolean isSyntheticSource,
Builder builder
Builder builder,
BinaryFieldMapper offsetsFieldMapper,
SourceKeepMode indexSourceKeepMode
) {
super(simpleName, mappedFieldType, builderParams);
assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
Expand All @@ -891,6 +923,8 @@ private KeywordFieldMapper(
this.isSyntheticSource = isSyntheticSource;
this.ignoreAboveDefault = builder.ignoreAboveDefault;
this.ignoreAbove = builder.ignoreAbove.getValue();
this.offsetsFieldMapper = offsetsFieldMapper;
this.indexSourceKeepMode = indexSourceKeepMode;
}

@Override
Expand Down Expand Up @@ -967,6 +1001,9 @@ private void indexValue(DocumentParserContext context, String value) {
if (fieldType().hasDocValues() == false && fieldType.omitNorms()) {
context.addToFieldNames(fieldType().name());
}
if (offsetsFieldMapper != null) {
context.recordOffset(offsetsFieldMapper.fullPath(), value);
}
}

private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) {
@@ -1008,9 +1045,9 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion).dimension(
fieldType().isDimension()
).init(this);
return new Builder(leafName(), indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion, indexSourceKeepMode)
.dimension(fieldType().isDimension())
.init(this);
}

@Override
@@ -1063,7 +1100,8 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
}
});
} else if (hasDocValues) {
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
String offsetsFullPath = offsetsFieldMapper != null ? offsetsFieldMapper.fullPath() : null;
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFullPath) {

@Override
protected BytesRef convert(BytesRef value) {
Expand All @@ -1090,4 +1128,9 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {

return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers);
}

@Override
public boolean supportsStoringArraysNatively() {
return offsetsFieldMapper != null;
}
}
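On the read side, the offsets field is what lets the synthetic source loader emit values in document order rather than doc values order. A hypothetical sketch of that reconstruction, assuming the sorted distinct values and the decoded offset-to-ordinal array from the example further above (the real logic lives in SortedSetDocValuesSyntheticFieldLoaderLayer; this helper and its names are illustrative):

import java.util.ArrayList;
import java.util.List;

public class ArrayReconstructionSketch {
    // sortedValues come from the field's sorted-set doc values; offsetToOrd is the
    // decoded content of the ".offsets" binary doc values field.
    static List<String> reconstruct(String[] sortedValues, int[] offsetToOrd) {
        List<String> original = new ArrayList<>(offsetToOrd.length);
        for (int ord : offsetToOrd) {
            original.add(sortedValues[ord]);
        }
        return original;
    }

    public static void main(String[] args) {
        // Prints [b, a, b, c]: the leaf array exactly as it was ingested.
        System.out.println(reconstruct(new String[] { "a", "b", "c" }, new int[] { 1, 0, 1, 2 }));
    }
}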
@@ -212,4 +212,8 @@ public static FieldType freezeAndDeduplicateFieldType(FieldType fieldType) {
* Defines how this mapper counts towards {@link MapperService#INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING}.
*/
public abstract int getTotalFieldsCount();

/**
 * Whether this mapper retains enough information in doc values (for example, a
 * dedicated offsets field) to reconstruct leaf arrays for synthetic source, so
 * that arrays do not also need to be recorded in ignored source.
 */
public boolean supportsStoringArraysNatively() {
return false;
}
}