Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/127796.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127796
summary: Do not respect synthetic_source_keep=arrays if type parses arrays
area: Mapping
type: enhancement
issues:
- 126155
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ static void parseObjectOrField(DocumentParserContext context, Mapper mapper) thr
if (context.canAddIgnoredField()
&& (fieldMapper.syntheticSourceMode() == FieldMapper.SyntheticSourceMode.FALLBACK
|| sourceKeepMode == Mapper.SourceKeepMode.ALL
|| (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS && context.inArrayScope())
|| (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS && context.inArrayScope() && parsesArrayValue(mapper) == false)
Copy link
Contributor

@lkts lkts May 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a correct fix in my opinion. But now that you mention offsets, I actually wonder how this works when offsets are enabled. It seems like (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS && context.inArrayScope()) is true, so we'll proceed with storing the value in ignored source. But that seems wrong, since we should be using offsets?

Copy link
Contributor Author

@parkertimmins parkertimmins May 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we enable offsets, parsesArrayValue will still be true for geo_point, and we won't use ignored source, right? I'm not totally sure if this is safe though. Maybe by adding offsets, geo_point will be subject to the same issue that other types have when they are in an array context. In which case we would want to use ignored_source.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I was asking about existing fields that implement offsets logic, like ip. Does it not have the same problem?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think existing fields with offsets do have this issue, and need to use stored values. For example, if

|| (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS && context.inArrayScope() && parsesArrayValue(mapper) == false)
is commented out.

The following:


curl -XPUT localhost:9200/idx7 -H 'Content-Type: application/json' -d'
{
  "settings": {
    "index": {
      "mapping": {
        "source": {
          "mode": "synthetic"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "obj": {
        "properties": {
          "str": { 
            "synthetic_source_keep": "arrays",
            "type": "keyword"
          }
        }
      }
    }
  }
}
'

curl -XPOST localhost:9200/idx7/_doc -H 'Content-Type: application/json' -d'
{
    "obj":  [
        { 
            "str": ["cat", "dog"]
        },
        { 
            "str": "cat"
        }
    ]
}
'

Results in:

  "_source": {
          "obj": {
            "str": [
              "cat",
              "dog"
            ]
          }
        }

Meaning we do need the stored source. Or is that not what you're talking about?

|| (context.isWithinCopyTo() == false && context.isCopyToDestinationField(mapper.fullPath())))) {
context = context.addIgnoredFieldFromContext(
IgnoredSourceFieldMapper.NameValue.fromContext(context, fieldMapper.fullPath(), null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import org.elasticsearch.index.mapper.MappedFieldType;

import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
Expand All @@ -30,10 +29,7 @@ public GeoPointFieldBlockLoaderTests(BlockLoaderTestCase.Params params) {

@Override
@SuppressWarnings("unchecked")
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
var extractedFieldValues = (ExtractedFieldValues) value;
var values = extractedFieldValues.values();

protected Object expected(Map<String, Object> fieldMapping, Object values, TestContext testContext) {
var nullValue = switch (fieldMapping.get("null_value")) {
case String s -> convert(s, null, false);
case null -> null;
Expand Down Expand Up @@ -75,9 +71,6 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
if (syntheticSourceKeep.equals("all")) {
return exactValuesFromSource(values, nullValue, false);
}
if (syntheticSourceKeep.equals("arrays") && extractedFieldValues.documentHasObjectArrays()) {
return exactValuesFromSource(values, nullValue, false);
}

// synthetic source and doc_values are present
if (hasDocValues(fieldMapping, true)) {
Expand Down Expand Up @@ -112,61 +105,6 @@ private Object exactValuesFromSource(Object value, GeoPoint nullValue, boolean n
return maybeFoldList(resultList);
}

/**
 * Pairs the raw values extracted for a field with a flag recording whether the document
 * contained arrays of objects along the field's dotted path (which affects how synthetic
 * source renders the values).
 */
private record ExtractedFieldValues(Object values, boolean documentHasObjectArrays) {}

@Override
protected Object getFieldValue(Map<String, Object> document, String fieldName) {
var extracted = new ArrayList<>();
var documentHasObjectArrays = processLevel(document, fieldName, extracted, false);

if (extracted.size() == 1) {
return new ExtractedFieldValues(extracted.get(0), documentHasObjectArrays);
}

return new ExtractedFieldValues(extracted, documentHasObjectArrays);
}

@SuppressWarnings("unchecked")
private boolean processLevel(Map<String, Object> level, String field, ArrayList<Object> extracted, boolean documentHasObjectArrays) {
    // Descend one segment of the dotted path per call; the leaf segment collects values.
    int dot = field.indexOf('.');
    if (dot < 0) {
        processLeafLevel(level.get(field), extracted);
        return documentHasObjectArrays;
    }

    var head = field.substring(0, dot);
    var rest = field.substring(dot + 1);
    var child = level.get(head);
    if (child instanceof Map<?, ?> m) {
        // Plain nested object: recurse, carrying the flag through unchanged.
        return processLevel((Map<String, Object>) m, rest, extracted, documentHasObjectArrays);
    }
    if (child instanceof List<?> l) {
        // Array of objects: recurse into each element and record that the
        // document contains object arrays on this path.
        for (var element : l) {
            processLevel((Map<String, Object>) element, rest, extracted, true);
        }
        return true;
    }

    assert false : "unexpected document structure";
    return false;
}

private void processLeafLevel(Object value, ArrayList<Object> extracted) {
    // Leaf values may be a scalar, a single point in [lon, lat] array form,
    // or an arbitrarily nested array of points.
    if (value instanceof List<?> list) {
        if (list.isEmpty() == false && list.get(0) instanceof Double) {
            // A single point given as an array of doubles; normalize it to a
            // map form so `expected` has one shape to deal with.
            extracted.add(Map.of("type", "point", "coordinates", list));
        } else {
            // An array of points — entries may themselves be array-form points,
            // so recurse into each one.
            for (var element : list) {
                processLeafLevel(element, extracted);
            }
        }
    } else {
        extracted.add(value);
    }
}

@SuppressWarnings("unchecked")
private GeoPoint convert(Object value, GeoPoint nullValue, boolean needsMultifieldAdjustment) {
if (value == null) {
Expand Down
Loading