Skip to content

Commit 012ac7f

Browse files
committed
Move offset parsing logic to DocumentParser
1 parent 7ed0857 commit 012ac7f

File tree

12 files changed

+93
-89
lines changed

12 files changed

+93
-89
lines changed

server/src/main/java/org/elasticsearch/index/mapper/AbstractGeometryFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ public boolean ignoreZValue() {
265265
}
266266

267267
@Override
268-
public final boolean parsesArrayValue(DocumentParserContext context) {
268+
public final boolean parsesArrayValue() {
269269
return true;
270270
}
271271
}

server/src/main/java/org/elasticsearch/index/mapper/ArraySourceValueFetcher.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
*
2626
* This class differs from {@link SourceValueFetcher} in that it directly handles
2727
* array values in parsing. Field types should use this class if their corresponding
28-
* mapper returns true for {@link FieldMapper#parsesArrayValue(DocumentParserContext)}.
28+
* mapper returns true for {@link FieldMapper#parsesArrayValue()}.
2929
*/
3030
public abstract class ArraySourceValueFetcher implements ValueFetcher {
3131
private final Set<String> sourcePaths;

server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ public CompletionFieldType fieldType() {
361361
}
362362

363363
@Override
364-
public boolean parsesArrayValue(DocumentParserContext context) {
364+
public boolean parsesArrayValue() {
365365
return true;
366366
}
367367

server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,11 @@ private static void innerParseObject(DocumentParserContext context) throws IOExc
373373
}
374374
break;
375375
case START_OBJECT:
376+
context.setImmediateXContentParent(token);
376377
parseObject(context, currentFieldName);
377378
break;
378379
case START_ARRAY:
380+
context.setImmediateXContentParent(token);
379381
parseArray(context, currentFieldName);
380382
break;
381383
case VALUE_NULL:
@@ -456,7 +458,9 @@ static void parseObjectOrField(DocumentParserContext context, Mapper mapper) thr
456458
if (context.canAddIgnoredField()
457459
&& (fieldMapper.syntheticSourceMode() == FieldMapper.SyntheticSourceMode.FALLBACK
458460
|| sourceKeepMode == Mapper.SourceKeepMode.ALL
459-
|| (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS && context.inArrayScope())
461+
|| (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS
462+
&& context.inArrayScope()
463+
&& mapper.supportStoringArrayOffsets(context) == false)
460464
|| (context.isWithinCopyTo() == false && context.isCopyToDestinationField(mapper.fullPath())))) {
461465
context = context.addIgnoredFieldFromContext(
462466
IgnoredSourceFieldMapper.NameValue.fromContext(context, fieldMapper.fullPath(), null)
@@ -613,7 +617,7 @@ private static void parseArray(DocumentParserContext context, String lastFieldNa
613617
// There is a concrete mapper for this field already. Need to check if the mapper
614618
// expects an array, if so we pass the context straight to the mapper and if not
615619
// we serialize the array components
616-
if (parsesArrayValue(mapper, context)) {
620+
if (parsesArrayValue(mapper)) {
617621
parseObjectOrField(context, mapper);
618622
} else {
619623
parseNonDynamicArray(context, mapper, lastFieldName, lastFieldName);
@@ -661,7 +665,7 @@ private static void parseArrayDynamic(DocumentParserContext context, String curr
661665
}
662666
parseNonDynamicArray(context, objectMapperFromTemplate, currentFieldName, currentFieldName);
663667
} else {
664-
if (parsesArrayValue(objectMapperFromTemplate, context)) {
668+
if (parsesArrayValue(objectMapperFromTemplate)) {
665669
if (context.addDynamicMapper(objectMapperFromTemplate) == false) {
666670
context.parser().skipChildren();
667671
return;
@@ -675,8 +679,8 @@ private static void parseArrayDynamic(DocumentParserContext context, String curr
675679
}
676680
}
677681

678-
private static boolean parsesArrayValue(Mapper mapper, DocumentParserContext context) {
679-
return mapper instanceof FieldMapper && ((FieldMapper) mapper).parsesArrayValue(context);
682+
private static boolean parsesArrayValue(Mapper mapper) {
683+
return mapper instanceof FieldMapper && ((FieldMapper) mapper).parsesArrayValue();
680684
}
681685

682686
private static void parseNonDynamicArray(
@@ -685,11 +689,12 @@ private static void parseNonDynamicArray(
685689
final String lastFieldName,
686690
String arrayFieldName
687691
) throws IOException {
692+
boolean supportStoringArrayOffsets = mapper != null && mapper.supportStoringArrayOffsets(context);
688693
String fullPath = context.path().pathAsText(arrayFieldName);
689694

690695
// Check if we need to record the array source. This only applies to synthetic source.
691696
boolean canRemoveSingleLeafElement = false;
692-
if (context.canAddIgnoredField() && (parsesArrayValue(mapper, context) == false)) {
697+
if (context.canAddIgnoredField() && (parsesArrayValue(mapper) == false && supportStoringArrayOffsets == false)) {
693698
Mapper.SourceKeepMode mode = Mapper.SourceKeepMode.NONE;
694699
boolean objectWithFallbackSyntheticSource = false;
695700
if (mapper instanceof ObjectMapper objectMapper) {
@@ -726,7 +731,7 @@ private static void parseNonDynamicArray(
726731
}
727732
}
728733

729-
if (parsesArrayValue(mapper, context) == false) {
734+
if (parsesArrayValue(mapper) == false && supportStoringArrayOffsets == false) {
730735
// In synthetic source, if any array element requires storing its source as-is, it takes precedence over
731736
// elements from regular source loading that are then skipped from the synthesized array source.
732737
// To prevent this, we track that parsing sub-context is within array scope.
@@ -735,14 +740,19 @@ private static void parseNonDynamicArray(
735740

736741
XContentParser parser = context.parser();
737742
XContentParser.Token token;
743+
XContentParser.Token previousToken = parser.currentToken();
738744
int elements = 0;
739745
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
740746
if (token == XContentParser.Token.START_OBJECT) {
747+
context.setImmediateXContentParent(token);
741748
elements = 2;
742749
parseObject(context, lastFieldName);
743750
} else if (token == XContentParser.Token.START_ARRAY) {
751+
var prev = context.getImmediateXContentParent();
752+
context.setImmediateXContentParent(token);
744753
elements = 2;
745754
parseArray(context, lastFieldName);
755+
context.setImmediateXContentParent(prev);
746756
} else if (token == XContentParser.Token.VALUE_NULL) {
747757
elements++;
748758
parseNullValue(context, lastFieldName);
@@ -753,7 +763,12 @@ private static void parseNonDynamicArray(
753763
elements++;
754764
parseValue(context, lastFieldName);
755765
}
766+
previousToken = token;
756767
}
768+
if (mapper != null && previousToken == XContentParser.Token.START_ARRAY) {
769+
mapper.handleEmptyArray(context);
770+
}
771+
context.setImmediateXContentParent(token);
757772
if (elements <= 1 && canRemoveSingleLeafElement) {
758773
context.removeLastIgnoredField(fullPath);
759774
}

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,21 @@ public void processArrayOffsets(DocumentParserContext context) throws IOExceptio
9494
public ArrayOffsetContext getOffSetContext() {
9595
return in.getOffSetContext();
9696
}
97+
98+
@Override
99+
public void setImmediateXContentParent(XContentParser.Token token) {
100+
in.setImmediateXContentParent(token);
101+
}
102+
103+
@Override
104+
public XContentParser.Token getImmediateXContentParent() {
105+
return in.getImmediateXContentParent();
106+
}
107+
108+
@Override
109+
public boolean isImmediateParentAnArray() {
110+
return in.isImmediateParentAnArray();
111+
}
97112
}
98113

99114
/**
@@ -495,6 +510,20 @@ public ArrayOffsetContext getOffSetContext() {
495510
return arrayOffsetContext;
496511
}
497512

513+
private XContentParser.Token token;
514+
515+
public void setImmediateXContentParent(XContentParser.Token token) {
516+
this.token = token;
517+
}
518+
519+
public XContentParser.Token getImmediateXContentParent() {
520+
return token;
521+
}
522+
523+
public boolean isImmediateParentAnArray() {
524+
return token == XContentParser.Token.START_ARRAY;
525+
}
526+
498527
/**
499528
* Add a new mapper dynamically created while parsing.
500529
*

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ public boolean ignoreMalformed() {
166166
* whole array to the mapper. If false, the array is split into individual values
167167
* and each value is passed to the mapper for parsing.
168168
*/
169-
public boolean parsesArrayValue(DocumentParserContext context) {
169+
public boolean parsesArrayValue() {
170170
return false;
171171
}
172172

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 19 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@
6363
import org.elasticsearch.search.runtime.StringScriptFieldTermQuery;
6464
import org.elasticsearch.search.runtime.StringScriptFieldWildcardQuery;
6565
import org.elasticsearch.xcontent.XContentBuilder;
66-
import org.elasticsearch.xcontent.XContentParser;
6766

6867
import java.io.IOException;
6968
import java.io.UncheckedIOException;
@@ -931,61 +930,32 @@ public KeywordFieldType fieldType() {
931930
return (KeywordFieldType) super.fieldType();
932931
}
933932

934-
@Override
935-
public boolean parsesArrayValue(DocumentParserContext context) {
936-
// Only if offsets need to be recorded/stored and if current content hasn't been recorded yet.
937-
// (for example if parent object or object array got stored in ignored source)
938-
return offsetsFieldName != null && context.getRecordedSource() == false;
939-
}
940-
941-
@Override
942-
public void parse(DocumentParserContext context) throws IOException {
933+
public boolean supportStoringArrayOffsets(DocumentParserContext context) {
943934
if (offsetsFieldName != null) {
944-
if (builderParams.hasScript()) {
945-
throwIndexingWithScriptParam();
946-
}
947-
948-
if (context.parser().currentToken() == XContentParser.Token.START_ARRAY) {
949-
parseArray(context);
950-
} else if (context.parser().currentToken().isValue() || context.parser().currentToken() == XContentParser.Token.VALUE_NULL) {
951-
final String value = context.parser().textOrNull();
952-
indexValue(context, value == null ? fieldType().nullValue : value);
953-
} else {
954-
throw new IllegalArgumentException("Encountered unexpected token [" + context.parser().currentToken() + "].");
955-
}
935+
return true;
956936
} else {
957-
super.parse(context);
937+
return false;
958938
}
959939
}
960940

961-
protected void parseCreateField(DocumentParserContext context) throws IOException {
962-
final String value = context.parser().textOrNull();
963-
indexValue(context, value == null ? fieldType().nullValue : value);
941+
public void handleEmptyArray(DocumentParserContext context) {
942+
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) {
943+
context.getOffSetContext().maybeRecordEmptyArray(offsetsFieldName);
944+
}
964945
}
965946

966-
private void parseArray(DocumentParserContext context) throws IOException {
967-
XContentParser parser = context.parser();
968-
while (true) {
969-
XContentParser.Token token = parser.nextToken();
970-
if (token == XContentParser.Token.END_ARRAY) {
971-
context.getOffSetContext().maybeRecordEmptyArray(offsetsFieldName);
972-
return;
973-
}
974-
if (token.isValue() || token == XContentParser.Token.VALUE_NULL) {
975-
String value = context.parser().textOrNull();
976-
if (value == null) {
977-
value = fieldType().nullValue;
978-
}
979-
boolean indexed = indexValue(context, value);
980-
if (indexed) {
981-
context.getOffSetContext().recordOffset(offsetsFieldName, value);
982-
} else if (value == null) {
983-
context.getOffSetContext().recordNull(offsetsFieldName);
984-
}
985-
} else if (token == XContentParser.Token.START_ARRAY) {
986-
parseArray(context);
987-
} else {
988-
throw new IllegalArgumentException("Encountered unexpected token [" + token + "].");
947+
protected void parseCreateField(DocumentParserContext context) throws IOException {
948+
String value = context.parser().textOrNull();
949+
if (value == null) {
950+
value = fieldType().nullValue;
951+
}
952+
953+
boolean indexed = indexValue(context, value);
954+
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) {
955+
if (indexed) {
956+
context.getOffSetContext().recordOffset(offsetsFieldName, value);
957+
} else if (value == null) {
958+
context.getOffSetContext().recordNull(offsetsFieldName);
989959
}
990960
}
991961
}

server/src/main/java/org/elasticsearch/index/mapper/Mapper.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,4 +212,11 @@ public static FieldType freezeAndDeduplicateFieldType(FieldType fieldType) {
212212
* Defines how this mapper counts towards {@link MapperService#INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING}.
213213
*/
214214
public abstract int getTotalFieldsCount();
215+
216+
public boolean supportStoringArrayOffsets(DocumentParserContext context) {
217+
return false;
218+
}
219+
220+
public void handleEmptyArray(DocumentParserContext context) {
221+
}
215222
}

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2186,7 +2186,7 @@ public DenseVectorFieldType fieldType() {
21862186
}
21872187

21882188
@Override
2189-
public boolean parsesArrayValue(DocumentParserContext context) {
2189+
public boolean parsesArrayValue() {
21902190
return true;
21912191
}
21922192

server/src/test/java/org/elasticsearch/index/mapper/SyntheticSourceNativeArrayIntegrationTests.java

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@
99

1010
package org.elasticsearch.index.mapper;
1111

12+
import org.apache.lucene.index.DocValuesType;
1213
import org.apache.lucene.index.FieldInfo;
14+
import org.apache.lucene.index.FieldInfos;
1315
import org.apache.lucene.index.IndexableField;
1416
import org.apache.lucene.index.LeafReader;
15-
import org.apache.lucene.util.BitUtil;
1617
import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeRequest;
1718
import org.elasticsearch.action.index.IndexRequest;
1819
import org.elasticsearch.action.search.SearchRequest;
1920
import org.elasticsearch.action.support.WriteRequest;
20-
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2121
import org.elasticsearch.common.settings.Settings;
2222
import org.elasticsearch.index.query.IdsQueryBuilder;
2323
import org.elasticsearch.test.ESSingleNodeTestCase;
@@ -36,7 +36,6 @@
3636
import static org.hamcrest.Matchers.empty;
3737
import static org.hamcrest.Matchers.equalTo;
3838
import static org.hamcrest.Matchers.hasKey;
39-
import static org.hamcrest.Matchers.notNullValue;
4039
import static org.hamcrest.Matchers.nullValue;
4140

4241
public class SyntheticSourceNativeArrayIntegrationTests extends ESSingleNodeTestCase {
@@ -78,7 +77,7 @@ public void testSynthesizeArrayIgnoreAbove() throws Exception {
7877
new Object[] { "123", "1234", "12345" },
7978
new Object[] { null, null, null, "blabla" },
8079
new Object[] { "1", "2", "3", "blabla" } };
81-
verifySyntheticArray(arrayValues, mapping, "_id", "_recovery_source", "field._original");
80+
verifySyntheticArray(arrayValues, mapping, 4, "_id", "_recovery_source", "field._original");
8281
}
8382

8483
public void testSynthesizeObjectArray() throws Exception {
@@ -101,10 +100,11 @@ private void verifySyntheticArray(Object[][] arrays) throws IOException {
101100
.endObject()
102101
.endObject()
103102
.endObject();
104-
verifySyntheticArray(arrays, mapping, "_id", "_recovery_source");
103+
verifySyntheticArray(arrays, mapping, null, "_id", "_recovery_source");
105104
}
106105

107-
private void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, String... expectedStoredFields) throws IOException {
106+
private void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, Integer ignoreAbove, String... expectedStoredFields)
107+
throws IOException {
108108
var indexService = createIndex(
109109
"test-index",
110110
Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(),
@@ -150,33 +150,16 @@ private void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, St
150150
}
151151
}
152152

153-
indexService.getShard(0).forceMerge(new ForceMergeRequest("test-index").maxNumSegments(1));
154153
try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) {
155154
var reader = searcher.getDirectoryReader();
156155
for (int i = 0; i < arrays.length; i++) {
157156
var document = reader.storedFields().document(i);
158157
// Verify that there is no ignored source:
159158
Set<String> storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList());
160159
assertThat(storedFieldNames, contains(expectedStoredFields));
161-
162-
// Verify that there is an offset field:
163-
var binaryDocValues = reader.leaves().get(0).reader().getBinaryDocValues("field.offsets");
164-
boolean match = binaryDocValues.advanceExact(i);
165-
if (arrays[i] == null) {
166-
assertThat(match, equalTo(false));
167-
} else {
168-
assertThat(match, equalTo(true));
169-
var ref = binaryDocValues.binaryValue();
170-
try (ByteArrayStreamInput scratch = new ByteArrayStreamInput()) {
171-
scratch.reset(ref.bytes, ref.offset, ref.length);
172-
int[] offsets = new int[BitUtil.zigZagDecode(scratch.readVInt())];
173-
for (int j = 0; j < offsets.length; j++) {
174-
offsets[j] = BitUtil.zigZagDecode(scratch.readVInt());
175-
}
176-
assertThat(offsets, notNullValue());
177-
}
178-
}
179160
}
161+
var fieldInfo = FieldInfos.getMergedFieldInfos(reader).fieldInfo("field.offsets");
162+
assertThat(fieldInfo.getDocValuesType(), equalTo(DocValuesType.BINARY));
180163
}
181164
}
182165

@@ -236,7 +219,7 @@ private void verifySyntheticObjectArray(List<List<Object[]>> documents) throws I
236219
var reader = searcher.getDirectoryReader();
237220
for (int i = 0; i < documents.size(); i++) {
238221
var document = reader.storedFields().document(i);
239-
// Verify that there is no ignored source:
222+
// Verify that there is ignored source because of leaf array being wrapped by object array:
240223
List<String> storedFieldNames = document.getFields().stream().map(IndexableField::name).toList();
241224
assertThat(storedFieldNames, contains("_id", "_recovery_source", "_ignored_source"));
242225

0 commit comments

Comments
 (0)