Skip to content

Commit 4ab32ed

Browse files
committed
Introduce FallbackSyntheticSourceBlockLoader and apply it to keyword fields (elastic#119546)
(cherry picked from commit e885da1) # Conflicts: # server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java # test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java
1 parent c9f0d93 commit 4ab32ed

File tree

9 files changed

+821
-6
lines changed

9 files changed

+821
-6
lines changed

docs/changelog/119546.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 119546
2+
summary: Introduce `FallbackSyntheticSourceBlockLoader` and apply it to keyword fields
3+
area: Mapping
4+
type: enhancement
5+
issues: []
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper;
11+
12+
import org.apache.lucene.index.LeafReaderContext;
13+
import org.apache.lucene.index.SortedSetDocValues;
14+
import org.elasticsearch.search.fetch.StoredFieldsSpec;
15+
import org.elasticsearch.xcontent.XContentParser;
16+
import org.elasticsearch.xcontent.XContentParserConfiguration;
17+
18+
import java.io.IOException;
19+
import java.util.ArrayList;
20+
import java.util.HashMap;
21+
import java.util.HashSet;
22+
import java.util.List;
23+
import java.util.Map;
24+
import java.util.Optional;
25+
import java.util.Set;
26+
27+
/**
28+
* Block loader for fields that use fallback synthetic source implementation.
29+
* <br>
30+
* Usually fields have doc_values or stored fields and block loaders use them directly. In some cases neither is available
31+
* and we would fall back to (potentially synthetic) _source. However, in case of synthetic source, there is actually no need to
32+
* construct the entire _source. We know that there is no doc_values and stored fields, and therefore we will be using fallback synthetic
33+
* source. That is equivalent to just reading _ignored_source stored field directly and doing an in-place synthetic source just
34+
* for this field.
35+
* <br>
36+
* See {@link IgnoredSourceFieldMapper}.
37+
*/
38+
public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader {
39+
private final Reader<?> reader;
40+
private final String fieldName;
41+
42+
protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) {
43+
this.reader = reader;
44+
this.fieldName = fieldName;
45+
}
46+
47+
@Override
48+
public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException {
49+
return null;
50+
}
51+
52+
@Override
53+
public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
54+
return new IgnoredSourceRowStrideReader<>(fieldName, reader);
55+
}
56+
57+
@Override
58+
public StoredFieldsSpec rowStrideStoredFieldSpec() {
59+
return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME));
60+
}
61+
62+
@Override
63+
public boolean supportsOrdinals() {
64+
return false;
65+
}
66+
67+
@Override
68+
public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException {
69+
throw new UnsupportedOperationException();
70+
}
71+
72+
private record IgnoredSourceRowStrideReader<T>(String fieldName, Reader<T> reader) implements RowStrideReader {
73+
@Override
74+
public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
75+
var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME);
76+
if (ignoredSource == null) {
77+
return;
78+
}
79+
80+
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>();
81+
82+
// Contains name of the field and all its parents
83+
Set<String> fieldNames = new HashSet<>() {
84+
{
85+
add("_doc");
86+
}
87+
};
88+
89+
var current = new StringBuilder();
90+
for (String part : fieldName.split("\\.")) {
91+
if (current.isEmpty() == false) {
92+
current.append('.');
93+
}
94+
current.append(part);
95+
fieldNames.add(current.toString());
96+
}
97+
98+
for (Object value : ignoredSource) {
99+
IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
100+
if (fieldNames.contains(nameValue.name())) {
101+
valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue);
102+
}
103+
}
104+
105+
// TODO figure out how to handle XContentDataHelper#voidValue()
106+
107+
var blockValues = new ArrayList<T>();
108+
109+
var leafFieldValue = valuesForFieldAndParents.get(fieldName);
110+
if (leafFieldValue != null) {
111+
readFromFieldValue(leafFieldValue, blockValues);
112+
} else {
113+
readFromParentValue(valuesForFieldAndParents, blockValues);
114+
}
115+
116+
if (blockValues.isEmpty() == false) {
117+
if (blockValues.size() > 1) {
118+
builder.beginPositionEntry();
119+
}
120+
121+
reader.writeToBlock(blockValues, builder);
122+
123+
if (blockValues.size() > 1) {
124+
builder.endPositionEntry();
125+
}
126+
} else {
127+
builder.appendNull();
128+
}
129+
}
130+
131+
private void readFromFieldValue(List<IgnoredSourceFieldMapper.NameValue> nameValues, List<T> blockValues) throws IOException {
132+
if (nameValues.isEmpty()) {
133+
return;
134+
}
135+
136+
for (var nameValue : nameValues) {
137+
// Leaf field is stored directly (not as a part of a parent object), let's try to decode it.
138+
Optional<Object> singleValue = XContentDataHelper.decode(nameValue.value());
139+
if (singleValue.isPresent()) {
140+
reader.convertValue(singleValue.get(), blockValues);
141+
continue;
142+
}
143+
144+
// We have a value for this field but it's an array or an object
145+
var type = XContentDataHelper.decodeType(nameValue.value());
146+
assert type.isPresent();
147+
148+
try (
149+
XContentParser parser = type.get()
150+
.xContent()
151+
.createParser(
152+
XContentParserConfiguration.EMPTY,
153+
nameValue.value().bytes,
154+
nameValue.value().offset + 1,
155+
nameValue.value().length - 1
156+
)
157+
) {
158+
parser.nextToken();
159+
parseWithReader(parser, blockValues);
160+
}
161+
}
162+
}
163+
164+
private void readFromParentValue(
165+
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents,
166+
List<T> blockValues
167+
) throws IOException {
168+
if (valuesForFieldAndParents.isEmpty()) {
169+
return;
170+
}
171+
172+
// If a parent object is stored at a particular level its children won't be stored.
173+
// So we should only ever have one parent here.
174+
assert valuesForFieldAndParents.size() == 1 : "_ignored_source field contains multiple levels of the same object";
175+
var parentValues = valuesForFieldAndParents.values().iterator().next();
176+
177+
for (var nameValue : parentValues) {
178+
parseFieldFromParent(nameValue, blockValues);
179+
}
180+
}
181+
182+
private void parseFieldFromParent(IgnoredSourceFieldMapper.NameValue nameValue, List<T> blockValues) throws IOException {
183+
var type = XContentDataHelper.decodeType(nameValue.value());
184+
assert type.isPresent();
185+
186+
String nameAtThisLevel = fieldName.substring(nameValue.name().length() + 1);
187+
var filterParserConfig = XContentParserConfiguration.EMPTY.withFiltering(null, Set.of(nameAtThisLevel), Set.of(), true);
188+
try (
189+
XContentParser parser = type.get()
190+
.xContent()
191+
.createParser(filterParserConfig, nameValue.value().bytes, nameValue.value().offset + 1, nameValue.value().length - 1)
192+
) {
193+
parser.nextToken();
194+
var fieldNameInParser = new StringBuilder(nameValue.name());
195+
while (true) {
196+
if (parser.currentToken() == XContentParser.Token.FIELD_NAME) {
197+
fieldNameInParser.append('.').append(parser.currentName());
198+
if (fieldNameInParser.toString().equals(fieldName)) {
199+
parser.nextToken();
200+
break;
201+
}
202+
}
203+
parser.nextToken();
204+
}
205+
parseWithReader(parser, blockValues);
206+
}
207+
}
208+
209+
private void parseWithReader(XContentParser parser, List<T> blockValues) throws IOException {
210+
if (parser.currentToken() == XContentParser.Token.START_ARRAY) {
211+
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
212+
reader.parse(parser, blockValues);
213+
}
214+
return;
215+
}
216+
217+
reader.parse(parser, blockValues);
218+
}
219+
220+
@Override
221+
public boolean canReuse(int startingDocID) {
222+
return true;
223+
}
224+
}
225+
226+
/**
227+
* Field-specific implementation that converts data stored in _ignored_source field to block loader values.
228+
* @param <T>
229+
*/
230+
public interface Reader<T> {
231+
/**
232+
* Converts a raw stored value for this field to a value in a format suitable for block loader and adds it to the provided
233+
* accumulator.
234+
* @param value raw decoded value from _ignored_source field (synthetic _source value)
235+
* @param accumulator list containing the result of conversion
236+
*/
237+
void convertValue(Object value, List<T> accumulator);
238+
239+
/**
240+
* Parses one or more complex values using a provided parser and adds them to the provided accumulator.
241+
* @param parser parser of a value from _ignored_source field (synthetic _source value)
242+
* @param accumulator list containing the results of parsing
243+
*/
244+
void parse(XContentParser parser, List<T> accumulator) throws IOException;
245+
246+
void writeToBlock(List<T> values, Builder blockBuilder);
247+
}
248+
249+
public abstract static class ReaderWithNullValueSupport<T> implements Reader<T> {
250+
private final T nullValue;
251+
252+
public ReaderWithNullValueSupport(T nullValue) {
253+
this.nullValue = nullValue;
254+
}
255+
256+
@Override
257+
public void parse(XContentParser parser, List<T> accumulator) throws IOException {
258+
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
259+
if (nullValue != null) {
260+
convertValue(nullValue, accumulator);
261+
}
262+
return;
263+
}
264+
265+
parseNonNullValue(parser, accumulator);
266+
}
267+
268+
abstract void parseNonNullValue(XContentParser parser, List<T> accumulator) throws IOException;
269+
}
270+
}

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,15 @@
6565
import org.elasticsearch.search.runtime.StringScriptFieldTermQuery;
6666
import org.elasticsearch.search.runtime.StringScriptFieldWildcardQuery;
6767
import org.elasticsearch.xcontent.XContentBuilder;
68+
import org.elasticsearch.xcontent.XContentParser;
6869

6970
import java.io.IOException;
7071
import java.io.UncheckedIOException;
7172
import java.util.ArrayList;
7273
import java.util.Arrays;
7374
import java.util.Collection;
7475
import java.util.Collections;
76+
import java.util.List;
7577
import java.util.Locale;
7678
import java.util.Map;
7779
import java.util.Objects;
@@ -635,10 +637,54 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
635637
if (isStored()) {
636638
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
637639
}
640+
641+
if (isSyntheticSource) {
642+
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
643+
@Override
644+
public Builder builder(BlockFactory factory, int expectedCount) {
645+
return factory.bytesRefs(expectedCount);
646+
}
647+
};
648+
}
649+
638650
SourceValueFetcher fetcher = sourceValueFetcher(blContext.sourcePaths(name()));
639651
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, sourceBlockLoaderLookup(blContext));
640652
}
641653

654+
private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBlockLoaderReader() {
655+
var nullValueBytes = nullValue != null ? new BytesRef(nullValue) : null;
656+
return new FallbackSyntheticSourceBlockLoader.ReaderWithNullValueSupport<>(nullValueBytes) {
657+
@Override
658+
public void convertValue(Object value, List<BytesRef> accumulator) {
659+
String stringValue = ((BytesRef) value).utf8ToString();
660+
String adjusted = applyIgnoreAboveAndNormalizer(stringValue);
661+
if (adjusted != null) {
662+
// TODO what if the value didn't change?
663+
accumulator.add(new BytesRef(adjusted));
664+
}
665+
}
666+
667+
@Override
668+
public void parseNonNullValue(XContentParser parser, List<BytesRef> accumulator) throws IOException {
669+
assert parser.currentToken() == XContentParser.Token.VALUE_STRING : "Unexpected token " + parser.currentToken();
670+
671+
var value = applyIgnoreAboveAndNormalizer(parser.text());
672+
if (value != null) {
673+
accumulator.add(new BytesRef(value));
674+
}
675+
}
676+
677+
@Override
678+
public void writeToBlock(List<BytesRef> values, BlockLoader.Builder blockBuilder) {
679+
var bytesRefBuilder = (BlockLoader.BytesRefBuilder) blockBuilder;
680+
681+
for (var value : values) {
682+
bytesRefBuilder.appendBytesRef(value);
683+
}
684+
}
685+
};
686+
}
687+
642688
private BlockSourceReader.LeafIteratorLookup sourceBlockLoaderLookup(BlockLoaderContext blContext) {
643689
if (getTextSearchInfo().hasNorms()) {
644690
return BlockSourceReader.lookupFromNorms(name());
@@ -718,15 +764,19 @@ private SourceValueFetcher sourceValueFetcher(Set<String> sourcePaths) {
718764
@Override
719765
protected String parseSourceValue(Object value) {
720766
String keywordValue = value.toString();
721-
if (keywordValue.length() > ignoreAbove) {
722-
return null;
723-
}
724-
725-
return normalizeValue(normalizer(), name(), keywordValue);
767+
return applyIgnoreAboveAndNormalizer(keywordValue);
726768
}
727769
};
728770
}
729771

772+
private String applyIgnoreAboveAndNormalizer(String value) {
773+
if (value.length() > ignoreAbove) {
774+
return null;
775+
}
776+
777+
return normalizeValue(normalizer(), name(), value);
778+
}
779+
730780
@Override
731781
public Object valueForDisplay(Object value) {
732782
if (value == null) {

0 commit comments

Comments
 (0)