Skip to content

Commit 2ab71ba

Browse files
Use doc_value based value fetcher for patterned_text (#134693)
The value fetcher is used produce message values during the second phrase of the two phrase iterator during a source confirmed query to check that the message actually matches. A query may need to scan many values so the value fetcher must be fast. Currently the value fetcher requires building the source. Instead it should use the doc_value iterator and only load the message values.
1 parent 99ae21c commit 2ab71ba

File tree

3 files changed

+131
-23
lines changed

3 files changed

+131
-23
lines changed

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldType.java

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,18 @@
2828
import org.elasticsearch.common.unit.Fuzziness;
2929
import org.elasticsearch.index.fielddata.FieldDataContext;
3030
import org.elasticsearch.index.fielddata.IndexFieldData;
31-
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
3231
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
3332
import org.elasticsearch.index.mapper.BlockLoader;
3433
import org.elasticsearch.index.mapper.BlockStoredFieldsReader;
35-
import org.elasticsearch.index.mapper.SourceValueFetcher;
3634
import org.elasticsearch.index.mapper.StringFieldType;
3735
import org.elasticsearch.index.mapper.TextFieldMapper;
3836
import org.elasticsearch.index.mapper.TextSearchInfo;
3937
import org.elasticsearch.index.mapper.ValueFetcher;
4038
import org.elasticsearch.index.mapper.extras.SourceConfirmedTextQuery;
4139
import org.elasticsearch.index.mapper.extras.SourceIntervalsSource;
4240
import org.elasticsearch.index.query.SearchExecutionContext;
43-
import org.elasticsearch.script.field.KeywordDocValuesField;
44-
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
45-
import org.elasticsearch.search.lookup.SourceProvider;
41+
import org.elasticsearch.search.fetch.StoredFieldsSpec;
42+
import org.elasticsearch.search.lookup.Source;
4643

4744
import java.io.IOException;
4845
import java.io.UncheckedIOException;
@@ -115,7 +112,32 @@ public String familyTypeName() {
115112

116113
@Override
117114
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
118-
return SourceValueFetcher.toString(name(), context, format);
115+
return new ValueFetcher() {
116+
PatternTextCompositeValues docValues;
117+
118+
@Override
119+
public void setNextReader(LeafReaderContext context) {
120+
try {
121+
this.docValues = PatternTextCompositeValues.from(context.reader(), PatternTextFieldType.this);
122+
} catch (IOException e) {
123+
throw new UncheckedIOException(e);
124+
}
125+
}
126+
127+
@Override
128+
public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValues) throws IOException {
129+
if (false == docValues.advanceExact(doc)) {
130+
return List.of();
131+
}
132+
return List.of(docValues.binaryValue().utf8ToString());
133+
}
134+
135+
@Override
136+
public StoredFieldsSpec storedFieldsSpec() {
137+
// PatternedTextCompositeValues may require a stored field, but it handles loading this field internally.
138+
return StoredFieldsSpec.NO_REQUIREMENTS;
139+
}
140+
};
119141
}
120142

121143
private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> getValueFetcherProvider(
@@ -127,11 +149,10 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
127149

128150
return context -> {
129151
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
130-
SourceProvider sourceProvider = searchExecutionContext.lookup();
131152
valueFetcher.setNextReader(context);
132153
return docID -> {
133154
try {
134-
return valueFetcher.fetchValues(sourceProvider.getSource(context, docID), docID, new ArrayList<>());
155+
return valueFetcher.fetchValues(null, docID, new ArrayList<>());
135156
} catch (IOException e) {
136157
throw new UncheckedIOException(e);
137158
}
@@ -293,17 +314,7 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
293314
if (fieldDataContext.fielddataOperation() != FielddataOperation.SCRIPT) {
294315
throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
295316
}
296-
if (textFieldType.isSyntheticSource()) {
297-
return new PatternTextIndexFieldData.Builder(this);
298-
}
299-
return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(
300-
name(),
301-
CoreValuesSourceType.KEYWORD,
302-
SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())),
303-
fieldDataContext.lookupSupplier().get(),
304-
KeywordDocValuesField::new
305-
);
306-
317+
return new PatternTextIndexFieldData.Builder(this);
307318
}
308319

309320
String templateFieldName() {

x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapperTests.java

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
import org.elasticsearch.common.settings.Settings;
2727
import org.elasticsearch.common.xcontent.XContentHelper;
2828
import org.elasticsearch.core.Tuple;
29+
import org.elasticsearch.index.fielddata.FieldDataContext;
30+
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
31+
import org.elasticsearch.index.mapper.DocValueFetcher;
2932
import org.elasticsearch.index.mapper.DocumentMapper;
3033
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3134
import org.elasticsearch.index.mapper.LuceneDocument;
@@ -34,9 +37,14 @@
3437
import org.elasticsearch.index.mapper.MapperService;
3538
import org.elasticsearch.index.mapper.MapperTestCase;
3639
import org.elasticsearch.index.mapper.ParsedDocument;
40+
import org.elasticsearch.index.mapper.SourceToParse;
41+
import org.elasticsearch.index.mapper.ValueFetcher;
3742
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
3843
import org.elasticsearch.index.query.SearchExecutionContext;
44+
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
3945
import org.elasticsearch.plugins.Plugin;
46+
import org.elasticsearch.search.lookup.Source;
47+
import org.elasticsearch.search.lookup.SourceProvider;
4048
import org.elasticsearch.xcontent.ToXContent;
4149
import org.elasticsearch.xcontent.XContentBuilder;
4250
import org.elasticsearch.xcontent.XContentFactory;
@@ -47,14 +55,21 @@
4755
import org.junit.Before;
4856

4957
import java.io.IOException;
58+
import java.util.ArrayList;
5059
import java.util.Collection;
5160
import java.util.Collections;
5261
import java.util.List;
5362
import java.util.Map;
63+
import java.util.Set;
64+
import java.util.stream.Collectors;
5465

66+
import static java.util.stream.Collectors.toList;
67+
import static org.hamcrest.Matchers.containsInAnyOrder;
5568
import static org.hamcrest.Matchers.containsString;
5669
import static org.hamcrest.Matchers.equalTo;
5770
import static org.hamcrest.Matchers.instanceOf;
71+
import static org.mockito.Mockito.mock;
72+
import static org.mockito.Mockito.when;
5873

5974
public class PatternTextFieldMapperTests extends MapperTestCase {
6075

@@ -298,13 +313,69 @@ public void testDisabledSource() throws IOException {
298313

299314
@Override
300315
protected Object generateRandomInputValue(MappedFieldType ft) {
301-
assumeFalse("We don't have a way to assert things here", true);
302-
return null;
316+
return PatternTextIntegrationTests.randomMessageMaybeLarge();
303317
}
304318

305319
@Override
306-
protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException {
307-
assumeFalse("We don't have a way to assert things here", true);
320+
protected void assertFetchMany(MapperService mapperService, String field, Object value, String format, int count) throws IOException {
321+
assumeFalse("pattern_text currently don't support multiple values in the same field", false);
322+
}
323+
324+
/**
325+
* pattern_text does not allow sorting or aggregation and thus only allow field data operations
326+
* of type SCRIPT to access field data. We still want to use `testFetch` to compare value fetchers against doc
327+
* values. This method copies MapperTestCase.assertFetch, but uses field data operation type SCRIPT.
328+
*/
329+
@Override
330+
protected void assertFetch(MapperService mapperService, String field, Object value, String format) throws IOException {
331+
MappedFieldType ft = mapperService.fieldType(field);
332+
SourceToParse source = source(b -> b.field(ft.name(), value));
333+
var fielddataContext = new FieldDataContext("", null, () -> null, Set::of, MappedFieldType.FielddataOperation.SCRIPT);
334+
var fdt = fielddataContext.fielddataOperation();
335+
ValueFetcher docValueFetcher = new DocValueFetcher(
336+
ft.docValueFormat(format, null),
337+
ft.fielddataBuilder(fielddataContext).build(new IndexFieldDataCache.None(), new NoneCircuitBreakerService())
338+
);
339+
SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class);
340+
when(searchExecutionContext.isSourceEnabled()).thenReturn(true);
341+
when(searchExecutionContext.sourcePath(field)).thenReturn(Set.of(field));
342+
when(searchExecutionContext.getForField(ft, fdt)).thenAnswer(inv -> fieldDataLookup(mapperService).apply(ft, () -> {
343+
throw new UnsupportedOperationException();
344+
}, fdt));
345+
ValueFetcher nativeFetcher = ft.valueFetcher(searchExecutionContext, format);
346+
ParsedDocument doc = mapperService.documentMapper().parse(source);
347+
withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), ir -> {
348+
Source s = SourceProvider.fromLookup(mapperService.mappingLookup(), null, mapperService.getMapperMetrics().sourceFieldMetrics())
349+
.getSource(ir.leaves().get(0), 0);
350+
docValueFetcher.setNextReader(ir.leaves().get(0));
351+
nativeFetcher.setNextReader(ir.leaves().get(0));
352+
List<Object> fromDocValues = docValueFetcher.fetchValues(s, 0, new ArrayList<>());
353+
List<Object> fromNative = nativeFetcher.fetchValues(s, 0, new ArrayList<>());
354+
/*
355+
* The native fetcher uses byte, short, etc but doc values always
356+
* uses long or double. This difference is fine because on the outside
357+
* users can't see it.
358+
*/
359+
fromNative = fromNative.stream().map(o -> {
360+
if (o instanceof Integer || o instanceof Short || o instanceof Byte) {
361+
return ((Number) o).longValue();
362+
}
363+
if (o instanceof Float) {
364+
return ((Float) o).doubleValue();
365+
}
366+
return o;
367+
}).collect(toList());
368+
369+
if (dedupAfterFetch()) {
370+
fromNative = fromNative.stream().distinct().collect(Collectors.toList());
371+
}
372+
/*
373+
* Doc values sort according to something appropriate to the field
374+
* and the native fetchers usually don't sort. We're ok with this
375+
* difference. But we have to convince the test we're ok with it.
376+
*/
377+
assertThat("fetching " + value, fromNative, containsInAnyOrder(fromDocValues.toArray()));
378+
});
308379
}
309380

310381
@Override

x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIntegrationTests.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,24 @@ public void testSmallValueNotStored() throws IOException {
185185
}
186186
}
187187

188+
public void testPhraseQuery() throws IOException {
189+
var createRequest = new CreateIndexRequest(INDEX).mapping(mapping);
190+
createRequest.settings(LOGSDB_SETTING);
191+
assertAcked(admin().indices().create(createRequest));
192+
193+
String smallMessage = "cat dog 123 house mouse";
194+
final String message = randomBoolean() ? smallMessage : smallMessage.repeat(32_000 / smallMessage.length());
195+
196+
List<String> logMessages = List.of(message);
197+
indexDocs(logMessages);
198+
assertMappings();
199+
200+
var query = QueryBuilders.matchPhraseQuery("field_pattern_text", "dog 123 house");
201+
var searchRequest = client().prepareSearch(INDEX).setQuery(query);
202+
203+
assertNoFailuresAndResponse(searchRequest, searchResponse -> { assertEquals(1, searchResponse.getHits().getTotalHits().value()); });
204+
}
205+
188206
public void testQueryResultsSameAsMatchOnlyText() throws IOException {
189207
var createRequest = new CreateIndexRequest(INDEX).mapping(mapping);
190208

@@ -338,6 +356,14 @@ public static String randomMessage(int minLength) {
338356
return sb.toString();
339357
}
340358

359+
public static String randomMessageMaybeLarge() {
360+
if (randomDouble() < 0.2) {
361+
return randomMessage(32 * 1024);
362+
} else {
363+
return randomMessage();
364+
}
365+
}
366+
341367
public static String randomMessage() {
342368
if (rarely()) {
343369
return randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 100));

0 commit comments

Comments
 (0)