Use doc_value based value fetcher for patterned_text (#134693)

parkertimmins · web-flow · commit 2ab71babbc96 · 2025-09-17T17:06:08.000-05:00
The value fetcher is used to produce message values during the second phase of the two-phase iterator in a source-confirmed query, to check that the message actually matches. A query may need to scan many values, so the value fetcher must be fast. Currently the value fetcher requires building the source. Instead, it should use the doc_value iterator and only load the message values.
diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldType.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldType.java
@@ -28,21 +28,18 @@
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
-import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
 import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
 import org.elasticsearch.index.mapper.BlockLoader;
 import org.elasticsearch.index.mapper.BlockStoredFieldsReader;
-import org.elasticsearch.index.mapper.SourceValueFetcher;
 import org.elasticsearch.index.mapper.StringFieldType;
 import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.index.mapper.TextSearchInfo;
 import org.elasticsearch.index.mapper.ValueFetcher;
 import org.elasticsearch.index.mapper.extras.SourceConfirmedTextQuery;
 import org.elasticsearch.index.mapper.extras.SourceIntervalsSource;
 import org.elasticsearch.index.query.SearchExecutionContext;
-import org.elasticsearch.script.field.KeywordDocValuesField;
-import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
-import org.elasticsearch.search.lookup.SourceProvider;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;
+import org.elasticsearch.search.lookup.Source;
 
 import java.io.IOException;
 import java.io.UncheckedIOException;
@@ -115,7 +112,32 @@ public String familyTypeName() {
 
     @Override
     public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
-        return SourceValueFetcher.toString(name(), context, format);
+        return new ValueFetcher() { // reads the field through PatternTextCompositeValues rather than from Source
+            PatternTextCompositeValues docValues; // assigned in setNextReader for the current leaf
+
+            @Override
+            public void setNextReader(LeafReaderContext context) {
+                try {
+                    this.docValues = PatternTextCompositeValues.from(context.reader(), PatternTextFieldType.this);
+                } catch (IOException e) {
+                    throw new UncheckedIOException(e); // setNextReader declares no checked exception here
+                }
+            }
+
+            @Override
+            public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValues) throws IOException {
+                if (false == docValues.advanceExact(doc)) { // the source argument is never read in this method
+                    return List.of();
+                }
+                return List.of(docValues.binaryValue().utf8ToString()); // at most one value, decoded as UTF-8
+            }
+
+            @Override
+            public StoredFieldsSpec storedFieldsSpec() {
+                // PatternTextCompositeValues may require a stored field, but it handles loading this field internally.
+                return StoredFieldsSpec.NO_REQUIREMENTS;
+            }
+        };
     }
 
     private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> getValueFetcherProvider(
@@ -127,11 +149,10 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
 
         return context -> {
             ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
-            SourceProvider sourceProvider = searchExecutionContext.lookup();
             valueFetcher.setNextReader(context);
             return docID -> {
                 try {
-                    return valueFetcher.fetchValues(sourceProvider.getSource(context, docID), docID, new ArrayList<>());
+                    return valueFetcher.fetchValues(null, docID, new ArrayList<>());
                 } catch (IOException e) {
                     throw new UncheckedIOException(e);
                 }
@@ -293,17 +314,7 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
         if (fieldDataContext.fielddataOperation() != FielddataOperation.SCRIPT) {
             throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
         }
-        if (textFieldType.isSyntheticSource()) {
-            return new PatternTextIndexFieldData.Builder(this);
-        }
-        return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(
-            name(),
-            CoreValuesSourceType.KEYWORD,
-            SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())),
-            fieldDataContext.lookupSupplier().get(),
-            KeywordDocValuesField::new
-        );
-
+        return new PatternTextIndexFieldData.Builder(this);
     }
 
     String templateFieldName() {
diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapperTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapperTests.java
@@ -26,6 +26,9 @@
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.core.Tuple;
+import org.elasticsearch.index.fielddata.FieldDataContext;
+import org.elasticsearch.index.fielddata.IndexFieldDataCache;
+import org.elasticsearch.index.mapper.DocValueFetcher;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.LuceneDocument;
@@ -34,9 +37,14 @@
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.MapperTestCase;
 import org.elasticsearch.index.mapper.ParsedDocument;
+import org.elasticsearch.index.mapper.SourceToParse;
+import org.elasticsearch.index.mapper.ValueFetcher;
 import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
 import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
 import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.search.lookup.Source;
+import org.elasticsearch.search.lookup.SourceProvider;
 import org.elasticsearch.xcontent.ToXContent;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentFactory;
@@ -47,14 +55,21 @@
 import org.junit.Before;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
 
+import static java.util.stream.Collectors.toList;
+import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 public class PatternTextFieldMapperTests extends MapperTestCase {
 
@@ -298,13 +313,69 @@ public void testDisabledSource() throws IOException {
 
     @Override
     protected Object generateRandomInputValue(MappedFieldType ft) {
-        assumeFalse("We don't have a way to assert things here", true);
-        return null;
+        return PatternTextIntegrationTests.randomMessageMaybeLarge();
     }
 
     @Override
-    protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException {
-        assumeFalse("We don't have a way to assert things here", true);
+    protected void assertFetchMany(MapperService mapperService, String field, Object value, String format, int count) throws IOException {
+        assumeFalse("pattern_text currently don't support multiple values in the same field", true); // true => assumption fails => skipped
+    }
+
+    /**
+     * pattern_text does not allow sorting or aggregation and thus only allows field data operations
+     * of type SCRIPT to access field data. We still want to use {@code testFetch} to compare value fetchers against doc
+     * values. This method copies MapperTestCase.assertFetch, but uses field data operation type SCRIPT.
+     */
+    @Override
+    protected void assertFetch(MapperService mapperService, String field, Object value, String format) throws IOException {
+        MappedFieldType ft = mapperService.fieldType(field);
+        SourceToParse source = source(b -> b.field(ft.name(), value));
+        var fielddataContext = new FieldDataContext("", null, () -> null, Set::of, MappedFieldType.FielddataOperation.SCRIPT);
+        var fdt = fielddataContext.fielddataOperation(); // SCRIPT, as passed to the context just above
+        ValueFetcher docValueFetcher = new DocValueFetcher(
+            ft.docValueFormat(format, null),
+            ft.fielddataBuilder(fielddataContext).build(new IndexFieldDataCache.None(), new NoneCircuitBreakerService())
+        );
+        SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class);
+        when(searchExecutionContext.isSourceEnabled()).thenReturn(true);
+        when(searchExecutionContext.sourcePath(field)).thenReturn(Set.of(field));
+        when(searchExecutionContext.getForField(ft, fdt)).thenAnswer(inv -> fieldDataLookup(mapperService).apply(ft, () -> {
+            throw new UnsupportedOperationException();
+        }, fdt));
+        ValueFetcher nativeFetcher = ft.valueFetcher(searchExecutionContext, format);
+        ParsedDocument doc = mapperService.documentMapper().parse(source);
+        withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), ir -> {
+            Source s = SourceProvider.fromLookup(mapperService.mappingLookup(), null, mapperService.getMapperMetrics().sourceFieldMetrics())
+                .getSource(ir.leaves().get(0), 0);
+            docValueFetcher.setNextReader(ir.leaves().get(0));
+            nativeFetcher.setNextReader(ir.leaves().get(0));
+            List<Object> fromDocValues = docValueFetcher.fetchValues(s, 0, new ArrayList<>());
+            List<Object> fromNative = nativeFetcher.fetchValues(s, 0, new ArrayList<>());
+            /*
+             * The native fetcher uses byte, short, etc but doc values always
+             * use long or double. This difference is fine because on the outside
+             * users can't see it.
+             */
+            fromNative = fromNative.stream().map(o -> {
+                if (o instanceof Integer || o instanceof Short || o instanceof Byte) {
+                    return ((Number) o).longValue();
+                }
+                if (o instanceof Float) {
+                    return ((Float) o).doubleValue();
+                }
+                return o; // any other type is compared unchanged
+            }).collect(toList());
+
+            if (dedupAfterFetch()) {
+                fromNative = fromNative.stream().distinct().collect(Collectors.toList());
+            }
+            /*
+             * Doc values sort according to something appropriate to the field
+             * and the native fetchers usually don't sort. We're ok with this
+             * difference. But we have to convince the test we're ok with it.
+             */
+            assertThat("fetching " + value, fromNative, containsInAnyOrder(fromDocValues.toArray()));
+        });
+    }
 
     @Override
diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIntegrationTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIntegrationTests.java
@@ -185,6 +185,24 @@ public void testSmallValueNotStored() throws IOException {
         }
     }
 
+    public void testPhraseQuery() throws IOException { // a match_phrase query on field_pattern_text must find the indexed doc
+        var createRequest = new CreateIndexRequest(INDEX).mapping(mapping);
+        createRequest.settings(LOGSDB_SETTING);
+        assertAcked(admin().indices().create(createRequest));
+
+        String smallMessage = "cat dog 123 house mouse"; // contains the phrase queried below
+        final String message = randomBoolean() ? smallMessage : smallMessage.repeat(32_000 / smallMessage.length());
+
+        List<String> logMessages = List.of(message); // one document only, hence the single expected hit
+        indexDocs(logMessages);
+        assertMappings();
+
+        var query = QueryBuilders.matchPhraseQuery("field_pattern_text", "dog 123 house");
+        var searchRequest = client().prepareSearch(INDEX).setQuery(query);
+
+        assertNoFailuresAndResponse(searchRequest, searchResponse -> { assertEquals(1, searchResponse.getHits().getTotalHits().value()); });
+    }
+
     public void testQueryResultsSameAsMatchOnlyText() throws IOException {
         var createRequest = new CreateIndexRequest(INDEX).mapping(mapping);
 
@@ -338,6 +356,14 @@ public static String randomMessage(int minLength) {
         return sb.toString();
     }
 
+    public static String randomMessageMaybeLarge() { // returns either a large message or a default random one
+        if (randomDouble() < 0.2) { // presumably ~20% of calls, assuming randomDouble() is uniform in [0, 1)
+            return randomMessage(32 * 1024); // argument is randomMessage's minLength parameter
+        } else {
+            return randomMessage();
+        }
+    }
+
     public static String randomMessage() {
         if (rarely()) {
             return randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 100));