Skip to content

Commit 4071c72

Browse files
committed
Address TODO: instead of relying on synthesizing the source, use either the stored field or doc values of the parent keyword field, or the stored field or doc values of the multi-field.
1 parent 5b591b7 commit 4071c72

File tree

2 files changed

+107
-12
lines changed

2 files changed

+107
-12
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.apache.lucene.document.Field;
1515
import org.apache.lucene.document.FieldType;
1616
import org.apache.lucene.document.StoredField;
17+
import org.apache.lucene.index.DocValues;
1718
import org.apache.lucene.index.IndexOptions;
1819
import org.apache.lucene.index.LeafReaderContext;
1920
import org.apache.lucene.index.Term;
@@ -40,7 +41,6 @@
4041
import org.elasticsearch.index.fielddata.IndexFieldData;
4142
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
4243
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
43-
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
4444
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
4545
import org.elasticsearch.index.mapper.BlockLoader;
4646
import org.elasticsearch.index.mapper.BlockSourceReader;
@@ -220,18 +220,34 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
220220
"Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled."
221221
);
222222
}
223-
if (searchExecutionContext.isSourceSynthetic() && withinMultiField == false && hasCompatibleMultiFields == false) {
223+
if (searchExecutionContext.isSourceSynthetic() && withinMultiField) {
224+
String parentField = searchExecutionContext.parentPath(name());
225+
var parent = searchExecutionContext.lookup().fieldType(parentField);
226+
if (parent.isStored()) {
227+
return storedFieldFetcher(parentField);
228+
} else if (parent.hasDocValues()) {
229+
return docValuesFieldFetcher(parentField);
230+
} else {
231+
assert false : "parent field should either be stored or have doc values";
232+
}
233+
} else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
234+
var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
235+
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
236+
if (kwd != null) {
237+
var fieldType = kwd.fieldType();
238+
if (fieldType.isStored()) {
239+
return storedFieldFetcher(fieldType.name());
240+
} else if (fieldType.hasDocValues()) {
241+
return docValuesFieldFetcher(fieldType.name());
242+
} else {
243+
assert false : "multi field should either be stored or have doc values";
244+
}
245+
} else {
246+
assert false : "multi field of type keyword should exist";
247+
}
248+
} else if (searchExecutionContext.isSourceSynthetic()) {
224249
String name = storedFieldNameForSyntheticSource();
225-
// TODO: go the parent field and load either via stored fields or doc values the values instead synthesizing complete source
226-
// (in case of synthetic source and if this field is a multi field, then it will not have a stored field.)
227-
StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name));
228-
return context -> {
229-
LeafStoredFieldLoader leafLoader = loader.getLoader(context, null);
230-
return docId -> {
231-
leafLoader.advanceTo(docId);
232-
return leafLoader.storedFields().get(name);
233-
};
234-
};
250+
return storedFieldFetcher(name);
235251
}
236252
return context -> {
237253
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
@@ -247,6 +263,35 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
247263
};
248264
}
249265

266+
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(String name) {
267+
return context -> {
268+
var sortedDocValues = DocValues.getSortedSet(context.reader(), name);
269+
return docId -> {
270+
if (sortedDocValues.advanceExact(docId)) {
271+
var values = new ArrayList<>(sortedDocValues.docValueCount());
272+
for (int i = 0; i < sortedDocValues.docValueCount(); i++) {
273+
long ord = sortedDocValues.nextOrd();
274+
values.add(sortedDocValues.lookupOrd(ord).utf8ToString());
275+
}
276+
return values;
277+
} else {
278+
return List.of();
279+
}
280+
};
281+
};
282+
}
283+
284+
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String name) {
285+
var loader = StoredFieldLoader.create(false, Set.of(name));
286+
return context -> {
287+
var leafLoader = loader.getLoader(context, null);
288+
return docId -> {
289+
leafLoader.advanceTo(docId);
290+
return leafLoader.storedFields().get(name);
291+
};
292+
};
293+
}
294+
250295
private Query toQuery(Query query, SearchExecutionContext searchExecutionContext) {
251296
return new ConstantScoreQuery(
252297
new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)

modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,10 @@ synthetic_source with copy_to:
396396

397397
---
398398
synthetic_source match_only_text as multi-field:
399+
- requires:
400+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
401+
reason: "Source mode configured through index setting"
402+
399403
- do:
400404
indices.create:
401405
index: synthetic_source_test
@@ -433,6 +437,10 @@ synthetic_source match_only_text as multi-field:
433437

434438
---
435439
synthetic_source match_only_text with multi-field:
440+
- requires:
441+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
442+
reason: "Source mode configured through index setting"
443+
436444
- do:
437445
indices.create:
438446
index: synthetic_source_test
@@ -468,3 +476,45 @@ synthetic_source match_only_text with multi-field:
468476
- match:
469477
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
470478

479+
---
480+
synthetic_source match_only_text with store multi-field:
481+
- requires:
482+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
483+
reason: "Source mode configured through index setting"
484+
485+
- do:
486+
indices.create:
487+
index: synthetic_source_test
488+
body:
489+
settings:
490+
index:
491+
mapping.source.mode: synthetic
492+
mappings:
493+
properties:
494+
foo:
495+
type: match_only_text
496+
fields:
497+
raw:
498+
type: keyword
499+
store: true
500+
# The mapping parameter is `doc_values`; with it disabled the multi-field keeps only its stored field.
doc_values: false
501+
502+
- do:
503+
index:
504+
index: synthetic_source_test
505+
id: "1"
506+
refresh: true
507+
body:
508+
foo: "Apache Lucene powers Elasticsearch"
509+
510+
- do:
511+
search:
512+
index: synthetic_source_test
513+
body:
514+
query:
515+
match_phrase:
516+
foo: apache lucene
517+
518+
- match: { "hits.total.value": 1 }
519+
- match:
520+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"

0 commit comments

Comments
 (0)