Skip to content

Commit 0c7b550

Browse files
authored
Fix match_only_text bugs if defined as multi-field (#130311)
Backporting #130188 to 8.19 branch. Bugs starting to occur when #129126 was merged. Closes #129737
1 parent 5f43af5 commit 0c7b550

File tree

3 files changed

+337
-23
lines changed

3 files changed

+337
-23
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 90 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.apache.lucene.document.Field;
1515
import org.apache.lucene.document.FieldType;
1616
import org.apache.lucene.document.StoredField;
17+
import org.apache.lucene.index.DocValues;
1718
import org.apache.lucene.index.IndexOptions;
1819
import org.apache.lucene.index.LeafReaderContext;
1920
import org.apache.lucene.index.Term;
@@ -41,7 +42,6 @@
4142
import org.elasticsearch.index.fielddata.IndexFieldData;
4243
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
4344
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
44-
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
4545
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
4646
import org.elasticsearch.index.mapper.BlockLoader;
4747
import org.elasticsearch.index.mapper.BlockSourceReader;
@@ -132,7 +132,9 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
132132
tsi,
133133
indexAnalyzer,
134134
context.isSourceSynthetic(),
135-
meta.getValue()
135+
meta.getValue(),
136+
withinMultiField,
137+
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField()
136138
);
137139
return ft;
138140
}
@@ -162,17 +164,24 @@ public static class MatchOnlyTextFieldType extends StringFieldType {
162164
private final TextFieldType textFieldType;
163165
private final String originalName;
164166

167+
private final boolean withinMultiField;
168+
private final boolean hasCompatibleMultiFields;
169+
165170
public MatchOnlyTextFieldType(
166171
String name,
167172
TextSearchInfo tsi,
168173
Analyzer indexAnalyzer,
169174
boolean isSyntheticSource,
170-
Map<String, String> meta
175+
Map<String, String> meta,
176+
boolean withinMultiField,
177+
boolean hasCompatibleMultiFields
171178
) {
172179
super(name, true, false, false, tsi, meta);
173180
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
174181
this.textFieldType = new TextFieldType(name, isSyntheticSource);
175182
this.originalName = isSyntheticSource ? name() + "._original" : null;
183+
this.withinMultiField = withinMultiField;
184+
this.hasCompatibleMultiFields = hasCompatibleMultiFields;
176185
}
177186

178187
public MatchOnlyTextFieldType(String name) {
@@ -181,7 +190,9 @@ public MatchOnlyTextFieldType(String name) {
181190
new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
182191
Lucene.STANDARD_ANALYZER,
183192
false,
184-
Collections.emptyMap()
193+
Collections.emptyMap(),
194+
false,
195+
false
185196
);
186197
}
187198

@@ -208,16 +219,34 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
208219
"Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled."
209220
);
210221
}
211-
if (searchExecutionContext.isSourceSynthetic()) {
222+
if (searchExecutionContext.isSourceSynthetic() && withinMultiField) {
223+
String parentField = searchExecutionContext.parentPath(name());
224+
var parent = searchExecutionContext.lookup().fieldType(parentField);
225+
if (parent.isStored()) {
226+
return storedFieldFetcher(parentField);
227+
} else if (parent.hasDocValues()) {
228+
return docValuesFieldFetcher(parentField);
229+
} else {
230+
assert false : "parent field should either be stored or have doc values";
231+
}
232+
} else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
233+
var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
234+
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
235+
if (kwd != null) {
236+
var fieldType = kwd.fieldType();
237+
if (fieldType.isStored()) {
238+
return storedFieldFetcher(fieldType.name());
239+
} else if (fieldType.hasDocValues()) {
240+
return docValuesFieldFetcher(fieldType.name());
241+
} else {
242+
assert false : "multi field should either be stored or have doc values";
243+
}
244+
} else {
245+
assert false : "multi field of type keyword should exist";
246+
}
247+
} else if (searchExecutionContext.isSourceSynthetic()) {
212248
String name = storedFieldNameForSyntheticSource();
213-
StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name));
214-
return context -> {
215-
LeafStoredFieldLoader leafLoader = loader.getLoader(context, null);
216-
return docId -> {
217-
leafLoader.advanceTo(docId);
218-
return leafLoader.storedFields().get(name);
219-
};
220-
};
249+
return storedFieldFetcher(name);
221250
}
222251
return context -> {
223252
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
@@ -233,6 +262,35 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
233262
};
234263
}
235264

265+
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(String name) {
266+
return context -> {
267+
var sortedDocValues = DocValues.getSortedSet(context.reader(), name);
268+
return docId -> {
269+
if (sortedDocValues.advanceExact(docId)) {
270+
var values = new ArrayList<>(sortedDocValues.docValueCount());
271+
for (int i = 0; i < sortedDocValues.docValueCount(); i++) {
272+
long ord = sortedDocValues.nextOrd();
273+
values.add(sortedDocValues.lookupOrd(ord).utf8ToString());
274+
}
275+
return values;
276+
} else {
277+
return List.of();
278+
}
279+
};
280+
};
281+
}
282+
283+
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String name) {
284+
var loader = StoredFieldLoader.create(false, Set.of(name));
285+
return context -> {
286+
var leafLoader = loader.getLoader(context, null);
287+
return docId -> {
288+
leafLoader.advanceTo(docId);
289+
return leafLoader.storedFields().get(name);
290+
};
291+
};
292+
}
293+
236294
private Query toQuery(Query query, SearchExecutionContext searchExecutionContext) {
237295
return new ConstantScoreQuery(
238296
new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)
@@ -505,18 +563,27 @@ public MatchOnlyTextFieldType fieldType() {
505563

506564
@Override
507565
protected SyntheticSourceSupport syntheticSourceSupport() {
508-
return new SyntheticSourceSupport.Native(
509-
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
510-
@Override
511-
protected void write(XContentBuilder b, Object value) throws IOException {
512-
if (value instanceof BytesRef valueBytes) {
513-
b.value(valueBytes.utf8ToString());
514-
} else {
515-
assert value instanceof String;
516-
b.value(value.toString());
566+
if (storeSource) {
567+
return new SyntheticSourceSupport.Native(
568+
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
569+
@Override
570+
protected void write(XContentBuilder b, Object value) throws IOException {
571+
if (value instanceof BytesRef valueBytes) {
572+
b.value(valueBytes.utf8ToString());
573+
} else {
574+
assert value instanceof String;
575+
b.value(value.toString());
576+
}
517577
}
518578
}
579+
);
580+
} else {
581+
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
582+
if (kwd != null) {
583+
return new SyntheticSourceSupport.Native(() -> kwd.syntheticFieldLoader(fullPath(), leafName()));
519584
}
520-
);
585+
assert false : "there should be a suite field mapper with native synthetic source support";
586+
return super.syntheticSourceSupport();
587+
}
521588
}
522589
}

modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,4 +394,170 @@ synthetic_source with copy_to:
394394
- match:
395395
hits.hits.0.fields.copy.0: "Apache Lucene powers Elasticsearch"
396396

397+
---
398+
synthetic_source match_only_text as multi-field:
399+
- requires:
400+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
401+
reason: "Source mode configured through index setting"
402+
403+
- do:
404+
indices.create:
405+
index: synthetic_source_test
406+
body:
407+
settings:
408+
index:
409+
mapping.source.mode: synthetic
410+
mappings:
411+
properties:
412+
foo:
413+
type: keyword
414+
fields:
415+
text:
416+
type: match_only_text
417+
418+
- do:
419+
index:
420+
index: synthetic_source_test
421+
id: "1"
422+
refresh: true
423+
body:
424+
foo: "Apache Lucene powers Elasticsearch"
425+
426+
- do:
427+
search:
428+
index: synthetic_source_test
429+
body:
430+
query:
431+
match_phrase:
432+
foo.text: apache lucene
433+
434+
- match: { "hits.total.value": 1 }
435+
- match:
436+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
437+
438+
---
439+
synthetic_source match_only_text as multi-field with stored keyword as parent:
440+
- requires:
441+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
442+
reason: "Source mode configured through index setting"
443+
444+
- do:
445+
indices.create:
446+
index: synthetic_source_test
447+
body:
448+
settings:
449+
index:
450+
mapping.source.mode: synthetic
451+
mappings:
452+
properties:
453+
foo:
454+
type: keyword
455+
store: true
456+
doc_values: false
457+
fields:
458+
text:
459+
type: match_only_text
460+
461+
- do:
462+
index:
463+
index: synthetic_source_test
464+
id: "1"
465+
refresh: true
466+
body:
467+
foo: "Apache Lucene powers Elasticsearch"
468+
469+
- do:
470+
search:
471+
index: synthetic_source_test
472+
body:
473+
query:
474+
match_phrase:
475+
foo.text: apache lucene
476+
477+
- match: { "hits.total.value": 1 }
478+
- match:
479+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
397480

481+
---
482+
synthetic_source match_only_text with multi-field:
483+
- requires:
484+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
485+
reason: "Source mode configured through index setting"
486+
487+
- do:
488+
indices.create:
489+
index: synthetic_source_test
490+
body:
491+
settings:
492+
index:
493+
mapping.source.mode: synthetic
494+
mappings:
495+
properties:
496+
foo:
497+
type: match_only_text
498+
fields:
499+
raw:
500+
type: keyword
501+
502+
- do:
503+
index:
504+
index: synthetic_source_test
505+
id: "1"
506+
refresh: true
507+
body:
508+
foo: "Apache Lucene powers Elasticsearch"
509+
510+
- do:
511+
search:
512+
index: synthetic_source_test
513+
body:
514+
query:
515+
match_phrase:
516+
foo: apache lucene
517+
518+
- match: { "hits.total.value": 1 }
519+
- match:
520+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
521+
522+
---
523+
synthetic_source match_only_text with stored multi-field:
524+
- requires:
525+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
526+
reason: "Source mode configured through index setting"
527+
528+
- do:
529+
indices.create:
530+
index: synthetic_source_test
531+
body:
532+
settings:
533+
index:
534+
mapping.source.mode: synthetic
535+
mappings:
536+
properties:
537+
foo:
538+
type: match_only_text
539+
fields:
540+
raw:
541+
type: keyword
542+
store: true
543+
doc_values: false
544+
545+
- do:
546+
index:
547+
index: synthetic_source_test
548+
id: "1"
549+
refresh: true
550+
body:
551+
foo: "Apache Lucene powers Elasticsearch"
552+
553+
- do:
554+
search:
555+
index: synthetic_source_test
556+
body:
557+
query:
558+
match_phrase:
559+
foo: apache lucene
560+
561+
- match: { "hits.total.value": 1 }
562+
- match:
563+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"

0 commit comments

Comments
 (0)