Skip to content

Commit 1750e47

Browse files
committed
Fix match_only_text bugs if defined as multi-field (elastic#130188)
* Fix match_only_text bugs if defined as multi-field Bugs starting to occur when elastic#129126 was merged. Closes elastic#129737
1 parent f8b1241 commit 1750e47

File tree

3 files changed

+337
-23
lines changed

3 files changed

+337
-23
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 90 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.apache.lucene.document.Field;
1515
import org.apache.lucene.document.FieldType;
1616
import org.apache.lucene.document.StoredField;
17+
import org.apache.lucene.index.DocValues;
1718
import org.apache.lucene.index.IndexOptions;
1819
import org.apache.lucene.index.LeafReaderContext;
1920
import org.apache.lucene.index.Term;
@@ -40,7 +41,6 @@
4041
import org.elasticsearch.index.fielddata.IndexFieldData;
4142
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
4243
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
43-
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
4444
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
4545
import org.elasticsearch.index.mapper.BlockLoader;
4646
import org.elasticsearch.index.mapper.BlockSourceReader;
@@ -133,7 +133,9 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
133133
tsi,
134134
indexAnalyzer,
135135
context.isSourceSynthetic(),
136-
meta.getValue()
136+
meta.getValue(),
137+
withinMultiField,
138+
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField()
137139
);
138140
return ft;
139141
}
@@ -163,17 +165,24 @@ public static class MatchOnlyTextFieldType extends StringFieldType {
163165
private final TextFieldType textFieldType;
164166
private final String originalName;
165167

168+
private final boolean withinMultiField;
169+
private final boolean hasCompatibleMultiFields;
170+
166171
public MatchOnlyTextFieldType(
167172
String name,
168173
TextSearchInfo tsi,
169174
Analyzer indexAnalyzer,
170175
boolean isSyntheticSource,
171-
Map<String, String> meta
176+
Map<String, String> meta,
177+
boolean withinMultiField,
178+
boolean hasCompatibleMultiFields
172179
) {
173180
super(name, true, false, false, tsi, meta);
174181
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
175182
this.textFieldType = new TextFieldType(name, isSyntheticSource);
176183
this.originalName = isSyntheticSource ? name + "._original" : null;
184+
this.withinMultiField = withinMultiField;
185+
this.hasCompatibleMultiFields = hasCompatibleMultiFields;
177186
}
178187

179188
public MatchOnlyTextFieldType(String name) {
@@ -182,7 +191,9 @@ public MatchOnlyTextFieldType(String name) {
182191
new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
183192
Lucene.STANDARD_ANALYZER,
184193
false,
185-
Collections.emptyMap()
194+
Collections.emptyMap(),
195+
false,
196+
false
186197
);
187198
}
188199

@@ -209,16 +220,34 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
209220
"Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled."
210221
);
211222
}
212-
if (searchExecutionContext.isSourceSynthetic()) {
223+
if (searchExecutionContext.isSourceSynthetic() && withinMultiField) {
224+
String parentField = searchExecutionContext.parentPath(name());
225+
var parent = searchExecutionContext.lookup().fieldType(parentField);
226+
if (parent.isStored()) {
227+
return storedFieldFetcher(parentField);
228+
} else if (parent.hasDocValues()) {
229+
return docValuesFieldFetcher(parentField);
230+
} else {
231+
assert false : "parent field should either be stored or have doc values";
232+
}
233+
} else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
234+
var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
235+
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
236+
if (kwd != null) {
237+
var fieldType = kwd.fieldType();
238+
if (fieldType.isStored()) {
239+
return storedFieldFetcher(fieldType.name());
240+
} else if (fieldType.hasDocValues()) {
241+
return docValuesFieldFetcher(fieldType.name());
242+
} else {
243+
assert false : "multi field should either be stored or have doc values";
244+
}
245+
} else {
246+
assert false : "multi field of type keyword should exist";
247+
}
248+
} else if (searchExecutionContext.isSourceSynthetic()) {
213249
String name = storedFieldNameForSyntheticSource();
214-
StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name));
215-
return context -> {
216-
LeafStoredFieldLoader leafLoader = loader.getLoader(context, null);
217-
return docId -> {
218-
leafLoader.advanceTo(docId);
219-
return leafLoader.storedFields().get(name);
220-
};
221-
};
250+
return storedFieldFetcher(name);
222251
}
223252
return context -> {
224253
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
@@ -234,6 +263,35 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
234263
};
235264
}
236265

266+
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(String name) {
267+
return context -> {
268+
var sortedDocValues = DocValues.getSortedSet(context.reader(), name);
269+
return docId -> {
270+
if (sortedDocValues.advanceExact(docId)) {
271+
var values = new ArrayList<>(sortedDocValues.docValueCount());
272+
for (int i = 0; i < sortedDocValues.docValueCount(); i++) {
273+
long ord = sortedDocValues.nextOrd();
274+
values.add(sortedDocValues.lookupOrd(ord).utf8ToString());
275+
}
276+
return values;
277+
} else {
278+
return List.of();
279+
}
280+
};
281+
};
282+
}
283+
284+
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String name) {
285+
var loader = StoredFieldLoader.create(false, Set.of(name));
286+
return context -> {
287+
var leafLoader = loader.getLoader(context, null);
288+
return docId -> {
289+
leafLoader.advanceTo(docId);
290+
return leafLoader.storedFields().get(name);
291+
};
292+
};
293+
}
294+
237295
private Query toQuery(Query query, SearchExecutionContext searchExecutionContext) {
238296
return new ConstantScoreQuery(
239297
new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer)
@@ -506,18 +564,27 @@ public MatchOnlyTextFieldType fieldType() {
506564

507565
@Override
508566
protected SyntheticSourceSupport syntheticSourceSupport() {
509-
return new SyntheticSourceSupport.Native(
510-
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
511-
@Override
512-
protected void write(XContentBuilder b, Object value) throws IOException {
513-
if (value instanceof BytesRef valueBytes) {
514-
b.value(valueBytes.utf8ToString());
515-
} else {
516-
assert value instanceof String;
517-
b.value(value.toString());
567+
if (storeSource) {
568+
return new SyntheticSourceSupport.Native(
569+
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
570+
@Override
571+
protected void write(XContentBuilder b, Object value) throws IOException {
572+
if (value instanceof BytesRef valueBytes) {
573+
b.value(valueBytes.utf8ToString());
574+
} else {
575+
assert value instanceof String;
576+
b.value(value.toString());
577+
}
518578
}
519579
}
580+
);
581+
} else {
582+
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
583+
if (kwd != null) {
584+
return new SyntheticSourceSupport.Native(() -> kwd.syntheticFieldLoader(fullPath(), leafName()));
520585
}
521-
);
586+
assert false : "there should be a suite field mapper with native synthetic source support";
587+
return super.syntheticSourceSupport();
588+
}
522589
}
523590
}

modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,4 +394,170 @@ synthetic_source with copy_to:
394394
- match:
395395
hits.hits.0.fields.copy.0: "Apache Lucene powers Elasticsearch"
396396

397+
---
398+
synthetic_source match_only_text as multi-field:
399+
- requires:
400+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
401+
reason: "Source mode configured through index setting"
402+
403+
- do:
404+
indices.create:
405+
index: synthetic_source_test
406+
body:
407+
settings:
408+
index:
409+
mapping.source.mode: synthetic
410+
mappings:
411+
properties:
412+
foo:
413+
type: keyword
414+
fields:
415+
text:
416+
type: match_only_text
417+
418+
- do:
419+
index:
420+
index: synthetic_source_test
421+
id: "1"
422+
refresh: true
423+
body:
424+
foo: "Apache Lucene powers Elasticsearch"
425+
426+
- do:
427+
search:
428+
index: synthetic_source_test
429+
body:
430+
query:
431+
match_phrase:
432+
foo.text: apache lucene
433+
434+
- match: { "hits.total.value": 1 }
435+
- match:
436+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
437+
438+
---
439+
synthetic_source match_only_text as multi-field with stored keyword as parent:
440+
- requires:
441+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
442+
reason: "Source mode configured through index setting"
443+
444+
- do:
445+
indices.create:
446+
index: synthetic_source_test
447+
body:
448+
settings:
449+
index:
450+
mapping.source.mode: synthetic
451+
mappings:
452+
properties:
453+
foo:
454+
type: keyword
455+
store: true
456+
doc_values: false
457+
fields:
458+
text:
459+
type: match_only_text
460+
461+
- do:
462+
index:
463+
index: synthetic_source_test
464+
id: "1"
465+
refresh: true
466+
body:
467+
foo: "Apache Lucene powers Elasticsearch"
468+
469+
- do:
470+
search:
471+
index: synthetic_source_test
472+
body:
473+
query:
474+
match_phrase:
475+
foo.text: apache lucene
476+
477+
- match: { "hits.total.value": 1 }
478+
- match:
479+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
397480

481+
---
482+
synthetic_source match_only_text with multi-field:
483+
- requires:
484+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
485+
reason: "Source mode configured through index setting"
486+
487+
- do:
488+
indices.create:
489+
index: synthetic_source_test
490+
body:
491+
settings:
492+
index:
493+
mapping.source.mode: synthetic
494+
mappings:
495+
properties:
496+
foo:
497+
type: match_only_text
498+
fields:
499+
raw:
500+
type: keyword
501+
502+
- do:
503+
index:
504+
index: synthetic_source_test
505+
id: "1"
506+
refresh: true
507+
body:
508+
foo: "Apache Lucene powers Elasticsearch"
509+
510+
- do:
511+
search:
512+
index: synthetic_source_test
513+
body:
514+
query:
515+
match_phrase:
516+
foo: apache lucene
517+
518+
- match: { "hits.total.value": 1 }
519+
- match:
520+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
521+
522+
---
523+
synthetic_source match_only_text with stored multi-field:
524+
- requires:
525+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
526+
reason: "Source mode configured through index setting"
527+
528+
- do:
529+
indices.create:
530+
index: synthetic_source_test
531+
body:
532+
settings:
533+
index:
534+
mapping.source.mode: synthetic
535+
mappings:
536+
properties:
537+
foo:
538+
type: match_only_text
539+
fields:
540+
raw:
541+
type: keyword
542+
store: true
543+
doc_values: false
544+
545+
- do:
546+
index:
547+
index: synthetic_source_test
548+
id: "1"
549+
refresh: true
550+
body:
551+
foo: "Apache Lucene powers Elasticsearch"
552+
553+
- do:
554+
search:
555+
index: synthetic_source_test
556+
body:
557+
query:
558+
match_phrase:
559+
foo: apache lucene
560+
561+
- match: { "hits.total.value": 1 }
562+
- match:
563+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"

0 commit comments

Comments
 (0)