Skip to content

Commit 8f5665f

Browse files
committed
Merged with main
1 parent fa1b376 commit 8f5665f

File tree

44 files changed

+1301
-490
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1301
-490
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 185 additions & 122 deletions
Large diffs are not rendered by default.

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ public void test_block_loader_uses_stored_fields_for_loading_when_synthetic_sour
243243

244244
// then
245245
// verify that block loading falls back to the stored-fields-based loader rather than the synthetic source delegate
246-
assertThat(blockLoader, Matchers.instanceOf(MatchOnlyTextFieldType.BytesFromMixedStringsBytesRefBlockLoader.class));
246+
assertTrue(blockLoader instanceof MatchOnlyTextFieldType.BytesFromMixedStringsBytesRefBlockLoader);
247247
}
248248

249249
public void test_block_loader_uses_synthetic_source_delegate_when_ignore_above_is_not_set() {
@@ -271,7 +271,7 @@ public void test_block_loader_uses_synthetic_source_delegate_when_ignore_above_i
271271

272272
// then
273273
// verify that we delegate block loading to the synthetic source delegate
274-
assertThat(blockLoader, Matchers.instanceOf(BlockLoader.Delegating.class));
274+
assertTrue(blockLoader instanceof BlockLoader.Delegating);
275275
}
276276

277277
public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set() {
@@ -319,7 +319,7 @@ public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore
319319

320320
// then
321321
// verify that we don't delegate anything
322-
assertThat(blockLoader, Matchers.not(Matchers.instanceOf(BlockLoader.Delegating.class)));
322+
assertFalse(blockLoader instanceof BlockLoader.Delegating);
323323
}
324324

325325
public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set_at_index_level() {
@@ -367,6 +367,6 @@ public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore
367367

368368
// then
369369
// verify that we don't delegate anything
370-
assertThat(blockLoader, Matchers.not(Matchers.instanceOf(BlockLoader.Delegating.class)));
370+
assertFalse(blockLoader instanceof BlockLoader.Delegating);
371371
}
372372
}

modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,114 @@ synthetic_source match_only_text with ignored multi-field:
650650
- match:
651651
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
652652

653+
---
654+
synthetic_source match_only_text with ignored multi-field and multiple values in the same doc:
655+
- requires:
656+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
657+
reason: "Source mode configured through index setting"
658+
659+
- do:
660+
indices.create:
661+
index: synthetic_source_test
662+
body:
663+
settings:
664+
index:
665+
mapping.source.mode: synthetic
666+
mappings:
667+
properties:
668+
foo:
669+
type: match_only_text
670+
fields:
671+
raw:
672+
type: keyword
673+
ignore_above: 20
674+
675+
- do:
676+
index:
677+
index: synthetic_source_test
678+
id: "1"
679+
refresh: true
680+
body:
681+
foo: [ "This value is too long and will be ignored", "This value is short" ]
682+
683+
- do:
684+
search:
685+
index: synthetic_source_test
686+
body:
687+
query:
688+
match_phrase:
689+
foo: this value is
690+
691+
- match: { "hits.total.value": 1 }
692+
- match: { hits.hits.0._source.foo.0: "This value is too long and will be ignored" }
693+
- match: { hits.hits.0._source.foo.1: "This value is short" }
694+
695+
# now, flip the values around
696+
- do:
697+
index:
698+
index: synthetic_source_test
699+
id: "1"
700+
refresh: true
701+
body:
702+
foo: [ "This value is short", "This value is too long and will be ignored" ]
703+
704+
- do:
705+
search:
706+
index: synthetic_source_test
707+
body:
708+
query:
709+
match_phrase:
710+
foo: this value is
711+
712+
- match: { "hits.total.value": 1 }
713+
# the order will be the same since text fields currently don't take offsets into account
714+
- match: { hits.hits.0._source.foo.0: "This value is too long and will be ignored" }
715+
- match: { hits.hits.0._source.foo.1: "This value is short" }
716+
717+
---
718+
synthetic_source match_only_text with ignored multi-field and multiple values in the same doc and preserved order:
719+
- requires:
720+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
721+
reason: "Source mode configured through index setting"
722+
723+
- do:
724+
indices.create:
725+
index: synthetic_source_test
726+
body:
727+
settings:
728+
index:
729+
mapping.source.mode: synthetic
730+
mappings:
731+
properties:
732+
foo:
733+
type: match_only_text
734+
# this will force the order to be preserved
735+
synthetic_source_keep: arrays
736+
fields:
737+
raw:
738+
type: keyword
739+
ignore_above: 20
740+
741+
- do:
742+
index:
743+
index: synthetic_source_test
744+
id: "1"
745+
refresh: true
746+
body:
747+
foo: [ "This value is short", "This value is too long and will be ignored" ]
748+
749+
- do:
750+
search:
751+
index: synthetic_source_test
752+
body:
753+
query:
754+
match_phrase:
755+
foo: this value is
756+
757+
- match: { "hits.total.value": 1 }
758+
- match: { hits.hits.0._source.foo.0: "This value is short" }
759+
- match: { hits.hits.0._source.foo.1: "This value is too long and will be ignored" }
760+
653761
---
654762
synthetic_source match_only_text with stored multi-field:
655763
- requires:

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

Lines changed: 90 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,20 @@
2020
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
2121
import org.apache.lucene.document.Field;
2222
import org.apache.lucene.document.FieldType;
23+
import org.apache.lucene.document.StoredField;
2324
import org.apache.lucene.index.IndexOptions;
2425
import org.elasticsearch.ElasticsearchParseException;
2526
import org.elasticsearch.index.IndexVersion;
2627
import org.elasticsearch.index.analysis.AnalyzerScope;
2728
import org.elasticsearch.index.analysis.IndexAnalyzers;
2829
import org.elasticsearch.index.analysis.NamedAnalyzer;
30+
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
2931
import org.elasticsearch.index.mapper.DocumentParserContext;
3032
import org.elasticsearch.index.mapper.FieldMapper;
3133
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3234
import org.elasticsearch.index.mapper.MapperBuilderContext;
3335
import org.elasticsearch.index.mapper.SourceFieldMapper;
36+
import org.elasticsearch.index.mapper.SourceLoader;
3437
import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
3538
import org.elasticsearch.index.mapper.TextFieldMapper;
3639
import org.elasticsearch.index.mapper.TextParams;
@@ -78,7 +81,7 @@ private static NamedAnalyzer wrapAnalyzer(NamedAnalyzer in) {
7881
);
7982
}
8083

81-
public static class Builder extends FieldMapper.Builder {
84+
public static class Builder extends BuilderWithExtendedSyntheticSourceSupport {
8285

8386
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.getValue());
8487
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.getValue());
@@ -87,25 +90,31 @@ public static class Builder extends FieldMapper.Builder {
8790

8891
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
8992

90-
private final IndexVersion indexCreatedVersion;
9193
private final TextParams.Analyzers analyzers;
92-
private final boolean isSyntheticSourceEnabled;
9394
private final Parameter<Boolean> store;
9495

95-
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
96-
super(name);
97-
this.indexCreatedVersion = indexCreatedVersion;
96+
public Builder(
97+
String name,
98+
IndexVersion indexCreatedVersion,
99+
IndexAnalyzers indexAnalyzers,
100+
boolean isSyntheticSourceEnabled,
101+
boolean isWithinMultiField
102+
) {
103+
super(name, indexCreatedVersion, isSyntheticSourceEnabled, isWithinMultiField);
98104
this.analyzers = new TextParams.Analyzers(
99105
indexAnalyzers,
100106
m -> builder(m).analyzers.getIndexAnalyzer(),
101107
m -> builder(m).analyzers.positionIncrementGap.getValue(),
102108
indexCreatedVersion
103109
);
104-
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
105-
this.store = Parameter.storeParam(
106-
m -> builder(m).store.getValue(),
107-
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
108-
);
110+
this.store = Parameter.storeParam(m -> builder(m).store.getValue(), this::storeDefault);
111+
}
112+
113+
private boolean storeDefault() {
114+
if (TextFieldMapper.keywordMultiFieldsNotStoredWhenIgnoredIndexVersionCheck(indexCreatedVersion())) {
115+
return false;
116+
}
117+
return isSyntheticSourceEnabled() && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
109118
}
110119

111120
@Override
@@ -135,6 +144,7 @@ private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilder
135144
store.getValue(),
136145
tsi,
137146
context.isSourceSynthetic(),
147+
isWithinMultiField(),
138148
TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType.stored(), multiFields),
139149
meta.getValue()
140150
);
@@ -165,7 +175,13 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
165175
}
166176

167177
public static final TypeParser PARSER = new TypeParser(
168-
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings()))
178+
(n, c) -> new Builder(
179+
n,
180+
c.indexVersionCreated(),
181+
c.getIndexAnalyzers(),
182+
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
183+
c.isWithinMultiField()
184+
)
169185
);
170186

171187
/**
@@ -484,15 +500,17 @@ private void emitAnnotation(int firstSpannedTextPosInc, int annotationPosLen) th
484500
}
485501

486502
public static final class AnnotatedTextFieldType extends TextFieldMapper.TextFieldType {
503+
487504
private AnnotatedTextFieldType(
488505
String name,
489506
boolean store,
490507
TextSearchInfo tsi,
491508
boolean isSyntheticSource,
509+
boolean isWithinMultiField,
492510
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate,
493511
Map<String, String> meta
494512
) {
495-
super(name, true, store, tsi, isSyntheticSource, syntheticSourceDelegate, meta, false, false);
513+
super(name, true, store, tsi, isSyntheticSource, isWithinMultiField, syntheticSourceDelegate, meta, false, false);
496514
}
497515

498516
public AnnotatedTextFieldType(String name, Map<String, String> meta) {
@@ -505,9 +523,9 @@ public String typeName() {
505523
}
506524
}
507525

526+
private final IndexVersion indexCreatedVersion;
508527
private final FieldType fieldType;
509528
private final Builder builder;
510-
511529
private final NamedAnalyzer indexAnalyzer;
512530

513531
protected AnnotatedTextFieldMapper(
@@ -518,10 +536,18 @@ protected AnnotatedTextFieldMapper(
518536
Builder builder
519537
) {
520538
super(simpleName, mappedFieldType, builderParams);
539+
521540
assert fieldType.tokenized();
541+
522542
this.fieldType = freezeAndDeduplicateFieldType(fieldType);
523543
this.builder = builder;
524544
this.indexAnalyzer = wrapAnalyzer(builder.analyzers.getIndexAnalyzer());
545+
this.indexCreatedVersion = builder.indexCreatedVersion();
546+
}
547+
548+
@Override
549+
public AnnotatedTextFieldType fieldType() {
550+
return (AnnotatedTextFieldType) super.fieldType();
525551
}
526552

527553
@Override
@@ -544,6 +570,26 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
544570
context.addToFieldNames(fieldType().name());
545571
}
546572
}
573+
574+
// if synthetic source needs to be supported, yet the field isn't stored, then we need to rely on something else
575+
if (needsToSupportSyntheticSource() && fieldType.stored() == false) {
576+
// if we can rely on the synthetic source delegate for synthetic source, then return
577+
if (fieldType().canUseSyntheticSourceDelegateForSyntheticSource(value)) {
578+
return;
579+
}
580+
581+
// otherwise, we need to store a copy of this value so that synthetic source can load it
582+
final String fieldName = fieldType().syntheticSourceFallbackFieldName();
583+
context.doc().add(new StoredField(fieldName, value));
584+
}
585+
}
586+
587+
private boolean needsToSupportSyntheticSource() {
588+
if (TextFieldMapper.multiFieldsNotStoredByDefault_indexVersionCheck(indexCreatedVersion)) {
589+
// if we're within a multi field, then supporting synthetic source isn't necessary as that's the responsibility of the parent
590+
return fieldType().isSyntheticSourceEnabled() && fieldType().isWithinMultiField() == false;
591+
}
592+
return fieldType().isSyntheticSourceEnabled();
547593
}
548594

549595
@Override
@@ -553,8 +599,13 @@ protected String contentType() {
553599

554600
@Override
555601
public FieldMapper.Builder getMergeBuilder() {
556-
return new Builder(leafName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers, builder.isSyntheticSourceEnabled)
557-
.init(this);
602+
return new Builder(
603+
leafName(),
604+
builder.indexCreatedVersion(),
605+
builder.analyzers.indexAnalyzers,
606+
fieldType().isSyntheticSourceEnabled(),
607+
fieldType().isWithinMultiField()
608+
).init(this);
558609
}
559610

560611
@Override
@@ -568,11 +619,32 @@ protected void write(XContentBuilder b, Object value) throws IOException {
568619
});
569620
}
570621

622+
return new SyntheticSourceSupport.Native(() -> syntheticFieldLoader(fullPath(), leafName()));
623+
}
624+
625+
private SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fullFieldName, String leafFieldName) {
626+
// since we don't know whether the delegate field loader can be used for synthetic source until parsing, we need to check both this
627+
// field and the delegate
628+
629+
// first field loader - to check whether the field's value was stored under this annotated_text field
630+
final String fieldName = fieldType().syntheticSourceFallbackFieldName();
631+
final var thisFieldLayer = new CompositeSyntheticFieldLoader.StoredFieldLayer(fieldName) {
632+
@Override
633+
protected void writeValue(Object value, XContentBuilder b) throws IOException {
634+
b.value(value.toString());
635+
}
636+
};
637+
638+
final CompositeSyntheticFieldLoader fieldLoader = new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, thisFieldLayer);
639+
640+
// second loader - to check whether the field's value was stored by a keyword delegate field
571641
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
572642
if (kwd != null) {
573-
return new SyntheticSourceSupport.Native(() -> kwd.syntheticFieldLoader(fullPath(), leafName()));
643+
// merge the two field loaders into one
644+
var kwdFieldLoader = kwd.syntheticFieldLoader(fullPath(), leafName());
645+
return fieldLoader.mergedWith(kwdFieldLoader);
574646
}
575647

576-
return super.syntheticSourceSupport();
648+
return fieldLoader;
577649
}
578650
}

0 commit comments

Comments
 (0)