Skip to content

Commit 101b69f

Browse files
committed
Abstracted how Text fields use Keyword fields inside of Text fields
1 parent e7abaf6 commit 101b69f

File tree

25 files changed

+619
-423
lines changed

25 files changed

+619
-423
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 149 additions & 115 deletions
Large diffs are not rendered by default.

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,22 @@
2020
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
2121
import org.apache.lucene.document.Field;
2222
import org.apache.lucene.document.FieldType;
23+
import org.apache.lucene.document.StoredField;
2324
import org.apache.lucene.index.IndexOptions;
25+
import org.apache.lucene.util.BytesRef;
2426
import org.elasticsearch.ElasticsearchParseException;
2527
import org.elasticsearch.index.IndexVersion;
2628
import org.elasticsearch.index.analysis.AnalyzerScope;
2729
import org.elasticsearch.index.analysis.IndexAnalyzers;
2830
import org.elasticsearch.index.analysis.NamedAnalyzer;
31+
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
2932
import org.elasticsearch.index.mapper.DocumentParserContext;
3033
import org.elasticsearch.index.mapper.FieldMapper;
3134
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3235
import org.elasticsearch.index.mapper.MapperBuilderContext;
33-
import org.elasticsearch.index.mapper.SourceFieldMapper;
36+
import org.elasticsearch.index.mapper.SourceLoader;
3437
import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
38+
import org.elasticsearch.index.mapper.TextFamilyFieldMapper;
3539
import org.elasticsearch.index.mapper.TextFieldMapper;
3640
import org.elasticsearch.index.mapper.TextParams;
3741
import org.elasticsearch.index.mapper.TextSearchInfo;
@@ -61,7 +65,7 @@
6165
* This code is largely a copy of TextFieldMapper which is less than ideal -
6266
* my attempts to subclass TextFieldMapper failed but we can revisit this.
6367
**/
64-
public class AnnotatedTextFieldMapper extends FieldMapper {
68+
public class AnnotatedTextFieldMapper extends TextFamilyFieldMapper {
6569

6670
public static final String CONTENT_TYPE = "annotated_text";
6771

@@ -84,28 +88,26 @@ public static class Builder extends FieldMapper.Builder {
8488
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.getValue());
8589
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.getValue());
8690
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.getValue());
91+
private final Parameter<Boolean> store = Parameter.storeParam(m -> builder(m).store.getValue(), false);
8792

8893
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
8994

9095
private final IndexVersion indexCreatedVersion;
9196
private final TextParams.Analyzers analyzers;
92-
private final boolean isSyntheticSourceEnabled;
93-
private final Parameter<Boolean> store;
97+
private final boolean isWithinMultiField;
9498

95-
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
99+
private boolean isSyntheticSourceEnabled;
100+
101+
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isWithinMultiField) {
96102
super(name);
97103
this.indexCreatedVersion = indexCreatedVersion;
104+
this.isWithinMultiField = isWithinMultiField;
98105
this.analyzers = new TextParams.Analyzers(
99106
indexAnalyzers,
100107
m -> builder(m).analyzers.getIndexAnalyzer(),
101108
m -> builder(m).analyzers.positionIncrementGap.getValue(),
102109
indexCreatedVersion
103110
);
104-
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
105-
this.store = Parameter.storeParam(
106-
m -> builder(m).store.getValue(),
107-
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
108-
);
109111
}
110112

111113
@Override
@@ -154,6 +156,7 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
154156
}
155157
}
156158
BuilderParams builderParams = builderParams(this, context);
159+
this.isSyntheticSourceEnabled = context.isSourceSynthetic();
157160
return new AnnotatedTextFieldMapper(
158161
leafName(),
159162
fieldType,
@@ -165,7 +168,7 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
165168
}
166169

167170
public static final TypeParser PARSER = new TypeParser(
168-
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings()))
171+
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
169172
);
170173

171174
/**
@@ -505,9 +508,9 @@ public String typeName() {
505508
}
506509
}
507510

511+
private final IndexVersion indexCreatedVersion;
508512
private final FieldType fieldType;
509513
private final Builder builder;
510-
511514
private final NamedAnalyzer indexAnalyzer;
512515

513516
protected AnnotatedTextFieldMapper(
@@ -517,11 +520,26 @@ protected AnnotatedTextFieldMapper(
517520
BuilderParams builderParams,
518521
Builder builder
519522
) {
520-
super(simpleName, mappedFieldType, builderParams);
523+
super(
524+
simpleName,
525+
builder.indexCreatedVersion,
526+
builder.isSyntheticSourceEnabled,
527+
builder.isWithinMultiField,
528+
mappedFieldType,
529+
builderParams
530+
);
531+
521532
assert fieldType.tokenized();
533+
522534
this.fieldType = freezeAndDeduplicateFieldType(fieldType);
523535
this.builder = builder;
524536
this.indexAnalyzer = wrapAnalyzer(builder.analyzers.getIndexAnalyzer());
537+
this.indexCreatedVersion = builder.indexCreatedVersion;
538+
}
539+
540+
@Override
541+
public AnnotatedTextFieldType fieldType() {
542+
return (AnnotatedTextFieldType) super.fieldType();
525543
}
526544

527545
@Override
@@ -531,18 +549,32 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {
531549

532550
@Override
533551
protected void parseCreateField(DocumentParserContext context) throws IOException {
534-
final String value = context.parser().textOrNull();
552+
final var value = context.parser().optimizedTextOrNull();
535553

536554
if (value == null) {
537555
return;
538556
}
539557

540558
if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) {
541-
Field field = new Field(mappedFieldType.name(), value, fieldType);
559+
Field field = new Field(mappedFieldType.name(), value.string(), fieldType);
542560
context.doc().add(field);
543561
if (fieldType.omitNorms()) {
544562
context.addToFieldNames(fieldType().name());
545563
}
564+
} else if (needsToSupportSyntheticSource() && fieldType.stored() == false) {
565+
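// if synthetic source needs to be supported, yet the field isn't stored, then we need to rely on something else to support synthetic source
566+
// NOTE(review): as the else of the indexed-or-stored check above, this branch only runs when the field is neither indexed nor stored, so the stored() == false test is always true here - confirm that indexed-but-unstored fields are not also meant to take this path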
567+
568+
// if we can rely on the synthetic source delegate for synthetic source, then return
569+
if (fieldType().canUseSyntheticSourceDelegateForSyntheticSource(value.string())) {
570+
return;
571+
}
572+
573+
// otherwise, we need to store a copy of this value so that synthetic source can load it
574+
var utfBytes = value.bytes();
575+
var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
576+
final String fieldName = fieldType().syntheticSourceFallbackFieldName(true);
577+
context.doc().add(new StoredField(fieldName, bytesRef));
546578
}
547579
}
548580

@@ -553,8 +585,7 @@ protected String contentType() {
553585

554586
@Override
555587
public FieldMapper.Builder getMergeBuilder() {
556-
return new Builder(leafName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers, builder.isSyntheticSourceEnabled)
557-
.init(this);
588+
return new Builder(leafName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers, isWithinMultiField).init(this);
558589
}
559590

560591
@Override
@@ -568,11 +599,31 @@ protected void write(XContentBuilder b, Object value) throws IOException {
568599
});
569600
}
570601

602+
return new SyntheticSourceSupport.Native(() -> syntheticFieldLoader(fullPath(), leafName()));
603+
}
604+
605+
private SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fullFieldName, String leafFieldName) {
606+
// since we don't know whether the delegate field loader can be used for synthetic source until parsing, we
607+
// need to check both this field and the delegate
608+
609+
// first field loader, representing this field
610+
final String fieldName = fieldType().syntheticSourceFallbackFieldName(isSyntheticSourceEnabled);
611+
final var thisFieldLayer = new CompositeSyntheticFieldLoader.StoredFieldLayer(fieldName) {
612+
@Override
613+
protected void writeValue(Object value, XContentBuilder b) throws IOException {
614+
b.value(value.toString());
615+
}
616+
};
617+
618+
final CompositeSyntheticFieldLoader fieldLoader = new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, thisFieldLayer);
619+
620+
// second loader, representing a delegate field, if one exists
571621
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
572622
if (kwd != null) {
573-
return new SyntheticSourceSupport.Native(() -> kwd.syntheticFieldLoader(fullPath(), leafName()));
623+
// merge the two field loaders into one
624+
return fieldLoader.mergedWith(kwd.syntheticFieldLoader(fullPath(), leafName()));
574625
}
575626

576-
return super.syntheticSourceSupport();
627+
return fieldLoader;
577628
}
578629
}

server/src/main/java/org/elasticsearch/index/mapper/CompositeSyntheticFieldLoader.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,18 @@ public String fieldName() {
132132
return this.fullFieldName;
133133
}
134134

135+
/**
136+
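* Returns a new {@link CompositeSyntheticFieldLoader} that keeps this loader's leaf and full field names and holds this loader's layers followed by those of {@code other}; a {@code null} argument yields a copy of this loader.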
137+
*/
138+
public CompositeSyntheticFieldLoader mergedWith(CompositeSyntheticFieldLoader other) {
139+
if (other == null) {
140+
return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, List.copyOf(parts));
141+
}
142+
List<Layer> mergedParts = new ArrayList<>(parts);
143+
mergedParts.addAll(other.parts);
144+
return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, mergedParts);
145+
}
146+
135147
/**
136148
* Represents one layer of loading synthetic source values for a field
137149
* as a part of {@link CompositeSyntheticFieldLoader}.

server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -335,10 +335,9 @@ public boolean newDynamicStringField(DocumentParserContext context, String name)
335335
);
336336
} else {
337337
return createDynamicField(
338-
new TextFieldMapper.Builder(name, context.indexAnalyzers(), SourceFieldMapper.isSynthetic(context.indexSettings()))
339-
.addMultiField(
340-
new KeywordFieldMapper.Builder("keyword", context.indexSettings().getIndexVersionCreated()).ignoreAbove(256)
341-
),
338+
new TextFieldMapper.Builder(name, context.indexAnalyzers()).addMultiField(
339+
new KeywordFieldMapper.Builder("keyword", context.indexSettings().getIndexVersionCreated(), true).ignoreAbove(256)
340+
),
342341
context
343342
);
344343
}

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -610,28 +610,13 @@ public static class Builder {
610610

611611
private final Map<String, Function<MapperBuilderContext, FieldMapper>> mapperBuilders = new HashMap<>();
612612

613-
private boolean hasSyntheticSourceCompatibleKeywordField;
614-
615613
public Builder add(FieldMapper.Builder builder) {
616614
mapperBuilders.put(builder.leafName(), builder::build);
617-
618-
if (builder instanceof KeywordFieldMapper.Builder kwd) {
619-
if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) {
620-
hasSyntheticSourceCompatibleKeywordField = true;
621-
}
622-
}
623-
624615
return this;
625616
}
626617

627618
private void add(FieldMapper mapper) {
628619
mapperBuilders.put(mapper.leafName(), context -> mapper);
629-
630-
if (mapper instanceof KeywordFieldMapper kwd) {
631-
if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) {
632-
hasSyntheticSourceCompatibleKeywordField = true;
633-
}
634-
}
635620
}
636621

637622
private void update(FieldMapper toMerge, MapperMergeContext context) {
@@ -649,10 +634,6 @@ public boolean hasMultiFields() {
649634
return mapperBuilders.isEmpty() == false;
650635
}
651636

652-
public boolean hasSyntheticSourceCompatibleKeywordField() {
653-
return hasSyntheticSourceCompatibleKeywordField;
654-
}
655-
656637
public MultiFields build(Mapper.Builder mainFieldBuilder, MapperBuilderContext context) {
657638
if (mapperBuilders.isEmpty()) {
658639
return empty();

0 commit comments

Comments
 (0)