Skip to content

Commit 474a405

Browse files
committed
Add index version for match_only_text stored field in binary format
1 parent 53a41db commit 474a405

File tree

2 files changed

+39
-8
lines changed

2 files changed

+39
-8
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 38 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.common.text.UTF8DecodingReader;
3636
import org.elasticsearch.common.unit.Fuzziness;
3737
import org.elasticsearch.index.IndexVersion;
38+
import org.elasticsearch.index.IndexVersions;
3839
import org.elasticsearch.index.analysis.IndexAnalyzers;
3940
import org.elasticsearch.index.analysis.NamedAnalyzer;
4041
import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -105,8 +106,15 @@ public static class Builder extends FieldMapper.Builder {
105106

106107
private final TextParams.Analyzers analyzers;
107108
private final boolean withinMultiField;
109+
private final boolean storedFieldInBinaryFormat;
108110

109-
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
111+
public Builder(
112+
String name,
113+
IndexVersion indexCreatedVersion,
114+
IndexAnalyzers indexAnalyzers,
115+
boolean withinMultiField,
116+
boolean storedFieldInBinaryFormat
117+
) {
110118
super(name);
111119
this.indexCreatedVersion = indexCreatedVersion;
112120
this.analyzers = new TextParams.Analyzers(
@@ -116,6 +124,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
116124
indexCreatedVersion
117125
);
118126
this.withinMultiField = withinMultiField;
127+
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
119128
}
120129

121130
@Override
@@ -135,7 +144,8 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
135144
context.isSourceSynthetic(),
136145
meta.getValue(),
137146
withinMultiField,
138-
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField()
147+
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(),
148+
storedFieldInBinaryFormat
139149
);
140150
return ft;
141151
}
@@ -156,7 +166,13 @@ public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
156166
}
157167

158168
public static final TypeParser PARSER = new TypeParser(
159-
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
169+
(n, c) -> new Builder(
170+
n,
171+
c.indexVersionCreated(),
172+
c.getIndexAnalyzers(),
173+
c.isWithinMultiField(),
174+
c.indexVersionCreated().onOrAfter(IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES)
175+
)
160176
);
161177

162178
public static class MatchOnlyTextFieldType extends StringFieldType {
@@ -167,6 +183,7 @@ public static class MatchOnlyTextFieldType extends StringFieldType {
167183

168184
private final boolean withinMultiField;
169185
private final boolean hasCompatibleMultiFields;
186+
private final boolean storedFieldInBinaryFormat;
170187

171188
public MatchOnlyTextFieldType(
172189
String name,
@@ -175,14 +192,16 @@ public MatchOnlyTextFieldType(
175192
boolean isSyntheticSource,
176193
Map<String, String> meta,
177194
boolean withinMultiField,
178-
boolean hasCompatibleMultiFields
195+
boolean hasCompatibleMultiFields,
196+
boolean storedFieldInBinaryFormat
179197
) {
180198
super(name, true, false, false, tsi, meta);
181199
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
182200
this.textFieldType = new TextFieldType(name, isSyntheticSource);
183201
this.originalName = isSyntheticSource ? name + "._original" : null;
184202
this.withinMultiField = withinMultiField;
185203
this.hasCompatibleMultiFields = hasCompatibleMultiFields;
204+
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
186205
}
187206

188207
public MatchOnlyTextFieldType(String name) {
@@ -193,6 +212,7 @@ public MatchOnlyTextFieldType(String name) {
193212
false,
194213
Collections.emptyMap(),
195214
false,
215+
false,
196216
false
197217
);
198218
}
@@ -451,7 +471,11 @@ protected BytesRef toBytesRef(Object v) {
451471
@Override
452472
public BlockLoader blockLoader(BlockLoaderContext blContext) {
453473
if (textFieldType.isSyntheticSource()) {
454-
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
474+
if (storedFieldInBinaryFormat) {
475+
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
476+
} else {
477+
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
478+
}
455479
}
456480
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
457481
// MatchOnlyText never has norms, so we have to use the field names field
@@ -502,6 +526,7 @@ private String storedFieldNameForSyntheticSource() {
502526
private final boolean storeSource;
503527
private final FieldType fieldType;
504528
private final boolean withinMultiField;
529+
private final boolean storedFieldInBinaryFormat;
505530

506531
private MatchOnlyTextFieldMapper(
507532
String simpleName,
@@ -521,6 +546,7 @@ private MatchOnlyTextFieldMapper(
521546
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
522547
this.storeSource = storeSource;
523548
this.withinMultiField = builder.withinMultiField;
549+
this.storedFieldInBinaryFormat = builder.storedFieldInBinaryFormat;
524550
}
525551

526552
@Override
@@ -530,7 +556,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {
530556

531557
@Override
532558
public FieldMapper.Builder getMergeBuilder() {
533-
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
559+
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField, storedFieldInBinaryFormat).init(this);
534560
}
535561

536562
@Override
@@ -547,8 +573,12 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
547573
context.addToFieldNames(fieldType().name());
548574

549575
if (storeSource) {
550-
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
551-
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
576+
if (storedFieldInBinaryFormat) {
577+
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
578+
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
579+
} else {
580+
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value.string()));
581+
}
552582
}
553583
}
554584

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,7 @@ private static Version parseUnchecked(String version) {
178178
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_2 = def(9_030_0_00, Version.LUCENE_10_2_2);
179179
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2);
180180
public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2);
181+
public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2);
181182

182183
/*
183184
* STOP! READ THIS FIRST! No, really,

0 commit comments

Comments
 (0)