Skip to content

Commit 1a13d75

Browse files
[9.1] Add index version for match_only_text stored field in binary format (#130363) (#130414)
* Add index version for match_only_text stored field in binary format (#130363)

  Follow-up to #130049 to gate using the binary format for the stored field in match_only_text fields behind an index version.

* Fix index version check
1 parent cc34953 commit 1a13d75

File tree

3 files changed

+56
-10
lines changed

3 files changed

+56
-10
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.common.text.UTF8DecodingReader;
3636
import org.elasticsearch.common.unit.Fuzziness;
3737
import org.elasticsearch.index.IndexVersion;
38+
import org.elasticsearch.index.IndexVersions;
3839
import org.elasticsearch.index.analysis.IndexAnalyzers;
3940
import org.elasticsearch.index.analysis.NamedAnalyzer;
4041
import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -105,8 +106,15 @@ public static class Builder extends FieldMapper.Builder {
105106

106107
private final TextParams.Analyzers analyzers;
107108
private final boolean withinMultiField;
109+
private final boolean storedFieldInBinaryFormat;
108110

109-
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
111+
public Builder(
112+
String name,
113+
IndexVersion indexCreatedVersion,
114+
IndexAnalyzers indexAnalyzers,
115+
boolean withinMultiField,
116+
boolean storedFieldInBinaryFormat
117+
) {
110118
super(name);
111119
this.indexCreatedVersion = indexCreatedVersion;
112120
this.analyzers = new TextParams.Analyzers(
@@ -116,6 +124,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
116124
indexCreatedVersion
117125
);
118126
this.withinMultiField = withinMultiField;
127+
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
119128
}
120129

121130
@Override
@@ -135,7 +144,8 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
135144
context.isSourceSynthetic(),
136145
meta.getValue(),
137146
withinMultiField,
138-
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField()
147+
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(),
148+
storedFieldInBinaryFormat
139149
);
140150
return ft;
141151
}
@@ -155,8 +165,22 @@ public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
155165
}
156166
}
157167

168+
private static boolean isSyntheticSourceStoredFieldInBinaryFormat(IndexVersion indexCreatedVersion) {
169+
return indexCreatedVersion.onOrAfter(IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES)
170+
|| indexCreatedVersion.between(
171+
IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X,
172+
IndexVersions.UPGRADE_TO_LUCENE_10_0_0
173+
);
174+
}
175+
158176
public static final TypeParser PARSER = new TypeParser(
159-
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
177+
(n, c) -> new Builder(
178+
n,
179+
c.indexVersionCreated(),
180+
c.getIndexAnalyzers(),
181+
c.isWithinMultiField(),
182+
isSyntheticSourceStoredFieldInBinaryFormat(c.indexVersionCreated())
183+
)
160184
);
161185

162186
public static class MatchOnlyTextFieldType extends StringFieldType {
@@ -167,6 +191,7 @@ public static class MatchOnlyTextFieldType extends StringFieldType {
167191

168192
private final boolean withinMultiField;
169193
private final boolean hasCompatibleMultiFields;
194+
private final boolean storedFieldInBinaryFormat;
170195

171196
public MatchOnlyTextFieldType(
172197
String name,
@@ -175,14 +200,16 @@ public MatchOnlyTextFieldType(
175200
boolean isSyntheticSource,
176201
Map<String, String> meta,
177202
boolean withinMultiField,
178-
boolean hasCompatibleMultiFields
203+
boolean hasCompatibleMultiFields,
204+
boolean storedFieldInBinaryFormat
179205
) {
180206
super(name, true, false, false, tsi, meta);
181207
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
182208
this.textFieldType = new TextFieldType(name, isSyntheticSource);
183209
this.originalName = isSyntheticSource ? name + "._original" : null;
184210
this.withinMultiField = withinMultiField;
185211
this.hasCompatibleMultiFields = hasCompatibleMultiFields;
212+
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
186213
}
187214

188215
public MatchOnlyTextFieldType(String name) {
@@ -193,6 +220,7 @@ public MatchOnlyTextFieldType(String name) {
193220
false,
194221
Collections.emptyMap(),
195222
false,
223+
false,
196224
false
197225
);
198226
}
@@ -451,7 +479,11 @@ protected BytesRef toBytesRef(Object v) {
451479
@Override
452480
public BlockLoader blockLoader(BlockLoaderContext blContext) {
453481
if (textFieldType.isSyntheticSource()) {
454-
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
482+
if (storedFieldInBinaryFormat) {
483+
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
484+
} else {
485+
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
486+
}
455487
}
456488
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
457489
// MatchOnlyText never has norms, so we have to use the field names field
@@ -502,6 +534,7 @@ private String storedFieldNameForSyntheticSource() {
502534
private final boolean storeSource;
503535
private final FieldType fieldType;
504536
private final boolean withinMultiField;
537+
private final boolean storedFieldInBinaryFormat;
505538

506539
private MatchOnlyTextFieldMapper(
507540
String simpleName,
@@ -521,6 +554,7 @@ private MatchOnlyTextFieldMapper(
521554
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
522555
this.storeSource = storeSource;
523556
this.withinMultiField = builder.withinMultiField;
557+
this.storedFieldInBinaryFormat = builder.storedFieldInBinaryFormat;
524558
}
525559

526560
@Override
@@ -530,7 +564,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {
530564

531565
@Override
532566
public FieldMapper.Builder getMergeBuilder() {
533-
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
567+
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField, storedFieldInBinaryFormat).init(this);
534568
}
535569

536570
@Override
@@ -547,8 +581,12 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
547581
context.addToFieldNames(fieldType().name());
548582

549583
if (storeSource) {
550-
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
551-
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
584+
if (storedFieldInBinaryFormat) {
585+
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
586+
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
587+
} else {
588+
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value.string()));
589+
}
552590
}
553591
}
554592

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
import org.apache.lucene.tests.index.RandomIndexWriter;
2727
import org.apache.lucene.util.BytesRef;
2828
import org.elasticsearch.common.Strings;
29+
import org.elasticsearch.common.settings.Settings;
2930
import org.elasticsearch.core.Tuple;
3031
import org.elasticsearch.index.IndexSettings;
32+
import org.elasticsearch.index.IndexVersions;
3133
import org.elasticsearch.index.mapper.DocumentMapper;
3234
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3335
import org.elasticsearch.index.mapper.LuceneDocument;
@@ -356,10 +358,14 @@ public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() thr
356358
}
357359

358360
public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException {
359-
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
361+
var mappings = mapping(b -> {
360362
b.startObject("field1").field("type", "match_only_text").endObject();
361363
b.startObject("field2").field("type", "match_only_text").endObject();
362-
})).documentMapper();
364+
});
365+
var settings = Settings.builder().put("index.mapping.source.mode", "synthetic").build();
366+
DocumentMapper mapper = createMapperService(IndexVersions.UPGRADE_TO_LUCENE_10_2_2, settings, () -> true, mappings)
367+
.documentMapper();
368+
363369
try (Directory directory = newDirectory()) {
364370
RandomIndexWriter iw = indexWriterForSyntheticSource(directory);
365371

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ private static Version parseUnchecked(String version) {
145145
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 = def(8_533_0_00, Version.LUCENE_9_12_1);
146146
public static final IndexVersion UPGRADE_TO_LUCENE_9_12_2 = def(8_534_0_00, Version.LUCENE_9_12_2);
147147
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X = def(8_535_0_00, Version.LUCENE_9_12_2);
148+
public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X = def(8_536_0_00, Version.LUCENE_9_12_2);
148149
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_0_00, Version.LUCENE_10_0_0);
149150
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_0_00, Version.LUCENE_10_0_0);
150151
public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_0_00, Version.LUCENE_10_0_0);
@@ -178,6 +179,7 @@ private static Version parseUnchecked(String version) {
178179
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_2 = def(9_030_0_00, Version.LUCENE_10_2_2);
179180
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2);
180181
public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2);
182+
public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2);
181183

182184
/*
183185
* STOP! READ THIS FIRST! No, really,

0 commit comments

Comments
 (0)