Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.common.text.UTF8DecodingReader;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext;
Expand Down Expand Up @@ -105,8 +106,15 @@ public static class Builder extends FieldMapper.Builder {

private final TextParams.Analyzers analyzers;
private final boolean withinMultiField;
private final boolean storedFieldInBinaryFormat;

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexAnalyzers indexAnalyzers,
boolean withinMultiField,
boolean storedFieldInBinaryFormat
) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
Expand All @@ -116,6 +124,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
indexCreatedVersion
);
this.withinMultiField = withinMultiField;
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
}

@Override
Expand All @@ -135,7 +144,8 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
context.isSourceSynthetic(),
meta.getValue(),
withinMultiField,
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField()
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(),
storedFieldInBinaryFormat
);
return ft;
}
Expand All @@ -156,7 +166,13 @@ public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
}

public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
(n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexAnalyzers(),
c.isWithinMultiField(),
c.indexVersionCreated().onOrAfter(IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES)
)
);

public static class MatchOnlyTextFieldType extends StringFieldType {
Expand All @@ -167,6 +183,7 @@ public static class MatchOnlyTextFieldType extends StringFieldType {

private final boolean withinMultiField;
private final boolean hasCompatibleMultiFields;
private final boolean storedFieldInBinaryFormat;

public MatchOnlyTextFieldType(
String name,
Expand All @@ -175,14 +192,16 @@ public MatchOnlyTextFieldType(
boolean isSyntheticSource,
Map<String, String> meta,
boolean withinMultiField,
boolean hasCompatibleMultiFields
boolean hasCompatibleMultiFields,
boolean storedFieldInBinaryFormat
) {
super(name, true, false, false, tsi, meta);
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
this.textFieldType = new TextFieldType(name, isSyntheticSource);
this.originalName = isSyntheticSource ? name + "._original" : null;
this.withinMultiField = withinMultiField;
this.hasCompatibleMultiFields = hasCompatibleMultiFields;
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
}

public MatchOnlyTextFieldType(String name) {
Expand All @@ -193,6 +212,7 @@ public MatchOnlyTextFieldType(String name) {
false,
Collections.emptyMap(),
false,
false,
false
);
}
Expand Down Expand Up @@ -451,7 +471,11 @@ protected BytesRef toBytesRef(Object v) {
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (textFieldType.isSyntheticSource()) {
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
if (storedFieldInBinaryFormat) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
} else {
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
}
}
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
// MatchOnlyText never has norms, so we have to use the field names field
Expand Down Expand Up @@ -502,6 +526,7 @@ private String storedFieldNameForSyntheticSource() {
private final boolean storeSource;
private final FieldType fieldType;
private final boolean withinMultiField;
private final boolean storedFieldInBinaryFormat;

private MatchOnlyTextFieldMapper(
String simpleName,
Expand All @@ -521,6 +546,7 @@ private MatchOnlyTextFieldMapper(
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.storeSource = storeSource;
this.withinMultiField = builder.withinMultiField;
this.storedFieldInBinaryFormat = builder.storedFieldInBinaryFormat;
}

@Override
Expand All @@ -530,7 +556,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField, storedFieldInBinaryFormat).init(this);
}

@Override
Expand All @@ -547,8 +573,12 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
context.addToFieldNames(fieldType().name());

if (storeSource) {
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
if (storedFieldInBinaryFormat) {
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
} else {
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value.string()));
}
Comment on lines +584 to +589
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% sure about this. Maybe we want to continue writing in binary format even for older indices? Since we already updated the mapper to handle mixed string and byteref values, we may as well take advantage of the throughput benefits?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this way the logic is simpler? So maybe keep it like this? Indices will rollover at some point and then binary format will be used.

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_2 = def(9_030_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2);
public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should already add the 8.19 version (including logic) to this PR, which should make back porting easier.


/*
* STOP! READ THIS FIRST! No, really,
Expand Down