|
27 | 27 | import org.elasticsearch.index.mapper.Mapper; |
28 | 28 | import org.elasticsearch.index.mapper.MapperService; |
29 | 29 | import org.elasticsearch.index.mapper.SeqNoFieldMapper; |
| 30 | +import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; |
| 31 | +import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; |
30 | 32 | import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; |
31 | 33 |
|
| 34 | +import java.util.Collections; |
| 35 | +import java.util.HashSet; |
| 36 | +import java.util.Set; |
| 37 | + |
32 | 38 | /** |
33 | 39 | * Class that encapsulates the logic of figuring out the most appropriate file format for a given field, across postings, doc values and |
34 | 40 | * vectors. |
35 | 41 | */ |
36 | 42 | public class PerFieldFormatSupplier { |
37 | 43 |
|
38 | | - private static final FeatureFlag SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT = new FeatureFlag("seqno_field_use_tsdb_doc_values_format"); |
| 44 | + static final FeatureFlag SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT = new FeatureFlag("seqno_field_use_tsdb_doc_values_format"); |
| 45 | + private static final Set<String> INCLUDE_META_FIELDS; |
| 46 | + |
| 47 | + static { |
| 48 | + // TODO: should we just allow all fields to use tsdb doc values codec? |
| 49 | + // Avoid using tsdb codec for fields like _seq_no, _primary_term. |
| 50 | + // But _tsid and _ts_routing_hash should always use the tsdb codec. |
| 51 | + Set<String> includeMetaField = new HashSet<>(3); |
| 52 | + includeMetaField.add(TimeSeriesIdFieldMapper.NAME); |
| 53 | + includeMetaField.add(TimeSeriesRoutingHashFieldMapper.NAME); |
| 54 | + if (SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT.isEnabled()) { |
| 55 | + includeMetaField.add(SeqNoFieldMapper.NAME); |
| 56 | + } |
| 57 | + // Don't include the _recovery_source_size and _recovery_source fields, since their values can be trimmed away in |
| 58 | + // RecoverySourcePruneMergePolicy, which leads to inconsistencies between merge stats and actual values. |
| 59 | + INCLUDE_META_FIELDS = Collections.unmodifiableSet(includeMetaField); |
| 60 | + } |
39 | 61 |
|
40 | 62 | private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat(); |
41 | 63 | private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat(); |
@@ -126,13 +148,7 @@ boolean useTSDBDocValuesFormat(final String field) { |
126 | 148 | } |
127 | 149 |
|
128 | 150 | private boolean excludeFields(String fieldName) { |
129 | | - // TODO: should we just allow all fields to use tsdb doc values codec? |
130 | | - // Avoid using tsdb codec for fields like _seq_no, _primary_term. |
131 | | - // But _tsid and _ts_routing_hash should always use the tsdb codec. |
132 | | - return fieldName.startsWith("_") |
133 | | - && fieldName.equals("_tsid") == false |
134 | | - && fieldName.equals("_ts_routing_hash") == false |
135 | | - && (SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT.isEnabled() && fieldName.equals(SeqNoFieldMapper.NAME) == false); |
| 151 | + return fieldName.startsWith("_") && INCLUDE_META_FIELDS.contains(fieldName) == false; |
136 | 152 | } |
137 | 153 |
|
138 | 154 | private boolean isTimeSeriesModeIndex() { |
|
0 commit comments