Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
e562e8c
Change document _id format for time series datastreams
tlrx Oct 24, 2025
9a9df49
fix bug
tlrx Oct 27, 2025
39e6cd4
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Oct 28, 2025
f6234c3
fix remaining bug
tlrx Oct 28, 2025
51d66a3
fix sorting
tlrx Oct 28, 2025
6fd8a69
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Oct 28, 2025
9babebf
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Oct 28, 2025
8cfa2fa
fix compiling and tests
tlrx Oct 28, 2025
46cc58b
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Oct 29, 2025
d885dda
fix sort config
tlrx Oct 29, 2025
b22f59c
fix sort config
tlrx Oct 29, 2025
ab2be04
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Oct 29, 2025
63911a7
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Oct 30, 2025
b50b64c
fix merge
tlrx Oct 30, 2025
933e280
compute useTimeSeriesSyntheticId in metadata
tlrx Oct 30, 2025
ad94e5d
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Nov 3, 2025
4662f94
remove update
tlrx Nov 3, 2025
b3428c7
startDocID >= 0
tlrx Nov 3, 2025
3f81d60
get from searcher
tlrx Nov 3, 2025
15a1e4c
remove comment
tlrx Nov 3, 2025
d71316d
timestamp
tlrx Nov 3, 2025
96eb36a
Update docs/changelog/137274.yaml
tlrx Nov 3, 2025
136a267
ensure no postings
tlrx Nov 3, 2025
dda5531
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Nov 3, 2025
3b22c46
remove sort
tlrx Nov 3, 2025
6a4a9e1
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Nov 4, 2025
546e23b
remove compound
tlrx Nov 4, 2025
5529733
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Nov 4, 2025
7e82813
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Nov 4, 2025
d731f9f
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Nov 5, 2025
eb05d57
Merge branch '2025/10/24/new-id-format' of github.com:tlrx/elasticsea…
tlrx Nov 5, 2025
3655dc3
feedback
tlrx Nov 5, 2025
59687e8
Merge branch 'main' into 2025/10/24/new-id-format
tlrx Nov 5, 2025
608ff67
fix setting registration
tlrx Nov 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,8 @@ public Iterator<Setting<?>> settings() {
@Nullable
private final IndexReshardingMetadata reshardingMetadata;

private final boolean useTimeSeriesSyntheticId;

private IndexMetadata(
final Index index,
final long version,
Expand Down Expand Up @@ -754,7 +756,8 @@ private IndexMetadata(
@Nullable final IndexMetadataStats stats,
@Nullable final Double writeLoadForecast,
@Nullable Long shardSizeInBytesForecast,
@Nullable IndexReshardingMetadata reshardingMetadata
@Nullable IndexReshardingMetadata reshardingMetadata,
final boolean useTimeSeriesSyntheticId
) {
this.index = index;
this.version = version;
Expand Down Expand Up @@ -815,6 +818,7 @@ private IndexMetadata(
this.shardSizeInBytesForecast = shardSizeInBytesForecast;
assert numberOfShards * routingFactor == routingNumShards : routingNumShards + " must be a multiple of " + numberOfShards;
this.reshardingMetadata = reshardingMetadata;
this.useTimeSeriesSyntheticId = useTimeSeriesSyntheticId;
}

IndexMetadata withMappingMetadata(MappingMetadata mapping) {
Expand Down Expand Up @@ -870,7 +874,8 @@ IndexMetadata withMappingMetadata(MappingMetadata mapping) {
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
this.reshardingMetadata
this.reshardingMetadata,
this.useTimeSeriesSyntheticId
);
}

Expand Down Expand Up @@ -933,7 +938,8 @@ public IndexMetadata withInSyncAllocationIds(int shardId, Set<String> inSyncSet)
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
this.reshardingMetadata
this.reshardingMetadata,
this.useTimeSeriesSyntheticId
);
}

Expand Down Expand Up @@ -1004,7 +1010,8 @@ public IndexMetadata withSetPrimaryTerm(int shardId, long primaryTerm) {
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
this.reshardingMetadata
this.reshardingMetadata,
this.useTimeSeriesSyntheticId
);
}

Expand Down Expand Up @@ -1066,7 +1073,8 @@ public IndexMetadata withTimestampRanges(IndexLongFieldRange timestampRange, Ind
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
this.reshardingMetadata
this.reshardingMetadata,
this.useTimeSeriesSyntheticId
);
}

Expand Down Expand Up @@ -1123,7 +1131,8 @@ public IndexMetadata withIncrementedVersion() {
this.stats,
this.writeLoadForecast,
this.shardSizeInBytesForecast,
this.reshardingMetadata
this.reshardingMetadata,
this.useTimeSeriesSyntheticId
);
}

Expand Down Expand Up @@ -1314,6 +1323,13 @@ public Instant getTimeSeriesEnd() {
return timeSeriesEnd;
}

/**
* Tells whether this index uses synthetic document ids for time-series data. The flag is computed once, when the
* {@code IndexMetadata} is built, and is only {@code true} when the index mode is time-series, the index was created on or
* after {@code IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID}, and the {@code IndexSettings.USE_SYNTHETIC_ID} setting is set to true.
*
* @return whether the index is a time-series index that uses synthetic ids or not.
*/
public boolean useTimeSeriesSyntheticId() {
return useTimeSeriesSyntheticId;
}

/**
* Return the concrete mapping for this index or {@code null} if this index has no mappings at all.
*/
Expand Down Expand Up @@ -2497,6 +2513,14 @@ IndexMetadata build(boolean repair) {
String indexModeString = settings.get(IndexSettings.MODE.getKey());
final IndexMode indexMode = indexModeString != null ? IndexMode.fromString(indexModeString.toLowerCase(Locale.ROOT)) : null;
final boolean isTsdb = indexMode == IndexMode.TIME_SERIES;
boolean useTimeSeriesSyntheticId = false;
if (isTsdb && indexCreatedVersion.onOrAfter(IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID)) {
var setting = settings.get(IndexSettings.USE_SYNTHETIC_ID.getKey());
if (setting != null && setting.equalsIgnoreCase(Boolean.TRUE.toString())) {
assert IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG;
useTimeSeriesSyntheticId = true;
}
}
return new IndexMetadata(
new Index(index, uuid),
version,
Expand Down Expand Up @@ -2546,7 +2570,8 @@ IndexMetadata build(boolean repair) {
stats,
indexWriteLoadForecast,
shardSizeInBytesForecast,
reshardingMetadata
reshardingMetadata,
useTimeSeriesSyntheticId
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper;
import org.elasticsearch.transport.Transports;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
Expand Down Expand Up @@ -321,6 +322,7 @@ public abstract static class ExtractFromSource extends IndexRouting {
protected final XContentParserConfiguration parserConfig;
private final IndexMode indexMode;
private final boolean trackTimeSeriesRoutingHash;
private final boolean useTimeSeriesSyntheticId;
private final boolean addIdWithRoutingHash;
private int hash = Integer.MAX_VALUE;

Expand All @@ -333,6 +335,7 @@ public abstract static class ExtractFromSource extends IndexRouting {
assert indexMode != null : "Index mode must be set for ExtractFromSource routing";
this.trackTimeSeriesRoutingHash = indexMode == IndexMode.TIME_SERIES
&& metadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_ROUTING_HASH_IN_ID);
this.useTimeSeriesSyntheticId = metadata.useTimeSeriesSyntheticId();
addIdWithRoutingHash = indexMode == IndexMode.LOGSDB;
this.parserConfig = XContentParserConfiguration.EMPTY.withFiltering(null, Set.copyOf(includePaths), null, true);
}
Expand Down Expand Up @@ -417,10 +420,19 @@ private int idToHash(String id) {
if (idBytes.length < 4) {
throw new ResourceNotFoundException("invalid id [{}] for index [{}] in " + indexMode.getName() + " mode", id, indexName);
}
// For TSDB, the hash is stored as the id prefix.
// For LogsDB with routing on sort fields, the routing hash is stored in the range[id.length - 9, id.length - 5] of the id,
// see IndexRequest#autoGenerateTimeBasedId.
return hashToShardId(ByteUtils.readIntLE(idBytes, addIdWithRoutingHash ? idBytes.length - 9 : 0));
int hash;
if (addIdWithRoutingHash) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I wonder if we would see some performance degradation from all this new branching? I wouldn't expect it to be significant, but I wanted to mention it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this specific branch is OK, but your comment made me think about how the IndexRouting is instantiated, and we don't want to read the USE_SYNTHETIC_ID setting when routing every operation.

So I pushed 933e280 to compute the useTimeSeriesSyntheticId flag once and for all when the IndexMetadata is built, and to use this flag for routing operations.

// For LogsDB with routing on sort fields, the routing hash is stored in the range[id.length - 9, id.length - 5] of the id,
// see IndexRequest#autoGenerateTimeBasedId.
hash = ByteUtils.readIntLE(idBytes, idBytes.length - 9);
} else if (useTimeSeriesSyntheticId) {
// For TSDB with synthetic ids, the hash is stored as the id suffix.
hash = TsidExtractingIdFieldMapper.extractRoutingHashFromSyntheticId(new BytesRef(idBytes));
} else {
// For TSDB, the hash is stored as the id prefix.
hash = ByteUtils.readIntLE(idBytes, 0);
}
return hashToShardId(hash);
}

@Override
Expand Down Expand Up @@ -510,7 +522,6 @@ public static class ForIndexDimensions extends ExtractFromSource {

@Override
protected int hashSource(IndexRequest indexRequest) {
// System.out.println("hashSource for tsid");
BytesRef tsid = indexRequest.tsid();
if (tsid == null) {
tsid = buildTsid(indexRequest.getContentType(), indexRequest.indexSource().bytes());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.core.Assertions;
import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper;

import java.io.IOException;
import java.util.Base64;
Expand Down Expand Up @@ -153,22 +153,30 @@ public static DocIdAndVersion timeSeriesLoadDocIdAndVersion(IndexReader reader,
* This allows this method to know whether there is no document with the specified id without loading the docid for
* the specified id.
*
* @param reader The reader load docid, version and seqno from.
* @param uid The term that describes the uid of the document to load docid, version and seqno for.
* @param id The id that contains the encoded timestamp. The timestamp is used to skip checking the id for entire segments.
* @param loadSeqNo Whether to load sequence number from _seq_no doc values field.
* @param reader The reader load docid, version and seqno from.
* @param uid The term that describes the uid of the document to load docid, version and seqno for.
* @param id The id that contains the encoded timestamp. The timestamp is used to skip checking the id for entire segments.
* @param loadSeqNo Whether to load sequence number from _seq_no doc values field.
* @param useSyntheticId Whether the id is a synthetic (true) or standard (false) document id.
* @return the internal doc ID and version for the specified term from the specified reader or
* returning <code>null</code> if no document was found for the specified id
* @throws IOException In case of an i/o related failure
*/
public static DocIdAndVersion timeSeriesLoadDocIdAndVersion(IndexReader reader, BytesRef uid, String id, boolean loadSeqNo)
throws IOException {
byte[] idAsBytes = Base64.getUrlDecoder().decode(id);
assert idAsBytes.length == 20;
// id format: [4 bytes (basic hash routing fields), 8 bytes prefix of 128 murmurhash dimension fields, 8 bytes
// @timestamp)
long timestamp = ByteUtils.readLongBE(idAsBytes, 12);

public static DocIdAndVersion timeSeriesLoadDocIdAndVersion(
IndexReader reader,
BytesRef uid,
String id,
boolean loadSeqNo,
boolean useSyntheticId
) throws IOException {
final long timestamp;
if (useSyntheticId) {
assert uid.equals(new BytesRef(Base64.getUrlDecoder().decode(id)));
timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(uid);
} else {
byte[] idAsBytes = Base64.getUrlDecoder().decode(id);
timestamp = TsidExtractingIdFieldMapper.extractTimestampFromId(idAsBytes);
}
PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, true);
List<LeafReaderContext> leaves = reader.leaves();
// iterate in default order, the segments should be sorted by DataStream#TIMESERIES_LEAF_READERS_SORTER
Expand Down
45 changes: 27 additions & 18 deletions server/src/main/java/org/elasticsearch/index/IndexMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,13 @@
import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.BooleanSupplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.stream.Collectors.toSet;

/**
* "Mode" that controls which behaviors and settings an index supports.
Expand Down Expand Up @@ -141,7 +139,16 @@ void validateWithOtherSettings(Map<Setting<?>, Object> settings) {
throw new IllegalArgumentException(error(IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING));
}

var settingsWithIndexMode = Settings.builder().put(IndexSettings.MODE.getKey(), getName()).build();
Settings settingsWithIndexMode;
if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
settingsWithIndexMode = Settings.builder()
.put(IndexSettings.MODE.getKey(), getName())
// Default values of some index sort settings depend on the feature flag and the USE_SYNTHETIC_ID setting
.put(IndexSettings.USE_SYNTHETIC_ID.getKey(), (Boolean) settings.get(IndexSettings.USE_SYNTHETIC_ID))
.build();
} else {
settingsWithIndexMode = Settings.builder().put(IndexSettings.MODE.getKey(), getName()).build();
}

for (Setting<?> unsupported : TIME_SERIES_UNSUPPORTED) {
if (false == Objects.equals(unsupported.getDefault(settingsWithIndexMode), settings.get(unsupported))) {
Expand Down Expand Up @@ -460,20 +467,22 @@ private static CompressedXContent createDefaultMapping(boolean includeHostName)
IndexSortConfig.INDEX_SORT_MISSING_SETTING
);

static final List<Setting<?>> VALIDATE_WITH_SETTINGS = List.copyOf(
Stream.concat(
Stream.of(
IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING,
IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING,
IndexMetadata.INDEX_ROUTING_PATH,
IndexMetadata.INDEX_DIMENSIONS,
IndexSettings.LOGSDB_ROUTE_ON_SORT_FIELDS,
IndexSettings.TIME_SERIES_START_TIME,
IndexSettings.TIME_SERIES_END_TIME
),
TIME_SERIES_UNSUPPORTED.stream()
).collect(toSet())
);
/**
 * Settings handed to the index mode when it validates itself against the other index settings.
 * A set is used as the intermediate collection, so a setting that is added twice is only kept once.
 */
static final List<Setting<?>> VALIDATE_WITH_SETTINGS;
static {
    final HashSet<Setting<?>> toValidate = new HashSet<>(
        List.<Setting<?>>of(
            IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING,
            IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING,
            IndexMetadata.INDEX_ROUTING_PATH,
            IndexMetadata.INDEX_DIMENSIONS,
            IndexSettings.LOGSDB_ROUTE_ON_SORT_FIELDS,
            IndexSettings.TIME_SERIES_START_TIME,
            IndexSettings.TIME_SERIES_END_TIME
        )
    );
    // The synthetic id setting only takes part in validation when its feature flag is enabled.
    if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
        toValidate.add(IndexSettings.USE_SYNTHETIC_ID);
    }
    toValidate.addAll(TIME_SERIES_UNSUPPORTED);
    VALIDATE_WITH_SETTINGS = List.copyOf(toValidate);
}

private final String name;

Expand Down
44 changes: 38 additions & 6 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,19 @@ public boolean isES87TSDBCodecEnabled() {
false,
new Setting.Validator<>() {
@Override
public void validate(Boolean value) {}
/**
 * Rejects enabling the setting while the synthetic id feature flag is off; a {@code false} value is always accepted.
 *
 * @param enabled the value being validated for the setting
 * @throws IllegalArgumentException if {@code enabled} is true and the feature flag is disabled
 */
public void validate(Boolean enabled) {
    if (enabled && TSDB_SYNTHETIC_ID_FEATURE_FLAG == false) {
        final String message = String.format(
            Locale.ROOT,
            "The setting [%s] is only permitted when the feature flag is enabled.",
            USE_SYNTHETIC_ID.getKey()
        );
        throw new IllegalArgumentException(message);
    }
}

@Override
public void validate(Boolean enabled, Map<Setting<?>, Object> settings) {
Expand Down Expand Up @@ -983,7 +995,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private final boolean recoverySourceEnabled;
private final boolean recoverySourceSyntheticEnabled;
private final boolean useDocValuesSkipper;
private final boolean tsdbSyntheticId;
private final boolean useTimeSeriesSyntheticId;

/**
* The maximum number of refresh listeners allows on this shard.
Expand Down Expand Up @@ -1170,8 +1182,28 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
&& scopedSettings.get(RECOVERY_USE_SYNTHETIC_SOURCE_SETTING);
useDocValuesSkipper = DOC_VALUES_SKIPPER && scopedSettings.get(USE_DOC_VALUES_SKIPPER);
seqNoIndexOptions = scopedSettings.get(SEQ_NO_INDEX_OPTIONS_SETTING);
tsdbSyntheticId = TSDB_SYNTHETIC_ID_FEATURE_FLAG && scopedSettings.get(USE_SYNTHETIC_ID);
assert tsdbSyntheticId == false || mode == IndexMode.TIME_SERIES : mode;
final var useSyntheticId = scopedSettings.get(USE_SYNTHETIC_ID);
if (indexMetadata.useTimeSeriesSyntheticId() != useSyntheticId) {
assert false;
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"The setting [%s] is set to [%s] but index metadata has a different value [%s].",
USE_SYNTHETIC_ID.getKey(),
useSyntheticId,
indexMetadata.useTimeSeriesSyntheticId()
)
);
}
if (useSyntheticId) {
assert TSDB_SYNTHETIC_ID_FEATURE_FLAG;
assert indexMetadata.useTimeSeriesSyntheticId();
assert indexMetadata.getIndexMode() == IndexMode.TIME_SERIES : indexMetadata.getIndexMode();
assert indexMetadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID);
useTimeSeriesSyntheticId = true;
} else {
useTimeSeriesSyntheticId = false;
}
if (recoverySourceSyntheticEnabled) {
if (DiscoveryNode.isStateless(settings)) {
throw new IllegalArgumentException("synthetic recovery source is only allowed in stateful");
Expand Down Expand Up @@ -1907,8 +1939,8 @@ public boolean useDocValuesSkipper() {
/**
* @return Whether the index is a time-series index that uses synthetic ids.
*/
public boolean useTsdbSyntheticId() {
return tsdbSyntheticId;
public boolean useTimeSeriesSyntheticId() {
return useTimeSeriesSyntheticId;
}

/**
Expand Down
Loading