Skip to content

Commit 778ab8f

Browse files
Re-structure document ID generation favoring _id inverted index compression (#104683)
This implementation restructures auto-generated document IDs to maximize compression within Lucene's terms dictionary. The key insight is placing stable or slowly-changing components at the start of the ID - the most significant bytes of the timestamp change very gradually (the first byte shifts only about every 35 years, the second about every 50 days). This careful ordering means that large sequences of IDs generated close in time will share common prefixes, allowing Lucene's Finite State Transducer (FST) to store terms more compactly. To maintain uniqueness while preserving these compression benefits, the ID combines three elements: a timestamp that ensures time-based ordering, the coordinator's MAC address for cluster-wide uniqueness, and a sequence number that distinguishes IDs generated within the same millisecond in high-throughput scenarios. The timestamp handling is particularly robust, using atomic operations to prevent backwards movement even if the system clock shifts. For high-volume indices generating millions of documents, this optimization can lead to substantial storage savings while maintaining strict guarantees about ID uniqueness and ordering.
1 parent 7039a1d commit 778ab8f

File tree

10 files changed

+225
-55
lines changed

10 files changed

+225
-55
lines changed

docs/changelog/104683.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 104683
2+
summary: "Feature: re-structure document ID generation favoring _id inverted index compression"
3+
area: Logs
4+
type: enhancement
5+
issues: []

server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkIntegrationIT.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,11 @@ public void testBulkWithWriteIndexAndRouting() {
9999
// allowing the auto-generated timestamp to externally be set would allow making the index inconsistent with duplicate docs
100100
public void testExternallySetAutoGeneratedTimestamp() {
101101
IndexRequest indexRequest = new IndexRequest("index1").source(Collections.singletonMap("foo", "baz"));
102-
indexRequest.autoGenerateId();
102+
if (randomBoolean()) {
103+
indexRequest.autoGenerateId();
104+
} else {
105+
indexRequest.autoGenerateTimeBasedId();
106+
}
103107
if (randomBoolean()) {
104108
indexRequest.id("test");
105109
}

server/src/main/java/org/elasticsearch/action/index/IndexRequest.java

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import java.util.Locale;
5252
import java.util.Map;
5353
import java.util.Objects;
54+
import java.util.function.Supplier;
5455

5556
import static org.elasticsearch.action.ValidateActions.addValidationError;
5657
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
@@ -76,6 +77,9 @@ public class IndexRequest extends ReplicatedWriteRequest<IndexRequest> implement
7677
private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(IndexRequest.class);
7778
private static final TransportVersion PIPELINES_HAVE_RUN_FIELD_ADDED = TransportVersions.V_8_10_X;
7879

80+
private static final Supplier<String> ID_GENERATOR = UUIDs::base64UUID;
81+
private static final Supplier<String> K_SORTED_TIME_BASED_ID_GENERATOR = UUIDs::base64TimeBasedKOrderedUUID;
82+
7983
/**
8084
* Max length of the source document to include into string()
8185
*
@@ -692,10 +696,18 @@ public void process(IndexRouting indexRouting) {
692696
* request compatible with the append-only optimization.
693697
*/
694698
public void autoGenerateId() {
695-
assert id == null;
696-
assert autoGeneratedTimestamp == UNSET_AUTO_GENERATED_TIMESTAMP : "timestamp has already been generated!";
697-
assert ifSeqNo == UNASSIGNED_SEQ_NO;
698-
assert ifPrimaryTerm == UNASSIGNED_PRIMARY_TERM;
699+
assertBeforeGeneratingId();
700+
autoGenerateTimestamp();
701+
id(ID_GENERATOR.get());
702+
}
703+
704+
public void autoGenerateTimeBasedId() {
705+
assertBeforeGeneratingId();
706+
autoGenerateTimestamp();
707+
id(K_SORTED_TIME_BASED_ID_GENERATOR.get());
708+
}
709+
710+
private void autoGenerateTimestamp() {
699711
/*
700712
* Set the auto generated timestamp so the append only optimization
701713
* can quickly test if this request *must* be unique without reaching
@@ -704,8 +716,13 @@ public void autoGenerateId() {
704716
* never work before 1970, but that's ok. It's after 1970.
705717
*/
706718
autoGeneratedTimestamp = Math.max(0, System.currentTimeMillis());
707-
String uid = UUIDs.base64UUID();
708-
id(uid);
719+
}
720+
721+
private void assertBeforeGeneratingId() {
722+
assert id == null;
723+
assert autoGeneratedTimestamp == UNSET_AUTO_GENERATED_TIMESTAMP : "timestamp has already been generated!";
724+
assert ifSeqNo == UNASSIGNED_SEQ_NO;
725+
assert ifPrimaryTerm == UNASSIGNED_PRIMARY_TERM;
709726
}
710727

711728
/**

server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
import org.elasticsearch.common.xcontent.XContentHelper;
2525
import org.elasticsearch.core.Nullable;
2626
import org.elasticsearch.features.NodeFeature;
27+
import org.elasticsearch.index.IndexMode;
28+
import org.elasticsearch.index.IndexVersion;
2729
import org.elasticsearch.index.IndexVersions;
2830
import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
2931
import org.elasticsearch.transport.Transports;
@@ -147,11 +149,15 @@ public void checkIndexSplitAllowed() {}
147149

148150
private abstract static class IdAndRoutingOnly extends IndexRouting {
149151
private final boolean routingRequired;
152+
private final IndexVersion creationVersion;
153+
private final IndexMode indexMode;
150154

151155
IdAndRoutingOnly(IndexMetadata metadata) {
152156
super(metadata);
157+
this.creationVersion = metadata.getCreationVersion();
153158
MappingMetadata mapping = metadata.mapping();
154159
this.routingRequired = mapping == null ? false : mapping.routingRequired();
160+
this.indexMode = metadata.getIndexMode();
155161
}
156162

157163
protected abstract int shardId(String id, @Nullable String routing);
@@ -161,7 +167,11 @@ public void process(IndexRequest indexRequest) {
161167
// generate id if not already provided
162168
final String id = indexRequest.id();
163169
if (id == null) {
164-
indexRequest.autoGenerateId();
170+
if (creationVersion.onOrAfter(IndexVersions.TIME_BASED_K_ORDERED_DOC_ID) && indexMode == IndexMode.LOGSDB) {
171+
indexRequest.autoGenerateTimeBasedId();
172+
} else {
173+
indexRequest.autoGenerateId();
174+
}
165175
} else if (id.isEmpty()) {
166176
throw new IllegalArgumentException("if _id is specified it must not be empty");
167177
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.common;
11+
12+
import java.nio.ByteBuffer;
13+
import java.util.Base64;
14+
15+
/**
16+
* Generates base64-encoded, k-ordered UUID strings optimized for compression and efficient indexing.
17+
* <p>
18+
* Each UUID is time-based and places slowly changing components, such as the high-order timestamp bytes, first,
19+
* improving prefix-sharing and compression during indexing. It ensures uniqueness across nodes by incorporating
20+
* a timestamp, a MAC address, and a sequence ID.
21+
* <p>
22+
* <b>Timestamp:</b> Represents the current time in milliseconds, ensuring ordering and uniqueness.
23+
* <br>
24+
* <b>MAC Address:</b> Ensures uniqueness across different coordinators.
25+
* <br>
26+
* <b>Sequence ID:</b> Differentiates UUIDs generated within the same millisecond, ensuring uniqueness even at high throughput.
27+
* <p>
28+
* The result is a compact base64-encoded string (15 raw bytes, encoded without padding), optimized for efficient compression of the _id field in an inverted index.
29+
*/
30+
public class TimeBasedKOrderedUUIDGenerator extends TimeBasedUUIDGenerator {
31+
private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getEncoder().withoutPadding();
32+
33+
@Override
34+
public String getBase64UUID() {
35+
final int sequenceId = this.sequenceNumber.incrementAndGet() & 0x00FF_FFFF; // keep only the low 3 bytes of the counter
36+
37+
// Calculate timestamp to ensure ordering and avoid backward movement in case of time shifts.
38+
// Uses AtomicLong to guarantee that the timestamp never decreases even if the system clock moves backward.
39+
// If the sequenceId wraps around to 0, the timestamp is additionally advanced by one millisecond past the
40+
// larger of the last and current timestamps to ensure strict ordering.
41+
long timestamp = this.lastTimestamp.accumulateAndGet(
42+
currentTimeMillis(),
43+
sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max
44+
);
45+
46+
final byte[] uuidBytes = new byte[15]; // 6 timestamp bytes + 6 MAC address bytes + 3 sequence bytes
47+
final ByteBuffer buffer = ByteBuffer.wrap(uuidBytes);
48+
49+
buffer.put((byte) (timestamp >>> 40)); // changes every ~35 years
50+
buffer.put((byte) (timestamp >>> 32)); // changes every ~50 days
51+
buffer.put((byte) (timestamp >>> 24)); // changes every ~4.5h
52+
buffer.put((byte) (timestamp >>> 16)); // changes every ~65 secs
53+
54+
// The MAC address may differ between consecutive IDs when the cluster has many coordinators
55+
// and indexing requests do not always target the same coordinator.
56+
byte[] macAddress = macAddress();
57+
assert macAddress.length == 6;
58+
buffer.put(macAddress, 0, macAddress.length);
59+
60+
buffer.put((byte) (sequenceId >>> 16)); // highest-order sequence byte
61+
62+
// From here on the bytes are almost random and do not compress well
63+
// because prefix-sharing is unlikely
64+
buffer.put((byte) (timestamp >>> 8));
65+
buffer.put((byte) (sequenceId >>> 8));
66+
buffer.put((byte) timestamp);
67+
buffer.put((byte) sequenceId);
68+
69+
assert buffer.position() == uuidBytes.length;
70+
71+
return BASE_64_NO_PADDING.encodeToString(uuidBytes);
72+
}
73+
}

server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ class TimeBasedUUIDGenerator implements UUIDGenerator {
2424

2525
// We only use bottom 3 bytes for the sequence number. Paranoia: init with random int so that if JVM/OS/machine goes down, clock slips
2626
// backwards, and JVM comes back up, we are less likely to be on the same sequenceNumber at the same time:
27-
private final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
27+
protected final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
2828

2929
// Used to ensure clock moves forward:
30-
private final AtomicLong lastTimestamp = new AtomicLong(0);
30+
protected final AtomicLong lastTimestamp = new AtomicLong(0);
3131

3232
private static final byte[] SECURE_MUNGED_ADDRESS = MacAddressProvider.getSecureMungedAddress();
3333

server/src/main/java/org/elasticsearch/common/UUIDs.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
public class UUIDs {
1717

1818
private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator();
19+
20+
private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator();
1921
private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator();
2022

2123
/**
@@ -33,6 +35,14 @@ public static String base64UUID() {
3335
return TIME_UUID_GENERATOR.getBase64UUID();
3436
}
3537

38+
public static String base64TimeBasedKOrderedUUID() {
39+
return TIME_BASED_K_ORDERED_GENERATOR.getBase64UUID();
40+
}
41+
42+
public static String base64TimeBasedUUID() {
43+
return TIME_UUID_GENERATOR.getBase64UUID();
44+
}
45+
3646
/**
3747
* The length of a UUID string generated by {@link #randomBase64UUID} and {@link #randomBase64UUIDSecureString}.
3848
*/

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ private static Version parseUnchecked(String version) {
132132
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT = def(8_519_00_0, Version.LUCENE_9_12_0);
133133
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_00_0, Version.LUCENE_10_0_0);
134134
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_00_0, Version.LUCENE_10_0_0);
135+
public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_00_0, Version.LUCENE_10_0_0);
135136
/*
136137
* STOP! READ THIS FIRST! No, really,
137138
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/test/java/org/elasticsearch/action/index/IndexRequestTests.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,12 @@ public void testAutoGenerateId() {
128128
assertTrue("expected > 0 but got: " + request.getAutoGeneratedTimestamp(), request.getAutoGeneratedTimestamp() > 0);
129129
}
130130

131+
public void testAutoGenerateTimeBasedId() {
132+
IndexRequest request = new IndexRequest("index");
133+
request.autoGenerateTimeBasedId();
134+
assertTrue("expected > 0 but got: " + request.getAutoGeneratedTimestamp(), request.getAutoGeneratedTimestamp() > 0);
135+
}
136+
131137
public void testIndexResponse() {
132138
ShardId shardId = new ShardId(randomAlphaOfLengthBetween(3, 10), randomAlphaOfLengthBetween(3, 10), randomIntBetween(0, 1000));
133139
String id = randomAlphaOfLengthBetween(3, 10);

0 commit comments

Comments
 (0)