Skip to content

Commit 6b7751d

Browse files
Replace encoder with url encoder (elastic#116699)
Document IDs are frequently used in HTTP requests, such as `GET /index/_doc/{id}`, where they must be URL-safe to avoid issues with invalid characters. This change ensures that IDs generated by `TimeBasedKOrderedUUIDGenerator` are properly Base64 URL-encoded and free of characters that could break URLs. We also test that no IDs include invalid characters like +, /, or = to guarantee they are fully compliant with URL-safe requirements. Moreover, `TimeBasedKOrderedUUIDGenerator` and `TimeBasedUUIDGenerator` are refactored to allow injection of dependencies, which enables us to increase test coverage by including tests for high-throughput scenarios, sequence ID overflow, and unreliable clocks.
1 parent 8cd4a26 commit 6b7751d

File tree

5 files changed

+354
-42
lines changed

5 files changed

+354
-42
lines changed

server/src/main/java/org/elasticsearch/common/TimeBasedKOrderedUUIDGenerator.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
package org.elasticsearch.common;
1111

1212
import java.nio.ByteBuffer;
13-
import java.util.Base64;
13+
import java.util.function.Supplier;
1414

1515
/**
1616
* Generates a base64-encoded, k-ordered UUID string optimized for compression and efficient indexing.
@@ -28,18 +28,25 @@
2828
* The result is a compact base64-encoded string, optimized for efficient compression of the _id field in an inverted index.
2929
*/
3030
public class TimeBasedKOrderedUUIDGenerator extends TimeBasedUUIDGenerator {
31-
private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getEncoder().withoutPadding();
31+
32+
public TimeBasedKOrderedUUIDGenerator(
33+
final Supplier<Long> timestampSupplier,
34+
final Supplier<Integer> sequenceIdSupplier,
35+
final Supplier<byte[]> macAddressSupplier
36+
) {
37+
super(timestampSupplier, sequenceIdSupplier, macAddressSupplier);
38+
}
3239

3340
@Override
3441
public String getBase64UUID() {
35-
final int sequenceId = this.sequenceNumber.incrementAndGet() & 0x00FF_FFFF;
42+
final int sequenceId = sequenceNumber.incrementAndGet() & 0x00FF_FFFF;
3643

3744
// Calculate timestamp to ensure ordering and avoid backward movement in case of time shifts.
3845
// Uses AtomicLong to guarantee that timestamp increases even if the system clock moves backward.
3946
// If the sequenceId overflows (reaches 0 within the same millisecond), the timestamp is incremented
4047
// to ensure strict ordering.
4148
long timestamp = this.lastTimestamp.accumulateAndGet(
42-
currentTimeMillis(),
49+
timestampSupplier.get(),
4350
sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max
4451
);
4552

@@ -68,6 +75,6 @@ public String getBase64UUID() {
6875

6976
assert buffer.position() == uuidBytes.length;
7077

71-
return BASE_64_NO_PADDING.encodeToString(uuidBytes);
78+
return Strings.BASE_64_NO_PADDING_URL_ENCODER.encodeToString(uuidBytes);
7279
}
7380
}

server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import java.util.concurrent.atomic.AtomicInteger;
1313
import java.util.concurrent.atomic.AtomicLong;
14+
import java.util.function.Supplier;
1415

1516
/**
1617
* These are essentially flake ids but we use 6 (not 8) bytes for timestamp, and use 3 (not 2) bytes for sequence number. We also reorder
@@ -19,34 +20,41 @@
1920
* For more information about flake ids, check out
2021
* https://archive.fo/2015.07.08-082503/http://www.boundary.com/blog/2012/01/flake-a-decentralized-k-ordered-unique-id-generator-in-erlang/
2122
*/
22-
2323
class TimeBasedUUIDGenerator implements UUIDGenerator {
2424

2525
// We only use bottom 3 bytes for the sequence number. Paranoia: init with random int so that if JVM/OS/machine goes down, clock slips
2626
// backwards, and JVM comes back up, we are less likely to be on the same sequenceNumber at the same time:
27-
protected final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
27+
protected final AtomicInteger sequenceNumber;
28+
protected final AtomicLong lastTimestamp;
2829

29-
// Used to ensure clock moves forward:
30-
protected final AtomicLong lastTimestamp = new AtomicLong(0);
30+
protected final Supplier<Long> timestampSupplier;
3131

3232
private static final byte[] SECURE_MUNGED_ADDRESS = MacAddressProvider.getSecureMungedAddress();
3333

3434
static {
3535
assert SECURE_MUNGED_ADDRESS.length == 6;
3636
}
3737

38-
// protected for testing
39-
protected long currentTimeMillis() {
40-
return System.currentTimeMillis();
38+
static final int SIZE_IN_BYTES = 15;
39+
private final byte[] macAddress;
40+
41+
TimeBasedUUIDGenerator(
42+
final Supplier<Long> timestampSupplier,
43+
final Supplier<Integer> sequenceIdSupplier,
44+
final Supplier<byte[]> macAddressSupplier
45+
) {
46+
this.timestampSupplier = timestampSupplier;
47+
// NOTE: getting the mac address every time using the supplier is expensive, hence we cache it.
48+
this.macAddress = macAddressSupplier.get();
49+
this.sequenceNumber = new AtomicInteger(sequenceIdSupplier.get());
50+
// Used to ensure clock moves forward:
51+
this.lastTimestamp = new AtomicLong(0);
4152
}
4253

43-
// protected for testing
4454
protected byte[] macAddress() {
45-
return SECURE_MUNGED_ADDRESS;
55+
return macAddress;
4656
}
4757

48-
static final int SIZE_IN_BYTES = 15;
49-
5058
@Override
5159
public String getBase64UUID() {
5260
final int sequenceId = sequenceNumber.incrementAndGet() & 0xffffff;
@@ -55,7 +63,7 @@ public String getBase64UUID() {
5563
// still vulnerable if we are shut down, clock goes backwards, and we restart... for this we
5664
// randomize the sequenceNumber on init to decrease chance of collision:
5765
long timestamp = this.lastTimestamp.accumulateAndGet(
58-
currentTimeMillis(),
66+
timestampSupplier.get(),
5967
// Always force the clock to increment whenever sequence number is 0, in case we have a long
6068
// time-slip backwards:
6169
sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max

server/src/main/java/org/elasticsearch/common/UUIDs.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,29 @@
1212
import org.elasticsearch.common.settings.SecureString;
1313

1414
import java.util.Random;
15+
import java.util.concurrent.atomic.AtomicInteger;
16+
import java.util.function.Supplier;
1517

18+
/**
19+
* Utility class for generating various types of UUIDs.
20+
*/
1621
public class UUIDs {
22+
private static final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
23+
public static final Supplier<Long> DEFAULT_TIMESTAMP_SUPPLIER = System::currentTimeMillis;
24+
public static final Supplier<Integer> DEFAULT_SEQUENCE_ID_SUPPLIER = sequenceNumber::incrementAndGet;
25+
public static final Supplier<byte[]> DEFAULT_MAC_ADDRESS_SUPPLIER = MacAddressProvider::getSecureMungedAddress;
26+
private static final UUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator();
27+
private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator(
28+
DEFAULT_TIMESTAMP_SUPPLIER,
29+
DEFAULT_SEQUENCE_ID_SUPPLIER,
30+
DEFAULT_MAC_ADDRESS_SUPPLIER
31+
);
1732

18-
private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator();
19-
20-
private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator();
21-
private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator();
33+
private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator(
34+
DEFAULT_TIMESTAMP_SUPPLIER,
35+
DEFAULT_SEQUENCE_ID_SUPPLIER,
36+
DEFAULT_MAC_ADDRESS_SUPPLIER
37+
);
2238

2339
/**
2440
* The length of a UUID string generated by {@link #base64UUID}.

0 commit comments

Comments
 (0)