Skip to content

Commit f281ba9

Browse files
[8.x] Replace encoder with url encoder (#116699) (#119079)
Document IDs are frequently used in HTTP requests, such as `GET /index/_doc/{id}`, where they must be URL-safe to avoid issues with invalid characters. This change ensures that IDs generated by `TimeBasedKOrderedUUIDGenerator` are Base64 URL-encoded and therefore free of characters that could break URLs. We also test that no generated ID contains `+`, `/`, or `=`, to guarantee that the IDs are fully URL-safe. Moreover, `TimeBasedKOrderedUUIDGenerator` and `TimeBasedUUIDGenerator` are refactored to allow dependency injection, which lets us increase test coverage with tests for high-throughput scenarios, sequence ID overflow, and unreliable clocks.
1 parent 1bbfec0 commit f281ba9

File tree

5 files changed

+356
-43
lines changed

5 files changed

+356
-43
lines changed

server/src/main/java/org/elasticsearch/common/TimeBasedKOrderedUUIDGenerator.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import java.nio.ByteBuffer;
1313
import java.util.Base64;
14+
import java.util.function.Supplier;
1415

1516
/**
1617
* Generates a base64-encoded, k-ordered UUID string optimized for compression and efficient indexing.
@@ -28,18 +29,27 @@
2829
* The result is a compact base64-encoded string, optimized for efficient compression of the _id field in an inverted index.
2930
*/
3031
public class TimeBasedKOrderedUUIDGenerator extends TimeBasedUUIDGenerator {
31-
private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getEncoder().withoutPadding();
32+
33+
private static final Base64.Encoder BASE_64_NO_PADDING_URL_ENCODER = Base64.getUrlEncoder().withoutPadding();
34+
35+
public TimeBasedKOrderedUUIDGenerator(
36+
final Supplier<Long> timestampSupplier,
37+
final Supplier<Integer> sequenceIdSupplier,
38+
final Supplier<byte[]> macAddressSupplier
39+
) {
40+
super(timestampSupplier, sequenceIdSupplier, macAddressSupplier);
41+
}
3242

3343
@Override
3444
public String getBase64UUID() {
35-
final int sequenceId = this.sequenceNumber.incrementAndGet() & 0x00FF_FFFF;
45+
final int sequenceId = sequenceNumber.incrementAndGet() & 0x00FF_FFFF;
3646

3747
// Calculate timestamp to ensure ordering and avoid backward movement in case of time shifts.
3848
// Uses AtomicLong to guarantee that timestamp increases even if the system clock moves backward.
3949
// If the sequenceId overflows (reaches 0 within the same millisecond), the timestamp is incremented
4050
// to ensure strict ordering.
4151
long timestamp = this.lastTimestamp.accumulateAndGet(
42-
currentTimeMillis(),
52+
timestampSupplier.get(),
4353
sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max
4454
);
4555

@@ -68,6 +78,6 @@ public String getBase64UUID() {
6878

6979
assert buffer.position() == uuidBytes.length;
7080

71-
return BASE_64_NO_PADDING.encodeToString(uuidBytes);
81+
return BASE_64_NO_PADDING_URL_ENCODER.encodeToString(uuidBytes);
7282
}
7383
}

server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import java.util.Base64;
1313
import java.util.concurrent.atomic.AtomicInteger;
1414
import java.util.concurrent.atomic.AtomicLong;
15+
import java.util.function.Supplier;
1516

1617
/**
1718
* These are essentially flake ids but we use 6 (not 8) bytes for timestamp, and use 3 (not 2) bytes for sequence number. We also reorder
@@ -20,36 +21,41 @@
2021
* For more information about flake ids, check out
2122
* https://archive.fo/2015.07.08-082503/http://www.boundary.com/blog/2012/01/flake-a-decentralized-k-ordered-unique-id-generator-in-erlang/
2223
*/
23-
2424
class TimeBasedUUIDGenerator implements UUIDGenerator {
2525

2626
// We only use bottom 3 bytes for the sequence number. Paranoia: init with random int so that if JVM/OS/machine goes down, clock slips
2727
// backwards, and JVM comes back up, we are less likely to be on the same sequenceNumber at the same time:
28-
protected final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
28+
protected final AtomicInteger sequenceNumber;
29+
protected final AtomicLong lastTimestamp;
2930

30-
// Used to ensure clock moves forward:
31-
protected final AtomicLong lastTimestamp = new AtomicLong(0);
31+
protected final Supplier<Long> timestampSupplier;
3232

3333
private static final byte[] SECURE_MUNGED_ADDRESS = MacAddressProvider.getSecureMungedAddress();
3434

3535
static {
3636
assert SECURE_MUNGED_ADDRESS.length == 6;
3737
}
38-
3938
private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getUrlEncoder().withoutPadding();
40-
41-
// protected for testing
42-
protected long currentTimeMillis() {
43-
return System.currentTimeMillis();
39+
static final int SIZE_IN_BYTES = 15;
40+
private final byte[] macAddress;
41+
42+
TimeBasedUUIDGenerator(
43+
final Supplier<Long> timestampSupplier,
44+
final Supplier<Integer> sequenceIdSupplier,
45+
final Supplier<byte[]> macAddressSupplier
46+
) {
47+
this.timestampSupplier = timestampSupplier;
48+
// NOTE: getting the mac address every time using the supplier is expensive, hence we cache it.
49+
this.macAddress = macAddressSupplier.get();
50+
this.sequenceNumber = new AtomicInteger(sequenceIdSupplier.get());
51+
// Used to ensure clock moves forward:
52+
this.lastTimestamp = new AtomicLong(0);
4453
}
4554

46-
// protected for testing
4755
protected byte[] macAddress() {
48-
return SECURE_MUNGED_ADDRESS;
56+
return macAddress;
4957
}
5058

51-
static final int SIZE_IN_BYTES = 15;
52-
5359
@Override
5460
public String getBase64UUID() {
5561
final int sequenceId = sequenceNumber.incrementAndGet() & 0xffffff;
@@ -58,7 +64,7 @@ public String getBase64UUID() {
5864
// still vulnerable if we are shut down, clock goes backwards, and we restart... for this we
5965
// randomize the sequenceNumber on init to decrease chance of collision:
6066
long timestamp = this.lastTimestamp.accumulateAndGet(
61-
currentTimeMillis(),
67+
timestampSupplier.get(),
6268
// Always force the clock to increment whenever sequence number is 0, in case we have a long
6369
// time-slip backwards:
6470
sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max

server/src/main/java/org/elasticsearch/common/UUIDs.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,29 @@
1212
import org.elasticsearch.common.settings.SecureString;
1313

1414
import java.util.Random;
15+
import java.util.concurrent.atomic.AtomicInteger;
16+
import java.util.function.Supplier;
1517

18+
/**
19+
* Utility class for generating various types of UUIDs.
20+
*/
1621
public class UUIDs {
22+
private static final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
23+
public static final Supplier<Long> DEFAULT_TIMESTAMP_SUPPLIER = System::currentTimeMillis;
24+
public static final Supplier<Integer> DEFAULT_SEQUENCE_ID_SUPPLIER = sequenceNumber::incrementAndGet;
25+
public static final Supplier<byte[]> DEFAULT_MAC_ADDRESS_SUPPLIER = MacAddressProvider::getSecureMungedAddress;
26+
private static final UUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator();
27+
private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator(
28+
DEFAULT_TIMESTAMP_SUPPLIER,
29+
DEFAULT_SEQUENCE_ID_SUPPLIER,
30+
DEFAULT_MAC_ADDRESS_SUPPLIER
31+
);
1732

18-
private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator();
19-
20-
private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator();
21-
private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator();
33+
private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator(
34+
DEFAULT_TIMESTAMP_SUPPLIER,
35+
DEFAULT_SEQUENCE_ID_SUPPLIER,
36+
DEFAULT_MAC_ADDRESS_SUPPLIER
37+
);
2238

2339
/**
2440
* The length of a UUID string generated by {@link #base64UUID}.

0 commit comments

Comments
 (0)