77//! # ID Structure
88//!
99//! ```text
10- //! | 42 bits: timestamp (ms since epoch) | 22 bits: random |
10+ //! | 42 bits: timestamp (ms since epoch) | 22 bits: sequence |
1111//! ```
1212//!
1313//! - Timestamp: milliseconds since 2024-01-01 00:00:00 UTC (~139 years range)
14- //! - Random: ~4 million unique IDs per millisecond (collision-resistant )
14+ //! - Sequence: counter that increments within each millisecond (up to ~4.2M unique IDs/ms per process)
1515//!
1616//! # Persistence
1717//!
2525//! - **Timestamp portion (42 bits)**: Predictable to within milliseconds. An attacker who knows
2626//! when a node will start can estimate the timestamp portion.
2727//!
28- //! - **Random portion (22 bits)**: Uses `rand::rng()` (OS-provided CSPRNG on most platforms) for
29- //! 4.2 million possible values per millisecond. This makes exact ID prediction impractical
30- //! without the following considerations:
28+ //! - **Sequence portion (22 bits)**: Deterministic counter within each millisecond. IDs are
29+ //! predictable if the generation time and sequence position are known.
3130//!
3231//! - **Threat: Malicious ID Manipulation**: An attacker could generate an ID with an artificially
3332//! low timestamp to win leader election. Mitigations:
3635//! - Network-level access controls limit cluster participation
3736//! - Production deployments should use authenticated discovery
3837//!
39- //! - **Threat: ID Collision**: With 22 bits of randomness, birthday paradox gives ~1.2% collision
40- //! probability at 10,000 IDs per millisecond. In practice, cluster formation involves at most
41- //! tens of nodes, making collisions extremely unlikely.
38+ //! - **Uniqueness Guarantee**: Unlike random-based approaches, the sequence counter guarantees no
39+ //! collisions within the same process. Cross-process collisions remain possible: every process
40+ //! restarts its sequence at 0 each millisecond, so two processes that generate an ID in the same
41+ //! millisecond collide. This is rare in practice since node ID generation happens once at startup.
4242//!
4343//! For environments requiring stronger guarantees, consider using hardware security
4444//! modules (HSMs) or centralized ID assignment from a trusted coordinator.
@@ -48,17 +48,29 @@ use std::{
4848 time:: { SystemTime , UNIX_EPOCH } ,
4949} ;
5050
51- use rand :: Rng ;
51+ use parking_lot :: Mutex ;
5252use snafu:: { ResultExt , Snafu } ;
5353
5454/// Custom epoch: 2024-01-01 00:00:00 UTC (milliseconds since Unix epoch).
5555const EPOCH_MS : u64 = 1704067200000 ;
5656
57- /// Number of bits used for the random portion.
58- const RANDOM_BITS : u32 = 22 ;
57+ /// Number of bits used for the sequence portion.
58+ const SEQUENCE_BITS : u32 = 22 ;
5959
60- /// Mask for extracting the random portion (22 bits).
61- const RANDOM_MASK : u64 = ( 1 << RANDOM_BITS ) - 1 ;
60+ /// Mask for extracting the sequence portion (22 bits).
61+ const SEQUENCE_MASK : u64 = ( 1 << SEQUENCE_BITS ) - 1 ;
62+
63+ /// State for sequence-based ID generation.
64+ struct SnowflakeState {
65+ /// Last timestamp used for ID generation.
66+ last_timestamp : u64 ,
67+ /// Sequence counter within the current millisecond.
68+ sequence : u64 ,
69+ }
70+
71+ /// Global state for thread-safe ID generation.
72+ static SNOWFLAKE_STATE : Mutex < SnowflakeState > =
73+ Mutex :: new ( SnowflakeState { last_timestamp : 0 , sequence : 0 } ) ;
6274
6375/// Error type for node ID operations.
6476#[ derive( Debug , Snafu ) ]
@@ -106,8 +118,9 @@ pub enum NodeIdError {
106118
107119/// Generate a new Snowflake ID.
108120///
109- /// The ID combines a timestamp (milliseconds since 2024-01-01) with random bits
110- /// to produce a globally unique, time-ordered identifier.
121+ /// The ID combines a timestamp (milliseconds since 2024-01-01) with a sequence counter
122+ /// to produce a time-ordered identifier that is unique within this process. The sequence
123+ /// counter allows up to ~4.2 million distinct IDs per millisecond.
111124///
112125/// # Errors
113126///
@@ -123,7 +136,7 @@ pub enum NodeIdError {
123136/// std::thread::sleep(std::time::Duration::from_millis(1));
124137/// let id2 = generate_snowflake_id().unwrap();
125138///
126- /// // IDs generated later have higher values (with very high probability )
139+ /// // IDs generated later have higher values (guaranteed within a single process)
127140/// assert!(id2 > id1);
128141/// ```
129142pub fn generate_snowflake_id ( ) -> Result < u64 , NodeIdError > {
@@ -133,9 +146,37 @@ pub fn generate_snowflake_id() -> Result<u64, NodeIdError> {
133146 . as_millis ( ) as u64 ;
134147
135148 let timestamp = now_ms. saturating_sub ( EPOCH_MS ) ;
136- let random: u64 = rand:: rng ( ) . random :: < u64 > ( ) & RANDOM_MASK ;
137149
138- Ok ( ( timestamp << RANDOM_BITS ) | random)
150+ let mut state = SNOWFLAKE_STATE . lock ( ) ;
151+
152+ let sequence = if timestamp > state. last_timestamp {
153+ // New millisecond - reset sequence
154+ state. last_timestamp = timestamp;
155+ state. sequence = 0 ;
156+ 0
157+ } else if timestamp == state. last_timestamp {
158+ // Same millisecond - increment sequence
159+ state. sequence += 1 ;
160+ if state. sequence > SEQUENCE_MASK {
161+ // Sequence overflow - wait for next millisecond
162+ // This is extremely rare (>4.2M IDs in 1ms) but we handle it safely
163+ drop ( state) ; // Release lock while waiting
164+ std:: thread:: sleep ( std:: time:: Duration :: from_millis ( 1 ) ) ;
165+ return generate_snowflake_id ( ) ; // Recurse with new timestamp
166+ }
167+ state. sequence
168+ } else {
169+ // Clock went backwards - use last timestamp to maintain monotonicity
170+ state. sequence += 1 ;
171+ if state. sequence > SEQUENCE_MASK {
172+ drop ( state) ;
173+ std:: thread:: sleep ( std:: time:: Duration :: from_millis ( 1 ) ) ;
174+ return generate_snowflake_id ( ) ;
175+ }
176+ state. sequence
177+ } ;
178+
179+ Ok ( ( state. last_timestamp << SEQUENCE_BITS ) | sequence)
139180}
140181
141182/// Extract the timestamp portion from a Snowflake ID.
@@ -144,14 +185,14 @@ pub fn generate_snowflake_id() -> Result<u64, NodeIdError> {
144185#[ must_use]
145186#[ cfg( test) ]
146187pub fn extract_timestamp ( id : u64 ) -> u64 {
147- id >> RANDOM_BITS
188+ id >> SEQUENCE_BITS
148189}
149190
150- /// Extract the random portion from a Snowflake ID.
191+ /// Extract the sequence portion from a Snowflake ID.
151192#[ must_use]
152193#[ cfg( test) ]
153- pub fn extract_random ( id : u64 ) -> u64 {
154- id & RANDOM_MASK
194+ pub fn extract_sequence ( id : u64 ) -> u64 {
195+ id & SEQUENCE_MASK
155196}
156197
157198/// Load existing node ID or generate and persist a new one.
@@ -263,28 +304,27 @@ mod tests {
263304 let id = generate_snowflake_id ( ) . unwrap ( ) ;
264305
265306 let timestamp = extract_timestamp ( id) ;
266- let random = extract_random ( id) ;
307+ let sequence = extract_sequence ( id) ;
267308
268309 // Verify reconstruction
269- let reconstructed = ( timestamp << RANDOM_BITS ) | random ;
310+ let reconstructed = ( timestamp << SEQUENCE_BITS ) | sequence ;
270311 assert_eq ! ( id, reconstructed, "ID should reconstruct from parts" ) ;
271312
272313 // Timestamp should be reasonable (after epoch, not too far in future)
273314 assert ! ( timestamp > 0 , "timestamp should be positive" ) ;
274315 assert ! ( timestamp < ( 1u64 << TIMESTAMP_BITS ) , "timestamp should fit in 42 bits" ) ;
275316
276- // Random should fit in 22 bits
277- assert ! ( random <= RANDOM_MASK , "random portion should fit in 22 bits" ) ;
317+ // Sequence should fit in 22 bits
318+ assert ! ( sequence <= SEQUENCE_MASK , "sequence portion should fit in 22 bits" ) ;
278319 }
279320
280321 #[ test]
281322 fn test_snowflake_ids_are_unique ( ) {
282323 // Generate IDs quickly and verify uniqueness.
283- // With 22 bits of randomness (~4.2M possibilities), collision probability
284- // for n IDs is ~n²/(2*4.2M). For 100 IDs: ~0.0001%, very safe.
285- // We reduced from 1000 to 100 to avoid rare CI flakiness.
324+ // With a sequence counter, uniqueness is guaranteed within the same process.
325+ // Test with 1000 IDs to verify the sequence counter works correctly.
286326 let mut ids = HashSet :: new ( ) ;
287- for _ in 0 ..100 {
327+ for _ in 0 ..1000 {
288328 let id = generate_snowflake_id ( ) . unwrap ( ) ;
289329 assert ! ( ids. insert( id) , "Snowflake IDs should be unique, got duplicate: {id}" ) ;
290330 }
@@ -377,10 +417,32 @@ mod tests {
377417 #[ test]
378418 fn test_bit_allocation ( ) {
379419 // Verify bit allocation: 42 + 22 = 64
380- assert_eq ! ( TIMESTAMP_BITS + RANDOM_BITS , 64 ) ;
420+ assert_eq ! ( TIMESTAMP_BITS + SEQUENCE_BITS , 64 ) ;
381421
382422 // Verify mask covers exactly 22 bits
383- assert_eq ! ( RANDOM_MASK , 0x3FFFFF ) ;
384- assert_eq ! ( RANDOM_MASK . count_ones( ) , 22 ) ;
423+ assert_eq ! ( SEQUENCE_MASK , 0x3FFFFF ) ;
424+ assert_eq ! ( SEQUENCE_MASK . count_ones( ) , 22 ) ;
425+ }
426+
427+ #[ test]
428+ fn test_sequence_increments_within_same_millisecond ( ) {
429+ // Generate multiple IDs rapidly - sequence should increment
430+ let id1 = generate_snowflake_id ( ) . unwrap ( ) ;
431+ let id2 = generate_snowflake_id ( ) . unwrap ( ) ;
432+ let id3 = generate_snowflake_id ( ) . unwrap ( ) ;
433+
434+ // All IDs should be unique and increasing
435+ assert ! ( id2 > id1, "IDs should be monotonically increasing" ) ;
436+ assert ! ( id3 > id2, "IDs should be monotonically increasing" ) ;
437+
438+ // If they're in the same millisecond, sequence should increment
439+ let ts1 = extract_timestamp ( id1) ;
440+ let ts2 = extract_timestamp ( id2) ;
441+
442+ if ts1 == ts2 {
443+ let seq1 = extract_sequence ( id1) ;
444+ let seq2 = extract_sequence ( id2) ;
445+ assert_eq ! ( seq2, seq1 + 1 , "sequence should increment within same millisecond" ) ;
446+ }
385447 }
386448}
0 commit comments