
Commit 558e36b

chore: consistent hash for snapshots

1 parent db58719

5 files changed (+121 lines, -8 lines)

Cargo.lock

Lines changed: 37 additions & 4 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -33,6 +33,7 @@ async-trait = "0.1"
 arc-swap = { version = "1.7" }
 uuid = { version = "1.17.0", features = ["v4", "v5", "v7", "serde"] }
 futures-lite = { version = "2.6.0", features = ["futures-io"] }
+blake3 = "1.8"
 
 [patch.crates-io]
 # We're using a specific commit here because rust-rocksdb doesn't publish the latest version that includes the memory alignment fix.

collab/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ chrono = "0.4.22"
 unicode-segmentation = "1.10.1"
 lazy_static = "1.4.0"
 fastrand = "2.1.0"
+blake3.workspace = true
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 web-sys = { version = "0.3" }

collab/src/core/collab.rs

Lines changed: 51 additions & 3 deletions
@@ -5,11 +5,10 @@ use std::panic;
 use std::panic::AssertUnwindSafe;
 
 use arc_swap::ArcSwapOption;
+use blake3::Hasher;
+use serde_json::json;
 use std::sync::Arc;
 use std::vec::IntoIter;
-
-use serde_json::json;
-
 use tokio_stream::wrappers::WatchStream;
 use tracing::trace;
 use yrs::block::{ClientID, Prelim};
@@ -241,6 +240,55 @@ pub fn make_yrs_doc(object_id: &str, skip_gc: bool, client_id: ClientID) -> Doc
 
 pub type CollabVersion = Uuid;
 
+pub trait ConsistentHash {
+  fn hash(&self, h: &mut blake3::Hasher);
+  fn consistent_hash(&self) -> u128 {
+    use blake3::Hasher;
+    let mut h = Hasher::new();
+
+    self.hash(&mut h);
+
+    let mut hash = [0; 16];
+    h.finalize_xof().fill(&mut hash);
+    u128::from_be_bytes(hash)
+  }
+}
+
+impl ConsistentHash for yrs::StateVector {
+  fn hash(&self, h: &mut Hasher) {
+    let mut clients = self.iter().map(|(k, _)| k).collect::<Vec<_>>();
+    clients.sort();
+    for client in clients {
+      let clock = self.get(client);
+      h.update(&client.to_be_bytes());
+      h.update(&clock.to_be_bytes());
+    }
+  }
+}
+
+impl ConsistentHash for yrs::DeleteSet {
+  fn hash(&self, h: &mut Hasher) {
+    let mut clients = self.iter().map(|(c, _)| c).collect::<Vec<_>>();
+    clients.sort();
+    for client in clients {
+      if let Some(range) = self.range(client) {
+        h.update(&client.to_be_bytes());
+        for r in range.iter() {
+          h.update(&r.start.to_be_bytes());
+          h.update(&r.end.to_be_bytes());
+        }
+      }
+    }
+  }
+}
+
+impl ConsistentHash for yrs::Snapshot {
+  fn hash(&self, h: &mut Hasher) {
+    ConsistentHash::hash(&self.state_map, h);
+    ConsistentHash::hash(&self.delete_set, h);
+  }
+}
+
 pub struct CollabOptions {
   pub object_id: Uuid,
   pub data_source: Option<DataSource>,
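
Note (not part of the commit): the default consistent_hash method above only fixes how the digest is finalized; whatever bytes an implementor feeds into the blake3::Hasher, the first 16 bytes of BLAKE3's extendable (XOF) output are read back and interpreted as a big-endian u128. A minimal, standalone sketch of that scheme using only the blake3 crate (the function name truncate_to_u128 is illustrative):

fn truncate_to_u128(h: blake3::Hasher) -> u128 {
  // Read the first 16 bytes of the XOF output and turn them into a u128,
  // mirroring the default body of ConsistentHash::consistent_hash.
  let mut out = [0u8; 16];
  h.finalize_xof().fill(&mut out);
  u128::from_be_bytes(out)
}

fn main() {
  // Hash a (client, clock) pair the way the StateVector impl feeds its fields.
  let (client, clock): (u64, u32) = (0xdeadbeef, 12);

  let mut h = blake3::Hasher::new();
  h.update(&client.to_be_bytes());
  h.update(&clock.to_be_bytes());
  let id = truncate_to_u128(h);

  // The same inputs always yield the same 128-bit identifier.
  let mut h2 = blake3::Hasher::new();
  h2.update(&client.to_be_bytes());
  h2.update(&clock.to_be_bytes());
  assert_eq!(id, truncate_to_u128(h2));
}

The first bytes of BLAKE3's XOF output coincide with its regular 32-byte digest, so this is effectively a truncated content hash; sorting client IDs in the StateVector and DeleteSet impls is what keeps the byte stream, and therefore the hash, stable across runs.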

collab/tests/edit_test/state_vec_test.rs

Lines changed: 31 additions & 1 deletion
@@ -1,7 +1,8 @@
+use collab::core::collab::ConsistentHash;
 use serde_json::json;
 use yrs::types::ToJson;
 use yrs::updates::decoder::Decode;
-use yrs::{Doc, Map, MapPrelim, MapRef, ReadTxn, Transact, Update};
+use yrs::{Doc, Map, MapPrelim, MapRef, ReadTxn, StateVector, Text, Transact, Update};
 
 #[tokio::test]
 async fn state_vec_apply_test() {
@@ -120,6 +121,35 @@ async fn two_way_sync_result_undetermined() {
   assert_eq!(a, b);
 }
 
+#[test]
+fn snapshot_produces_consistent_hash() {
+  let d1 = Doc::with_client_id(0xdeadbeef);
+  let txt1 = d1.get_or_insert_text("text");
+  let mut t1 = d1.transact_mut();
+  txt1.insert(&mut t1, 0, "Hello world!");
+  txt1.remove_range(&mut t1, 4, 5);
+
+  // we need at least 2 client IDs to produce a non-trivial state vector where order could be
+  // possibly different between runs
+  let d2 = Doc::with_client_id(123);
+  let txt2 = d2.get_or_insert_text("text");
+  let mut t2 = d2.transact_mut();
+  t2.apply_update(
+    Update::decode_v1(&t1.encode_state_as_update_v1(&StateVector::default())).unwrap(),
+  )
+  .unwrap();
+  txt2.insert(&mut t2, 0, "Acronym!");
+  txt2.remove_range(&mut t2, 1, 2);
+
+  let snapshot = t2.snapshot();
+
+  // We're going to use consistent hash to uniquely identify the snapshot based on its internal
+  // state. This way we can use it as ID and quickly compare if two snapshots are identical.
+  let hash = snapshot.consistent_hash();
+  let expected: u128 = 42481876838278106308370919647884892234; // produced in previous run
+  assert_eq!(hash, expected);
+}
+
 #[tokio::test]
 async fn two_way_sync_test() {
   let doc_1 = Doc::new();
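
Note (not part of the commit): the hard-coded expected value can work because the hash depends only on the snapshot's logical state (client IDs, clocks, and deleted ranges), not on map iteration order or on which process produced it. A minimal sketch of that property, assuming the same collab and yrs APIs the test already uses (the build_hash helper is illustrative only):

use collab::core::collab::ConsistentHash;
use yrs::{Doc, ReadTxn, Text, Transact};

fn build_hash() -> u128 {
  // Rebuild the same edit history with the same client ID from scratch.
  let doc = Doc::with_client_id(7);
  let text = doc.get_or_insert_text("text");
  let mut txn = doc.transact_mut();
  text.insert(&mut txn, 0, "Hello world!");
  txn.snapshot().consistent_hash()
}

fn main() {
  // Two independent "runs" of the same history agree on the 128-bit ID,
  // which is what lets a snapshot hash double as a stable identifier.
  assert_eq!(build_hash(), build_hash());
}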
