diff --git a/.changeset/big-suns-share.md b/.changeset/big-suns-share.md new file mode 100644 index 000000000..de4c4b634 --- /dev/null +++ b/.changeset/big-suns-share.md @@ -0,0 +1,6 @@ +--- +"loro-crdt": patch +"loro-crdt-map": patch +--- + +perf(loro-internal): remove quadratic slow paths in text import/checkout #895 diff --git a/.gitignore b/.gitignore index 57c84edc1..bae70c7ff 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ node_modules/ .idea/ coverage/ trace-*.json +.trace_analysis/ .loro loom_test.json .env diff --git a/crates/fuzz/fuzz/Cargo.lock b/crates/fuzz/fuzz/Cargo.lock index e49ce210b..931b61301 100644 --- a/crates/fuzz/fuzz/Cargo.lock +++ b/crates/fuzz/fuzz/Cargo.lock @@ -68,9 +68,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bitflags" -version = "1.3.2" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bitmaps" @@ -87,6 +87,15 @@ version = "3.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d084b0137aaa901caf9f1e8b21daa6aa24d41cd806e111335541eff9683bd6" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -137,6 +146,15 @@ dependencies = [ "thiserror 2.0.16", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "critical-section" version = "1.1.2" @@ -168,6 +186,16 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "darling" version = "0.20.10" @@ -220,6 +248,16 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "either" version = "1.10.0" @@ -301,16 +339,15 @@ dependencies = [ "ensure-cov", "enum-as-inner 0.6.0", "enum_dispatch", - "fxhash", "itertools 0.12.1", "loro 0.16.12", - "loro 0.16.2 (git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7)", - "loro 0.16.2 (git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761)", - "loro 1.5.11", + "loro 1.10.3", + "loro 1.8.1", "num_cpus", "pretty_assertions", "rand", "rayon", + "rustc-hash", "serde_json", "tabled", "tracing", @@ -322,7 +359,7 @@ version = "0.0.0" dependencies = [ "fuzz", "libfuzzer-sys", - "loro 1.5.11", + "loro 1.10.3", ] [[package]] @@ -348,18 +385,28 @@ dependencies = [ "windows", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "generic-btree" -version = "0.10.5" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "210507e6dec78bb1304e52a174bd99efdd83894219bf20d656a066a0ce2fedc5" +checksum = "a0c1bce85c110ab718fd139e0cc89c51b63bd647b14a767e24bdfc77c83df79b" dependencies = [ "arref", - "fxhash", - "heapless 0.7.17", + "heapless 0.9.2", "itertools 0.11.0", "loro-thunderdome", "proc-macro2", + "rustc-hash", ] [[package]] @@ -423,6 +470,16 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "heapless" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af2455f757db2b292a9b1768c4b70186d443bcb3b316252d6b540aec1cd89ed" +dependencies = [ + "hash32 0.3.1", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.4.1" @@ -552,11 +609,10 @@ dependencies = [ [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] @@ -581,32 +637,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "loro" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7#d2b0520f8633f96146a49ec205bd5e7056880f1a" -dependencies = [ - "either", - "enum-as-inner 0.6.0", - "generic-btree", - "loro-delta 0.16.2 (git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7)", - "loro-internal 0.16.2 (git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7)", - "tracing", -] - -[[package]] -name = "loro" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761#90470658435ec4c62b5af59ebb82fe9e1f5aa761" -dependencies = [ - "either", - "enum-as-inner 0.6.0", - "generic-btree", - "loro-delta 0.16.2 (git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761)", - "loro-internal 0.16.2 (git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761)", - "tracing", -] - [[package]] name = "loro" version = "0.16.12" @@ -623,78 +653,78 @@ dependencies = [ [[package]] name = "loro" -version = "1.5.11" +version = "1.8.1" +source = "git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769#c897c346d9fd46dccf44de7ef4e72799fa9c9769" dependencies = [ "enum-as-inner 0.6.0", - "fxhash", "generic-btree", - "loro-common 1.5.10", - "loro-delta 1.5.6", - "loro-internal 1.5.10", - "loro-kv-store 1.5.10", + "loro-common 1.8.1", + "loro-delta 1.6.0", + "loro-internal 1.8.1", + "loro-kv-store 1.8.1", + "rustc-hash", "tracing", ] [[package]] -name = "loro-common" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7#d2b0520f8633f96146a49ec205bd5e7056880f1a" +name = "loro" +version = "1.10.3" dependencies = [ - "arbitrary", "enum-as-inner 0.6.0", - "fxhash", - "loro-rle 0.16.2 (git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7)", - "nonmax", - "serde", - "serde_columnar", - "string_cache", - "thiserror 1.0.57", + "generic-btree", + "loro-common 1.10.0", + "loro-delta 1.9.1", + "loro-internal 1.10.3", + "loro-kv-store 1.10.0", + "rustc-hash", + "tracing", ] [[package]] name = "loro-common" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761#90470658435ec4c62b5af59ebb82fe9e1f5aa761" +version = "0.16.12" +source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" dependencies = [ "arbitrary", "enum-as-inner 0.6.0", "fxhash", - "loro-rle 0.16.2 (git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761)", + "leb128", + "loro-rle 0.16.12", "nonmax", "serde", "serde_columnar", + "serde_json", "string_cache", "thiserror 1.0.57", ] [[package]] name = "loro-common" -version = "0.16.12" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" +version = "1.8.1" +source = "git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769#c897c346d9fd46dccf44de7ef4e72799fa9c9769" dependencies = [ "arbitrary", "enum-as-inner 0.6.0", - "fxhash", "leb128", - "loro-rle 0.16.12", + "loro-rle 1.6.0 (git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769)", "nonmax", + "rustc-hash", "serde", "serde_columnar", "serde_json", - "string_cache", "thiserror 1.0.57", ] [[package]] name = "loro-common" -version = "1.5.10" +version = "1.10.0" dependencies = [ "arbitrary", "enum-as-inner 0.6.0", - "fxhash", "leb128", - "loro-rle 1.2.7", + "loro-rle 1.6.0", "nonmax", + "rustc-hash", "serde", "serde_columnar", "serde_json", @@ -704,20 +734,8 @@ dependencies = [ [[package]] name = "loro-delta" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7#d2b0520f8633f96146a49ec205bd5e7056880f1a" -dependencies = [ - "arrayvec", - "enum-as-inner 0.5.1", - "generic-btree", - "heapless 0.8.0", - "tracing", -] - -[[package]] -name = "loro-delta" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761#90470658435ec4c62b5af59ebb82fe9e1f5aa761" +version = "0.16.12" +source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" dependencies = [ "arrayvec", "enum-as-inner 0.5.1", @@ -728,19 +746,18 @@ dependencies = [ [[package]] name = "loro-delta" -version = "0.16.12" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" +version = "1.6.0" +source = "git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769#c897c346d9fd46dccf44de7ef4e72799fa9c9769" dependencies = [ "arrayvec", "enum-as-inner 0.5.1", "generic-btree", "heapless 0.8.0", - "tracing", ] [[package]] name = "loro-delta" -version = "1.5.6" +version = "1.9.1" dependencies = [ "arrayvec", "enum-as-inner 0.5.1", @@ -750,48 +767,15 @@ dependencies = [ [[package]] name = "loro-internal" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7#d2b0520f8633f96146a49ec205bd5e7056880f1a" -dependencies = [ - "append-only-bytes", - "arref", - "either", - "enum-as-inner 0.5.1", - "enum_dispatch", - "fxhash", - "generic-btree", - "getrandom", - "im", - "itertools 0.12.1", - "leb128", - "loro-common 0.16.2 (git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7)", - "loro-delta 0.16.2 (git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7)", - "loro-rle 0.16.2 (git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7)", - "loro_fractional_index 0.16.2 (git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7)", - "md5", - "num", - "num-derive", - "num-traits", - "once_cell", - "postcard", - "rand", - "serde", - "serde_columnar", - "serde_json", - "smallvec", - "thiserror 1.0.57", - "tracing", -] - -[[package]] -name = "loro-internal" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761#90470658435ec4c62b5af59ebb82fe9e1f5aa761" +version = "0.16.12" +source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" dependencies = [ "append-only-bytes", "arref", + "bytes", "either", - "enum-as-inner 0.5.1", + "ensure-cov", + "enum-as-inner 0.6.0", "enum_dispatch", "fxhash", "generic-btree", @@ -799,16 +783,19 @@ dependencies = [ "im", "itertools 0.12.1", "leb128", - "loro-common 0.16.2 (git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761)", - "loro-delta 0.16.2 (git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761)", - "loro-rle 0.16.2 (git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761)", - "loro_fractional_index 0.16.2 (git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761)", + "loro-common 0.16.12", + "loro-delta 0.16.12", + "loro-kv-store 0.16.2", + "loro-rle 0.16.12", + "loro_fractional_index 0.16.12", "md5", + "nonmax", "num", "num-derive", "num-traits", "once_cell", "postcard", + "pretty_assertions", "rand", "serde", "serde_columnar", @@ -816,12 +803,13 @@ dependencies = [ "smallvec", "thiserror 1.0.57", "tracing", + "xxhash-rust", ] [[package]] name = "loro-internal" -version = "0.16.12" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" +version = "1.8.1" +source = "git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769#c897c346d9fd46dccf44de7ef4e72799fa9c9769" dependencies = [ "append-only-bytes", "arref", @@ -830,38 +818,41 @@ dependencies = [ "ensure-cov", "enum-as-inner 0.6.0", "enum_dispatch", - "fxhash", "generic-btree", "getrandom", "im", "itertools 0.12.1", "leb128", - "loro-common 0.16.12", - "loro-delta 0.16.12", - "loro-kv-store 0.16.2", - "loro-rle 0.16.12", - "loro_fractional_index 0.16.12", + "loom", + "loro-common 1.8.1", + "loro-delta 1.6.0", + "loro-kv-store 1.8.1", + "loro-rle 1.6.0 (git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769)", + "loro_fractional_index 1.6.0 (git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769)", "md5", "nonmax", "num", - "num-derive", "num-traits", "once_cell", + "parking_lot", "postcard", "pretty_assertions", "rand", + "rustc-hash", "serde", "serde_columnar", "serde_json", "smallvec", "thiserror 1.0.57", + "thread_local", "tracing", + "wasm-bindgen", "xxhash-rust", ] [[package]] name = "loro-internal" -version = "1.5.10" +version = "1.10.3" dependencies = [ "append-only-bytes", "arref", @@ -870,26 +861,29 @@ dependencies = [ "ensure-cov", "enum-as-inner 0.6.0", "enum_dispatch", - "fxhash", "generic-btree", "getrandom", "im", "itertools 0.12.1", "leb128", "loom", - "loro-common 1.5.10", - "loro-delta 1.5.6", - "loro-kv-store 1.5.10", - "loro-rle 1.2.7", - "loro_fractional_index 1.2.7", + "loro-common 1.10.0", + "loro-delta 1.9.1", + "loro-kv-store 1.10.0", + "loro-rle 1.6.0", + "loro_fractional_index 1.6.0", "md5", "nonmax", "num", "num-traits", "once_cell", + "parking_lot", + "pest", + "pest_derive", "postcard", "pretty_assertions", "rand", + "rustc-hash", "serde", "serde_columnar", "serde_json", @@ -919,36 +913,39 @@ dependencies = [ [[package]] name = "loro-kv-store" -version = "1.5.10" +version = "1.8.1" +source = "git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769#c897c346d9fd46dccf44de7ef4e72799fa9c9769" dependencies = [ "bytes", "ensure-cov", - "fxhash", - "loro-common 1.5.10", + "loro-common 1.8.1", "lz4_flex", "once_cell", "quick_cache", + "rustc-hash", "tracing", "xxhash-rust", ] [[package]] -name = "loro-rle" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7#d2b0520f8633f96146a49ec205bd5e7056880f1a" +name = "loro-kv-store" +version = "1.10.0" dependencies = [ - "append-only-bytes", - "arref", - "enum-as-inner 0.6.0", - "fxhash", - "num", - "smallvec", + "bytes", + "ensure-cov", + "loro-common 1.10.0", + "lz4_flex", + "once_cell", + "quick_cache", + "rustc-hash", + "tracing", + "xxhash-rust", ] [[package]] name = "loro-rle" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761#90470658435ec4c62b5af59ebb82fe9e1f5aa761" +version = "0.16.12" +source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" dependencies = [ "append-only-bytes", "arref", @@ -960,20 +957,17 @@ dependencies = [ [[package]] name = "loro-rle" -version = "0.16.12" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" +version = "1.6.0" dependencies = [ "append-only-bytes", - "arref", - "enum-as-inner 0.6.0", - "fxhash", "num", "smallvec", ] [[package]] name = "loro-rle" -version = "1.2.7" +version = "1.6.0" +source = "git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769#c897c346d9fd46dccf44de7ef4e72799fa9c9769" dependencies = [ "append-only-bytes", "num", @@ -988,21 +982,11 @@ checksum = "3f3d053a135388e6b1df14e8af1212af5064746e9b87a06a345a7a779ee9695a" [[package]] name = "loro_fractional_index" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%400.16.7#d2b0520f8633f96146a49ec205bd5e7056880f1a" -dependencies = [ - "imbl", - "rand", - "serde", - "smallvec", -] - -[[package]] -name = "loro_fractional_index" -version = "0.16.2" -source = "git+https://github.com/loro-dev/loro.git?rev=90470658435ec4c62b5af59ebb82fe9e1f5aa761#90470658435ec4c62b5af59ebb82fe9e1f5aa761" +version = "0.16.12" +source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" dependencies = [ "imbl", + "once_cell", "rand", "serde", "smallvec", @@ -1010,19 +994,17 @@ dependencies = [ [[package]] name = "loro_fractional_index" -version = "0.16.12" -source = "git+https://github.com/loro-dev/loro.git?tag=loro-crdt%401.0.0-alpha.4#9bfe97bce4912c6dc8439817497d18423a0e8cb7" +version = "1.6.0" dependencies = [ - "imbl", "once_cell", "rand", "serde", - "smallvec", ] [[package]] name = "loro_fractional_index" -version = "1.2.7" +version = "1.6.0" +source = "git+https://github.com/loro-dev/loro.git?rev=c897c346d9fd46dccf44de7ef4e72799fa9c9769#c897c346d9fd46dccf44de7ef4e72799fa9c9769" dependencies = [ "once_cell", "rand", @@ -1202,9 +1184,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.1" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -1212,15 +1194,58 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.9" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets", + "windows-link 0.2.1", +] + +[[package]] +name = "pest" +version = "2.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9eb05c21a464ea704b53158d358a31e6425db2f63a1a7312268b05fe2b75f7" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f9dbced329c441fa79d80472764b1a2c7e57123553b8519b36663a2fb234ed" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bb96d5051a78f44f43c8f712d8e810adb0ebf923fc9ed2655a7f66f63ba8ee5" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "pest_meta" +version = "2.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "602113b5b5e8621770cfd490cfd90b9f84ab29bd2b0e49ad83eb6d186cef2365" +dependencies = [ + "pest", + "sha2", ] [[package]] @@ -1388,9 +1413,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.4.1" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", ] @@ -1439,6 +1464,12 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.0" @@ -1489,9 +1520,9 @@ dependencies = [ [[package]] name = "serde_columnar" -version = "0.3.11" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5910a00acc21b3f106b9e3977cabf8d4c15b62ea585664f08ec6fedb118d88e0" +checksum = "2a16e404f17b16d0273460350e29b02d76ba0d70f34afdc9a4fa034c97d6c6eb" dependencies = [ "itertools 0.11.0", "postcard", @@ -1502,9 +1533,9 @@ dependencies = [ [[package]] name = "serde_columnar_derive" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44cea1995b758f1b344f484e77a02d9d85c8a62c9ce0e5f1850e27e2f7eebbc9" +checksum = "45958fce4903f67e871fbf15ac78e289269b21ebd357d6fecacdba233629112e" dependencies = [ "darling", "proc-macro2", @@ -1534,6 +1565,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1781,6 +1823,12 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -1900,7 +1948,7 @@ dependencies = [ "windows-collections", "windows-core", "windows-future", - "windows-link", + "windows-link 0.1.3", "windows-numerics", ] @@ -1921,7 +1969,7 @@ checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement", "windows-interface", - "windows-link", + "windows-link 0.1.3", "windows-result", "windows-strings", ] @@ -1933,7 +1981,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ "windows-core", - "windows-link", + "windows-link 0.1.3", "windows-threading", ] @@ -1965,6 +2013,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-numerics" version = "0.2.0" @@ -1972,7 +2026,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ "windows-core", - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -1981,7 +2035,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -1990,22 +2044,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows-link 0.1.3", ] [[package]] @@ -2014,51 +2053,9 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "xxhash-rust" version = "0.8.12" diff --git a/crates/fuzz/tests/test.rs b/crates/fuzz/tests/test.rs index f35f31167..492320ded 100644 --- a/crates/fuzz/tests/test.rs +++ b/crates/fuzz/tests/test.rs @@ -14293,6 +14293,69 @@ fn import_enhance_error() { ); } +#[test] +fn perf_opt_issue() { + test_multi_sites( + 5, + vec![FuzzTarget::All], + &mut [ + Handle { + site: 0, + target: 153, + container: 153, + action: Generic(GenericAction { + value: Container(Tree), + bool: true, + key: 2576980377, + pos: 10066329, + length: 3429941800045903872, + prop: 12852917281039111, + }), + }, + Handle { + site: 53, + target: 223, + container: 255, + action: Generic(GenericAction { + value: Container(Tree), + bool: true, + key: 3051861429, + pos: 13093571283691877813, + length: 18446532040284176163, + prop: 18446744073709551419, + }), + }, + SyncAllUndo { + site: 255, + op_len: 3014655, + }, + SyncAllUndo { + site: 153, + op_len: 486539265, + }, + Handle { + site: 0, + target: 153, + container: 153, + action: Generic(GenericAction { + value: I32(-1718026240), + bool: true, + key: 2576980377, + pos: 11068046445937007001, + length: 10066329, + prop: 12225303613249355776, + }), + }, + SyncAllUndo { + site: 255, + op_len: 4292818211, + }, + SyncAll, + Sync { from: 67, to: 0 }, + ], + ); +} + #[test] fn minify() { minify_error( diff --git a/crates/loro-internal/src/container/list/list_op.rs b/crates/loro-internal/src/container/list/list_op.rs index c0968a23c..d8c07f00e 100644 --- a/crates/loro-internal/src/container/list/list_op.rs +++ b/crates/loro-internal/src/container/list/list_op.rs @@ -8,8 +8,8 @@ use serde::{Deserialize, Serialize}; use crate::{ container::richtext::TextStyleInfoFlag, + container::richtext::richtext_state::unicode_to_utf8_index, op::{ListSlice, SliceRange}, - utils::string_slice::unicode_range_to_byte_range, InternalString, }; @@ -614,17 +614,32 @@ impl Sliceable for InnerListOp { InnerListOp::InsertText { slice, unicode_start, - unicode_len: _, + unicode_len, pos, } => InnerListOp::InsertText { slice: { - let (a, b) = unicode_range_to_byte_range( - // SAFETY: we know it's a valid utf8 string - unsafe { std::str::from_utf8_unchecked(slice) }, - from, - to, - ); - slice.slice(a, b) + // SAFETY: we know it's a valid utf8 string + let s = unsafe { std::str::from_utf8_unchecked(slice) }; + let total_unicode_len = *unicode_len as usize; + let total_bytes_len = slice.len(); + let (from_byte, to_byte) = if total_bytes_len == total_unicode_len { + // ASCII fast path: unicode index equals byte offset. + (from, to) + } else { + let from_byte = if from == 0 { + 0 + } else { + unicode_to_utf8_index(s, from).expect("unicode index should be valid") + }; + let to_byte = if to == total_unicode_len { + total_bytes_len + } else { + unicode_to_utf8_index(s, to).expect("unicode index should be valid") + }; + (from_byte, to_byte) + }; + + slice.slice(from_byte, to_byte) }, unicode_start: *unicode_start + from as u32, unicode_len: (to - from) as u32, @@ -644,12 +659,14 @@ impl Sliceable for InnerListOp { #[cfg(test)] mod test { + use append_only_bytes::BytesSlice; use loro_common::ID; - use rle::{Mergable, Sliceable}; + use rle::{HasLength as _, Mergable, Sliceable}; + use std::time::Instant; use crate::{container::list::list_op::DeleteSpanWithId, op::ListSlice}; - use super::{DeleteSpan, ListOp}; + use super::{DeleteSpan, InnerListOp, ListOp}; #[test] fn fix_fields_order() { @@ -749,4 +766,38 @@ mod test { a_with_id.merge(&b_with_id, &()); assert!(a_with_id.span.signed_len == -2); } + + #[test] + #[ignore] + fn perf_insert_text_slice_split_suffix() { + // Run with: + // cargo test -p loro-internal perf_insert_text_slice_split_suffix -- --ignored --nocapture + const TEXT_LEN: usize = 2 * 1024 * 1024; + const CHUNK_UNICODE_LEN: usize = 4096; + + let text = "a".repeat(TEXT_LEN); + let slice = BytesSlice::from_bytes(text.as_bytes()); + let mut op = InnerListOp::InsertText { + slice, + unicode_start: 0, + unicode_len: TEXT_LEN as u32, + pos: 0, + }; + + let start = Instant::now(); + let mut total = 0usize; + while op.content_len() > CHUNK_UNICODE_LEN { + let end = CHUNK_UNICODE_LEN.min(op.content_len()); + let _prefix = op.slice(0, end); + op = op.slice(end, op.content_len()); + total += end; + } + total += op.content_len(); + let elapsed = start.elapsed(); + assert_eq!(total, TEXT_LEN); + println!( + "perf_insert_text_slice_split_suffix: text_len={}, chunk_unicode_len={}, elapsed={:?}", + TEXT_LEN, CHUNK_UNICODE_LEN, elapsed + ); + } } diff --git a/crates/loro-internal/src/container/richtext/richtext_state.rs b/crates/loro-internal/src/container/richtext/richtext_state.rs index 18708701b..d0e198df3 100644 --- a/crates/loro-internal/src/container/richtext/richtext_state.rs +++ b/crates/loro-internal/src/container/richtext/richtext_state.rs @@ -433,13 +433,21 @@ mod text_chunk { } fn check(&self) { - if cfg!(any(debug_assertions, test)) { - assert_eq!(self.unicode_len, self.as_str().chars().count() as i32); - assert_eq!( - self.utf16_len, - self.as_str().chars().map(|c| c.len_utf16()).sum::() as i32 - ); + if !cfg!(any(debug_assertions, test)) { + return; + } + + let bytes_len = self.bytes.len() as i32; + if bytes_len == self.unicode_len { + assert_eq!(self.utf16_len, self.unicode_len); + return; } + + assert_eq!(self.unicode_len, self.as_str().chars().count() as i32); + assert_eq!( + self.utf16_len, + self.as_str().chars().map(|c| c.len_utf16()).sum::() as i32 + ); } pub(crate) fn entity_range_to_event_range(&self, range: Range) -> Range { @@ -484,6 +492,67 @@ mod text_chunk { impl generic_btree::rle::Sliceable for TextChunk { fn _slice(&self, range: Range) -> Self { assert!(range.start < range.end); + let total_unicode_len = self.unicode_len as usize; + if range.start == 0 && range.end == total_unicode_len { + return self.clone(); + } + + // Fast path for ASCII text: unicode index == byte index, and utf16 index == unicode index. + if self.bytes.len() as i32 == self.unicode_len { + let ans = Self { + unicode_len: range.len() as i32, + bytes: self.bytes.slice_clone(range.start..range.end), + utf16_len: range.len() as i32, + id: self.id.inc(range.start as i32), + }; + ans.check(); + return ans; + } + + // Fast path for slicing a suffix/prefix to avoid scanning the whole chunk. + if range.end == total_unicode_len { + let mut utf16_offset = 0; + let mut start_byte = self.bytes.len(); + for (unicode_index, (byte_index, c)) in self.as_str().char_indices().enumerate() { + if unicode_index == range.start { + start_byte = byte_index; + break; + } + utf16_offset += c.len_utf16(); + } + + let ans = Self { + unicode_len: (total_unicode_len - range.start) as i32, + bytes: self.bytes.slice_clone(start_byte..), + utf16_len: self.utf16_len - utf16_offset as i32, + id: self.id.inc(range.start as i32), + }; + ans.check(); + return ans; + } + + if range.start == 0 { + let mut utf16_len = 0; + let mut end_byte = self.bytes.len(); + for (unicode_index, (byte_index, c)) in self.as_str().char_indices().enumerate() { + if unicode_index == range.end { + end_byte = byte_index; + break; + } + utf16_len += c.len_utf16(); + } + + let ans = Self { + unicode_len: range.end as i32, + bytes: self.bytes.slice_clone(..end_byte), + utf16_len: utf16_len as i32, + id: self.id, + }; + ans.check(); + return ans; + } + + // General middle slice. let mut utf16_len = 0; let mut start = 0; let mut end = 0; @@ -2421,6 +2490,11 @@ impl RichtextState { self.tree.root_cache().entity_len as usize } + #[inline(always)] + pub(crate) fn content_node_len(&self) -> usize { + self.tree.node_len() + } + pub fn diagnose(&self) { println!( "rope_nodes: {}, style_nodes: {}, text_len: {}", diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index 07761bc4c..18eac0106 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -140,11 +140,22 @@ impl Tracker { } fn update_insert_by_split(&mut self, split: &[LeafIndex]) { - for &new_leaf_idx in split { - let leaf = self.rope.tree().get_elem(new_leaf_idx).unwrap(); - - self.id_to_cursor - .update_insert(leaf.id_span(), new_leaf_idx) + match split.len() { + 0 => {} + 1 => { + let new_leaf_idx = split[0]; + let leaf = self.rope.tree().get_elem(new_leaf_idx).unwrap(); + self.id_to_cursor + .update_insert(leaf.id_span(), new_leaf_idx); + } + _ => { + let mut updates = Vec::with_capacity(split.len()); + for &new_leaf_idx in split { + let leaf = self.rope.tree().get_elem(new_leaf_idx).unwrap(); + updates.push((leaf.id_span(), new_leaf_idx)); + } + self.id_to_cursor.update_insert_batch(&mut updates); + } } } @@ -629,6 +640,7 @@ mod test { use generic_btree::rle::HasLength; use super::*; + use std::time::Instant; #[test] fn test_len() { @@ -672,4 +684,87 @@ mod test { assert!(v[1].is_activated()); assert_eq!(v[1].rle_len(), 6); } + + #[test] + #[ignore] + fn perf_update_insert_by_split_quadratic() { + // Run with: + // cargo test -p loro-internal perf_update_insert_by_split_quadratic -- --ignored --nocapture + const CHUNK_LEN: usize = 256; + let fragments: usize = std::env::var("LORO_PERF_FRAGMENTS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(8192); + const PEER_A: PeerID = 1; + const PEER_B: PeerID = 2; + + let doc_len = CHUNK_LEN * fragments; + + let mut t = Tracker::new(); + t.insert( + IdFull::new(PEER_A, 0, 0), + 0, + RichtextChunk::new_text(0..doc_len as u32), + ); + t.id_to_cursor.diagnose(); + + let start = Instant::now(); + let expected_fragment_updates = (fragments as u64) * ((fragments - 1) as u64) / 2; + + for i in 0..(fragments - 1) { + let pos = (i + 1) * CHUNK_LEN + i; + let op_id = IdFull::new(PEER_B, i as Counter, i as Lamport); + let chunk = RichtextChunk::new_text( + (doc_len as u32 + i as u32)..(doc_len as u32 + i as u32 + 1), + ); + t.insert(op_id, pos, chunk); + } + + let elapsed = start.elapsed(); + let before_vv = vv!(PEER_A => doc_len as Counter); + let after_vv = vv!(PEER_A => doc_len as Counter, PEER_B => (fragments - 1) as Counter); + let diff_start = Instant::now(); + let diff_len = t.diff(&before_vv, &after_vv).count(); + let diff_elapsed = diff_start.elapsed(); + assert_eq!(t.rope.tree().iter().count(), 1 + 2 * (fragments - 1)); + println!( + "perf_update_insert_by_split_quadratic: doc_len={}, fragments={}, expected_fragment_updates={}, insert_elapsed={:?}, diff_items={}, diff_elapsed={:?}", + doc_len, fragments, expected_fragment_updates, elapsed, diff_len, diff_elapsed + ); + } + + #[test] + #[ignore] + fn perf_update_insert_by_split_quadratic_unknown() { + // Run with: + // LORO_PERF_FRAGMENTS=8192 cargo test -p loro-internal perf_update_insert_by_split_quadratic_unknown -- --ignored --nocapture + const CHUNK_LEN: usize = 256; + let fragments: usize = std::env::var("LORO_PERF_FRAGMENTS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(8192); + const PEER_B: PeerID = 2; + + let doc_len = CHUNK_LEN * fragments; + + let mut t = Tracker::new_with_unknown(); + t.checkout(&vv!()); + t.id_to_cursor.diagnose(); + + let start = Instant::now(); + for i in 0..(fragments - 1) { + let pos = (i + 1) * CHUNK_LEN + i; + let op_id = IdFull::new(PEER_B, i as Counter, i as Lamport); + let chunk = RichtextChunk::new_text( + (doc_len as u32 + i as u32)..(doc_len as u32 + i as u32 + 1), + ); + t.insert(op_id, pos, chunk); + } + + let elapsed = start.elapsed(); + println!( + "perf_update_insert_by_split_quadratic_unknown: doc_len={}, fragments={}, elapsed={:?}", + doc_len, fragments, elapsed + ); + } } diff --git a/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs b/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs index 1d1b0a70d..2c907f66e 100644 --- a/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs +++ b/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs @@ -109,6 +109,74 @@ impl IdToCursor { assert_eq!(start_counter, id_span.counter.end); } + pub fn update_insert_batch(&mut self, updates: &mut [(IdSpan, LeafIndex)]) { + if updates.is_empty() { + return; + } + + if updates.len() == 1 { + let (span, leaf) = updates[0]; + self.update_insert(span, leaf); + return; + } + + let mut per_peer: FxHashMap> = FxHashMap::default(); + for &(id_span, new_leaf) in updates.iter() { + per_peer + .entry(id_span.peer) + .or_default() + .push((id_span, new_leaf)); + } + + for (peer, peer_updates) in per_peer { + let Some(list) = self.map.get_mut(&peer) else { + continue; + }; + + let mut per_fragment: FxHashMap> = + FxHashMap::default(); + + for (id_span, new_leaf) in peer_updates { + debug_assert!(!id_span.is_reversed()); + let mut index = + match list.binary_search_by_key(&id_span.counter.start, |x| x.counter) { + Ok(index) => index, + Err(index) => index.saturating_sub(1), + }; + + let mut start_counter = id_span.counter.start; + while start_counter < id_span.counter.end + && index < list.len() + && start_counter < list[index].counter_end() + { + let fragment = &list[index]; + let from = (start_counter - fragment.counter) as usize; + let to = ((id_span.counter.end - fragment.counter) as usize) + .min(fragment.cursor.rle_len()); + + if from != to { + per_fragment + .entry(index) + .or_default() + .push((from, to, new_leaf)); + } + + start_counter += (to - from) as Counter; + index += 1; + } + + assert_eq!(start_counter, id_span.counter.end); + } + + let mut fragment_indexes: Vec = per_fragment.keys().copied().collect(); + fragment_indexes.sort_unstable(); + for index in fragment_indexes { + let updates = per_fragment.get(&index).unwrap(); + list[index].cursor.update_insert_many(updates); + } + } + } + pub fn iter_all(&self) -> impl Iterator + '_ { self.map.iter().flat_map(|(peer, list)| { list.iter() @@ -352,6 +420,80 @@ mod insert_set { } } + pub(crate) fn update_many(&mut self, updates: &[(usize, usize, LeafIndex)]) { + if updates.is_empty() { + return; + } + + if updates.len() == 1 { + let (from, to, leaf) = updates[0]; + self.update(from, to, leaf); + return; + } + + let len = self.len(); + if len > MAX_FRAGMENT_LEN { + for &(from, to, leaf) in updates { + self.update(from, to, leaf); + } + return; + } + + let mut dense: SmallVec<[LeafIndex; MAX_FRAGMENT_LEN]> = SmallVec::with_capacity(len); + match self { + InsertSet::Small(set) => { + for insert in set.set.iter() { + for _ in 0..insert.len { + dense.push(insert.leaf); + } + } + } + InsertSet::Large(set) => { + for insert in set.tree.iter() { + for _ in 0..insert.len { + dense.push(insert.leaf); + } + } + } + } + debug_assert_eq!(dense.len(), len); + + for &(from, to, leaf) in updates { + debug_assert!(from <= to); + debug_assert!(to <= len); + for i in from..to { + dense[i] = leaf; + } + } + + let mut new_set: SmallVec<[Insert; 1]> = SmallVec::new(); + if !dense.is_empty() { + let mut cur_leaf = dense[0]; + let mut cur_len: usize = 1; + for &leaf in dense.iter().skip(1) { + if leaf == cur_leaf { + cur_len += 1; + } else { + new_set.push(Insert { + leaf: cur_leaf, + len: cur_len as u32, + }); + cur_leaf = leaf; + cur_len = 1; + } + } + new_set.push(Insert { + leaf: cur_leaf, + len: cur_len as u32, + }); + } + + *self = InsertSet::Small(SmallInsertSet { + set: new_set, + len: len as u32, + }); + } + pub(crate) fn get_insert(&self, pos: usize) -> Option { match self { Self::Small(set) => { @@ -548,9 +690,10 @@ mod insert_set { impl SmallInsertSet { fn update(&mut self, from: usize, to: usize, new_leaf: LeafIndex) { let mut cur_scan_index: usize = 0; - let mut new_set = SmallVec::new(); + let old_set = std::mem::take(&mut self.set); + let mut new_set = SmallVec::with_capacity(old_set.len() + 2); let mut new_leaf_inserted = false; - for insert in self.set.iter() { + for insert in old_set.iter() { if new_leaf_inserted { let end = cur_scan_index + insert.len as usize; if end <= to { @@ -791,6 +934,18 @@ impl Cursor { } } + fn update_insert_many(&mut self, updates: &[(usize, usize, LeafIndex)]) { + match self { + Self::Insert(set) => { + set.update_many(updates); + } + Self::Move { .. } => { + unreachable!("update_insert_many on Move") + } + _ => unreachable!(), + } + } + fn get_insert(&self, pos: usize) -> Option { if pos >= self.rle_len() { return None; diff --git a/crates/loro-internal/src/handler/tree.rs b/crates/loro-internal/src/handler/tree.rs index 16f4d16fc..36e5829e2 100644 --- a/crates/loro-internal/src/handler/tree.rs +++ b/crates/loro-internal/src/handler/tree.rs @@ -462,7 +462,9 @@ impl TreeHandler { } } - let index = self + let old_parent = self.get_node_parent(&target).unwrap(); + let old_index = self.get_index_by_tree_id(&target).unwrap(); + let mut index = self .get_index_by_fractional_index( &parent, &NodePosition { @@ -471,6 +473,9 @@ impl TreeHandler { }, ) .unwrap_or(0); + if old_parent == parent && old_index < index { + index -= 1; + } let with_event = !parent .tree_id() .is_some_and(|p| self.is_node_deleted(&p).unwrap()); @@ -500,8 +505,8 @@ impl TreeHandler { index, position: position.clone(), // the old parent should be exist, so we can unwrap - old_parent: self.get_node_parent(&target).unwrap(), - old_index: self.get_index_by_tree_id(&target).unwrap(), + old_parent, + old_index, }, }]), &inner.doc, diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index a5c5ed1a7..8b8bce4b7 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -1,4 +1,4 @@ -use generic_btree::{rle::HasLength, Cursor}; +use generic_btree::{rle::HasLength, rle::Sliceable as _, Cursor}; use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID}; use loro_delta::DeltaRopeBuilder; use rustc_hash::{FxHashMap, FxHashSet}; @@ -577,6 +577,149 @@ impl ContainerState for RichtextState { unreachable!() }; + // Fast path for plain-text deltas (no style anchors / style ranges). + // + // Rebuilding avoids repeated BTree queries and mutations when the delta is very "choppy" + // (many small edit spans), but it allocates and clones chunks, so it can be slower for + // small deltas. Use a cheap cost model to enable it only when it's likely beneficial. + let should_fast_apply = { + #[inline] + fn ilog2_ceil(x: usize) -> usize { + debug_assert!(x > 0); + (usize::BITS - (x - 1).leading_zeros()) as usize + } + + let state = self.state.get_mut(); + if state.has_styles() { + false + } else { + // `edit_actions` approximates how many BTree mutations the incremental path will do: + // each Replace with delete>0 becomes a drain, and each Replace with value>0 becomes an insert. + let mut edit_actions: usize = 0; + let mut is_plain_text_delta = true; + for span in richtext.iter() { + match span { + loro_delta::DeltaItem::Retain { .. } => {} + loro_delta::DeltaItem::Replace { value, delete, .. } => { + if *delete > 0 { + edit_actions += 1; + } + if value.rle_len() > 0 { + if !matches!(value, RichtextStateChunk::Text(_)) { + is_plain_text_delta = false; + break; + } + edit_actions += 1; + } + } + } + } + + if !is_plain_text_delta || edit_actions == 0 { + false + } else { + let content_nodes = state.content_node_len().max(1); + let log_n = ilog2_ceil(content_nodes + 1).max(1); + let incremental_score = edit_actions.saturating_mul(log_n); + let rebuild_score = content_nodes.saturating_add(edit_actions); + + let old_len = richtext.old_len().max(1); + let avg_action_span = old_len / edit_actions; + // A very rough proxy for "choppiness": many edit actions with small average span. + // The thresholds are intentionally conservative to avoid rebuilding for small or + // localized deltas. + let is_choppy = edit_actions >= 256 && avg_action_span <= 32; + + is_choppy && incremental_score >= rebuild_score.saturating_mul(4) + } + } + }; + + if should_fast_apply { + let new_state = { + let state = self.state.get_mut(); + let mut chunks: Vec = Vec::new(); + + let mut src_iter = state.iter_chunk(); + let mut cur = src_iter.next(); + let mut cur_offset: usize = 0; + + for span in richtext.iter() { + match span { + loro_delta::DeltaItem::Retain { len, .. } => { + let mut left = *len; + while left > 0 { + let chunk = cur.expect("Delta retain exceeds source length"); + let chunk_len = chunk.rle_len(); + if chunk_len == 0 { + cur = src_iter.next(); + cur_offset = 0; + continue; + } + let available = chunk_len - cur_offset; + let take = left.min(available); + if take == chunk_len && cur_offset == 0 { + chunks.push(chunk.clone()); + } else { + chunks.push(chunk.slice(cur_offset..cur_offset + take)); + } + + left -= take; + cur_offset += take; + if cur_offset == chunk_len { + cur = src_iter.next(); + cur_offset = 0; + } + } + } + loro_delta::DeltaItem::Replace { value, delete, .. } => { + let mut left = *delete; + while left > 0 { + let chunk = cur.expect("Delta delete exceeds source length"); + let chunk_len = chunk.rle_len(); + if chunk_len == 0 { + cur = src_iter.next(); + cur_offset = 0; + continue; + } + let available = chunk_len - cur_offset; + let take = left.min(available); + left -= take; + cur_offset += take; + if cur_offset == chunk_len { + cur = src_iter.next(); + cur_offset = 0; + } + } + + if value.rle_len() > 0 { + chunks.push(value.clone()); + } + } + } + } + + if let Some(chunk) = cur { + let chunk_len = chunk.rle_len(); + if cur_offset < chunk_len { + if cur_offset == 0 { + chunks.push(chunk.clone()); + } else { + chunks.push(chunk.slice(cur_offset..chunk_len)); + } + } + } + for chunk in src_iter { + chunks.push(chunk.clone()); + } + + InnerState::from_chunks(chunks.into_iter()) + }; + + *self.state.get_mut() = new_state; + return; + } + let mut style_starts: FxHashMap, usize> = FxHashMap::default(); let mut entity_index = 0; for span in richtext.iter() { diff --git a/crates/loro-internal/src/utils/string_slice.rs b/crates/loro-internal/src/utils/string_slice.rs index 1a0364967..7d582eccb 100644 --- a/crates/loro-internal/src/utils/string_slice.rs +++ b/crates/loro-internal/src/utils/string_slice.rs @@ -323,6 +323,7 @@ impl Default for StringSlice { } impl loro_delta::delta_trait::DeltaValue for StringSlice {} +#[allow(dead_code)] pub fn unicode_range_to_byte_range(s: &str, start: usize, end: usize) -> (usize, usize) { debug_assert!(start <= end); let start_unicode_index = start; diff --git a/crates/loro/tests/perf_import_quadratic.rs b/crates/loro/tests/perf_import_quadratic.rs new file mode 100644 index 000000000..f2140b925 --- /dev/null +++ b/crates/loro/tests/perf_import_quadratic.rs @@ -0,0 +1,93 @@ +use loro::{ExportMode, LoroDoc}; +use std::time::Instant; + +#[test] +#[ignore] +fn perf_import_insert_split_quadratic_e2e() { + // Run with: + // cargo test -p loro perf_import_insert_split_quadratic_e2e -- --ignored --nocapture + // + // You can scale it with: + // LORO_PERF_FRAGMENTS=16384 cargo test -p loro perf_import_insert_split_quadratic_e2e -- --ignored --nocapture + const CHUNK_LEN: usize = 256; + const PEER_A: u64 = 1; + const PEER_B: u64 = 2; + const PEER_C: u64 = 3; + + let fragments: usize = std::env::var("LORO_PERF_FRAGMENTS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(8192); + assert!(fragments > 1); + + let doc_len = CHUNK_LEN * fragments; + let expected_fragment_updates = (fragments as u64) * ((fragments - 1) as u64) / 2; + + let doc_a = LoroDoc::new(); + doc_a.set_peer_id(PEER_A).unwrap(); + let text_a = doc_a.get_text("t"); + let base = "a".repeat(doc_len); + text_a.insert(0, &base).unwrap(); + doc_a.commit(); + let size_a = doc_a.with_oplog(|oplog| oplog.diagnose_size()); + println!("doc_a: atom_ops={}, ops={}", size_a.total_atom_ops, size_a.total_ops); + let updates_a = doc_a.export(ExportMode::all_updates()).unwrap(); + + let doc_b = LoroDoc::new(); + doc_b.set_peer_id(PEER_B).unwrap(); + let text_b = doc_b.get_text("t"); + doc_b.import(&updates_a).unwrap(); + let size_b = doc_b.with_oplog(|oplog| oplog.diagnose_size()); + println!( + "doc_b(after import a): atom_ops={}, ops={}", + size_b.total_atom_ops, size_b.total_ops + ); + let base_vv = doc_b.oplog_vv(); + + for i in 0..(fragments - 1) { + let pos = (i + 1) * CHUNK_LEN + i; + text_b.insert(pos, "x").unwrap(); + } + doc_b.commit(); + let size_b2 = doc_b.with_oplog(|oplog| oplog.diagnose_size()); + println!( + "doc_b(after inserts): atom_ops={}, ops={}, changes={}", + size_b2.total_atom_ops, size_b2.total_ops, size_b2.total_changes + ); + let updates_b = doc_b.export(ExportMode::updates(&base_vv)).unwrap(); + assert!(!updates_b.is_empty()); + println!("updates_b: bytes={}", updates_b.len()); + + let doc_c = LoroDoc::new(); + doc_c.set_peer_id(PEER_C).unwrap(); + let text_c = doc_c.get_text("t"); + doc_c.import(&updates_a).unwrap(); + + // Isolate oplog decode/merge cost by importing in detached mode. + let doc_d = LoroDoc::new(); + doc_d.set_peer_id(PEER_C + 1).unwrap(); + let text_d = doc_d.get_text("t"); + doc_d.import(&updates_a).unwrap(); + doc_d.detach(); + let start = Instant::now(); + doc_d.import(&updates_b).unwrap(); + let detached_elapsed = start.elapsed(); + let start = Instant::now(); + doc_d.checkout_to_latest(); + let attach_elapsed = start.elapsed(); + assert_eq!(text_d.len_unicode(), doc_len + (fragments - 1)); + println!( + "perf_import_insert_split_quadratic_detached: detached_elapsed={:?}, attach_elapsed={:?}", + detached_elapsed, attach_elapsed + ); + + let start = Instant::now(); + doc_c.import(&updates_b).unwrap(); + let elapsed = start.elapsed(); + + assert_eq!(text_c.len_unicode(), doc_len + (fragments - 1)); + println!( + "perf_import_insert_split_quadratic_e2e: doc_len={}, fragments={}, expected_fragment_updates={}, elapsed={:?}", + doc_len, fragments, expected_fragment_updates, elapsed + ); +}