diff --git a/Cargo.lock b/Cargo.lock index 5ab1b4468d..8a70c59138 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -201,6 +201,24 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "anndists" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4747593401c8d692fb589ac2a208a27ef968b95f9392af837728933348fc199c" +dependencies = [ + "anyhow", + "cfg-if", + "cpu-time", + "env_logger 0.10.2", + "lazy_static", + "log", + "num-traits", + "num_cpus", + "rand 0.8.5", + "rayon", +] + [[package]] name = "anstream" version = "0.6.18" @@ -1118,6 +1136,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.21.7" @@ -1216,15 +1240,30 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec 0.6.3", +] + [[package]] name = "bit-set" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ - "bit-vec", + "bit-vec 0.8.0", ] +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bit-vec" version = "0.8.0" @@ -1304,6 +1343,21 @@ dependencies = [ "piper", ] +[[package]] +name = "bm25" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9874599901ae2aaa19b1485145be2fa4e9af42d1b127672a03a7099ab6350bac" +dependencies = [ + "cached", + "deunicode", + "fxhash", + "rust-stemmers", + "stop-words", + "unicode-segmentation", + "whichlang", +] + [[package]] name = "bs58" version = "0.5.1" @@ -1341,6 +1395,20 @@ name = "bytemuck" version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ecc273b49b3205b83d648f0690daa588925572cc5063745bfe547fe7ec8e1a1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] [[package]] name = "byteorder" @@ -1370,6 +1438,39 @@ dependencies = [ "either", ] +[[package]] +name = "cached" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0839c297f8783316fcca9d90344424e968395413f0662a5481f79c6648bbc14" +dependencies = [ + "ahash", + "cached_proc_macro", + "cached_proc_macro_types", + "hashbrown 0.14.5", + "once_cell", + "thiserror 2.0.12", + "web-time", +] + +[[package]] +name = "cached_proc_macro" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673992d934f0711b68ebb3e1b79cdc4be31634b37c98f26867ced0438ca5c603" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "cached_proc_macro_types" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade8366b8bd5ba243f0a58f036cc0ca8a2f069cff1a2351ef1cac6b083e16fc0" + [[package]] name = "cairo-rs" version = "0.18.5" @@ -1404,6 +1505,62 @@ dependencies = [ "serde", ] +[[package]] +name = "candle-core" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9f51e2ecf6efe9737af8f993433c839f956d2b6ed4fd2dd4a7c6d8b0fa667ff" +dependencies = [ + "byteorder", + "gemm 0.17.1", + "half", + "memmap2", + "num-traits", + "num_cpus", + "rand 0.9.1", + "rand_distr", + "rayon", + "safetensors", + "thiserror 1.0.69", + "ug", + "yoke 0.7.5", + "zip", +] + +[[package]] +name = "candle-nn" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1980d53280c8f9e2c6cbe1785855d7ff8010208b46e21252b978badf13ad69d" +dependencies = [ + "candle-core", + "half", + "num-traits", + "rayon", + "safetensors", + "serde", + "thiserror 1.0.69", +] + +[[package]] +name = "candle-transformers" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186cb80045dbe47e0b387ea6d3e906f02fb3056297080d9922984c90e90a72b0" +dependencies = [ + "byteorder", + "candle-core", + "candle-nn", + "fancy-regex 0.13.0", + "num-traits", + "rand 0.9.1", + "rayon", + "serde", + "serde_json", + "serde_plain", + "tracing", +] + [[package]] name = "cast" version = "0.3.0" @@ -1995,6 +2152,16 @@ dependencies = [ "libc", ] +[[package]] +name = "cpu-time" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9e393a7668fe1fad3075085b86c781883000b4ede868f43627b34a87c8b7ded" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -2332,6 +2499,12 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + [[package]] name = "dialoguer" version = "0.11.0" @@ -2554,6 +2727,25 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "dyn-stack" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e53799688f5632f364f8fb387488dd05db9fe45db7011be066fc20e7027f8b" +dependencies = [ + "bytemuck", + "reborrow", +] + +[[package]] +name = "dyn-stack" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490bd48eb68fffcfed519b4edbfd82c69cbe741d175b84f0e0cbe8c57cbe0bdd" +dependencies = [ + "bytemuck", +] + [[package]] name = "either" version = "1.15.0" @@ -2587,6 +2779,18 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "enumflags2" version = "0.7.11" @@ -2624,6 +2828,19 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" +[[package]] +name = "env_logger" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" +dependencies = [ + "humantime", + "is-terminal", + "log", + "regex", + "termcolor", +] + [[package]] name = "env_logger" version = "0.11.8" @@ -2669,6 +2886,15 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +dependencies = [ + "cc", +] + [[package]] name = "event-listener" version = "5.4.0" @@ -2739,13 +2965,24 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fancy-regex" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" +dependencies = [ + "bit-set 0.5.3", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", +] + [[package]] name = "fancy-regex" version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" dependencies = [ - "bit-set", + "bit-set 0.8.0", "regex-automata 0.4.9", "regex-syntax 0.8.5", ] @@ -3833,6 +4070,243 @@ dependencies = [ "x11", ] +[[package]] +name = "gemm" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ab24cc62135b40090e31a76a9b2766a501979f3070fa27f689c27ec04377d32" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-c32 0.17.1", + "gemm-c64 0.17.1", + "gemm-common 0.17.1", + "gemm-f16 0.17.1", + "gemm-f32 0.17.1", + "gemm-f64 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab96b703d31950f1aeddded248bc95543c9efc7ac9c4a21fda8703a83ee35451" +dependencies = [ + "dyn-stack 0.13.0", + "gemm-c32 0.18.2", + "gemm-c64 0.18.2", + "gemm-common 0.18.2", + "gemm-f16 0.18.2", + "gemm-f32 0.18.2", + "gemm-f64 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.5.0", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9c030d0b983d1e34a546b86e08f600c11696fde16199f971cd46c12e67512c0" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm-c32" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6db9fd9f40421d00eea9dd0770045a5603b8d684654816637732463f4073847" +dependencies = [ + "dyn-stack 0.13.0", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.5.0", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbb5f2e79fefb9693d18e1066a557b4546cd334b226beadc68b11a8f9431852a" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm-c64" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfcad8a3d35a43758330b635d02edad980c1e143dc2f21e6fd25f9e4eada8edf" +dependencies = [ + "dyn-stack 0.13.0", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.5.0", + "seq-macro", +] + +[[package]] +name = "gemm-common" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8" +dependencies = [ + "bytemuck", + "dyn-stack 0.10.0", + "half", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp 0.18.22", + "raw-cpuid 10.7.0", + "rayon", + "seq-macro", + "sysctl 0.5.5", +] + +[[package]] +name = "gemm-common" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a352d4a69cbe938b9e2a9cb7a3a63b7e72f9349174a2752a558a8a563510d0f3" +dependencies = [ + "bytemuck", + "dyn-stack 0.13.0", + "half", + "libm", + "num-complex", + "num-traits", + "once_cell", + "paste", + "pulp 0.21.5", + "raw-cpuid 11.5.0", + "rayon", + "seq-macro", + "sysctl 0.6.0", +] + +[[package]] +name = "gemm-f16" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca4c06b9b11952071d317604acb332e924e817bd891bec8dfb494168c7cedd4" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "gemm-f32 0.17.1", + "half", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f16" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff95ae3259432f3c3410eaa919033cd03791d81cebd18018393dc147952e109" +dependencies = [ + "dyn-stack 0.13.0", + "gemm-common 0.18.2", + "gemm-f32 0.18.2", + "half", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.5.0", + "rayon", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9a69f51aaefbd9cf12d18faf273d3e982d9d711f60775645ed5c8047b4ae113" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm-f32" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc8d3d4385393304f407392f754cd2dc4b315d05063f62cf09f47b58de276864" +dependencies = [ + "dyn-stack 0.13.0", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.5.0", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa397a48544fadf0b81ec8741e5c0fba0043008113f71f2034def1935645d2b0" +dependencies = [ + "dyn-stack 0.10.0", + "gemm-common 0.17.1", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 10.7.0", + "seq-macro", +] + +[[package]] +name = "gemm-f64" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b2a4f76ce4b8b16eadc11ccf2e083252d8237c1b589558a49b0183545015bd" +dependencies = [ + "dyn-stack 0.13.0", + "gemm-common 0.18.2", + "num-complex", + "num-traits", + "paste", + "raw-cpuid 11.5.0", + "seq-macro", +] + [[package]] name = "generator" version = "0.8.5" @@ -4126,8 +4600,12 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ + "bytemuck", "cfg-if", "crunchy", + "num-traits", + "rand 0.9.1", + "rand_distr", ] [[package]] @@ -4152,6 +4630,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", + "allocator-api2", ] [[package]] @@ -4197,6 +4676,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "hermit-abi" version = "0.4.0" @@ -4215,6 +4700,29 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hf-hub" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc03dcb0b0a83ae3f3363ec811014ae669f083e4e499c66602f447c4828737a1" +dependencies = [ + "dirs 5.0.1", + "futures", + "http 1.3.1", + "indicatif", + "libc", + "log", + "num_cpus", + "rand 0.8.5", + "reqwest", + "serde", + "serde_json", + "thiserror 2.0.12", + "tokio", + "ureq", + "windows-sys 0.59.0", +] + [[package]] name = "hmac" version = "0.12.1" @@ -4224,6 +4732,31 @@ dependencies = [ "digest", ] +[[package]] +name = "hnsw_rs" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e59cf4d04a56c67454ad104938ae4c785bc5db7ac17d27b93e3cb5a9abe6a5" +dependencies = [ + "anndists", + "anyhow", + "bincode", + "cfg-if", + "cpu-time", + "env_logger 0.10.2", + "hashbrown 0.14.5", + "indexmap 2.9.0", + "lazy_static", + "log", + "mmap-rs", + "num-traits", + "num_cpus", + "parking_lot", + "rand 0.8.5", + "rayon", + "serde", +] + [[package]] name = "home" version = "0.5.11" @@ -4315,6 +4848,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humantime" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" + [[package]] name = "hyper" version = "0.14.32" @@ -4447,7 +4986,7 @@ checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" dependencies = [ "displaydoc", "potential_utf", - "yoke", + "yoke 0.8.0", "zerofrom", "zerovec", ] @@ -4519,7 +5058,7 @@ dependencies = [ "stable_deref_trait", "tinystr", "writeable", - "yoke", + "yoke 0.8.0", "zerofrom", "zerotrie", "zerovec", @@ -4744,6 +5283,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.12.1" @@ -5010,6 +5558,12 @@ dependencies = [ "windows-targets 0.53.0", ] +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + [[package]] name = "libmimalloc-sys" version = "0.1.42" @@ -5188,6 +5742,22 @@ dependencies = [ "tracing", ] +[[package]] +name = "macro_rules_attribute" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a82271f7bc033d84bbca59a3ce3e4159938cb08a9c3aebbe54d215131518a13" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568" + [[package]] name = "malloc_buf" version = "0.0.6" @@ -5248,6 +5818,16 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +dependencies = [ + "libc", + "stable_deref_trait", +] + [[package]] name = "memmem" version = "0.1.1" @@ -5263,6 +5843,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + [[package]] name = "memoffset" version = "0.9.1" @@ -5343,6 +5932,23 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "mmap-rs" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86968d85441db75203c34deefd0c88032f275aaa85cee19a1dcfff6ae9df56da" +dependencies = [ + "bitflags 1.3.2", + "combine", + "libc", + "mach2", + "nix 0.26.4", + "sysctl 0.5.5", + "thiserror 1.0.69", + "widestring", + "windows 0.48.0", +] + [[package]] name = "mockito" version = "1.7.0" @@ -5389,6 +5995,27 @@ dependencies = [ "uuid", ] +[[package]] +name = "monostate" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafe1be9d0c75642e3e50fedc7ecadf1ef1cbce6eb66462153fc44245343fbee" +dependencies = [ + "monostate-impl", + "serde", +] + +[[package]] +name = "monostate-impl" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "muda" version = "0.15.3" @@ -5493,6 +6120,19 @@ dependencies = [ "pin-utils", ] +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset 0.7.1", + "pin-utils", +] + [[package]] name = "nix" version = "0.29.0" @@ -5713,7 +6353,7 @@ dependencies = [ "chrono-humanize", "dirs 5.0.1", "dirs-sys 0.4.1", - "fancy-regex", + "fancy-regex 0.14.0", "heck 0.5.0", "indexmap 2.9.0", "log", @@ -5764,7 +6404,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327999b774d78b301a6b68c33d312a1a8047c59fb8971b6552ebf823251f1481" dependencies = [ "crossterm_winapi", - "fancy-regex", + "fancy-regex 0.14.0", "log", "lscolors", "nix 0.29.0", @@ -5776,6 +6416,20 @@ dependencies = [ "unicase", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -5786,6 +6440,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "bytemuck", + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -5822,6 +6486,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-rational" version = "0.4.2" @@ -5840,6 +6515,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.9", + "libc", ] [[package]] @@ -6973,19 +7659,45 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42ea446cab60335f76979ec15e12619a2165b5ae2c12166bef27d283a9fadf" dependencies = [ - "idna", - "psl-types", + "idna", + "psl-types", +] + +[[package]] +name = "pulldown-cmark" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +dependencies = [ + "bitflags 2.9.1", + "memchr", + "unicase", +] + +[[package]] +name = "pulp" +version = "0.18.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0a01a0dc67cf4558d279f0c25b0962bd08fc6dec0137699eae304103e882fe6" +dependencies = [ + "bytemuck", + "libm", + "num-complex", + "reborrow", ] [[package]] -name = "pulldown-cmark" -version = "0.13.0" +name = "pulp" +version = "0.21.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907" dependencies = [ - "bitflags 2.9.1", - "memchr", - "unicase", + "bytemuck", + "cfg-if", + "libm", + "num-complex", + "reborrow", + "version_check", ] [[package]] @@ -7325,6 +8037,16 @@ dependencies = [ "getrandom 0.3.3", ] +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand 0.9.1", +] + [[package]] name = "rand_hc" version = "0.2.0" @@ -7393,6 +8115,24 @@ dependencies = [ "rgb", ] +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "raw-cpuid" +version = "11.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" +dependencies = [ + "bitflags 2.9.1", +] + [[package]] name = "raw-window-handle" version = "0.6.2" @@ -7409,6 +8149,17 @@ dependencies = [ "rayon-core", ] +[[package]] +name = "rayon-cond" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" +dependencies = [ + "either", + "itertools 0.11.0", + "rayon", +] + [[package]] name = "rayon-core" version = "1.12.1" @@ -7419,6 +8170,12 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "reborrow" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" + [[package]] name = "redox_syscall" version = "0.5.12" @@ -7557,6 +8314,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "sync_wrapper", + "system-configuration", "tokio", "tokio-rustls 0.26.2", "tokio-socks", @@ -7566,6 +8324,7 @@ dependencies = [ "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "webpki-roots", "windows-registry", @@ -7683,6 +8442,16 @@ dependencies = [ "ordered-multimap", ] +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -7883,6 +8652,16 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "safetensors" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "same-file" version = "1.0.6" @@ -7988,6 +8767,33 @@ dependencies = [ "thin-slice", ] +[[package]] +name = "semantic_search_client" +version = "1.10.1" +dependencies = [ + "anyhow", + "bm25", + "candle-core", + "candle-nn", + "candle-transformers", + "chrono", + "dirs 5.0.1", + "hf-hub", + "hnsw_rs", + "indicatif", + "once_cell", + "rayon", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.12", + "tokenizers", + "tokio", + "tracing", + "uuid", + "walkdir", +] + [[package]] name = "semver" version = "1.0.26" @@ -8003,6 +8809,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f97841a747eef040fcd2e7b3b9a220a7205926e60488e673d9e4926d27772ce5" +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.219" @@ -8320,7 +9132,7 @@ dependencies = [ "crossbeam", "defer-drop", "derive_builder", - "env_logger", + "env_logger 0.11.8", "fuzzy-matcher", "indexmap 2.9.0", "log", @@ -8363,6 +9175,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + [[package]] name = "soup3" version = "0.5.0" @@ -8409,6 +9232,18 @@ dependencies = [ "strum 0.24.1", ] +[[package]] +name = "spm_precompiled" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" +dependencies = [ + "base64 0.13.1", + "nom", + "serde", + "unicode-segmentation", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -8421,6 +9256,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "stop-words" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c6a86be9f7fa4559b7339669e72026eb437f5e9c5a85c207fe1033079033a17" +dependencies = [ + "serde_json", +] + [[package]] name = "string_cache" version = "0.8.9" @@ -8624,6 +9468,34 @@ dependencies = [ "libc", ] +[[package]] +name = "sysctl" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" +dependencies = [ + "bitflags 2.9.1", + "byteorder", + "enum-as-inner", + "libc", + "thiserror 1.0.69", + "walkdir", +] + +[[package]] +name = "sysctl" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" +dependencies = [ + "bitflags 2.9.1", + "byteorder", + "enum-as-inner", + "libc", + "thiserror 1.0.69", + "walkdir", +] + [[package]] name = "sysinfo" version = "0.33.1" @@ -8780,6 +9652,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "terminal_size" version = "0.4.2" @@ -8989,6 +9870,38 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tokenizers" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3169b3195f925496c895caee7978a335d49218488ef22375267fba5a46a40bd7" +dependencies = [ + "aho-corasick", + "derive_builder", + "esaxx-rs", + "getrandom 0.2.16", + "indicatif", + "itertools 0.13.0", + "lazy_static", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand 0.8.5", + "rayon", + "rayon-cond", + "regex", + "regex-syntax 0.8.5", + "serde", + "serde_json", + "spm_precompiled", + "thiserror 2.0.12", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + [[package]] name = "tokio" version = "1.45.0" @@ -9418,6 +10331,27 @@ dependencies = [ "winapi", ] +[[package]] +name = "ug" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90b70b37e9074642bc5f60bb23247fd072a84314ca9e71cdf8527593406a0dd3" +dependencies = [ + "gemm 0.18.2", + "half", + "libloading 0.8.7", + "memmap2", + "num", + "num-traits", + "num_cpus", + "rayon", + "safetensors", + "serde", + "thiserror 1.0.69", + "tracing", + "yoke 0.7.5", +] + [[package]] name = "unicase" version = "2.8.1" @@ -9436,6 +10370,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" +[[package]] +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" +dependencies = [ + "smallvec", +] + [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -9454,6 +10397,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "unsafe-libyaml" version = "0.2.11" @@ -9466,6 +10415,25 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "once_cell", + "rustls 0.23.27", + "rustls-pki-types", + "serde", + "serde_json", + "socks", + "url", + "webpki-roots", +] + [[package]] name = "url" version = "2.5.4" @@ -9709,6 +10677,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wayland-backend" version = "0.3.10" @@ -9930,6 +10911,12 @@ dependencies = [ "winsafe", ] +[[package]] +name = "whichlang" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9aa3ad29c3d08283ac6b769e3ec15ad1ddb88af7d2e9bc402c574973b937e7" + [[package]] name = "whoami" version = "1.6.0" @@ -9941,6 +10928,12 @@ dependencies = [ "web-sys", ] +[[package]] +name = "widestring" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd7cf3379ca1aac9eea11fba24fd7e315d621f8dfe35c8d7d2be8b793726e07d" + [[package]] name = "winapi" version = "0.3.9" @@ -9972,6 +10965,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows" version = "0.56.0" @@ -10765,6 +11767,18 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive 0.7.5", + "zerofrom", +] + [[package]] name = "yoke" version = "0.8.0" @@ -10773,10 +11787,22 @@ checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" dependencies = [ "serde", "stable_deref_trait", - "yoke-derive", + "yoke-derive 0.8.0", "zerofrom", ] +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", + "synstructure", +] + [[package]] name = "yoke-derive" version = "0.8.0" @@ -10994,7 +12020,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" dependencies = [ "displaydoc", - "yoke", + "yoke 0.8.0", "zerofrom", ] @@ -11004,7 +12030,7 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" dependencies = [ - "yoke", + "yoke 0.8.0", "zerofrom", "zerovec-derive", ] @@ -11020,6 +12046,21 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "zip" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164" +dependencies = [ + "arbitrary", + "crc32fast", + "crossbeam-utils", + "displaydoc", + "indexmap 2.9.0", + "num_enum", + "thiserror 1.0.69", +] + [[package]] name = "zstd" version = "0.13.3" diff --git a/Cargo.toml b/Cargo.toml index d1d00816f3..ca543ccfdf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,6 @@ members = [ "tests/fig-api/fig-api-mock", "tests/figterm2", ] -exclude = ["crates/semantic_search_client"] [workspace.package] authors = [ diff --git a/crates/semantic_search_client/src/index/vector_index.rs b/crates/semantic_search_client/src/index/vector_index.rs index 770641dd7b..3a849ec6d4 100644 --- a/crates/semantic_search_client/src/index/vector_index.rs +++ b/crates/semantic_search_client/src/index/vector_index.rs @@ -9,6 +9,8 @@ use tracing::{ pub struct VectorIndex { /// The HNSW index index: Hnsw<'static, f32, DistCosine>, + /// Counter to track the number of elements + count: std::sync::atomic::AtomicUsize, } impl VectorIndex { @@ -33,7 +35,10 @@ impl VectorIndex { ); debug!("Vector index created successfully"); - Self { index } + Self { + index, + count: std::sync::atomic::AtomicUsize::new(0), + } } /// Insert a vector into the index @@ -44,6 +49,7 @@ impl VectorIndex { /// * `id` - The ID associated with the vector pub fn insert(&self, vector: &[f32], id: usize) { self.index.insert((vector, id)); + self.count.fetch_add(1, std::sync::atomic::Ordering::SeqCst); } /// Search for nearest neighbors @@ -72,9 +78,7 @@ impl VectorIndex { /// /// The number of elements in the index pub fn len(&self) -> usize { - // Since HNSW doesn't provide a direct way to get the count, - // we'll use a simple counter that's updated when items are inserted - self.index.get_ef_construction() + self.count.load(std::sync::atomic::Ordering::SeqCst) } /// Check if the index is empty @@ -83,7 +87,6 @@ impl VectorIndex { /// /// `true` if the index is empty, `false` otherwise pub fn is_empty(&self) -> bool { - // For simplicity, we'll assume it's empty if ef_construction is at default value - self.index.get_ef_construction() == 100 + self.len() == 0 } }