Merge pull request #631 from unum-cloud/main-dev

ashvardanian · web-flow · commit 818b626be86d · 2025-07-04T16:27:17.000+01:00
diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml
@@ -10,7 +10,7 @@ env:
   GH_TOKEN: ${{ secrets.SEMANTIC_RELEASE_TOKEN }}
   PYTHONUTF8: 1
   PYTHON_VERSION: 3.11
-  DOTNET_VERSION: 7.0.x
+  DOTNET_VERSION: 8.0.x
   ANDROID_NDK_VERSION: 26.3.11579264
   ANDROID_SDK_VERSION: 21
 
@@ -390,7 +390,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['37', '38', '39', '310', '311', '312', '313', '313t']
+        python-version: ['38', '39', '310', '311', '312', '313', '313t']
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -420,7 +420,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['37', '38', '39', '310', '311', '312', '313', '313t']
+        python-version: ['38', '39', '310', '311', '312', '313', '313t']
     steps:
       - name: Check out refreshed version
         uses: actions/checkout@v4
@@ -792,7 +792,7 @@ jobs:
       - name: Setup .NET
         uses: actions/setup-dotnet@v3
         with:
-          dotnet-version: 7.0.x
+          dotnet-version: 8.0.x
 
       - name: Pack project
         run: |
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -160,7 +160,15 @@ cmake --build build_artifacts --config Release
 
 Python bindings are built using PyBind11 and are available on [PyPi](https://pypi.org/project/usearch/).
 The compilation settings are controlled by the `setup.py` and are independent from CMake used for C/C++ builds.
-To install USearch locally:
+To install USearch locally using `uv`:
+
+```sh
+uv venv --python 3.11           # or your preferred Python version
+source .venv/bin/activate       # to activate the virtual environment
+uv pip install -e .             # to build locally from source
+```
+
+Or using `pip` directly:
 
 ```sh
 pip install -e .
@@ -183,7 +191,7 @@ Linting:
 
 ```sh
 pip install ruff
-ruff --format=github --select=E9,F63,F7,F82 --target-version=py37 python
+ruff check --output-format=github --select=E9,F63,F7,F82 --target-version=py310 python
 ```
 
 Before merging your changes you may want to test your changes against the entire matrix of Python versions USearch supports.
diff --git a/csharp/src/Cloud.Unum.USearch.Tests/Cloud.Unum.USearch.Tests.csproj b/csharp/src/Cloud.Unum.USearch.Tests/Cloud.Unum.USearch.Tests.csproj
@@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
-    <TargetFramework>net7.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>
     <ImplicitUsings>enable</ImplicitUsings>
     <IsPackable>false</IsPackable>
     <Nullable>enable</Nullable>
diff --git a/csharp/src/Cloud.Unum.USearch.Tests/USearchIndexTests.cs b/csharp/src/Cloud.Unum.USearch.Tests/USearchIndexTests.cs
@@ -227,6 +227,38 @@ public void Add_DoubleVector_UpdatesIndexOptions()
         }
     }
 
+
+    [Fact]
+    public void Add_BitsVector_UpdatesIndexOptions()
+    {
+        // Arrange: a Hamming-metric index quantized to ScalarKind.Bits1, plus a single byte[] input vector
+        const uint Dimensions = 10;
+        const uint AddKey = 1;
+        const uint NonExistentKey = 2;
+        const uint ExpectedSize = 1;
+        const uint ExpectedCapacity = 1;
+
+        var indexOptions = new IndexOptions(
+            metricKind: MetricKind.Hamming,
+            quantization: ScalarKind.Bits1,
+            dimensions: Dimensions
+        );
+
+        var inputVector = GenerateBitsVector((int)Dimensions);  // NOTE(review): one byte per dimension; Bits1 presumably needs fewer bytes - confirm
+
+        using (var index = new USearchIndex(indexOptions))
+        {
+            // Act: exercise the Add(ulong, byte[]) overload
+            index.Add(AddKey, inputVector);
+
+            // Assert: only the added key is reported as present, and size/capacity account for one entry
+            Assert.True(index.Contains(AddKey));
+            Assert.False(index.Contains(NonExistentKey));
+            Assert.Equal(ExpectedSize, index.Size());
+            Assert.True(ExpectedCapacity <= index.Capacity());
+        }
+    }
+
     [Fact]
     public void Add_ManyFloatVectorsUnderSameKeySeparatelyInMultiKeyIndex_UpdatesIndexOptions()
     {
@@ -488,6 +520,39 @@ public void Get_ManyByteVectorsUnderSameKeyInMultiKeyIndex_ReturnsCorrectValue()
         }
     }
 
+    [Fact]
+    public void Get_ManyBitsVectorsUnderSameKeyInMultiKeyIndex_ReturnsCorrectValue()
+    {
+        // Arrange: a multi-key Bits1 index and BatchSize vectors that all share AddKey
+        const uint Dimensions = 10;
+        const ulong AddKey = 1;
+        const int RetrieveCount = 5;
+        const int BatchSize = 10;
+
+        var indexOptions = new IndexOptions(
+            metricKind: MetricKind.Hamming,
+            quantization: ScalarKind.Bits1,
+            dimensions: Dimensions,
+            multi: true
+        );
+
+        (var inputKeys, var inputVectors) = (
+            Enumerable.Repeat(AddKey, BatchSize).ToArray(),
+            GenerateManyBitsVectors(BatchSize, (int)Dimensions)
+        );
+
+        using (var index = new USearchIndex(indexOptions))
+        {
+            index.Add(inputKeys, inputVectors);
+
+            // Act: request fewer vectors (RetrieveCount) than were stored under the key (BatchSize)
+            int foundVectorsCount = index.Get(AddKey, RetrieveCount, out float[][] retrievedVectors);
+
+            // Assert: only the returned count is checked; the contents of retrievedVectors are not inspected
+            Assert.Equal(RetrieveCount, foundVectorsCount);
+        }
+    }
+
     [Fact]
     public void Get_ManyDoubleVectorsUnderSameKeyInMultiKeyIndex_ReturnsCorrectCountValue()
     {
@@ -1213,6 +1278,11 @@ private static sbyte[] GenerateByteVector(int vectorLength)
         return Enumerable.Range(0, vectorLength).Select(i => (sbyte)i).ToArray();
     }
 
+    private static byte[] GenerateBitsVector(int vectorLength)
+    {
+        return Enumerable.Range(0, vectorLength).Select(i => (byte)i).ToArray();  // vectorLength bytes holding 0, 1, 2, ... cast to byte
+    }
+
     private static sbyte[][] GenerateManyByteVectors(int n, int vectorLength)
     {
         var result = new sbyte[n][];
@@ -1223,6 +1293,16 @@ private static sbyte[][] GenerateManyByteVectors(int n, int vectorLength)
         return result;
     }
 
+    private static byte[][] GenerateManyBitsVectors(int n, int vectorLength)
+    {
+        var result = new byte[n][];  // n rows, each an independent array with the same 0, 1, 2, ... byte pattern
+        for (int row = 0; row < n; row++)
+        {
+            result[row] = GenerateBitsVector(vectorLength);  // reuse the single-vector helper; avoids a lambda parameter shadowing the loop index
+        }
+        return result;
+    }
+
     private static double[] GenerateDoubleVector(int n)
     {
         return Enumerable.Range(0, n).Select(i => (double)i).ToArray();
@@ -1241,7 +1321,7 @@ private static float[][] GenerateManyDoubleVectors(int n, int vectorLength)
     #endregion
 }
 
-internal class RealEqualityComparer<T> : IEqualityComparer<T> where T : unmanaged
+internal sealed class RealEqualityComparer<T> : IEqualityComparer<T> where T : unmanaged
 {
     private readonly T _threshold;
 
diff --git a/csharp/src/Cloud.Unum.USearch/USearchIndex.cs b/csharp/src/Cloud.Unum.USearch/USearchIndex.cs
@@ -251,6 +251,27 @@ public void Add(ulong key, double[] vector)
         }
     }
 
+    /// <summary>
+    /// Adds a vector with a specific key to the index, handing the bytes to the native call as <see cref="ScalarKind.Bits1"/> data.
+    /// </summary>
+    /// <param name="key">The key associated with the vector.</param>
+    /// <param name="vector">The vector data to be added; the array is pinned while the native call runs.</param>
+    public void Add(ulong key, byte[] vector)
+    {
+        this.CheckIncreaseCapacity(1);
+        GCHandle handle = GCHandle.Alloc(vector, GCHandleType.Pinned);  // pin so the address handed to native code stays valid
+        try
+        {
+            IntPtr vectorPtr = handle.AddrOfPinnedObject();
+            usearch_add(this._index, key, vectorPtr, ScalarKind.Bits1, out IntPtr error);
+            HandleError(error);
+        }
+        finally
+        {
+            handle.Free();  // release the pin on every exit path
+        }
+    }
+
     /// <summary>
     /// Adds multiple vectors with specific keys to the index.
     /// </summary>
@@ -299,6 +320,30 @@ public void Add(ulong[] keys, sbyte[][] vectors)
         }
     }
 
+    /// <summary>
+    /// Adds multiple vectors with specific keys to the index, one native call per vector, each passed as <see cref="ScalarKind.Bits1"/> data.
+    /// </summary>
+    /// <param name="keys">The keys associated with the vectors; indexed in step with <paramref name="vectors"/>, so it needs an entry for every vector.</param>
+    /// <param name="vectors">The vector data to be added; each inner array is pinned only for the duration of its own native call.</param>
+    public void Add(ulong[] keys, byte[][] vectors)
+    {
+        this.CheckIncreaseCapacity((ulong)vectors.Length);
+        for (int i = 0; i < vectors.Length; i++)
+        {
+            GCHandle handle = GCHandle.Alloc(vectors[i], GCHandleType.Pinned);  // pin this row so its address stays valid
+            try
+            {
+                IntPtr vectorPtr = handle.AddrOfPinnedObject();
+                usearch_add(this._index, keys[i], vectorPtr, ScalarKind.Bits1, out IntPtr error);
+                HandleError(error);
+            }
+            finally
+            {
+                handle.Free();  // release the pin on every exit path before moving to the next row
+            }
+        }
+    }
+
     /// <summary>
     /// Adds multiple vectors with specific keys to the index.
     /// </summary>
diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp
@@ -757,12 +757,12 @@ class index_dense_gt {
     };
 
     // clang-format off
-    add_result_t add(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.b1x8); }
-    add_result_t add(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.i8); }
-    add_result_t add(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.f16); }
-    add_result_t add(vector_key_t key, bf16_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.bf16); }
-    add_result_t add(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.f32); }
-    add_result_t add(vector_key_t key, f64_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.f64); }
+    add_result_t add(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.b1x8); }
+    add_result_t add(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.i8); }
+    add_result_t add(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f16); }
+    add_result_t add(vector_key_t key, bf16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.bf16); }
+    add_result_t add(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f32); }
+    add_result_t add(vector_key_t key, f64_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f64); }
 
     search_result_t search(b1x8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.b1x8); }
     search_result_t search(i8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.i8); }
@@ -2002,14 +2002,13 @@ class index_dense_gt {
     template <typename scalar_at>
     add_result_t add_(                             //
         vector_key_t key, scalar_at const* vector, //
-        std::size_t thread, bool force_vector_copy, cast_punned_t const& cast) {
+        std::size_t thread, bool copy_vector, cast_punned_t const& cast) {
 
         if (!multi() && config().enable_key_lookups && contains(key))
             return add_result_t{}.failed("Duplicate keys not allowed in high-level wrappers");
 
         // Cast the vector, if needed for compatibility with `metric_`
         thread_lock_t lock = thread_lock_(thread);
-        bool copy_vector = !config_.exclude_vectors || force_vector_copy;
         byte_t const* vector_data = reinterpret_cast<byte_t const*>(vector);
         {
             byte_t* casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id;
diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py
@@ -280,7 +280,7 @@ def test_index_contains_remove_rename(batch_size):
     removed_keys = keys[: batch_size // 2]
     remaining_keys = keys[batch_size // 2 :]
     index.remove(removed_keys)
-    del index[removed_keys] # ! This will trigger the `__delitem__` dunder method
+    del index[removed_keys]  # ! This will trigger the `__delitem__` dunder method
     assert len(index) == (len(keys) - len(removed_keys))
     assert np.sum(index.contains(keys)) == len(remaining_keys)
     assert np.sum(index.count(keys)) == len(remaining_keys)
@@ -345,3 +345,33 @@ def test_index_clustering(ndim, metric, quantization, dtype, batch_size):
     clusters: Clustering = index.cluster(min_count=3, max_count=10, threads=threads)
     unique_clusters = set(clusters.matches.keys.flatten().tolist())
     assert len(unique_clusters) >= 3 and len(unique_clusters) <= 10
+
+
+def test_index_copied_memory_usage():
+    """Check that adding vectors with `copy=True` reports a larger `memory_usage` than adding the same vectors with `copy=False`."""
+    reset_randomness()
+
+    ndim = 128
+    batch_size = 1000
+    dtype = np.float32  # ! Ensure same type for both vectors and index
+    vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype)
+    keys = np.arange(batch_size)
+
+    # Create index with `copy=True`; `threads` is a name taken from the enclosing module scope
+    index_copied = Index(ndim=ndim, metric=MetricKind.Cos, dtype=dtype, multi=False)
+    index_copied.add(keys, vectors, copy=True, threads=threads)
+
+    # Create index with `copy=False`; NOTE(review): presumably it then references `vectors` in place, so the array must outlive it - confirm
+    index_viewing = Index(ndim=ndim, metric=MetricKind.Cos, dtype=dtype, multi=False)
+    index_viewing.add(keys, vectors, copy=False, threads=threads)
+
+    # Both should have same number of entries, since the same keys and vectors were added to each
+    assert len(index_copied) == len(index_viewing) == batch_size
+
+    # Memory usage should be larger when `copy=True`; compared via the `memory_usage` attribute of each index
+    memory_with_copy = index_copied.memory_usage
+    memory_without_copy = index_viewing.memory_usage
+
+    assert (
+        memory_with_copy > memory_without_copy
+    ), f"Expected default index addition to use more memory than copy=False ({memory_with_copy} vs {memory_without_copy})"