Skip to content

Commit 818b626

Browse files
authored
Merge pull request #631 from unum-cloud/main-dev
2 parents 68e403a + 6b8c0e2 commit 818b626

File tree

8 files changed

+179
-17
lines changed

8 files changed

+179
-17
lines changed

.github/workflows/prerelease.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ env:
1010
GH_TOKEN: ${{ secrets.SEMANTIC_RELEASE_TOKEN }}
1111
PYTHONUTF8: 1
1212
PYTHON_VERSION: 3.11
13-
DOTNET_VERSION: 7.0.x
13+
DOTNET_VERSION: 8.0.x
1414
ANDROID_NDK_VERSION: 26.3.11579264
1515
ANDROID_SDK_VERSION: 21
1616

@@ -390,7 +390,7 @@ jobs:
390390
strategy:
391391
matrix:
392392
os: [ubuntu-latest, macos-latest, windows-latest]
393-
python-version: ['37', '38', '39', '310', '311', '312', '313', '313t']
393+
python-version: ['38', '39', '310', '311', '312', '313', '313t']
394394
steps:
395395
- uses: actions/checkout@v4
396396
- name: Set up Python

.github/workflows/release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ jobs:
420420
strategy:
421421
matrix:
422422
os: [ubuntu-latest, macos-latest, windows-latest]
423-
python-version: ['37', '38', '39', '310', '311', '312', '313', '313t']
423+
python-version: ['38', '39', '310', '311', '312', '313', '313t']
424424
steps:
425425
- name: Check out refreshed version
426426
uses: actions/checkout@v4
@@ -792,7 +792,7 @@ jobs:
792792
- name: Setup .NET
793793
uses: actions/setup-dotnet@v3
794794
with:
795-
dotnet-version: 7.0.x
795+
dotnet-version: 8.0.x
796796

797797
- name: Pack project
798798
run: |

CONTRIBUTING.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,15 @@ cmake --build build_artifacts --config Release
160160

161161
Python bindings are built using PyBind11 and are available on [PyPi](https://pypi.org/project/usearch/).
162162
The compilation settings are controlled by the `setup.py` and are independent from CMake used for C/C++ builds.
163-
To install USearch locally:
163+
To install USearch locally using `uv`:
164+
165+
```sh
166+
uv venv --python 3.11 # or your preferred Python version
167+
source .venv/bin/activate # to activate the virtual environment
168+
uv pip install -e . # to build locally from source
169+
```
170+
171+
Or using `pip` directly:
164172

165173
```sh
166174
pip install -e .
@@ -183,7 +191,7 @@ Linting:
183191

184192
```sh
185193
pip install ruff
186-
ruff --format=github --select=E9,F63,F7,F82 --target-version=py37 python
194+
ruff --format=github --select=E9,F63,F7,F82 --target-version=py310 python
187195
```
188196

189197
Before merging, you may want to test your changes against the entire matrix of Python versions USearch supports.

csharp/src/Cloud.Unum.USearch.Tests/Cloud.Unum.USearch.Tests.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFramework>net7.0</TargetFramework>
4+
<TargetFramework>net8.0</TargetFramework>
55
<ImplicitUsings>enable</ImplicitUsings>
66
<IsPackable>false</IsPackable>
77
<Nullable>enable</Nullable>

csharp/src/Cloud.Unum.USearch.Tests/USearchIndexTests.cs

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,38 @@ public void Add_DoubleVector_UpdatesIndexOptions()
227227
}
228228
}
229229

230+
231+
[Fact]
public void Add_BitsVector_UpdatesIndexOptions()
{
    // Arrange: Hamming metric over 1-bit scalars; no `multi` flag is passed.
    const uint Dimensions = 10;
    const uint AddKey = 1;
    const uint NonExistentKey = 2;
    const uint ExpectedSize = 1;
    const uint ExpectedCapacity = 1;

    var options = new IndexOptions(
        metricKind: MetricKind.Hamming,
        quantization: ScalarKind.Bits1,
        dimensions: Dimensions
    );
    byte[] bits = GenerateBitsVector((int)Dimensions);

    // The index is disposed when the test method returns.
    using var index = new USearchIndex(options);

    // Act
    index.Add(AddKey, bits);

    // Assert: only the inserted key is present, and size/capacity reflect one entry.
    Assert.True(index.Contains(AddKey));
    Assert.False(index.Contains(NonExistentKey));
    Assert.Equal(ExpectedSize, index.Size());
    Assert.True(index.Capacity() >= ExpectedCapacity);
}
261+
230262
[Fact]
231263
public void Add_ManyFloatVectorsUnderSameKeySeparatelyInMultiKeyIndex_UpdatesIndexOptions()
232264
{
@@ -488,6 +520,39 @@ public void Get_ManyByteVectorsUnderSameKeyInMultiKeyIndex_ReturnsCorrectValue()
488520
}
489521
}
490522

523+
[Fact]
public void Get_ManyBitsVectorsUnderSameKeyInMultiKeyIndex_ReturnsCorrectValue()
{
    // Arrange: a multi-key Hamming index holding BatchSize vectors under one key.
    const uint Dimensions = 10;
    const ulong AddKey = 1;
    const int RetrieveCount = 5;
    const int BatchSize = 10;

    var options = new IndexOptions(
        metricKind: MetricKind.Hamming,
        quantization: ScalarKind.Bits1,
        dimensions: Dimensions,
        multi: true
    );

    ulong[] repeatedKeys = Enumerable.Repeat(AddKey, BatchSize).ToArray();
    byte[][] bitVectors = GenerateManyBitsVectors(BatchSize, (int)Dimensions);

    // The index is disposed when the test method returns.
    using var index = new USearchIndex(options);
    index.Add(repeatedKeys, bitVectors);

    // Act: only the returned count is inspected, so the vectors themselves are discarded.
    int foundVectorsCount = index.Get(AddKey, RetrieveCount, out float[][] _);

    // Assert
    Assert.Equal(RetrieveCount, foundVectorsCount);
}
555+
491556
[Fact]
492557
public void Get_ManyDoubleVectorsUnderSameKeyInMultiKeyIndex_ReturnsCorrectCountValue()
493558
{
@@ -1213,6 +1278,11 @@ private static sbyte[] GenerateByteVector(int vectorLength)
12131278
return Enumerable.Range(0, vectorLength).Select(i => (sbyte)i).ToArray();
12141279
}
12151280

1281+
/// <summary>
/// Builds a byte array of <paramref name="vectorLength"/> elements in which
/// the element at position <c>p</c> holds <c>(byte)p</c>.
/// </summary>
private static byte[] GenerateBitsVector(int vectorLength) =>
    Enumerable.Range(0, vectorLength)
        .Select(position => (byte)position)
        .ToArray();
1285+
12161286
private static sbyte[][] GenerateManyByteVectors(int n, int vectorLength)
12171287
{
12181288
var result = new sbyte[n][];
@@ -1223,6 +1293,16 @@ private static sbyte[][] GenerateManyByteVectors(int n, int vectorLength)
12231293
return result;
12241294
}
12251295

1296+
/// <summary>
/// Builds <paramref name="n"/> independent byte arrays, each of
/// <paramref name="vectorLength"/> elements holding <c>(byte)0, (byte)1, ...</c>.
/// </summary>
private static byte[][] GenerateManyBitsVectors(int n, int vectorLength)
{
    var rows = new byte[n][];
    for (int row = 0; row < n; row++)
    {
        // Each row gets its own freshly allocated array.
        rows[row] = Enumerable.Range(0, vectorLength)
            .Select(column => (byte)column)
            .ToArray();
    }
    return rows;
}
1305+
12261306
private static double[] GenerateDoubleVector(int n)
12271307
{
12281308
return Enumerable.Range(0, n).Select(i => (double)i).ToArray();
@@ -1241,7 +1321,7 @@ private static float[][] GenerateManyDoubleVectors(int n, int vectorLength)
12411321
#endregion
12421322
}
12431323

1244-
internal class RealEqualityComparer<T> : IEqualityComparer<T> where T : unmanaged
1324+
internal sealed class RealEqualityComparer<T> : IEqualityComparer<T> where T : unmanaged
12451325
{
12461326
private readonly T _threshold;
12471327

csharp/src/Cloud.Unum.USearch/USearchIndex.cs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,27 @@ public void Add(ulong key, double[] vector)
251251
}
252252
}
253253

254+
/// <summary>
/// Adds a vector with a specific key to the index.
/// The data is handed to the native library as <see cref="ScalarKind.Bits1"/>.
/// </summary>
/// <param name="key">The key associated with the vector.</param>
/// <param name="vector">The vector data to be added.</param>
/// <exception cref="ArgumentNullException"><paramref name="vector"/> is <c>null</c>.</exception>
public void Add(ulong key, byte[] vector)
{
    // Reject null up front rather than letting it reach the native call.
    if (vector is null)
    {
        throw new ArgumentNullException(nameof(vector));
    }

    // NOTE(review): the array length is not validated against the index
    // dimensionality here - presumably the native side reads
    // ceil(dimensions / 8) bytes; confirm before relying on it.
    this.CheckIncreaseCapacity(1);

    // Pin the managed array so its address stays valid during the native call.
    GCHandle handle = GCHandle.Alloc(vector, GCHandleType.Pinned);
    try
    {
        IntPtr vectorPtr = handle.AddrOfPinnedObject();
        usearch_add(this._index, key, vectorPtr, ScalarKind.Bits1, out IntPtr error);
        HandleError(error);
    }
    finally
    {
        // Always release the pin, even if the native call reported an error.
        handle.Free();
    }
}
274+
254275
/// <summary>
255276
/// Adds multiple vectors with specific keys to the index.
256277
/// </summary>
@@ -299,6 +320,30 @@ public void Add(ulong[] keys, sbyte[][] vectors)
299320
}
300321
}
301322

323+
/// <summary>
/// Adds multiple vectors with specific keys to the index.
/// Each vector is handed to the native library as <see cref="ScalarKind.Bits1"/>.
/// </summary>
/// <param name="keys">The keys associated with the vectors; <c>keys[i]</c> is used for <c>vectors[i]</c>.</param>
/// <param name="vectors">The vector data to be added.</param>
/// <exception cref="ArgumentNullException"><paramref name="keys"/> or <paramref name="vectors"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// There are fewer keys than vectors, or <paramref name="vectors"/> contains a <c>null</c> entry.
/// </exception>
public void Add(ulong[] keys, byte[][] vectors)
{
    // Validate everything before touching the index, so a bad argument
    // cannot leave it with grown capacity and a partial batch inserted.
    if (keys is null)
    {
        throw new ArgumentNullException(nameof(keys));
    }

    if (vectors is null)
    {
        throw new ArgumentNullException(nameof(vectors));
    }

    if (keys.Length < vectors.Length)
    {
        throw new ArgumentException("Each vector must have a corresponding key.", nameof(keys));
    }

    foreach (byte[] candidate in vectors)
    {
        if (candidate is null)
        {
            throw new ArgumentException("Vectors must not contain null entries.", nameof(vectors));
        }
    }

    this.CheckIncreaseCapacity((ulong)vectors.Length);
    for (int i = 0; i < vectors.Length; i++)
    {
        // Pin each managed array so its address stays valid during the native call.
        GCHandle handle = GCHandle.Alloc(vectors[i], GCHandleType.Pinned);
        try
        {
            IntPtr vectorPtr = handle.AddrOfPinnedObject();
            usearch_add(this._index, keys[i], vectorPtr, ScalarKind.Bits1, out IntPtr error);
            HandleError(error);
        }
        finally
        {
            // Always release the pin, even if the native call reported an error.
            handle.Free();
        }
    }
}
346+
302347
/// <summary>
303348
/// Adds multiple vectors with specific keys to the index.
304349
/// </summary>

include/usearch/index_dense.hpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -757,12 +757,12 @@ class index_dense_gt {
757757
};
758758

759759
// clang-format off
760-
add_result_t add(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.b1x8); }
761-
add_result_t add(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.i8); }
762-
add_result_t add(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.f16); }
763-
add_result_t add(vector_key_t key, bf16_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.bf16); }
764-
add_result_t add(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.f32); }
765-
add_result_t add(vector_key_t key, f64_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from.f64); }
760+
add_result_t add(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.b1x8); }
761+
add_result_t add(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.i8); }
762+
add_result_t add(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f16); }
763+
add_result_t add(vector_key_t key, bf16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.bf16); }
764+
add_result_t add(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f32); }
765+
add_result_t add(vector_key_t key, f64_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f64); }
766766

767767
search_result_t search(b1x8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.b1x8); }
768768
search_result_t search(i8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.i8); }
@@ -2002,14 +2002,13 @@ class index_dense_gt {
20022002
template <typename scalar_at>
20032003
add_result_t add_( //
20042004
vector_key_t key, scalar_at const* vector, //
2005-
std::size_t thread, bool force_vector_copy, cast_punned_t const& cast) {
2005+
std::size_t thread, bool copy_vector, cast_punned_t const& cast) {
20062006

20072007
if (!multi() && config().enable_key_lookups && contains(key))
20082008
return add_result_t{}.failed("Duplicate keys not allowed in high-level wrappers");
20092009

20102010
// Cast the vector, if needed for compatibility with `metric_`
20112011
thread_lock_t lock = thread_lock_(thread);
2012-
bool copy_vector = !config_.exclude_vectors || force_vector_copy;
20132012
byte_t const* vector_data = reinterpret_cast<byte_t const*>(vector);
20142013
{
20152014
byte_t* casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id;

python/scripts/test_index.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def test_index_contains_remove_rename(batch_size):
280280
removed_keys = keys[: batch_size // 2]
281281
remaining_keys = keys[batch_size // 2 :]
282282
index.remove(removed_keys)
283-
del index[removed_keys] # ! This will trigger the `__delitem__` dunder method
283+
del index[removed_keys] # ! This will trigger the `__delitem__` dunder method
284284
assert len(index) == (len(keys) - len(removed_keys))
285285
assert np.sum(index.contains(keys)) == len(remaining_keys)
286286
assert np.sum(index.count(keys)) == len(remaining_keys)
@@ -345,3 +345,33 @@ def test_index_clustering(ndim, metric, quantization, dtype, batch_size):
345345
clusters: Clustering = index.cluster(min_count=3, max_count=10, threads=threads)
346346
unique_clusters = set(clusters.matches.keys.flatten().tolist())
347347
assert len(unique_clusters) >= 3 and len(unique_clusters) <= 10
348+
349+
350+
def test_index_copied_memory_usage():
    """Check that adding with `copy=True` reports more memory than adding with `copy=False`."""
    reset_randomness()

    ndim = 128
    batch_size = 1000
    dtype = np.float32  # ! Vectors and index must share the same scalar type
    vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype)
    keys = np.arange(batch_size)

    def build_index(copy):
        # Identical configuration for both indexes; only the `copy` flag differs.
        index = Index(ndim=ndim, metric=MetricKind.Cos, dtype=dtype, multi=False)
        index.add(keys, vectors, copy=copy, threads=threads)
        return index

    index_copied = build_index(True)
    index_viewing = build_index(False)

    # Both indexes must contain every entry of the batch
    assert len(index_copied) == len(index_viewing)
    assert len(index_viewing) == batch_size

    # The copying index is expected to report the larger footprint
    memory_with_copy = index_copied.memory_usage
    memory_without_copy = index_viewing.memory_usage

    assert (
        memory_with_copy > memory_without_copy
    ), f"Expected default index addition to use more memory than copy=False ({memory_with_copy} vs {memory_without_copy})"

0 commit comments

Comments
 (0)