Skip to content

Commit 6ee72bf

Browse files
authored
Merge pull request #108 from TileDB-Inc/ihn/add-utils
Add vector_search.utils
2 parents 09b855b + 7dd61a1 commit 6ee72bf

File tree

2 files changed

+38
-0
lines changed

2 files changed

+38
-0
lines changed

apis/python/src/tiledb/vector_search/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from . import utils
12
from .index import FlatIndex, IVFFlatIndex
23
from .ingestion import ingest
34
from .module import load_as_array
@@ -29,4 +30,5 @@
2930
"ivf_index_tdb",
3031
"array_to_matrix",
3132
"partition_ivf_index",
33+
"utils"
3234
]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import tiledb
2+
import numpy as np
3+
import io
4+
5+
def _load_vecs_t(uri, dtype, ctx_or_config=None):
    """Read a ``*vecs``-style file into a 2-D array of ``dtype`` elements.

    The file is interpreted as a sequence of fixed-size records, each made of
    a 4-byte ``int32`` dimension header followed by ``ndim`` elements of
    ``dtype``. ``ndim`` is taken from the first record's header only; the
    headers of the remaining records are skipped without being checked.

    Parameters
    ----------
    uri
        Location of the file; it is opened in ``"rb"`` mode via ``tiledb.VFS``.
    dtype
        Element type of the stored vectors (anything ``np.dtype`` accepts).
        Any item size is supported, not only 4-byte types.
    ctx_or_config
        Forwarded unchanged to ``tiledb.scope_ctx``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_vectors, ndim)`` with the requested ``dtype``. It
        is a view onto the buffer that was read; the payload is not copied.

    Raises
    ------
    ValueError
        If the file is shorter than one header, if the header holds a
        non-positive dimension, or if the file size is not a whole number of
        records.
    """
    dtype = np.dtype(dtype)

    # Only the read itself needs the TileDB context; parsing happens after.
    with tiledb.scope_ctx(ctx_or_config) as ctx:
        vfs = tiledb.VFS(ctx.config())
        with vfs.open(uri, "rb") as f:
            data = f.read(-1)

    raw = np.frombuffer(data, dtype=np.uint8)
    header_nbytes = np.dtype(np.int32).itemsize
    if raw.size < header_nbytes:
        raise ValueError(
            f"File {uri} is too short to hold a dimension header: "
            f"{raw.size} bytes"
        )

    ndim = int(raw[:header_nbytes].view(np.int32)[0])
    if ndim <= 0:
        # A zero/negative header would make the record size meaningless
        # (and could make the modulo below divide by zero).
        raise ValueError(f"Invalid vector dimension {ndim} in file {uri}")

    elem_nbytes = header_nbytes + ndim * dtype.itemsize
    if raw.size % elem_nbytes != 0:
        raise ValueError(
            f"Mismatched dims to bytes in file {uri}: {raw.size}, {elem_nbytes}"
        )

    # A packed record dtype describes one on-disk entry exactly, so selecting
    # the "vec" field drops the 4-byte header for every element width
    # (including 1-byte elements, where the header spans four columns).
    record = np.dtype([("dim", np.int32), ("vec", dtype, (ndim,))])
    return raw.view(record)["vec"]
28+
29+
def load_ivecs(uri, ctx_or_config=None):
    """Load the file at ``uri`` with ``int32`` as the element type.

    Thin wrapper: forwards ``uri`` and ``ctx_or_config`` to ``_load_vecs_t``
    with the dtype fixed to ``np.int32`` and returns that helper's result.
    """
    element_type = np.int32
    return _load_vecs_t(uri, element_type, ctx_or_config)
31+
32+
def load_fvecs(uri, ctx_or_config=None):
    """Load the file at ``uri`` with ``float32`` as the element type.

    Thin wrapper: forwards ``uri`` and ``ctx_or_config`` to ``_load_vecs_t``
    with the dtype fixed to ``np.float32`` and returns that helper's result.
    """
    element_type = np.float32
    return _load_vecs_t(uri, element_type, ctx_or_config)
34+
35+
def load_bvecs(uri, ctx_or_config=None):
    """Load the file at ``uri`` with ``uint8`` as the element type.

    Thin wrapper: forwards ``uri`` and ``ctx_or_config`` to ``_load_vecs_t``
    with the dtype fixed to ``np.uint8`` and returns that helper's result.
    """
    element_type = np.uint8
    return _load_vecs_t(uri, element_type, ctx_or_config)

0 commit comments

Comments
 (0)