1+ import tiledb
2+ import numpy as np
3+ import io
4+
def _parse_vecs(buf, dtype, uri):
    """Decode the raw bytes of a ``*vecs`` file into a 2-D array.

    The buffer is interpreted as a sequence of fixed-size records, each made
    of a 4-byte ``int32`` dimension header followed by ``ndim`` elements of
    ``dtype``. The dimension is taken from the first record.

    Args:
        buf: Bytes-like object holding the whole file content.
        dtype: Element type of the vectors (anything ``np.dtype`` accepts).
        uri: Only used to build error messages.

    Returns:
        An array of shape ``(n_vectors, ndim)`` and dtype ``dtype`` that is a
        view over ``buf`` (the per-record headers are skipped, not copied).

    Raises:
        ValueError: If the buffer is too short to hold a header, the header
            holds a non-positive dimension, or the buffer length is not a
            multiple of the record size.
    """
    dtype = np.dtype(dtype)
    raw = np.frombuffer(buf, dtype=np.uint8)
    if raw.size < 4:
        raise ValueError(
            f"File {uri} is too short to hold a dimension header: "
            f"{raw.size} bytes"
        )

    # Convert to a Python int right away so the size arithmetic below cannot
    # wrap around in int32.
    ndim = int(raw[:4].view(np.int32)[0])
    if ndim <= 0:
        raise ValueError(f"Invalid vector dimension {ndim} in file {uri}")

    elem_nbytes = 4 + ndim * dtype.itemsize
    if raw.size % elem_nbytes != 0:
        raise ValueError(
            f"Mismatched dims to bytes in file {uri}: {raw.size}, {elem_nbytes}"
        )

    # A packed record dtype describes one (header, payload) pair, so the
    # header is always skipped as 4 bytes regardless of the element size.
    record = np.dtype([("dim", np.int32), ("vec", dtype, (ndim,))])
    return raw.view(record)["vec"]


def _load_vecs_t(uri, dtype, ctx_or_config=None):
    """Read a ``*vecs`` file through the TileDB VFS and return its vectors.

    Args:
        uri: Location of the file, opened with ``tiledb.VFS``.
        dtype: Element type of the stored vectors.
        ctx_or_config: Passed to ``tiledb.scope_ctx``; defaults to ``None``.

    Returns:
        An array of shape ``(n_vectors, ndim)`` with dtype ``dtype``.

    Raises:
        ValueError: If the file content is not a whole number of
            ``(int32 header, ndim elements)`` records.
    """
    with tiledb.scope_ctx(ctx_or_config) as ctx:
        vfs = tiledb.VFS(ctx.config())
        with vfs.open(uri, "rb") as f:
            data = f.read(-1)
    return _parse_vecs(data, dtype, uri)
28+
def load_ivecs(uri, ctx_or_config=None):
    """Read ``uri`` via ``_load_vecs_t`` using ``np.int32`` as element type.

    Args:
        uri: Location of the file; forwarded unchanged.
        ctx_or_config: Forwarded unchanged; defaults to ``None``.

    Returns:
        Whatever ``_load_vecs_t`` returns for this element type.
    """
    element_type = np.int32
    return _load_vecs_t(uri, element_type, ctx_or_config)
31+
def load_fvecs(uri, ctx_or_config=None):
    """Read ``uri`` via ``_load_vecs_t`` using ``np.float32`` as element type.

    Args:
        uri: Location of the file; forwarded unchanged.
        ctx_or_config: Forwarded unchanged; defaults to ``None``.

    Returns:
        Whatever ``_load_vecs_t`` returns for this element type.
    """
    element_type = np.float32
    return _load_vecs_t(uri, element_type, ctx_or_config)
34+
def load_bvecs(uri, ctx_or_config=None):
    """Read ``uri`` via ``_load_vecs_t`` using ``np.uint8`` as element type.

    Args:
        uri: Location of the file; forwarded unchanged.
        ctx_or_config: Forwarded unchanged; defaults to ``None``.

    Returns:
        Whatever ``_load_vecs_t`` returns for this element type.
    """
    element_type = np.uint8
    return _load_vecs_t(uri, element_type, ctx_or_config)
0 commit comments