Skip to content

Commit 82677c5

Browse files
committed
move helpers
1 parent f559e68 commit 82677c5

File tree

1 file changed

+66
-1
lines changed

1 file changed

+66
-1
lines changed

ms2query/database/database_utils.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import io
2-
import numpy as np
32
from typing import Optional, Union
3+
import numpy as np
4+
45

56
_NPY_MAGIC = b"\x93NUMPY"
67

@@ -53,3 +54,67 @@ def blob_to_array(b: Union[bytes, memoryview], dtype, copy: bool = True) -> np.n
5354
# (Only valid if you *originally* stored arr.tobytes(); no shape info here.)
5455
arr = np.frombuffer(b, dtype=dtype)
5556
return arr.copy() if copy else arr
57+
58+
59+
60+
# =========================
61+
# Fingerprint helpers
62+
# =========================
63+
64+
def encode_sparse_fp(bits: Optional[np.ndarray], counts: Optional[np.ndarray]) -> tuple[bytes, bytes]:
    """Serialize a sparse fingerprint into two raw byte blobs.

    Bit indices are written as uint32 and counts as int32. The blobs carry
    no dtype or shape header, so the item size has to be fixed here for
    ``decode_sparse_fp`` (which reads uint32 / int32) to recover the values.

    Parameters
    ----------
    bits : array-like of bit indices, or None
    counts : array-like of counts, or None

    Returns
    -------
    (bits_blob, counts_blob)
        C-ordered raw bytes of each array. A None input gives an empty blob.
    """
    if bits is None:
        bits_blob = b""
    else:
        # astype(copy=False) is a no-op when the input is already uint32.
        bits_blob = np.asarray(bits).astype(np.uint32, copy=False).tobytes(order="C")

    if counts is None:
        counts_blob = b""
    else:
        # Always normalise to int32. Narrower unsigned inputs (uint8/uint16)
        # were previously written with their own item size, producing blobs
        # that cannot be read back as int32. uint32 -> int32 keeps the exact
        # same bytes, so blobs for that dtype are unchanged.
        counts_blob = np.asarray(counts).astype(np.int32, copy=False).tobytes(order="C")

    return bits_blob, counts_blob
88+
89+
90+
def decode_sparse_fp(bits_blob: bytes, counts_blob: bytes) -> tuple[np.ndarray, np.ndarray]:
    """Rebuild the (bits, counts) arrays from their raw byte blobs.

    Parameters
    ----------
    bits_blob : raw bytes interpreted as uint32 bit indices
    counts_blob : raw bytes interpreted as int32 counts

    Returns
    -------
    (bits_uint32, counts_int32)
        Freshly copied 1-D arrays. A falsy (empty) blob gives a
        zero-length array of the matching dtype.
    """
    if bits_blob:
        # frombuffer returns a view on the blob; copy so the result owns its data.
        bit_indices = np.frombuffer(bits_blob, dtype=np.uint32).copy()
    else:
        bit_indices = np.zeros(0, dtype=np.uint32)

    if counts_blob:
        # Counts are always interpreted as int32; the blob has no dtype tag.
        count_values = np.frombuffer(counts_blob, dtype=np.int32).copy()
    else:
        count_values = np.zeros(0, dtype=np.int32)

    return bit_indices, count_values
104+
105+
106+
def encode_dense_fp(vec: Optional[np.ndarray]) -> bytes:
    """Serialize a dense vector as raw float32 bytes.

    The input is converted to float32 if it is not already, flattened,
    and dumped in C order. None gives an empty blob.
    """
    if vec is None:
        return b""

    # astype(copy=False) hands back the same array when it is already float32.
    as_float32 = np.asarray(vec).astype(np.float32, copy=False)
    flattened = as_float32.ravel()
    return flattened.tobytes(order="C")
114+
115+
116+
def decode_dense_fp(blob: bytes, dtype: str = "float32") -> np.ndarray:
    """Decode a dense vector from raw bytes.

    Parameters
    ----------
    blob : raw bytes of the vector
    dtype : dtype used to interpret the bytes (default "float32")

    Returns
    -------
    A freshly copied 1-D array of the given dtype. A falsy (empty) blob
    gives a zero-length array.
    """
    element_type = np.dtype(dtype)
    if blob:
        # frombuffer returns a view on the blob; copy so the result owns its data.
        view = np.frombuffer(blob, dtype=element_type)
        return view.copy()
    return np.zeros(0, dtype=element_type)

0 commit comments

Comments
 (0)