Skip to content

Commit 8586d10

Browse files
committed
Add HLL opt for NumPy 2.x
1 parent a2897a8 commit 8586d10

File tree

3 files changed

+20
-9
lines changed

3 files changed

+20
-9
lines changed

hyperloglog/hll.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,22 @@ def get_rho(w, max_width):
5555
return rho
5656

5757

58-
def bit_length_vec(arr):
59-
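# 64-bit safe: take the exponent of each 32-bit half separately, since a full 64-bit integer can round up when converted to float64 for np.frexp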
60-
_, high_exp = np.frexp(arr >> 32)
61-
_, low_exp = np.frexp(arr & 0xFFFFFFFF)
62-
return np.where(high_exp, high_exp + 32, low_exp)
58+
# np.bitwise_count only exists in NumPy 2.x; when it is available, use a
# popcount-based bit length instead of the float-exponent (np.frexp) fallback.
if hasattr(np, 'bitwise_count'):
    def bit_length_vec(arr):
        """Return the bit length of every element of *arr*.

        The bit length is the number of bits needed to represent a value,
        i.e. 0 for 0, 1 for 1, 2 for 2 and 3, and so on.

        NOTE(review): assumes *arr* holds non-negative integers that fit in
        64 bits (the shift cascade below stops at 32) -- confirm with callers.

        Returns an array of C-int dtype, matching the np.frexp-based
        fallback used on older NumPy versions.
        """
        # Smear the highest set bit into every lower position, so that the
        # number of set bits afterwards equals the original bit length.
        # ``arr >> 1`` allocates a new array, so the in-place ``|=`` updates
        # never modify the caller's data.
        bits = arr >> 1
        bits |= arr
        bits |= bits >> 2
        bits |= bits >> 4
        bits |= bits >> 8
        bits |= bits >> 16
        bits |= bits >> 32
        # np.bitwise_count yields uint8. Convert to the signed C-int dtype
        # that the fallback produces, so signed arithmetic on the result
        # (e.g. subtracting it from a width) can go negative instead of
        # silently wrapping around.
        return np.bitwise_count(bits).astype(np.intc)
else:
    def bit_length_vec(arr):
        """Return the bit length of every element of *arr*.

        The bit length is the number of bits needed to represent a value,
        i.e. 0 for 0, 1 for 1, 2 for 2 and 3, and so on.

        NOTE(review): assumes *arr* holds non-negative integers that fit in
        64 bits -- confirm with callers.
        """
        # np.frexp works on floats; its exponent for a positive integer is
        # that integer's bit length. Each 32-bit half converts to float64
        # exactly, whereas a full 64-bit value could round up and report one
        # bit too many.
        _, high_exp = np.frexp(arr >> 32)
        _, low_exp = np.frexp(arr & 0xFFFFFFFF)
        # A non-zero exponent for the high half means the value needs more
        # than 32 bits; otherwise the low half alone decides.
        return np.where(high_exp, high_exp + 32, low_exp)
6374

6475

6576
def get_rho_vec(w, max_width):
@@ -119,8 +130,9 @@ def add(self, value):
119130
x = int.from_bytes(sha1(packb(value)).digest()[:8], byteorder='big')
120131
j = x & (self.m - 1)
121132
w = x >> self.p
133+
rho = get_rho(w, 64 - self.p)
122134

123-
self.M[j] = max(self.M[j], get_rho(w, 64 - self.p))
135+
self.M[j] = max(self.M[j], rho)
124136

125137
def add_bulk(self, values):
126138
"""

hyperloglog/shll.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,8 @@ def add(self, timestamp, value):
8080
Rmax = None
8181
tmp = []
8282
tmax = None
83-
tmp2 = heapq.merge(self.LPFM[j], ((timestamp, R),), reverse=True)
8483

85-
for t, R in tmp2:
84+
for t, R in heapq.merge(self.LPFM[j], ((timestamp, R),), reverse=True):
8685
if tmax is None:
8786
tmax = t - self.window
8887

hyperloglog/test/test_hll.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def test_calc_cardinality_bulk(self):
8989
a = HyperLogLog(rel_err)
9090

9191
# add bytes
92-
a.add_bulk([os.urandom(20) for i in range(card)])
92+
a.add_bulk((os.urandom(20) for i in range(card)))
9393

9494
s += a.card()
9595

0 commit comments

Comments
 (0)