Skip to content

Commit 71468cc

Browse files
committed
Fix performance degradation for NumPy 1.x
1 parent c8a268e commit 71468cc

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

hyperloglog/hll.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,12 +66,19 @@ def bit_length_vec(arr):
6666
bits |= bits >> 16
6767
bits |= bits >> 32
6868
return np.bitwise_count(bits)
69+
70+
# NumPy 2.x has no performance penalty for small integer dtypes
71+
HLL_COUNTER_TYPE = np.int8
72+
6973
else:
7074
def bit_length_vec(arr):
7175
_, high_exp = np.frexp(arr >> 32)
7276
_, low_exp = np.frexp(arr & 0xFFFFFFFF)
7377
return np.where(high_exp, high_exp + 32, low_exp)
7478

79+
# int8/16/32 use less memory but are much slower than int64 on NumPy 1.x
80+
HLL_COUNTER_TYPE = np.int64
81+
7582

7683
def get_rho_vec(w, max_width):
7784
rho = max_width - bit_length_vec(w) + 1
@@ -108,7 +115,7 @@ def __init__(self, error_rate):
108115
self.alpha = get_alpha(p)
109116
self.p = p
110117
self.m = 1 << p
111-
self.M = np.zeros(self.m, np.int8)
118+
self.M = np.zeros(self.m, HLL_COUNTER_TYPE)
112119

113120
def __getstate__(self):
114121
return dict([x, getattr(self, x)] for x in self.__slots__)
@@ -132,7 +139,8 @@ def add(self, value):
132139
w = x >> self.p
133140
rho = get_rho(w, 64 - self.p)
134141

135-
self.M[j] = max(self.M[j], rho)
142+
if rho > self.M[j]:
143+
self.M[j] = rho
136144

137145
def add_bulk(self, values):
138146
"""

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from distutils.core import setup
44

5-
version = '0.1.4'
5+
version = '0.1.5'
66

77
setup(
88
name='hyperloglog',

0 commit comments

Comments
 (0)