Skip to content

Commit b2f812e

Browse files
committed
0.0.195
1 parent bf41800 commit b2f812e

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

orso/profiler/profiler.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212

1313
import numpy
1414

15-
from orso.cityhash import CityHash32
1615
from orso.profiler import distogram
1716
from orso.schema import FlatColumn
1817
from orso.types import OrsoTypes
@@ -79,18 +78,19 @@ def find_mfvs(data, top_n=MOST_FREQUENT_VALUE_SIZE):
7978

8079

8180
def get_kvm_hashes(data, size: int): # slowest function
81+
from xxhash import xxh32
8282
min_hashes = []
8383

8484
data = list(set(data))
8585

8686
# Build a list with the hash values of the first 'size' elements or all elements if fewer.
87-
min_hashes = [-CityHash32(str(element)) for element in data[:size]]
87+
min_hashes = [-xxh32(str(element)).intdigest() for element in data[:size]]
8888

8989
# Transform the list into a heap in-place.
9090
heapq.heapify(min_hashes)
9191

9292
for element in data[size:]:
93-
hash_value = CityHash32(str(element))
93+
hash_value = xxh32(str(element)).intdigest()
9494

9595
# If the current hash is smaller than the largest in the heap
9696
if hash_value < -min_hashes[0]:

orso/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,5 +10,5 @@
1010
# See the License for the specific language governing permissions and
1111
# limitations under the License.
1212

13-
__version__: str = "0.0.194"
13+
__version__: str = "0.0.195"
1414
__author__: str = "@joocer"

0 commit comments

Comments
 (0)