File tree Expand file tree Collapse file tree 2 files changed +4
-4
lines changed
Expand file tree Collapse file tree 2 files changed +4
-4
lines changed Original file line number Diff line number Diff line change 1212
1313import numpy
1414
15- from orso .cityhash import CityHash32
1615from orso .profiler import distogram
1716from orso .schema import FlatColumn
1817from orso .types import OrsoTypes
@@ -79,18 +78,19 @@ def find_mfvs(data, top_n=MOST_FREQUENT_VALUE_SIZE):
7978
8079
8180def get_kvm_hashes (data , size : int ): # slowest function
81+ from xxhash import xxh32
8282 min_hashes = []
8383
8484 data = list (set (data ))
8585
8686 # Build a list with the negated hash values of the first 'size' elements (or all elements if fewer); negating lets heapq's min-heap keep the largest hash at index 0.
87- min_hashes = [- CityHash32 (str (element )) for element in data [:size ]]
87+ min_hashes = [- xxh32 (str (element )). intdigest ( ) for element in data [:size ]]
8888
8989 # Transform the list into a heap in-place.
9090 heapq .heapify (min_hashes )
9191
9292 for element in data [size :]:
93- hash_value = CityHash32 (str (element ))
93+ hash_value = xxh32 (str (element )). intdigest ( )
9494
9595 # If the current hash is smaller than the largest in the heap
9696 if hash_value < - min_hashes [0 ]:
Original file line number Diff line number Diff line change 1010# See the License for the specific language governing permissions and
1111# limitations under the License.
1212
13- __version__ : str = "0.0.194 "
13+ __version__ : str = "0.0.195 "
1414__author__ : str = "@joocer"
You can’t perform that action at this time.
0 commit comments