1+ # distutils: language = c++
12# Copyright 1999-2021 Alibaba Group Holding Ltd.
23#
34# Licensed under the Apache License, Version 2.0 (the "License");
@@ -22,12 +23,16 @@ import uuid
2223from datetime import date, datetime, timedelta, tzinfo
2324from enum import Enum
2425from functools import lru_cache, partial
26+ from random import getrandbits
2527from weakref import WeakSet
2628
2729import numpy as np
2830import pandas as pd
2931import cloudpickle
3032cimport cython
33+ from libc.stdint cimport uint_fast64_t
34+ from libc.stdlib cimport malloc, free
35+ from .lib.cython.libcpp cimport mt19937_64
3136try :
3237 from pandas.tseries.offsets import Tick as PDTick
3338except ImportError :
@@ -420,5 +425,46 @@ cdef class Timer:
420425 self .duration = time.time() - self ._start
421426
422427
cdef:
    # Module-level 64-bit Mersenne Twister engine shared by the random-id helpers.
    mt19937_64 _rnd_gen
    # Set to True once ``reset_id_random_seed`` has seeded ``_rnd_gen``.
    bint _rnd_is_seed_set = False
430+
431+
cpdef void reset_id_random_seed() except *:
    """
    (Re)seed the module-level generator used for random ids.

    Draws 64 random bits from Python's ``random`` module, seeds ``_rnd_gen``
    with them and marks the generator as seeded. The seed therefore follows
    the state of Python's global ``random`` generator and is not suitable
    for security-sensitive identifiers.
    """
    global _rnd_is_seed_set
    cdef uint_fast64_t seed_value

    # Convert the Python int straight into the C seed type instead of packing
    # it into a bytes object and reinterpreting the buffer through a pointer
    # cast, which depends on buffer alignment and on the C type's width.
    seed_value = getrandbits(64)
    _rnd_gen.seed(seed_value)
    _rnd_is_seed_set = True
439+
440+
cpdef bytes new_random_id(int byte_len):
    """
    Return ``byte_len`` random bytes drawn from the module-level generator.

    The generator is seeded lazily via ``reset_id_random_seed`` on first use.
    Output is produced one 64-bit word at a time and truncated to the
    requested length.

    Parameters
    ----------
    byte_len : int
        Number of bytes to generate; must be non-negative.

    Returns
    -------
    bytes
        A bytes object of length ``byte_len``.

    Raises
    ------
    ValueError
        If ``byte_len`` is negative.
    MemoryError
        If the temporary heap buffer cannot be allocated.
    """
    cdef uint_fast64_t *res_ptr
    cdef uint_fast64_t res_data[4]
    cdef int i, qw_num

    if byte_len < 0:
        raise ValueError("byte_len must be non-negative")

    if not _rnd_is_seed_set:
        reset_id_random_seed()

    # number of 64-bit words needed to cover byte_len bytes, rounded up
    qw_num = byte_len >> 3
    if byte_len & 7:
        qw_num += 1

    if qw_num <= 4:
        # use stack memory to accelerate
        res_ptr = res_data
    else:
        # size computed in size_t so it cannot overflow a C int, and taken
        # from sizeof() so every res_ptr[i] write stays inside the buffer
        res_ptr = <uint_fast64_t *>malloc(<size_t>qw_num * sizeof(uint_fast64_t))
        if res_ptr == NULL:
            raise MemoryError()

    try:
        for i in range(qw_num):
            res_ptr[i] = _rnd_gen()
        # slicing a char pointer copies exactly byte_len bytes into a new
        # bytes object, dropping any surplus bytes of the last word
        return <bytes>((<char *>res_ptr)[:byte_len])
    finally:
        # free memory if allocated by malloc
        if res_ptr != res_data:
            free(res_ptr)
467+
468+
# Public names exported by ``from <module> import *``.
__all__ = ['to_str', 'to_binary', 'to_text', 'TypeDispatcher', 'tokenize', 'tokenize_int',
           'register_tokenizer', 'ceildiv', 'Timer', 'reset_id_random_seed', 'new_random_id']
0 commit comments