Skip to content

Commit c62b23f

Browse files
authored
Use np.random.Generator for rng (#694)
* Use np.random.Generator for rng Replace RandomState with random.Generator for generating random numbers. The RandomState is marked as a legacy api, and will have no further improvements made to it. The Generator API also gives us some extra features, like the ability to specify dtypes rather than converting after the fact etc. * spelling * windows fix
1 parent f475283 commit c62b23f

File tree

12 files changed

+35
-31
lines changed

12 files changed

+35
-31
lines changed

implicit/cpu/als.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class AlternatingLeastSquares(MatrixFactorizationBase):
4545
num_threads : int, optional
4646
The number of threads to use for fitting the model and batch recommend calls.
4747
Specifying 0 means to default to the number of cores on the machine.
48-
random_state : int, numpy.random.RandomState or None, optional
48+
random_state : int, numpy.random.RandomState, numpy.random.Generator or None, optional
4949
The random state for seeding the initial item and user factors.
5050
Default is None.
5151
@@ -141,9 +141,9 @@ def fit(self, user_items, show_progress=True, callback=None):
141141
s = time.time()
142142
# Initialize the variables randomly if they haven't already been set
143143
if self.user_factors is None:
144-
self.user_factors = random_state.rand(users, self.factors).astype(self.dtype) * 0.01
144+
self.user_factors = random_state.random((users, self.factors), dtype=self.dtype) * 0.01
145145
if self.item_factors is None:
146-
self.item_factors = random_state.rand(items, self.factors).astype(self.dtype) * 0.01
146+
self.item_factors = random_state.random((items, self.factors), dtype=self.dtype) * 0.01
147147

148148
log.debug("Initialized factors in %s", time.time() - s)
149149

implicit/cpu/bpr.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
9393
num_threads : int, optional
9494
The number of threads to use for fitting the model and batch recommend calls.
9595
Specifying 0 means to default to the number of cores on the machine.
96-
random_state : int, RandomState or None, optional
96+
random_state : int, RandomState, Generator or None, optional
9797
The random state for seeding the initial item and user factors.
9898
Default is None.
9999
@@ -156,15 +156,15 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
156156
# Note: the final dimension is for the item bias term - which is set to a 1 for all users
157157
# this simplifies interfacing with approximate nearest neighbours libraries etc
158158
if self.item_factors is None:
159-
self.item_factors = (rs.rand(items, self.factors + 1).astype(self.dtype) - .5)
159+
self.item_factors = (rs.random((items, self.factors + 1), dtype=self.dtype) - .5)
160160
self.item_factors /= self.factors
161161

162162
# set factors to all zeros for items without any ratings
163163
item_counts = np.bincount(user_items.indices, minlength=items)
164164
self.item_factors[item_counts == 0] = np.zeros(self.factors + 1)
165165

166166
if self.user_factors is None:
167-
self.user_factors = (rs.rand(users, self.factors + 1).astype(self.dtype) - .5)
167+
self.user_factors = (rs.random((users, self.factors + 1), dtype=self.dtype) - .5)
168168
self.user_factors /= self.factors
169169

170170
# set factors to all zeros for users without any ratings
@@ -183,7 +183,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
183183
num_threads = multiprocessing.cpu_count()
184184

185185
# initialize RNG's, one per thread. Also pass the seeds for each thread's RNG
186-
cdef long[:] rng_seeds = rs.randint(0, 2**31, size=num_threads)
186+
cdef long[:] rng_seeds = rs.integers(0, 2**31, size=num_threads, dtype="long")
187187
cdef RNGVector rng = RNGVector(num_threads, len(user_items.data) - 1, rng_seeds)
188188

189189
log.debug("Running %i BPR training epochs", self.iterations)

implicit/cpu/lmf.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,14 +151,14 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):
151151
# user_factors[-2] = user bias, item factors[-1] = item bias
152152
# This significantly simplifies both training, and serving
153153
if self.item_factors is None:
154-
self.item_factors = rs.normal(size=(items, self.factors + 2)).astype(np.float32)
154+
self.item_factors = rs.standard_normal(size=(items, self.factors + 2), dtype=np.float32)
155155
self.item_factors[:, -1] = 1.0
156156

157157
# set factors to all zeros for items without any ratings
158158
self.item_factors[item_counts == 0] = np.zeros(self.factors + 2)
159159

160160
if self.user_factors is None:
161-
self.user_factors = rs.normal(size=(users, self.factors + 2)).astype(np.float32)
161+
self.user_factors = rs.standard_normal(size=(users, self.factors + 2), dtype=np.float32)
162162
self.user_factors[:, -2] = 1.0
163163

164164
# set factors to all zeros for users without any ratings
@@ -173,7 +173,7 @@ class LogisticMatrixFactorization(MatrixFactorizationBase):
173173
num_threads = multiprocessing.cpu_count()
174174

175175
# initialize RNG's, one per thread. Also pass the seeds for each thread's RNG
176-
cdef long[:] rng_seeds = rs.randint(0, 2**31, size=num_threads)
176+
cdef long[:] rng_seeds = rs.integers(0, 2**31, size=num_threads, dtype="long")
177177
cdef RNGVector rng = RNGVector(num_threads, len(user_items.data) - 1, rng_seeds)
178178

179179
log.debug("Running %i LMF training epochs", self.iterations)

implicit/evaluation.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def train_test_split(ratings, train_percentage=0.8, random_state=None):
3030

3131
ratings = ratings.tocoo()
3232
random_state = check_random_state(random_state)
33-
random_index = random_state.random_sample(len(ratings.data))
33+
random_index = random_state.random(len(ratings.data))
3434
train_index = random_index < train_percentage
3535
test_index = random_index >= train_percentage
3636

implicit/gpu/bpr.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class BayesianPersonalizedRanking(MatrixFactorizationBase):
3333
When sampling negative items, check if the randomly picked negative item has actually
3434
been liked by the user. This check increases the time needed to train but usually leads
3535
to better predictions.
36-
random_state : int, RandomState or None, optional
36+
random_state : int, RandomState, Generator or None, optional
3737
The random state for seeding the initial item and user factors.
3838
Default is None.
3939
@@ -103,7 +103,7 @@ def fit(self, user_items, show_progress=True, callback=None):
103103
# Note: the final dimension is for the item bias term - which is set to a 1 for all users
104104
# this simplifies interfacing with approximate nearest neighbours libraries etc
105105
if self.item_factors is None:
106-
item_factors = rs.rand(items, self.factors + 1).astype("float32") - 0.5
106+
item_factors = rs.random((items, self.factors + 1), "float32") - 0.5
107107
item_factors /= self.factors
108108

109109
# set factors to all zeros for items without any ratings
@@ -112,7 +112,7 @@ def fit(self, user_items, show_progress=True, callback=None):
112112
self.item_factors = implicit.gpu.Matrix(item_factors)
113113

114114
if self.user_factors is None:
115-
user_factors = rs.rand(users, self.factors + 1).astype("float32") - 0.5
115+
user_factors = rs.random((users, self.factors + 1), "float32") - 0.5
116116
user_factors /= self.factors
117117

118118
# set factors to all zeros for users without any ratings
@@ -142,7 +142,7 @@ def fit(self, user_items, show_progress=True, callback=None):
142142
Y,
143143
self.learning_rate,
144144
self.regularization,
145-
rs.randint(2**31),
145+
rs.integers(2**31),
146146
self.verify_negative_samples,
147147
)
148148
progress.update(1)

implicit/gpu/matrix_factorization_base.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,18 @@ def check_random_state(random_state):
239239
240240
Parameters
241241
----------
242-
random_state : int, None or RandomState
242+
random_state : int, None, np.random.RandomState or np.random.Generator
243243
The existing RandomState or Generator. If None, or an int, will be used
244244
to seed a new curand RandomState generator
245245
"""
246246
if isinstance(random_state, np.random.RandomState):
247247
# we need to convert from the numpy random state to our internal random state
248248
return implicit.gpu.RandomState(random_state.randint(2**31))
249249

250+
if isinstance(random_state, np.random.Generator):
251+
# we need to convert from the numpy Generator to our internal random state
252+
return implicit.gpu.RandomState(random_state.integers(2**31))
253+
250254
# otherwise try to initialize a new one, and let it fail through
251255
# on the numpy side if it doesn't work
252256
return implicit.gpu.RandomState(random_state or int(time.time()))

implicit/utils.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,21 +65,22 @@ def check_blas_config():
6565
def check_random_state(random_state):
6666
"""Validate the random state.
6767
68-
Check a random seed or existing numpy RandomState
69-
and get back an initialized RandomState.
68+
Check a random seed or existing numpy rng
69+
and get back an initialized numpy.random.Generator.
7070
7171
Parameters
7272
----------
73-
random_state : int, None or RandomState
73+
random_state : int, None, np.random.RandomState or np.random.Generator
7474
The existing RandomState or Generator. If None, or an int, will be used
7575
to seed a new numpy Generator.
7676
"""
77-
# if it's an existing random state, pass through
77+
# backwards compatibility
7878
if isinstance(random_state, np.random.RandomState):
79-
return random_state
79+
return np.random.default_rng(random_state.randint(2**31))
80+
8081
# otherwise try to initialize a new one, and let it fail through
8182
# on the numpy side if it doesn't work
82-
return np.random.RandomState(random_state)
83+
return np.random.default_rng(random_state)
8384

8485

8586
def augment_inner_product_matrix(factors):

pyproject.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,5 @@ CUDACXX = "/usr/local/cuda/bin/nvcc"
3737
[tool.cibuildwheel.macos]
3838
archs = ["x86_64", "universal2", "arm64"]
3939

40-
41-
4240
[tool.pytest.ini_options]
4341
filterwarnings = ['ignore::implicit.utils.ParameterWarning']

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,6 @@ def exclude_non_implicit_cmake_files(cmake_manifest):
4444
"Collaborative Filtering, Recommender Systems"
4545
),
4646
packages=find_packages(),
47-
install_requires=["numpy", "scipy>=0.16", "tqdm>=4.27", "threadpoolctl"],
47+
install_requires=["numpy>=1.17.0", "scipy>=0.16", "tqdm>=4.27", "threadpoolctl"],
4848
cmake_process_manifest_hook=exclude_non_implicit_cmake_files,
4949
)

tests/als_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def test_factorize(use_native, use_gpu, use_cg, dtype):
179179
reconstructed = rows.dot(cols.T)
180180
for i in range(counts.shape[0]):
181181
for j in range(counts.shape[1]):
182-
assert pytest.approx(counts[i, j], abs=1e-4) == reconstructed[i, j], (
182+
assert pytest.approx(counts[i, j], abs=1e-3) == reconstructed[i, j], (
183183
"failed to reconstruct row=%s, col=%s,"
184184
" value=%.5f, dtype=%s, cg=%s, native=%s gpu=%s"
185185
% (i, j, reconstructed[i, j], dtype, use_cg, use_native, use_gpu)

0 commit comments

Comments
 (0)