Skip to content

Commit ac11874

Browse files
authored
fixed implicit bpr tests (#246)
Fixed BPR tests setting `num_threads=1`
1 parent 1b4e1be commit ac11874

File tree

2 files changed

+14
-11
lines changed

2 files changed

+14
-11
lines changed

rectools/models/implicit_bpr.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ class ImplicitBPRWrapperModel(VectorModel[ImplicitBPRWrapperModelConfig]):
8787
8888
See https://benfred.github.io/implicit/api/models/cpu/bpr.html for details of the base model.
8989
90+
Please note that implicit BPR model training is not deterministic with num_threads > 1 or use_gpu=True.
91+
https://github.com/benfred/implicit/issues/710
92+
9093
Parameters
9194
----------
9295
model : BayesianPersonalizedRanking

tests/models/test_implicit_bpr.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
assert_second_fit_refits_model,
2727
)
2828

29+
# Note that num_threads > 1 for BayesianPersonalizedRanking CPU training will make model training nondeterministic
30+
# https://github.com/benfred/implicit/issues/710
31+
# GPU training is always nondeterministic
32+
2933

3034
@pytest.mark.parametrize("use_gpu", (False, True) if HAS_CUDA else (False,))
3135
class TestImplicitBPRWrapperModel:
@@ -57,7 +61,7 @@ def dataset(self) -> Dataset:
5761
pd.DataFrame(
5862
{
5963
Columns.User: [10, 10, 20, 20],
60-
Columns.Item: [17, 15, 17, 15],
64+
Columns.Item: [17, 13, 17, 15],
6165
Columns.Rank: [1, 2, 1, 2],
6266
}
6367
),
@@ -97,7 +101,7 @@ def test_basic(
97101
use_gpu: bool,
98102
) -> None:
99103
base_model = BayesianPersonalizedRanking(
100-
factors=2, num_threads=2, iterations=100, use_gpu=use_gpu, random_state=42
104+
factors=2, num_threads=1, iterations=100, use_gpu=use_gpu, random_state=42
101105
)
102106
self._init_model_factors_inplace(base_model, dataset)
103107
model = ImplicitBPRWrapperModel(model=base_model).fit(dataset)
@@ -116,7 +120,7 @@ def test_basic(
116120

117121
def test_consistent_with_pure_implicit(self, dataset: Dataset, use_gpu: bool) -> None:
118122
base_model = BayesianPersonalizedRanking(
119-
factors=2, num_threads=2, iterations=100, use_gpu=use_gpu, random_state=42
123+
factors=2, num_threads=1, iterations=100, use_gpu=use_gpu, random_state=42
120124
)
121125
self._init_model_factors_inplace(base_model, dataset)
122126
users = np.array([10, 20, 30, 40])
@@ -150,7 +154,7 @@ def test_gpu_ranking_consistent_with_pure_implicit(
150154
use_gpu: bool,
151155
) -> None:
152156
base_model = BayesianPersonalizedRanking(
153-
factors=2, num_threads=2, iterations=100, use_gpu=False, random_state=42
157+
factors=2, num_threads=1, iterations=100, use_gpu=False, random_state=42
154158
)
155159
self._init_model_factors_inplace(base_model, dataset)
156160
users = np.array([10, 20, 30, 40])
@@ -205,7 +209,7 @@ def test_with_whitelist(
205209
use_gpu: bool,
206210
) -> None:
207211
base_model = BayesianPersonalizedRanking(
208-
factors=32, num_threads=2, iterations=100, use_gpu=use_gpu, random_state=42
212+
factors=32, num_threads=1, iterations=100, use_gpu=use_gpu, random_state=42
209213
)
210214
model = ImplicitBPRWrapperModel(model=base_model).fit(dataset)
211215
actual = model.recommend(
@@ -265,7 +269,7 @@ def test_i2i(
265269
use_gpu: bool,
266270
) -> None:
267271
base_model = BayesianPersonalizedRanking(
268-
factors=2, num_threads=2, iterations=100, use_gpu=use_gpu, random_state=1
272+
factors=2, num_threads=1, iterations=100, use_gpu=use_gpu, random_state=1
269273
)
270274
self._init_model_factors_inplace(base_model, dataset)
271275
model = ImplicitBPRWrapperModel(model=base_model).fit(dataset)
@@ -283,8 +287,6 @@ def test_i2i(
283287
)
284288

285289
def test_second_fit_refits_model(self, dataset: Dataset, use_gpu: bool) -> None:
286-
# note that num_threads > 1 will make model training undeterministic
287-
# https://github.com/benfred/implicit/issues/710
288290
# GPU training is always nondeterministic so we only test for CPU training
289291
if use_gpu:
290292
pytest.skip("BPR is nondeterministic on GPU")
@@ -298,7 +300,7 @@ def set_random_state() -> None:
298300
assert_second_fit_refits_model(model, dataset, set_random_state)
299301

300302
def test_dumps_loads(self, dataset: Dataset, use_gpu: bool) -> None:
301-
base_model = BayesianPersonalizedRanking(factors=8, num_threads=2, use_gpu=use_gpu, random_state=1)
303+
base_model = BayesianPersonalizedRanking(factors=8, num_threads=1, use_gpu=use_gpu, random_state=1)
302304
model = ImplicitBPRWrapperModel(model=base_model).fit(dataset)
303305
assert_dumps_loads_do_not_change_model(model, dataset)
304306

@@ -489,8 +491,6 @@ def test_custom_model_class(self) -> None:
489491
def test_get_config_and_from_config_compatibility(
490492
self, simple_types: bool, recommend_use_gpu: tp.Optional[bool], recommend_n_threads: tp.Optional[int]
491493
) -> None:
492-
# note that num_threads > 1 will make model training undeterministic
493-
# https://github.com/benfred/implicit/issues/710
494494
initial_config = {
495495
"model": {"factors": 4, "num_threads": 1, "iterations": 2, "random_state": 42},
496496
"verbose": 1,

0 commit comments

Comments
 (0)