Skip to content

Commit 77a6348

Browse files
authored
Feature/vector rank params (#218)
`recommend_n_threads` and `recommend_use_gpu_ranking` arguments to EASEModel and vector models Closes #199
1 parent 5c43ac8 commit 77a6348

18 files changed

+710
-133
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111
- `ImplicitBPRWrapperModel` model ([#232](https://github.com/MobileTeleSystems/RecTools/pull/232))
12+
- Support for GPU ranking and for selecting the number of threads for CPU ranking in all vector models and `EASEModel`. Added `recommend_n_threads` and `recommend_use_gpu_ranking` parameters to `EASEModel`, `ImplicitALSWrapperModel`, `ImplicitBPRWrapperModel`, `PureSVDModel` and `DSSMModel`. Added `recommend_use_gpu_ranking` to `LightFMWrapperModel`. GPU and CPU ranking may order items with identical scores differently in the recommendation table, so the ordering of items in recommendations may change now that GPU ranking is used by default. ([#218](https://github.com/MobileTeleSystems/RecTools/pull/218))
1213

1314
## [0.9.0] - 11.12.2024
1415

examples/9_model_configs_and_saving.ipynb

Lines changed: 89 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
},
2828
{
2929
"cell_type": "code",
30-
"execution_count": null,
30+
"execution_count": 1,
3131
"metadata": {},
3232
"outputs": [],
3333
"source": [
@@ -36,6 +36,7 @@
3636
"from rectools.models import (\n",
3737
" ImplicitItemKNNWrapperModel, \n",
3838
" ImplicitALSWrapperModel, \n",
39+
" ImplicitBPRWrapperModel, \n",
3940
" EASEModel, \n",
4041
" PopularInCategoryModel, \n",
4142
" PopularModel, \n",
@@ -329,7 +330,7 @@
329330
"- \"ItemItemRecommender\"\n",
330331
"- A path to a class (including any custom class) that can be imported. Like \"implicit.nearest_neighbours.TFIDFRecommender\"\n",
331332
"\n",
332-
"Specify wrapped model hyper-params under the \"model.params\" key"
333+
"Specify wrapped model hyper-params under the relevant keys of the \"model\" dict."
333334
]
334335
},
335336
{
@@ -381,9 +382,9 @@
381382
"\n",
382383
"Specify which model you want to wrap under the \"model.cls\" key. Since there is only one default model, you can skip this step. \"implicit.als.AlternatingLeastSquares\" will be used by default. Also you can pass a path to a class (including any custom class) that can be imported.\n",
383384
"\n",
384-
"Specify wrapped model hyper-params under the \"model.params\" key. \n",
385+
"Specify wrapped model hyper-params under the relevant keys of the \"model\" dict. \n",
385386
"\n",
386-
"Specify wrapper hyper-params under relevant keys."
387+
"Specify wrapper hyper-params under relevant config keys."
387388
]
388389
},
389390
{
@@ -440,6 +441,73 @@
440441
"model.get_params(simple_types=True)"
441442
]
442443
},
444+
{
445+
"cell_type": "markdown",
446+
"metadata": {},
447+
"source": [
448+
"### BPR-MF\n",
449+
"`ImplicitBPRWrapperModel` is a wrapper. \n",
450+
"Use \"model\" key in config to specify wrapped model class and params: \n",
451+
"\n",
452+
"Specify which model you want to wrap under the \"model.cls\" key. Since there is only one default model, you can skip this step. \"implicit.bpr.BayesianPersonalizedRanking\" will be used by default. Also you can pass a path to a class (including any custom class) that can be imported.\n",
453+
"\n",
454+
"Specify wrapped model hyper-params under the relevant keys of the \"model\" dict. \n",
455+
"\n",
456+
"Specify wrapper hyper-params under relevant config keys."
457+
]
458+
},
459+
{
460+
"cell_type": "code",
461+
"execution_count": 12,
462+
"metadata": {},
463+
"outputs": [],
464+
"source": [
465+
"config = {\n",
466+
" \"model\": {\n",
467+
" # \"cls\": \"BayesianPersonalizedRanking\", # will work too\n",
468+
" # \"cls\": \"implicit.bpr.BayesianPersonalizedRanking\", # will work too\n",
469+
" \"factors\": 16,\n",
470+
" \"num_threads\": 2,\n",
471+
" \"iterations\": 2,\n",
472+
" \"random_state\": 32\n",
473+
" },\n",
474+
" \"recommend_use_gpu_ranking\": False,\n",
475+
"}\n",
476+
"model = ImplicitBPRWrapperModel.from_config(config)"
477+
]
478+
},
479+
{
480+
"cell_type": "code",
481+
"execution_count": 13,
482+
"metadata": {},
483+
"outputs": [
484+
{
485+
"data": {
486+
"text/plain": [
487+
"{'cls': 'ImplicitBPRWrapperModel',\n",
488+
" 'verbose': 0,\n",
489+
" 'model.cls': 'BayesianPersonalizedRanking',\n",
490+
" 'model.factors': 16,\n",
491+
" 'model.learning_rate': 0.01,\n",
492+
" 'model.regularization': 0.01,\n",
493+
" 'model.dtype': 'float64',\n",
494+
" 'model.iterations': 2,\n",
495+
" 'model.verify_negative_samples': True,\n",
496+
" 'model.random_state': 32,\n",
497+
" 'model.use_gpu': True,\n",
498+
" 'recommend_n_threads': None,\n",
499+
" 'recommend_use_gpu_ranking': False}"
500+
]
501+
},
502+
"execution_count": 13,
503+
"metadata": {},
504+
"output_type": "execute_result"
505+
}
506+
],
507+
"source": [
508+
"model.get_params(simple_types=True)"
509+
]
510+
},
443511
{
444512
"cell_type": "markdown",
445513
"metadata": {},
@@ -535,9 +603,9 @@
535603
"\n",
536604
"Specify which model you want to wrap under the \"model.cls\" key. Since there is only one default model, you can skip this step. \"LightFM\" will be used by default. Also you can pass a path to a class (including any custom class) that can be imported. Like \"lightfm.lightfm.LightFM\"\n",
537605
"\n",
538-
"Specify wrapped model hyper-params under the \"model.params\" key. \n",
606+
"Specify wrapped model hyper-params under the relevant keys of the \"model\" dict. \n",
539607
"\n",
540-
"Specify wrapper hyper-params under relevant keys."
608+
"Specify wrapper hyper-params under relevant config keys."
541609
]
542610
},
543611
{
@@ -736,8 +804,22 @@
736804
}
737805
],
738806
"metadata": {
807+
"kernelspec": {
808+
"display_name": "rectools",
809+
"language": "python",
810+
"name": "rectools"
811+
},
739812
"language_info": {
740-
"name": "python"
813+
"codemirror_mode": {
814+
"name": "ipython",
815+
"version": 3
816+
},
817+
"file_extension": ".py",
818+
"mimetype": "text/x-python",
819+
"name": "python",
820+
"nbconvert_exporter": "python",
821+
"pygments_lexer": "ipython3",
822+
"version": "3.9.12"
741823
}
742824
},
743825
"nbformat": 4,

rectools/models/dssm.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def inference_users(self, dataloader: DataLoader[tp.Any]) -> np.ndarray:
215215
return vectors
216216

217217

218-
class DSSMModel(VectorModel):
218+
class DSSMModel(VectorModel): # pylint: disable=too-many-instance-attributes
219219
"""
220220
Wrapper for `rectools.models.dssm.DSSM`
221221
@@ -267,6 +267,17 @@ class DSSMModel(VectorModel):
267267
deterministic : bool, default ``False``
268268
If ``True``, sets whether PyTorch operations must use deterministic algorithms.
269269
Use `pytorch_lightning.seed_everything` together with this param to fix the random state.
270+
recommend_n_threads: int, default 0
271+
Number of threads to use for recommendation ranking on CPU.
272+
Specifying ``0`` means to default to the number of cores on the machine.
273+
If you want to change this parameter after model is initialized,
274+
you can manually assign new value to model `recommend_n_threads` attribute.
275+
recommend_use_gpu_ranking: bool, default ``True``
276+
Flag to use GPU for recommendation ranking. Please note that GPU and CPU ranking may provide
277+
different ordering of items with identical scores in recommendation table.
278+
If ``True``, `implicit.gpu.HAS_CUDA` will also be checked before ranking.
279+
If you want to change this parameter after model is initialized,
280+
you can manually assign new value to model `recommend_use_gpu_ranking` attribute.
270281
"""
271282

272283
recommends_for_warm = True
@@ -292,6 +303,8 @@ def __init__(
292303
loggers: tp.Union[Logger, tp.Iterable[Logger], bool] = True,
293304
verbose: int = 0,
294305
deterministic: bool = False,
306+
recommend_n_threads: int = 0,
307+
recommend_use_gpu_ranking: bool = True,
295308
) -> None:
296309
super().__init__(verbose=verbose)
297310
self.model: DSSM
@@ -313,6 +326,8 @@ def __init__(
313326
self.train_dataset_type = train_dataset_type
314327
self.user_dataset_type = user_dataset_type
315328
self.item_dataset_type = item_dataset_type
329+
self.recommend_n_threads = recommend_n_threads
330+
self.recommend_use_gpu_ranking = recommend_use_gpu_ranking
316331

317332
def _fit(self, dataset: Dataset, dataset_valid: tp.Optional[Dataset] = None) -> None: # type: ignore
318333
self.trainer = deepcopy(self._trainer)

rectools/models/ease.py

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
"""EASE model."""
1616

1717
import typing as tp
18+
import warnings
1819

1920
import numpy as np
2021
import typing_extensions as tpe
22+
from implicit.gpu import HAS_CUDA
2123
from scipy import sparse
2224

2325
from rectools import InternalIds
@@ -33,7 +35,8 @@ class EASEModelConfig(ModelConfig):
3335
"""Config for `EASE` model."""
3436

3537
regularization: float = 500.0
36-
num_threads: int = 1
38+
recommend_n_threads: int = 0
39+
recommend_use_gpu_ranking: bool = True
3740

3841

3942
class EASEModel(ModelBase[EASEModelConfig]):
@@ -51,10 +54,22 @@ class EASEModel(ModelBase[EASEModelConfig]):
5154
----------
5255
regularization : float
5356
The regularization factor of the weights.
57+
num_threads: Optional[int], default ``None``
58+
Deprecated, use `recommend_n_threads` instead.
59+
Number of threads used for recommendation ranking on CPU.
60+
recommend_n_threads: int, default 0
61+
Number of threads to use for recommendation ranking on CPU.
62+
Specifying ``0`` means to default to the number of cores on the machine.
63+
If you want to change this parameter after model is initialized,
64+
you can manually assign new value to model `recommend_n_threads` attribute.
65+
recommend_use_gpu_ranking: bool, default ``True``
66+
Flag to use GPU for recommendation ranking. Please note that GPU and CPU ranking may provide
67+
different ordering of items with identical scores in recommendation table.
68+
If ``True``, `implicit.gpu.HAS_CUDA` will also be checked before ranking.
69+
If you want to change this parameter after model is initialized,
70+
you can manually assign new value to model `recommend_use_gpu_ranking` attribute.
5471
verbose : int, default 0
5572
Degree of verbose output. If 0, no output will be provided.
56-
num_threads: int, default 1
57-
Number of threads used for `recommend` method.
5873
"""
5974

6075
recommends_for_warm = False
@@ -65,23 +80,45 @@ class EASEModel(ModelBase[EASEModelConfig]):
6580
def __init__(
6681
self,
6782
regularization: float = 500.0,
68-
num_threads: int = 1,
83+
num_threads: tp.Optional[int] = None,
84+
recommend_n_threads: int = 0,
85+
recommend_use_gpu_ranking: bool = True,
6986
verbose: int = 0,
7087
):
7188

7289
super().__init__(verbose=verbose)
7390
self.weight: np.ndarray
7491
self.regularization = regularization
75-
self.num_threads = num_threads
92+
93+
if num_threads is not None:
94+
warnings.warn(
95+
"""
96+
`num_threads` argument is deprecated and will be removed in future releases.
97+
Please use `recommend_n_threads` instead.
98+
"""
99+
)
100+
recommend_n_threads = num_threads
101+
102+
self.recommend_n_threads = recommend_n_threads
103+
self.recommend_use_gpu_ranking = recommend_use_gpu_ranking
76104

77105
def _get_config(self) -> EASEModelConfig:
78106
return EASEModelConfig(
79-
cls=self.__class__, regularization=self.regularization, num_threads=self.num_threads, verbose=self.verbose
107+
cls=self.__class__,
108+
regularization=self.regularization,
109+
recommend_n_threads=self.recommend_n_threads,
110+
recommend_use_gpu_ranking=self.recommend_use_gpu_ranking,
111+
verbose=self.verbose,
80112
)
81113

82114
@classmethod
83115
def _from_config(cls, config: EASEModelConfig) -> tpe.Self:
84-
return cls(regularization=config.regularization, num_threads=config.num_threads, verbose=config.verbose)
116+
return cls(
117+
regularization=config.regularization,
118+
recommend_n_threads=config.recommend_n_threads,
119+
recommend_use_gpu_ranking=config.recommend_use_gpu_ranking,
120+
verbose=config.verbose,
121+
)
85122

86123
def _fit(self, dataset: Dataset) -> None: # type: ignore
87124
ui_csr = dataset.get_user_item_matrix(include_weights=True)
@@ -117,7 +154,8 @@ def _recommend_u2i(
117154
k=k,
118155
filter_pairs_csr=ui_csr_for_filter,
119156
sorted_object_whitelist=sorted_item_ids_to_recommend,
120-
num_threads=self.num_threads,
157+
num_threads=self.recommend_n_threads,
158+
use_gpu=self.recommend_use_gpu_ranking and HAS_CUDA,
121159
)
122160

123161
return all_user_ids, all_reco_ids, all_scores

0 commit comments

Comments
 (0)