@@ -42,62 +42,80 @@ def make_fair_classification(n_samples=100, n_features=5, ind_sensitive=0):
4242 return X , y , X_sen
4343
4444
45- def make_ratings (n_users , n_items , n_factors = 20 ,
46- n_interactions = None , density = 0.01 ,
47- noise_std = 0.1 , seed = None ,
48- rating_min = 1.0 , rating_max = 5.0 , return_params = True ):
45+ def make_mf_dataset (n_users , n_items , n_factors = 20 ,
46+ n_interactions = None , density = 0.01 ,
47+ noise_std = 0.1 , seed = None ,
48+ rating_min = 1.0 , rating_max = 5.0 , return_params = True ):
4949 """
5050 Generate synthetic rating data using matrix factorization model.
51-
51+
5252 Creates synthetic user-item rating data based on the matrix factorization
5353 approach commonly used in recommender systems. The ratings are generated
5454 as: rating = mu + user_bias + item_bias + user_factor * item_factor + noise
55-
55+
5656 Parameters
5757 ----------
5858 n_users : int
5959 Number of users in the synthetic dataset
60+
6061 n_items : int
6162 Number of items in the synthetic dataset
63+
6264 n_factors : int, default=20
6365 Number of latent factors for user and item embeddings
66+
6467 n_interactions : int, optional
6568 Exact number of user-item interactions. If None, calculated as density * total_pairs
69+
6670 density : float, default=0.01
6771 Density of the rating matrix (ignored if n_interactions is specified)
72+
6873 noise_std : float, default=0.1
6974 Standard deviation of Gaussian noise added to ratings
75+
7076 seed : int, optional
7177 Random seed for reproducible results
78+
7279 rating_min : float, default=1.0
7380 Minimum possible rating value
81+
7482 rating_max : float, default=5.0
7583 Maximum possible rating value
84+
7685 return_params : bool, default=True
7786 If True, returns the underlying model parameters (P, Q, bu, bi, mu)
78-
87+
7988 Returns
8089 -------
8190 dict
8291 Dictionary containing:
83- - 'X' : ndarray of shape (n_interactions, 2)
92+
93+ - **X** : ndarray of shape (n_interactions, 2)
8494 User-item pairs where X[:, 0] are user indices and X[:, 1] are item indices
85- - 'y' : ndarray of shape (n_interactions,)
95+ - **y** : ndarray of shape (n_interactions,)
8696 Synthetic ratings for each user-item pair
87- - ' params' : dict, optional
97+ - **params** : dict, optional
8898 Only returned if return_params=True. Contains:
89- * 'P' : ndarray of shape (n_users, n_factors) - User factor matrix
90- * 'Q' : ndarray of shape (n_items, n_factors) - Item factor matrix
91- * 'bu' : ndarray of shape (n_users,) - User biases
92- * 'bi' : ndarray of shape (n_items,) - Item biases
93- * 'mu' : float - Global mean rating
94-
99+
100+ * **P** : ndarray of shape (n_users, n_factors)
101+ User factor matrix
102+ * **Q** : ndarray of shape (n_items, n_factors)
103+ Item factor matrix
104+ * **bu** : ndarray of shape (n_users,)
105+ User biases
106+ * **bi** : ndarray of shape (n_items,)
107+ Item biases
108+ * **mu** : float
109+ Global mean rating
110+
95111 Notes
96112 -----
97113 The rating generation follows the standard matrix factorization model:
114+
98115 r_ui = μ + b_u + b_i + p_u · q_i^T + ε
99- where ε ~ N(0, noise_std²)
100-
116+
117+ where ε ~ N(0, noise_std²)
118+
101119 The generated ratings are clipped to stay within [rating_min, rating_max] range.
102120 """
103121 rng = np .random .RandomState (seed )
0 commit comments