Commit c9aa347

Merge pull request #26 from DataboyUsen/main
doc: MF constraint revision and matching updates for new-added loss functions
2 parents: b2cf099 + 3209de4

File tree

7 files changed (+80, -143 lines changed)


doc/source/examples/MF.ipynb

Lines changed: 39 additions & 45 deletions
Large diffs are not rendered by default.

doc/source/tutorials/ReHLine_MF.rst

Lines changed: 8 additions & 71 deletions
@@ -10,9 +10,9 @@ Considering a User-Item-Rating triplet dataset :math:`(u, i, r_{ui})` derived fr
 
 .. math::
     \min_{\substack{
-    \mathbf{P} \in \mathbb{R}^{n \times r}\
+    \mathbf{P} \in \mathbb{R}^{n \times k}\
     \pmb{\alpha} \in \mathbb{R}^n \\
-    \mathbf{Q} \in \mathbb{R}^{m \times r}\
+    \mathbf{Q} \in \mathbb{R}^{m \times k}\
     \pmb{\beta} \in \mathbb{R}^m
     }}
     \left[
@@ -26,31 +26,24 @@ Considering a User-Item-Rating triplet dataset :math:`(u, i, r_{ui})` derived fr
 
 .. math::
     \ \text{ s.t. } \
-    \mathbf{A} \begin{bmatrix}
-    \pmb{\alpha} & \mathbf{P}
-    \end{bmatrix}^T +
-    \mathbf{b}\mathbf{1}_{n}^T \geq \mathbf{0}
-    \ \text{ and } \
-    \mathbf{A} \begin{bmatrix}
-    \pmb{\beta} & \mathbf{Q}
-    \end{bmatrix}^T +
-    \mathbf{b}\mathbf{1}_{m}^T \geq \mathbf{0}
-
+    \mathbf{A} \begin{pmatrix} \alpha_u \\ \mathbf{p}_u \end{pmatrix} + \mathbf{b} \geq \mathbf{0},\ u = 1,\dots,n
+    \quad \text{and} \quad
+    \mathbf{A} \begin{pmatrix} \beta_i \\ \mathbf{q}_i \end{pmatrix} + \mathbf{b} \geq \mathbf{0},\ i = 1,\dots,m
 
 where
 
 - :math:`\text{PLQ}(\cdot , \cdot)`
   is a convex piecewise linear-quadratic loss function. You can find built-in loss functions in the `Loss <./loss.rst>`_ section.
 
-- :math:`\mathbf{A}` is a :math:`K \times r` matrix and :math:`\mathbf{b}` is a :math:`K`-dimensional vector
-  representing :math:`K` linear constraints. See `Constraints <./constraint.rst>`_ for more details.
+- :math:`\mathbf{A}` is a :math:`d \times (k+1)` matrix and :math:`\mathbf{b}` is a :math:`d`-dimensional vector
+  representing :math:`d` linear constraints. See `Constraints <./constraint.rst>`_ for more details.
 
 - :math:`\Omega`
   is a user-item collection that records all training data
 
 - :math:`n` is number of users, :math:`m` is number of items
 
-- :math:`r` is length of latent factors (rank of MF)
+- :math:`k` is length of latent factors (rank of MF)
 
 - :math:`C` is regularization parameter, :math:`\rho` balances regularization strength between user and item

@@ -214,69 +207,13 @@ The model complexity is mainly controlled by :code:`C` and :code:`rank`.
     mae = mean_absolute_error(y_test, y_pred)
     print(f"rank={rank_value}: MAE = {mae:.3f}")
 
-Convergence Tracking
-^^^^^^^^^^^^^^^^^^^^
-
-You can customize the optimization process by setting your preferred iteration counts and tolerance levels.
-Training progress can be monitored either by enabling :code:`verbose` output during fitting or by examining the :code:`history` attribute after fitting.
-
-.. code-block:: python
-
-    clf = plqMF_Ridge(
-        C=0.001,
-        rank=6,
-        loss={'name': 'mae'},
-        n_users=user_num,
-        n_items=item_num,
-        max_iter_CD=15,   ## Outer CD iterations
-        tol_CD=1e-5,      ## Outer CD tolerance
-        max_iter=8000,    ## ReHLine solver iterations
-        tol=1e-2,         ## ReHLine solver tolerance
-        verbose=1,        ## Enable progress output
-    )
-    clf.fit(X_train, y_train)
-
-    print(clf.history)  ## Check training trace of cumulative loss and objection value
-
-Different Gaussian initial conditions can be manually set by :code:`init_mean` and :code:`init_sd`:
-
-.. code-block:: python
-
-    # Initialize model with positive shifted normal
-    clf = plqMF_Ridge(
-        C=0.001,
-        rank=6,
-        loss={'name': 'mae'},
-        n_users=user_num,
-        n_items=item_num,
-        init_mean=1.0,  ## Manually set mean of normal distribution
-        init_sd=0.5     ## Manually set sd of normal distribution
-    )
-
 Practical Guidance
 ^^^^^^^^^^^^^^^^^^
 
 - The first column of :code:`X` corresponds to **users**, and the second column corresponds to **items**. Please ensure this aligns with your :code:`n_users` and :code:`n_items` parameters.
 - The default penalty strength is relatively weak; it is recommended to set a relatively small :code:`C` value initially.
 - When using larger :code:`C` values, consider increasing :code:`max_iter` to avoid ConvergenceWarning.
 
-
-Regularization Conversion
--------------------------
-The regularization in this algorithm is tuned via :math:`C` and :math:`\rho`. For users who prefer to set the penalty strength directly, you may achieve conversion through the following formula:
-
-.. math::
-    \lambda_{\text{user}} = \frac{\rho}{Cn}
-    \quad\text{and}\quad
-    \lambda_{\text{item}} = \frac{(1 - \rho)}{Cm}
-
-
-.. math::
-    C = \frac{1}{m \cdot \lambda_{\text{item}} + n \cdot \lambda_{\text{user}}}
-    \quad\text{and}\quad
-    \rho = \frac{1}{\frac{m \cdot \lambda_{\text{item}}}{n \cdot \lambda_{\text{user}}} + 1}
-
-
 Example
 -------
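
For illustration only (this snippet is not part of the commit): the revised constraint is applied column by column, so every user vector (alpha_u, p_u) and every item vector (beta_i, q_i) must satisfy the same d inequalities A v + b >= 0. A minimal NumPy sketch of what A and b could look like for the 'nonnegative' constraint listed in the plqMF_Ridge docstring, assuming rank k = 3:

    import numpy as np

    k = 3                        # rank of the factorization (illustrative value)
    d = k + 1                    # one inequality per entry of (alpha_u, p_u)

    # Nonnegativity on the user-side block: A = identity, b = 0, so
    # A @ v + b >= 0 holds exactly when every entry of v is >= 0.
    A = np.eye(d)
    b = np.zeros(d)

    alpha_u = 0.2
    p_u = np.array([0.5, 0.0, 1.3])
    v = np.concatenate(([alpha_u], p_u))   # the per-user column in the new constraint form

    print(np.all(A @ v + b >= 0))          # True: this user satisfies the constraint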

doc/source/tutorials/loss.rst

Lines changed: 11 additions & 0 deletions
@@ -34,6 +34,9 @@ Classification loss
      - | ``name``: 'sSVM' / 'smooth SVM' / 'smooth hinge'
      - | ``loss={'name': 'sSVM'}``
 
+   * - **Squared SVM**
+     - | ``name``: 'squared SVM' / 'squared svm' / 'squared hinge'
+     - | ``loss={'name': 'squared SVM'}``
 
 Regression loss
 ~~~~~~~~~~~~~~~
@@ -61,6 +64,14 @@ Regression loss
        | ``epsilon`` (*float*): 0.1
      - | ``loss={'name': 'svr', 'epsilon': 0.1}``
 
+   * - **MAE**
+     - | ``name``: 'MAE' / 'mae' / 'mean absolute error'
+     - | ``loss={'name': 'mae'}``
+
+   * - **MSE**
+     - | ``name``: 'MSE' / 'mse' / 'mean squared error'
+     - | ``loss={'name': 'mse'}``
+
 Related Examples
 ----------------
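
The new table rows document only the dict form of the loss argument; every listed alias maps to the same branch in rehline/_base.py (next file). Purely as a reminder of the shape these rows describe (illustrative, not part of the diff):

    # Any listed alias is accepted as the 'name' value.
    squared_svm = {'name': 'squared SVM'}   # or 'squared svm' / 'squared hinge'
    mae = {'name': 'mae'}                   # or 'MAE' / 'mean absolute error'
    mse = {'name': 'mse'}                   # or 'MSE' / 'mean squared error'

    # These dicts are what gets passed as loss=... to the library's estimators,
    # e.g. loss={'name': 'mae'} in the MF tutorial above.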

rehline/_base.py

Lines changed: 6 additions & 6 deletions
@@ -278,8 +278,8 @@ def _make_loss_rehline_param(loss, X, y):
     """The `_make_loss_rehline_param` function generates parameters for the ReHLine solver, based on the provided training data.
 
     The function supports various loss functions, including:
-    - 'hinge'
-    - 'svm' or 'SVM'
+    - 'hinge' or 'svm' or 'SVM'
+    - 'squared hinge' or 'squared svm' or 'squared SVM'
     - 'mae' or 'MAE' or 'mean absolute error'
     - 'check' or 'quantile' or 'quantile regression' or 'QR'
     - 'sSVM' or 'smooth SVM' or 'smooth hinge'
@@ -393,16 +393,16 @@ def _make_loss_rehline_param(loss, X, y):
         U = np.array([[1.0] * n, [-1.0] * n])
         V = np.array([-y , y])
 
-    elif (loss['name'] == 'SVM square') \
-            or (loss['name'] == 'svm square') \
-            or (loss['name'] == 'hinge square'):
+    elif (loss['name'] == 'squared SVM') \
+            or (loss['name'] == 'squared svm') \
+            or (loss['name'] == 'squared hinge'):
         Tau = np.inf * np.ones((1, n))
         S = - np.sqrt(2) * y.reshape(1,-1)
         T = np.sqrt(2) * np.ones((1, n))
 
     elif (loss['name'] == 'MSE') \
             or (loss['name'] == 'mse') \
-            or (loss['name'] == 'mean square error'):
+            or (loss['name'] == 'mean squared error'):
         Tau = np.inf * np.ones((2, n))
         S = np.array([[np.sqrt(2)] * n, [-np.sqrt(2)] * n])
         T = np.array([-np.sqrt(2) * y , np.sqrt(2) * y])
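
These parameter blocks encode both losses as ReHU compositions. Assuming the standard ReHU definition from the ReHLine paper (ReHU_tau(u) = 0 for u <= 0, u^2/2 for 0 < u <= tau, and tau*(u - tau/2) above tau), the squared hinge (1 - y*z)_+^2 equals ReHU_inf(sqrt(2)*(1 - y*z)), and the squared error (z - y)^2 splits into ReHU_inf(sqrt(2)*(z - y)) + ReHU_inf(-sqrt(2)*(z - y)); that is exactly what the S and T arrays above express. A quick numerical check (illustrative, not part of the commit):

    import numpy as np

    def rehu(u, tau=np.inf):
        # ReHU_tau(u): 0 for u <= 0, u**2/2 for 0 < u <= tau, tau*(u - tau/2) beyond tau
        u = np.maximum(u, 0.0)
        return np.where(u <= tau, 0.5 * u**2, tau * (u - 0.5 * tau))

    rng = np.random.default_rng(0)
    z = rng.normal(size=8)                     # decision values
    y_cls = rng.choice([-1.0, 1.0], size=8)    # +/-1 labels for the squared hinge check
    y_reg = rng.normal(size=8)                 # continuous targets for the MSE check

    # Squared hinge: S = -sqrt(2)*y, T = sqrt(2)  ->  ReHU_inf(S*z + T) == (1 - y*z)_+^2
    assert np.allclose(rehu(-np.sqrt(2) * y_cls * z + np.sqrt(2)),
                       np.maximum(0, 1 - y_cls * z) ** 2)

    # MSE: S = [sqrt(2), -sqrt(2)], T = [-sqrt(2)*y, sqrt(2)*y]  ->  sum of two ReHU terms
    lhs = rehu(np.sqrt(2) * z - np.sqrt(2) * y_reg) + rehu(-np.sqrt(2) * z + np.sqrt(2) * y_reg)
    assert np.allclose(lhs, (z - y_reg) ** 2)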

rehline/_mf_class.py

Lines changed: 12 additions & 18 deletions
@@ -16,9 +16,9 @@ class plqMF_Ridge(_BaseReHLine, BaseEstimator):
 
     .. math::
         \min_{\substack{
-        \mathbf{P} \in \mathbb{R}^{n \times r}\
+        \mathbf{P} \in \mathbb{R}^{n \times k}\
         \pmb{\alpha} \in \mathbb{R}^n \\
-        \mathbf{Q} \in \mathbb{R}^{m \times r}\
+        \mathbf{Q} \in \mathbb{R}^{m \times k}\
         \pmb{\beta} \in \mathbb{R}^m
         }}
         \left[
@@ -32,21 +32,15 @@ class plqMF_Ridge(_BaseReHLine, BaseEstimator):
 
     .. math::
         \ \text{ s.t. } \
-        \mathbf{A} \begin{bmatrix}
-        \pmb{\alpha} & \mathbf{P}
-        \end{bmatrix}^T +
-        \mathbf{b}\mathbf{1}_{n}^T \geq \mathbf{0}
-        \ \text{ and } \
-        \mathbf{A} \begin{bmatrix}
-        \pmb{\beta} & \mathbf{Q}
-        \end{bmatrix}^T +
-        \mathbf{b}\mathbf{1}_{m}^T \geq \mathbf{0}
+        \mathbf{A} \begin{pmatrix} \alpha_u \\ \mathbf{p}_u \end{pmatrix} + \mathbf{b} \geq \mathbf{0},\ u = 1,\dots,n
+        \quad \text{and} \quad
+        \mathbf{A} \begin{pmatrix} \beta_i \\ \mathbf{q}_i \end{pmatrix} + \mathbf{b} \geq \mathbf{0},\ i = 1,\dots,m
 
     The function supports various loss functions, including:
     - 'hinge', 'svm' or 'SVM'
     - 'MAE' or 'mae' or 'mean absolute error'
-    - 'hinge square' or 'svm square' or 'SVM square'
-    - 'MSE' or 'mse' or 'mean square error'
+    - 'squared hinge' or 'squared svm' or 'squared SVM'
+    - 'MSE' or 'mse' or 'mean squared error'
 
     The following constraint types are supported:
         * 'nonnegative' or '>=0': A non-negativity constraint.
@@ -454,21 +448,21 @@ def obj(self, X, y, loss):
 
         elif (loss['name'] == 'MSE') \
                 or (loss['name'] == 'mse') \
-                or (loss['name'] == 'mean square error'):
+                or (loss['name'] == 'mean squared error'):
             loss_term = np.sum( (self.decision_function(X) - y) ** 2 )
 
         elif (loss['name'] == 'hinge') \
                 or (loss['name'] == 'svm') \
                 or (loss['name'] == 'SVM'):
             loss_term = np.sum( np.maximum(0, 1 - y * self.decision_function(X)) )
 
-        elif (loss['name'] == 'hinge square') \
-                or (loss['name'] == 'svm square') \
-                or (loss['name'] == 'SVM square'):
+        elif (loss['name'] == 'squared hinge') \
+                or (loss['name'] == 'squared svm') \
+                or (loss['name'] == 'squared SVM'):
             loss_term = np.sum( np.maximum(0, 1 - y * self.decision_function(X)) ** 2 )
 
         else:
             raise ValueError(f"Unsupported loss function: {loss['name']}. "
-                             f"Supported losses are: 'mae', 'mse', 'hinge', 'hinge square'")
+                             f"Supported losses are: 'mae', 'mse', 'hinge', 'squared hinge'")
 
         return loss_term, self.C * loss_term + penalty
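
For completeness, a minimal fit that exercises one of the renamed losses, mirroring the tutorial example edited in this PR. The import path is assumed from the package layout, and user_num, item_num, X_train, y_train are assumed to be prepared as in doc/source/tutorials/ReHLine_MF.rst (X holds (user, item) index pairs, y the ratings):

    from rehline import plqMF_Ridge   # assumed import path

    clf = plqMF_Ridge(
        C=0.001,
        rank=6,
        loss={'name': 'mse'},          # 'mean squared error' is now an accepted alias
        n_users=user_num,
        n_items=item_num,
    )
    clf.fit(X_train, y_train)

    # The classification variant uses the same dict form, e.g. loss={'name': 'squared svm'},
    # but expects +/-1 labels, as in tests/_test_mf.py.
    scores = clf.decision_function(X_train)   # fitted scores for the training pairs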

rehline/_sklearn_mixin.py

Lines changed: 1 addition & 0 deletions
@@ -442,6 +442,7 @@ def decision_function(self, X):
     def predict(self, X):
         """
         Predict targets as the linear decision function.
+
         Parameters
         ----------
         X : ndarray of shape (n_samples, n_features)

tests/_test_mf.py

Lines changed: 3 additions & 3 deletions
@@ -178,9 +178,9 @@ def evaluate_single_params(params):
 print("="*50)
 
 
-## Choose Hinge Square Loss
+## Choose Squared Hinge Loss
 fixed_params = {
-    'loss': {'name': 'svm square'},
+    'loss': {'name': 'squared svm'},
     'n_users': user_num,
     'n_items': item_num,
     'max_iter': 100000,
@@ -191,7 +191,7 @@ def evaluate_single_params(params):
 
 best_params, best_score, best_acc, all_results = parallel_grid_search(plqMF_Ridge, param_grid, fixed_params, X_train, y_train_bin, X_test, y_test_bin, n_jobs)
 print("\n" + "="*50)
-print("BEST RESULTS(Using Hinge Square Loss)")
+print("BEST RESULTS(Using Squared Hinge Loss)")
 print("="*50)
 print("Optimal Parameters:")
 for param, value in best_params.items():
