@@ -32,9 +32,10 @@ class _BaseFactorizationMachine(six.with_metaclass(ABCMeta, _BasePoly)):
     @abstractmethod
     def __init__(self, degree=2, loss='squared', n_components=2, alpha=1,
                  beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True,
-                 learning_rate=0.001, solver='cd', warm_start=False,
-                 init_lambdas='ones', max_iter=10000, verbose=False,
-                 callback=None, n_calls=100, random_state=None):
+                 learning_rate=0.001, scale_regularization=True,
+                 solver='cd', warm_start=False, init_lambdas='ones',
+                 max_iter=10000, verbose=False, callback=None, n_calls=100,
+                 random_state=None):
         self.degree = degree
         self.loss = loss
         self.n_components = n_components
@@ -44,6 +45,7 @@ def __init__(self, degree=2, loss='squared', n_components=2, alpha=1,
         self.fit_lower = fit_lower
         self.fit_linear = fit_linear
         self.learning_rate = learning_rate
+        self.scale_regularization = scale_regularization
         self.solver = solver
         self.warm_start = warm_start
         self.init_lambdas = init_lambdas
@@ -82,10 +84,20 @@ def fit(self, X, y):

         X, y = self._check_X_y(X, y)
         X = self._augment(X)
-        n_features = X.shape[1]  # augmented
+        n_samples, n_features = X.shape  # augmented
         rng = check_random_state(self.random_state)
         loss_obj = self._get_loss(self.loss)

+        # Scale regularization params to make losses equivalent.
+        if self.scale_regularization and self.solver == 'cd':
+            alpha = 0.5 * self.alpha * n_samples
+            beta = 0.5 * self.beta * n_samples
+        elif not self.scale_regularization and self.solver == 'adagrad':
+            alpha = self.alpha / (0.5 * n_samples)
+            beta = self.beta / (0.5 * n_samples)
+        else:
+            alpha, beta = self.alpha, self.beta
+
         if not (self.warm_start and hasattr(self, 'w_')):
             self.w_ = np.zeros(n_features, dtype=np.double)

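As the branches above imply, the 'cd' solver natively minimizes the summed loss plus an unscaled penalty, while 'adagrad' natively minimizes the mean loss plus a 0.5-scaled penalty; each conversion simply multiplies or divides the whole objective by `n_samples`. A minimal sanity check of that equivalence, using a plain linear model with squared loss (NumPy/SciPy only, not part of this PR):

```python
# Sketch: passing alpha' = 0.5 * alpha * n_samples to the
# "sum + alpha' * ||w||^2" objective makes it equivalent to
# "mean + 0.5 * alpha * ||w||^2" -- the minimizer is unchanged.
import numpy as np
from scipy.optimize import minimize

rng = np.random.RandomState(0)
X, y = rng.randn(50, 5), rng.randn(50)
n_samples, alpha = X.shape[0], 1.0

def scaled_objective(w):
    # mean_i(l_i) + 0.5 * alpha * ||w||^2  (scale_regularization=True)
    return np.mean((y - X @ w) ** 2) + 0.5 * alpha * (w @ w)

def native_cd_objective(w):
    # sum_i(l_i) + alpha' * ||w||^2 with alpha' = 0.5 * alpha * n_samples
    return np.sum((y - X @ w) ** 2) + 0.5 * alpha * n_samples * (w @ w)

w_scaled = minimize(scaled_objective, np.zeros(5)).x
w_native = minimize(native_cd_objective, np.zeros(5)).x
print(np.allclose(w_scaled, w_native, atol=1e-5))  # True: same minimizer
```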
@@ -95,7 +107,7 @@ def fit(self, X, y):
             n_orders = 1

         if not (self.warm_start and hasattr(self, 'P_')):
-            self.P_ = 0.01 * rng.randn(n_orders, self.n_components, n_features)
+            self.P_ = rng.randn(n_orders, self.n_components, n_features)
             if 'ada' in self.solver:
                 # ensure each slice P[0], P[1]... is in F-order
                 self.P_ = np.transpose(self.P_, [1, 2, 0])
@@ -125,7 +137,7 @@ def fit(self, X, y):

             converged = _cd_direct_ho(self.P_, self.w_, dataset, X_col_norms,
                                       y, y_pred, self.lams_, self.degree,
-                                      self.alpha, self.beta, self.fit_linear,
+                                      alpha, beta, self.fit_linear,
                                       self.fit_lower == 'explicit', loss_obj,
                                       self.max_iter, self.tol, self.verbose)
             if not converged:
@@ -141,9 +153,9 @@ def fit(self, X, y):

             dataset = get_dataset(X, order="c")
             _fast_fm_adagrad(self, self.w_, self.P_[0], dataset, y,
-                             self.degree, self.alpha, self.beta,
-                             self.fit_linear, loss_obj, self.max_iter,
-                             self.learning_rate, self.callback, self.n_calls)
+                             self.degree, alpha, beta, self.fit_linear,
+                             loss_obj, self.max_iter, self.learning_rate,
+                             self.callback, self.n_calls)
         return self

     def _get_output(self, X):
@@ -212,9 +224,17 @@ class FactorizationMachineRegressor(_BaseFactorizationMachine,
         coordinate descent. If False, the model can still capture linear
         effects if ``fit_lower == 'augment'``.

-    learning_rate: double, default: 0.001
+    learning_rate : double, default: 0.001
         Learning rate for 'adagrad' solver. Ignored by other solvers.

+    scale_regularization : boolean, default: True
+        Whether to adjust the regularization according to the number of
+        samples. This helps keep the penalty comparable if, after tuning
+        the regularization, the model is retrained on more data.
+
+        If True, the optimized loss is mean_i(l_i) + 0.5 * ||params||^2.
+        If False, the optimized loss is sum_i(l_i) + ||params||^2.
+
     solver : {'cd'|'adagrad'}, default: 'cd'
         - 'cd': Uses a coordinate descent solver. Currently limited to
           degree=3.
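To illustrate the retraining scenario mentioned under `scale_regularization`, a hypothetical usage sketch (the constructor call assumes this PR's signature; the random data exists only to make it runnable):

```python
import numpy as np
from polylearn import FactorizationMachineRegressor  # assumes this PR's API

rng = np.random.RandomState(0)
X_small, y_small = rng.randn(100, 10), rng.randn(100)
X_large, y_large = rng.randn(1000, 10), rng.randn(1000)

# With scale_regularization=True (the default), the penalty is rescaled
# by n_samples inside fit(), so alpha/beta tuned on the small set stay
# comparable when the model is refit on ten times more data.
fm = FactorizationMachineRegressor(degree=2, n_components=2, alpha=1, beta=1,
                                   scale_regularization=True, solver='cd',
                                   random_state=0)
fm.fit(X_small, y_small)
fm.fit(X_large, y_large)
```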
@@ -292,14 +312,15 @@ class FactorizationMachineRegressor(_BaseFactorizationMachine,
     """
     def __init__(self, degree=2, n_components=2, alpha=1, beta=1, tol=1e-6,
                  fit_lower='explicit', fit_linear=True, learning_rate=0.001,
-                 solver='cd', warm_start=False, init_lambdas='ones',
-                 max_iter=10000, verbose=False, callback=None, n_calls=100,
-                 random_state=None):
+                 scale_regularization=True, solver='cd', warm_start=False,
+                 init_lambdas='ones', max_iter=10000, verbose=False,
+                 callback=None, n_calls=100, random_state=None):

         super(FactorizationMachineRegressor, self).__init__(
             degree, 'squared', n_components, alpha, beta, tol, fit_lower,
-            fit_linear, learning_rate, solver, warm_start, init_lambdas,
-            max_iter, verbose, callback, n_calls, random_state)
+            fit_linear, learning_rate, scale_regularization, solver,
+            warm_start, init_lambdas, max_iter, verbose, callback, n_calls,
+            random_state)


 class FactorizationMachineClassifier(_BaseFactorizationMachine,
@@ -355,9 +376,17 @@ class FactorizationMachineClassifier(_BaseFactorizationMachine,
         coordinate descent. If False, the model can still capture linear
         effects if ``fit_lower == 'augment'``.

-    learning_rate: double, default: 0.001
+    learning_rate : double, default: 0.001
         Learning rate for 'adagrad' solver. Ignored by other solvers.

+    scale_regularization : boolean, default: True
+        Whether to adjust the regularization according to the number of
+        samples. This helps keep the penalty comparable if, after tuning
+        the regularization, the model is retrained on more data.
+
+        If True, the optimized loss is mean_i(l_i) + 0.5 * ||params||^2.
+        If False, the optimized loss is sum_i(l_i) + ||params||^2.
+
     solver : {'cd'|'adagrad'}, default: 'cd'
         - 'cd': Uses a coordinate descent solver. Currently limited to
           degree=3.
@@ -436,11 +465,12 @@ class FactorizationMachineClassifier(_BaseFactorizationMachine,

     def __init__(self, degree=2, loss='squared_hinge', n_components=2, alpha=1,
                  beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True,
-                 learning_rate=0.001, solver='cd', warm_start=False,
-                 init_lambdas='ones', max_iter=10000, verbose=False,
-                 callback=None, n_calls=100, random_state=None):
+                 learning_rate=0.001, scale_regularization=True, solver='cd',
+                 warm_start=False, init_lambdas='ones', max_iter=10000,
+                 verbose=False, callback=None, n_calls=100, random_state=None):

         super(FactorizationMachineClassifier, self).__init__(
             degree, loss, n_components, alpha, beta, tol, fit_lower,
-            fit_linear, learning_rate, solver, warm_start, init_lambdas,
-            max_iter, verbose, callback, n_calls, random_state)
+            fit_linear, learning_rate, scale_regularization, solver,
+            warm_start, init_lambdas, max_iter, verbose, callback, n_calls,
+            random_state)