@@ -32,9 +32,10 @@ class _BaseFactorizationMachine(six.with_metaclass(ABCMeta, _BasePoly)):
    @abstractmethod
    def __init__(self, degree=2, loss='squared', n_components=2, alpha=1,
                 beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True,
-                learning_rate=0.001, solver='cd', warm_start=False,
-                init_lambdas='ones', max_iter=10000, verbose=False,
-                callback=None, n_calls=100, random_state=None):
+                learning_rate=0.001, scale_regularization=True,
+                solver='cd', warm_start=False, init_lambdas='ones',
+                max_iter=10000, verbose=False, callback=None, n_calls=100,
+                random_state=None):
        self.degree = degree
        self.loss = loss
        self.n_components = n_components
@@ -44,6 +45,7 @@ def __init__(self, degree=2, loss='squared', n_components=2, alpha=1,
        self.fit_lower = fit_lower
        self.fit_linear = fit_linear
        self.learning_rate = learning_rate
+       self.scale_regularization = scale_regularization
        self.solver = solver
        self.warm_start = warm_start
        self.init_lambdas = init_lambdas
@@ -82,10 +84,20 @@ def fit(self, X, y):
        X, y = self._check_X_y(X, y)
        X = self._augment(X)
-       n_features = X.shape[1]  # augmented
+       n_samples, n_features = X.shape  # augmented
        rng = check_random_state(self.random_state)
        loss_obj = self._get_loss(self.loss)

+       # Scale regularization params to make losses equivalent.
+       if self.scale_regularization and self.solver == 'cd':
+           alpha = 0.5 * self.alpha * n_samples
+           beta = 0.5 * self.beta * n_samples
+       elif not self.scale_regularization and self.solver == 'adagrad':
+           alpha = self.alpha / (0.5 * n_samples)
+           beta = self.beta / (0.5 * n_samples)
+       else:
+           alpha, beta = self.alpha, self.beta
+
        if not (self.warm_start and hasattr(self, 'w_')):
            self.w_ = np.zeros(n_features, dtype=np.double)

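Editorial note, to clarify the rescaling above (not part of the patch): the two branches imply that 'cd' natively minimizes the sum-form objective while 'adagrad' natively minimizes the mean-form one, so alpha and beta are converted between the two conventions. A minimal NumPy sketch of that equivalence, using hypothetical loss values and parameters:

    import numpy as np

    rng = np.random.RandomState(0)
    losses = rng.rand(50)    # hypothetical per-sample losses l_i
    w = rng.randn(10)        # hypothetical flattened model parameters
    alpha, n_samples = 0.1, len(losses)

    # Mean-form objective: mean_i(l_i) + 0.5 * alpha * ||w||^2.
    scaled = losses.mean() + 0.5 * alpha * np.dot(w, w)

    # Sum-form objective with the rescaled alpha used for the 'cd' solver.
    alpha_cd = 0.5 * alpha * n_samples
    unscaled = losses.sum() + alpha_cd * np.dot(w, w)

    # Identical up to the positive factor n_samples, so the minimizers agree.
    assert np.isclose(unscaled, n_samples * scaled)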
@@ -95,7 +107,7 @@ def fit(self, X, y):
            n_orders = 1

        if not (self.warm_start and hasattr(self, 'P_')):
-           self.P_ = 0.01 * rng.randn(n_orders, self.n_components, n_features)
+           self.P_ = rng.randn(n_orders, self.n_components, n_features)
            if 'ada' in self.solver:
                # ensure each slice P[0], P[1]... is in F-order
                self.P_ = np.transpose(self.P_, [1, 2, 0])
@@ -125,7 +137,7 @@ def fit(self, X, y):

            converged = _cd_direct_ho(self.P_, self.w_, dataset, X_col_norms,
                                      y, y_pred, self.lams_, self.degree,
-                                     self.alpha, self.beta, self.fit_linear,
+                                     alpha, beta, self.fit_linear,
                                      self.fit_lower == 'explicit', loss_obj,
                                      self.max_iter, self.tol, self.verbose)
            if not converged:
@@ -141,9 +153,9 @@ def fit(self, X, y):

            dataset = get_dataset(X, order="c")
            _fast_fm_adagrad(self, self.w_, self.P_[0], dataset, y,
-                            self.degree, self.alpha, self.beta,
-                            self.fit_linear, loss_obj, self.max_iter,
-                            self.learning_rate, self.callback, self.n_calls)
+                            self.degree, alpha, beta, self.fit_linear,
+                            loss_obj, self.max_iter, self.learning_rate,
+                            self.callback, self.n_calls)
        return self

    def _get_output(self, X):
@@ -212,9 +224,17 @@ class FactorizationMachineRegressor(_BaseFactorizationMachine,
        coordinate descent. If False, the model can still capture linear
        effects if ``fit_lower == 'augment'``.

-   learning_rate: double, default: 0.001
+   learning_rate : double, default: 0.001
        Learning rate for 'adagrad' solver. Ignored by other solvers.

+   scale_regularization : boolean, default: True
+       Whether to adjust regularization according to the number of samples.
+       This helps if, after tuning regularization, the model will be
+       retrained on more data.
+
+       If set, the loss optimized is mean_i(l_i) + 0.5 * ||params||^2.
+       If not set, the loss becomes sum_i(l_i) + ||params||^2.
+
    solver : {'cd'|'adagrad'}, default: 'cd'
        - 'cd': Uses a coordinate descent solver. Currently limited to
          degree=3.
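For reference, a hypothetical usage sketch of the new parameter (the data and values are illustrative, and it assumes this patched polylearn is installed). With the default scale_regularization=True, alpha and beta are interpreted per-sample, so values tuned on a subset stay sensible when refitting on the full dataset:

    import numpy as np
    from polylearn import FactorizationMachineRegressor

    rng = np.random.RandomState(0)
    X = rng.randn(100, 5)
    y = X[:, 0] * X[:, 1] + 0.1 * rng.randn(100)   # toy pairwise target

    fm = FactorizationMachineRegressor(alpha=0.1, beta=0.1,
                                       scale_regularization=True,
                                       random_state=0)
    fm.fit(X[:50], y[:50])   # e.g. tune alpha/beta on a subset...
    fm.fit(X, y)             # ...then retrain on all data with the same values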
@@ -292,14 +312,15 @@ class FactorizationMachineRegressor(_BaseFactorizationMachine,
    """
    def __init__(self, degree=2, n_components=2, alpha=1, beta=1, tol=1e-6,
                 fit_lower='explicit', fit_linear=True, learning_rate=0.001,
-                solver='cd', warm_start=False, init_lambdas='ones',
-                max_iter=10000, verbose=False, callback=None, n_calls=100,
-                random_state=None):
+                scale_regularization=True, solver='cd', warm_start=False,
+                init_lambdas='ones', max_iter=10000, verbose=False,
+                callback=None, n_calls=100, random_state=None):

        super(FactorizationMachineRegressor, self).__init__(
            degree, 'squared', n_components, alpha, beta, tol, fit_lower,
-           fit_linear, learning_rate, solver, warm_start, init_lambdas,
-           max_iter, verbose, callback, n_calls, random_state)
+           fit_linear, learning_rate, scale_regularization, solver,
+           warm_start, init_lambdas, max_iter, verbose, callback, n_calls,
+           random_state)


class FactorizationMachineClassifier(_BaseFactorizationMachine,
@@ -355,9 +376,17 @@ class FactorizationMachineClassifier(_BaseFactorizationMachine,
        coordinate descent. If False, the model can still capture linear
        effects if ``fit_lower == 'augment'``.

-   learning_rate: double, default: 0.001
+   learning_rate : double, default: 0.001
        Learning rate for 'adagrad' solver. Ignored by other solvers.

+   scale_regularization : boolean, default: True
+       Whether to adjust regularization according to the number of samples.
+       This helps if, after tuning regularization, the model will be
+       retrained on more data.
+
+       If set, the loss optimized is mean_i(l_i) + 0.5 * ||params||^2.
+       If not set, the loss becomes sum_i(l_i) + ||params||^2.
+
    solver : {'cd'|'adagrad'}, default: 'cd'
        - 'cd': Uses a coordinate descent solver. Currently limited to
          degree=3.
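The classifier exposes the same switch; as a sketch of the opposite branch (again with illustrative data, assuming the patched package), turning scaling off while using the 'adagrad' solver makes it target the unscaled sum-form loss, via the internal division of alpha and beta by 0.5 * n_samples shown in fit():

    import numpy as np
    from polylearn import FactorizationMachineClassifier

    rng = np.random.RandomState(0)
    X = rng.randn(80, 4)
    y = (X[:, 0] * X[:, 1] > 0).astype(int)   # toy pairwise-interaction labels

    clf = FactorizationMachineClassifier(solver='adagrad', learning_rate=0.01,
                                         scale_regularization=False,
                                         alpha=1.0, beta=1.0, random_state=0)
    clf.fit(X, y)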
@@ -436,11 +465,12 @@ class FactorizationMachineClassifier(_BaseFactorizationMachine,
    """
    def __init__(self, degree=2, loss='squared_hinge', n_components=2, alpha=1,
                 beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True,
-                learning_rate=0.001, solver='cd', warm_start=False,
-                init_lambdas='ones', max_iter=10000, verbose=False,
-                callback=None, n_calls=100, random_state=None):
+                learning_rate=0.001, scale_regularization=True, solver='cd',
+                warm_start=False, init_lambdas='ones', max_iter=10000,
+                verbose=False, callback=None, n_calls=100, random_state=None):

        super(FactorizationMachineClassifier, self).__init__(
            degree, loss, n_components, alpha, beta, tol, fit_lower,
-           fit_linear, learning_rate, solver, warm_start, init_lambdas,
-           max_iter, verbose, callback, n_calls, random_state)
+           fit_linear, learning_rate, scale_regularization, solver,
+           warm_start, init_lambdas, max_iter, verbose, callback, n_calls,
+           random_state)