@@ -112,164 +112,6 @@ def decision_function(self, x):
        pred = self.shape_fit_.decision_function(xb)
        return pred

-    def fit_middle_update_adam(self, x, y, val_ratio=0.2, tol=0.0001,
-                    max_middle_iter=100, n_middle_iter_no_change=5, max_inner_iter=100, n_inner_iter_no_change=5,
-                    batch_size=100, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, stratify=True, verbose=False):
-
-        """Fine-tune the fitted Sim model using the middle update method (Adam).
-
-        Parameters
-        ----------
-        x : array-like of shape (n_samples, n_features)
-            containing the input dataset
-        y : array-like of shape (n_samples,)
-            containing the target values
-        val_ratio : float, optional, default=0.2
-            the split ratio for the validation set
-        tol : float, optional, default=0.0001
-            the tolerance for early stopping
-        max_middle_iter : int, optional, default=100
-            the maximal number of middle iterations
-        n_middle_iter_no_change : int, optional, default=5
-            the tolerance of non-improving middle iterations
-        max_inner_iter : int, optional, default=100
-            the maximal number of inner iterations (epochs) for the Adam optimizer
-        n_inner_iter_no_change : int, optional, default=5
-            the tolerance of non-improving inner iterations for the Adam optimizer
-        batch_size : int, optional, default=100
-            the batch size for the Adam optimizer
-        learning_rate : float, optional, default=1e-3
-            the learning rate for the Adam optimizer
-        beta_1 : float, optional, default=0.9
-            the beta_1 parameter for the Adam optimizer
-        beta_2 : float, optional, default=0.999
-            the beta_2 parameter for the Adam optimizer
-        stratify : bool, optional, default=True
-            whether to stratify the target variable when splitting the validation set
-        verbose : bool, optional, default=False
-            whether to print the training history
-        """
-
-        x, y = self._validate_input(x, y)
-        n_samples = x.shape[0]
-        if is_regressor(self):
-            idx1, idx2 = train_test_split(np.arange(n_samples), test_size=val_ratio,
-                            random_state=self.random_state)
-            tr_x, tr_y, val_x, val_y = x[idx1], y[idx1], x[idx2], y[idx2]
-        elif is_classifier(self):
-            if stratify:
-                idx1, idx2 = train_test_split(np.arange(n_samples), test_size=val_ratio, stratify=y, random_state=self.random_state)
-            else:
-                idx1, idx2 = train_test_split(np.arange(n_samples), test_size=val_ratio, random_state=self.random_state)
-            tr_x, tr_y, val_x, val_y = x[idx1], y[idx1], x[idx2], y[idx2]
-
-        batch_size = min(batch_size, tr_x.shape[0])
-        val_xb = np.dot(val_x, self.beta_)
-        if is_regressor(self):
-            val_pred = self.shape_fit_.predict(val_xb)
-            val_loss = self.shape_fit_.get_loss(val_y, val_pred)
-        elif is_classifier(self):
-            val_pred = self.shape_fit_.predict_proba(val_xb)[:, 1]
-            val_loss = self.shape_fit_.get_loss(val_y, val_pred)
-
-        self_copy = deepcopy(self)
-        no_middle_iter_change = 0
-        val_loss_middle_iter_best = val_loss
-        for middle_iter in range(max_middle_iter):
-
-            m_t = 0  # moving average of the gradient
-            v_t = 0  # moving average of the squared gradient
-            num_updates = 0
-            no_inner_iter_change = 0
-            theta_0 = self_copy.beta_
-            train_size = tr_x.shape[0]
-            val_loss_inner_iter_best = np.inf
-            for inner_iter in range(max_inner_iter):
-
-                shuffle_index = np.arange(tr_x.shape[0])
-                np.random.shuffle(shuffle_index)
-                tr_x = tr_x[shuffle_index]
-                tr_y = tr_y[shuffle_index]
-
-                for iterations in range(train_size // batch_size):
-
-                    num_updates += 1
-                    offset = (iterations * batch_size) % train_size
-                    batch_xx = tr_x[offset:(offset + batch_size), :]
-                    batch_yy = tr_y[offset:(offset + batch_size)]
-
-                    xb = np.dot(batch_xx, theta_0)
-                    if is_regressor(self_copy):
-                        r = batch_yy - self_copy.shape_fit_.predict(xb).ravel()
-                    elif is_classifier(self_copy):
-                        r = batch_yy - self_copy.shape_fit_.predict_proba(xb)[:, 1]
-
-                    # gradient
-                    dfxb = self_copy.shape_fit_.diff(xb, order=1).ravel()
-                    g_t = np.average((-dfxb * r).reshape(-1, 1) * batch_xx, axis=0).reshape(-1, 1)
-
-                    # update the moving averages
-                    m_t = beta_1 * m_t + (1 - beta_1) * g_t
-                    v_t = beta_2 * v_t + (1 - beta_2) * (g_t * g_t)
-                    # calculate the bias-corrected estimates
-                    m_cap = m_t / (1 - (beta_1 ** num_updates))
-                    v_cap = v_t / (1 - (beta_2 ** num_updates))
-                    # update the parameters
-                    theta_0 = theta_0 - (learning_rate * m_cap) / (np.sqrt(v_cap) + 1e-8)
-
-                # validation loss
-                val_xb = np.dot(val_x, theta_0)
-                if is_regressor(self_copy):
-                    val_pred = self_copy.shape_fit_.predict(val_xb)
-                    val_loss = self_copy.shape_fit_.get_loss(val_y, val_pred)
-                elif is_classifier(self_copy):
-                    val_pred = self_copy.shape_fit_.predict_proba(val_xb)[:, 1]
-                    val_loss = self_copy.shape_fit_.get_loss(val_y, val_pred)
-                if verbose:
-                    print("Middle iter:", middle_iter + 1, "Inner iter:", inner_iter + 1, "with validation loss:", np.round(val_loss, 5))
-                # stopping criterion
-                if val_loss > val_loss_inner_iter_best - tol:
-                    no_inner_iter_change += 1
-                else:
-                    no_inner_iter_change = 0
-                if val_loss < val_loss_inner_iter_best:
-                    val_loss_inner_iter_best = val_loss
-
-                if no_inner_iter_change >= n_inner_iter_no_change:
-                    break
-
-            # normalization
-            if np.linalg.norm(theta_0) > 0:
-                theta_0 = theta_0 / np.linalg.norm(theta_0)
-                if theta_0[np.argmax(np.abs(theta_0))] < 0:
-                    theta_0 = -theta_0
-
-            # ridge update
-            self_copy.beta_ = theta_0
-            tr_xb = np.dot(tr_x, self_copy.beta_)
-            self_copy._estimate_shape(tr_xb, tr_y, np.min(tr_xb), np.max(tr_xb))
-
-            val_xb = np.dot(val_x, self_copy.beta_)
-            if is_regressor(self_copy):
-                val_pred = self_copy.shape_fit_.predict(val_xb)
-                val_loss = self_copy.shape_fit_.get_loss(val_y, val_pred)
-            elif is_classifier(self_copy):
-                val_pred = self_copy.shape_fit_.predict_proba(val_xb)[:, 1]
-                val_loss = self_copy.shape_fit_.get_loss(val_y, val_pred)
-
-            if val_loss > val_loss_middle_iter_best - tol:
-                no_middle_iter_change += 1
-            else:
-                no_middle_iter_change = 0
-            if val_loss < val_loss_middle_iter_best:
-                self.beta_ = self_copy.beta_
-                self.shape_fit_ = self_copy.shape_fit_
-                val_loss_middle_iter_best = val_loss
-            if no_middle_iter_change >= n_middle_iter_no_change:
-                break
-
-            self = deepcopy(self_copy)
-
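# --- Illustrative sketch (not part of the original file) ---
# The removed fit_middle_update_adam method above alternates Adam updates of the
# projection index beta with re-estimation of the ridge function. The following
# minimal, self-contained sketch shows only the Adam step on beta, assuming a
# known ridge function and its derivative as stand-ins for shape_fit_.predict and
# shape_fit_.diff; every name below is hypothetical, not part of the library API.
import numpy as np

def adam_beta_step(batch_x, batch_y, beta, m_t, v_t, t, ridge_fn, ridge_fn_grad,
                   lr=1e-3, beta_1=0.9, beta_2=0.999, eps=1e-8):
    """One Adam update of beta for the single-index model y ~ ridge_fn(x @ beta)."""
    xb = batch_x @ beta                                # projected inputs, shape (n,)
    r = batch_y - ridge_fn(xb)                         # residuals
    # gradient of the squared loss w.r.t. beta, same form as in the removed code
    g_t = np.mean((-ridge_fn_grad(xb) * r)[:, None] * batch_x, axis=0)
    m_t = beta_1 * m_t + (1 - beta_1) * g_t            # first-moment moving average
    v_t = beta_2 * v_t + (1 - beta_2) * g_t ** 2       # second-moment moving average
    m_hat = m_t / (1 - beta_1 ** t)                    # bias-corrected estimates
    v_hat = v_t / (1 - beta_2 ** t)
    beta = beta - lr * m_hat / (np.sqrt(v_hat) + eps)  # Adam parameter update
    return beta, m_t, v_t

# toy usage with a quadratic ridge function
rng = np.random.default_rng(0)
x = rng.normal(size=(200, 5))
true_beta = np.array([0.8, 0.6, 0.0, 0.0, 0.0])
y = (x @ true_beta) ** 2 + 0.1 * rng.normal(size=200)

beta, m_t, v_t = rng.normal(size=5), 0.0, 0.0
for t in range(1, 201):
    beta, m_t, v_t = adam_beta_step(x, y, beta, m_t, v_t, t,
                                    ridge_fn=lambda z: z ** 2,
                                    ridge_fn_grad=lambda z: 2 * z)
# normalize to unit norm with a positive leading coefficient, as the removed method does
beta = beta / np.linalg.norm(beta)
if beta[np.argmax(np.abs(beta))] < 0:
    beta = -beta
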
    def visualize(self):

        """draw the fitted projection indices and ridge function