@@ -112,164 +112,6 @@ def decision_function(self, x):
        pred = self.shape_fit_.decision_function(xb)
        return pred

-    def fit_middle_update_adam(self, x, y, val_ratio=0.2, tol=0.0001,
-                    max_middle_iter=100, n_middle_iter_no_change=5, max_inner_iter=100, n_inner_iter_no_change=5,
-                    batch_size=100, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, stratify=True, verbose=False):
-
-        """Fine-tune the fitted Sim model using the middle update method (Adam).
-
-        Parameters
-        ----------
-        x : array-like of shape (n_samples, n_features)
-            containing the input dataset
-        y : array-like of shape (n_samples,)
-            containing the target values
-        val_ratio : float, optional, default=0.2
-            the split ratio for the validation set
-        tol : float, optional, default=0.0001
-            the tolerance for early stopping
-        max_middle_iter : int, optional, default=100
-            the maximal number of middle iterations
-        n_middle_iter_no_change : int, optional, default=5
-            the tolerance of non-improving middle iterations
-        max_inner_iter : int, optional, default=100
-            the maximal number of inner iterations (epochs) for the Adam optimizer
-        n_inner_iter_no_change : int, optional, default=5
-            the tolerance of non-improving inner iterations for the Adam optimizer
-        batch_size : int, optional, default=100
-            the batch size for the Adam optimizer
-        learning_rate : float, optional, default=1e-3
-            the learning rate for the Adam optimizer
-        beta_1 : float, optional, default=0.9
-            the beta_1 parameter for the Adam optimizer
-        beta_2 : float, optional, default=0.999
-            the beta_2 parameter for the Adam optimizer
-        stratify : bool, optional, default=True
-            whether to stratify the target variable when splitting the validation set
-        verbose : bool, optional, default=False
-            whether to print the training history
-        """
-
-        x, y = self._validate_input(x, y)
-        n_samples = x.shape[0]
-        if is_regressor(self):
-            idx1, idx2 = train_test_split(np.arange(n_samples), test_size=val_ratio,
-                            random_state=self.random_state)
-            tr_x, tr_y, val_x, val_y = x[idx1], y[idx1], x[idx2], y[idx2]
-        elif is_classifier(self):
-            if stratify:
-                idx1, idx2 = train_test_split(np.arange(n_samples), test_size=val_ratio, stratify=y, random_state=self.random_state)
-            else:
-                idx1, idx2 = train_test_split(np.arange(n_samples), test_size=val_ratio, random_state=self.random_state)
-            tr_x, tr_y, val_x, val_y = x[idx1], y[idx1], x[idx2], y[idx2]
-
-        batch_size = min(batch_size, tr_x.shape[0])
-        val_xb = np.dot(val_x, self.beta_)
-        if is_regressor(self):
-            val_pred = self.shape_fit_.predict(val_xb)
-            val_loss = self.shape_fit_.get_loss(val_y, val_pred)
-        elif is_classifier(self):
-            val_pred = self.shape_fit_.predict_proba(val_xb)[:, 1]
-            val_loss = self.shape_fit_.get_loss(val_y, val_pred)
-
-        self_copy = deepcopy(self)
-        no_middle_iter_change = 0
-        val_loss_middle_iter_best = val_loss
-        for middle_iter in range(max_middle_iter):
-
-            m_t = 0  # moving average of the gradient
-            v_t = 0  # moving average of the squared gradient
-            num_updates = 0
-            no_inner_iter_change = 0
-            theta_0 = self_copy.beta_
-            train_size = tr_x.shape[0]
-            val_loss_inner_iter_best = np.inf
-            for inner_iter in range(max_inner_iter):
-
-                shuffle_index = np.arange(tr_x.shape[0])
-                np.random.shuffle(shuffle_index)
-                tr_x = tr_x[shuffle_index]
-                tr_y = tr_y[shuffle_index]
-
-                for iterations in range(train_size // batch_size):
-
-                    num_updates += 1
-                    offset = (iterations * batch_size) % train_size
-                    batch_xx = tr_x[offset:(offset + batch_size), :]
-                    batch_yy = tr_y[offset:(offset + batch_size)]
-
-                    xb = np.dot(batch_xx, theta_0)
-                    if is_regressor(self_copy):
-                        r = batch_yy - self_copy.shape_fit_.predict(xb).ravel()
-                    elif is_classifier(self_copy):
-                        r = batch_yy - self_copy.shape_fit_.predict_proba(xb)[:, 1]
-
-                    # gradient
-                    dfxb = self_copy.shape_fit_.diff(xb, order=1).ravel()
-                    g_t = np.average((-dfxb * r).reshape(-1, 1) * batch_xx, axis=0).reshape(-1, 1)
-
-                    # update the moving averages
-                    m_t = beta_1 * m_t + (1 - beta_1) * g_t
-                    v_t = beta_2 * v_t + (1 - beta_2) * (g_t * g_t)
-                    # calculate the bias-corrected estimates
-                    m_cap = m_t / (1 - (beta_1 ** num_updates))
-                    v_cap = v_t / (1 - (beta_2 ** num_updates))
-                    # update the parameters
-                    theta_0 = theta_0 - (learning_rate * m_cap) / (np.sqrt(v_cap) + 1e-8)
-
-                # validation loss
-                val_xb = np.dot(val_x, theta_0)
-                if is_regressor(self_copy):
-                    val_pred = self_copy.shape_fit_.predict(val_xb)
-                    val_loss = self_copy.shape_fit_.get_loss(val_y, val_pred)
-                elif is_classifier(self_copy):
-                    val_pred = self_copy.shape_fit_.predict_proba(val_xb)[:, 1]
-                    val_loss = self_copy.shape_fit_.get_loss(val_y, val_pred)
-                if verbose:
-                    print("Middle iter:", middle_iter + 1, "Inner iter:", inner_iter + 1, "with validation loss:", np.round(val_loss, 5))
-                # stopping criterion
-                if val_loss > val_loss_inner_iter_best - tol:
-                    no_inner_iter_change += 1
-                else:
-                    no_inner_iter_change = 0
-                if val_loss < val_loss_inner_iter_best:
-                    val_loss_inner_iter_best = val_loss
-
-                if no_inner_iter_change >= n_inner_iter_no_change:
-                    break
-
-            # normalization
-            if np.linalg.norm(theta_0) > 0:
-                theta_0 = theta_0 / np.linalg.norm(theta_0)
-                if theta_0[np.argmax(np.abs(theta_0))] < 0:
-                    theta_0 = -theta_0
-
-            # ridge update
-            self_copy.beta_ = theta_0
-            tr_xb = np.dot(tr_x, self_copy.beta_)
-            self_copy._estimate_shape(tr_xb, tr_y, np.min(tr_xb), np.max(tr_xb))
-
-            val_xb = np.dot(val_x, self_copy.beta_)
-            if is_regressor(self_copy):
-                val_pred = self_copy.shape_fit_.predict(val_xb)
-                val_loss = self_copy.shape_fit_.get_loss(val_y, val_pred)
-            elif is_classifier(self_copy):
-                val_pred = self_copy.shape_fit_.predict_proba(val_xb)[:, 1]
-                val_loss = self_copy.shape_fit_.get_loss(val_y, val_pred)
-
-            if val_loss > val_loss_middle_iter_best - tol:
-                no_middle_iter_change += 1
-            else:
-                no_middle_iter_change = 0
-            if val_loss < val_loss_middle_iter_best:
-                self.beta_ = self_copy.beta_
-                self.shape_fit_ = self_copy.shape_fit_
-                val_loss_middle_iter_best = val_loss
-            if no_middle_iter_change >= n_middle_iter_no_change:
-                break
-
-            self = deepcopy(self_copy)
-
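# --- Illustrative sketch (not part of the original file) ---
# The removed fit_middle_update_adam method above alternates Adam updates of the
# projection index beta with re-estimation of the ridge function. The following
# minimal, self-contained sketch shows only the Adam step on beta, assuming a
# known ridge function and its derivative as stand-ins for shape_fit_.predict and
# shape_fit_.diff; every name below is hypothetical, not part of the library API.
import numpy as np

def adam_beta_step(batch_x, batch_y, beta, m_t, v_t, t, ridge_fn, ridge_fn_grad,
                   lr=1e-3, beta_1=0.9, beta_2=0.999, eps=1e-8):
    """One Adam update of beta for the single-index model y ~ ridge_fn(x @ beta)."""
    xb = batch_x @ beta                                # projected inputs, shape (n,)
    r = batch_y - ridge_fn(xb)                         # residuals
    # gradient of the squared loss w.r.t. beta, same form as in the removed code
    g_t = np.mean((-ridge_fn_grad(xb) * r)[:, None] * batch_x, axis=0)
    m_t = beta_1 * m_t + (1 - beta_1) * g_t            # first-moment moving average
    v_t = beta_2 * v_t + (1 - beta_2) * g_t ** 2       # second-moment moving average
    m_hat = m_t / (1 - beta_1 ** t)                    # bias-corrected estimates
    v_hat = v_t / (1 - beta_2 ** t)
    beta = beta - lr * m_hat / (np.sqrt(v_hat) + eps)  # Adam parameter update
    return beta, m_t, v_t

# toy usage with a quadratic ridge function
rng = np.random.default_rng(0)
x = rng.normal(size=(200, 5))
true_beta = np.array([0.8, 0.6, 0.0, 0.0, 0.0])
y = (x @ true_beta) ** 2 + 0.1 * rng.normal(size=200)

beta, m_t, v_t = rng.normal(size=5), 0.0, 0.0
for t in range(1, 201):
    beta, m_t, v_t = adam_beta_step(x, y, beta, m_t, v_t, t,
                                    ridge_fn=lambda z: z ** 2,
                                    ridge_fn_grad=lambda z: 2 * z)
# normalize to unit norm with a positive leading coefficient, as the removed method does
beta = beta / np.linalg.norm(beta)
if beta[np.argmax(np.abs(beta))] < 0:
    beta = -beta
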
    def visualize(self):

        """draw the fitted projection indices and ridge function