mdh266 · mdh266 · Jun 5, 2021 · Jun 5, 2021 · Jun 5, 2021 · Jun 5, 2021
diff --git a/.travis.yml b/.travis.yml
@@ -1,15 +1,13 @@
 language: python
 python:
-- "3.6"
 - "3.7"
-- "3.8"
 before_install:
 - pip install pytest==5.4.3
 - pip install pytest-cov==2.10.0
 - pip install codecov==2.1.8
 install:
-- python setup.py install
+- pip install .
 script:
-- pytest --cov
+- python setup.py test 
 after_success:
 - codecov
diff --git a/README.md b/README.md
@@ -80,17 +80,17 @@ Uses the `setup.py` generated by [PyScaffold](https://pypi.org/project/PyScaffol
 
 ## Test
 -----------------
-Uses the `setup.py` generated by [PyScaffold](https://pypi.org/project/PyScaffold/):
+Tests are run with [pytest](https://docs.pytest.org/) and [pytest-cov](https://pypi.org/project/pytest-cov/):
 
-    python setup.py test
+    pytest 
 
 ## Dependencies
 --------------
 Dependencies are minimal:
 
-    - Python (>= 3.6)
-    - [Scikit-Learn](https://scikit-learn.org/stable/) (>=0.23)
-    - [Pandas](https://pandas.pydata.org/) (>=1.0)
+- [Python (>= 3.6)](https://www.python.org/downloads/release/python-360/)
+- [Scikit-Learn (>=0.23)](https://scikit-learn.org/stable/)
+- [Pandas (>=1.0)](https://pandas.pydata.org/) 
 
 
 ## References

diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,5 @@
+[pytest]
+testpaths =
+    src
+    tests
+addopts = --cov --cov-append
diff --git a/setup.cfg b/setup.cfg
@@ -33,6 +33,7 @@ setup_requires = pyscaffold>=3.2a0,<3.3a0
 install_requires =
     pandas>=1.0
     scikit-learn>=0.23
+    black==21.5b2
 
 # The usage of test_requires is discouraged, see `Dependency Management` docs
 tests_require =

diff --git a/src/randomforests/Forest.py b/src/randomforests/Forest.py
@@ -1,5 +1,6 @@
 import numpy as np
 
+
 class RandomForest:
     """
     A Random Forest base class.
@@ -29,12 +30,12 @@ class RandomForest:
 
     def __init__(self, n_trees=10, max_depth=2, min_size=1):
         self.max_depth = max_depth
-        self.min_size  = min_size
-        self.n_trees   = n_trees
-        self.cost      = None
-        self.trees     = None
+        self.min_size = min_size
+        self.n_trees = n_trees
+        self.cost = None
+        self.trees = None
 
-    def _subsample(self, dataset : np.ndarray) -> np.ndarray:
+    def _subsample(self, dataset: np.ndarray) -> np.ndarray:
         """
         This function returns a bootstrapped version of the dataset which
         has the same number of rows.
@@ -50,20 +51,20 @@ def _subsample(self, dataset : np.ndarray) -> np.ndarray:
 
         number_of_rows = dataset.shape[0]
         sample_of_rows = number_of_rows
-        random_indices = np.random.choice(number_of_rows,
-                                          size=sample_of_rows,
-                                          replace=True)
-        return dataset[random_indices,:]
+        random_indices = np.random.choice(
+            number_of_rows, size=sample_of_rows, replace=True
+        )
+        return dataset[random_indices, :]
 
     def set_params(self, **parameters):
         for parameter, value in parameters.items():
             setattr(self, parameter, value)
         return self
 
-
     def get_params(self, deep=True):
-        return {"max_depth" : self.max_depth,
-                "min_size"  : self.min_size,
-                "cost"      : self.cost,
-                "n_trees"   : self.n_trees}
-
+        return {
+            "max_depth": self.max_depth,
+            "min_size": self.min_size,
+            "cost": self.cost,
+            "n_trees": self.n_trees,
+        }
diff --git a/src/randomforests/ForestClassifier.py b/src/randomforests/ForestClassifier.py
@@ -10,7 +10,8 @@
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.metrics import accuracy_score
 
-class RandomForestClassifier (BaseEstimator, ClassifierMixin, RandomForest):
+
+class RandomForestClassifier(BaseEstimator, ClassifierMixin, RandomForest):
     """
     A random forest classification model that extends the abstract base class
     of random forest.
@@ -33,26 +34,28 @@ class RandomForestClassifier (BaseEstimator, ClassifierMixin, RandomForest):
         The cost function
     """
 
-    def __init__(self, n_trees : int = 10, max_depth : int =2, min_size : int =1, cost : str ='gini'):
+    def __init__(
+        self,
+        n_trees: int = 10,
+        max_depth: int = 2,
+        min_size: int = 1,
+        cost: str = "gini",
+    ):
         """
         Constructor for random forest classifier. This mainly just initialize
         the attributes of the class by calling the base class constructor.
         However, here is where it is the cost function string is checked
         to make sure it either using 'gini', otherwise an error is thrown.
 
         """
-        super().__init__(n_trees   = n_trees,
-                         max_depth = max_depth,
-                         min_size  = min_size)
+        super().__init__(n_trees=n_trees, max_depth=max_depth, min_size=min_size)
 
-        if cost == 'gini':
-            self.cost  = "gini"
+        if cost == "gini":
+            self.cost = "gini"
         else:
-            raise NameError('Not valid cost function')
-
-
+            raise NameError("Not valid cost function")
 
-    def fit(self, X, y = None):
+    def fit(self, X, y=None):
         """
         Fit the random forest to the training set train.
 
@@ -64,15 +67,15 @@ def fit(self, X, y = None):
         """
 
         n_features = round(sqrt(X.shape[1]))
-        dataset    = _make_dataset(X,y)
-        self.trees = [self._bootstrap_tree(dataset    = dataset,
-                                           n_features = n_features)
-                      for i in range(self.n_trees)]
+        dataset = _make_dataset(X, y)
+        self.trees = [
+            self._bootstrap_tree(dataset=dataset, n_features=n_features)
+            for i in range(self.n_trees)
+        ]
 
         return self
 
-
-    def predict(self, x : pd.DataFrame) -> int:
+    def predict(self, x: pd.DataFrame) -> int:
         """
         Predict the class that this sample datapoint belongs to.
 
@@ -94,7 +97,6 @@ def predict(self, x : pd.DataFrame) -> int:
 
         return sp.stats.mode(preds)[0][0]
 
-
     def score(self, X=None, y=None):
         """
         Returns the accuracy of the model
@@ -107,12 +109,14 @@ def score(self, X=None, y=None):
 
         """
 
-        return accuracy_score(y,self.predict(X))
+        return accuracy_score(y, self.predict(X))
 
-    def _bootstrap_tree(self, dataset : np.ndarray, n_features : int) -> DecisionTreeClassifier:
+    def _bootstrap_tree(
+        self, dataset: np.ndarray, n_features: int
+    ) -> DecisionTreeClassifier:
 
         sample = self._subsample(dataset)
-        tree   = DecisionTreeClassifier(max_depth  = self.max_depth,
-                                        min_size   = self.min_size,
-                                        n_features = n_features)
-        return tree.fit(sample[:,:-1],sample[:,-1])
+        tree = DecisionTreeClassifier(
+            max_depth=self.max_depth, min_size=self.min_size, n_features=n_features
+        )
+        return tree.fit(sample[:, :-1], sample[:, -1])
diff --git a/src/randomforests/ForestRegressor.py b/src/randomforests/ForestRegressor.py
@@ -9,7 +9,8 @@
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.metrics import mean_squared_error
 
-class RandomForestRegressor (BaseEstimator, ClassifierMixin, RandomForest):
+
+class RandomForestRegressor(BaseEstimator, ClassifierMixin, RandomForest):
     """
     A random forest regression model that extends the abstract base class
     of random forest.
@@ -32,25 +33,28 @@ class RandomForestRegressor (BaseEstimator, ClassifierMixin, RandomForest):
         The cost function
     """
 
-    def __init__(self, n_trees : int = 10, max_depth : int =2, min_size : int =1, cost : str = "mse"):
+    def __init__(
+        self,
+        n_trees: int = 10,
+        max_depth: int = 2,
+        min_size: int = 1,
+        cost: str = "mse",
+    ):
         """
         Constructor for random forest regressor. This mainly just initialize
         the attributes of the class by calling the base class constructor.
         However, here is where it is the cost function string is checked
         to make sure it either using 'mse', otherwise an error is thrown.
 
         """
-        super().__init__(n_trees   = n_trees,
-                         max_depth = max_depth,
-                         min_size  = min_size)
+        super().__init__(n_trees=n_trees, max_depth=max_depth, min_size=min_size)
 
-        if cost == 'mse':
-            self.cost  = "mse"
+        if cost == "mse":
+            self.cost = "mse"
         else:
-            raise NameError('Not valid cost function')
-
+            raise NameError("Not valid cost function")
 
-    def fit(self, X, y = None):
+    def fit(self, X, y=None):
         """
         Fit the random forest to the training set train.
 
@@ -70,14 +74,15 @@ def fit(self, X, y = None):
         """
 
         n_features = round(sqrt(X.shape[1]))
-        dataset    = _make_dataset(X,y)
-        self.trees = [self._bootstrap_tree(dataset    = dataset,
-                                           n_features = n_features)
-                      for i in range(self.n_trees)]
+        dataset = _make_dataset(X, y)
+        self.trees = [
+            self._bootstrap_tree(dataset=dataset, n_features=n_features)
+            for i in range(self.n_trees)
+        ]
 
         return self
 
-    def predict(self, x : pd.DataFrame) -> int:
+    def predict(self, x: pd.DataFrame) -> int:
         """
         Predict the value for this sample datapoint
 
@@ -94,10 +99,10 @@ def predict(self, x : pd.DataFrame) -> int:
             rows = x.to_numpy()
         else:
             rows = x
-        
+
         preds = np.vstack([tree.predict(rows) for tree in self.trees])
 
-        return np.mean(preds,axis=0)
+        return np.mean(preds, axis=0)
 
     def score(self, X=None, y=None):
         """
@@ -114,12 +119,14 @@ def score(self, X=None, y=None):
         float
         """
 
-        return mean_squared_error(y,self.predict(X))
+        return mean_squared_error(y, self.predict(X))
 
-    def _bootstrap_tree(self, dataset : np.ndarray, n_features : int) -> DecisionTreeRegressor:
+    def _bootstrap_tree(
+        self, dataset: np.ndarray, n_features: int
+    ) -> DecisionTreeRegressor:
 
         sample = self._subsample(dataset)
-        tree   = DecisionTreeRegressor(max_depth  = self.max_depth,
-                                       min_size   = self.min_size,
-                                       n_features = n_features)
-        return tree.fit(sample[:,:-1],sample[:,-1])
+        tree = DecisionTreeRegressor(
+            max_depth=self.max_depth, min_size=self.min_size, n_features=n_features
+        )
+        return tree.fit(sample[:, :-1], sample[:, -1])