Commit ba00cdc

Author: Nabil Fayak (committed)
invalid_target_data_check added with errors
1 parent 22cbd87 commit ba00cdc

File tree

12 files changed: +1784 -0 lines

checkmates/data_checks/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -35,6 +35,7 @@
 from checkmates.data_checks.checks.sparsity_data_check import SparsityDataCheck
 from checkmates.data_checks.checks.datetime_format_data_check import DateTimeFormatDataCheck
 from checkmates.data_checks.checks.multicollinearity_data_check import MulticollinearityDataCheck
+from checkmates.data_checks.checks.invalid_target_data_check import InvalidTargetDataCheck
checkmates/data_checks/checks/invalid_target_data_check.py

Lines changed: 448 additions & 0 deletions
Large diffs are not rendered by default.
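
Since the 448-line diff for this file is not rendered, the following is only a rough usage sketch. The class name comes from the import added to checkmates/data_checks/__init__.py above; the constructor arguments and the validate(X, y) interface are assumptions modeled on the package's other data checks, not confirmed by this commit.

import pandas as pd

from checkmates.data_checks import InvalidTargetDataCheck

X = pd.DataFrame({"feature": [1, 2, 3, 4]})
y = pd.Series([1.2, None, 3.4, 5.6])  # the null target value should be flagged

# Assumed signature: a problem type plus an objective used to judge target validity.
check = InvalidTargetDataCheck(problem_type="regression", objective="Root Mean Squared Log Error")
for message in check.validate(X, y):
    print(message)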

checkmates/exceptions/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -3,4 +3,6 @@
     DataCheckInitError,
     MissingComponentError,
     ValidationErrorCode,
+    ObjectiveCreationError,
+    ObjectiveNotFoundError
 )

checkmates/exceptions/exceptions.py

Lines changed: 7 additions & 0 deletions
@@ -7,6 +7,13 @@ class MissingComponentError(Exception):
 
     pass
 
+class ObjectiveNotFoundError(Exception):
+    """Exception to raise when specified objective does not exist."""
+
+    pass
+
+class ObjectiveCreationError(Exception):
+    """Exception when get_objective tries to instantiate an objective and required args are not provided."""
 
 class DataCheckInitError(Exception):
     """Exception raised when a data check can't initialize with the parameters given."""

checkmates/objectives/__init__.py

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+from checkmates.objectives.objective_base import ObjectiveBase
+from checkmates.objectives.regression_objective import RegressionObjective
+
+from checkmates.objectives.utils import get_objective
+from checkmates.objectives.utils import get_default_primary_search_objective
+from checkmates.objectives.utils import get_non_core_objectives
+from checkmates.objectives.utils import get_core_objectives
+
+
+from checkmates.objectives.standard_metrics import RootMeanSquaredLogError
+from checkmates.objectives.standard_metrics import MeanSquaredLogError
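
The net effect of this module is a flat public API: downstream code can import the lookup helpers and the concrete metrics from the package root instead of deep module paths, e.g.:

from checkmates.objectives import (
    MeanSquaredLogError,
    RootMeanSquaredLogError,
    get_core_objectives,
    get_objective,
)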
checkmates/objectives/objective_base.py

Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,217 @@
+"""Base class for all objectives."""
+from abc import ABC, abstractmethod
+
+import numpy as np
+import pandas as pd
+
+from checkmates.problem_types import handle_problem_types
+from checkmates.utils import classproperty
+
+
+class ObjectiveBase(ABC):
+    """Base class for all objectives."""
+
+    problem_types = None
+
+    @property
+    @classmethod
+    @abstractmethod
+    def name(cls):
+        """Returns a name describing the objective."""
+
+    @property
+    @classmethod
+    @abstractmethod
+    def greater_is_better(cls):
+        """Returns a boolean determining if a greater score indicates better model performance."""
+
+    @property
+    @classmethod
+    @abstractmethod
+    def score_needs_proba(cls):
+        """Returns a boolean determining if the score() method needs probability estimates.
+
+        This should be true for objectives which work with predicted
+        probabilities, like log loss or AUC, and false for objectives
+        which compare predicted class labels to the actual labels, like
+        F1 or correlation.
+        """
+
+    @property
+    @classmethod
+    @abstractmethod
+    def perfect_score(cls):
+        """Returns the score obtained by evaluating this objective on a perfect model."""
+
+    @property
+    @classmethod
+    @abstractmethod
+    def is_bounded_like_percentage(cls):
+        """Returns whether this objective is bounded between 0 and 1, inclusive."""
+
+    @property
+    @classmethod
+    @abstractmethod
+    def expected_range(cls):
+        """Returns the expected range of the objective, which is not necessarily the possible range.
+
+        For example, our expected R2 range is from [-1, 1], although the
+        actual range is (-inf, 1].
+        """
+
+    @classmethod
+    @abstractmethod
+    def objective_function(
+        cls,
+        y_true,
+        y_predicted,
+        y_train=None,
+        X=None,
+        sample_weight=None,
+    ):
+        """Computes the relative value of the provided predictions compared to the actual labels, according to a specified metric.
+
+        Args:
+            y_predicted (pd.Series): Predicted values of length [n_samples]
+            y_true (pd.Series): Actual class labels of length [n_samples]
+            y_train (pd.Series): Observed training values of length [n_samples]
+            X (pd.DataFrame or np.ndarray): Extra data of shape [n_samples, n_features] necessary to calculate score
+            sample_weight (pd.DataFrame or np.ndarray): Sample weights used in computing objective value result
+
+        Returns:
+            Numerical value used to calculate score
+        """
+
+    @classproperty
+    def positive_only(cls):
+        """If True, this objective is only valid for positive data. Defaults to False."""
+        return False
+
+    def score(self, y_true, y_predicted, y_train=None, X=None, sample_weight=None):
+        """Returns a numerical score indicating performance based on the differences between the predicted and actual values.
+
+        Args:
+            y_predicted (pd.Series): Predicted values of length [n_samples]
+            y_true (pd.Series): Actual class labels of length [n_samples]
+            y_train (pd.Series): Observed training values of length [n_samples]
+            X (pd.DataFrame or np.ndarray): Extra data of shape [n_samples, n_features] necessary to calculate score
+            sample_weight (pd.DataFrame or np.ndarray): Sample weights used in computing objective value result
+
+        Returns:
+            score
+        """
+        if X is not None:
+            X = self._standardize_input_type(X)
+        if y_train is not None:
+            y_train = self._standardize_input_type(y_train)
+        y_true = self._standardize_input_type(y_true)
+        y_predicted = self._standardize_input_type(y_predicted)
+        self.validate_inputs(y_true, y_predicted)
+        return self.objective_function(
+            y_true,
+            y_predicted,
+            y_train=y_train,
+            X=X,
+            sample_weight=sample_weight,
+        )
+
+    @staticmethod
+    def _standardize_input_type(input_data):
+        """Standardize input to pandas for scoring.
+
+        Args:
+            input_data (list, pd.DataFrame, pd.Series, or np.ndarray): A matrix of predictions or predicted probabilities
+
+        Returns:
+            pd.DataFrame or pd.Series: a pd.Series, or pd.DataFrame object if predicted probabilities were provided.
+        """
+        if isinstance(input_data, (pd.Series, pd.DataFrame)):
+            return input_data
+        if isinstance(input_data, list):
+            if isinstance(input_data[0], list):
+                return pd.DataFrame(input_data)
+            return pd.Series(input_data)
+        if isinstance(input_data, np.ndarray):
+            if len(input_data.shape) == 1:
+                return pd.Series(input_data)
+            return pd.DataFrame(input_data)
+
+    def validate_inputs(self, y_true, y_predicted):
+        """Validates the input based on a few simple checks.
+
+        Args:
+            y_predicted (pd.Series, or pd.DataFrame): Predicted values of length [n_samples].
+            y_true (pd.Series): Actual class labels of length [n_samples].
+
+        Raises:
+            ValueError: If the inputs are malformed.
+        """
+        if y_predicted.shape[0] != y_true.shape[0]:
+            raise ValueError(
+                "Inputs have mismatched dimensions: y_predicted has shape {}, y_true has shape {}".format(
+                    len(y_predicted),
+                    len(y_true),
+                ),
+            )
+        if len(y_true) == 0:
+            raise ValueError("Length of inputs is 0")
+
+        if isinstance(y_true, pd.DataFrame):
+            y_true = y_true.to_numpy().flatten()
+        if np.isnan(y_true).any() or np.isinf(y_true).any():
+            raise ValueError("y_true contains NaN or infinity")
+
+        if isinstance(y_predicted, pd.DataFrame):
+            y_predicted = y_predicted.to_numpy().flatten()
+        if np.isnan(y_predicted).any() or np.isinf(y_predicted).any():
+            raise ValueError("y_predicted contains NaN or infinity")
+        if self.score_needs_proba and np.any([(y_predicted < 0) | (y_predicted > 1)]):
+            raise ValueError(
+                "y_predicted contains probability estimates not within [0, 1]",
+            )
+
+    @classmethod
+    def calculate_percent_difference(cls, score, baseline_score):
+        """Calculate the percent difference between scores.
+
+        Args:
+            score (float): A score. Output of the score method of this objective.
+            baseline_score (float): A score. Output of the score method of this objective. In practice,
+                this is the score achieved on this objective with a baseline estimator.
+
+        Returns:
+            float: The percent difference between the scores. Note that for objectives that can be interpreted
+                as percentages, this will be the difference between the reference score and score. For all other
+                objectives, the difference will be normalized by the reference score.
+        """
+        if pd.isna(score) or pd.isna(baseline_score):
+            return np.nan
+
+        if np.isclose(baseline_score - score, 0, atol=1e-10):
+            return 0
+
+        # Return inf when dividing by 0
+        if (
+            np.isclose(baseline_score, 0, atol=1e-10)
+            and not cls.is_bounded_like_percentage
+        ):
+            return np.inf
+
+        decrease = False
+        if (baseline_score > score and cls.greater_is_better) or (
+            baseline_score < score and not cls.greater_is_better
+        ):
+            decrease = True
+
+        difference = baseline_score - score
+        change = (
+            difference
+            if cls.is_bounded_like_percentage
+            else difference / baseline_score
+        )
+        return 100 * (-1) ** (decrease) * np.abs(change)
+
+    @classmethod
+    def is_defined_for_problem_type(cls, problem_type):
+        """Returns whether or not an objective is defined for a problem type."""
+        return handle_problem_types(problem_type) in cls.problem_types
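
For orientation, here is a minimal concrete subclass sketched against the ABC above. MeanAbsoluteErrorSketch is hypothetical and not part of this commit; the pattern of overriding the abstract properties with plain class attributes mirrors the shipped metrics in standard_metrics.py below.

import numpy as np

from checkmates.objectives.objective_base import ObjectiveBase
from checkmates.problem_types import ProblemTypes


class MeanAbsoluteErrorSketch(ObjectiveBase):
    # Hypothetical example objective; not part of this commit.
    name = "Mean Absolute Error (sketch)"
    greater_is_better = False
    score_needs_proba = False
    perfect_score = 0.0
    is_bounded_like_percentage = False
    expected_range = [0, float("inf")]
    problem_types = [ProblemTypes.REGRESSION]

    def objective_function(self, y_true, y_predicted, y_train=None, X=None, sample_weight=None):
        # Weighted mean absolute error; sample_weight=None means equal weights.
        return np.average(np.abs(np.asarray(y_true) - np.asarray(y_predicted)), weights=sample_weight)


obj = MeanAbsoluteErrorSketch()
print(obj.score([1.0, 2.0, 3.0], [1.5, 2.0, 2.0]))  # 0.5
print(obj.calculate_percent_difference(score=0.5, baseline_score=1.0))  # 50.0 (error halved vs. baseline)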
checkmates/objectives/regression_objective.py

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+"""Base class for all regression objectives."""
+from checkmates.objectives.objective_base import ObjectiveBase
+from checkmates.problem_types import ProblemTypes
+
+
+class RegressionObjective(ObjectiveBase):
+    """Base class for all regression objectives."""
+
+    problem_types = [ProblemTypes.REGRESSION, ProblemTypes.TIME_SERIES_REGRESSION]
+    """[ProblemTypes.REGRESSION, ProblemTypes.TIME_SERIES_REGRESSION]"""
checkmates/objectives/standard_metrics.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
+"""Standard machine learning objective functions."""
+import numpy as np
+import pandas as pd
+from sklearn import metrics
+
+from checkmates.objectives.regression_objective import RegressionObjective
+from checkmates.utils import classproperty
+
+class RootMeanSquaredLogError(RegressionObjective):
+    """Root mean squared log error for regression.
+
+    Only valid for nonnegative inputs. Otherwise, will throw a ValueError.
+
+    Example:
+        >>> y_true = pd.Series([1.5, 2, 3, 1, 0.5, 1, 2.5, 2.5, 1, 0.5, 2])
+        >>> y_pred = pd.Series([1.5, 2.5, 2, 1, 0.5, 1, 3, 2.25, 0.75, 0.25, 1.75])
+        >>> np.testing.assert_almost_equal(RootMeanSquaredLogError().objective_function(y_true, y_pred), 0.13090204)
+    """
+
+    name = "Root Mean Squared Log Error"
+    greater_is_better = False
+    score_needs_proba = False
+    perfect_score = 0.0
+    is_bounded_like_percentage = False  # Range [0, Inf)
+    expected_range = [0, float("inf")]
+
+    def objective_function(
+        self,
+        y_true,
+        y_predicted,
+        y_train=None,
+        X=None,
+        sample_weight=None,
+    ):
+        """Objective function for root mean squared log error for regression."""
+
+        def rmsle(y_true, y_pred):
+            return np.sqrt(
+                metrics.mean_squared_log_error(
+                    y_true,
+                    y_pred,
+                    sample_weight=sample_weight,
+                ),
+            )
+
+        # Multiseries time series regression
+        if isinstance(y_true, pd.DataFrame):
+            raw_rmsles = []
+            for i in range(len(y_true.columns)):
+                y_true_i = y_true.iloc[:, i]
+                y_predicted_i = y_predicted.iloc[:, i]
+                raw_rmsles.append(rmsle(y_true_i, y_predicted_i))
+            return np.mean(raw_rmsles)
+
+        # All univariate regression
+        return rmsle(y_true, y_predicted)
+
+    @classproperty
+    def positive_only(self):
+        """If True, this objective is only valid for positive data."""
+        return True
+
+
+class MeanSquaredLogError(RegressionObjective):
+    """Mean squared log error for regression.
+
+    Only valid for nonnegative inputs. Otherwise, will throw a ValueError.
+
+    Example:
+        >>> y_true = pd.Series([1.5, 2, 3, 1, 0.5, 1, 2.5, 2.5, 1, 0.5, 2])
+        >>> y_pred = pd.Series([1.5, 2.5, 2, 1, 0.5, 1, 3, 2.25, 0.75, 0.25, 1.75])
+        >>> np.testing.assert_almost_equal(MeanSquaredLogError().objective_function(y_true, y_pred), 0.0171353)
+    """
+
+    name = "Mean Squared Log Error"
+    greater_is_better = False
+    score_needs_proba = False
+    perfect_score = 0.0
+    is_bounded_like_percentage = False  # Range [0, Inf)
+    expected_range = [0, float("inf")]
+
+    def objective_function(
+        self,
+        y_true,
+        y_predicted,
+        y_train=None,
+        X=None,
+        sample_weight=None,
+    ):
+        """Objective function for mean squared log error for regression."""
+        return metrics.mean_squared_log_error(
+            y_true,
+            y_predicted,
+            sample_weight=sample_weight,
+        )
+
+    @classproperty
+    def positive_only(self):
+        """If True, this objective is only valid for positive data."""
+        return True
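
To see that the doctest values above are consistent with the definition RMSLE = sqrt(mean((log(1 + y_pred) - log(1 + y_true))^2)), the metric can be cross-checked by hand:

import numpy as np
import pandas as pd

from checkmates.objectives import RootMeanSquaredLogError

y_true = pd.Series([1.5, 2, 3, 1, 0.5, 1, 2.5, 2.5, 1, 0.5, 2])
y_pred = pd.Series([1.5, 2.5, 2, 1, 0.5, 1, 3, 2.25, 0.75, 0.25, 1.75])

# Direct computation from the definition, using log1p for log(1 + x).
by_hand = np.sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2))
by_objective = RootMeanSquaredLogError().objective_function(y_true, y_pred)
np.testing.assert_almost_equal(by_hand, by_objective)  # both ~0.13090204

Dropping the square root gives the MeanSquaredLogError value of ~0.0171353 from the second doctest.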
