
Commit 30eaaa4

Merge pull request #99 from CITCOM-project/linting
Linting Updates
2 parents d8c21dc + 6ee7e35 commit 30eaaa4

9 files changed: +45 −31 lines changed

.pylintrc

Lines changed: 5 additions & 4 deletions

@@ -151,7 +151,7 @@ disable=raw-checker-failed,
         suppressed-message,
         useless-suppression,
         deprecated-pragma,
-        use-symbolic-message-instead
+        use-symbolic-message-instead,

 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
@@ -168,7 +168,7 @@ argument-naming-style=snake_case
 # Regular expression matching correct argument names. Overrides argument-
 # naming-style. If left empty, argument names will be checked with the set
 # naming style.
-#argument-rgx=
+argument-rgx=^[a-z][a-z0-9]*((_[a-z0-9]+)*)?$

 # Naming style matching correct attribute names.
 attr-naming-style=snake_case
@@ -196,7 +196,7 @@ class-attribute-naming-style=any
 # Regular expression matching correct class attribute names. Overrides class-
 # attribute-naming-style. If left empty, class attribute names will be checked
 # with the set naming style.
-#class-attribute-rgx=
+class-attribute-rgx=^[a-z][a-z0-9]*((_[a-z0-9]+)*)?$

 # Naming style matching correct class constant names.
 class-const-naming-style=UPPER_CASE
@@ -234,6 +234,7 @@ function-naming-style=snake_case
 #function-rgx=

 # Good variable names which should always be accepted, separated by a comma.
+
 good-names=i,
     j,
     k,
@@ -243,7 +244,7 @@ good-names=i,

 # Good variable names regexes, separated by a comma. If names match any regex,
 # they will always be accepted
-good-names-rgxs=
+variable-rgx=^[a-z][a-z0-9]*((_[a-z0-9]+)*)?$

 # Include a hint for the correct naming format with invalid-name.
 include-naming-hint=no
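
The same snake_case regex is reused for argument-rgx, class-attribute-rgx, and variable-rgx. A standalone sketch (not part of the commit) of which names it accepts:

import re

# The snake_case pattern added for argument-rgx, class-attribute-rgx, and variable-rgx.
SNAKE_CASE = re.compile(r"^[a-z][a-z0-9]*((_[a-z0-9]+)*)?$")

for name in ["df", "unsat_core", "x2", "_private", "camelCase", "MAX"]:
    print(name, bool(SNAKE_CASE.match(name)))
# df, unsat_core and x2 are accepted; _private, camelCase and MAX are not
# (leading underscores, capitals and constants are governed by other options).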

causal_testing/data_collection/data_collector.py

Lines changed: 5 additions & 5 deletions

@@ -21,7 +21,6 @@ def collect_data(self, **kwargs) -> pd.DataFrame:
         Populate the dataframe with execution data.
         :return df: A pandas dataframe containing execution data for the system-under-test.
         """
-        pass

     def filter_valid_data(self, data: pd.DataFrame, check_pos: bool = True) -> pd.DataFrame:
         """Check is execution data is valid for the scenario-under-test.
@@ -45,7 +44,7 @@ def filter_valid_data(self, data: pd.DataFrame, check_pos: bool = True) -> pd.Da
         solver = z3.Solver()
         for c in self.scenario.constraints:
             solver.assert_and_track(c, f"background: {c}")
-        sat = list()
+        sat = []
         unsat_core = None
         for _, row in data.iterrows():
             solver.push()
@@ -73,7 +72,10 @@ def filter_valid_data(self, data: pd.DataFrame, check_pos: bool = True) -> pd.Da
         size_diff = len(data) - len(satisfying_data)
         if size_diff > 0:
             logger.warning(
-                f"Discarded {size_diff}/{len(data)} values due to constraint violations.\n" f"For example{unsat_core}"
+                "Discarded %s/%s values due to constraint violations.\n" "For example%s",
+                size_diff,
+                len(data),
+                unsat_core,
             )
         return satisfying_data

@@ -116,8 +118,6 @@ def run_system_with_input_configuration(self, input_configuration: dict) -> pd.D
         :return: A pandas dataframe containing execution data obtained by executing the system-under-test with the
         specified input configuration.
         """
-        pass
-

 class ObservationalDataCollector(DataCollector):
     """A data collector that extracts data that is relevant to the specified scenario from a csv of execution data."""

causal_testing/generation/abstract_causal_test_case.py

Lines changed: 9 additions & 5 deletions

@@ -103,8 +103,9 @@ def _generate_concrete_tests(
         optimizer.add_soft([self.scenario.variables[v].z3 == row[v] for v in run_columns])
         if optimizer.check() == z3.unsat:
             logger.warning(
-                "Satisfiability of test case was unsat.\n"
-                + f"Constraints\n{optimizer}\nUnsat core {optimizer.unsat_core()}"
+                "Satisfiability of test case was unsat.\n" "Constraints \n %s \n Unsat core %s",
+                optimizer,
+                optimizer.unsat_core(),
             )
         model = optimizer.model()

@@ -187,13 +188,16 @@ def generate_concrete_tests(
             for var in control_configs.columns
         }
         # Putting treatment and control values in messes it up because the two are not independent...
-        # This is potentially problematic as constraints might mean we don't get good coverage if we use control values alone
-        # We might then need to carefully craft our _control value_ generating distributions so that we can get good coverage
+        # This is potentially problematic as constraints might mean we don't get good coverage if we use control
+        # values alone
+        # We might then need to carefully craft our _control value_ generating distributions so that we can get
+        # good coverage
         # without the generated treatment values violating any constraints.

         # treatment_configs = pd.DataFrame([test.treatment_input_configuration for test in concrete_tests])
         # both_configs = pd.concat([control_configs, treatment_configs])
-        # ks_stats = {var: stats.kstest(both_configs[var], var.distribution.cdf).statistic for var in both_configs.columns}
+        # ks_stats = {var: stats.kstest(both_configs[var], var.distribution.cdf).statistic for var in
+        # both_configs.columns}
         effect_modifier_configs = pd.DataFrame([test.effect_modifier_configuration for test in concrete_tests])
         ks_stats.update(
             {
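
For context on the calls being reformatted here: z3's Optimize treats add constraints as hard and add_soft constraints as best-effort, and _generate_concrete_tests logs the optimizer plus its unsat core when checking fails. A minimal standalone sketch of that pattern, with hypothetical constraints rather than the project's:

import z3

x = z3.Int("x")
optimizer = z3.Optimize()
optimizer.add(x > 10)        # hard constraint: must hold
optimizer.add_soft(x == 5)   # soft constraint: dropped if it conflicts

if optimizer.check() == z3.unsat:
    # Mirrors the logging in _generate_concrete_tests.
    print("Constraints", optimizer, "Unsat core", optimizer.unsat_core())
else:
    print(optimizer.model())  # satisfiable here, e.g. [x = 11]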

causal_testing/json_front/json_class.py

Lines changed: 1 addition & 2 deletions

@@ -124,8 +124,7 @@ def generate_tests(self, effects: dict, mutates: dict, estimators: dict, f_flag:
             logger.info([(v.name, v.distribution) for v in abstract_test.treatment_variables])
             logger.info("Number of concrete tests for test case: %s", str(len(concrete_tests)))
             failures = self._execute_tests(concrete_tests, estimators, test, f_flag)
-
-            logger.info(f"{failures}/{len(concrete_tests)} failed")
+            logger.info("%s/%s failed", failures, len(concrete_tests))

     def _execute_tests(self, concrete_tests, estimators, test, f_flag):
         failures = 0

causal_testing/specification/causal_dag.py

Lines changed: 14 additions & 6 deletions

@@ -349,8 +349,10 @@ def adjustment_set_is_minimal(self, treatments: list[str], outcomes: list[str],
                 proper_backdoor_graph, treatments, outcomes, smaller_adjustment_set
             ):
                 logger.info(
-                    f"Z={adjustment_set} is not minimal because Z'=Z\\{{'{variable}'}}="
-                    f"{smaller_adjustment_set} is also a valid adjustment set."
+                    "Z=%s is not minimal because Z'=Z\\{{'%s'}}=" "%s is also a valid adjustment set.",
+                    adjustment_set,
+                    variable,
+                    smaller_adjustment_set,
                 )
                 return False

@@ -393,16 +395,22 @@ def constructive_backdoor_criterion(

         if not set(covariates).issubset(set(self.graph.nodes).difference(descendents_of_proper_casual_paths)):
             logger.info(
-                f"Failed Condition 1: Z={covariates} **is** a descendent of some variable on a proper causal "
-                f"path between X={treatments} and Y={outcomes}."
+                "Failed Condition 1: Z=%s **is** a descendent of some variable on a proper causal "
+                "path between X=%s and Y=%s.",
+                covariates,
+                treatments,
+                outcomes,
             )
             return False

         # Condition (2)
         if not nx.d_separated(proper_backdoor_graph.graph, set(treatments), set(outcomes), set(covariates)):
             logger.info(
-                f"Failed Condition 2: Z={covariates} **does not** d-separate X={treatments} and Y={outcomes} in"
-                f" the proper back-door graph relative to X and Y."
+                "Failed Condition 2: Z=%s **does not** d-separate X=%s and Y=%s in"
+                " the proper back-door graph relative to X and Y.",
+                covariates,
+                treatments,
+                outcomes,
             )
             return False
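
Condition (2) above tests d-separation with networkx in the proper back-door graph (the DAG with the edges on proper causal paths removed). A hypothetical three-node sketch, assuming a single confounder Z and a direct effect of X on Y:

import networkx as nx

# X <- Z -> Y with a direct effect X -> Y.
graph = nx.DiGraph([("Z", "X"), ("Z", "Y"), ("X", "Y")])

# Build a (trivial) back-door graph by deleting the direct causal edge.
backdoor_graph = graph.copy()
backdoor_graph.remove_edge("X", "Y")

print(nx.d_separated(backdoor_graph, {"X"}, {"Y"}, set()))  # False: X <- Z -> Y is open
print(nx.d_separated(backdoor_graph, {"X"}, {"Y"}, {"Z"}))  # True: Z blocks the back-door path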

causal_testing/specification/causal_specification.py

Lines changed: 4 additions & 0 deletions

@@ -10,6 +10,10 @@


 class CausalSpecification(ABC):
+    """
+    Abstract Class for the Causal Specification (combination of Scenario and Causal Dag)
+    """
+
     def __init__(self, scenario: Scenario, causal_dag: CausalDAG):
         self.scenario = scenario
         self.causal_dag = causal_dag

causal_testing/testing/causal_test_engine.py

Lines changed: 5 additions & 4 deletions

@@ -210,10 +210,11 @@ def _check_positivity_violation(self, variables_list):
         if not set(variables_list).issubset(self.scenario_execution_data_df.columns):
             missing_variables = set(variables_list) - set(self.scenario_execution_data_df.columns)
             logger.warning(
-                f"Positivity violation: missing data for variables {missing_variables}.\n"
-                f"Causal inference is only valid if a well-specified parametric model is used.\n"
-                f"Alternatively, consider restricting analysis to executions without the variables:"
-                f" {missing_variables}."
+                "Positivity violation: missing data for variables {missing_variables}.\n"
+                "Causal inference is only valid if a well-specified parametric model is used.\n"
+                "Alternatively, consider restricting analysis to executions without the variables:"
+                " %s.",
+                missing_variables,
             )
             return True
         else:
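
A general point this kind of conversion illustrates, as a hedged standalone sketch: each interpolated field must become a %s placeholder with a matching argument, because a {name} field left in a plain (non-f) string is logged verbatim rather than substituted.

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

missing_variables = {"x", "y"}  # hypothetical value for illustration

# Without the f prefix, the braces are logged literally:
logger.warning("missing data for variables {missing_variables}.")

# The %s form is filled in by the logging framework:
logger.warning("missing data for variables %s.", missing_variables)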

causal_testing/testing/causal_test_outcome.py

Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@ def __init__(
         if effect_modifier_configuration is not None:
             self.effect_modifier_configuration = effect_modifier_configuration
         else:
-            self.effect_modifier_configuration = dict()
+            self.effect_modifier_configuration = {}

     def __str__(self):
         base_str = (
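
This hunk, and the matching ones in estimators.py and data_collector.py, applies pylint's use-dict-literal and use-list-literal checks: {} and [] are the idiomatic spellings and skip the global name lookup and call that dict() and list() incur. A standalone sketch, timings machine-dependent:

from timeit import timeit

# Each call times one million iterations; literals are typically a few times faster.
print(timeit("dict()"))  # global name lookup plus a call
print(timeit("{}"))      # a single bytecode instruction
print(timeit("list()"))
print(timeit("[]"))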

causal_testing/testing/estimators.py

Lines changed: 1 addition & 4 deletions

@@ -48,7 +48,7 @@ def __init__(
         self.outcome = outcome
         self.df = df
         if effect_modifiers is None:
-            self.effect_modifiers = dict()
+            self.effect_modifiers = {}
         elif isinstance(effect_modifiers, set) or isinstance(effect_modifiers, list):
             self.effect_modifiers = {k.name for k in effect_modifiers}
         elif isinstance(effect_modifiers, dict):
@@ -64,7 +64,6 @@ def add_modelling_assumptions(self):
         Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
         must hold if the resulting causal inference is to be considered valid.
         """
-        pass

     @abstractmethod
     def estimate_ate(self) -> float:
@@ -73,15 +72,13 @@ def estimate_ate(self) -> float:
         in the linear regression equation.
         :return: The intercept and coefficient of the linear regression equation
         """
-        pass

     def compute_confidence_intervals(self) -> list[float, float]:
         """
         Estimate the 95% Wald confidence intervals for the effect of changing the treatment from control values to
         treatment values on the outcome.
         :return: 95% Wald confidence intervals.
         """
-        pass


 class LogisticRegressionEstimator(Estimator):
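
The deleted pass statements here (and in data_collector.py) fix pylint's unnecessary-pass check: a docstring is itself a valid function body, so methods that only document an interface need no extra statement. A minimal sketch with a simplified stand-in for the real class:

from abc import ABC, abstractmethod


class Estimator(ABC):  # simplified stand-in for illustration
    @abstractmethod
    def estimate_ate(self) -> float:
        """Estimate the average treatment effect."""
        # No `pass` needed: the docstring is the method body.


class ConstantEstimator(Estimator):
    def estimate_ate(self) -> float:
        return 0.0  # trivial concrete implementation for illustration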
