Temporal estimation in

jmafoster1 · jmafoster1 · commit 495102919a2a · 2024-07-23T16:32:42.000+01:00
diff --git a/causal_testing/specification/capabilities.py b/causal_testing/specification/capabilities.py
@@ -0,0 +1,71 @@
+from causal_testing.specification.variable import Variable
+
+
+class Capability:
+    """
+    Data class describing one temporal intervention: `variable` is given `value` for the
+    window running from `start_time` to `end_time`.
+    """
+
+    # NOTE(review): `any` below is the builtin function, not `typing.Any`; signature left untouched.
+    def __init__(self, variable: Variable, value: any, start_time: int, end_time: float):
+        self.variable, self.value = variable, value
+        self.start_time, self.end_time = start_time, end_time
+
+    def __eq__(self, other):
+        """Two capabilities are equal when their exact classes and all four fields match."""
+        if type(other) != type(self):
+            return False
+        # The time window is only compared once the variable and value already agree.
+        same_target = self.variable == other.variable and self.value == other.value
+        return same_target and self.start_time == other.start_time and self.end_time == other.end_time
+
+    def __repr__(self):
+        """Render the capability as "(variable, value, start_time-end_time)" for debugging."""
+        return f"({self.variable}, {self.value}, {self.start_time}-{self.end_time})"
+
+
+class TreatmentSequence:
+    """
+    Class to represent a list of capabilities, i.e. a treatment regime.
+    The i-th pair (counting from 1) is active from i * timesteps_per_intervention until
+    (i + 1) * timesteps_per_intervention, so the first intervention starts one slot in.
+    """
+
+    def __init__(self, timesteps_per_intervention, capabilities):
+        """
+        :param timesteps_per_intervention - the (positive) number of timesteps each intervention lasts.
+        :param capabilities - an iterable of (variable, value) pairs, in the order they are applied.
+        :raises ValueError - if timesteps_per_intervention is not positive.
+        """
+        # A negative spacing used to yield an empty sequence without any error, so reject it up front.
+        if timesteps_per_intervention <= 0:
+            raise ValueError("timesteps_per_intervention must be positive")
+        self.timesteps_per_intervention = timesteps_per_intervention
+        self.capabilities = [
+            Capability(var, val, i * timesteps_per_intervention, (i + 1) * timesteps_per_intervention)
+            for i, (var, val) in enumerate(capabilities, start=1)
+        ]
+        # This is a bodge so that causal test adequacy works
+        self.name = tuple(c.variable for c in self.capabilities)
+
+    def set_value(self, index: int, value: float):
+        """
+        Set the value of capability at the given index.
+        :param index - the index of the element to update.
+        :param value - the desired value of the capability.
+        """
+        self.capabilities[index].value = value
+
+    def copy(self):
+        """
+        Return a new sequence built from fresh Capability objects; the variables and values are shared.
+        """
+        return TreatmentSequence(
+            self.timesteps_per_intervention,
+            [(c.variable, c.value) for c in self.capabilities],
+        )
+
+    def total_time(self):
+        """Calculate the total duration of the treatment strategy (one lead-in slot plus one per capability)."""
+        return (len(self.capabilities) + 1) * self.timesteps_per_intervention
diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py
@@ -71,14 +71,21 @@ class DataAdequacy:
     """
 
     def __init__(
-        self, test_case: CausalTestCase, estimator: Estimator, data_collector: DataCollector, bootstrap_size: int = 100
+        self,
+        test_case: CausalTestCase,
+        estimator: Estimator,
+        data_collector: DataCollector,
+        bootstrap_size: int = 100,
+        group_by=None,
     ):
         self.test_case = test_case
         self.estimator = estimator
         self.data_collector = data_collector
         self.kurtosis = None
         self.outcomes = None
+        self.successful = None
         self.bootstrap_size = bootstrap_size
+        self.group_by = group_by
 
     def measure_adequacy(self):
         """
@@ -87,11 +94,14 @@ def measure_adequacy(self):
         results = []
         for i in range(self.bootstrap_size):
             estimator = deepcopy(self.estimator)
-            estimator.df = estimator.df.sample(len(estimator.df), replace=True, random_state=i)
-            # try:
+
+            if self.group_by is not None:
+                ids = pd.Series(estimator.df[self.group_by].unique())
+                ids = ids.sample(len(ids), replace=True, random_state=i)
+                estimator.df = estimator.df[estimator.df[self.group_by].isin(ids)]
+            else:
+                estimator.df = estimator.df.sample(len(estimator.df), replace=True, random_state=i)
             results.append(self.test_case.execute_test(estimator, self.data_collector))
-            # except np.LinAlgError:
-            # continue
         outcomes = [self.test_case.expected_causal_effect.apply(c) for c in results]
         results = pd.DataFrame(c.to_dict() for c in results)[["effect_estimate", "ci_low", "ci_high"]]
 
@@ -111,8 +121,14 @@ def convert_to_df(field):
 
         effect_estimate = pd.concat(results["effect_estimate"].tolist(), axis=1).transpose().reset_index(drop=True)
         self.kurtosis = effect_estimate.kurtosis()
-        self.outcomes = sum(outcomes)
+        self.outcomes = sum(filter(lambda x: x is not None, outcomes))
+        self.successful = sum([x is not None for x in outcomes])
 
     def to_dict(self):
         "Returns the adequacy object as a dictionary."
-        return {"kurtosis": self.kurtosis.to_dict(), "bootstrap_size": self.bootstrap_size, "passing": self.outcomes}
+        return {
+            "kurtosis": self.kurtosis.to_dict(),
+            "bootstrap_size": self.bootstrap_size,
+            "passing": self.outcomes,
+            "successful": self.successful,
+        }
diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
@@ -92,7 +92,7 @@ def _return_causal_test_results(self, estimator) -> CausalTestResult:
         except np.linalg.LinAlgError:
             return CausalTestResult(
                 estimator=estimator,
-                test_value=TestValue(self.estimate_type, "LinAlgError"),
+                test_value=TestValue(self.estimate_type, None),
                 effect_modifier_configuration=self.effect_modifier_configuration,
                 confidence_intervals=None,
             )
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
@@ -27,7 +27,9 @@ class SomeEffect(CausalTestOutcome):
     """An extension of TestOutcome representing that the expected causal effect should not be zero."""
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type == "risk_ratio":
+        if res.ci_low() is None or res.ci_high() is None:
+            return None
+        if res.test_value.type in ("risk_ratio", "hazard_ratio"):
             return any(
                 1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())
             )
@@ -52,7 +54,7 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05):
         self.ctol = ctol
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type == "risk_ratio":
+        if res.test_value.type in ("risk_ratio", "hazard_ratio"):
             return any(
                 ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol)
                 for ci_low, ci_high, value in zip(res.ci_low(), res.ci_high(), res.test_value.value)
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
diff --git a/pyproject.toml b/pyproject.toml

Original file line number	Diff line number	Diff line change
`@@ -92,7 +92,7 @@ def _return_causal_test_results(self, estimator) -> CausalTestResult:`
`92`	`92`	`except np.linalg.LinAlgError:`
`93`	`93`	`return CausalTestResult(`
`94`	`94`	`estimator=estimator,`
`95`		`- test_value=TestValue(self.estimate_type, "LinAlgError"),`
	`95`	`+ test_value=TestValue(self.estimate_type, None),`
`96`	`96`	`effect_modifier_configuration=self.effect_modifier_configuration,`
`97`	`97`	`confidence_intervals=None,`
`98`	`98`	`)`