diff --git a/causal_testing/estimation/cubic_spline_estimator.py b/causal_testing/estimation/cubic_spline_estimator.py
index 19f48a11..fa6a46ce 100644
--- a/causal_testing/estimation/cubic_spline_estimator.py
+++ b/causal_testing/estimation/cubic_spline_estimator.py
@@ -8,6 +8,7 @@
 
 from causal_testing.specification.variable import Variable
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
@@ -47,7 +48,7 @@ def __init__(
             )
             self.formula = f"{base_test_case.outcome_variable.name} ~ cr({'+'.join(terms)}, df={basis})"
 
-    def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
+    def estimate_ate_calculated(self, adjustment_config: dict = None) -> EffectEstimate:
         """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value. Here, we actually
         calculate the expected outcomes under control and treatment and divide one by the other. This
@@ -74,4 +75,4 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
         x[self.base_test_case.treatment_variable.name] = self.control_value
         control = model.predict(x).iloc[0]
 
-        return pd.Series(treatment - control)
+        return EffectEstimate("ate", pd.Series(treatment - control))
diff --git a/causal_testing/estimation/effect_estimate.py b/causal_testing/estimation/effect_estimate.py
new file mode 100644
index 00000000..ad89abd3
--- /dev/null
+++ b/causal_testing/estimation/effect_estimate.py
@@ -0,0 +1,43 @@
+"""
+This module contains the EffectEstimate dataclass.
+"""
+
+from dataclasses import dataclass
+import pandas as pd
+
+
+@dataclass
+class EffectEstimate:
+    """
+    A dataclass to hold the value and confidence intervals of a causal effect estimate.
+
+    :ivar type: The type of estimate, e.g. ate, or risk_ratio
+                (used to determine whether the estimate matches the expected effect)
+    :ivar value: The estimated causal effect
+    :ivar ci_low: The lower bound of the confidence interval (None when not provided)
+    :ivar ci_high: The upper bound of the confidence interval (None when not provided)
+    """
+
+    type: str
+    value: pd.Series
+    ci_low: pd.Series = None
+    ci_high: pd.Series = None
+
+    def ci_valid(self) -> bool:
+        """Return whether or not the result has valid confidence intervals (both bounds set and free of nulls)."""
+        return (
+            self.ci_low is not None
+            and self.ci_high is not None
+            and not (pd.isnull(self.ci_low).any() or pd.isnull(self.ci_high).any())
+        )
+
+    def to_dict(self) -> dict:
+        """Return representation as a dict; confidence intervals are included only when ci_valid() holds."""
+        d = {"effect_measure": self.type, "effect_estimate": self.value.to_dict()}
+        if self.ci_valid():
+            return d | {"ci_low": self.ci_low.to_dict(), "ci_high": self.ci_high.to_dict()}
+        return d
+
+    def to_df(self) -> pd.DataFrame:
+        """Return representation as a pandas dataframe with effect_estimate, ci_low, and ci_high columns."""
+        return pd.DataFrame({"effect_estimate": self.value, "ci_low": self.ci_low, "ci_high": self.ci_high})
diff --git a/causal_testing/estimation/experimental_estimator.py b/causal_testing/estimation/experimental_estimator.py
index cc91d853..ce2d6faa 100644
--- a/causal_testing/estimation/experimental_estimator.py
+++ b/causal_testing/estimation/experimental_estimator.py
@@ -5,6 +5,7 @@
 import pandas as pd
 
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 
 
@@ -55,7 +56,7 @@ def run_system(self, configuration: dict) -> dict:
         :returns: The resulting output as a dict.
         """
 
-    def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_ate(self) -> EffectEstimate:
         """Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value.
 
@@ -88,14 +89,20 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         ci_low = difference.iloc[ci_low_index]
         ci_high = difference.iloc[self.repeats - ci_low_index]
 
-        return pd.Series(
-            {self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]}
-        ), [
+        return EffectEstimate(
+            "ate",
+            pd.Series(
+                {
+                    self.base_test_case.treatment_variable.name: difference.mean()[
+                        self.base_test_case.outcome_variable.name
+                    ]
+                }
+            ),
             pd.Series({self.base_test_case.treatment_variable.name: ci_low[self.base_test_case.outcome_variable.name]}),
             pd.Series(
                 {self.base_test_case.treatment_variable.name: ci_high[self.base_test_case.outcome_variable.name]}
             ),
-        ]
+        )
 
     def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """Estimate the risk ratio of the treatment on the outcome. That is, the change in outcome caused
@@ -130,11 +137,11 @@ def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         ci_low = difference.iloc[ci_low_index]
         ci_high = difference.iloc[self.repeats - ci_low_index]
 
-        return pd.Series(
-            {self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]}
-        ), [
+        return EffectEstimate(
+            "ate",
+            {self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]},
             pd.Series({self.base_test_case.treatment_variable.name: ci_low[self.base_test_case.outcome_variable.name]}),
             pd.Series(
                 {self.base_test_case.treatment_variable.name: ci_high[self.base_test_case.outcome_variable.name]}
             ),
-        ]
+        )
diff --git a/causal_testing/estimation/instrumental_variable_estimator.py b/causal_testing/estimation/instrumental_variable_estimator.py
index e322f9a7..2b5eaf2d 100644
--- a/causal_testing/estimation/instrumental_variable_estimator.py
+++ b/causal_testing/estimation/instrumental_variable_estimator.py
@@ -7,6 +7,7 @@
 import statsmodels.api as sm
 
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
@@ -61,7 +62,7 @@ def add_modelling_assumptions(self):
         """
         )
 
-    def estimate_iv_coefficient(self, df) -> float:
+    def iv_coefficient(self, df) -> float:
         """
         Estimate the linear regression coefficient of the treatment on the
         outcome.
@@ -75,16 +76,16 @@ def estimate_iv_coefficient(self, df) -> float:
         # Estimate the coefficient of I on X by cancelling
         return ab / a
 
-    def estimate_coefficient(self, bootstrap_size=100) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_coefficient(self, bootstrap_size=100) -> EffectEstimate:
         """
         Estimate the unit ate (i.e. coefficient) of the treatment on the
         outcome.
         """
         bootstraps = sorted(
-            [self.estimate_iv_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
+            [self.iv_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
         )
         bound = ceil((bootstrap_size * self.alpha) / 2)
         ci_low = pd.Series(bootstraps[bound])
         ci_high = pd.Series(bootstraps[bootstrap_size - bound])
 
-        return pd.Series(self.estimate_iv_coefficient(self.df)), [ci_low, ci_high]
+        return EffectEstimate("coefficient", pd.Series(self.iv_coefficient(self.df)), ci_low, ci_high)
diff --git a/causal_testing/estimation/ipcw_estimator.py b/causal_testing/estimation/ipcw_estimator.py
index 902927f7..ab038df1 100644
--- a/causal_testing/estimation/ipcw_estimator.py
+++ b/causal_testing/estimation/ipcw_estimator.py
@@ -11,6 +11,7 @@
 from lifelines import CoxPHFitter
 
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.specification.variable import Variable
 
@@ -285,7 +286,7 @@ def preprocess_data(self):
         if len(self.df.loc[self.df["trtrand"] == 1]) == 0:
             raise ValueError(f"No individuals began the treatment strategy {self.treatment_strategy}")
 
-    def estimate_hazard_ratio(self):
+    def estimate_hazard_ratio(self) -> EffectEstimate:
         """
         Estimate the hazard ratio.
         """
@@ -380,4 +381,4 @@ def estimate_hazard_ratio(self):
 
         ci_low, ci_high = [np.exp(cox_ph.confidence_intervals_)[col] for col in cox_ph.confidence_intervals_.columns]
 
-        return (cox_ph.hazard_ratios_, (ci_low, ci_high))
+        return EffectEstimate("hazard_ratio", cox_ph.hazard_ratios_, ci_low, ci_high)
diff --git a/causal_testing/estimation/linear_regression_estimator.py b/causal_testing/estimation/linear_regression_estimator.py
index d818a9e9..aee433d2 100644
--- a/causal_testing/estimation/linear_regression_estimator.py
+++ b/causal_testing/estimation/linear_regression_estimator.py
@@ -10,6 +10,7 @@
 from causal_testing.specification.variable import Variable
 from causal_testing.estimation.genetic_programming_regression_fitter import GP
 from causal_testing.estimation.abstract_regression_estimator import RegressionEstimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
@@ -92,7 +93,7 @@ def gp_formula(
         formula = gp.simplify(formula)
         self.formula = f"{self.base_test_case.outcome_variable.name} ~ I({formula}) - 1"
 
-    def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_coefficient(self) -> EffectEstimate:
         """Estimate the unit average treatment effect of the treatment on the outcome. That is, the change in outcome
         caused by a unit change in treatment.
 
@@ -121,9 +122,9 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         ), f"{treatment} not in\n{'  ' + str(model.params.index).replace(newline, newline + '  ')}"
         unit_effect = model.params[treatment]  # Unit effect is the coefficient of the treatment
         [ci_low, ci_high] = self._get_confidence_intervals(model, treatment)
-        return unit_effect, [ci_low, ci_high]
+        return EffectEstimate("coefficient", unit_effect, ci_low, ci_high)
 
-    def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_ate(self) -> EffectEstimate:
         """Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value.
 
@@ -146,10 +147,10 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
         ate = pd.Series(t_test_results.effect[0])
         confidence_intervals = list(t_test_results.conf_int(alpha=self.alpha).flatten())
-        confidence_intervals = [pd.Series(interval) for interval in confidence_intervals]
-        return ate, confidence_intervals
+        ci_low, ci_high = [pd.Series(interval) for interval in confidence_intervals]
+        return EffectEstimate("ate", ate, ci_low, ci_high)
 
-    def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_risk_ratio(self, adjustment_config: dict = None) -> EffectEstimate:
         """Estimate the risk_ratio effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value.
 
@@ -159,9 +160,11 @@ def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[pd.Series
         control_outcome, treatment_outcome = prediction.iloc[1], prediction.iloc[0]
         ci_low = pd.Series(treatment_outcome["mean_ci_lower"] / control_outcome["mean_ci_upper"])
         ci_high = pd.Series(treatment_outcome["mean_ci_upper"] / control_outcome["mean_ci_lower"])
-        return pd.Series(treatment_outcome["mean"] / control_outcome["mean"]), [ci_low, ci_high]
+        return EffectEstimate(
+            "risk_ratio", pd.Series(treatment_outcome["mean"] / control_outcome["mean"]), ci_low, ci_high
+        )
 
-    def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_ate_calculated(self, adjustment_config: dict = None) -> EffectEstimate:
         """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value. Here, we actually
         calculate the expected outcomes under control and treatment and divide one by the other. This
@@ -177,7 +180,7 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[pd.Se
         control_outcome, treatment_outcome = prediction.iloc[1], prediction.iloc[0]
         ci_low = pd.Series(treatment_outcome["mean_ci_lower"] - control_outcome["mean_ci_upper"])
         ci_high = pd.Series(treatment_outcome["mean_ci_upper"] - control_outcome["mean_ci_lower"])
-        return pd.Series(treatment_outcome["mean"] - control_outcome["mean"]), [ci_low, ci_high]
+        return EffectEstimate("ate", pd.Series(treatment_outcome["mean"] - control_outcome["mean"]), ci_low, ci_high)
 
     def _get_confidence_intervals(self, model, treatment):
         confidence_intervals = model.conf_int(alpha=self.alpha, cols=None)
diff --git a/causal_testing/estimation/logistic_regression_estimator.py b/causal_testing/estimation/logistic_regression_estimator.py
index 091d9c30..f4f14725 100644
--- a/causal_testing/estimation/logistic_regression_estimator.py
+++ b/causal_testing/estimation/logistic_regression_estimator.py
@@ -6,6 +6,7 @@
 import pandas as pd
 import statsmodels.formula.api as smf
 
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.estimation.abstract_regression_estimator import RegressionEstimator
 
 logger = logging.getLogger(__name__)
@@ -32,7 +33,7 @@ def add_modelling_assumptions(self):
         self.modelling_assumptions.append("The outcome must be binary.")
         self.modelling_assumptions.append("Independently and identically distributed errors.")
 
-    def estimate_unit_odds_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_unit_odds_ratio(self) -> EffectEstimate:
         """Estimate the odds ratio of increasing the treatment by one. In logistic regression, this corresponds to the
         coefficient of the treatment of interest.
 
@@ -40,7 +41,9 @@ def estimate_unit_odds_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series
         """
         model = self.fit_model(self.df)
         ci_low, ci_high = np.exp(model.conf_int(self.alpha).loc[self.base_test_case.treatment_variable.name])
-        return pd.Series(np.exp(model.params[self.base_test_case.treatment_variable.name])), [
+        return EffectEstimate(
+            "odds_ratio",
+            pd.Series(np.exp(model.params[self.base_test_case.treatment_variable.name])),
             pd.Series(ci_low),
             pd.Series(ci_high),
-        ]
+        )
diff --git a/causal_testing/main.py b/causal_testing/main.py
index 594be0e6..3cbbf493 100644
--- a/causal_testing/main.py
+++ b/causal_testing/main.py
@@ -19,7 +19,7 @@
 from causal_testing.testing.causal_test_case import CausalTestCase
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.testing.causal_effect import NoEffect, SomeEffect, Positive, Negative
-from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
+from causal_testing.testing.causal_test_result import CausalTestResult
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
 from causal_testing.estimation.logistic_regression_estimator import LogisticRegressionEstimator
 
@@ -332,7 +332,6 @@ def create_causal_test(self, test: dict, base_test: BaseTestCase) -> CausalTestC
             expected_causal_effect=expected_effect,
             estimate_type=test.get("estimate_type", "ate"),
             estimate_params=test.get("estimate_params"),
-            effect_modifier_configuration=test.get("effect_modifier_configuration"),
             estimator=estimator,
         )
 
@@ -376,10 +375,7 @@ def run_tests_in_batches(self, batch_size: int = 100, silent: bool = False) -> L
                             logger.error(f"Type or attribute error in test: {str(e)}")
                             raise
                         batch_results.append(
-                            CausalTestResult(
-                                estimator=test_case.estimator,
-                                test_value=TestValue("Error", str(e)),
-                            )
+                            CausalTestResult(effect_estimate=None, estimator=test_case.estimator, error_message=str(e))
                         )
 
                     progress.update(1)
@@ -410,10 +406,7 @@ def run_tests(self, silent=False) -> List[CausalTestResult]:
                 if not silent:
                     logger.error(f"Error running test {test_case}: {str(e)}")
                     raise
-                result = CausalTestResult(
-                    estimator=test_case.estimator,
-                    test_value=TestValue("Error", str(e)),
-                )
+                result = CausalTestResult(estimator=test_case.estimator, effect_estimate=None, error_message=str(e))
                 results.append(result)
                 logger.info(f"Test errored: {test_case}")
 
@@ -432,17 +425,10 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None)
         # Combine test configs with their results
         json_results = []
         for test_config, test_case, result in zip(test_configs["tests"], self.test_cases, results):
-            # Handle effect estimate - could be a Series or other format
-            effect_estimate = result.test_value.value
-            if isinstance(effect_estimate, pd.Series):
-                effect_estimate = effect_estimate.to_dict()
-
-            # Handle confidence intervals - convert to list if needed
-            ci_low = result.ci_low()
-            ci_high = result.ci_high()
-
             # Determine if test failed based on expected vs actual effect
-            test_passed = test_case.expected_causal_effect.apply(result) if result.test_value.type != "Error" else False
+            test_passed = (
+                test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False
+            )
 
             output = {
                 "name": test_config["name"],
@@ -454,15 +440,16 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None)
                 "alpha": test_config.get("alpha", 0.05),
                 "skip": test_config.get("skip", False),
                 "passed": test_passed,
-                "result": {
-                    "treatment": result.estimator.base_test_case.treatment_variable.name,
-                    "outcome": result.estimator.base_test_case.outcome_variable.name,
-                    "adjustment_set": list(result.adjustment_set) if result.adjustment_set else [],
-                    "effect_measure": result.test_value.type,
-                    "effect_estimate": effect_estimate,
-                    "ci_low": ci_low,
-                    "ci_high": ci_high,
-                },
+                "result": (
+                    {
+                        "treatment": result.estimator.base_test_case.treatment_variable.name,
+                        "outcome": result.estimator.base_test_case.outcome_variable.name,
+                        "adjustment_set": list(result.adjustment_set) if result.adjustment_set else [],
+                    }
+                    | result.effect_estimate.to_dict()
+                    if result.effect_estimate
+                    else {"error": result.error_message}
+                ),
             }
             json_results.append(output)
 
diff --git a/causal_testing/surrogate/surrogate_search_algorithms.py b/causal_testing/surrogate/surrogate_search_algorithms.py
index 14e3254f..19e2e8f0 100644
--- a/causal_testing/surrogate/surrogate_search_algorithms.py
+++ b/causal_testing/surrogate/surrogate_search_algorithms.py
@@ -1,4 +1,4 @@
-"""Module containing implementation of search algorithm for surrogate search """
+"""Module containing implementation of search algorithm for surrogate search"""
 
 # Fitness functions are required to be iteratively defined, including all variables within.
 
@@ -45,7 +45,7 @@ def fitness_function(ga, solution, idx):  # pylint: disable=unused-argument
                 for i, adjustment in enumerate(surrogate_model.adjustment_set):
                     adjustment_dict[adjustment] = solution[i + 1]
 
-                ate = surrogate_model.estimate_ate_calculated(adjustment_dict)
+                ate = surrogate_model.estimate_ate_calculated(adjustment_dict).value
                 if len(ate) > 1:
                     raise ValueError(
                         "Multiple ate values provided but currently only single values supported in this method"
diff --git a/causal_testing/testing/causal_effect.py b/causal_testing/testing/causal_effect.py
index a9e36d6b..69a368de 100644
--- a/causal_testing/testing/causal_effect.py
+++ b/causal_testing/testing/causal_effect.py
@@ -27,18 +27,20 @@ class SomeEffect(CausalEffect):
     """An extension of CausalEffect representing that the expected causal effect should not be zero."""
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.ci_low() is None or res.ci_high() is None:
+        if res.effect_estimate.ci_low is None or res.effect_estimate.ci_high is None:
             return None
-        if res.test_value.type in ("risk_ratio", "hazard_ratio", "unit_odds_ratio"):
+        if res.effect_estimate.type in ("risk_ratio", "hazard_ratio", "unit_odds_ratio"):
             return any(
-                1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())
+                1 < ci_low < ci_high or ci_low < ci_high < 1
+                for ci_low, ci_high in zip(res.effect_estimate.ci_low, res.effect_estimate.ci_high)
             )
-        if res.test_value.type in ("coefficient", "ate"):
+        if res.effect_estimate.type in ("coefficient", "ate"):
             return any(
-                0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())
+                0 < ci_low < ci_high or ci_low < ci_high < 0
+                for ci_low, ci_high in zip(res.effect_estimate.ci_low, res.effect_estimate.ci_high)
             )
 
-        raise ValueError(f"Test Value type {res.test_value.type} is not valid for this CausalEffect")
+        raise ValueError(f"Test Value type {res.effect_estimate.type} is not valid for this CausalEffect")
 
 
 class NoEffect(CausalEffect):
@@ -54,23 +56,31 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05):
         self.ctol = ctol
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type in ("risk_ratio", "hazard_ratio", "unit_odds_ratio"):
+        """Return whether the estimated effect is consistent with there being no causal effect."""
+        if res.effect_estimate.type in ("risk_ratio", "hazard_ratio", "unit_odds_ratio"):
             return any(
                 ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol)
-                for ci_low, ci_high, value in zip(res.ci_low(), res.ci_high(), res.test_value.value)
+                for ci_low, ci_high, value in zip(
+                    res.effect_estimate.ci_low, res.effect_estimate.ci_high, res.effect_estimate.value
+                )
+            )
+        if res.effect_estimate.type in ("coefficient", "ate"):
+            value = (
+                res.effect_estimate.value
+                if isinstance(res.effect_estimate.ci_high, Iterable)
+                else [res.effect_estimate.value]
             )
-        if res.test_value.type in ("coefficient", "ate"):
-            value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value]
+            # Pass when the fraction of estimates whose CI excludes zero (and |v| >= atol) is below ctol.
             return (
                 sum(
                     not ((ci_low < 0 < ci_high) or abs(v) < self.atol)
-                    for ci_low, ci_high, v in zip(res.ci_low(), res.ci_high(), value)
+                    for ci_low, ci_high, v in zip(res.effect_estimate.ci_low, res.effect_estimate.ci_high, value)
                 )
                 / len(value)
                 < self.ctol
             )
 
-        raise ValueError(f"Test Value type {res.test_value.type} is not valid for this CausalEffect")
+        raise ValueError(f"Test Value type {res.effect_estimate.type} is not valid for this CausalEffect")
 
 
 class ExactValue(CausalEffect):
@@ -97,11 +107,11 @@ def __init__(self, value: float, atol: float = None, ci_low: float = None, ci_hi
                 )
 
     def apply(self, res: CausalTestResult) -> bool:
-        close = np.isclose(res.test_value.value, self.value, atol=self.atol)
-        if res.ci_valid() and self.ci_low is not None and self.ci_high is not None:
+        close = np.isclose(res.effect_estimate.value, self.value, atol=self.atol)
+        if res.effect_estimate.ci_valid() and self.ci_low is not None and self.ci_high is not None:
             return all(
                 close and self.ci_low <= ci_low and self.ci_high >= ci_high
-                for ci_low, ci_high in zip(res.ci_low(), res.ci_high())
+                for ci_low, ci_high in zip(res.effect_estimate.ci_low, res.effect_estimate.ci_high)
             )
         return close
 
@@ -114,13 +124,13 @@ class Positive(SomeEffect):
     Currently only single values are supported for the test value"""
 
     def apply(self, res: CausalTestResult) -> bool:
-        if len(res.test_value.value) > 1:
+        if len(res.effect_estimate.value) > 1:
             raise ValueError("Positive Effects are currently only supported on single float datatypes")
-        if res.test_value.type in {"ate", "coefficient"}:
-            return bool(res.test_value.value[0] > 0)
-        if res.test_value.type == "risk_ratio":
-            return bool(res.test_value.value[0] > 1)
-        raise ValueError(f"Test Value type {res.test_value.type} is not valid for this CausalEffect")
+        if res.effect_estimate.type in {"ate", "coefficient"}:
+            return bool(res.effect_estimate.value[0] > 0)
+        if res.effect_estimate.type == "risk_ratio":
+            return bool(res.effect_estimate.value[0] > 1)
+        raise ValueError(f"Test Value type {res.effect_estimate.type} is not valid for this CausalEffect")
 
 
 class Negative(SomeEffect):
@@ -128,11 +138,11 @@ class Negative(SomeEffect):
     Currently only single values are supported for the test value"""
 
     def apply(self, res: CausalTestResult) -> bool:
-        if len(res.test_value.value) > 1:
+        if len(res.effect_estimate.value) > 1:
             raise ValueError("Negative Effects are currently only supported on single float datatypes")
-        if res.test_value.type in {"ate", "coefficient"}:
-            return bool(res.test_value.value[0] < 0)
-        if res.test_value.type == "risk_ratio":
-            return bool(res.test_value.value[0] < 1)
+        if res.effect_estimate.type in {"ate", "coefficient"}:
+            return bool(res.effect_estimate.value[0] < 0)
+        if res.effect_estimate.type == "risk_ratio":
+            return bool(res.effect_estimate.value[0] < 1)
         # Dead code but necessary for pylint
-        raise ValueError(f"Test Value type {res.test_value.type} is not valid for this CausalEffect")
+        raise ValueError(f"Test Value type {res.effect_estimate.type} is not valid for this CausalEffect")
diff --git a/causal_testing/testing/causal_test_adequacy.py b/causal_testing/testing/causal_test_adequacy.py
index 8a12b4e2..fcbc8f00 100644
--- a/causal_testing/testing/causal_test_adequacy.py
+++ b/causal_testing/testing/causal_test_adequacy.py
@@ -115,24 +115,10 @@ def measure_adequacy(self):
                 logger.warning(f"Adequacy ValueError: {e}")
                 continue
         outcomes = [self.test_case.expected_causal_effect.apply(c) for c in results]
-        results = pd.DataFrame(c.to_dict() for c in results)[["effect_estimate", "ci_low", "ci_high"]]
-
-        def convert_to_df(field):
-            converted = []
-            for r in results[field]:
-                if isinstance(r, float):
-                    converted.append(
-                        pd.DataFrame({self.test_case.base_test_case.treatment_variable.name: [r]}).transpose()
-                    )
-                else:
-                    converted.append(r)
-            return converted
-
-        for field in ["effect_estimate", "ci_low", "ci_high"]:
-            results[field] = convert_to_df(field)
-
-        effect_estimate = pd.concat(results["effect_estimate"].tolist(), axis=1).transpose().reset_index(drop=True)
-        self.kurtosis = effect_estimate.kurtosis()
+        results = pd.concat([c.effect_estimate.to_df() for c in results])
+        results["var"] = results.index
+
+        self.kurtosis = results.groupby("var").apply(lambda x: x.kurtosis())["effect_estimate"]
         self.outcomes = sum(filter(lambda x: x is not None, outcomes))
         self.successful = sum(x is not None for x in outcomes)
 
diff --git a/causal_testing/testing/causal_test_case.py b/causal_testing/testing/causal_test_case.py
index c4969ec4..08895b76 100644
--- a/causal_testing/testing/causal_test_case.py
+++ b/causal_testing/testing/causal_test_case.py
@@ -1,13 +1,11 @@
 """This module contains the CausalTestCase class, a class that holds the information required for a causal test"""
 
 import logging
-from typing import Any
 
-from causal_testing.specification.variable import Variable
 from causal_testing.testing.causal_effect import CausalEffect
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.estimation.abstract_estimator import Estimator
-from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
+from causal_testing.testing.causal_test_result import CausalTestResult
 
 
 logger = logging.getLogger(__name__)
@@ -29,14 +27,12 @@ def __init__(
         expected_causal_effect: CausalEffect,
         estimate_type: str = "ate",
         estimate_params: dict = None,
-        effect_modifier_configuration: dict[Variable:Any] = None,
         estimator: type(Estimator) = None,
     ):
         """
         :param base_test_case: A BaseTestCase object consisting of a treatment variable, outcome variable and effect
         :param expected_causal_effect: The expected causal effect (Positive, Negative, No Effect).
         :param estimate_type: A string which denotes the type of estimate to return.
-        :param effect_modifier_configuration: The assignment of the effect modifiers to use for estimates.
         :param estimator: An Estimator class object
         """
         self.base_test_case = base_test_case
@@ -53,11 +49,6 @@ def __init__(
 
         self.effect = base_test_case.effect
 
-        if effect_modifier_configuration:
-            self.effect_modifier_configuration = effect_modifier_configuration
-        else:
-            self.effect_modifier_configuration = {}
-
     def execute_test(self, estimator: type(Estimator) = None) -> CausalTestResult:
         """
         Execute a causal test case and return the causal test result.
@@ -74,12 +65,10 @@ def execute_test(self, estimator: type(Estimator) = None) -> CausalTestResult:
         if not hasattr(estimator, f"estimate_{self.estimate_type}"):
             raise AttributeError(f"{estimator.__class__} has no {self.estimate_type} method.")
         estimate_effect = getattr(estimator, f"estimate_{self.estimate_type}")
-        effect, confidence_intervals = estimate_effect(**self.estimate_params)
+        effect_estimate = estimate_effect(**self.estimate_params)
         return CausalTestResult(
             estimator=estimator,
-            test_value=TestValue(self.estimate_type, effect),
-            effect_modifier_configuration=self.effect_modifier_configuration,
-            confidence_intervals=confidence_intervals,
+            effect_estimate=effect_estimate,
         )
 
     def __str__(self):
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index b662d5f4..8ba7afff 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -1,21 +1,7 @@
-"""This module contains the CausalTestResult class, which is a container for the results of a causal test, and the
-TestValue dataclass.
-"""
-
-from typing import Any
-from dataclasses import dataclass
-import pandas as pd
+"""This module contains the CausalTestResult class, which is a container for the results of a causal test."""
 
 from causal_testing.estimation.abstract_estimator import Estimator
-from causal_testing.specification.variable import Variable
-
-
-@dataclass
-class TestValue:
-    """A dataclass to hold both the type and value of a causal test result"""
-
-    type: str
-    value: float
+from causal_testing.estimation.effect_estimate import EffectEstimate
 
 
 class CausalTestResult:
@@ -24,13 +10,11 @@ class CausalTestResult:
     confidence intervals."""
 
     def __init__(
-        # pylint: disable=too-many-arguments
         self,
         estimator: Estimator,
-        test_value: TestValue,
-        confidence_intervals: [pd.Series, pd.Series] = None,
-        effect_modifier_configuration: {Variable: Any} = None,
+        effect_estimate: EffectEstimate,
         adequacy=None,
+        error_message: str = None,
     ):
         self.estimator = estimator
         self.adequacy = adequacy
@@ -38,25 +22,12 @@ def __init__(
             self.adjustment_set = estimator.adjustment_set
         else:
             self.adjustment_set = set()
-        self.test_value = test_value
-        self.confidence_intervals = confidence_intervals
-
-        if effect_modifier_configuration is not None:
-            self.effect_modifier_configuration = effect_modifier_configuration
-        else:
-            self.effect_modifier_configuration = {}
+        self.effect_estimate = effect_estimate
+        self.error_message = error_message
 
     def __str__(self):
-        def push(s, inc="  "):
-            return inc + str(s).replace("\n", "\n" + inc)
-
-        result_str = str(self.test_value.value)
-        if "\n" in result_str:
-            result_str = "\n" + push(self.test_value.value)
-        if isinstance(self.estimator.base_test_case.treatment_variable, list):
-            treatment = [x.name for x in self.estimator.base_test_case.treatment_variable]
-        else:
-            treatment = self.estimator.base_test_case.treatment_variable.name
+        result_str = str(self.effect_estimate.value.to_dict())
+        treatment = self.estimator.base_test_case.treatment_variable.name
         base_str = (
             f"Causal Test Result\n==============\n"
             f"Treatment: {treatment}\n"
@@ -67,12 +38,11 @@ def push(s, inc="  "):
         )
         if hasattr(self.estimator, "formula"):
             base_str += f"Formula: {self.estimator.formula}\n"
-        base_str += f"{self.test_value.type}: {result_str}\n"
+        base_str += f"{self.effect_estimate.type}: {result_str}\n"
         confidence_str = ""
-        if self.confidence_intervals:
-            ci_str = " " + str(self.confidence_intervals)
-            if "\n" in ci_str:
-                ci_str = " " + push(pd.DataFrame(self.confidence_intervals).transpose().to_string(header=False))
+        if self.effect_estimate.ci_valid():
+            ci_str = f"CI low: {self.effect_estimate.ci_low.to_dict()}\n"
+            ci_str += f"CI high: {self.effect_estimate.ci_high.to_dict()}\n"
             confidence_str += f"Confidence intervals:{ci_str}\n"
             confidence_str += f"Alpha:{self.estimator.alpha}\n"
         adequacy_str = ""
@@ -94,44 +64,8 @@ def to_dict(self, json=False):
             "treatment_value": self.estimator.treatment_value,
             "outcome": self.estimator.base_test_case.outcome_variable.name,
             "adjustment_set": list(self.adjustment_set) if json else self.adjustment_set,
-            "effect_measure": self.test_value.type,
-            "effect_estimate": (
-                self.test_value.value.to_dict()
-                if json and hasattr(self.test_value.value, "to_dict")
-                else self.test_value.value
-            ),
-            "ci_low": self.ci_low().to_dict() if json and hasattr(self.ci_low(), "to_dict") else self.ci_low(),
-            "ci_high": self.ci_high().to_dict() if json and hasattr(self.ci_high(), "to_dict") else self.ci_high(),
-        }
+            "effect_measure": self.effect_estimate.type,
+        } | self.effect_estimate.to_dict()
         if self.adequacy:
             base_dict["adequacy"] = self.adequacy.to_dict()
         return base_dict
-
-    def ci_low(self):
-        """Return the lower bracket of the confidence intervals."""
-        if self.confidence_intervals:
-            if isinstance(self.confidence_intervals[0], pd.Series):
-                return self.confidence_intervals[0].to_list()
-            return self.confidence_intervals[0]
-        return None
-
-    def ci_high(self):
-        """Return the higher bracket of the confidence intervals."""
-        if self.confidence_intervals:
-            if isinstance(self.confidence_intervals[1], pd.Series):
-                return self.confidence_intervals[1].to_list()
-            return self.confidence_intervals[1]
-        return None
-
-    def ci_valid(self) -> bool:
-        """Return whether or not the result has valid confidence invervals"""
-        return self.ci_low() and (not pd.isnull(self.ci_low())) and self.ci_high() and (not pd.isnull(self.ci_high()))
-
-    def summary(self):
-        """Summarise the causal test result as an intuitive sentence."""
-        treatment_variable = self.estimator.base_test_case.treatment_variable
-        print(
-            f"The causal effect of changing {treatment_variable.name} = {self.estimator.control_value} to "
-            f"{treatment_variable.name}' = {self.estimator.treatment_value} is {self.test_value.value}"
-            f"(95% confidence intervals: {self.confidence_intervals})."
-        )
diff --git a/examples/covasim_/doubling_beta/example_beta.py b/examples/covasim_/doubling_beta/example_beta.py
index e1a7be33..23842930 100644
--- a/examples/covasim_/doubling_beta/example_beta.py
+++ b/examples/covasim_/doubling_beta/example_beta.py
@@ -78,13 +78,13 @@ def doubling_beta_CATE_on_csv(
 
     # Store results for plotting
     results_dict["association"] = {
-        "ate": association_test_result.test_value.value,
-        "cis": association_test_result.confidence_intervals,
+        "ate": association_test_result.effect_estimate.value,
+        "cis": [association_test_result.effect_estimate.ci_low, association_test_result.effect_estimate.ci_high],
         "df": past_execution_df,
     }
     results_dict["causation"] = {
-        "ate": causal_test_result.test_value.value,
-        "cis": causal_test_result.confidence_intervals,
+        "ate": causal_test_result.effect_estimate.value,
+        "cis": [causal_test_result.effect_estimate.ci_low, causal_test_result.effect_estimate.ci_high],
         "df": past_execution_df,
     }
 
@@ -98,8 +98,11 @@ def doubling_beta_CATE_on_csv(
         counterfactual_causal_test_result = causal_test_case.execute_test()
 
         results_dict["counterfactual"] = {
-            "ate": counterfactual_causal_test_result.test_value.value,
-            "cis": counterfactual_causal_test_result.confidence_intervals,
+            "ate": counterfactual_causal_test_result.effect_estimate.value,
+            "cis": [
+                counterfactual_causal_test_result.effect_estimate.ci_low,
+                counterfactual_causal_test_result.effect_estimate.ci_high,
+            ],
             "df": counterfactual_past_execution_df,
         }
         if verbose:
diff --git a/examples/covasim_/vaccinating_elderly/example_vaccine.py b/examples/covasim_/vaccinating_elderly/example_vaccine.py
index 9d8dbbbb..69b679c4 100644
--- a/examples/covasim_/vaccinating_elderly/example_vaccine.py
+++ b/examples/covasim_/vaccinating_elderly/example_vaccine.py
@@ -69,7 +69,8 @@ def setup_test_case(verbose: bool = False):
     for outcome_variable, expected_effect in expected_outcome_effects.items():
         base_test_case = BaseTestCase(treatment_variable=vaccine, outcome_variable=outcome_variable)
         causal_test_case = CausalTestCase(
-            base_test_case=base_test_case, expected_causal_effect=expected_effect,
+            base_test_case=base_test_case,
+            expected_causal_effect=expected_effect,
         )
         # 7. Obtain the minimal adjustment set for the causal test case from the causal DAG
         minimal_adjustment_set = causal_dag.identification(base_test_case)
@@ -89,9 +90,12 @@ def setup_test_case(verbose: bool = False):
         if verbose:
             logging.info("Causation:\n%s", causal_test_result)
 
-        results_dict[outcome_variable.name]["ate"] = causal_test_result.test_value.value
+        results_dict[outcome_variable.name]["ate"] = causal_test_result.effect_estimate.value
 
-        results_dict[outcome_variable.name]["cis"] = causal_test_result.confidence_intervals
+        results_dict[outcome_variable.name]["cis"] = [
+            causal_test_result.effect_estimate.ci_low,
+            causal_test_result.effect_estimate.ci_high,
+        ]
 
         results_dict[outcome_variable.name]["test_passes"] = causal_test_case.expected_causal_effect.apply(
             causal_test_result
diff --git a/examples/lr91/example_max_conductances.py b/examples/lr91/example_max_conductances.py
index 50ca0728..ed381271 100644
--- a/examples/lr91/example_max_conductances.py
+++ b/examples/lr91/example_max_conductances.py
@@ -140,7 +140,10 @@ def effects_on_APD90(observational_data_path, treatment_var, control_val, treatm
     # 9. Run the causal test and print results
     causal_test_result = causal_test_case.execute_test()
     logger.info("%s", causal_test_result)
-    return causal_test_result.test_value.value, causal_test_result.confidence_intervals
+    return causal_test_result.effect_estimate.value, (
+        causal_test_result.effect_estimate.ci_low,
+        causal_test_result.effect_estimate.ci_high,
+    )
 
 
 def plot_ates_with_cis(results_dict: dict, xs: list, save: bool = False, show: bool = False):
diff --git a/examples/poisson-line-process/example_pure_python.py b/examples/poisson-line-process/example_pure_python.py
index 04f18616..fb7a7030 100644
--- a/examples/poisson-line-process/example_pure_python.py
+++ b/examples/poisson-line-process/example_pure_python.py
@@ -11,6 +11,7 @@
 from causal_testing.testing.causal_effect import ExactValue, Positive
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 
 
@@ -26,7 +27,7 @@ def add_modelling_assumptions(self):
         """
         self.modelling_assumptions += "The data must contain runs with the exact configuration of interest."
 
-    def estimate_ate(self) -> float:
+    def estimate_ate(self) -> EffectEstimate:
         """Estimate the outcomes under control and treatment.
         :return: The empirical average treatment effect.
         """
@@ -36,7 +37,7 @@ def estimate_ate(self) -> float:
         treatment_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.treatment_value)[
             self.base_test_case.outcome_variable.name
         ].dropna()
-        return treatment_results.mean() - control_results.mean(), None
+        return EffectEstimate("ate", treatment_results.mean() - control_results.mean())
 
     def estimate_risk_ratio(self) -> float:
         """Estimate the outcomes under control and treatment.
@@ -48,7 +49,7 @@ def estimate_risk_ratio(self) -> float:
         treatment_results = self.df.where(self.df[self.base_test_case.treatment_variable.name] == self.treatment_value)[
             self.base_test_case.outcome_variable.name
         ].dropna()
-        return treatment_results.mean() / control_results.mean(), None
+        return EffectEstimate("risk_ratio", treatment_results.mean() / control_results.mean())
 
 
 # 1. Read in the Causal DAG
@@ -134,8 +135,8 @@ def test_poisson_intensity_num_shapes(save=False):
             "height": obs_causal_test_result.estimator.treatment_value,
             "control": obs_causal_test_result.estimator.control_value,
             "treatment": obs_causal_test_result.estimator.treatment_value,
-            "smt_risk_ratio": smt_causal_test_result.test_value.value,
-            "obs_risk_ratio": obs_causal_test_result.test_value.value[0],
+            "smt_risk_ratio": smt_causal_test_result.effect_estimate.value,
+            "obs_risk_ratio": obs_causal_test_result.effect_estimate.value[0],
         }
         for smt_causal_test_result, obs_causal_test_result in test_results
     ]
@@ -153,7 +154,6 @@ def test_poisson_width_num_shapes(save=False):
             base_test_case=base_test_case,
             expected_causal_effect=Positive(),
             estimate_type="ate_calculated",
-            effect_modifier_configuration={"intensity": i},
             estimator=LinearRegressionEstimator(
                 base_test_case=base_test_case,
                 treatment_value=w + 1.0,
@@ -173,10 +173,10 @@ def test_poisson_width_num_shapes(save=False):
         {
             "control": causal_test_result.estimator.control_value,
             "treatment": causal_test_result.estimator.treatment_value,
-            "intensity": causal_test_result.effect_modifier_configuration["intensity"],
-            "ate": causal_test_result.test_value.value[0],
-            "ci_low": causal_test_result.confidence_intervals[0][0],
-            "ci_high": causal_test_result.confidence_intervals[1][0],
+            "intensity": causal_test_result.estimator.effect_modifiers["intensity"],
+            "ate": causal_test_result.effect_estimate.value[0],
+            "ci_low": causal_test_result.effect_estimate.ci_low[0],
+            "ci_high": causal_test_result.effect_estimate.ci_high[0],
         }
         for causal_test_result in test_results
     ]
diff --git a/tests/estimation_tests/test_cubic_spline_estimator.py b/tests/estimation_tests/test_cubic_spline_estimator.py
index 38b230d7..0298a893 100644
--- a/tests/estimation_tests/test_cubic_spline_estimator.py
+++ b/tests/estimation_tests/test_cubic_spline_estimator.py
@@ -24,10 +24,10 @@ def test_program_11_3_cublic_spline(self):
 
         cublic_spline_estimator = CubicSplineRegressionEstimator(base_test_case, 1, 0, set(), 3, df)
 
-        ate_1 = cublic_spline_estimator.estimate_ate_calculated()
+        ate_1 = cublic_spline_estimator.estimate_ate_calculated().value
 
         cublic_spline_estimator.treatment_value = 2
-        ate_2 = cublic_spline_estimator.estimate_ate_calculated()
+        ate_2 = cublic_spline_estimator.estimate_ate_calculated().value
 
         # Doubling the treatemebnt value should roughly but not exactly double the ATE
         self.assertNotEqual(ate_1[0] * 2, ate_2[0])
diff --git a/tests/estimation_tests/test_experimental_estimator.py b/tests/estimation_tests/test_experimental_estimator.py
index cabf13a4..9cfa7f86 100644
--- a/tests/estimation_tests/test_experimental_estimator.py
+++ b/tests/estimation_tests/test_experimental_estimator.py
@@ -42,10 +42,13 @@ def test_estimate_ate(self):
             alpha=0.05,
             repeats=200,
         )
-        ate, [ci_low, ci_high] = estimator.estimate_ate()
-        self.assertEqual(ate["X"], 2)
-        self.assertEqual(ci_low["X"], 2)
-        self.assertEqual(ci_high["X"], 2)
+        effect_estimate = estimator.estimate_ate()
+        print(effect_estimate.value)
+        print(effect_estimate.ci_low)
+        print(effect_estimate.ci_high)
+        self.assertEqual(effect_estimate.value["X"], 2)
+        self.assertEqual(effect_estimate.ci_low["X"], 2)
+        self.assertEqual(effect_estimate.ci_high["X"], 2)
 
     def test_estimate_risk_ratio(self):
         estimator = ConcreteExperimentalEstimator(
@@ -57,7 +60,7 @@ def test_estimate_risk_ratio(self):
             alpha=0.05,
             repeats=200,
         )
-        rr, [ci_low, ci_high] = estimator.estimate_risk_ratio()
-        self.assertEqual(rr["X"], 2)
-        self.assertEqual(ci_low["X"], 2)
-        self.assertEqual(ci_high["X"], 2)
+        effect_estimate = estimator.estimate_risk_ratio()
+        self.assertEqual(effect_estimate.value["X"], 2)
+        self.assertEqual(effect_estimate.ci_low["X"], 2)
+        self.assertEqual(effect_estimate.ci_high["X"], 2)
diff --git a/tests/estimation_tests/test_instrumental_variable_estimator.py b/tests/estimation_tests/test_instrumental_variable_estimator.py
index c22819d7..0c831775 100644
--- a/tests/estimation_tests/test_instrumental_variable_estimator.py
+++ b/tests/estimation_tests/test_instrumental_variable_estimator.py
@@ -31,7 +31,7 @@ def test_estimate_coefficient(self):
             adjustment_set=set(),
             instrument="Z",
         )
-        coefficient, [low, high] = iv_estimator.estimate_coefficient()
-        self.assertEqual(coefficient[0], 2)
-        self.assertEqual(low[0], 2)
-        self.assertEqual(high[0], 2)
+        effect_estimate = iv_estimator.estimate_coefficient()
+        self.assertEqual(effect_estimate.value[0], 2)
+        self.assertEqual(effect_estimate.ci_low[0], 2)
+        self.assertEqual(effect_estimate.ci_high[0], 2)
diff --git a/tests/estimation_tests/test_ipcw_estimator.py b/tests/estimation_tests/test_ipcw_estimator.py
index a1f5ff06..9bdddb14 100644
--- a/tests/estimation_tests/test_ipcw_estimator.py
+++ b/tests/estimation_tests/test_ipcw_estimator.py
@@ -32,8 +32,8 @@ def test_estimate_hazard_ratio(self):
             fit_bltd_switch_formula=self.fit_bl_switch_formula,
             eligibility=None,
         )
-        estimate, _ = estimation_model.estimate_hazard_ratio()
-        self.assertEqual(round(estimate["trtrand"], 3), 1.351)
+        estimate = estimation_model.estimate_hazard_ratio()
+        self.assertEqual(round(estimate.value["trtrand"], 3), 1.351)
 
     def test_invalid_treatment_strategies(self):
         with self.assertRaises(ValueError):
diff --git a/tests/estimation_tests/test_linear_regression_estimator.py b/tests/estimation_tests/test_linear_regression_estimator.py
index a1af4c9a..0cfe8a1a 100644
--- a/tests/estimation_tests/test_linear_regression_estimator.py
+++ b/tests/estimation_tests/test_linear_regression_estimator.py
@@ -75,17 +75,24 @@ def test_linear_regression_categorical_ate(self):
         df = self.scarf_df.copy()
         base_test_case = BaseTestCase(Input("color", float), Output("completed", float))
         logistic_regression_estimator = LinearRegressionEstimator(base_test_case, None, None, set(), df)
-        _, confidence = logistic_regression_estimator.estimate_coefficient()
-        self.assertTrue(all([ci_low < 0 < ci_high for ci_low, ci_high in zip(confidence[0], confidence[1])]))
+        effect_estimate = logistic_regression_estimator.estimate_coefficient()
+        self.assertTrue(
+            all([ci_low < 0 < ci_high for ci_low, ci_high in zip(effect_estimate.ci_low, effect_estimate.ci_high)])
+        )
 
     def test_program_11_2(self):
         """Test whether our linear regression implementation produces the same results as program 11.2 (p. 141)."""
         df = self.chapter_11_df
         linear_regression_estimator = LinearRegressionEstimator(self.base_test_case, None, None, set(), df)
-        ate, _ = linear_regression_estimator.estimate_coefficient()
+        effect_estimate = linear_regression_estimator.estimate_coefficient()
 
         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertTrue(all(round(ate["treatments"], 1) == round(ate_single, 1) for ate_single in ate))
+        self.assertTrue(
+            all(
+                round(effect_estimate.value["treatments"], 1) == round(ate_single, 1)
+                for ate_single in effect_estimate.value
+            )
+        )
 
     def test_program_11_3(self):
         """Test whether our linear regression implementation produces the same results as program 11.3 (p. 144)."""
@@ -93,9 +100,14 @@ def test_program_11_3(self):
         linear_regression_estimator = LinearRegressionEstimator(
             self.base_test_case, None, None, set(), df, formula="outcomes ~ treatments + I(treatments ** 2)"
         )
-        ate, _ = linear_regression_estimator.estimate_coefficient()
+        effect_estimate = linear_regression_estimator.estimate_coefficient()
         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertTrue(all(round(ate["treatments"], 3) == round(ate_single, 3) for ate_single in ate))
+        self.assertTrue(
+            all(
+                round(effect_estimate.value["treatments"], 3) == round(ate_single, 3)
+                for ate_single in effect_estimate.value
+            )
+        )
 
     def test_program_15_1A(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)."""
@@ -131,8 +143,8 @@ def test_program_15_1A(self):
                              (qsmk * smokeintensity)""",
         )
 
-        coefficient, _ = linear_regression_estimator.estimate_coefficient()
-        self.assertEqual(round(coefficient["qsmk"], 1), 2.6)
+        effect_estimate = linear_regression_estimator.estimate_coefficient()
+        self.assertEqual(round(effect_estimate.value["qsmk"], 1), 2.6)
 
     def test_program_15_no_interaction(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)
@@ -164,10 +176,10 @@ def test_program_15_no_interaction(self):
         )
         # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
         # for term_to_square in terms_to_square:
-        ate, [ci_low, ci_high] = linear_regression_estimator.estimate_coefficient()
+        effect_estimate = linear_regression_estimator.estimate_coefficient()
 
-        self.assertEqual(round(ate[0], 1), 3.5)
-        self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [2.6, 4.3])
+        self.assertEqual(round(effect_estimate.value[0], 1), 3.5)
+        self.assertEqual([round(effect_estimate.ci_low[0], 1), round(effect_estimate.ci_high[0], 1)], [2.6, 4.3])
 
     def test_program_15_no_interaction_ate(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)
@@ -199,9 +211,9 @@ def test_program_15_no_interaction_ate(self):
         )
         # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
         # for term_to_square in terms_to_square:
-        ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate()
-        self.assertEqual(round(ate[0], 1), 3.5)
-        self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [2.6, 4.3])
+        effect_estimate = linear_regression_estimator.estimate_ate()
+        self.assertEqual(round(effect_estimate.value[0], 1), 3.5)
+        self.assertEqual([round(effect_estimate.ci_low[0], 1), round(effect_estimate.ci_high[0], 1)], [2.6, 4.3])
 
     def test_program_15_no_interaction_ate_calculated(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)
@@ -234,11 +246,11 @@ def test_program_15_no_interaction_ate_calculated(self):
         # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
         # for term_to_square in terms_to_square:
 
-        ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate_calculated(
+        effect_estimate = linear_regression_estimator.estimate_ate_calculated(
             adjustment_config={k: self.nhefs_df.mean()[k] for k in covariates}
         )
-        self.assertEqual(round(ate[0], 1), 3.5)
-        self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [1.9, 5])
+        self.assertEqual(round(effect_estimate.value[0], 1), 3.5)
+        self.assertEqual([round(effect_estimate.ci_low[0], 1), round(effect_estimate.ci_high[0], 1)], [1.9, 5])
 
     def test_program_11_2_with_robustness_validation(self):
         """Test whether our linear regression estimator, as used in test_program_11_2 can correctly estimate robustness."""
@@ -258,10 +270,10 @@ def test_gp(self):
         linear_regression_estimator = LinearRegressionEstimator(base_test_case, 0, 1, set(), df.astype(float))
         linear_regression_estimator.gp_formula(seeds=["reciprocal(add(X, 1))"])
         self.assertEqual(linear_regression_estimator.formula, "Y ~ I(1/(X + 1)) - 1")
-        ate, (ci_low, ci_high) = linear_regression_estimator.estimate_ate_calculated()
-        self.assertEqual(round(ate[0], 2), 0.50)
-        self.assertEqual(round(ci_low[0], 2), 0.50)
-        self.assertEqual(round(ci_high[0], 2), 0.50)
+        effect_estimate = linear_regression_estimator.estimate_ate_calculated()
+        self.assertEqual(round(effect_estimate.value[0], 2), 0.50)
+        self.assertEqual(round(effect_estimate.ci_low[0], 2), 0.50)
+        self.assertEqual(round(effect_estimate.ci_high[0], 2), 0.50)
 
     def test_gp_power(self):
         df = pd.DataFrame()
@@ -274,10 +286,10 @@ def test_gp_power(self):
             linear_regression_estimator.formula,
             "Y ~ I(2*X**2) - 1",
         )
-        ate, (ci_low, ci_high) = linear_regression_estimator.estimate_ate_calculated()
-        self.assertEqual(round(ate[0], 2), -2.00)
-        self.assertEqual(round(ci_low[0], 2), -2.00)
-        self.assertEqual(round(ci_high[0], 2), -2.00)
+        effect_estimate = linear_regression_estimator.estimate_ate_calculated()
+        self.assertEqual(round(effect_estimate.value[0], 2), -2.00)
+        self.assertEqual(round(effect_estimate.ci_low[0], 2), -2.00)
+        self.assertEqual(round(effect_estimate.ci_high[0], 2), -2.00)
 
 
 class TestLinearRegressionInteraction(unittest.TestCase):
@@ -297,9 +309,8 @@ def test_X1_effect(self):
         lr_model = LinearRegressionEstimator(
             base_test_case, 1, 0, {"X2"}, effect_modifiers={"x2": 0}, formula="Y ~ X1 + X2 + (X1 * X2)", df=self.df
         )
-        test_results = lr_model.estimate_ate()
-        ate = test_results[0][0]
-        self.assertAlmostEqual(ate, 2.0)
+        effect_estimate = lr_model.estimate_ate()
+        self.assertAlmostEqual(effect_estimate.value[0], 2.0)
 
     def test_categorical_confidence_intervals(self):
         base_test_case = BaseTestCase(Input("color", float), Output("length_in", float))
@@ -310,9 +321,15 @@ def test_categorical_confidence_intervals(self):
             adjustment_set={},
             df=self.scarf_df,
         )
-        coefficients, [ci_low, ci_high] = lr_model.estimate_coefficient()
+        effect_estimate = lr_model.estimate_coefficient()
 
         # The precise values don't really matter. This test is primarily intended to make sure the return type is correct.
-        self.assertTrue(coefficients.round(2).equals(pd.Series({"color[T.grey]": 0.92, "color[T.orange]": -4.25})))
-        self.assertTrue(ci_low.round(2).equals(pd.Series({"color[T.grey]": -22.12, "color[T.orange]": -25.58})))
-        self.assertTrue(ci_high.round(2).equals(pd.Series({"color[T.grey]": 23.95, "color[T.orange]": 17.08})))
+        self.assertTrue(
+            effect_estimate.value.round(2).equals(pd.Series({"color[T.grey]": 0.92, "color[T.orange]": -4.25}))
+        )
+        self.assertTrue(
+            effect_estimate.ci_low.round(2).equals(pd.Series({"color[T.grey]": -22.12, "color[T.orange]": -25.58}))
+        )
+        self.assertTrue(
+            effect_estimate.ci_high.round(2).equals(pd.Series({"color[T.grey]": 23.95, "color[T.orange]": 17.08}))
+        )
diff --git a/tests/estimation_tests/test_logistic_regression_estimator.py b/tests/estimation_tests/test_logistic_regression_estimator.py
index 35ec5367..17e2cd42 100644
--- a/tests/estimation_tests/test_logistic_regression_estimator.py
+++ b/tests/estimation_tests/test_logistic_regression_estimator.py
@@ -19,5 +19,5 @@ def test_odds_ratio(self):
         logistic_regression_estimator = LogisticRegressionEstimator(
             BaseTestCase(Input("length_in", float), Output("completed", bool)), 65, 55, set(), df
         )
-        odds, _ = logistic_regression_estimator.estimate_unit_odds_ratio()
-        self.assertEqual(round(odds[0], 4), 0.8948)
+        effect_estimate = logistic_regression_estimator.estimate_unit_odds_ratio()
+        self.assertEqual(round(effect_estimate.value[0], 4), 0.8948)
diff --git a/tests/main_tests/test_main.py b/tests/main_tests/test_main.py
index 3e93f88c..7f9e22a2 100644
--- a/tests/main_tests/test_main.py
+++ b/tests/main_tests/test_main.py
@@ -2,11 +2,12 @@
 from pathlib import Path
 import tempfile
 import os
+from unittest.mock import patch
+
 
 import shutil
 import json
 import pandas as pd
-from unittest.mock import patch
 
 from causal_testing.main import CausalTestingPaths, CausalTestingFramework
 from causal_testing.__main__ import main
@@ -135,6 +136,8 @@ def test_ctf(self):
         framework.load_tests()
         results = framework.run_tests()
 
+        print(results)
+
         # Save results
         framework.save_results(results)
 
@@ -142,7 +145,7 @@ def test_ctf(self):
             test_configs = json.load(f)
 
         tests_passed = [
-            test_case.expected_causal_effect.apply(result) if result.test_value.type != "Error" else False
+            test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False
             for test_config, test_case, result in zip(test_configs["tests"], framework.test_cases, results)
         ]
 
@@ -171,6 +174,15 @@ def test_ctf_batches(self):
 
         self.assertEqual([result["passed"] for result in all_results], [True])
 
+    def test_ctf_exception(self):
+        framework = CausalTestingFramework(self.paths, query="test_input < 0")
+        framework.setup()
+
+        # Load and run tests
+        framework.load_tests()
+        with self.assertRaises(ValueError):
+            framework.run_tests()
+
     def test_ctf_batches_exception_silent(self):
         framework = CausalTestingFramework(self.paths, query="test_input < 0")
         framework.setup()
@@ -193,7 +205,7 @@ def test_ctf_batches_exception_silent(self):
                     all_results.extend(json.load(f))
 
         self.assertEqual([result["passed"] for result in all_results], [False])
-        self.assertEqual([result["result"]["effect_measure"] for result in all_results], ["Error"])
+        self.assertTrue(all(result["result"].get("error") is not None for result in all_results))
 
     def test_ctf_batches_exception(self):
         framework = CausalTestingFramework(self.paths, query="test_input < 0")
@@ -301,7 +313,7 @@ def test_combined_queries(self):
         self.assertTrue((causal_test.estimator.df["test_output"] > 0).all())
 
     def test_parse_args(self):
-        with unittest.mock.patch(
+        with patch(
             "sys.argv",
             [
                 "causal_testing",
@@ -320,7 +332,7 @@ def test_parse_args(self):
             self.assertTrue((self.output_path.parent / "main.json").exists())
 
     def test_parse_args_batches(self):
-        with unittest.mock.patch(
+        with patch(
             "sys.argv",
             [
                 "causal_testing",
@@ -342,7 +354,7 @@ def test_parse_args_batches(self):
 
     def test_parse_args_generation(self):
         with tempfile.TemporaryDirectory() as tmp:
-            with unittest.mock.patch(
+            with patch(
                 "sys.argv",
                 [
                     "causal_testing",
@@ -358,7 +370,7 @@ def test_parse_args_generation(self):
 
     def test_parse_args_generation_non_default(self):
         with tempfile.TemporaryDirectory() as tmp:
-            with unittest.mock.patch(
+            with patch(
                 "sys.argv",
                 [
                     "causal_testing",
diff --git a/tests/surrogate_tests/test_causal_surrogate_assisted.py b/tests/surrogate_tests/test_causal_surrogate_assisted.py
index b2824b00..e56c54f7 100644
--- a/tests/surrogate_tests/test_causal_surrogate_assisted.py
+++ b/tests/surrogate_tests/test_causal_surrogate_assisted.py
@@ -1,3 +1,7 @@
+import os
+import shutil, tempfile
+import pandas as pd
+import numpy as np
 import unittest
 from causal_testing.specification.causal_dag import CausalDAG
 from causal_testing.specification.causal_specification import CausalSpecification
@@ -11,11 +15,6 @@
 from causal_testing.surrogate.surrogate_search_algorithms import GeneticSearchAlgorithm
 from causal_testing.estimation.cubic_spline_estimator import CubicSplineRegressionEstimator
 
-import os
-import shutil, tempfile
-import pandas as pd
-import numpy as np
-
 
 class TestSimulationResult(unittest.TestCase):
 
diff --git a/tests/testing_tests/test_causal_effect.py b/tests/testing_tests/test_causal_effect.py
index 89d02ae1..29eeb61d 100644
--- a/tests/testing_tests/test_causal_effect.py
+++ b/tests/testing_tests/test_causal_effect.py
@@ -1,8 +1,9 @@
 import unittest
 import pandas as pd
 from causal_testing.testing.causal_effect import ExactValue, SomeEffect, Positive, Negative, NoEffect
-from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
+from causal_testing.testing.causal_test_result import CausalTestResult
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.utils.validation import CausalValidator
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.specification.variable import Input, Output
@@ -21,16 +22,13 @@ def setUp(self) -> None:
         )
 
     def test_None_ci(self):
-        test_value = TestValue(type="ate", value=0)
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[None, None],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(0)),
         )
 
-        self.assertIsNone(ctr.ci_low())
-        self.assertIsNone(ctr.ci_high())
+        self.assertIsNone(ctr.effect_estimate.ci_low)
+        self.assertIsNone(ctr.effect_estimate.ci_high)
         self.assertEqual(
             ctr.to_dict(),
             {
@@ -39,24 +37,19 @@ def test_None_ci(self):
                 "treatment_value": 1,
                 "outcome": "A",
                 "adjustment_set": set(),
-                "effect_estimate": 0,
+                "effect_estimate": {0: 0},
                 "effect_measure": "ate",
-                "ci_high": None,
-                "ci_low": None,
             },
         )
 
     def test_empty_adjustment_set(self):
-        test_value = TestValue(type="ate", value=0)
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(0)),
         )
 
-        self.assertIsNone(ctr.ci_low())
-        self.assertIsNone(ctr.ci_high())
+        self.assertIsNone(ctr.effect_estimate.ci_low)
+        self.assertIsNone(ctr.effect_estimate.ci_high)
         self.assertEqual(
             str(ctr),
             (
@@ -67,149 +60,116 @@ def test_empty_adjustment_set(self):
                 "Outcome: A\n"
                 "Adjustment set: set()\n"
                 "Formula: A ~ A\n"
-                "ate: 0\n"
+                "ate: {0: 0}\n"
             ),
         )
 
     def test_Positive_ate_pass(self):
-        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(5.05)),
         )
         ev = Positive()
         self.assertTrue(ev.apply(ctr))
 
     def test_Positive_risk_ratio_pass(self):
-        test_value = TestValue(type="risk_ratio", value=pd.Series(2))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="risk_ratio", value=pd.Series(5.05)),
         )
         ev = Positive()
         self.assertTrue(ev.apply(ctr))
 
     def test_Positive_fail(self):
-        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(0)),
         )
         ev = Positive()
         self.assertFalse(ev.apply(ctr))
 
     def test_Positive_fail_ci(self):
-        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(-1), pd.Series(1)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(0), ci_low=pd.Series(-1), ci_high=pd.Series(1)),
         )
         ev = Positive()
         self.assertFalse(ev.apply(ctr))
 
     def test_Negative_ate_pass(self):
-        test_value = TestValue(type="ate", value=pd.Series(-5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(-5.05)),
         )
         ev = Negative()
         self.assertTrue(ev.apply(ctr))
 
     def test_Negative_risk_ratio_pass(self):
-        test_value = TestValue(type="risk_ratio", value=pd.Series(0.2))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="risk_ratio", value=pd.Series(0.2)),
         )
         ev = Negative()
         self.assertTrue(ev.apply(ctr))
 
     def test_Negative_fail(self):
-        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(0)),
         )
         ev = Negative()
         self.assertFalse(ev.apply(ctr))
 
     def test_Negative_fail_ci(self):
-        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(-1), pd.Series(1)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(0), ci_low=pd.Series(-1), ci_high=pd.Series(1)),
         )
         ev = Negative()
         self.assertFalse(ev.apply(ctr))
 
     def test_exactValue_pass(self):
-        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(5.05)),
         )
         ev = ExactValue(5, 0.1)
         self.assertTrue(ev.apply(ctr))
 
     def test_exactValue_pass_ci(self):
-        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(4), pd.Series(6)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(
+                type="ate", value=pd.Series(5.05), ci_low=pd.Series(4), ci_high=pd.Series(6)
+            ),
         )
         ev = ExactValue(5, 0.1)
         self.assertTrue(ev.apply(ctr))
 
     def test_exactValue_ci_pass_ci(self):
-        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(4.1), pd.Series(5.9)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(
+                type="ate", value=pd.Series(5.05), ci_low=pd.Series(4.1), ci_high=pd.Series(5.9)
+            ),
         )
         ev = ExactValue(5, ci_low=4, ci_high=6)
         self.assertTrue(ev.apply(ctr))
 
     def test_exactValue_ci_fail_ci(self):
-        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(3.9), pd.Series(6.1)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(
+                type="ate", value=pd.Series(5.05), ci_low=pd.Series(3.9), ci_high=pd.Series(6.1)
+            ),
         )
         ev = ExactValue(5, ci_low=4, ci_high=6)
         self.assertFalse(ev.apply(ctr))
 
     def test_exactValue_fail(self):
-        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(0)),
         )
         ev = ExactValue(5, 0.1)
         self.assertFalse(ev.apply(ctr))
@@ -235,12 +195,11 @@ def test_invalid_ci_atol(self):
             ExactValue(1000, ci_low=999, ci_high=1001, atol=50)
 
     def test_invalid(self):
-        test_value = TestValue(type="invalid", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(
+                type="invalid", value=pd.Series(5.05), ci_low=pd.Series(4.8), ci_high=pd.Series(6.7)
+            ),
         )
         with self.assertRaises(ValueError):
             SomeEffect().apply(ctr)
@@ -252,92 +211,59 @@ def test_invalid(self):
             Negative().apply(ctr)
 
     def test_someEffect_pass_coefficient(self):
-        test_value = TestValue(type="coefficient", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(
+                type="coefficient", value=pd.Series(5.05), ci_low=pd.Series(4.8), ci_high=pd.Series(6.7)
+            ),
         )
         self.assertTrue(SomeEffect().apply(ctr))
         self.assertFalse(NoEffect().apply(ctr))
 
     def test_someEffect_pass_ate(self):
-        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(
+                type="ate", value=pd.Series(5.05), ci_low=pd.Series(4.8), ci_high=pd.Series(6.7)
+            ),
         )
         self.assertTrue(SomeEffect().apply(ctr))
         self.assertFalse(NoEffect().apply(ctr))
 
     def test_someEffect_pass_rr(self):
-        test_value = TestValue(type="risk_ratio", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(
+                type="risk_ratio", value=pd.Series(5.05), ci_low=pd.Series(4.8), ci_high=pd.Series(6.7)
+            ),
         )
         self.assertTrue(SomeEffect().apply(ctr))
         self.assertFalse(NoEffect().apply(ctr))
 
     def test_someEffect_fail(self):
-        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(
+                type="ate", value=pd.Series(0), ci_low=pd.Series(-0.1), ci_high=pd.Series(0.2)
+            ),
         )
         self.assertFalse(SomeEffect().apply(ctr))
         self.assertTrue(NoEffect().apply(ctr))
 
     def test_someEffect_None(self):
-        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=None,
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series(0)),
         )
         self.assertEqual(SomeEffect().apply(ctr), None)
 
-    def test_someEffect_str(self):
-        test_value = TestValue(type="ate", value=0)
-        ctr = CausalTestResult(
-            estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)],
-            effect_modifier_configuration=None,
-        )
-        ev = SomeEffect()
-        self.assertEqual(
-            ctr.to_dict(),
-            {
-                "treatment": "A",
-                "control_value": 0,
-                "treatment_value": 1,
-                "outcome": "A",
-                "adjustment_set": set(),
-                "effect_estimate": 0,
-                "effect_measure": "ate",
-                "ci_low": [-0.1],
-                "ci_high": [0.2],
-            },
-        )
-
     def test_someEffect_dict(self):
-        test_value = TestValue(type="ate", value=0)
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(
+                type="ate", value=pd.Series(0), ci_low=pd.Series(-0.1), ci_high=pd.Series(0.2)
+            ),
         )
-        ev = SomeEffect()
         self.assertEqual(
             ctr.to_dict(),
             {
@@ -346,10 +272,10 @@ def test_someEffect_dict(self):
                 "treatment_value": 1,
                 "outcome": "A",
                 "adjustment_set": set(),
-                "effect_estimate": 0,
+                "effect_estimate": {0: 0},
                 "effect_measure": "ate",
-                "ci_low": [-0.1],
-                "ci_high": [0.2],
+                "ci_low": {0: -0.1},
+                "ci_high": {0: 0.2},
             },
         )
 
@@ -374,12 +300,9 @@ def test_negative_risk_ratio_e_value_using_ci(self):
         self.assertEqual(round(e_value, 4), 1.4625)
 
     def test_multiple_value_exception_caught(self):
-        test_value = TestValue(type="ate", value=pd.Series([0, 1]))
         ctr = CausalTestResult(
             estimator=self.estimator,
-            test_value=test_value,
-            confidence_intervals=[None, None],
-            effect_modifier_configuration=None,
+            effect_estimate=EffectEstimate(type="ate", value=pd.Series([0, 1])),
         )
         with self.assertRaises(ValueError):
             Positive().apply(ctr)
diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
index 8e4ef8cc..2c552c73 100644
--- a/tests/testing_tests/test_causal_test_case.py
+++ b/tests/testing_tests/test_causal_test_case.py
@@ -126,7 +126,7 @@ def test_execute_test_observational_linear_regression_estimator(self):
             self.df,
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model)
-        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10)
+        pd.testing.assert_series_equal(causal_test_result.effect_estimate.value, pd.Series(4.0), atol=1e-10)
 
     def test_execute_test_observational_linear_regression_estimator_direct_effect(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -150,7 +150,7 @@ def test_execute_test_observational_linear_regression_estimator_direct_effect(se
         self.treatment_value = 1
         self.control_value = 0
         causal_test_result = causal_test_case.execute_test()
-        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10)
+        pd.testing.assert_series_equal(causal_test_result.effect_estimate.value, pd.Series(4.0), atol=1e-10)
 
     def test_execute_test_observational_linear_regression_estimator_coefficient(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -164,7 +164,7 @@ def test_execute_test_observational_linear_regression_estimator_coefficient(self
         )
         self.causal_test_case.estimate_type = "coefficient"
         causal_test_result = self.causal_test_case.execute_test(estimation_model)
-        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series({"D": 0.0}), atol=1e-1)
+        pd.testing.assert_series_equal(causal_test_result.effect_estimate.value, pd.Series({"D": 0.0}), atol=1e-1)
 
     def test_execute_test_observational_linear_regression_estimator_risk_ratio(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -178,7 +178,7 @@ def test_execute_test_observational_linear_regression_estimator_risk_ratio(self)
         )
         self.causal_test_case.estimate_type = "risk_ratio"
         causal_test_result = self.causal_test_case.execute_test(estimation_model)
-        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(0.0), atol=1)
+        pd.testing.assert_series_equal(causal_test_result.effect_estimate.value, pd.Series(0.0), atol=1)
 
     def test_invalid_estimate_type(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -206,7 +206,7 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel
             formula=f"C ~ A + {'+'.join(self.minimal_adjustment_set)} + (D ** 2)",
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model)
-        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1)
+        pd.testing.assert_series_equal(causal_test_result.effect_estimate.value, pd.Series(4.0), atol=1)
 
     def test_estimate_params_none(self):
         """Check that estimate_params defaults to empty dict when None is passed into the estimator object"""
@@ -248,4 +248,4 @@ def test_estimate_params_with_formula(self):
             estimator=estimator,
         )
         self.assertEqual(causal_test_case.estimate_params, estimate_params)
-        self.assertEqual(round(causal_test_case.execute_test().test_value.value[0], 3), 1.444)
+        self.assertEqual(round(causal_test_case.execute_test().effect_estimate.value[0], 3), 1.444)