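Integrate the optimised CausalDAG class into CausalDAG: call sites now use the DAG's graph methods directly instead of going through `.graph`, and the separate OptimisedCausalDAG tests are removed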

jmafoster1 · jmafoster1 · commit 64596f63204e · 2025-08-01T15:38:25.000+01:00
diff --git a/causal_testing/main.py b/causal_testing/main.py
@@ -131,7 +131,7 @@ def load_dag(self) -> CausalDAG:
         """
         logger.info(f"Loading DAG from {self.paths.dag_path}")
         dag = CausalDAG(str(self.paths.dag_path), ignore_cycles=self.ignore_cycles)
-        logger.info(f"DAG loaded with {len(dag.graph.nodes)} nodes and {len(dag.graph.edges)} edges")
+        logger.info(f"DAG loaded with {len(dag.nodes)} nodes and {len(dag.edges)} edges")
         return dag
 
     def _read_dataframe(self, data_path):
@@ -163,18 +163,18 @@ def create_variables(self) -> None:
         """
         Create variable objects from DAG nodes based on their connectivity.
         """
-        for node_name, node_data in self.dag.graph.nodes(data=True):
+        for node_name, node_data in self.dag.nodes(data=True):
             if node_name not in self.data.columns and not node_data.get("hidden", False):
                 raise ValueError(f"Node {node_name} missing from data. Should it be marked as hidden?")
 
             dtype = self.data.dtypes.get(node_name)
 
             # If node has no incoming edges, it's an input
-            if self.dag.graph.in_degree(node_name) == 0:
+            if self.dag.in_degree(node_name) == 0:
                 self.variables["inputs"][node_name] = Input(name=node_name, datatype=dtype)
 
             # Otherwise it's an output
-            if self.dag.graph.in_degree(node_name) > 0:
+            if self.dag.in_degree(node_name) > 0:
                 self.variables["outputs"][node_name] = Output(name=node_name, datatype=dtype)
 
     def create_scenario_and_specification(self) -> None:
diff --git a/causal_testing/specification/causal_dag.py b/causal_testing/specification/causal_dag.py
diff --git a/causal_testing/specification/optimised_causal_dag.py b/causal_testing/specification/optimised_causal_dag.py
diff --git a/causal_testing/surrogate/causal_surrogate_assisted.py b/causal_testing/surrogate/causal_surrogate_assisted.py
@@ -125,7 +125,7 @@ def generate_surrogates(
         surrogate_models = []
 
         for u, v in specification.causal_dag.edges:
-            edge_metadata = specification.causal_dag.graph.adj[u][v]
+            edge_metadata = specification.causal_dag.adj[u][v]
             if "included" in edge_metadata:
                 from_var = specification.scenario.variables.get(u)
                 to_var = specification.scenario.variables.get(v)
diff --git a/causal_testing/testing/metamorphic_relation.py b/causal_testing/testing/metamorphic_relation.py
@@ -109,13 +109,13 @@ def generate_metamorphic_relation(
     # Create a ShouldNotCause relation for each pair of nodes that are not directly connected
     if ((u, v) not in dag.edges) and ((v, u) not in dag.edges):
         # Case 1: U --> ... --> V
-        if u in nx.ancestors(dag.graph, v):
+        if u in nx.ancestors(dag, v):
             adj_sets = dag.direct_effect_adjustment_sets([u], [v], nodes_to_ignore=nodes_to_ignore)
             if adj_sets:
                 metamorphic_relations.append(ShouldNotCause(BaseTestCase(u, v), list(adj_sets[0])))
 
         # Case 2: V --> ... --> U
-        elif v in nx.ancestors(dag.graph, u):
+        elif v in nx.ancestors(dag, u):
             adj_sets = dag.direct_effect_adjustment_sets([v], [u], nodes_to_ignore=nodes_to_ignore)
             if adj_sets:
                 metamorphic_relations.append(ShouldNotCause(BaseTestCase(v, u), list(adj_sets[0])))
@@ -194,7 +194,7 @@ def generate_causal_tests(dag_path: str, output_path: str, ignore_cycles: bool =
     causal_dag = CausalDAG(dag_path, ignore_cycles=ignore_cycles)
 
     dag_nodes_to_test = [
-        node for node in causal_dag.nodes if nx.get_node_attributes(causal_dag.graph, "test", default=True)[node]
+        node for node in causal_dag.nodes if nx.get_node_attributes(causal_dag, "test", default=True)[node]
     ]
 
     if not causal_dag.is_acyclic() and ignore_cycles:
@@ -214,7 +214,7 @@ def generate_causal_tests(dag_path: str, output_path: str, ignore_cycles: bool =
     tests = [
         relation.to_json_stub(skip=False)
         for relation in relations
-        if len(list(causal_dag.graph.predecessors(relation.base_test_case.outcome_variable))) > 0
+        if len(list(causal_dag.predecessors(relation.base_test_case.outcome_variable))) > 0
     ]
 
     logger.info(f"Generated {len(tests)} tests. Saving to {output_path}.")
diff --git a/tests/main_tests/test_main.py b/tests/main_tests/test_main.py
@@ -93,7 +93,7 @@ def test_load_data_query(self):
     def test_load_dag_missing_node(self):
         framework = CausalTestingFramework(self.paths)
         framework.setup()
-        framework.dag.graph.add_node("missing")
+        framework.dag.add_node("missing")
         with self.assertRaises(ValueError):
             framework.create_variables()
 
diff --git a/tests/specification_tests/test_causal_dag.py b/tests/specification_tests/test_causal_dag.py
@@ -2,8 +2,7 @@
 import os
 import shutil, tempfile
 import networkx as nx
-from causal_testing.specification.causal_dag import CausalDAG, close_separator, list_all_min_sep, CausalDAG
-from causal_testing.specification.optimised_causal_dag import CausalDAG as OptimisedCausalDAG
+from causal_testing.specification.causal_dag import CausalDAG, close_separator, list_all_min_sep
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Output
 from causal_testing.testing.base_test_case import BaseTestCase
@@ -26,7 +25,7 @@ def test_enumerate_minimal_adjustment_sets(self):
         causal_dag = CausalDAG(self.dag_dot_path)
         xs, ys = ["X"], ["Y"]
         adjustment_sets = causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
-        self.assertEqual([{"Z"}], adjustment_sets)
+        self.assertEqual([{"Z"}], list(adjustment_sets))
 
     def tearDown(self) -> None:
         shutil.rmtree(self.temp_dir_path)
@@ -47,19 +46,19 @@ def test_valid_iv(self):
 
     def test_unrelated_instrument(self):
         causal_dag = CausalDAG(self.dag_dot_path)
-        causal_dag.graph.remove_edge("I", "X")
+        causal_dag.remove_edge("I", "X")
         with self.assertRaises(ValueError):
             causal_dag.check_iv_assumptions("X", "Y", "I")
 
     def test_direct_cause(self):
         causal_dag = CausalDAG(self.dag_dot_path)
-        causal_dag.graph.add_edge("I", "Y")
+        causal_dag.add_edge("I", "Y")
         with self.assertRaises(ValueError):
             causal_dag.check_iv_assumptions("X", "Y", "I")
 
     def test_common_cause(self):
         causal_dag = CausalDAG(self.dag_dot_path)
-        causal_dag.graph.add_edge("U", "I")
+        causal_dag.add_edge("U", "I")
         with self.assertRaises(ValueError):
             causal_dag.check_iv_assumptions("X", "Y", "I")
 
@@ -280,12 +279,12 @@ def test_enumerate_minimal_adjustment_sets(self):
         causal_dag = CausalDAG(self.dag_dot_path)
         xs, ys = ["X1", "X2"], ["Y"]
         adjustment_sets = causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
-        self.assertEqual([{"Z"}], adjustment_sets)
+        self.assertEqual([{"Z"}], list(adjustment_sets))
 
     def test_enumerate_minimal_adjustment_sets_multiple(self):
         """Test whether enumerate_minimal_adjustment_sets lists all minimum adjustment sets if multiple are possible."""
         causal_dag = CausalDAG()
-        causal_dag.graph.add_edges_from(
+        causal_dag.add_edges_from(
             [
                 ("X1", "X2"),
                 ("X2", "V"),
@@ -309,7 +308,7 @@ def test_enumerate_minimal_adjustment_sets_multiple(self):
     def test_enumerate_minimal_adjustment_sets_two_adjustments(self):
         """Test whether enumerate_minimal_adjustment_sets lists all possible minimum adjustment sets of arity two."""
         causal_dag = CausalDAG()
-        causal_dag.graph.add_edges_from(
+        causal_dag.add_edges_from(
             [
                 ("X1", "X2"),
                 ("X2", "V"),
@@ -336,7 +335,7 @@ def test_enumerate_minimal_adjustment_sets_two_adjustments(self):
     def test_dag_with_non_character_nodes(self):
         """Test identification for a DAG whose nodes are not just characters (strings of length greater than 1)."""
         causal_dag = CausalDAG()
-        causal_dag.graph.add_edges_from(
+        causal_dag.add_edges_from(
             [
                 ("va", "ba"),
                 ("ba", "ia"),
@@ -351,7 +350,7 @@ def test_dag_with_non_character_nodes(self):
         )
         xs, ys = ["ba"], ["da"]
         adjustment_sets = causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
-        self.assertEqual(adjustment_sets, [{"aa"}, {"la"}, {"va"}])
+        self.assertEqual(list(adjustment_sets), [{"aa"}, {"la"}, {"va"}])
 
     def tearDown(self) -> None:
         shutil.rmtree(self.temp_dir_path)
@@ -485,148 +484,3 @@ def time_it(label, func, *args, **kwargs):
     result = func(*args, **kwargs)
     print(f"{label} took {time.time() - start:.6f} seconds")
     return result
-
-
-class TestOptimisedDAGIdentification(TestDAGIdentification):
-    """
-    Test the Causal DAG identification algorithms and supporting algorithms.
-    """
-
-    def test_is_min_adjustment_for_not_min_adjustment(self):
-        """Test whether is_min_adjustment can correctly test whether the minimum adjustment set is not minimal."""
-        causal_dag = CausalDAG(self.dag_dot_path)
-        xs, ys, zs = ["X1", "X2"], ["Y"], {"Z", "V"}
-
-        opt_dag = OptimisedCausalDAG(self.dag_dot_path)
-
-        norm_result = time_it("Norm", lambda: causal_dag.adjustment_set_is_minimal(xs, ys, zs))
-        opt_result = time_it("Opt", lambda: opt_dag.adjustment_set_is_minimal(xs, ys, zs))
-        self.assertEqual(norm_result, opt_result)
-
-    def test_is_min_adjustment_for_invalid_adjustment(self):
-        """Test whether is min_adjustment can correctly identify that the minimum adjustment set is invalid."""
-        causal_dag = OptimisedCausalDAG(self.dag_dot_path)
-        xs, ys, zs = ["X1", "X2"], ["Y"], set()
-        self.assertRaises(ValueError, causal_dag.adjustment_set_is_minimal, xs, ys, zs)
-
-    def test_get_ancestor_graph_of_causal_dag(self):
-        """Test whether get_ancestor_graph converts a CausalDAG to the correct ancestor graph."""
-        causal_dag = OptimisedCausalDAG(self.dag_dot_path)
-        xs, ys = ["X1", "X2"], ["Y"]
-        ancestor_graph = causal_dag.get_ancestor_graph(xs, ys)
-        self.assertEqual(list(ancestor_graph.nodes), ["X1", "X2", "D1", "Y", "Z"])
-        self.assertEqual(
-            list(ancestor_graph.edges),
-            [("X1", "X2"), ("X2", "D1"), ("D1", "Y"), ("Z", "X2"), ("Z", "Y")],
-        )
-
-    def test_get_ancestor_graph_of_proper_backdoor_graph(self):
-        """Test whether get_ancestor_graph converts a CausalDAG to the correct proper back-door graph."""
-        causal_dag = OptimisedCausalDAG(self.dag_dot_path)
-        xs, ys = ["X1", "X2"], ["Y"]
-        proper_backdoor_graph = causal_dag.get_proper_backdoor_graph(xs, ys)
-        ancestor_graph = proper_backdoor_graph.get_ancestor_graph(xs, ys)
-        self.assertEqual(list(ancestor_graph.nodes), ["X1", "X2", "D1", "Y", "Z"])
-        self.assertEqual(
-            list(ancestor_graph.edges),
-            [("X1", "X2"), ("D1", "Y"), ("Z", "X2"), ("Z", "Y")],
-        )
-
-    def test_enumerate_minimal_adjustment_sets(self):
-        """Test whether enumerate_minimal_adjustment_sets lists all possible minimum sized adjustment sets."""
-        causal_dag = OptimisedCausalDAG(self.dag_dot_path)
-        xs, ys = ["X1", "X2"], ["Y"]
-        adjustment_sets = causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
-        self.assertEqual([{"Z"}], list(adjustment_sets))
-
-    def test_enumerate_minimal_adjustment_sets_multiple(self):
-        """Test whether enumerate_minimal_adjustment_sets lists all minimum adjustment sets if multiple are possible."""
-        causal_dag = OptimisedCausalDAG()
-        causal_dag.add_edges_from(
-            [
-                ("X1", "X2"),
-                ("X2", "V"),
-                ("Z1", "X2"),
-                ("Z1", "Z2"),
-                ("Z2", "Z3"),
-                ("Z3", "Y"),
-                ("D1", "Y"),
-                ("D1", "D2"),
-                ("Y", "D3"),
-            ]
-        )
-        opt_causal_dag = OptimisedCausalDAG()
-        opt_causal_dag.add_edges_from(
-            [
-                ("X1", "X2"),
-                ("X2", "V"),
-                ("Z1", "X2"),
-                ("Z1", "Z2"),
-                ("Z2", "Z3"),
-                ("Z3", "Y"),
-                ("D1", "Y"),
-                ("D1", "D2"),
-                ("Y", "D3"),
-            ]
-        )
-        xs, ys = ["X1", "X2"], ["Y"]
-
-        norm_adjustment_sets = time_it("Norm", lambda: causal_dag.enumerate_minimal_adjustment_sets(xs, ys))
-
-        opt_adjustment_sets = time_it("Opt", lambda: opt_causal_dag.enumerate_minimal_adjustment_sets(xs, ys))
-        set_of_opt_adjustment_sets = set(frozenset(min_separator) for min_separator in opt_adjustment_sets)
-
-        self.assertEqual(
-            {frozenset({"Z1"}), frozenset({"Z2"}), frozenset({"Z3"})},
-            set_of_opt_adjustment_sets,
-        )
-
-    def test_enumerate_minimal_adjustment_sets_two_adjustments(self):
-        """Test whether enumerate_minimal_adjustment_sets lists all possible minimum adjustment sets of arity two."""
-        causal_dag = OptimisedCausalDAG()
-        causal_dag.add_edges_from(
-            [
-                ("X1", "X2"),
-                ("X2", "V"),
-                ("Z1", "X2"),
-                ("Z1", "Z2"),
-                ("Z2", "Z3"),
-                ("Z3", "Y"),
-                ("D1", "Y"),
-                ("D1", "D2"),
-                ("Y", "D3"),
-                ("Z4", "X1"),
-                ("Z4", "Y"),
-                ("X2", "D1"),
-            ]
-        )
-        xs, ys = ["X1", "X2"], ["Y"]
-        adjustment_sets = causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
-        set_of_adjustment_sets = set(frozenset(min_separator) for min_separator in adjustment_sets)
-        self.assertEqual(
-            {frozenset({"Z1", "Z4"}), frozenset({"Z2", "Z4"}), frozenset({"Z3", "Z4"})},
-            set_of_adjustment_sets,
-        )
-
-    def test_dag_with_non_character_nodes(self):
-        """Test identification for a DAG whose nodes are not just characters (strings of length greater than 1)."""
-        causal_dag = OptimisedCausalDAG()
-        causal_dag.add_edges_from(
-            [
-                ("va", "ba"),
-                ("ba", "ia"),
-                ("ba", "da"),
-                ("ba", "ra"),
-                ("la", "va"),
-                ("la", "aa"),
-                ("aa", "ia"),
-                ("aa", "da"),
-                ("aa", "ra"),
-            ]
-        )
-        xs, ys = ["ba"], ["da"]
-        adjustment_sets = causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
-        self.assertEqual(list(adjustment_sets), [{"aa"}, {"la"}, {"va"}])
-
-    def tearDown(self) -> None:
-        shutil.rmtree(self.temp_dir_path)
diff --git a/tests/testing_tests/test_metamorphic_relations.py b/tests/testing_tests/test_metamorphic_relations.py
@@ -48,7 +48,7 @@ def test_should_not_cause_json_stub(self):
         """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program
         and there is only a single input."""
         causal_dag = CausalDAG(self.dag_dot_path)
-        causal_dag.graph.remove_nodes_from(["X2", "X3"])
+        causal_dag.remove_nodes_from(["X2", "X3"])
         adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
         should_not_cause_MR = ShouldNotCause(BaseTestCase("X1", "Z"), adj_set)
         self.assertEqual(
@@ -70,7 +70,7 @@ def test_should_cause_json_stub(self):
         """Test if the ShouldCause MR passes all metamorphic tests where the DAG perfectly represents the program
         and there is only a single input."""
         causal_dag = CausalDAG(self.dag_dot_path)
-        causal_dag.graph.remove_nodes_from(["X2", "X3"])
+        causal_dag.remove_nodes_from(["X2", "X3"])
         adj_set = list(causal_dag.direct_effect_adjustment_sets(["X1"], ["Z"])[0])
         should_cause_MR = ShouldCause(BaseTestCase("X1", "Z"), adj_set)
         self.assertEqual(
@@ -218,8 +218,7 @@ def test_generate_causal_tests_ignore_cycles(self):
                 map(
                     lambda x: x.to_json_stub(skip=False),
                     filter(
-                        lambda relation: len(list(dcg.graph.predecessors(relation.base_test_case.outcome_variable)))
-                        > 0,
+                        lambda relation: len(list(dcg.predecessors(relation.base_test_case.outcome_variable))) > 0,
                         relations,
                     ),
                 )
@@ -238,8 +237,7 @@ def test_generate_causal_tests(self):
                 map(
                     lambda x: x.to_json_stub(skip=False),
                     filter(
-                        lambda relation: len(list(dag.graph.predecessors(relation.base_test_case.outcome_variable)))
-                        > 0,
+                        lambda relation: len(list(dag.predecessors(relation.base_test_case.outcome_variable))) > 0,
                         relations,
                     ),
                 )