Experimental optimisations, proposed by ChatGPT.

neilwalkinshaw · neilwalkinshaw · commit 197c5d712a8a · 2025-07-11T11:00:12.000+01:00
diff --git a/causal_testing/specification/causal_dag.py b/causal_testing/specification/causal_dag.py
@@ -10,11 +10,15 @@
 import networkx as nx
 import pydot
 
+from typing import Generator, Set
+
 from causal_testing.testing.base_test_case import BaseTestCase
 
 from .scenario import Scenario
 from .variable import Output
 
+from itertools import combinations
+
 Node = Union[str, int]  # Node type hint: A node is a string or an int
 
 logger = logging.getLogger(__name__)
@@ -591,3 +595,164 @@ def to_dot_string(self) -> str:
 
     def __str__(self):
         return f"Nodes: {self.nodes}\nEdges: {self.edges}"
+
+class OptimisedCausalDAG(CausalDAG):
+    """Experimental CausalDAG subclass with its own adjustment-set enumeration and back-door criterion check."""
+
+    def enumerate_minimal_adjustment_sets(self, treatments: list[str], outcomes: list[str]) -> list[set[str]]:
+        """Return the minimal separators of the moralised ancestor graph that pass the constructive back-door check."""
+
+        # Step 1: Build the proper back-door graph and its moralized ancestor graph
+        pbd_graph = self.get_proper_backdoor_graph(treatments, outcomes)
+        ancestor_graph = pbd_graph.get_ancestor_graph(treatments, outcomes)
+        moral_graph = nx.moral_graph(ancestor_graph.graph)
+
+        # Step 2: Add artificial TREATMENT and OUTCOME nodes, linked to every treatment / outcome respectively
+        moral_graph.add_edges_from([("TREATMENT", t) for t in treatments])
+        moral_graph.add_edges_from([("OUTCOME", y) for y in outcomes])
+
+        # Step 3: Collect the neighbours of the treatments / outcomes (this includes the artificial node of Step 2)
+        treatment_neighbors = set()
+        for t in treatments:
+            treatment_neighbors.update(moral_graph[t])
+        treatment_neighbors.difference_update(treatments)
+
+        outcome_neighbors = set()
+        for y in outcomes:
+            outcome_neighbors.update(moral_graph[y])
+        outcome_neighbors.difference_update(outcomes)
+
+        # Step 4: Turn each neighbour set into a clique (the treatment / outcome nodes themselves are not removed here)
+        moral_graph.add_edges_from(combinations(treatment_neighbors, 2))
+        moral_graph.add_edges_from(combinations(outcome_neighbors, 2))
+
+        # Step 5: Set up the (lazy) enumeration of minimal separators between the two artificial nodes
+        outcome_node_set = set(moral_graph["OUTCOME"]) | {"OUTCOME"}
+        sep_candidates = list_all_min_sep_opt(
+            moral_graph,
+            "TREATMENT",
+            "OUTCOME",
+            {"TREATMENT"},
+            outcome_node_set,
+        )
+
+        # Step 6: Consume the generator, keeping only separators that satisfy the constructive back-door criterion
+        valid_sets = [
+            s for s in sep_candidates
+            if self.constructive_backdoor_criterion(pbd_graph, treatments, outcomes, s)
+        ]
+
+        return valid_sets
+
+    def constructive_backdoor_criterion(
+            self,
+            proper_backdoor_graph: CausalDAG,
+            treatments: list[str],
+            outcomes: list[str],
+            covariates: list[str],
+    ) -> bool:
+        """
+        Return True only if covariates pass both conditions checked below; a failed condition is logged at INFO.
+        """
+
+        covariate_set = set(covariates)
+
+        # Condition (1): Covariates must not be descendants of any node on a proper causal path
+        proper_path_vars = self.proper_causal_pathway(treatments, outcomes)
+
+        if proper_path_vars:
+            # Build the disallowed set: every proper causal path variable plus all of its descendants in this DAG
+            all_descendants = set()
+            for var in proper_path_vars:
+                all_descendants.update(nx.descendants(self.graph, var))
+                all_descendants.add(var)
+
+            if covariate_set & all_descendants:
+                # At least one covariate is in the disallowed set, so condition 1 fails
+                if logger.isEnabledFor(logging.INFO):
+                    logger.info(
+                        "Failed Condition 1: Z=%s **is** a descendant of variables on a proper causal path between X=%s and Y=%s.",
+                        covariates,
+                        treatments,
+                        outcomes,
+                    )
+                return False
+
+        # Condition (2): Z must d-separate X and Y in the proper back-door graph
+        if not nx.d_separated(
+                proper_backdoor_graph.graph,
+                set(treatments),
+                set(outcomes),
+                covariate_set,
+        ):
+            if logger.isEnabledFor(logging.INFO):
+                logger.info(
+                    "Failed Condition 2: Z=%s **does not** d-separate X=%s and Y=%s in the proper back-door graph.",
+                    covariates,
+                    treatments,
+                    outcomes,
+                )
+            return False
+
+        return True
+
+
+def list_all_min_sep_opt(
+        graph: nx.Graph,
+        treatment_node,
+        outcome_node,
+        treatment_node_set: Set,
+        outcome_node_set: Set,
+) -> Generator[Set, None, None]:
+    """Recursively yield the minimal treatment-outcome separators of an undirected graph (Takata 2013)."""
+
+    # Step 1: Compute the close separator via close_separator (not part of this hunk)
+    close_separator_set = close_separator(graph, treatment_node, outcome_node, treatment_node_set)
+
+    # Step 2: Remove the separator from a copy of the graph (the input graph is left untouched)
+    subgraph = graph.copy()
+    subgraph.remove_nodes_from(close_separator_set)
+
+    # Step 3: Find the component containing the treatment node
+    treatment_component = None
+    for component in nx.connected_components(subgraph):
+        if treatment_node in component:
+            treatment_component = component
+            break
+
+    # Step 4: Yield nothing if there is no such component or if it overlaps the outcome set
+    if treatment_component is None or treatment_component & outcome_node_set:
+        return
+
+    # Step 5: Update treatment node set to the connected component
+    treatment_node_set = treatment_component
+
+    # Step 6: Get the neighbours of the treatment set in the original graph, excluding the set itself
+    neighbour_nodes = set()
+    for node in treatment_node_set:
+        neighbour_nodes.update(graph[node])
+    neighbour_nodes.difference_update(treatment_node_set)
+
+    # Step 7: If neighbours exist outside the outcome set, recurse
+    remaining = neighbour_nodes - outcome_node_set
+    if remaining:
+        chosen = sample(sorted(remaining), 1)[0]  # Presumably random.sample: a random pick from the sorted candidates
+        # Left branch: add to treatment set
+        yield from list_all_min_sep_opt(
+            graph,
+            treatment_node,
+            outcome_node,
+            treatment_node_set | {chosen},
+            outcome_node_set,
+        )
+        # Right branch: add to outcome set
+        yield from list_all_min_sep_opt(
+            graph,
+            treatment_node,
+            outcome_node,
+            treatment_node_set,
+            outcome_node_set | {chosen},
+        )
+    else:
+        # Step 8: Every neighbour is already in the outcome set, so the neighbour set is yielded as a separator
+        yield neighbour_nodes
diff --git a/tests/specification_tests/test_causal_dag.py b/tests/specification_tests/test_causal_dag.py
@@ -2,7 +2,7 @@
 import os
 import shutil, tempfile
 import networkx as nx
-from causal_testing.specification.causal_dag import CausalDAG, close_separator, list_all_min_sep
+from causal_testing.specification.causal_dag import CausalDAG, close_separator, list_all_min_sep, OptimisedCausalDAG
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Output
 from causal_testing.testing.base_test_case import BaseTestCase
@@ -475,3 +475,166 @@ def test_hidden_varaible_adjustment_sets(self):
 
     def tearDown(self) -> None:
         shutil.rmtree(self.temp_dir_path)
+
+def time_it(label, func, *args, **kwargs):  # Call func(*args, **kwargs), print the elapsed time, return its result
+    import time
+    start = time.perf_counter()  # monotonic, high-resolution clock; time.time() can jump if the system clock changes
+    result = func(*args, **kwargs)
+    print(f"{label} took {time.perf_counter() - start:.6f} seconds")
+    return result
+
+class TestOptimisedDAGIdentification(TestDAGIdentification):
+    """
+    Run the DAG identification tests against OptimisedCausalDAG (uses dag_dot_path from TestDAGIdentification).
+    """
+
+    def test_is_min_adjustment_for_not_min_adjustment(self):
+        """Check that CausalDAG and OptimisedCausalDAG agree on adjustment_set_is_minimal for the set {Z, V}."""
+        causal_dag = CausalDAG(self.dag_dot_path)
+        xs, ys, zs = ["X1", "X2"], ["Y"], {"Z", "V"}
+
+        opt_dag = OptimisedCausalDAG(self.dag_dot_path)
+
+        norm_result = time_it(
+            "Norm",
+            lambda: causal_dag.adjustment_set_is_minimal(xs, ys, zs)
+        )
+        opt_result = time_it(
+            "Opt",
+            lambda: opt_dag.adjustment_set_is_minimal(xs, ys, zs)
+        )
+        self.assertEqual(norm_result, opt_result)
+
+    def test_is_min_adjustment_for_invalid_adjustment(self):
+        """Check that adjustment_set_is_minimal raises ValueError when given an empty adjustment set."""
+        causal_dag = OptimisedCausalDAG(self.dag_dot_path)
+        xs, ys, zs = ["X1", "X2"], ["Y"], set()
+        self.assertRaises(ValueError, causal_dag.adjustment_set_is_minimal, xs, ys, zs)
+
+    def test_get_ancestor_graph_of_causal_dag(self):
+        """Test whether get_ancestor_graph converts a CausalDAG to the correct ancestor graph."""
+        causal_dag = OptimisedCausalDAG(self.dag_dot_path)
+        xs, ys = ["X1", "X2"], ["Y"]
+        ancestor_graph = causal_dag.get_ancestor_graph(xs, ys)
+        self.assertEqual(list(ancestor_graph.nodes), ["X1", "X2", "D1", "Y", "Z"])
+        self.assertEqual(
+            list(ancestor_graph.edges),
+            [("X1", "X2"), ("X2", "D1"), ("D1", "Y"), ("Z", "X2"), ("Z", "Y")],
+        )
+
+    def test_get_ancestor_graph_of_proper_backdoor_graph(self):
+        """Test whether get_ancestor_graph yields the correct ancestor graph of the proper back-door graph."""
+        causal_dag = OptimisedCausalDAG(self.dag_dot_path)
+        xs, ys = ["X1", "X2"], ["Y"]
+        proper_backdoor_graph = causal_dag.get_proper_backdoor_graph(xs, ys)
+        ancestor_graph = proper_backdoor_graph.get_ancestor_graph(xs, ys)
+        self.assertEqual(list(ancestor_graph.nodes), ["X1", "X2", "D1", "Y", "Z"])
+        self.assertEqual(
+            list(ancestor_graph.edges),
+            [("X1", "X2"), ("D1", "Y"), ("Z", "X2"), ("Z", "Y")],
+        )
+
+    def test_enumerate_minimal_adjustment_sets(self):
+        """Test whether enumerate_minimal_adjustment_sets lists all possible minimum sized adjustment sets."""
+        causal_dag = OptimisedCausalDAG(self.dag_dot_path)
+        xs, ys = ["X1", "X2"], ["Y"]
+        adjustment_sets = causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
+        self.assertEqual([{"Z"}], adjustment_sets)
+
+    def test_enumerate_minimal_adjustment_sets_multiple(self):
+        """Test that the optimised enumeration finds every minimal adjustment set and agrees with CausalDAG."""
+        causal_dag = CausalDAG()
+        causal_dag.graph.add_edges_from(
+            [
+                ("X1", "X2"),
+                ("X2", "V"),
+                ("Z1", "X2"),
+                ("Z1", "Z2"),
+                ("Z2", "Z3"),
+                ("Z3", "Y"),
+                ("D1", "Y"),
+                ("D1", "D2"),
+                ("Y", "D3"),
+            ]
+        )
+        opt_causal_dag = OptimisedCausalDAG()  # the optimised implementation under test, built on the same edges
+        opt_causal_dag.graph.add_edges_from(
+            [
+                ("X1", "X2"),
+                ("X2", "V"),
+                ("Z1", "X2"),
+                ("Z1", "Z2"),
+                ("Z2", "Z3"),
+                ("Z3", "Y"),
+                ("D1", "Y"),
+                ("D1", "D2"),
+                ("Y", "D3"),
+            ]
+        )
+        xs, ys = ["X1", "X2"], ["Y"]
+
+        norm_adjustment_sets = time_it(
+            "Norm",
+            lambda: causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
+        )
+
+        opt_adjustment_sets = time_it(
+            "Opt",
+            lambda: opt_causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
+        )
+        set_of_opt_adjustment_sets = {frozenset(min_separator) for min_separator in opt_adjustment_sets}
+        self.assertEqual({frozenset(s) for s in norm_adjustment_sets}, set_of_opt_adjustment_sets)
+        self.assertEqual(
+            {frozenset({"Z1"}), frozenset({"Z2"}), frozenset({"Z3"})},
+            set_of_opt_adjustment_sets,
+        )
+
+    def test_enumerate_minimal_adjustment_sets_two_adjustments(self):
+        """Test whether enumerate_minimal_adjustment_sets lists all possible minimum adjustment sets of arity two."""
+        causal_dag = OptimisedCausalDAG()
+        causal_dag.graph.add_edges_from(
+            [
+                ("X1", "X2"),
+                ("X2", "V"),
+                ("Z1", "X2"),
+                ("Z1", "Z2"),
+                ("Z2", "Z3"),
+                ("Z3", "Y"),
+                ("D1", "Y"),
+                ("D1", "D2"),
+                ("Y", "D3"),
+                ("Z4", "X1"),
+                ("Z4", "Y"),
+                ("X2", "D1"),
+            ]
+        )
+        xs, ys = ["X1", "X2"], ["Y"]
+        adjustment_sets = causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
+        set_of_adjustment_sets = {frozenset(min_separator) for min_separator in adjustment_sets}
+        self.assertEqual(
+            {frozenset({"Z1", "Z4"}), frozenset({"Z2", "Z4"}), frozenset({"Z3", "Z4"})},
+            set_of_adjustment_sets,
+        )
+
+    def test_dag_with_non_character_nodes(self):
+        """Test identification for a DAG whose nodes are not just characters (strings of length greater than 1)."""
+        causal_dag = OptimisedCausalDAG()
+        causal_dag.graph.add_edges_from(
+            [
+                ("va", "ba"),
+                ("ba", "ia"),
+                ("ba", "da"),
+                ("ba", "ra"),
+                ("la", "va"),
+                ("la", "aa"),
+                ("aa", "ia"),
+                ("aa", "da"),
+                ("aa", "ra"),
+            ]
+        )
+        xs, ys = ["ba"], ["da"]
+        adjustment_sets = causal_dag.enumerate_minimal_adjustment_sets(xs, ys)
+        self.assertEqual(adjustment_sets, [{"aa"}, {"la"}, {"va"}])
+
+    def tearDown(self) -> None:
+        shutil.rmtree(self.temp_dir_path)