py-why
diff --git a/‎causallearn/search/ConstraintBased/FCI.py‎
Lines changed: 3 additions & 3 deletions b/‎causallearn/search/ConstraintBased/FCI.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎causallearn/utils/Fas.py‎ renamed to ‎causallearn/utils/FAS.py‎
Lines changed: 58 additions & 72 deletions b/‎causallearn/utils/Fas.py‎ renamed to ‎causallearn/utils/FAS.py‎
Lines changed: 58 additions & 72 deletions
diff --git a/‎tests/TestFAS.py‎
Lines changed: 99 additions & 0 deletions b/‎tests/TestFAS.py‎
Lines changed: 99 additions & 0 deletions
diff --git a/‎tests/__pycache__/TestGES.cpython-38-pytest-7.1.2.pyc‎
-6.13 KB b/‎tests/__pycache__/TestGES.cpython-38-pytest-7.1.2.pyc‎
-6.13 KB
diff --git a/‎tests/__pycache__/TestLocalScore.cpython-38-pytest-7.1.2.pyc‎
-4.79 KB b/‎tests/__pycache__/TestLocalScore.cpython-38-pytest-7.1.2.pyc‎
-4.79 KB
diff --git a/‎tests/__pycache__/TestPC.cpython-38-pytest-7.1.2.pyc‎
-22.4 KB b/‎tests/__pycache__/TestPC.cpython-38-pytest-7.1.2.pyc‎
-22.4 KB
diff --git a/‎tests/__pycache__/__init__.cpython-38.pyc‎
-133 Bytes b/‎tests/__pycache__/__init__.cpython-38.pyc‎
-133 Bytes
diff --git a/‎tests/__pycache__/utils_simulate_data.cpython-38.pyc‎
-3.85 KB b/‎tests/__pycache__/utils_simulate_data.cpython-38.pyc‎
-3.85 KB
@@ -13,7 +13,7 @@
 from causallearn.utils.ChoiceGenerator import ChoiceGenerator
 from causallearn.utils.DepthChoiceGenerator import DepthChoiceGenerator
 from causallearn.utils.cit import *
-from causallearn.utils.Fas import fas
+from causallearn.utils.FAS import fas
 from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge
 
 
@@ -754,8 +754,8 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float =
         nodes.append(node)
 
     # FAS (“Fast Adjacency Search”) is the adjacency search of the PC algorithm, used as a first step for the FCI algorithm.
-    graph, sep_sets = fas(dataset, nodes, independence_test_method=independence_test_method, alpha=alpha,
-                          knowledge=background_knowledge, depth=depth, verbose=verbose)
+    graph, sep_sets, test_results = fas(dataset, nodes, independence_test_method=independence_test_method, alpha=alpha,
+                                        knowledge=background_knowledge, depth=depth, verbose=verbose)
 
     reorientAllWith(graph, Endpoint.CIRCLE)
 
 
@@ -1,26 +1,23 @@
 from __future__ import annotations
 
-from copy import deepcopy
 from itertools import combinations
 from typing import List, Dict, Tuple, Set
 
 from numpy import ndarray
 from tqdm.auto import tqdm
 
-from causallearn.graph.Edges import Edges
 from causallearn.graph.GeneralGraph import GeneralGraph
 from causallearn.graph.GraphClass import CausalGraph
 from causallearn.graph.Node import Node
-from causallearn.utils.ChoiceGenerator import ChoiceGenerator
 from causallearn.utils.PCUtils.Helper import append_value
 from causallearn.utils.cit import *
 from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge
 
 
-def fas(data: ndarray, nodes: List[Node], independence_test_method: CIT | None=None, alpha: float = 0.05,
+def fas(data: ndarray, nodes: List[Node], independence_test_method: CIT_Base, alpha: float = 0.05,
         knowledge: BackgroundKnowledge | None = None, depth: int = -1,
         verbose: bool = False, stable: bool = True, show_progress: bool = True) -> Tuple[
-    GeneralGraph, Dict[Tuple[int, int], Set[int]]]:
+    GeneralGraph, Dict[Tuple[int, int], Set[int]], Dict[Tuple[int, int, Set[int]], float]]:
     """
     Implements the "fast adjacency search" used in several causal algorithm in this file. In the fast adjacency
     search, at a given stage of the search, an edge X*-*Y is removed from the graph if X _||_ Y | S, where S is a subset
@@ -50,78 +47,87 @@ def fas(data: ndarray, nodes: List[Node], independence_test_method: CIT | None=N
     Returns
     -------
     graph: Causal graph skeleton, where graph.graph[i,j] = graph.graph[j,i] = -1 indicates i --- j.
-    sep_sets: separated sets of graph
+    sep_sets: Separating sets of the graph, keyed by node-index pair (x, y)
+    test_results: p-values of the conditional independence tests performed, keyed by (x, y, S)
     """
-
-    assert type(data) == np.ndarray
-    assert 0 < alpha < 1
+    ## ------- check parameters ------------
+    if type(data) != np.ndarray:
+        raise TypeError("'data' must be 'np.ndarray' type!")
+    if not all(isinstance(node, Node) for node in nodes):
+        raise TypeError("'nodes' must be 'List[Node]' type!")
+    if not isinstance(independence_test_method, CIT_Base):
+        raise TypeError("'independence_test_method' must be 'CIT_Base' type!")
+    if type(alpha) != float or alpha <= 0 or alpha >= 1:
+        raise TypeError("'alpha' must be 'float' type and between 0 and 1!")
+    if knowledge is not None and type(knowledge) != BackgroundKnowledge:
+        raise TypeError("'knowledge' must be 'BackgroundKnowledge' type!")
+    if type(depth) != int or depth < -1:
+        raise TypeError("'depth' must be 'int' type >= -1!")
+    ## ------- end check parameters ------------
+
+    if depth == -1:
+        depth = float('inf')
 
     no_of_var = data.shape[1]
     node_names = [node.get_name() for node in nodes]
     cg = CausalGraph(no_of_var, node_names)
     cg.set_ind_test(independence_test_method)
     sep_sets: Dict[Tuple[int, int], Set[int]] = {}
-
-    depth = -1
-    pbar = tqdm(total=no_of_var) if show_progress else None
-    while cg.max_degree() - 1 > depth:
-        depth += 1
-        edge_removal = []
-        if show_progress:
-            pbar.reset()
-        for x in range(no_of_var):
+    test_results: Dict[Tuple[int, int, Set[int]], float] = {}
+
+    def remove_if_exists(x: int, y: int) -> None:
+        edge = cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y])
+        if edge is not None:
+            cg.G.remove_edge(edge)
+
+    var_range = tqdm(range(no_of_var), leave=True) if show_progress \
+        else range(no_of_var)
+    current_depth: int = -1
+    while cg.max_degree() - 1 > current_depth and current_depth < depth:
+        current_depth += 1
+        edge_removal = set()
+        for x in var_range:
             if show_progress:
-                pbar.update()
-            if show_progress:
-                pbar.set_description(f'Depth={depth}, working on node {x}')
+                var_range.set_description(f'Depth={current_depth}, working on node {x}')
+                var_range.update()
             Neigh_x = cg.neighbors(x)
-            if len(Neigh_x) < depth - 1:
+            if len(Neigh_x) < current_depth - 1:
                 continue
             for y in Neigh_x:
-                knowledge_ban_edge = False
                 sepsets = set()
-                if knowledge is not None and (
-                        knowledge.is_forbidden(cg.G.nodes[x], cg.G.nodes[y])
-                        and knowledge.is_forbidden(cg.G.nodes[y], cg.G.nodes[x])):
-                    knowledge_ban_edge = True
-                if knowledge_ban_edge:
+                if (knowledge is not None and
+                    knowledge.is_forbidden(cg.G.nodes[x], cg.G.nodes[y])
+                    and knowledge.is_forbidden(cg.G.nodes[y], cg.G.nodes[x])):
                     if not stable:
-                        edge1 = cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y])
-                        if edge1 is not None:
-                            cg.G.remove_edge(edge1)
-                        edge2 = cg.G.get_edge(cg.G.nodes[y], cg.G.nodes[x])
-                        if edge2 is not None:
-                            cg.G.remove_edge(edge2)
+                        remove_if_exists(x, y)
+                        remove_if_exists(y, x)
                         append_value(cg.sepset, x, y, ())
                         append_value(cg.sepset, y, x, ())
                         sep_sets[(x, y)] = set()
                         sep_sets[(y, x)] = set()
                         break
                     else:
-                        edge_removal.append((x, y))  # after all conditioning sets at
-                        edge_removal.append((y, x))  # depth l have been considered
+                        edge_removal.add((x, y))  # after all conditioning sets at
+                        edge_removal.add((y, x))  # depth l have been considered
 
                 Neigh_x_noy = np.delete(Neigh_x, np.where(Neigh_x == y))
-                for S in combinations(Neigh_x_noy, depth):
+                for S in combinations(Neigh_x_noy, current_depth):
                     p = cg.ci_test(x, y, S)
+                    test_results[(x, y, S)] = p
                     if p > alpha:
                         if verbose:
                             print('%d ind %d | %s with p-value %f\n' % (x, y, S, p))
                         if not stable:
-                            edge1 = cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y])
-                            if edge1 is not None:
-                                cg.G.remove_edge(edge1)
-                            edge2 = cg.G.get_edge(cg.G.nodes[y], cg.G.nodes[x])
-                            if edge2 is not None:
-                                cg.G.remove_edge(edge2)
+                            remove_if_exists(x, y)
+                            remove_if_exists(y, x)
                             append_value(cg.sepset, x, y, S)
                             append_value(cg.sepset, y, x, S)
                             sep_sets[(x, y)] = set(S)
                             sep_sets[(y, x)] = set(S)
                             break
                         else:
-                            edge_removal.append((x, y))  # after all conditioning sets at
-                            edge_removal.append((y, x))  # depth l have been considered
+                            edge_removal.add((x, y))  # after all conditioning sets at
+                            edge_removal.add((y, x))  # depth l have been considered
                             for s in S:
                                 sepsets.add(s)
                     else:
@@ -130,32 +136,12 @@ def fas(data: ndarray, nodes: List[Node], independence_test_method: CIT | None=N
                 append_value(cg.sepset, x, y, tuple(sepsets))
                 append_value(cg.sepset, y, x, tuple(sepsets))
 
-        if show_progress:
-            pbar.refresh()
-
-        for (x, y) in list(set(edge_removal)):
-            edge1 = cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y])
-            if edge1 is not None:
-                cg.G.remove_edge(edge1)
+        for (x, y) in edge_removal:
+            remove_if_exists(x, y)
             if cg.sepset[x, y] is not None:
-                origin_list = []
-                for l_out in cg.sepset[x, y]:
-                    for l_in in l_out:
-                        origin_list.append(l_in)
-                sep_sets[(x, y)] = set(origin_list)
-                sep_sets[(y, x)] = set(origin_list)
-
-
-    # for x in range(no_of_var):
-    #     for y in range(x, no_of_var):
-    #         if cg.sepset[x, y] is not None:
-    #             origin_list = []
-    #             for l_out in cg.sepset[x, y]:
-    #                 for l_in in l_out:
-    #                     origin_list.append(l_in)
-    #             sep_sets[(x, y)] = set(origin_list)
-
-    if show_progress:
-        pbar.close()
+                origin_set = set(l_in for l_out in cg.sepset[x, y]
+                                 for l_in in l_out)
+                sep_sets[(x, y)] = origin_set
+                sep_sets[(y, x)] = origin_set
 
-    return cg.G, sep_sets
+    return cg.G, sep_sets, test_results
@@ -0,0 +1,99 @@
+import hashlib
+import os
+import random
+import unittest
+
+import numpy as np
+
+from causallearn.graph.GraphNode import GraphNode
+from causallearn.utils.cit import CIT, chisq, fisherz, kci, d_separation
+from causallearn.utils.FAS import fas
+from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge
+
+# Maps each benchmark result file to the MD5 hex digest it is expected to have.
+# Paths are relative, so they resolve against the current working directory
+# (presumably the repository root -- TODO confirm how the tests are launched).
+# NOTE(review): these are FCI benchmark outputs ("..._fci_...") and none of the
+# tests in this file reads them; confirm the integrity check is wanted here.
+BENCHMARK_TXTFILE_TO_MD5 = {
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt": "65f54932a9d8224459e56c40129e6d8b",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_cancer_fci_chisq_0.05.txt": "0312381641cb3b4818e0c8539f74e802",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_earthquake_fci_chisq_0.05.txt": "a1160b92ce15a700858552f08e43b7de",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_sachs_fci_chisq_0.05.txt": "dced4a202fc32eceb75f53159fc81f3b",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_survey_fci_chisq_0.05.txt": "b1a28eee1e0c6ea8a64ac1624585c3f4",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt": "c3bbc2b8aba456a4258dd071a42085bc",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt": "4a5000e7a582083859ee6aef15073676",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt": "6b7858589e12f04b0f489ba4589a1254",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt": "9975942b936aa2b1fc90c09318ca2d08",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt": "48eee804d59526187b7ecd0519556ee5",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt": "6b9a6b95b6474f8530e85c022f4e749c",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt": "4aae21ff3d9aa2435515ed2ee402294c",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt": "648fdf271e1440c06ca2b31b55ef1f3f",
+    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt": "04092ae93e54c727579f08bf5dc34c77",
+    "tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt": "289c86f9c665bf82bbcc4c9e1dcec3e7"
+}
+
+# Fail at import time if any benchmark file no longer matches its recorded MD5 digest.
+for file_path, expected_MD5 in BENCHMARK_TXTFILE_TO_MD5.items():
+    with open(file_path, 'rb') as benchmark_file:
+        actual_MD5 = hashlib.md5(benchmark_file.read()).hexdigest()
+    assert actual_MD5 == expected_MD5, \
+        f'{file_path} is corrupted. Please download it again from https://github.com/cmu-phil/causal-learn/blob/5918419/tests/TestData'
+
+
+class TestFAS(unittest.TestCase):
+    """Tests for ``fas``: argument validation, background knowledge and depth limits."""
+
+    def test_inputs(self):
+        """``fas`` raises ``TypeError`` when any single argument is invalid."""
+        data = np.loadtxt('tests/data_linear_10.txt', skiprows=1)
+        alpha = 0.05
+        cit = CIT(data, fisherz, alpha=alpha)
+        nodes = [GraphNode(f"X{i + 1}") for i in range(data.shape[1])]
+        bgk = BackgroundKnowledge()
+        valid_kwargs = dict(data=data, nodes=nodes, independence_test_method=cit, alpha=alpha,
+                            knowledge=bgk, verbose=False)
+        # Each case replaces exactly one otherwise-valid argument with an invalid value.
+        invalid_cases = [
+            ('data', None),
+            ('nodes', None),
+            ('independence_test_method', None),
+            ('alpha', 1),
+            ('alpha', 0),
+            ('knowledge', data),
+            ('depth', -2),  # fas only accepts integer depths >= -1
+        ]
+        for name, bad_value in invalid_cases:
+            with self.subTest(argument=name):
+                self.assertRaises(TypeError, fas, **{**valid_kwargs, name: bad_value})
+
+    @staticmethod
+    def run_test_with_random_background(data, cit, alpha):
+        """Run ``fas`` with a growing set of random forbidden pairs and check none survives as an edge.
+
+        Parameters
+        ----------
+        data: array loaded with ``np.loadtxt``; one column per variable
+        cit: conditional independence test object built with ``CIT``
+        alpha: significance level passed to ``fas``
+        """
+        random.seed(42)  # fixed seed so the forbidden pairs are reproducible
+
+        nodes = [GraphNode(f"X{i + 1}") for i in range(data.shape[1])]
+        bgk = BackgroundKnowledge()
+        for _ in range(5):
+            node1, node2 = random.sample(nodes, 2)
+            # Forbid both directions so that fas treats the pair as a banned edge.
+            bgk.add_forbidden_by_node(node1, node2)
+            bgk.add_forbidden_by_node(node2, node1)
+            # fas is re-run after every added pair, i.e. with 1, 2, ..., 5 forbidden pairs.
+            G, sep_sets, test_results = fas(data, nodes, cit, alpha, knowledge=bgk, verbose=False)
+            assert G.num_vars == data.shape[1], 'Graph should contain the same number of nodes as variables.'
+            assert all(G.get_edge(x, y) is None for x, y in bgk.forbidden_rules_specs), 'Graph contains forbidden edges.'
+
+    @staticmethod
+    def run_test_at_depths(data, cit, alpha):
+        """Run ``fas`` at random depth limits and check no conditioning set exceeds the limit.
+
+        Parameters
+        ----------
+        data: array loaded with ``np.loadtxt``; one column per variable
+        cit: conditional independence test object built with ``CIT``
+        alpha: significance level passed to ``fas``
+        """
+        random.seed(42)  # fixed seed so the sampled depths are reproducible
+
+        nodes = [GraphNode(f"X{i + 1}") for i in range(data.shape[1])]
+        for _ in range(3):
+            depth = random.randint(1, min(data.shape[1], 5))
+            G, sep_sets, test_results = fas(data, nodes, cit, alpha, depth=depth, verbose=False)
+            # default=0 keeps the check meaningful even if no test was recorded.
+            largest_conditioning_set = max((len(S) for _, _, S in test_results), default=0)
+            assert largest_conditioning_set <= depth, 'Tests performed with depth greater than maximum depth.'
+
+    def test_bnlearn_discrete_datasets(self):
+        """Run both helper checks on every bnlearn discrete benchmark using the chi-square test."""
+        benchmark_names = [
+            "asia", "cancer", "earthquake", "sachs", "survey",
+            "alarm", "barley", "child", "insurance", "water",
+            "hailfinder", "hepar2", "win95pts",
+            "andes"
+        ]
+
+        bnlearn_path = 'tests/TestData/bnlearn_discrete_10000/data'
+        alpha = 0.05
+        for bname in benchmark_names:
+            # subTest makes a failure report name the dataset that caused it.
+            with self.subTest(dataset=bname):
+                print(f'Testing discrete dataset "{bname}"...')
+                data = np.loadtxt(os.path.join(bnlearn_path, f'{bname}.txt'), skiprows=1)
+                cit = CIT(data, chisq, alpha=alpha)
+                TestFAS.run_test_with_random_background(data, cit, alpha)
+                TestFAS.run_test_at_depths(data, cit, alpha)
+
+    def test_continuous_dataset(self):
+        """Run both helper checks on the linear continuous dataset using the Fisher-z test."""
+        print('Testing continuous dataset...')
+        data = np.loadtxt('tests/data_linear_10.txt', skiprows=1)
+        alpha = 0.05
+        cit = CIT(data, fisherz, alpha=alpha)
+        TestFAS.run_test_with_random_background(data, cit, alpha)
+        TestFAS.run_test_at_depths(data, cit, alpha)