Merge pull request #212 from py-why/add_boss

kunwuz · web-flow · commit 282f20d80d6e · 2025-01-03T15:02:06.000-05:00
Add boss doc by Bryan
diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py
@@ -53,7 +53,7 @@ def boss(
     if n < p:
         warnings.warn("The number of features is much larger than the sample size!")
 
-    if score_func == "local_score_CV_general":  
+    if score_func == "local_score_CV_general":
         # % k-fold negative cross validated likelihood based on regression in RKHS
         if parameters is None:
             parameters = {
@@ -63,13 +63,13 @@ def boss(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_cv_general, parameters=parameters
         )
-    elif score_func == "local_score_marginal_general":  
+    elif score_func == "local_score_marginal_general":
         # negative marginal likelihood based on regression in RKHS
         parameters = {}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_general, parameters=parameters
         )
-    elif score_func == "local_score_CV_multi":  
+    elif score_func == "local_score_CV_multi":
         # k-fold negative cross validated likelihood based on regression in RKHS
         # for data with multi-variate dimensions
         if parameters is None:
@@ -83,7 +83,7 @@ def boss(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_cv_multi, parameters=parameters
         )
-    elif score_func == "local_score_marginal_multi":  
+    elif score_func == "local_score_marginal_multi":
         # negative marginal likelihood based on regression in RKHS
         # for data with multi-variate dimensions
         if parameters is None:
@@ -93,22 +93,22 @@ def boss(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_multi, parameters=parameters
         )
-    elif score_func == "local_score_BIC":  
+    elif score_func == "local_score_BIC":
         # SEM BIC score
         warnings.warn("Please use 'local_score_BIC_from_cov' instead")
         if parameters is None:
             parameters = {"lambda_value": 2}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC, parameters=parameters
         )
-    elif score_func == "local_score_BIC_from_cov":  
+    elif score_func == "local_score_BIC_from_cov":
         # SEM BIC score
         if parameters is None:
             parameters = {"lambda_value": 2}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters
         )
-    elif score_func == "local_score_BDeu":  
+    elif score_func == "local_score_BDeu":
         # BDeu score
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BDeu, parameters=None
@@ -204,4 +204,4 @@ def better_mutation(v, order, gsts):
     order.remove(v)
     order.insert(best - int(best > i), v)
 
-    return True
+    return True
diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py
@@ -16,7 +16,7 @@
     local_score_marginal_general,
     local_score_marginal_multi,
 )
-from causallearn.search.PermutationBased.gst import GST;
+from causallearn.search.PermutationBased.gst import GST
 from causallearn.score.LocalScoreFunctionClass import LocalScoreClass
 from causallearn.utils.DAG2CPDAG import dag2cpdag
 
@@ -111,7 +111,7 @@ def grasp(
     if n < p:
         warnings.warn("The number of features is much larger than the sample size!")
 
-    if score_func == "local_score_CV_general":  
+    if score_func == "local_score_CV_general":
         # k-fold negative cross validated likelihood based on regression in RKHS
         if parameters is None:
             parameters = {
@@ -127,7 +127,7 @@ def grasp(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_general, parameters=parameters
         )
-    elif score_func == "local_score_CV_multi": 
+    elif score_func == "local_score_CV_multi":
         # k-fold negative cross validated likelihood based on regression in RKHS
         # for data with multi-variate dimensions
         if parameters is None:
@@ -141,7 +141,7 @@ def grasp(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_cv_multi, parameters=parameters
         )
-    elif score_func == "local_score_marginal_multi":  
+    elif score_func == "local_score_marginal_multi":
         # negative marginal likelihood based on regression in RKHS
         # for data with multi-variate dimensions
         if parameters is None:
@@ -151,22 +151,22 @@ def grasp(
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_marginal_multi, parameters=parameters
         )
-    elif score_func == "local_score_BIC":  
+    elif score_func == "local_score_BIC":
         # SEM BIC score
         warnings.warn("Please use 'local_score_BIC_from_cov' instead")
         if parameters is None:
             parameters = {"lambda_value": 2}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC, parameters=parameters
         )
-    elif score_func == "local_score_BIC_from_cov":  
+    elif score_func == "local_score_BIC_from_cov":
         # SEM BIC score
         if parameters is None:
             parameters = {"lambda_value": 2}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters
         )
-    elif score_func == "local_score_BDeu":  
+    elif score_func == "local_score_BDeu":
         # BDeu score
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BDeu, parameters=None
@@ -204,7 +204,7 @@ def grasp(
             sys.stdout.flush()
 
     runtime = time.perf_counter() - runtime
-    
+
     if verbose:
         sys.stdout.write("\nGRaSP completed in: %.2fs \n" % runtime)
         sys.stdout.flush()
diff --git a/docs/source/search_methods_index/Permutation-based causal discovery methods/GRaSP.rst b/docs/source/search_methods_index/Permutation-based causal discovery methods/GRaSP.rst
@@ -6,7 +6,7 @@ GRaSP
 Algorithm Introduction
 --------------------------------------
 
-Greedy relaxation of the sparsest permutation (GRaSP) algorithm [1]_.
+Greedy relaxations of the sparsest permutation (GRaSP) algorithm [1]_.
 
 
 Usage
@@ -19,7 +19,7 @@ Usage
     G = grasp(X)
 
     # or customized parameters
-    G = grasp(X, score_func, depth, maxP, parameters)
+    G = grasp(X, score_func, depth, parameters)
 
     # Visualization using pydot
     from causallearn.utils.GraphUtils import GraphUtils
@@ -50,8 +50,6 @@ and n_features is the number of features.
               - ":ref:`local_score_CV_multi <Generalized score with cross validation>`": Generalized score with cross validation for data with multi-dimensional variables [2]_.
               - ":ref:`local_score_marginal_multi <Generalized score with marginal likelihood>`": Generalized score with marginal likelihood for data with multi-dimensional variables [2]_.
 
-**maxP**: Allowed maximum number of parents when searching the graph. Default: None.
-
 **parameters**: Needed when using CV likelihood. Default: None.
               - parameters['kfold']: k-fold cross validation.
               - parameters['lambda']: regularization parameter.
diff --git a/docs/source/search_methods_index/Permutation-based causal discovery methods/boss.rst b/docs/source/search_methods_index/Permutation-based causal discovery methods/boss.rst
@@ -0,0 +1,68 @@
+.. _BOSS:
+
+BOSS
+==============================================
+
+Algorithm Introduction
+--------------------------------------
+
+Best order score search (BOSS) algorithm [1]_.
+
+
+Usage
+----------------------------
+.. code-block:: python
+
+    from causallearn.search.PermutationBased.BOSS import boss
+
+    # default parameters
+    G = boss(X)
+
+    # or customized parameters
+    G = boss(X, score_func, parameters)
+
+    # Visualization using pydot
+    from causallearn.utils.GraphUtils import GraphUtils
+    import matplotlib.image as mpimg
+    import matplotlib.pyplot as plt
+    import io
+
+    pyd = GraphUtils.to_pydot(G)
+    tmp_png = pyd.create_png(f="png")
+    fp = io.BytesIO(tmp_png)
+    img = mpimg.imread(fp, format='png')
+    plt.axis('off')
+    plt.imshow(img)
+    plt.show()
+
+Visualization using pydot is recommended (`usage example <https://github.com/cmu-phil/causal-learn/blob/main/tests/TestBOSS.py>`_). If specific label names are needed, please refer to this `usage example <https://github.com/cmu-phil/causal-learn/blob/e4e73f8b58510a3cd5a9125ba50c0ac62a425ef3/tests/TestGraphVisualization.py#L106>`_ (e.g., GraphUtils.to_pydot(G, labels=["A", "B", "C"])).
+
+Parameters
+-------------------
+**X**: numpy.ndarray, shape (n_samples, n_features). Data, where n_samples is the number of samples
+and n_features is the number of features.
+
+**score_func**: The score function you would like to use (see :ref:`score_functions`). Default: 'local_score_BIC'. Options include:
+              - ":ref:`local_score_BIC <BIC score>`": BIC score [3]_.
+              - ":ref:`local_score_BDeu <BDeu score>`": BDeu score [4]_.
+              - ":ref:`local_score_CV_general <Generalized score with cross validation>`": Generalized score with cross validation for data with single-dimensional variables [2]_.
+              - ":ref:`local_score_marginal_general <Generalized score with marginal likelihood>`": Generalized score with marginal likelihood for data with single-dimensional variables [2]_.
+              - ":ref:`local_score_CV_multi <Generalized score with cross validation>`": Generalized score with cross validation for data with multi-dimensional variables [2]_.
+              - ":ref:`local_score_marginal_multi <Generalized score with marginal likelihood>`": Generalized score with marginal likelihood for data with multi-dimensional variables [2]_.
+
+**parameters**: Needed when using CV likelihood. Default: None.
+              - parameters['kfold']: k-fold cross validation.
+              - parameters['lambda']: regularization parameter.
+              - parameters['dlabel']: for multi-dimensional variables, indicates which dimensions belong to the i-th variable.
+
+
+
+Returns
+-------------------
+- **G**: learned general graph, where G.graph[j,i]=1 and G.graph[i,j]=-1 indicate i --> j; G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j.
+
+
+.. [1] Andrews, B., Ramsey, J., Sanchez Romero, R., Camchong, J., & Kummerfeld, E. (2023). Fast scalable and accurate discovery of DAGs using the best order score search and grow shrink trees. Advances in Neural Information Processing Systems, 36, 63945-63956.
+.. [2] Huang, B., Zhang, K., Lin, Y., Schölkopf, B., & Glymour, C. (2018, July). Generalized score functions for causal discovery. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (pp. 1551-1560).
+.. [3] Schwarz, G. (1978). Estimating the dimension of a model. The annals of statistics, 461-464.
+.. [4] Buntine, W. (1991). Theory refinement on Bayesian networks. In Uncertainty proceedings 1991 (pp. 52-60). Morgan Kaufmann.