felixleopoldo
diff --git a/‎VERSION‎
Lines changed: 1 addition & 1 deletion b/‎VERSION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/render_data_docs.py‎
Lines changed: 17 additions & 1 deletion b/‎docs/render_data_docs.py‎
Lines changed: 17 additions & 1 deletion
diff --git a/‎docs/source/available_background_knowledge.rst‎
Lines changed: 18 additions & 16 deletions b/‎docs/source/available_background_knowledge.rst‎
Lines changed: 18 additions & 16 deletions
diff --git a/‎docs/source/available_data.rst‎
Lines changed: 6 additions & 0 deletions b/‎docs/source/available_data.rst‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎docs/source/available_structure_learning_algorithms.rst‎
Lines changed: 5 additions & 1 deletion b/‎docs/source/available_structure_learning_algorithms.rst‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎docs/source/data_formats.rst‎
Lines changed: 10 additions & 0 deletions b/‎docs/source/data_formats.rst‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎docs/source/index.rst‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/index.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/structure_learning_algorithms/huge_glasso.rst‎
Lines changed: 54 additions & 1 deletion b/‎docs/source/structure_learning_algorithms/huge_glasso.rst‎
Lines changed: 54 additions & 1 deletion
diff --git a/‎workflow/rules/data/mvpc_gen_data/bibtex.bib‎
Lines changed: 20 additions & 0 deletions b/‎workflow/rules/data/mvpc_gen_data/bibtex.bib‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎workflow/rules/data/mvpc_gen_data/docs.rst‎
Lines changed: 2 additions & 0 deletions b/‎workflow/rules/data/mvpc_gen_data/docs.rst‎
Lines changed: 2 additions & 0 deletions
@@ -1 +1 @@
-2.9.0
+2.10.0
@@ -206,13 +206,29 @@ def info_to_small_table():
         module_str += "\n\n"
         module_str += content
         module_str += "\n\n"
+
 
     if p.name == "fixed_data":
         with open(p/"data_info.json") as json_data_file:
             fixed_data_info = json.load(json_data_file)
         module_str += fixed_data_to_table(fixed_data_info, p)
         module_str += "\n\n"
-        
+    else:
+        with open(s) as json_file:    
+            schema = json.load(json_file)
+
+        tmp = any(["description" in obj 
+            for prop, obj in schema["items"]["properties"].items() 
+            if prop != "id"])
+
+        if tmp:
+            module_str += ".. rubric:: Some fields described \n"
+            for prop, obj in sorted(schema["items"]["properties"].items()):
+                if prop == "id":
+                    continue
+                if "description" in obj:                
+                    module_str += "* ``{}`` {} \n".format(prop, obj["description"])
+
 
     if dump != "":
         module_str += "\n\n"
 
@@ -1,37 +1,39 @@
 .. _edge_constraints:
 
-Edge Constraints
+Edge constraints
 ---------------------------------------
 
 Benchpress allows users to incorporate edge constraints to guide structure learning algorithms in several packages: 
-**pcalg**, **bnlearn**, **tetrad**, **gobnilp**, and **bidag**. These constraints enable the inclusion of prior knowledge to refine 
+**pcalg**, **mvpc**, **bnlearn**, **tetrad**, **gobnilp**, and **bidag**. These constraints enable the inclusion of prior knowledge to refine 
 the search space of causal graphs, improving the reliability of the inferred relationships. Users can specify **forbidden or 
 required edges**, **tiers for temporal ordering**, and **group-based constraints**.
 
 The edge constraints should be defined in a JSON file located within the ``resources/constraints`` folder.
 
 .. rubric:: Supported Constraints
 
-+--------------------+---------------------+---------------------+--------------------+----------------------+-----------------------+
-| **Package**        | **forbidden_edges** | **required_edges**  | **tiers**          | **forbidden_groups** | **required_groups**   |
-+====================+=====================+=====================+====================+======================+=======================+
-| pcalg              | X                   | X                   | N/A                | N/A                  | N/A                   |
-+--------------------+---------------------+---------------------+--------------------+----------------------+-----------------------+
-| bnlearn            | X                   | X                   | X                  | X                    | X                     |
-+--------------------+---------------------+---------------------+--------------------+----------------------+-----------------------+
-| tetrad             | X                   | X                   | X                  | X                    | X                     |
-+--------------------+---------------------+---------------------+--------------------+----------------------+-----------------------+
-| gobnilp            | X                   | X                   | X                  | X                    | X                     |
-+--------------------+---------------------+---------------------+--------------------+----------------------+-----------------------+
-| bidag              | X                   | N/A                 | N/A                | X                    | N/A                   |
-+--------------------+---------------------+---------------------+--------------------+----------------------+-----------------------+
++-------------+---------------------+--------------------+-----------+----------------------+---------------------+
+| **Package** | **forbidden_edges** | **required_edges** | **tiers** | **forbidden_groups** | **required_groups** |
++=============+=====================+====================+===========+======================+=====================+
+| mvpc        | X                   | X                  | N/A       | N/A                  | N/A                 |
++-------------+---------------------+--------------------+-----------+----------------------+---------------------+
+| pcalg       | X                   | X                  | N/A       | N/A                  | N/A                 |
++-------------+---------------------+--------------------+-----------+----------------------+---------------------+
+| bnlearn     | X                   | X                  | X         | X                    | X                   |
++-------------+---------------------+--------------------+-----------+----------------------+---------------------+
+| tetrad      | X                   | X                  | X         | X                    | X                   |
++-------------+---------------------+--------------------+-----------+----------------------+---------------------+
+| gobnilp     | X                   | X                  | X         | X                    | X                   |
++-------------+---------------------+--------------------+-----------+----------------------+---------------------+
+| bidag       | X                   | N/A                | N/A       | X                    | N/A                 |
++-------------+---------------------+--------------------+-----------+----------------------+---------------------+
 
 .. rubric:: Description
 
 - ``forbidden_edges``: A list of directed edges that are explicitly prohibited from existing between specific nodes. Each edge is defined as a pair of nodes, where the first node cannot directly cause the second node. 
 - ``required_edges``: A list of directed edges that are enforced between specific nodes. Each edge is defined as a pair of nodes, where the first node must directly cause the second node. 
 
-  - *Note: For algorithms in the* **pcalg** *package, the above attributes only specify the presence or absence of edges and do not control their directionality.*
+  - *Note: For algorithms in the* **pcalg** *and* **mvpc** *packages, the above attributes only specify the presence or absence of edges and do not control their directionality.*
 - ``tiers``: Defines a temporal ordering of nodes across multiple levels (or tiers). Nodes in one tier are constrained from causing nodes in any of the preceding tiers. 
 - ``tier_settings``: 
 
 
@@ -13,6 +13,7 @@ Data
     data/fixed_data
     data/gcastle_iidsim
     data/iid
+    data/mvpc_gen_data
 The available data modules are listed below.
 
 
@@ -41,6 +42,11 @@ The available data modules are listed below.
      - 
      - 
      - :ref:`iid` 
+   * - Missing data generation
+     - `DAG <https://en.wikipedia.org/wiki/Directed_acyclic_graph>`__
+     - `MVPC <https://github.com/felixleopoldo/MVPC>`__
+     - d901361
+     - :ref:`mvpc_gen_data` 
 
 
 
@@ -30,7 +30,6 @@ Algorithms
     structure_learning_algorithms/bnlearn_sihitonpc
     structure_learning_algorithms/bnlearn_tabu
     structure_learning_algorithms/causaldag_gsp
-    structure_learning_algorithms/causallearn_ges
     structure_learning_algorithms/causallearn_grasp
     structure_learning_algorithms/corr_thresh
     structure_learning_algorithms/dualpc
@@ -53,6 +52,7 @@ Algorithms
     structure_learning_algorithms/huge_glasso
     structure_learning_algorithms/huge_mb
     structure_learning_algorithms/huge_tiger
+    structure_learning_algorithms/mvpc
     structure_learning_algorithms/paralleldg
     structure_learning_algorithms/pcalg_gies
     structure_learning_algorithms/pcalg_pc
@@ -259,6 +259,10 @@ To add new modules, see :ref:`new_modules`.
      - `UG <https://en.wikipedia.org/wiki/Graph_(discrete_mathematics)#Graph>`__
      - `huge <https://cran.r-project.org/web/packages/huge/index.html>`__
      - :ref:`huge_tiger` 
+   * - MVPC
+     - `CPDAG <https://search.r-project.org/CRAN/refmans/pcalg/html/dag2cpdag.html>`__
+     - `MVPC <https://github.com/felixleopoldo/MVPC>`__
+     - :ref:`mvpc` 
    * - Parallel DG
      - `DG <https://en.wikipedia.org/wiki/Chordal_graph>`__
      - `parallelDG <https://github.com/melmasri/parallelDG>`__
 
@@ -124,6 +124,16 @@ If in the continuous example above there would be two additional observations wh
     1.2,1.2,2.2,4.2,1,0
     1.1,1.5,1.4,2.2,1,1
 
+Missing data
+*************
+
+Missing data is indicated by the absence of a value. Below is an example of a dataset where the value of column b in the second row is missing.
+
+.. rubric:: Example (missing data)
+
+    a,b,c,d
+    0.2,2.3,5.3,0.5
+    3.2,,2.5,1.2   
 
 
 Parameters
 
@@ -96,6 +96,7 @@ generated datasets, the workflow also includes a number of standard datasets and
 
 .. rubric:: News
 
+* 2024-11-30: Benchpress 2.10.0. This version includes algorithms from the MVPC package for sampling (:ref:`mvpc_gen_data`) and causal discovery (:ref:`mvpc`) in the presence of missing data.
 * 2024-11-24: Benchpress 2.9.0. This version comes with three new major features. 
 
     I) The ability to incorporate background knowledge in terms of :ref:`edge_constraints`. Thanks to `Gomathi Lakshmanan <https://www.linkedin.com/in/gomathi-l/>`_ for this great feature. 
 
@@ -31,7 +31,60 @@ huge_glasso
 
 .. rubric:: Description
 
-Abstract: We consider the problem of estimating sparse graphs by a lasso penalty applied to the inverse covariance matrix. Using a coordinate descent procedure for the lasso, we develop a simple algorithm—the graphical lasso—that is remarkably fast: It solves a 1000-node problem (∼500000 parameters) in at most a minute and is 30–4000 times faster than competing methods. It also provides a conceptual link between the exact problem and the approximation suggested by Meinshausen and Bühlmann (2006). We illustrate the method on some cell-signaling data from proteomics.
+Abstract:
+We consider the problem of estimating the marginal independence structure of a Bayesian network from observational data in the form of an undirected graph called the unconditional dependence graph. We show that unconditional dependence graphs of Bayesian networks correspond to the graphs having equal independence and intersection numbers. Using this observation, a Gröbner basis for a toric ideal associated to unconditional dependence graphs of Bayesian networks is given and then extended by additional binomial relations to connect the space of all such graphs. An MCMC method, called GrUES (Gröbner-based Unconditional Equivalence Search), is implemented based on the resulting moves and applied to synthetic Gaussian data. GrUES recovers the true marginal independence structure via a penalized maximum likelihood or MAP estimate at a higher rate than simple independence tests while also yielding an estimate of the posterior, for which the 20% HPD credible sets include the true structure at a high rate for data-generating graphs with density at least 0.5.
+
+.. rubric:: Example
+
+Config file: `grues_vs_corr-thresh.json <https://github.com/felixleopoldo/benchpress/blob/master/workflow/rules/structure_learning_algorithms/grues/grues_vs_corr-thresh.json>`_
+
+Command:
+
+.. code:: bash
+
+    snakemake --cores all --use-singularity --configfile workflow/rules/structure_learning_algorithms/grues/grues_vs_corr-thresh.json
+
+:numref:`roc_grues_vs_thresh` shows the ROC and :numref:`shd_grues_vs_thresh` shows the SHD comparing GrUES to correlation thresholding for datasets from five different graphs corresponding to a 5-variable random Gaussian SEM whose nodes have average degree of 1 and whose edge weights were allowed to be close to 0. Each dataset contains 300 observations and each Markov chain has 10000 observations. Note that SHD between a learned UDG and true CPDAG is not the most reasonable comparison because an inflated FPR will be reported---see :footcite:t:`grues2023` for discussion and a more reasonable benchmark.
+
+:numref:`adj_grues` shows that GrUES estimates the correct `UDG <https://arxiv.org/pdf/2210.00822.pdf#subsection.2.2>`__ while correlation thresholding (:numref:`adj_thresh`) misses the edge `1---2`.
+
+
+.. _roc_grues_vs_thresh:
+
+.. figure:: ../../../workflow/rules/structure_learning_algorithms/grues/images/roc.png
+    :width: 320
+    :alt: ROC (FPR vs. TPR) GrUES vs corr_thresh example
+    :align: left
+
+    ROC of GrUES vs corr_thresh.
+
+.. _shd_grues_vs_thresh:
+
+.. figure:: ../../../workflow/rules/structure_learning_algorithms/grues/images/shd.png
+    :width: 320
+    :alt: SHD GrUES vs corr_thresh example
+    :align: right
+
+    SHD of GrUES vs corr_thresh.
+
+.. _adj_grues:
+
+.. figure:: ../../../workflow/rules/structure_learning_algorithms/grues/images/diffplot_30.png
+    :width: 320
+    :alt: adjacency matrix GrUES example
+    :align: left
+
+    Adj mat learned by GrUES.
+
+.. _adj_thresh:
+
+.. figure:: ../../../workflow/rules/structure_learning_algorithms/grues/images/diffplot_15.png
+    :width: 320
+    :alt: adjacency matrix corr_thresh example
+    :align: right
+
+    Adj mat learned by corr_thresh.
+
 
 .. rubric:: Some fields described 
 * ``lambda`` A positive number to control the regularization. Typical usage is to leave the input lambda: null and have the program compute its own. 
 
@@ -0,0 +1,20 @@
+@article{mohan2013graphical,
+  title={Graphical models for inference with missing data},
+  author={Mohan, Karthika and Pearl, Judea and Tian, Jin},
+  journal={Advances in neural information processing systems},
+  volume={26},
+  year={2013},
+  url={https://proceedings.neurips.cc/paper_files/paper/2013/file/0ff8033cf9437c213ee13937b1c4c455-Paper.pdf}
+}
+
+@article{rubin1976inference,
+  title={Inference and missing data},
+  author={Rubin, Donald B},
+  journal={Biometrika},
+  volume={63},
+  number={3},
+  pages={581--592},
+  year={1976},
+  publisher={Oxford University Press},
+  url={https://academic.oup.com/biomet/article-abstract/63/3/581/270932?redirectedFrom=fulltext}
+}
@@ -0,0 +1,2 @@
+Module for synthetic Gaussian data generation with different types of missingness: missing at random (MAR), 
+missing completely at random (MCAR), and missing not at random (MNAR) :footcite:t:`mohan2013graphical`, :footcite:t:`rubin1976inference`.
Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@ Data`
`13`	`13`	`data/fixed_data`
`14`	`14`	`data/gcastle_iidsim`
`15`	`15`	`data/iid`
	`16`	`+ data/mvpc_gen_data`
`16`	`17`	`The available data modules are listed below.`
`17`	`18`
`18`	`19`
`@@ -41,6 +42,11 @@ The available data modules are listed below.`
`41`	`42`	`-`
`42`	`43`	`-`
`43`	`44`	- :ref:`iid`
	`45`	`+ * - Missing data generation`
	`46`	+ - `DAG <https://en.wikipedia.org/wiki/Directed_acyclic_graph>`__
	`47`	+ - `MVPC <https://github.com/felixleopoldo/MVPC>`__
	`48`	`+ - d901361`
	`49`	+ - :ref:`mvpc_gen_data`
`44`	`50`
`45`	`51`
`46`	`52`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+Module for synthetic Gaussian data generation with different types of missingness: missing at random (MAR),`
	`2`	+missing completely at random (MCAR), and missing not at random (MNAR) :footcite:t:`mohan2013graphical`, :footcite:t:`rubin1976inference`.