diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 7e8d2a062..26c0bc947 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -2,7 +2,7 @@ name: CompatHelper on: schedule: - - cron: '00 * * * *' + - cron: '00 8 * * *' jobs: CompatHelper: diff --git a/.gitignore b/.gitignore index dc1a26755..36072ee56 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ Manifest.toml *.scss *.css vscode/* +*.cov \ No newline at end of file diff --git a/Project.toml b/Project.toml index aea5460d1..bee865bca 100644 --- a/Project.toml +++ b/Project.toml @@ -2,7 +2,7 @@ name = "CausalityTools" uuid = "5520caf5-2dd7-5c5d-bfcb-a00e56ac49f7" authors = ["Kristian Agasøster Haaga ", "Tor Einar Møller ", "George Datseris "] repo = "https://github.com/kahaaga/CausalityTools.jl.git" -version = "2.10.1" +version = "3.0.0" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" @@ -12,10 +12,9 @@ DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2" DelayEmbeddings = "5732040d-69e3-5649-938a-b6b4f237613f" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" -LabelledArrays = "2ee39098-c373-598a-b85f-a56591580800" +LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" Neighborhood = "645ca80c-8b79-4109-87ea-e1f58159d116" @@ -24,37 +23,36 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecurrenceAnalysis = "639c3291-70d9-5ea2-8c5b-839eba1ee399" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Scratch = "6c6a2e73-6563-6170-7368-637461726353" -SimpleDiffEq = "05bca326-078c-5bf0-a5bf-ce7c7982d7fd" +Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StateSpaceSets = "40b095a5-5852-4c12-98c7-d43bf788e795" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b" TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70" [compat] Accessors = "^0.1.28" Combinatorics = "1" -ComplexityMeasures = "2 - 2.8" -DSP = "0.7" +ComplexityMeasures = "3.6.5" +DSP = "^0.7" DelayEmbeddings = "2.7" Distances = "^0.10" -Distributions = "^0.24, 0.25" -DynamicalSystemsBase = "3" +Distributions = "^0.25" Graphs = "^1.8" -HypothesisTests = "0.8, 1, 0.10, 0.11" -LabelledArrays = "1.6.7" -NearestNeighbors = "0.4" -Neighborhood = "0.2.2" +HypothesisTests = "^0.11" +NearestNeighbors = "^0.4" +Neighborhood = "^0.2.4" ProgressMeter = "1.7" RecurrenceAnalysis = "2" -Reexport = "0.2, 1" +Reexport = "1" Scratch = "1" -SimpleDiffEq = "^1" +Setfield = "1.1.1" SpecialFunctions = "2" -StateSpaceSets = "^1.4" +StateSpaceSets = "^1.5" StaticArrays = "^1" -StatsBase = "^0.33, 0.34" -TimeseriesSurrogates = "2.5" -julia = "^1.6" +StatsBase = "^0.34" +StyledStrings = "1" +TimeseriesSurrogates = "2.6" +julia = "^1.10" diff --git a/README.md b/README.md index 1e18b6b3b..75408d96b 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,7 @@ [![codecov](https://codecov.io/gh/JuliaDynamics/CausalityTools.jl/branch/master/graph/badge.svg?token=0b71n6x6AP)](https://codecov.io/gh/JuliaDynamics/CausalityTools.jl) 
[![DOI](https://zenodo.org/badge/135443027.svg)](https://zenodo.org/badge/latestdoi/135443027) -CausalityTools.jl is a package for quantifying associations and dynamical coupling -between datasets, independence testing and causal inference. +CausalityTools.jl is a package for quantifying associations, independence testing and causal inference. All further information is provided in the [documentation](https://juliadynamics.github.io/CausalityTools.jl/dev), which you can either @@ -15,16 +14,17 @@ find online or build locally by running the `docs/make.jl` file. ## Key features -- Association measures from conventional statistics, information theory and dynamical - systems theory, for example distance correlation, mutual information, transfer entropy, - convergent cross mapping and a lot more! -- A dedicated API for independence testing, which comes with automatic compatibility with - every measure-estimator combination you can think of. For example, we offer the generic - `SurrogateTest`, which is fully compatible with - [TimeseriesSurrogates.jl](https://github.com/JuliaDynamics/TimeseriesSurrogates.jl), - and the `LocalPermutationTest` for conditional indepencence testing. -- A dedicated API for causal network inference based on these measures and independence - tests. +- **Association API**: includes measures and their estimators for pairwise, conditional and other forms of + association from conventional statistics, from dynamical systems theory, and from information theory: partial correlation, distance correlation, (conditional) mutual information, transfer entropy, convergent cross mapping and a lot more! +- **Independence testing API**, which is automatically compatible with + every association measure estimator implemented in the package. +- **Causal (network) inference API** integrating the association measures and independence testing framework. + +## Additional features + +- Multivariate probability estimation, extending the API from + [ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl). + ## Installation diff --git a/changelog.md b/changelog.md index 8952ba547..7cffe3147 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,70 @@ # Changelog +## 3.0 (new major release) + +This release contains several breaking changes. Any code from before v3.0 will need +updating to continue working with v3.0. + +The main reason for these breaking changes is that estimators now store the +definitions they estimate. This way, we reduce the amount of code we have to write, +maintain, document and test. At the same time, we hope it is a bit more user-friendly +to only relate to "one way of thinking" about estimating association measures. + +### Breaking changes + +#### Association measures + +- The function `association(measure_or_est, input_data...)` is the central function that computes + all association measures. The first argument is either a measure definition (if it has no + estimator), or an estimator. This means that if `input_data` consists of two input datasets, + then a pairwise association is estimated. If `input_data` consists of three input datasets, then typically a conditional association is estimated (but exceptions are possible). + +#### Independence testing + +- `SurrogateTest` is now `SurrogateAssociationTest` +- `SurrogateTestResult` is now `SurrogateAssociationTestResult` + +#### Example systems + +- All example systems are removed. + +#### Crossmap API + +The crossmap API has been overhauled.
+ +- `CrossmapEstimator`s now take the `CrossmapMeasure` definition as their first argument. + For example, you'll have to do `ExpandingSegment(CCM(); libsizes = 10:10:50)` instead + of `ExpandingSegment(; libsizes = 10:10:50)`. + +#### Information API + +The information API has been overhauled. + +- Multivariate information measures now store their parameters explicitly, instead + of using `ComplexityMeasures.EntropyDefinition` to do so. For example, to + define Shannon-type conditional mutual information, one should do + `CMIShannon(base = 2)` instead of `CMIShannon(Shannon(base = 2))`. +- New generic discrete estimator `JointProbabilities` for estimating multivariate + information measures. This estimator explicitly computes the joint distribution + based on the given discretization, and can be applied to any measure which is + defined as a function of a joint distribution. +- New generic decomposition-based estimator `EntropyDecomposition`. This estimator + computes some multivariate information measure by rewriting the measure definition + as a combination of some lower-level measure. For example, `CMIShannon` can be + rewritten as a sum of `Shannon` entropies. Each of these terms can then + be estimated using some differential entropy estimator, e.g. `ZhuSingh` or `Kraskov`. +- New generic decomposition-based estimator `MIDecomposition`. This estimator + computes some multivariate information measure by rewriting the measure definition + as a combination of some mutual information measure. +- New generic decomposition-based estimator `CMIDecomposition`. This estimator + computes some multivariate information measure by rewriting the measure definition + as a combination of some conditional mutual information measure. + +### Bug fixes + +- There was an error in the implementation of `PartialMutualInformation`. It is now fixed using explicit loops for computing the measure from a probability distribution. + + ## 2.10 - Progress bars in some independence tests (surrogate, local permutation) can be diff --git a/docs/Project.toml b/docs/Project.toml index c67b54c1c..20a67c83a 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,4 @@ [deps] -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" CausalityTools = "5520caf5-2dd7-5c5d-bfcb-a00e56ac49f7" ComplexityMeasures = "ab4b797d-85ee-42ba-b621-05d793b346a2" @@ -18,6 +17,7 @@ LabelledArrays = "2ee39098-c373-598a-b85f-a56591580800" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" StateSpaceSets = "40b095a5-5852-4c12-98c7-d43bf788e795" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/docs/build_docs_with_style.jl b/docs/build_docs_with_style.jl new file mode 100644 index 000000000..69d38f8b7 --- /dev/null +++ b/docs/build_docs_with_style.jl @@ -0,0 +1,73 @@ +CI = get(ENV, "CI", nothing) == "true" || get(ENV, "GITHUB_TOKEN", nothing) !== nothing + + import Pkg + Pkg.pkg"add Documenter@1" + + # Load documenter + using Documenter + using DocumenterTools: Themes + ENV["JULIA_DEBUG"] = "Documenter" + + # For easier debugging when downloading from a specific branch.
+ github_user = "JuliaDynamics" + branch = "master" + download_path = "https://raw.githubusercontent.com/$github_user/doctheme/$branch" + + import Downloads + for file in ("juliadynamics-lightdefs.scss", "juliadynamics-darkdefs.scss", "juliadynamics-style.scss") + Downloads.download("$download_path/$file", joinpath(@__DIR__, file)) + end + + # create the themes + for w in ("light", "dark") + header = read(joinpath(@__DIR__, "juliadynamics-style.scss"), String) + theme = read(joinpath(@__DIR__, "juliadynamics-$(w)defs.scss"), String) + write(joinpath(@__DIR__, "juliadynamics-$(w).scss"), header*"\n"*theme) + end + + # compile the themes + Themes.compile(joinpath(@__DIR__, "juliadynamics-light.scss"), joinpath(@__DIR__, "src/assets/themes/documenter-light.css")) + Themes.compile(joinpath(@__DIR__, "juliadynamics-dark.scss"), joinpath(@__DIR__, "src/assets/themes/documenter-dark.css")) + + # Download and apply CairoMakie plotting style + using CairoMakie + Downloads.download("$download_path/style.jl", joinpath(@__DIR__, "style.jl")) + include("style.jl") + + function build_docs_with_style(pages, modules...; bib = nothing, authors = "George Datseris", draft = false, kwargs...) + settings = ( + modules = [modules...], + format = Documenter.HTML( + prettyurls = CI, + assets = [ + asset("https://fonts.googleapis.com/css?family=Montserrat|Source+Code+Pro&display=swap", class=:css), + ], + collapselevel = 3, + size_threshold = 1000 * 5^10, # 200 KiB + size_threshold_warn = true, + ), + sitename = "$(modules[1]).jl", + authors, + pages, + draft, + doctest = false, + warnonly = true, + checkdocs = :exported, + kwargs... + ) + + if isnothing(bib) + makedocs(; settings...) + else + makedocs(; plugins=[bib], settings...) + end + + if CI + deploydocs( + repo = "github.com/JuliaDynamics/$(modules[1]).jl.git", + target = "build", + push_preview = true + ) + end + + end \ No newline at end of file diff --git a/docs/make.jl b/docs/make.jl index ca62735b7..c68fd213d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,21 +10,42 @@ using ComplexityMeasures using StateSpaceSets pages = [ - "Overview" => "index.md", - "Association measures" => "measures.md", - "Independence testing" => "independence.md", - "Causal graphs" => "causal_graphs.md", - "APIs and estimators" => "api.md", - "Examples" => "examples.md", - "Predefined systems" => "coupled_systems.md", - "Experimental" => "experimental.md", + "CausalityTools.jl" => "index.md", + "Association measures" => "associations.md", + "Independence" => "independence.md", + "Network/graph inference" => "causal_graphs.md", + "Examples" => [ + "Association measures" => "examples/examples_associations.md", + "Graph inference" => "examples/examples_infer_graphs.md", + "Extended examples" => [ + "extended_examples/cross_mapping.md", + "extended_examples/pairwise_asymmetric_inference.md", + "extended_examples/mutual_information.md", + ], + #"Correlation examples" => "examples/examples_correlation.md", + #"Closeness examples" => "examples/examples_closeness.md", + #"Crossmap examples" => "examples/examples_cross_mappings.md", + ], + + "Basics and tutorials" => [ + "Encoding elements" => "encoding_tutorial.md", + "Encoding input datasets" => "discretization_tutorial.md", + "Counts and probabilities" => "probabilities_tutorial.md", + "Information measures" => "info_tutorial.md", + + ], + # "Independence testing" => "independence.md", + # "Causal graphs" => "causal_graphs.md", + # "Predefined systems" => "coupled_systems.md", + # "Experimental" => "experimental.md", 
"References" => "references.md", ] -Downloads.download( - "https://raw.githubusercontent.com/JuliaDynamics/doctheme/master/build_docs_with_style.jl", - joinpath(@__DIR__, "build_docs_with_style.jl") -) + +# Downloads.download( +# "https://raw.githubusercontent.com/JuliaDynamics/doctheme/master/build_docs_with_style.jl", +# joinpath(@__DIR__, "build_docs_with_style.jl") +# ) include("build_docs_with_style.jl") bibliography = CitationBibliography( diff --git a/docs/refs.bib b/docs/refs.bib index 0f5bd18a6..6edfe0ba6 100644 --- a/docs/refs.bib +++ b/docs/refs.bib @@ -406,8 +406,8 @@ @article{Zhao2016 pages={5130--5135}, year={2016}, publisher={National Acad Sciences}, - doi={10.1016/j.ins.2009.03.002}, - url={https://doi.org/10.1016/j.ins.2009.03.002}, + doi={10.1073/pnas.1522586113}, + url={https://www.pnas.org/doi/pdf/10.1073/pnas.1522586113}, } @article{Jizba2012, @@ -725,3 +725,551 @@ @article{Frenzel2007 doi = {10.1103/PhysRevLett.99.204101}, url = {https://link.aps.org/doi/10.1103/PhysRevLett.99.204101} } + +@ARTICLE{vanErven2014, + author={van Erven, Tim and Harremos, Peter}, + journal={IEEE Transactions on Information Theory}, + title={Rényi Divergence and Kullback-Leibler Divergence}, + year={2014}, + volume={60}, + number={7}, + pages={3797-3820}, + doi={10.1109/TIT.2014.2320500}} + + @inproceedings{Poczos2012, + title={Nonparametric estimation of conditional information and divergences}, + author={Póczos, Barnabás and Schneider, Jeff}, + booktitle={Artificial Intelligence and Statistics}, + pages={914--923}, + year={2012}, + organization={PMLR} +} + +% For ComplexityMeasures.jl references + +@article{Zahl1977, + title={Jackknifing an index of diversity}, + author={Zahl, Samuel}, + journal={Ecology}, + volume={58}, + number={4}, + pages={907--913}, + year={1977}, + publisher={Wiley Online Library}, + doi={https://doi.org/10.2307/1936227} +} + + +@article{Shannon1948, + title={A mathematical theory of communication}, + author={Shannon, Claude Elwood}, + journal={The Bell system technical journal}, + volume={27}, + number={3}, + pages={379--423}, + year={1948}, + publisher={Nokia Bell Labs}, + doi={https://doi.org/10.1002/j.1538-7305.1948.tb01338.x}, +} + +@inproceedings{Rényi1961, + title={On measures of entropy and information}, + author={Rényi, Alfréd}, + booktitle={Proceedings of the Fourth Berkeley Symposium on Mathematical Statistics and Probability, Volume 1: Contributions to the Theory of Statistics}, + volume={4}, + pages={547--562}, + year={1961}, + organization={University of California Press}, + url={https://projecteuclid.org/ebook/Download?urlid=bsmsp/1200512181&isFullBook=false} +} + + +@article{Tsallis1988, + title={Possible generalization of Boltzmann-Gibbs statistics}, + author={Tsallis, Constantino}, + journal={Journal of statistical physics}, + volume={52}, + pages={479--487}, + year={1988}, + publisher={Springer}, + doi={https://doi.org/10.1007/BF01016429}, +} + + + +@article{Arora2022, + title={Estimating the Entropy of Linguistic Distributions}, + author={Aryaman Arora and Clara Meister and Ryan Cotterell}, + year={2022}, + eprint={2204.01469}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2204.01469}, + journal={arXiv} +} + + +@article{Miller1955, + title={Note on the bias of information estimates}, + author={Miller, George}, + journal={Information theory in psychology: Problems and methods}, + year={1955}, + publisher={Free Press} +} + + +@article{Schurmann2004, + title={Bias analysis in entropy estimation}, + author={Schuermann, 
Thomas}, + journal={Journal of Physics A: Mathematical and General}, + volume={37}, + number={27}, + pages={L295}, + year={2004}, + publisher={IOP Publishing}, + doi={https://doi.org/10.1088/0305-4470/37/27/L02}, +} + + +@article{Grassberger2022, + AUTHOR = {Grassberger, Peter}, + TITLE = {On Generalized Schuermann Entropy Estimators}, + JOURNAL = {Entropy}, + VOLUME = {24}, + YEAR = {2022}, + NUMBER = {5}, + ARTICLE-NUMBER = {680}, + URL = {https://www.mdpi.com/1099-4300/24/5/680}, + PubMedID = {35626564}, + ISSN = {1099-4300}, + ABSTRACT = {We present a new class of estimators of Shannon entropy for severely undersampled discrete distributions. It is based on a generalization of an estimator proposed by T. Schuermann, which itself is a generalization of an estimator proposed by myself. For a special set of parameters, they are completely free of bias and have a finite variance, something which is widely believed to be impossible. We present also detailed numerical tests, where we compare them with other recent estimators and with exact results, and point out a clash with Bayesian estimators for mutual information.}, + DOI = {10.3390/e24050680} +} + + +@article{Horvitz1952, + author = { D. G. Horvitz and D. J. Thompson }, + title = {A Generalization of Sampling Without Replacement from a Finite Universe}, + journal = {Journal of the American Statistical Association}, + volume = {47}, + number = {260}, + pages = {663-685}, + year = {1952}, + publisher = {Taylor & Francis}, + doi = {https://doi.org/10.1080/01621459.1952.10483446}, + URL = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1952.10483446}, + eprint = {https://www.tandfonline.com/doi/pdf/10.1080/01621459.1952.10483446}, +} + +@article{Chao2003, + abstract = {A biological community usually has a large number of species with relatively small abundances. When a random sample of individuals is selected and each individual is classified according to species identity, some rare species may not be discovered. This paper is concerned with the estimation of Shannon's index of diversity when the number of species and the species abundances are unknown. The traditional estimator that ignores the missing species underestimates when there is a non-negligible number of unseen species. We provide a different approach based on unequal probability sampling theory because species have different probabilities of being discovered in the sample. No parametric forms are assumed for the species abundances. The proposed estimation procedure combines the Horvitz--Thompson (1952) adjustment for missing species and the concept of sample coverage, which is used to properly estimate the relative abundances of species discovered in the sample. Simulation results show that the proposed estimator works well under various abundance models even when a relatively large fraction of the species is missing. 
Three real data sets, two from biology and the other one from numismatics, are given for illustration.}, + author = {Chao, Anne and Shen, Tsung-Jen}, + date = {2003/12/01}, + doi = {10.1023/A:1026096204727}, + id = {Chao2003}, + isbn = {1573-3009}, + journal = {Environmental and Ecological Statistics}, + number = {4}, + pages = {429--443}, + title = {Nonparametric estimation of Shannon's index of diversity when there are unseen species in sample}, + url = {https://doi.org/10.1023/A:1026096204727}, + volume = {10}, + year = {2003}, +} + + +@article{BandtPompe2002, + title = {Permutation Entropy: A Natural Complexity Measure for Time Series}, + author = {Bandt, Christoph and Pompe, Bernd}, + journal = {Phys. Rev. Lett.}, + volume = {88}, + issue = {17}, + pages = {174102}, + numpages = {4}, + year = {2002}, + month = {Apr}, + publisher = {American Physical Society}, + doi = {10.1103/PhysRevLett.88.174102}, + url = {https://link.aps.org/doi/10.1103/PhysRevLett.88.174102}, +} + + + +@article{He2016, + title = {Multivariate permutation entropy and its application for complexity analysis of chaotic systems}, + journal = {Physica A: Statistical Mechanics and its Applications}, + volume = {461}, + pages = {812-823}, + year = {2016}, + issn = {0378-4371}, + doi = {https://doi.org/10.1016/j.physa.2016.06.012}, + url = {https://www.sciencedirect.com/science/article/pii/S0378437116302801}, + author = {Shaobo He and Kehui Sun and Huihai Wang}, + keywords = {Permutation entropy, Multivariate complexity, Simplified Lorenz system, Financial chaotic system}, + abstract = {To measure the complexity of multivariate systems, the multivariate permutation entropy (MvPE) algorithm is proposed. It is employed to measure complexity of multivariate system in the phase space. As an application, MvPE is applied to analyze the complexity of chaotic systems, including hyperchaotic Hénon map, fractional-order simplified Lorenz system and financial chaotic system. Results show that MvPE algorithm is effective for analyzing the complexity of the multivariate systems. It also shows that fractional-order system does not become more complex with derivative order varying. Compared with PE, MvPE has better robustness for noise and sampling interval, and the results are not affected by different normalization methods.} +} + + +@article{Zunino2017, + title={Permutation entropy based time series analysis: Equalities in the input signal can lead to false conclusions}, + author={Zunino, Luciano and Olivares, Felipe and Scholkmann, Felix and Rosso, Osvaldo A}, + journal={Physics Letters A}, + volume={381}, + number={22}, + pages={1883--1892}, + year={2017}, + publisher={Elsevier}, + doi={https://doi.org/10.1016/j.physleta.2017.03.052} +} + + +@article{Li2018, + AUTHOR = {Li, Guohui and Guan, Qianru and Yang, Hong}, + TITLE = {Noise Reduction Method of Underwater Acoustic Signals Based on CEEMDAN, Effort-To-Compress Complexity, Refined Composite Multiscale Dispersion Entropy and Wavelet Threshold Denoising}, + JOURNAL = {Entropy}, + VOLUME = {21}, + YEAR = {2019}, + NUMBER = {1}, + ARTICLE-NUMBER = {11}, + URL = {https://www.mdpi.com/1099-4300/21/1/11}, + PubMedID = {33266727}, + ISSN = {1099-4300}, + ABSTRACT = {Owing to the problems that imperfect decomposition process of empirical mode decomposition (EMD) denoising algorithm and poor self-adaptability, it will be extremely difficult to reduce the noise of signal. 
In this paper, a noise reduction method of underwater acoustic signal denoising based on complete ensemble empirical mode decomposition with adaptive noise (CEEMDAN), effort-to-compress complexity (ETC), refined composite multiscale dispersion entropy (RCMDE) and wavelet threshold denoising is proposed. Firstly, the original signal is decomposed into several IMFs by CEEMDAN and noise IMFs can be identified according to the ETC of IMFs. Then, calculating the RCMDE of remaining IMFs, these IMFs are divided into three kinds of IMFs by RCMDE, namely noise-dominant IMFs, real signal-dominant IMFs, real IMFs. Finally, noise IMFs are removed, wavelet soft threshold denoising is applied to noise-dominant IMFs and real signal-dominant IMFs. The denoised signal can be obtained by combining the real IMFs with the denoised IMFs after wavelet soft threshold denoising. Chaotic signals with different signal-to-noise ratio (SNR) are used for denoising experiments by comparing with EMD_MSE_WSTD and EEMD_DE_WSTD, it shows that the proposed algorithm has higher SNR and smaller root mean square error (RMSE). In order to further verify the effectiveness of the proposed method, which is applied to noise reduction of real underwater acoustic signals. The results show that the denoised underwater acoustic signals not only eliminate noise interference also restore the topological structure of the chaotic attractors more clearly, which lays a foundation for the further processing of underwater acoustic signals.}, + DOI = {10.3390/e21010011}, +} + + +@article{Rostaghi2016, + title={Dispersion entropy: A measure for time-series analysis}, + author={Rostaghi, Mostafa and Azami, Hamed}, + journal={IEEE Signal Processing Letters}, + volume={23}, + number={5}, + pages={610--614}, + year={2016}, + publisher={IEEE}, + doi={https://doi.org/10.1109/LSP.2016.2542881}, +} + + +@article{KozachenkoLeonenko1987, + title={Sample estimate of the entropy of a random vector}, + author={Kozachenko, Lyudmyla F and Leonenko, Nikolai N}, + journal={Problemy Peredachi Informatsii}, + volume={23}, + number={2}, + pages={9--16}, + year={1987}, + publisher={Russian Academy of Sciences, Branch of Informatics, Computer Equipment and~…}, + url={https://www.mathnet.ru/php/archive.phtml?wshow=paper&jrnid=ppi&paperid=797&option_lang=eng} +} + +@article{Charzyńska2015, + AUTHOR = {Charzyńska, Agata and Gambin, Anna}, + TITLE = {Improvement of the k-nn Entropy Estimator with Applications in Systems Biology}, + JOURNAL = {Entropy}, + VOLUME = {18}, + YEAR = {2016}, + NUMBER = {1}, + ARTICLE-NUMBER = {13}, + URL = {https://www.mdpi.com/1099-4300/18/1/13}, + ISSN = {1099-4300}, + ABSTRACT = {In this paper, we investigate efficient estimation of differential entropy for multivariate random variables. We propose bias correction for the nearest neighbor estimator, which yields more accurate results in higher dimensions. In order to demonstrate the accuracy of the improvement, we calculated the corrected estimator for several families of random variables. For multivariate distributions, we considered the case of independent marginals and the dependence structure between the marginal distributions described by Gaussian copula. The presented solution may be particularly useful for high dimensional data, like those analyzed in the systems biology field. 
To illustrate such an application, we exploit differential entropy to define the robustness of biochemical kinetic models.}, + DOI = {10.3390/e18010013} +} + + +@inproceedings{Gao2015, + title = {Efficient Estimation of Mutual Information for Strongly Dependent Variables}, + author = {Gao, Shuyang and Ver Steeg, Greg and Galstyan, Aram}, + booktitle = {Proceedings of the Eighteenth International Conference on Artificial Intelligence and Statistics}, + pages = {277--286}, + year = {2015}, + editor = {Lebanon, Guy and Vishwanathan, S. V. N.}, + volume = {38}, + series = {Proceedings of Machine Learning Research}, + address = {San Diego, California, USA}, + month = {09--12 May}, + publisher = {PMLR}, + pdf = {http://proceedings.mlr.press/v38/gao15.pdf}, + url = {https://proceedings.mlr.press/v38/gao15.html}, + abstract = {We demonstrate that a popular class of non-parametric mutual information (MI) estimators based on k-nearest-neighbor graphs requires number of samples that scales exponentially with the true MI. Consequently, accurate estimation of MI between two strongly dependent variables is possible only for prohibitively large sample size. This important yet overlooked shortcoming of the existing estimators is due to their implicit reliance on local uniformity of the underlying joint distribution. We introduce a new estimator that is robust to local non-uniformity, works well with limited data, and is able to capture relationship strengths over many orders of magnitude. We demonstrate the superior performance of the proposed estimator on both synthetic and real-world data.} +} + + +@article{Goria2005, + author = { M. N. Goria and N. N. Leonenko and V. V. Mergel and P. L. Novi Inverardi}, + title = {A new class of random vector entropy estimators and its applications in testing statistical hypotheses}, + journal = {Journal of Nonparametric Statistics}, + volume = {17}, + number = {3}, + pages = {277-297}, + year = {2005}, + publisher = {Taylor & Francis}, + doi = {https://doi.org/10.1080/104852504200026815}, + URL = {https://doi.org/10.1080/104852504200026815}, + eprint = {https://doi.org/10.1080/104852504200026815}, +} + +@article{Lord2018, + title={Geometric k-nearest neighbor estimation of entropy and mutual information}, + author={Lord, Warren M and Sun, Jie and Bollt, Erik M}, + journal={Chaos: An Interdisciplinary Journal of Nonlinear Science}, + volume={28}, + number={3}, + year={2018}, + publisher={AIP Publishing}, + doi = {https://doi.org/10.1063/1.5011683}, + url = {https://pubs.aip.org/aip/cha/article/28/3/033114/685022}, +} + + +@article{LeonenkoProzantoSavani2008, + author = {Nikolai Leonenko and Luc Pronzato and Vippal Savani}, + title = {A class of Rényi information estimators for multidimensional densities}, + volume = {36}, + journal = {The Annals of Statistics}, + number = {5}, + publisher = {Institute of Mathematical Statistics}, + pages = {2153 -- 2182}, + abstract = {A class of estimators of the Rényi and Tsallis entropies of an unknown distribution f in ℝm is presented. These estimators are based on the kth nearest-neighbor distances computed from a sample of N i.i.d. vectors with distribution f. We show that entropies of any order q, including Shannon’s entropy, can be estimated consistently with minimal assumptions on f. Moreover, we show that it is straightforward to extend the nearest-neighbor method to estimate the statistical distance between two distributions using one i.i.d. 
sample from each.}, + keywords = {Entropy estimation, estimation of divergence, estimation of statistical distance, Havrda–Charvát entropy, nearest-neighbor distances, Rényi entropy, Tsallis entropy}, + year = {2008}, + doi = {https://doi.org/10.1214/07-AOS539}, + URL = {https://doi.org/10.1214/07-AOS539} +} + + +@article{Vasicek1976, + title={A test for normality based on sample entropy}, + author={Vasicek, Oldrich}, + journal={Journal of the Royal Statistical Society Series B: Statistical Methodology}, + volume={38}, + number={1}, + pages={54--59}, + year={1976}, + publisher={Oxford University Press}, + doi={https://doi.org/10.1111/j.2517-6161.1976.tb01566.x} +} + + +@article{Alizadeh2010, + title={A new estimator of entropy}, + author={Alizadeh, Noughabi Hadi and Arghami, Naser Reza}, + year={2010}, + journal={Journal of the Iranian Statistical Society (JIRSS)}, + publisher={Journal of the Iranian Statistical Society (JIRSS)}, + url={http://jirss.irstat.ir/article-1-81-en.pdf}, +} + + +@article{Ebrahimi1994, + title = {Two measures of sample entropy}, + journal = {Statistics \& Probability Letters}, + volume = {20}, + number = {3}, + pages = {225-234}, + year = {1994}, + issn = {0167-7152}, + doi = {10.1016/0167-7152(94)90046-9}, + url = {https://www.sciencedirect.com/science/article/pii/0167715294900469}, + author = {Nader Ebrahimi and Kurt Pflughoeft and Ehsan S. Soofi}, + keywords = {Information theory, Entropy estimator, Exponential, Normal, Uniform}, + abstract = {In many statistical studies the entropy of a distribution function is of prime interest. This paper proposes two estimators of the entropy. Both estimators are obtained by modifying the estimator proposed by Vasicek (1976). Consistency of both estimators is proved, and comparisons have been made with Vasicek's estimator and its generalization proposed by Dudewicz and Van der Meulen (1987). The results indicate that the proposed estimators have less bias and have less mean squared error than Vasicek's estimator and its generalization} +} + + +@article{Correa1995, + author = {Correa, Juan C.}, + title = {A new estimator of entropy}, + journal = {Communications in Statistics - Theory and Methods}, + volume = {24}, + number = {10}, + pages = {2439-2449}, + year = {1995}, + publisher = {Taylor & Francis}, + doi = {10.1080/03610929508831626}, + URL = {https://doi.org/10.1080/03610929508831626}, + eprint = {https://doi.org/10.1080/03610929508831626} +} + +@article{Berger2019, + publisher={MDPI}, + author = {Berger, Sebastian and Kravtsiv, Andrii and Schneider, Gerhard and Jordan, Denis}, + title = {Teaching Ordinal Patterns to a Computer: Efficient Encoding Algorithms Based on the Lehmer Code}, + journal = {Entropy}, + volume = {21}, + year = {2019}, + number = {10}, + article-number = {1023}, + url = {https://www.mdpi.com/1099-4300/21/10/1023}, + issn = {1099-4300}, + abstract = {Ordinal patterns are the common basis of various techniques used in the study of dynamical systems and nonlinear time series analysis. The present article focusses on the computational problem of turning time series into sequences of ordinal patterns. In a first step, a numerical encoding scheme for ordinal patterns is proposed. Utilising the classical Lehmer code, it enumerates ordinal patterns by consecutive non-negative integers, starting from zero. This compact representation considerably simplifies working with ordinal patterns in the digital domain. 
Subsequently, three algorithms for the efficient extraction of ordinal patterns from time series are discussed, including previously published approaches that can be adapted to the Lehmer code. The respective strengths and weaknesses of those algorithms are discussed, and further substantiated by benchmark results. One of the algorithms stands out in terms of scalability: its run-time increases linearly with both the pattern order and the sequence length, while its memory footprint is practically negligible. These properties enable the study of high-dimensional pattern spaces at low computational cost. In summary, the tools described herein may improve the efficiency of virtually any ordinal pattern-based analysis method, among them quantitative measures like permutation entropy and symbolic transfer entropy, but also techniques like forbidden pattern identification. Moreover, the concepts presented may allow for putting ideas into practice that up to now had been hindered by computational burden. To enable smooth evaluation, a function library written in the C programming language, as well as language bindings and native implementations for various numerical computation environments are provided in the supplements.}, + doi = {10.3390/e21101023} +} + + +@article{Azami2016, +title = {Amplitude-aware permutation entropy: Illustration in spike detection and signal segmentation}, +journal = {Computer Methods and Programs in Biomedicine}, +volume = {128}, +pages = {40-51}, +year = {2016}, +issn = {0169-2607}, +doi = {10.1016/j.cmpb.2016.02.008}, +url = {https://www.sciencedirect.com/science/article/pii/S0169260715301152}, +author = {Hamed Azami and Javier Escudero}, +keywords = {Signal irregularity, Amplitude-aware permutation entropy, Spike detection, Signal segmentation, Electroencephalogram, Extracellular neuronal data}, +abstract = {Background and objective +Signal segmentation and spike detection are two important biomedical signal processing applications. Often, non-stationary signals must be segmented into piece-wise stationary epochs or spikes need to be found among a background of noise before being further analyzed. Permutation entropy (PE) has been proposed to evaluate the irregularity of a time series. PE is conceptually simple, structurally robust to artifacts, and computationally fast. It has been extensively used in many applications, but it has two key shortcomings. First, when a signal is symbolized using the Bandt–Pompe procedure, only the order of the amplitude values is considered and information regarding the amplitudes is discarded. Second, in the PE, the effect of equal amplitude values in each embedded vector is not addressed. To address these issues, we propose a new entropy measure based on PE: the amplitude-aware permutation entropy (AAPE). +Methods +AAPE is sensitive to the changes in the amplitude, in addition to the frequency, of the signals thanks to it being more flexible than the classical PE in the quantification of the signal motifs. To demonstrate how the AAPE method can enhance the quality of the signal segmentation and spike detection, a set of synthetic and realistic synthetic neuronal signals, electroencephalograms and neuronal data are processed. We compare the performance of AAPE in these problems against state-of-the-art approaches and evaluate the significance of the differences with a repeated ANOVA with post hoc Tukey's test. +Results +In signal segmentation, the accuracy of AAPE-based method is higher than conventional segmentation methods. 
AAPE also leads to more robust results in the presence of noise. The spike detection results show that AAPE can detect spikes well, even when presented with single-sample spikes, unlike PE. For multi-sample spikes, the changes in AAPE are larger than in PE. +Conclusion +We introduce a new entropy metric, AAPE, that enables us to consider amplitude information in the formulation of PE. The AAPE algorithm can be used in almost every irregularity-based application in various signal and image processing fields. We also made freely available the Matlab code of the AAPE.} +} + +@article{Paninski2003, + title={Estimation of entropy and mutual information}, + author={Paninski, Liam}, + journal={Neural computation}, + volume={15}, + number={6}, + pages={1191--1253}, + year={2003}, + publisher={MIT Press}, + doi={10.1162/089976603321780272}, + url={https://ieeexplore.ieee.org/abstract/document/6790247}, +} + + +@article{PrichardTheiler1995, + title={Generalized redundancies for time series analysis}, + author={Prichard, Dean and Theiler, James}, + journal={Physica D: Nonlinear Phenomena}, + volume={84}, + number={3-4}, + pages={476--493}, + year={1995}, + publisher={Elsevier}, + doi={10.1016/0167-2789(95)00041-2}, +} + + +@article{Rosso2001, + title = {Wavelet entropy: a new tool for analysis of short duration brain electrical signals}, + journal = {Journal of Neuroscience Methods}, + volume = {105}, + number = {1}, + pages = {65-75}, + year = {2001}, + issn = {0165-0270}, + doi = {10.1016/S0165-0270(00)00356-3}, + url = {https://www.sciencedirect.com/science/article/pii/S0165027000003563}, + author = {Osvaldo A. Rosso and Susana Blanco and Juliana Yordanova and Vasil Kolev and Alejandra Figliola and Martin Schürmann and Erol Başar}, + keywords = {EEG, event-related potentials (ERP), Visual evoked potential, Time–frequency signal analysis, Wavelet analysis, Signal entropy}, + abstract = {Since traditional electrical brain signal analysis is mostly qualitative, the development of new quantitative methods is crucial for restricting the subjectivity in the study of brain signals. These methods are particularly fruitful when they are strongly correlated with intuitive physical concepts that allow a better understanding of brain dynamics. Here, new method based on orthogonal discrete wavelet transform (ODWT) is applied. It takes as a basic element the ODWT of the EEG signal, and defines the relative wavelet energy, the wavelet entropy (WE) and the relative wavelet entropy (RWE). The relative wavelet energy provides information about the relative energy associated with different frequency bands present in the EEG and their corresponding degree of importance. The WE carries information about the degree of order/disorder associated with a multi-frequency signal response, and the RWE measures the degree of similarity between different segments of the signal. In addition, the time evolution of the WE is calculated to give information about the dynamics in the EEG records. Within this framework, the major objective of the present work was to characterize in a quantitative way functional dynamics of order/disorder microstates in short duration EEG signals. For that aim, spontaneous EEG signals under different physiological conditions were analyzed. 
Further, specific quantifiers were derived to characterize how stimulus affects electrical events in terms of frequency synchronization (tuning) in the event related potentials.}, +} + +@article{Llanos2017, + title={Power spectral entropy as an information-theoretic correlate of manner of articulation in American English}, + author={Llanos, Fernando and Alexander, Joshua M and Stilp, Christian E and Kluender, Keith R}, + journal={The Journal of the Acoustical Society of America}, + volume={141}, + number={2}, + pages={EL127--EL133}, + year={2017}, + publisher={AIP Publishing}, + doi={10.1121/1.4976109}, + url={https://pubmed.ncbi.nlm.nih.gov/28253693/} +} + +@article{Tian2017, + title={Spectral entropy can predict changes of working memory performance reduced by short-time training in the delayed-match-to-sample task}, + author={Tian, Yin and Zhang, Huiling and Xu, Wei and Zhang, Haiyong and Yang, Li and Zheng, Shuxing and Shi, Yupan}, + journal={Frontiers in human neuroscience}, + volume={11}, + pages={437}, + year={2017}, + publisher={Frontiers Media SA}, + doi={10.3389/fnhum.2017.00437}, +} + +@article{Datseris2024, + title={ComplexityMeasures. jl: scalable software to unify and accelerate entropy and complexity timeseries analysis}, + author={Datseris, George and Haaga, Kristian Agas{\o}ster}, + journal={arXiv preprint arXiv:2406.05011}, + year={2024} +} +@article{Papapetrou2020, + title={Tsallis conditional mutual information in investigating long range correlation in symbol sequences}, + author={Papapetrou, M and Kugiumtzis, D}, + journal={Physica A: Statistical Mechanics and its Applications}, + volume={540}, + pages={123016}, + year={2020}, + publisher={Elsevier} +} + +@article{Szekely2007, + author = {G{\'a}bor J. Sz{\'e}kely and Maria L. Rizzo and Nail K. Bakirov}, + title = {{Measuring and testing dependence by correlation of distances}}, + volume = {35}, + journal = {The Annals of Statistics}, + number = {6}, + publisher = {Institute of Mathematical Statistics}, + pages = {2769 -- 2794}, + keywords = {Distance correlation, distance covariance, multivariate independence}, + year = {2007}, + doi = {10.1214/009053607000000505}, + URL = {https://doi.org/10.1214/009053607000000505} +} + + +@article{Szekely2014, +author = {G{\'a}bor J. Sz{\'e}kely and Maria L. 
Rizzo}, +title = {{Partial distance correlation with methods for dissimilarities}}, +volume = {42}, +journal = {The Annals of Statistics}, +number = {6}, +publisher = {Institute of Mathematical Statistics}, +pages = {2382 -- 2412}, +keywords = {dissimilarity, energy statistics, independence, multivariate, partial distance correlation}, +year = {2014}, +doi = {10.1214/14-AOS1255}, +URL = {https://doi.org/10.1214/14-AOS1255} +} + +@article{Manis2017, + title={Bubble entropy: An entropy almost free of parameters}, + author={Manis, George and Aktaruzzaman, MD and Sassi, Roberto}, + journal={IEEE Transactions on Biomedical Engineering}, + volume={64}, + number={11}, + pages={2711--2718}, + year={2017}, + publisher={IEEE} +} + +@article{Wang2020, + title={Multiscale diversity entropy: A novel dynamical measure for fault diagnosis of rotating machinery}, + author={Wang, Xianzhi and Si, Shubin and Li, Yongbo}, + journal={IEEE Transactions on Industrial Informatics}, + volume={17}, + number={8}, + pages={5419--5429}, + year={2020}, + publisher={IEEE}, + doi={10.1109/TII.2020.3022369}, +} + +@book{Tsallis2009, + title={Introduction to nonextensive statistical mechanics: approaching a complex world}, + author={Tsallis, Constantino}, + volume={1}, + number={1}, + year={2009}, + publisher={Springer}, + url={https://link.springer.com/book/10.1007/978-0-387-85359-8} +} \ No newline at end of file diff --git a/docs/src/api.md b/docs/src/api.md deleted file mode 100644 index 846ee8086..000000000 --- a/docs/src/api.md +++ /dev/null @@ -1,37 +0,0 @@ -# API - -## Information API - -```@contents -Pages = [ - "api/api_information_overview.md", - "api/api_probabilities.md", - "api/api_contingency_table.md", - "api/api_entropies.md", - "api/api_conditional_entropy.md", - "api/api_mutualinfo.md", - "api/api_condmutualinfo.md", - "api/api_transferentropy.md", - "api/api_pmi.md", - "api/api_predictive_asymmetry.md", -] -Depth = 3 -``` - -## Cross-map API - -```@contents -Pages = [ - "api/api_crossmap.md", -] -Depth = 3 -``` - -## Recurrence API - -```@contents -Pages = [ - "api/api_recurrence.md", -] -Depth = 3 -``` diff --git a/docs/src/api/api_conditional_entropy.md b/docs/src/api/api_conditional_entropy.md deleted file mode 100644 index 689eca3b3..000000000 --- a/docs/src/api/api_conditional_entropy.md +++ /dev/null @@ -1,20 +0,0 @@ - -# Conditional entropy API - -The conditional entropy API is defined by - -- [`ConditionalEntropy`](@ref), and its subtypes -- [`entropy_conditional`](@ref), - -```@docs -entropy_conditional -``` - -## Definitions - -```@docs -ConditionalEntropy -CEShannon -CETsallisFuruichi -CETsallisAbe -``` diff --git a/docs/src/api/api_condmutualinfo.md b/docs/src/api/api_condmutualinfo.md deleted file mode 100644 index 897a25974..000000000 --- a/docs/src/api/api_condmutualinfo.md +++ /dev/null @@ -1,54 +0,0 @@ - -# Conditional mutual information API - -The condition mutual information (CMI) API is defined by - -- [`ConditionalMutualInformation`](@ref), and its subtypes. -- [`condmutualinfo`](@ref), -- [`ConditionalMutualInformationEstimator`](@ref), and its subtypes. 
- -```@docs -condmutualinfo -``` - -## Definitions - -```@docs -ConditionalMutualInformation -``` - -## [`ConditionalMutualInformationEstimator`](@ref)s - -```@docs -ConditionalMutualInformationEstimator -``` - -### [`GaussianCMI`](@ref) (parametric) - -```@docs -GaussianCMI -``` - -### [`FPVP`](@ref) - -```@docs -FPVP -``` - -### [`MesnerShalizi`](@ref) - -```@docs -MesnerShalizi -``` - -### [`PoczosSchneiderCMI`](@ref) - -```@docs -PoczosSchneiderCMI -``` - -### [`Rahimzamani`](@ref) - -```@docs -Rahimzamani -``` diff --git a/docs/src/api/api_contingency_table.md b/docs/src/api/api_contingency_table.md deleted file mode 100644 index 63cae139f..000000000 --- a/docs/src/api/api_contingency_table.md +++ /dev/null @@ -1,38 +0,0 @@ - -# [Contingency table API](@id contingency_table_api) - -To estimate discrete information theoretic quantities that are functions of more than -one variable, we must estimate empirical joint probability mass functions (pmf). -The function [`contingency_matrix`](@ref) accepts an arbitrary number of equal-length -input data and returns the corresponding multidimensional contingency table as a -[`ContingencyMatrix`](@ref). From this table, we can extract the necessary joint and -marginal pmfs for computing any discrete function of multivariate discrete probability -distributions. This is essentially the multivariate analogue of -[`Probabilities`](@ref). - -But why would I use a [`ContingencyMatrix`](@ref) instead of some other indirect estimation -method, you may ask. The answer is that [`ContingencyMatrix`](@ref) allows you to -compute *any* of the information theoretic quantities offered in this package for *any* -type of input data. You input data can literally be any hashable type, for example `String`, -`Tuple{Int, String, Int}`, or `YourCustomHashableDataType`. - -In the case of numeric data, using a [`ContingencyMatrix`](@ref) is typically a -bit slower than other dedicated estimation procedures. -For example, quantities like discrete Shannon-type [`condmutualinfo`](@ref) are faster to -estimate using a formulation based on sums of four entropies (the H4-principle). This -is faster because we can both utilize the blazingly fast [`StateSpaceSet`](@ref) structure directly, -and we can avoid *explicitly* estimating the entire joint pmf, which demands many -extra calculation steps. Whatever you use in practice depends on your use case and -available estimation methods, but you can always fall back to contingency matrices -for any discrete measure. - -```@docs -ContingencyMatrix -contingency_matrix -``` - -## Utilities - -```@docs -marginal_encodings -``` diff --git a/docs/src/api/api_crossmap.md b/docs/src/api/api_crossmap.md deleted file mode 100644 index 0f985e811..000000000 --- a/docs/src/api/api_crossmap.md +++ /dev/null @@ -1,39 +0,0 @@ - -# [Cross mapping API](@id cross_mapping_api) - -Several cross mapping methods have emerged in the literature -Following Sugihara et al. (2012)'s paper on the convergent cross mapping. -In CausalityTools.jl, we provide a unified interface for using these cross mapping methods. -We indicate the different types of cross mappings by -passing an [`CrossmapMeasure`](@ref) instance as the first argument to [`crossmap`](@ref) -or [`predict`](@ref). - -The cross mapping API consists of the following functions. - -- [`predict`](@ref) -- [`crossmap`](@ref) - -These functions can dispatch on a [`CrossmapMeasure`](@ref), and we currently implement - -- [`ConvergentCrossMapping`](@ref). -- [`PairwiseAsymmetricEmbedding`](@ref). 
- -```@docs -crossmap -predict -``` - -## Measures - -```@docs -CrossmapMeasure -``` - -## Estimators - -```@docs -CrossmapEstimator -RandomVectors -RandomSegment -ExpandingSegment -``` diff --git a/docs/src/api/api_entropies.md b/docs/src/api/api_entropies.md deleted file mode 100644 index 215b11f84..000000000 --- a/docs/src/api/api_entropies.md +++ /dev/null @@ -1,109 +0,0 @@ - -# [Entropies API](@id entropies) - -The entropies API is defined by - -- [`EntropyDefinition`](@ref) -- [`entropy`](@ref) -- [`DifferentialEntropyEstimator`](@ref) - -The entropies API is re-exported from [ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl). Why? Continuous/differential versions of many information theoretic -association measures can be written as a function of differential entropy terms, and can -thus be estimated using [`DifferentialEntropyEstimator`](@ref)s. - -```@docs -ComplexityMeasures.entropy -``` - -## Definitions - -```@docs -EntropyDefinition -Shannon -Renyi -Tsallis -Kaniadakis -Curado -StretchedExponential -``` - -## [`DifferentialEntropyEstimator`](@ref)s - -CausalityTools.jl reexports [`DifferentialEntropyEstimator`](@ref)s from -[ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl). -Why? Any information-based measure that can be written as a function of differential entropies -can be estimated using a [`DifferentialEntropyEstimator`](@ref)s. - -```@docs -DifferentialEntropyEstimator -``` - -### Overview - -Only estimators compatible with multivariate data are applicable to the multi-argument measures -provided by CausalityTools. Hence, some entropy estimators are missing from the overview -here (see [ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl) for -details). - -Each [`DifferentialEntropyEstimator`](@ref)s uses a specialized technique to approximate relevant -densities/integrals, and is often tailored to one or a few types of generalized entropy. -For example, [`Kraskov`](@ref) estimates the [`Shannon`](@ref) entropy. - -| Estimator | Principle | [`Shannon`](@ref) | -| :--------------------------- | :---------------- | :---------------: | -| [`KozachenkoLeonenko`](@ref) | Nearest neighbors | ✓ | -| [`Kraskov`](@ref) | Nearest neighbors | ✓ | -| [`Zhu`](@ref) | Nearest neighbors | ✓ | -| [`ZhuSingh`](@ref) | Nearest neighbors | ✓ | -| [`Gao`](@ref) | Nearest neighbors | ✓ | -| [`Goria`](@ref) | Nearest neighbors | ✓ | -| [`Lord`](@ref) | Nearest neighbors | ✓ | - -### [`Kraskov`](@ref) - -```@docs -Kraskov -``` - -### [`KozachenkoLeonenko`](@ref) - -```@docs -KozachenkoLeonenko -``` - -### [`Zhu`](@ref) - -```@docs -Zhu -``` - -### [`ZhuSingh`](@ref) - -```@docs -ZhuSingh -``` - -### [`Gao`](@ref) - -```@docs -Gao -``` - -### [`Goria`](@ref) - -```@docs -Goria -``` - -### [`Lord`](@ref) - -```@docs -Lord -``` - -## Utilities - -```@docs -entropy_maximum -entropy_normalized -``` diff --git a/docs/src/api/api_information_overview.md b/docs/src/api/api_information_overview.md deleted file mode 100644 index 099983102..000000000 --- a/docs/src/api/api_information_overview.md +++ /dev/null @@ -1,60 +0,0 @@ -# [Information API](@id information_api) - -This page outlines the information API. It contains a lot of information, so for -convenience, we list all concrete implementation of pairwise and conditional -association measures [here](@ref information_measures). 
- -## [Design](@id information_measures_design) - -We have taken great care to make sure that information estimators are reusable and modular. -Functions have the following general form. - -```julia -f([measure], estimator, input_data...) - -# Some examples -mutualinfo(MIShannon(base = ℯ), Kraskov(k = 1), x, y) -mutualinfo(MITsallisFuruichi(base = ℯ), KozachenkoLeonenko(k = 3), x, y) -condmutualinfo(CMIShannon(base = 2), ValueHistogram(3), x, y, z) -condmutualinfo(CMIRenyiJizba(base = 2), KSG2(k = 5), x, y, z) -condmutualinfo(CMIRenyiPoczos(base = 2), PoczosSchneiderCMI(k = 10), x, y, z) -``` - -This modular design really shines when it comes to independence testing and causal graph -inference. You can essentially test the performance of *any* independence `measure` with -*any* `estimator`, as long as their combination is implemented (and if it's not, -please submit a PR or issue!). We hope that this will both ease reproduction of -existing literature results, and spawn new research. Please let us know if you use the -package for something useful, or publish something based on it! - -Information measures are either estimated using one of the following basic estimator types, - -- [`ProbabilitiesEstimator`](@ref)s, -- [`DifferentialEntropyEstimator`](@ref)s, - -or using measure-specific estimators: - -- [`MutualInformationEstimator`](@ref)s are used with [`mutualinfo`](@ref) -- [`ConditionalMutualInformationEstimator`](@ref)s are used with [`condmutualinfo`](@ref) -- [`TransferEntropyEstimator`](@ref)s are used with [`transferentropy`](@ref) - -## Naming convention: The same name for different things - -Upon doing a literature review on the possible variants of information theoretic measures, -it become painstakingly obvious that authors use *the same name for different concepts*. -For novices, and experienced practitioners too, this can be confusing. -Our API clearly distinguishes between methods that are conceptually the same but named -differently in the literature due to differing *estimation* strategies, from methods -that actually have different definitions. - -- Multiple, equivalent definitions occur for example for the Shannon mutual - information (MI; [`MIShannon`](@ref)), which has both a discrete and continuous version, and there there are multiple equivalent mathematical formulas for them: a direct sum/integral - over a joint probability mass function (pmf), as a sum of three entropy terms, and as - a Kullback-Leibler divergence between the joint pmf and the product of the marginal - distributions. Since these definitions are all equivalent, we only need once type - ([`MIShannon`](@ref)) to represent them. -- But Shannon MI is not the only type of mutual information! For example, "Tsallis mutual information" - has been proposed in different variants by various authors. Despite sharing the - same name, these are actually *nonequivalent definitions*. We've thus assigned - them entirely different measure names (e.g. [`MITsallisFuruichi`](@ref) and - [`MITsallisMartin`](@ref)), with the author name at the end. diff --git a/docs/src/api/api_mutualinfo.md b/docs/src/api/api_mutualinfo.md deleted file mode 100644 index 5d8d83e1e..000000000 --- a/docs/src/api/api_mutualinfo.md +++ /dev/null @@ -1,54 +0,0 @@ - -# [Mutual information API](@id api_mutualinfo) - -The mutual information (MI) API is defined by - -- [`MutualInformation`](@ref), and its subtypes. -- [`mutualinfo`](@ref), -- [`MutualInformationEstimator`](@ref), and its subtypes. 
- -```@docs -mutualinfo -``` - -## Definitions - -```@docs -MutualInformation -``` - -## [`MutualInformationEstimator`](@ref)s - -```@docs -MutualInformationEstimator -``` - -## [`GaussianMI`](@ref) (parametric) - -```@docs -GaussianMI -``` - -### [`KraskovStögbauerGrassberger1`](@ref) - -```@docs -KraskovStögbauerGrassberger1 -``` - -### [`KraskovStögbauerGrassberger2`](@ref) - -```@docs -KraskovStögbauerGrassberger2 -``` - -### [`GaoKannanOhViswanath`](@ref) - -```@docs -GaoKannanOhViswanath -``` - -### [`GaoOhViswanath`](@ref) - -```@docs -GaoOhViswanath -``` diff --git a/docs/src/api/api_pmi.md b/docs/src/api/api_pmi.md deleted file mode 100644 index 0f9e32ae6..000000000 --- a/docs/src/api/api_pmi.md +++ /dev/null @@ -1,11 +0,0 @@ - -# [Part mutual information API](@id api_partmutualinfo) - -The part mutual information (PMI) API is defined by - -- [`PMI`](@ref), and its subtypes. -- [`pmi`](@ref), - -```@docs -pmi -``` diff --git a/docs/src/api/api_probabilities.md b/docs/src/api/api_probabilities.md deleted file mode 100644 index bd9839b02..000000000 --- a/docs/src/api/api_probabilities.md +++ /dev/null @@ -1,147 +0,0 @@ - -# Probabilities API - -The probabilities API is defined by - -- [`ProbabilitiesEstimator`](@ref), and its subtypes. -- [`probabilities`](@ref) -- [`probabilities_and_outcomes`](@ref) - -See also [contingency tables](@ref contingency_table_api) for a multivariate version. - -The probabilities API is re-exported from [ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl). Why? Most discrete information theoretic association measures are estimated -using some sort of [`ProbabilitiesEstimator`](@ref)s, because their formulas are simply functions -of probability mass functions. - -## Probabilities - -```@docs -ProbabilitiesEstimator -probabilities -probabilities! -Probabilities -``` - -## Estimators - -### Overview - -Here, we list probabilities estimators that are compatible with CausalityTools.jl. Note that not -all probabilities estimators from ComplexityMeasures.jl are included. This is because for -the information-based association measures here, the probabilities estimator must be -compatible with multivariate data, or have an implementation for [`marginal_encodings`](@ref), -which discretizes each dimension of the multivariate input data separately. - -| Estimator | Principle | -| :---------------------------- | :--------------------------------------------- | -| [`Contingency`](@ref) | Count co-occurrences, optionally discretize first | -| [`CountOccurrences`](@ref) | Count of unique elements | -| [`ValueHistogram`](@ref) | Binning (histogram) | -| [`TransferOperator`](@ref) | Binning (transfer operator) | -| [`NaiveKernel`](@ref) | Kernel density estimation | -| [`SymbolicPermutation`](@ref) | Ordinal patterns | -| [`Dispersion`](@ref) | Dispersion patterns | - -### Contingency - -```@docs -Contingency -``` - -### Count occurrences - -```@docs -CountOccurrences -``` - -### Histograms (binning) - -```@docs -ValueHistogram -RectangularBinning -FixedRectangularBinning -``` - -### Transfer operator (binning) - -```@docs -TransferOperator -``` - -#### Utility methods/types - -For explicit estimation of the transfer operator, see -[ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl). 
- -```@docs -InvariantMeasure -invariantmeasure -transfermatrix -``` - -### Symbolic permutations - -```@docs -SymbolicPermutation -``` - -### Dispersion patterns - -```@docs -Dispersion -``` - -### Kernel density - -```@docs -NaiveKernel -``` - -### Timescales - -```@docs -WaveletOverlap -PowerSpectrum -``` - -### Diversity - -```@docs -Diversity -``` - -## Utilities - -### Outcomes - -```@docs -probabilities_and_outcomes -outcomes -outcome_space -total_outcomes -missing_outcomes -``` - -### Encodings - -Some probability estimators first "encode" input data into an intermediate representation indexed by the positive integers. This intermediate representation is called an "encoding". - -The encodings API is defined by: - -- [`Encoding`](@ref) -- [`encode`](@ref) -- [`decode`](@ref) - -```@docs -Encoding -encode -decode -``` - -#### Available encodings - -```@docs -OrdinalPatternEncoding -GaussianCDFEncoding -RectangularBinEncoding -``` diff --git a/docs/src/api/api_recurrence.md b/docs/src/api/api_recurrence.md deleted file mode 100644 index dd56a7719..000000000 --- a/docs/src/api/api_recurrence.md +++ /dev/null @@ -1,6 +0,0 @@ -# Recurrence API - -```@docs -mcr -rmcd -``` diff --git a/docs/src/api/api_transferentropy.md b/docs/src/api/api_transferentropy.md deleted file mode 100644 index 409d7eb2c..000000000 --- a/docs/src/api/api_transferentropy.md +++ /dev/null @@ -1,59 +0,0 @@ - -# Transfer entropy API - -The transfer entropy API is made up of the following functions and types. - -- [`TransferEntropy`](@ref), and its subtypes. -- [`transferentropy`](@ref). -- [`TransferEntropyEstimator`](@ref), and its subtypes. - -```@docs -transferentropy -``` - -## Definitions - -```@docs -TransferEntropy -``` - -## [`TransferEntropyEstimator`](@ref)s - -```@docs -TransferEntropyEstimator -``` - -### [`Zhu1`](@ref) - -```@docs -Zhu1 -``` - -### [`Lindner`](@ref) - -```@docs -Lindner -``` - -## Convenience - -### [`SymbolicTransferEntropy`](@ref) - -```@docs -SymbolicTransferEntropy -``` - -### [`Hilbert`](@ref) - -```@docs -Hilbert -Phase -Amplitude -``` - -## Utilities - -```@docs -optimize_marginals_te -EmbeddingTE -``` diff --git a/docs/src/associations.md b/docs/src/associations.md new file mode 100644 index 000000000..72c32cca6 --- /dev/null +++ b/docs/src/associations.md @@ -0,0 +1,290 @@ +# Association measures + +## Associations API + +The most basic components of CausalityTools.jl are a collection of statistics that in some manner quantify the "association" between input datasets. Precisely what is meant by "association" depends on the measure, and precisely what is meant by "quantify" depends on the *estimator* of that measure. We formalize this notion below with the [`association`](@ref) +function, which dispatches on [`AssociationMeasureEstimator`](@ref) and [`AssociationMeasure`](@ref). + + +```@docs +association +AssociationMeasure +AssociationMeasureEstimator +``` + +## [Information measures](@id information_api) + +```@docs +MultivariateInformationMeasure +MultivariateInformationMeasureEstimator +``` + +### Generic information estimators + +We provide a set of generic estimators that can be used to calculate +potentially several types of information measures. 
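+
+As a rough sketch of how these generic estimators compose with measure definitions (the
+specific measure-estimator pairings shown here are only illustrative; see the individual
+docstrings and the examples page for the combinations each estimator supports):
+
+```julia
+using CausalityTools
+x, y = rand(1000), rand(1000)
+
+# The same generic estimator type can wrap different measure definitions ...
+disc = CodifyVariables(ValueBinning(3))
+association(JointProbabilities(MIShannon(), disc), x, y)           # discrete Shannon MI
+association(JointProbabilities(JointEntropyShannon(), disc), x, y) # discrete Shannon joint entropy
+
+# ... and the same measure definition can be estimated with different generic estimators.
+association(EntropyDecomposition(MIShannon(), Kraskov(k = 3)), x, y)
+```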
+ +```@docs +JointProbabilities +EntropyDecomposition +MIDecomposition +CMIDecomposition +``` + + +### [Conditional entropies](@id conditional_entropies) + +```@docs +ConditionalEntropy +ConditionalEntropyShannon +ConditionalEntropyTsallisFuruichi +ConditionalEntropyTsallisAbe +``` + +### [Divergences and distances](@id divergences_and_distances) + +```@docs +HellingerDistance +KLDivergence +RenyiDivergence +VariationDistance +``` + +### [Joint entropies](@id joint_entropies) + +```@docs +JointEntropy +JointEntropyShannon +JointEntropyTsallis +JointEntropyRenyi +``` + +### Mutual informations + +```@docs +MutualInformation +MIShannon +MITsallisFuruichi +MITsallisMartin +MIRenyiJizba +MIRenyiSarbu +``` + +### Mutual information estimators + +```@docs +MutualInformationEstimator +KraskovStögbauerGrassberger1 +KraskovStögbauerGrassberger2 +GaoKannanOhViswanath +GaoOhViswanath +GaussianMI +``` + +### Conditional mutual informations + +```@docs +ConditionalMutualInformation +CMIShannon +CMIRenyiSarbu +CMIRenyiJizba +CMIRenyiPoczos +CMITsallisPapapetrou +``` + +#### Conditional mutual information estimators + +```@docs +ConditionalMutualInformationEstimator +GaussianCMI +FPVP +MesnerShalizi +Rahimzamani +PoczosSchneiderCMI +``` + +### Partial mutual information + +```@docs +PartialMutualInformation +``` + +### Transfer entropy + +```@docs +TransferEntropy +TEShannon +TERenyiJizba +``` + +#### Transfer entropy estimators + +```@docs +TransferEntropyEstimator +Zhu1 +Lindner +SymbolicTransferEntropy +Hilbert +Phase +Amplitude +``` + + +##### Utilities + +```@docs +optimize_marginals_te +EmbeddingTE +``` + +### Single-variable information API (from ComplexityMeasures.jl) + +Below we list some relevant types from +[ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl) that +are used for the [`EntropyDecomposition`](@ref) estimator. + +#### Entropies + +```@docs +Shannon +Renyi +Tsallis +Kaniadakis +``` + +#### Discrete information estimators + +```@docs +DiscreteInfoEstimator +PlugIn +MillerMadow +Schuermann +GeneralizedSchuermann +Jackknife +HorvitzThompson +ChaoShen +``` + +#### Differential information estimators + +```@docs +DifferentialInfoEstimator +Kraskov +KozachenkoLeonenko +Zhu +ZhuSingh +Gao +Goria +Lord +LeonenkoProzantoSavani +Vasicek +AlizadehArghami +Ebrahimi +Correa +``` + +## [Correlation measures](@id correlation_api) + +This page lists all available [`CorrelationMeasure`](@ref)s, as +well as their convenience functions. The [examples](@ref correlation_examples) +is also useful. + +### Pearson correlation + +```@docs +PearsonCorrelation +``` + +### Partial correlation + +```@docs +PartialCorrelation +``` + +### Distance correlation + +```@docs +DistanceCorrelation +``` + +## [Cross-map measures](@id cross_map_api) + +The cross-map measures define different ways of quantifying association based on the +concept of "cross mapping", which has appeared in many contexts in the literature, +and gained huge popularity with [Sugihara2012](@citet)'s on *convergent cross mapping*. + +Since their paper, several cross mapping methods and frameworks have emerged in the +literature. In CausalityTools.jl, we provide a unified interface for using these cross +mapping methods. 
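+
+As a minimal sketch of this interface (mirroring the cross-mapping examples elsewhere in
+these docs; the parameter values are only illustrative):
+
+```julia
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+x, y = randn(rng, 200), randn(rng, 200)
+
+# Estimate convergent cross mapping at increasing library sizes, using randomly
+# sampled embedding vectors for each library size.
+est = RandomVectors(ConvergentCrossMapping(d = 3); libsizes = 50:25:200, replace = true, rng)
+crossmap(est, x, y)  # one correspondence value per library size
+```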
+ + +### Measures + +```@docs +CrossmapMeasure +ConvergentCrossMapping +PairwiseAsymmetricInference +``` + +### Estimators + +```@docs +CrossmapEstimator +RandomVectors +RandomSegment +ExpandingSegment +``` + +### Advanced utility methods + +For most use cases, it is sufficient to provide a [`CrossmapEstimator`](@ref) to +[`association`](@ref) to compute a cross map measure. However, in some cases it +can be useful to have more fine-grained controls. We offer a few utility functions +for this purpose. + +In the example where we [reproduce Figures 3C and 3D](@ref example_sugihara_figs3Cand3D) of [Sugihara2012](@ref), these lower-level +functions are used. + +```@docs +predict +crossmap +``` + +## [Closeness measures](@id closeness_api) + +### Joint distance distribution + +```@docs +JointDistanceDistribution +``` + +### S-measure + +```@docs +SMeasure +``` + +### H-measure + +```@docs +HMeasure +``` + +### M-measure + +```@docs +MMeasure +``` + +### L-measure + +```@docs +LMeasure +``` + +## Recurrence measures + +```@docs +MCR +RMCD +``` \ No newline at end of file diff --git a/docs/src/examples/examples_jdd.md b/docs/src/basics.md similarity index 100% rename from docs/src/examples/examples_jdd.md rename to docs/src/basics.md diff --git a/docs/src/coupled_systems.md b/docs/src/coupled_systems.md deleted file mode 100644 index c4eda893d..000000000 --- a/docs/src/coupled_systems.md +++ /dev/null @@ -1,53 +0,0 @@ -# Predefined coupled systems - -## Systems definition API - -The systems definition API is defined by - -- [`SystemDefinition`](@ref), [`DiscreteDefinition`](@ref), and [`ContinuousDefinition`](@ref). -- [`system`](@ref) - -```@docs -SystemDefinition -DiscreteDefinition -ContinuousDefinition -system -``` - -## Discrete systems - -```@docs -Anishchenko -AR1Unidir -AR1Bidir -ChaoticMaps3 -Henon2 -Henon3 -Ikeda2 -ChaoticNoisyLinear2 -Logistic2Unidir -Logistic2Bidir -Logistic3CommonDriver -Logistic4Chain -Nonlinear3 -Peguin2 -UlamLattice -Var1 -Verdes3 -``` - -## Continuous systems - -```@docs -ChuaCircuitsBidir6 -ChuaScrollSine3 -HindmarshRose3 -LorenzBidir6 -LorenzForced9 -MediatedLink9 -Repressilator6 -RosslerBidir6 -RosslerForced9 -RosslerLorenzUnidir6 -Thomas3 -``` diff --git a/docs/src/discretization_tutorial.md b/docs/src/discretization_tutorial.md new file mode 100644 index 000000000..97afbdcd0 --- /dev/null +++ b/docs/src/discretization_tutorial.md @@ -0,0 +1,141 @@ +# [Discretization tutorial](@id discretization_tutorial) + +There are two main ways of discretizing data in CausalityTools. They are implemented as +the [`CodifyPoints`](@ref) and [`CodifyVariables`](@ref) types, which are used as +input to the [`codify`](@ref) function (extended from ComplexityMeasures.jl to multiple +variables). + +## [Encoding *rows* (one *point* at a time)](@id tutorial_codify_points) + +In some cases, it may be desireable to encode data on a row-wise basis. This +typically happens when working with pre-embedded time series. If we want to +apply something like [`OrdinalPatternEncoding`](@ref) to a pre-embedded +[`StateSpaceSet`](@ref), then we must encode each *point* individually, +respecting the fact that time ordering is already taken care of by the +embedding procedure. [`CodifyPoints`](@ref) ensures input data are encoded +on a point-by-point basis. 
+
+```@example
+using CausalityTools
+using StateSpaceSets
+using Random; rng = Xoshiro(1234)
+
+# The first variable is 2-dimensional and has 50 points
+x = StateSpaceSet(rand(rng, 50, 2))
+# The second variable is 3-dimensional and has 50 points
+y = StateSpaceSet(rand(rng, 50, 3))
+# The third variable is 4-dimensional and has 50 points
+z = StateSpaceSet(rand(rng, 50, 4))
+
+# One encoding scheme per input variable:
+# encode `x` using `ox` on a point-by-point basis (Vector{SVector{2}} → Vector{Int})
+# encode `y` using `oy` on a point-by-point basis (Vector{SVector{3}} → Vector{Int})
+# encode `z` using `oz` on a point-by-point basis (Vector{SVector{4}} → Vector{Int})
+ox = OrdinalPatternEncoding(2)
+oy = OrdinalPatternEncoding(3)
+oz = OrdinalPatternEncoding(4)
+
+# This gives three column vectors of integers.
+cx, cy, cz = codify(CodifyPoints(ox, oy, oz), x, y, z)
+
+[cx cy cz]
+```
+
+Notice that the 2-dimensional `x` has been encoded into the integer values `1` or `2`, because
+there are `2! = 2` possible ordinal patterns for dimension `m = 2`. The 3-dimensional `y` has
+been encoded into integers in the range `1` to `3! = 6`, while the 4-dimensional `z` is
+encoded into an even larger range of integers, because the number of possible ordinal patterns
+is `4! = 24` for 4-dimensional embedding vectors.
+
+## Encoding *columns* (one variable at a time)
+
+Sometimes, it may be desirable to encode input data one variable/column at a time.
+This typically happens when the input is one or more timeseries.
+
+To encode columns, we apply an [`Encoding`](@ref) using a sliding window across each input variable.
+The width of the window is determined by the chosen encoding.
+For example, using [`ValueBinning`](@ref) will encode `N` values into `N` discretized
+values. [`CodifyVariables`](@ref) is used to enforce a sliding window encoding on a
+per-variable basis.
+
+```@example example_codify_variables
+using CausalityTools
+using Random; rng = Xoshiro(1234)
+
+x = rand(rng, 100)
+o = ValueBinning(3)
+cx = codify(CodifyVariables(o), x)
+```
+
+We can verify that [`ValueBinning`](@ref) preserves the cardinality of the input dataset.
+
+```@example example_codify_variables
+length(x) == length(cx)
+```
+
+Other outcome spaces such as [`Dispersion`](@ref) or [`OrdinalPatterns`](@ref) do not
+preserve the cardinality of the input dataset, because when applied in a sliding window,
+they compress embedding vectors into single integers. This means that some points at the
+end of each input variable are lost.
+
+```@example
+using CausalityTools
+using Random; rng = Xoshiro(1234)
+
+x = rand(rng, 100)
+o = OrdinalPatterns(m = 3)
+cx = codify(CodifyVariables(o), x)
+```
+
+We can also simultaneously encode each variable/column of a [`StateSpaceSet`](@ref), as long
+as we apply an encoding that results in the *same* number of encoded data points.
+
+```@example
+using CausalityTools
+using Random; rng = Xoshiro(1234)
+
+x = rand(rng, 100)
+y = rand(rng, 100)
+o = OrdinalPatterns(m = 3)
+# Alternatively, provide a tuple of input time series: codify(CodifyVariables(o), (x, y))
+cx, cy = codify(CodifyVariables(o), StateSpaceSet(x, y))
+
+[cx cy]
+```
+
+## Codify API
+
+A fundamental operation when computing multivariate information measures from data is *discretization*. The following
+functions and types are used by CausalityTools.jl to perform discretization of input data.
+ +```@docs +codify +Discretization +``` + +### Encoding per variable/column + +```@docs +CodifyVariables +``` + +The sliding-window discretization is formally done by applying some [`OutcomeSpace`](@ref) to each variable/column. Pick between the following outcome spaces + +```@docs +UniqueElements +CosineSimilarityBinning +Dispersion +OrdinalPatterns +BubbleSortSwaps +ValueBinning +RectangularBinning +FixedRectangularBinning +``` + +### Encoding per sample/row + +```@docs +CodifyPoints +``` + diff --git a/docs/src/encoding_tutorial.md b/docs/src/encoding_tutorial.md new file mode 100644 index 000000000..184732aec --- /dev/null +++ b/docs/src/encoding_tutorial.md @@ -0,0 +1,22 @@ +# [Encoding](@id encoding_api_and_tutorial) + +To estimate [`Probabilities`](@ref), which are the input to [`MultivariateInformationMeasure`](@ref)s, +we encode "encode" input data into an intermediate representation indexed by the positive integers. +This intermediate representation is called an "encoding". + +We here re-export relevant types and functions [ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl) that perform this type of coarse-graining. + +These encoding schemes are used as input to [`CodifyPoints`](@ref). + +```@docs +Encoding +GaussianCDFEncoding +OrdinalPatternEncoding +RelativeMeanEncoding +RelativeFirstDifferenceEncoding +UniqueElementsEncoding +RectangularBinEncoding +CombinationEncoding +encode +decode +``` \ No newline at end of file diff --git a/docs/src/examples.md b/docs/src/examples.md index a7d06dd3f..c2043ef16 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -1,43 +1,418 @@ -# Examples -## Association measures +# [Cross mapping](@id examples_crossmappings) -```@contents -Pages = [ - "examples/examples_mi.md", - "examples/examples_conditional_mutual_information.md", - "examples/examples_transferentropy.md", - "examples/examples_cross_mappings.md", - "examples/examples_closeness.md", - "examples/examples_predictive_asymmetry.md", -] -Depth = 3 +## [`ConvergentCrossMapping`](@ref) directly + +```@example +using CausalityTools +x, y = rand(200), rand(100) +crossmap(CCM(), x, y) +``` + +## [`ConvergentCrossMapping`](@ref) with [`RandomVectors`](@ref) + +When cross-mapping with the [`RandomVectors`](@ref) estimator, a single random subsample +of time indices (i.e. not in any particular order) of length `l` is drawn for each library +size `l`, and cross mapping is performed using the embedding vectors corresponding +to those time indices. + +```@example +using CausalityTools +using Random; rng = MersenneTwister(1234) +x, y = randn(rng, 200), randn(rng, 200) + +# We'll draw a single sample at each `l ∈ libsizes`. Sampling with replacement is then +# necessary, because our 200-pt timeseries will result in embeddings with +# less than 200 points. +est = RandomVectors(CCM(); libsizes = 50:25:200, replace = true, rng) +crossmap(est, x, y) +``` + +To generate a distribution of cross-map estimates for each `l ∈ libsizes`, just call +crossmap repeatedly, e.g. + +```@example +using CausalityTools +using Random; rng = MersenneTwister(1234) +x, y = randn(rng, 200), randn(rng, 200) +est = RandomVectors(CCM(); libsizes = 50:25:200, replace = true, rng) +ρs = [crossmap(est, x, y) for i = 1:55] +M = hcat(ρs...) +``` + +Now, the `k`-th row of `M` contains `55` estimates of the correspondence measure `ρ` +at library size `libsizes[k]`. 
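+
+To summarize `M` with a single number per library size, we can for example take the median
+across the repetitions (a small sketch, assuming `M` from the block above is in scope):
+
+```julia
+using Statistics
+# Rows of `M` index `libsizes`, columns index the repeated estimates.
+median_ρs = vec(median(M, dims = 2))
+```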
+ +### [`ConvergentCrossMapping`](@ref) with [`RandomSegments`](@ref) + +When cross-mapping with the [`RandomSegments`](@ref) estimator, a single random subsample +of continguous, ordered time indices of length `l` is drawn for each library +size `l`, and cross mapping is performed using the embedding vectors corresponding +to those time indices. + +```@example +using CausalityTools +using Random; rng = MersenneTwister(1234) +x, y = randn(rng, 200), randn(rng, 200) + +# We'll draw a single sample at each `l ∈ libsizes`. We limit the library size to 100, +# because drawing segments of the data longer than half the available data doesn't make +# much sense. +est = RandomSegment(CCM(); libsizes = 50:25:100, rng) +crossmap(est, x, y) +``` + +As above, to generate a distribution of cross-map estimates for each `l ∈ libsizes`, just call +crossmap repeatedly, e.g. + +```@example +using CausalityTools +using Random; rng = MersenneTwister(1234) +x, y = randn(rng, 200), randn(rng, 200) +est = RandomSegment(CCM(); libsizes = 50:25:100, rng) +ρs = [crossmap(est, x, y) for i = 1:80] +M = hcat(ρs...) ``` -## Independence tests +Now, the `k`-th row of `M` contains `80` estimates of the correspondence measure `ρ` +at library size `libsizes[k]`. -```@contents -Pages = [ - "examples/examples_independence.md", -] -Depth = 3 +### Reproducing Sugihara et al. (2012) + +!!! note "Run blocks consecutively" + If copying these examples and running them locally, make sure the relevant packages (given in the first block) are loaded first. + +#### Figure 3A + +Let's reproduce figure 3A too, focusing only on [`ConvergentCrossMapping`](@ref) this time. In this figure, they compute the cross mapping for libraries of increasing size, always starting at time index 1. This approach - which we here call the [`ExpandingSegment`](@ref) estimator - is one of many ways of estimating the correspondence between observed and predicted value. + +For this example, they use a bidirectional system with asymmetrical coupling strength. + +```@example MAIN_CCM +using CausalityTools +using Statistics +using LabelledArrays +using StaticArrays +using DynamicalSystemsBase +using StateSpaceSets +using CairoMakie, Printf + +function eom_logistic_sugi(u, p, t) + (; rx, ry, βxy, βyx) = p + (; x, y) = u + + dx = x*(rx - rx*x - βxy*y) + dy = y*(ry - ry*y - βyx*x) + return SVector{2}(dx, dy) +end + +# βxy := effect on x of y +# βyx := effect on y of x +function logistic_sugi(; u0 = rand(2), rx, ry, βxy, βyx) + p = @LArray [rx, ry, βxy, βyx] (:rx, :ry, :βxy, :βyx) + DiscreteDynamicalSystem(eom_logistic_sugi, u0, p) +end + +# Used in `reproduce_figure_3A_naive`, and `reproduce_figure_3A_ensemble` below. 
+function add_to_fig!(fig_pos, libsizes, ρs_x̂y, ρs_ŷx; title = "", quantiles = false) + ax = Axis(fig_pos; title, aspect = 1, + xlabel = "Library size", ylabel = "Correlation (ρ)") + ylims!(ax, (-1, 1)) + hlines!([0], linestyle = :dash, alpha = 0.5, color = :grey) + scatterlines!(libsizes, median.(ρs_x̂y), label = "x̂|y", color = :blue) + scatterlines!(libsizes, median.(ρs_ŷx), label = "ŷ|x", color = :red) + if quantiles + band!(libsizes, quantile.(ρs_x̂y, 0.05), quantile.(ρs_x̂y, 0.95), color = (:blue, 0.5)) + band!(libsizes, quantile.(ρs_ŷx, 0.05), quantile.(ρs_ŷx, 0.95), color = (:red, 0.5)) + end + axislegend(ax, position = :rb) +end + +function reproduce_figure_3A_naive(definition::CrossmapMeasure) + sys_bidir = logistic_sugi(; u0 = [0.2, 0.4], rx = 3.7, ry = 3.700001, βxy = 0.02, βyx = 0.32); + x, y = columns(first(trajectory(sys_bidir, 3100, Ttr = 10000))); + libsizes = [20:2:50; 60:10:200; 300:50:500; 600:150:900; 1000:500:2000] + est = ExpandingSegment(definition; libsizes); + ρs_x̂y = crossmap(est, x, y) + ρs_ŷx = crossmap(est, y, x) + + with_theme(theme_minimal(), + markersize = 5) do + fig = Figure(resolution = (800, 300)) + add_to_fig!(fig[1, 1], libsizes, ρs_x̂y, ρs_ŷx; title = "`ExpandingSegment`") + fig + end +end + +reproduce_figure_3A_naive(ConvergentCrossMapping(d = 3)) ``` -## Causal graphs +Hm. This looks a bit like the paper, but the curve is not smooth. We can do better! + +It is not clear from the paper exactly *what* they plot in their Figure 3A, if they plot an average of some kind, or precisely what parameters and initial conditions they use. However, we can get a smoother plot by using a [`Ensemble`](@ref). Combined with a [`CrossmapEstimator`](@ref), it uses Monte Carlo resampling on subsets of the input data to compute an ensemble of `ρ`s that we here use to compute the median and 90-th percentile range for each library size. + +```@example MAIN_CCM +function reproduce_figure_3A_ensemble(definition::CrossmapMeasure) + sys_bidir = logistic_sugi(; u0 = [0.4, 0.2], rx = 3.8, ry = 3.5, βxy = 0.02, βyx = 0.1); + x, y = columns(first(trajectory(sys_bidir, 5000, Ttr = 10000))); + # Note: our time series are 1000 points long. When embedding, some points are + # lost, so we must use slightly less points for the segments than + # there are points in the original time series. 
+ libsizes = [20:2:50; 60:10:200; 300:50:500; 600:150:900; 1000:500:2000] + # No point in doing more than one rep, because there data are always the same + # for `ExpandingSegment.` + ensemble_ev = Ensemble(ExpandingSegment(definition; libsizes); nreps = 1) + ensemble_rs = Ensemble(RandomSegment(definition; libsizes); nreps = 30) + ensemble_rv = Ensemble(RandomVectors(definition; libsizes); nreps = 30) + ρs_x̂y_es = crossmap(ensemble_ev, x, y) + ρs_ŷx_es = crossmap(ensemble_ev, y, x) + ρs_x̂y_rs = crossmap(ensemble_rs, x, y) + ρs_ŷx_rs = crossmap(ensemble_rs, y, x) + ρs_x̂y_rv = crossmap(ensemble_rv, x, y) + ρs_ŷx_rv = crossmap(ensemble_rv, y, x) -```@contents -Pages = [ - "examples/examples_graphs.md", -] -Depth = 3 + with_theme(theme_minimal(), + markersize = 5) do + fig = Figure(resolution = (800, 300)) + add_to_fig!(fig[1, 1], libsizes, ρs_x̂y_es, ρs_ŷx_es; title = "`ExpandingSegment`", quantiles = false) # quantiles make no sense for `ExpandingSegment` + add_to_fig!(fig[1, 2], libsizes, ρs_x̂y_rs, ρs_ŷx_rs; title = "`RandomSegment`", quantiles = true) + add_to_fig!(fig[1, 3], libsizes, ρs_x̂y_rv, ρs_ŷx_rv; title = "`RandomVector`", quantiles = true) + fig + end +end + +reproduce_figure_3A_ensemble(ConvergentCrossMapping(d = 3, τ = -1)) ``` -## Miscellaneous +With the [`RandomVectors`](@ref) estimator, the mean of our ensemble `ρ`s seem to look pretty much identical to Figure 3A in Sugihara et al. The [`RandomSegment`](@ref) estimator also performs pretty well, but since subsampled segments are contiguous, there are probably some autocorrelation effects at play. + +We can avoid the autocorrelation issue by tuning the `w` parameter of the [`ConvergentCrossMapping`](@ref) measure, which is the +[Theiler window](https://juliadynamics.github.io/DynamicalSystems.jl/dev/embedding/StateSpaceSet/#Theiler-window). Setting the Theiler window to `w > 0`, we can exclude neighbors of a query point `p` that are close to `p` in time, and thus deal with autocorrelation issues that way (the default `w = 0` excludes only the point itself). Let's re-do the analysis with `w = 5`, just for fun. -```@contents -Pages = [ - "examples/examples_entropy.md", - "examples/examples_conditional_entropy.md", -] -Depth = 3 +```@example MAIN_CCM +reproduce_figure_3A_ensemble(ConvergentCrossMapping(d = 3, τ = -1, w = 5)) ``` + +There wasn't really that much of a difference, since for the logistic map, the autocorrelation function flips sign for every lag increase. However, for examples from other systems, tuning `w` may be important. + + +#### Figure 3B + +What about figure 3B? Here they generate time series of length 400 for a range of values for both coupling parameters, and plot the dominant direction $\Delta = \rho(\hat{x} | y) - \rho(\hat{y} | x)$. + +In the paper, they use a 1000 different parameterizations for the logistic map parameters, but don't state what is summarized in the plot. For simplicity, we'll therefore just stick to `rx = ry = 3.7`, as in the examples above, and just loop over the coupling strengths in either direction. + +```@example MAIN_CCM +function reproduce_figure_3B() + βxys = 0.0:0.025:0.4 + βyxs = 0.0:0.025:0.4 + ρx̂ys = zeros(length(βxys), length(βyxs)) + ρŷxs = zeros(length(βxys), length(βyxs)) + + for (i, βxy) in enumerate(βxys) + for (j, βyx) in enumerate(βyxs) + sys_bidir = logistic_sugi(; u0 = [0.2, 0.4], rx = 3.7, ry = 3.7, βxy, βyx); + # Generate 300 points. Randomly select a 100-pt long segment. 
+ x, y = columns(first(trajectory(sys_bidir, 1000, Ttr = 10000))); + definition = CCM(d = 3, w = 5, τ = -1) + ensemble = Ensemble(RandomVectors(definition; libsizes = 100), nreps = 50) + ρx̂ys[i, j] = mean(crossmap(ensemble, x, y)) + ρŷxs[i, j] = mean(crossmap(ensemble, y, x)) + end + end + Δ = ρŷxs .- ρx̂ys + + with_theme(theme_minimal(), + markersize = 5) do + fig = Figure(); + ax = Axis(fig[1, 1], xlabel = "βxy", ylabel = "βyx") + cont = contourf!(ax, Δ, levels = range(-1, 1, length = 10), + colormap = :curl) + ax.xticks = 1:length(βxys), string.([i % 2 == 0 ? βxys[i] : "" for i in 1:length(βxys)]) + ax.yticks = 1:length(βyxs), string.([i % 2 == 0 ? βyxs[i] : "" for i in 1:length(βyxs)]) + Colorbar(fig[1 ,2], cont, label = "Δ (ρ(ŷ|x) - ρ(x̂|y))") + tightlimits!(ax) + fig + end +end + +reproduce_figure_3B() +``` + +#### Figures 3C and 3D + +Let's reproduce figures 3C and 3D in Sugihara et al. (2012)[^Sugihara2012], which +introduced the [`ConvergentCrossMapping`](@ref) measure. +Equations and parameters can be found in their supplementary material. +Simulatenously, we also compute the [`PairwiseAsymmetricInference`](@ref) measure +from McCracken & Weigel (2014)[^McCracken2014], which is a related method, but uses a +slightly different embedding. + +[^Sugihara2012]: + Sugihara, G., May, R., Ye, H., Hsieh, C. H., Deyle, E., Fogarty, M., & Munch, S. + (2012). Detecting causality in complex ecosystems. science, 338(6106), 496-500. +[^McCracken2014]: + McCracken, J. M., & Weigel, R. S. (2014). Convergent cross-mapping and pairwise + asymmetric inference. Physical Review E, 90(6), 062903. + +```@example MAIN_CCM +using CausalityTools +using Statistics +using LabelledArrays +using StaticArrays +using DynamicalSystemsBase +using StateSpaceSets +using CairoMakie, Printf + + +# ----------------------------------------------------------------------------------------- +# Create 500-point long time series for Sugihara et al. (2012)'s example for figure 3. +# ----------------------------------------------------------------------------------------- +sys_unidir = logistic_sugi(; u0 = [0.2, 0.4], rx = 3.7, ry = 3.700001, βxy = 0.00, βyx = 0.32); +x, y = columns(first(trajectory(sys_unidir, 500, Ttr = 10000))); + +# ----------------------------------------------------------------------------------------- +# Cross map. +# ----------------------------------------------------------------------------------------- +m_ccm = ConvergentCrossMapping(d = 2) +m_pai = PairwiseAsymmetricInference(d = 2) +# Make predictions x̂y, i.e. predictions `x̂` made from embedding of y (AND x, if PAI) +t̂ccm_x̂y, tccm_x̂y, ρccm_x̂y = predict(m_ccm, x, y) +t̂pai_x̂y, tpai_x̂y, ρpai_x̂y = predict(m_pai, x, y); +# Make predictions ŷx, i.e. 
predictions `ŷ` made from embedding of x (AND y, if PAI) +t̂ccm_ŷx, tccm_ŷx, ρccm_ŷx = predict(m_ccm, y, x) +t̂pai_ŷx, tpai_ŷx, ρpai_ŷx = predict(m_pai, y, x); + +# ----------------------------------------------------------------------------------------- +# Plot results +# ----------------------------------------------------------------------------------------- +ρs = (ρccm_x̂y, ρpai_x̂y, ρccm_ŷx, ρpai_ŷx) +sccm_x̂y, spai_x̂y, sccm_ŷx, spai_ŷx = (map(ρ -> (@sprintf "%.3f" ρ), ρs)...,) + +ρs = (ρccm_x̂y, ρpai_x̂y, ρccm_ŷx, ρpai_ŷx) +sccm_x̂y, spai_x̂y, sccm_ŷx, spai_ŷx = (map(ρ -> (@sprintf "%.3f" ρ), ρs)...,) + +with_theme(theme_minimal(), + markersize = 5) do + fig = Figure(); + ax_ŷx = Axis(fig[2,1], aspect = 1, xlabel = "y(t) (observed)", ylabel = "ŷ(t) | x (predicted)") + ax_x̂y = Axis(fig[2,2], aspect = 1, xlabel = "x(t) (observed)", ylabel = "x̂(t) | y (predicted)") + xlims!(ax_ŷx, (0, 1)), ylims!(ax_ŷx, (0, 1)) + xlims!(ax_x̂y, (0, 1)), ylims!(ax_x̂y, (0, 1)) + ax_ts = Axis(fig[1, 1:2], xlabel = "Time (t)", ylabel = "Value") + scatterlines!(ax_ts, x[1:300], label = "x") + scatterlines!(ax_ts, y[1:300], label = "y") + axislegend() + scatter!(ax_ŷx, tccm_ŷx, t̂ccm_ŷx, label = "CCM (ρ = $sccm_ŷx)", color = :black) + scatter!(ax_ŷx, tpai_ŷx, t̂pai_ŷx, label = "PAI (ρ = $spai_ŷx)", color = :red) + axislegend(ax_ŷx, position = :lt) + scatter!(ax_x̂y, tccm_x̂y, t̂ccm_x̂y, label = "CCM (ρ = $sccm_x̂y)", color = :black) + scatter!(ax_x̂y, tpai_x̂y, t̂pai_x̂y, label = "PAI (ρ = $spai_x̂y)", color = :red) + axislegend(ax_x̂y, position = :lt) + fig +end +``` + +## [`PairwiseAsymmetricInference`](@ref) + +### Reproducing McCracken & Weigel (2014) + +Let's try to reproduce figure 8 from [McCracken2014](@citet)'s +paper on [`PairwiseAsymmetricInference`](@ref) (PAI). We'll start by defining the their example B (equations 6-7). This system consists of two +variables ``X`` and ``Y``, where ``X`` drives ``Y``. + +After we have computed the PAI in both directions, we define a measure of directionality as the difference between PAI in the ``X \to Y`` direction and in the ``Y \to X`` direction, so that if ``X`` drives ``Y``, then ``\Delta < 0``. + +```@example MAIN_CCM +using CausalityTools +using LabelledArrays +using StaticArrays +using DynamicalSystemsBase +using StateSpaceSets +using CairoMakie, Printf +using Distributions: Normal +using Statistics: mean, std + +function eom_nonlinear_sindriver(dx, x, p, n) + a, b, c, t, Δt = (p...,) + x, y = x[1], x[2] + 𝒩 = Normal(0, 1) + + dx[1] = sin(t) + dx[2] = a*x * (1 - b*x) + c* rand(𝒩) + p[end-1] += 1 # update t + + return +end + +function nonlinear_sindriver(;u₀ = rand(2), a = 1.0, b = 1.0, c = 2.0, Δt = 1) + DiscreteDynamicalSystem(eom_nonlinear_sindriver, u₀, [a, b, c, 0, Δt]) +end + +function reproduce_figure_8_mccraken(; + c = 2.0, Δt = 0.2, + as = 0.5:0.5:5.0, + bs = 0.5:0.5:5.0) + # ----------------------------------------------------------------------------------------- + # Generate many time series for many different values of the parameters `a` and `b`, + # and compute PAI. This will replicate the upper right panel of + # figure 8 in McCracken & Weigel (2014). + # ----------------------------------------------------------------------------------------- + + measure = PairwiseAsymmetricInference(d = 3) + + # Manually resample `nreps` length-`L` time series and use mean ρ(x̂|X̄y) - ρ(ŷ|Ȳx) + # for each parameter combination. 
+ nreps = 50 + L = 200 # length of timeseries + Δ = zeros(length(as), length(bs)) + for (i, a) in enumerate(as) + for (j, b) in enumerate(bs) + s = nonlinear_sindriver(; a, b, c, Δt) + x, y = columns(first(trajectory(s, 1000, Ttr = 10000))) + Δreps = zeros(nreps) + for i = 1:nreps + # Ensure we're subsampling at the same time indices. + ind_start = rand(1:(1000-L)) + r = ind_start:(ind_start + L) + Δreps[i] = @views crossmap(measure, y[r], x[r]) - + crossmap(measure, x[r], y[r]) + end + Δ[i, j] = mean(Δreps) + end + end + + # ----------------------------------------------------------------------------------------- + # An example time series for plotting. + # ----------------------------------------------------------------------------------------- + sys = nonlinear_sindriver(; a = 1.0, b = 1.0, c, Δt) + npts = 500 + orbit = first(trajectory(sys, npts, Ttr = 10000)) + x, y = columns(orbit) + with_theme(theme_minimal(), + markersize = 5) do + + X = x[1:300] + Y = y[1:300] + fig = Figure(); + ax_ts = Axis(fig[1, 1:2], xlabel = "Time (t)", ylabel = "Value") + scatterlines!(ax_ts, (X .- mean(X)) ./ std(X), label = "x") + scatterlines!(ax_ts, (Y .- mean(Y)) ./ std(Y), label = "y") + axislegend() + + ax_hm = Axis(fig[2, 1:2], xlabel = "a", ylabel = "b") + ax_hm.yticks = (1:length(as), string.([i % 2 == 0 ? as[i] : "" for i = 1:length(as)])) + ax_hm.xticks = (1:length(bs), string.([i % 2 == 0 ? bs[i] : "" for i = 1:length(bs)])) + hm = heatmap!(ax_hm, Δ, colormap = :viridis) + Colorbar(fig[2, 3], hm; label = "Δ' = ρ(ŷ | yx) - ρ(x̂ | xy)") + fig + end +end + +reproduce_figure_8_mccraken() +``` + +We haven't used as many parameter combinations as [McCracken2014](@citet) did, +but we get a figure that looks roughly similar to theirs. + +As expected, ``\Delta < 0`` for all parameter combinations, implying that ``X`` "PAI drives" ``Y``. diff --git a/docs/src/examples/examples_associations.md b/docs/src/examples/examples_associations.md new file mode 100644 index 000000000..699e6f9a0 --- /dev/null +++ b/docs/src/examples/examples_associations.md @@ -0,0 +1,1102 @@ +# Examples of association measure estimation + +## [`HellingerDistance`](@ref) + +### [From precomputed probabilities](@id example_HellingerDistance_precomputed_probabilities) + +```@example example_HellingerDistance +using CausalityTools +# From pre-computed PMFs +p1 = Probabilities([0.1, 0.5, 0.2, 0.2]) +p2 = Probabilities([0.3, 0.3, 0.2, 0.2]) +association(HellingerDistance(), p1, p2) +``` + +### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@ref)](@id example_HellingerDistance_JointProbabilities_OrdinalPatterns) + +We expect the Hellinger distance between two uncorrelated variables to be close to zero. + +```@example example_HellingerDistance +using CausalityTools +using Random; rng = Xoshiro(1234) +n = 100000 +x, y = rand(rng, n), rand(rng, n) +est = JointProbabilities(HellingerDistance(), CodifyVariables(OrdinalPatterns(m=3))) +div_hd = association(est, x, y) # pretty close to zero +``` + +## [`KLDivergence`](@ref) + +### [From precomputed probabilities](@id example_KLDivergence_precomputed_probabilities) + +```@example example_KLDivergence +using CausalityTools +# From pre-computed PMFs +p1 = Probabilities([0.1, 0.5, 0.2, 0.2]) +p2 = Probabilities([0.3, 0.3, 0.2, 0.2]) +association(KLDivergence(), p1, p2) +``` + +### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@ref)](@id example_KLDivergence_JointProbabilities_OrdinalPatterns) + +We expect the [`KlDivergence`](@ref) between two uncorrelated variables to be close to zero. 
+ +```@example example_KLDivergence +using CausalityTools +using Random; rng = Xoshiro(1234) +n = 100000 +x, y = rand(rng, n), rand(rng, n) +est = JointProbabilities(KLDivergence(), CodifyVariables(OrdinalPatterns(m=3))) +div_hd = association(est, x, y) # pretty close to zero +``` + + +## [`RenyiDivergence`](@ref) + +### [From precomputed probabilities](@id example_RenyiDivergence_precomputed_probabilities) + +```@example example_RenyiDivergence +using CausalityTools +# From pre-computed PMFs +p1 = Probabilities([0.1, 0.5, 0.2, 0.2]) +p2 = Probabilities([0.3, 0.3, 0.2, 0.2]) +association(RenyiDivergence(), p1, p2) +``` + +### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@ref)](@id example_RenyiDivergence_JointProbabilities_OrdinalPatterns) + +We expect the [`RenyiDivergence`](@ref) between two uncorrelated variables to be close to zero. + +```@example example_RenyiDivergence +using CausalityTools +using Random; rng = Xoshiro(1234) +n = 100000 +x, y = rand(rng, n), rand(rng, n) +est = JointProbabilities(RenyiDivergence(), CodifyVariables(OrdinalPatterns(m=3))) +div_hd = association(est, x, y) # pretty close to zero +``` + + +## [`VariationDistance`](@ref) + +### [From precomputed probabilities](@id example_VariationDistance_precomputed_probabilities) + +```@example example_VariationDistance +using CausalityTools +# From pre-computed PMFs +p1 = Probabilities([0.1, 0.5, 0.2, 0.2]) +p2 = Probabilities([0.3, 0.3, 0.2, 0.2]) +association(VariationDistance(), p1, p2) +``` + +### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@ref)](@id example_VariationDistance_JointProbabilities_OrdinalPatterns) + +We expect the [`VariationDistance`](@ref) between two uncorrelated variables to be close to zero. + +```@example example_VariationDistance +using CausalityTools +using Random; rng = Xoshiro(1234) +n = 100000 +x, y = rand(rng, n), rand(rng, n) +est = JointProbabilities(VariationDistance(), CodifyVariables(OrdinalPatterns(m=3))) +div_hd = association(est, x, y) # pretty close to zero +``` + +## [`JointEntropyShannon`](@ref) + +### [[`JointProbabilities`](@ref) with [`Dispersion`](@ref)](@id example_JointEntropyShannon_Dispersion) + +```@example example_JointEntropyShannon +using CausalityTools +using Random; rng = Xoshiro(1234) +x, y = rand(rng, 100), rand(rng, 100) +measure = JointEntropyShannon() +discretization = CodifyVariables(Dispersion(m = 2, c = 3)) +est = JointProbabilities(measure, discretization) +association(est, x, y) +``` + +## [`JointEntropyTsallis`](@ref) + +### [[`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref)](@id example_JointEntropyTsallis_OrdinalPatterns) + +```@example example_JointEntropyTsallis +using CausalityTools +using Random; rng = Xoshiro(1234) +x, y = rand(rng, 100), rand(rng, 100) +measure = JointEntropyTsallis() +discretization = CodifyVariables(OrdinalPatterns(m = 3)) +est = JointProbabilities(measure, discretization) +association(est, x, y) +``` + + +## [`JointEntropyRenyi`](@ref) + +### [[`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref)](@id example_JointEntropyRenyi_ValueBinning) + +```@example example_JointEntropyRenyi +using CausalityTools +using Random; rng = Xoshiro(1234) +x, y = rand(rng, 100), rand(rng, 100) +measure = JointEntropyRenyi(q = 0.5) +discretization = CodifyVariables(ValueBinning(2)) +est = JointProbabilities(measure, discretization) +association(est, x, y) +``` + +## [`ConditionalEntropyShannon`](@ref) + +### [Analytical examples](@id example_ConditionalEntropyShannon_analytical) + +This is essentially example 2.2.1 
in Cover & Thomas (2006), where they use the following
+relative frequency table as an example. Note that Julia is column-major, so we need to
+transpose their example. Then their `X` is in the first dimension of our table (along
+columns) and their `Y` is our second dimension (rows).
+
+```@example ce_contingency_table
+using CausalityTools
+freqs_yx = [1//8 1//16 1//32 1//32;
+    1//16 1//8 1//32 1//32;
+    1//16 1//16 1//16 1//16;
+    1//4  0//1 0//1 0//1];
+# `freqs_yx` is already normalized, so we can feed it directly to `Probabilities`
+pxy = Probabilities(freqs_yx)
+```
+
+The marginal distribution for `x` (first dimension) is
+
+```@example ce_contingency_table
+marginal(pxy, dims = 2)
+```
+
+The marginal distribution for `y` (second dimension) is
+
+```@example ce_contingency_table
+marginal(pxy, dims = 1)
+```
+
+And the Shannon conditional entropy ``H^S(X | Y)`` is
+
+```@example ce_contingency_table
+ce_x_given_y = association(ConditionalEntropyShannon(), pxy) |> Rational
+```
+
+This is the same as in their example. Hooray! To compute ``H^S(Y | X)``, we just need to
+flip the contingency matrix.
+
+```@example ce_contingency_table
+pyx = Probabilities(transpose(freqs_yx))
+ce_y_given_x = association(ConditionalEntropyShannon(), pyx) |> Rational
+```
+
+### [[`JointProbabilities`](@ref) + [`CodifyVariables`](@ref) + [`UniqueElements`](@ref)](@id example_ConditionalEntropyShannon_JointProbabilities_CodifyVariables_UniqueElements)
+
+We can of course also estimate conditional entropy from data. To do so, we'll use the
+[`JointProbabilities`](@ref) estimator, which constructs a multivariate PMF for us.
+Thus, we don't explicitly need a set of counts, like in the example above, because they
+are estimated under the hood for us.
+
+Let's first demonstrate on some categorical data. For that, we must use
+[`UniqueElements`](@ref) as the discretization (i.e. just count unique elements).
+
+```@example example_ConditionalEntropyShannon_JointProbabilities_CodifyVariables_UniqueElements
+using CausalityTools
+using Random; rng = Xoshiro(1234)
+n = 1000
+rating = rand(rng, 1:6, n)
+movie = rand(rng, ["The Witcher: the movie", "Lord of the Rings"], n)
+
+disc = CodifyVariables(UniqueElements())
+est = JointProbabilities(ConditionalEntropyShannon(), disc)
+association(est, rating, movie)
+```
+
+### [[`JointProbabilities`](@ref) + [`CodifyPoints`](@ref) + [`UniqueElementsEncoding`](@ref)](@id example_ConditionalEntropyShannon_JointProbabilities_CodifyPoints_UniqueElementsEncoding)
+
+```@example example_ConditionalEntropyShannon_JointProbabilities_CodifyPoints_UniqueElementsEncoding
+using CausalityTools
+using Random; rng = Xoshiro(1234)
+x, y, z = rand(rng, 1:5, 100), rand(rng, 1:5, 100), rand(rng, 1:3, 100)
+X = StateSpaceSet(x, z)
+Y = StateSpaceSet(y, z)
+disc = CodifyPoints(UniqueElementsEncoding(X), UniqueElementsEncoding(Y));
+est = JointProbabilities(ConditionalEntropyShannon(), disc);
+association(est, X, Y)
+```
+
+## [`ConditionalEntropyTsallisAbe`](@ref)
+
+### [[`JointProbabilities`](@ref) + [`CodifyVariables`](@ref) + [`UniqueElements`](@ref)](@id example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyVariables_UniqueElements)
+
+We'll here repeat the analysis we did for [`ConditionalEntropyShannon`](@ref) above.
+ +```@example example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyVariables_UniqueElements +using CausalityTools +using Random; rng = Xoshiro(1234) +n = 1000 +rating = rand(rng, 1:6, n) +movie = rand(rng, ["The Witcher: the movie", "Lord of the Rings"], n) + +disc = CodifyVariables(UniqueElements()) +est = JointProbabilities(ConditionalEntropyTsallisAbe(q =1.5), disc) +association(est, rating, movie) +``` + +### [[`JointProbabilities`](@ref) + [`CodifyPoints`](@ref) + [`UniqueElementsEncoding`](@ref)](@id example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyPoints_UniqueElementsEncoding) + +```@example example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyPoints_UniqueElementsEncoding +using CausalityTools +using Random; rng = Xoshiro(1234) +x, y, z = rand(rng, 1:5, 100), rand(rng, 1:5, 100), rand(rng, 1:3, 100) +X = StateSpaceSet(x, z) +Y = StateSpaceSet(y, z) +disc = CodifyPoints(UniqueElementsEncoding(X), UniqueElementsEncoding(Y)); +est = JointProbabilities(ConditionalEntropyTsallisAbe(q = 1.5), disc); +association(est, X, Y) +``` + + +## [`ConditionalEntropyTsallisFuruichi`](@ref) + +### [[`JointProbabilities`](@ref) + [`CodifyVariables`](@ref) + [`UniqueElements`](@ref)](@id example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyVariables_UniqueElements) + +We'll here repeat the analysis we did for [`ConditionalEntropyShannon`](@ref) and [`ConditionalEntropyTsallisAbe`](@ref) above. + +```@example example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyVariables_UniqueElements +using CausalityTools +using Random; rng = Xoshiro(1234) +n = 1000 +rating = rand(rng, 1:6, n) +movie = rand(rng, ["The Witcher: the movie", "Lord of the Rings"], n) + +disc = CodifyVariables(UniqueElements()) +est = JointProbabilities(ConditionalEntropyTsallisFuruichi(q =0.5), disc) +association(est, rating, movie) +``` + +### [[`JointProbabilities`](@ref) + [`CodifyPoints`](@ref) + [`UniqueElementsEncoding`](@ref)](@id example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyPoints_UniqueElementsEncoding) + +```@example example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyPoints_UniqueElementsEncoding +using CausalityTools +using Random; rng = Xoshiro(1234) +x, y, z = rand(rng, 1:5, 100), rand(rng, 1:5, 100), rand(rng, 1:3, 100) +X = StateSpaceSet(x, z) +Y = StateSpaceSet(y, z) +disc = CodifyPoints(UniqueElementsEncoding(X), UniqueElementsEncoding(Y)); +est = JointProbabilities(ConditionalEntropyTsallisFuruichi(q = 0.5), disc); +association(est, X, Y) +``` + + +## [`MIShannon`](@ref) + +### [[`JointProbabilities`](@ref) + [`ValueBinning`](@ref)](@id example_MIShannon_JointProbabilities_ValueBinning) + +```@example mi_demonstration +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 1000) +y = rand(rng, 1000) +discretization = CodifyVariables(ValueBinning(FixedRectangularBinning(0, 1, 5))) +est = JointProbabilities(MIShannon(), discretization) +association(est, x, y) +``` + + +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MIShannon_JointProbabilities_UniqueElements) + +The [`JointProbabilities`](@ref) estimator can also be used with categorical data. +For example, let's compare the Shannon mutual information between the preferences +of a population sample with regards to different foods. 
+ +```@example mi_demonstration +using CausalityTools +n = 1000 +preferences = rand(["neutral", "like it", "hate it"], n); +random_foods = rand(["water", "flour", "bananas", "booze", "potatoes", "beans", "soup"], n) +biased_foods = map(preferences) do preference + if cmp(preference, "neutral") == 1 + return rand(["water", "flour"]) + elseif cmp(preference, "like it") == 1 + return rand(["bananas", "booze"]) + else + return rand(["potatoes", "beans", "soup"]) + end +end + +est = JointProbabilities(MIShannon(), UniqueElements()) +association(est, preferences, biased_foods), association(est, preferences, random_foods) +``` + +### [Dedicated [`GaussianMI`](@ref) estimator](@id example_MIShannon_GaussianMI) + +```@example mi_demonstration +using CausalityTools +using Distributions +using Statistics + +n = 1000 +using CausalityTools +x = randn(1000) +y = rand(1000) .+ x +association(GaussianMI(MIShannon()), x, y) # defaults to `MIShannon()` +``` + +### [Dedicated [`KraskovStögbauerGrassberger1`](@ref) estimator](@id example_MIShannon_KSG1) + +```@example mi_demonstration +using CausalityTools +x, y = rand(1000), rand(1000) +association(KSG1(MIShannon(); k = 5), x, y) +``` + +### [Dedicated [`KraskovStögbauerGrassberger2`](@ref) estimator](@id example_MIShannon_KSG2) + +```@example mi_demonstration +using CausalityTools +x, y = rand(1000), rand(1000) +association(KSG2(MIShannon(); k = 5), x, y) +``` + +### [Dedicated [`GaoKannanOhViswanath`](@ref) estimator](@id example_MIShannon_GaoKannanOhViswanath) + +```@example mi_demonstration +using CausalityTools +x, y = rand(1000), rand(1000) +association(GaoKannanOhViswanath(MIShannon(); k = 10), x, y) +``` + +### [[`EntropyDecomposition`](@ref) + [`Kraskov`](@ref)](@id example_MIShannon_EntropyDecomposition_Kraskov) + +We can compute [`MIShannon`](@ref) by naively applying a [`DifferentialEntropyEstimator`](@ref). +Note that this doesn't apply any bias correction. + +```@example mi_demonstration +using CausalityTools +x, y = rand(1000), rand(1000) +association(EntropyDecomposition(MIShannon(), Kraskov(k = 3)), x, y) +``` + + +### [[`EntropyDecomposition`](@ref) + [`BubbleSortSwaps`](@ref)](@id example_MIShannon_EntropyDecomposition_BubbleSortSwaps) + +We can also compute [`MIShannon`](@ref) by naively applying a [`DiscreteEntropyEstimator`](@ref). +Note that this doesn't apply any bias correction. + +```@example mi_demonstration +using CausalityTools +x, y = rand(1000), rand(1000) +disc = CodifyVariables(BubbleSortSwaps(m=5)) +hest = PlugIn(Shannon()) +association(EntropyDecomposition(MIShannon(), hest, disc), x, y) +``` + + +### [[`EntropyDecomposition`](@ref) + [`Jackknife`](@ref) + [`ValueBinning`](@ref)](@id example_MIShannon_EntropyDecomposition_Jackknife_ValueBinning) + +A [`ValueBinning`](@ref) estimator can be used to bin the data and compute +discrete Shannon mutual information. + +```@example mi_demonstration +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 50) +y = rand(rng, 50) + +# Use the H3-estimation method with a discrete visitation frequency based +# probabilities estimator over a fixed grid covering the range of the data, +# which is on [0, 1]. 
+discretization = CodifyVariables(ValueBinning(FixedRectangularBinning(0, 1, 5))) +hest = Jackknife(Shannon()) +est = EntropyDecomposition(MIShannon(), hest, discretization) +association(est, x, y) +``` + +## [`MIRenyiJizba`](@ref) + +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MIRenyiJizba_JointProbabilities_UniqueElements) + +[`MIRenyiJizba`](@ref) can be estimated for categorical data using [`JointProbabilities`](@ref) estimator +with the [`UniqueElements`](@ref) outcome space. + +```@example example_mirenyijizba +using CausalityTools +using Random; rng = Xoshiro(1234) +x = rand(rng, ["a", "b", "c"], 200); +y = rand(rng, ["hello", "yoyo", "heyhey"], 200); +est = JointProbabilities(MIRenyiJizba(), UniqueElements()) +association(est, x, y) +``` + +### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_MIRenyiJizba_JointProbabilities_LeonenkoProzantoSavani) + +[`MIRenyiJizba`](@ref) can also estimated for numerical data using [`EntropyDecomposition`](@ref) +in combination with any [`DifferentialInfoEstimator`](@ref) capable of estimating differential +[`Renyi`](@ref) entropy. + +```@example example_MIRenyiJizba +using CausalityTools +using Random; rng = Xoshiro(1234) +x = randn(rng, 50); y = randn(rng, 50); +def = MIRenyiJizba() +est_diff = EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi(), k=3)) +association(est_diff, x, y) +``` + +### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_MIRenyiJizba_EntropyDecomposition_ValueBinning) + +[`MIRenyiJizba`](@ref) can also estimated for numerical data using [`EntropyDecomposition`](@ref) +in combination with any [`DiscreteInfoEstimator`](@ref) capable of estimating differential +[`Renyi`](@ref) entropy over some [`OutcomeSpace`](@ref), e.g. [`ValueBinning`](@ref). + + +```@example example_MIRenyiJizba +using CausalityTools +using Random; rng = Xoshiro(1234) +x = randn(rng, 50); y = randn(rng, 50); +def = MIRenyiJizba() + +disc = CodifyVariables(ValueBinning(2)) +est_disc = EntropyDecomposition(def, PlugIn(Renyi()), disc); +association(est_disc, x, y) +``` + +## [`MIRenyiSarbu`](@ref) + +[`MIRenyiSarbu`](@ref) can be estimated using the [`JointProbabilities`](@ref) estimator +in combination with any [`CodifyVariables`](@ref) or [`CodifyPoints`](@ref) discretization scheme. + +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MIRenyiSarbu_JointProbabilities_UniqueElements) + +```@example example_MIRenyiSarbu +using CausalityTools +using Random; rng = Xoshiro(1234) +x = rand(rng, ["a", "b", "c"], 200) +y = rand(rng, ["hello", "yoyo", "heyhey"], 200) + +est = JointProbabilities(MIRenyiSarbu(), CodifyVariables(UniqueElements())) +association(est, x, y) +``` + +### [[`JointProbabilities`](@ref) + [`CosineSimilarityBinning`](@ref)](@id example_MIRenyiSarbu_JointProbabilities_CosineSimilarityBinning) + +```@example example_MIRenyiSarbu +using CausalityTools +using Random; rng = Xoshiro(1234) +x = rand(rng, 200) +y = rand(rng, 200) + +est = JointProbabilities(MIRenyiSarbu(), CodifyVariables(CosineSimilarityBinning())) +association(est, x, y) +``` + +## [`MITsallisFuruichi`](@ref) + +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MITsallisFuruichi_JointProbabilities_UniqueElements) + +[`MITsallisFuruichi`](@ref) can be estimated using the [`JointProbabilities`](@ref) estimator +in combination with any [`CodifyVariables`](@ref) or [`CodifyPoints`](@ref) discretization scheme. 
+ +```@example example_MITsallisFuruichi +using CausalityTools +using Random; rng = Xoshiro(1234) +x = rand(rng, 200) +y = rand(rng, 200) + +est = JointProbabilities(MITsallisFuruichi(q = 0.3), UniqueElements()) +association(est, x, y) +``` + +### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_MITsallisFuruichi_EntropyDecomposition_LeonenkoProsantoSavani) + +```@example example_MITsallisFuruichi +using CausalityTools +using Random; rng = Xoshiro(1234) +x = rand(rng, 200) +y = rand(rng, 200) + +est_diff = EntropyDecomposition(MITsallisFuruichi(), LeonenkoProzantoSavani(Tsallis(q= 2))) +association(est_diff, x, y) +``` + + +### [[`EntropyDecomposition`](@ref) + [`Dispersion`](@ref)](@id example_MITsallisFuruichi_EntropyDecomposition_Dispersion) + +```@example example_MITsallisFuruichi +using CausalityTools +using Random; rng = Xoshiro(1234) +x = rand(rng, 200) +y = rand(rng, 200) +disc = CodifyVariables(Dispersion()) +est_disc = EntropyDecomposition(MITsallisFuruichi(), PlugIn(Tsallis()), disc) + +association(est_disc, x, y) +``` + + +## [`MITsallisMartin`](@ref) + +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MITsallisMartin_JointProbabilities_UniqueElements) + +```@example example_MITsallisMartin +using CausalityTools +using Random; rng = Xoshiro(1234) +x = rand(rng, 200) +y = rand(rng, 200) + +est = JointProbabilities(MITsallisMartin(q = 1.5), UniqueElements()) +association(est, x, y) +``` + +### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_MITsallisMartin_EntropyDecomposition_LeonenkoProsantoSavani) + +[`MITsallisMartin`](@ref) can be estimated using a decomposition into entropy +terms using [`EntropyDecomposition`](@ref) with any compatible estimator +that can estimate differential [`Tsallis`](@ref) entropy. + + +```@example example_MITsallisMartin +using CausalityTools +using Random; rng = Xoshiro(1234) +x = rand(rng, 500) +y = rand(rng, 500) + +est_diff = EntropyDecomposition(MITsallisMartin(), LeonenkoProzantoSavani(Tsallis(q= 1.5))) +association(est_diff, x, y) +``` + +### [[`EntropyDecomposition`](@ref) + [`OrdinalPatterns`](@ref)](@id example_MITsallisMartin_EntropyDecomposition_OrdinalPatterns) + + +```@example +using CausalityTools +using Random; rng = Xoshiro(1234) +x = rand(rng, 200) +y = rand(rng, 200) +disc = CodifyVariables(OrdinalPatterns()) +est_disc = EntropyDecomposition(MITsallisMartin(), PlugIn(Tsallis()), disc) + +association(est_disc, x, y) +``` + +## [`CMIShannon`](@ref) + +### [`CMIShannon`](@ref) with [`GaussianCMI`](@ref) + +```@example mi_demonstration +using CausalityTools +using Distributions +using Statistics + +n = 1000 +# A chain X → Y → Z +x = randn(1000) +y = randn(1000) .+ x +z = randn(1000) .+ y +association(GaussianCMI(), x, z, y) # defaults to `CMIShannon()` +``` + +### [[`CMIShannon`](@ref) with [`FPVP`](@ref)](@id example_CMIShannon_FPVP) + +```@example mi_demonstration +using CausalityTools +using Distributions +using Statistics + +n = 1000 +# A chain X → Y → Z +x = rand(Normal(-1, 0.5), n) +y = rand(BetaPrime(0.5, 1.5), n) .+ x +z = rand(Chisq(100), n) +z = (z ./ std(z)) .+ y + +# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because +# the link between X and Z is exclusively through Y, so when observing Y, +# X and Z should appear independent. 
+association(FPVP(k = 5), x, z, y) # defaults to `CMIShannon()` +``` + +### [`CMIShannon`](@ref) with [`MesnerShalizi`](@ref) + +```@example mi_demonstration +using CausalityTools +using Distributions +using Statistics +using Random; rng = Xoshiro(1234) + +n = 1000 +# A chain X → Y → Z +x = rand(rng, Normal(-1, 0.5), n) +y = rand(rng, BetaPrime(0.5, 1.5), n) .+ x +z = rand(rng, Chisq(100), n) +z = (z ./ std(z)) .+ y + +# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because +# the link between X and Z is exclusively through Y, so when observing Y, +# X and Z should appear independent. +association(MesnerShalizi(; k = 10), x, z, y) # defaults to `CMIShannon()` +``` + +### [`CMIShannon`](@ref) with [`Rahimzamani`](@ref) + +```@example mi_demonstration +using CausalityTools +using Distributions +using Statistics +using Random; rng = Xoshiro(1234) + +n = 1000 +# A chain X → Y → Z +x = rand(rng, Normal(-1, 0.5), n) +y = rand(rng, BetaPrime(0.5, 1.5), n) .+ x +z = rand(rng, Chisq(100), n) +z = (z ./ std(z)) .+ y + +# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because +# the link between X and Z is exclusively through Y, so when observing Y, +# X and Z should appear independent. +association(Rahimzamani(CMIShannon(base = 10); k = 10), x, z, y) +``` + +## [`CMIRenyiPoczos`](@ref) + +### [[`PoczosSchneiderCMI`](@ref)](@id CMIRenyiPoczos_PoczosSchneiderCMI) + +```@example example_cmirenyipoczos +using CausalityTools +using Distributions +using Statistics +using Random; rng = Xoshiro(1234) + +n = 1000 +# A chain X → Y → Z +x = rand(rng, Normal(-1, 0.5), n) +y = rand(rng, BetaPrime(0.5, 1.5), n) .+ x +z = rand(rng, Chisq(100), n) +z = (z ./ std(z)) .+ y + +# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because +# the link between X and Z is exclusively through Y, so when observing Y, +# X and Z should appear independent. +est = PoczosSchneiderCMI(CMIRenyiPoczos(base = 2, q = 1.2); k = 5) +association(est, x, z, y) +``` + +In addition to the dedicated [`ConditionalMutualInformationEstimator`](@ref)s, any [`MutualInformationEstimator`](@ref) can also be used to compute conditional +mutual information using the chain rule of mutual information. However, the naive +application of these estimators don't perform any bias correction when +taking the difference of mutual information terms. + +## [`CMIShannon`](@ref) + +### [[`MIDecomposition`](@ref) + [`KSG1`](@ref)](@id example_CMIShannon_MIDecomposition_KSG1) + +```@example mi_demonstration +using CausalityTools +using Distributions +using Statistics + +n = 1000 +# A chain X → Y → Z +x = rand(Normal(-1, 0.5), n) +y = rand(BetaPrime(0.5, 1.5), n) .+ x +z = rand(Chisq(100), n) +z = (z ./ std(z)) .+ y + +# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because +# the link between X and Z is exclusively through Y, so when observing Y, +# X and Z should appear independent. +est = MIDecomposition(CMIShannon(base = 2), KSG1(k = 10)) +association(est, x, z, y) +``` + +### [[`EntropyDecomposition`](@ref) + [`Kraskov`](@ref)](@id example_CMIShannon_EntropyDecomposition_Kraskov) + +Any [`DifferentialEntropyEstimator`](@ref) can also be used to compute conditional +mutual information using a sum of entropies. For that, we +usethe [`EntropyDecomposition`](@ref) estimator. No bias correction is applied for +[`EntropyDecomposition`](@ref) either. 
+
+```@example
+using CausalityTools
+using Distributions
+using Random; rng = Xoshiro(1234)
+n = 500
+# A chain X → Y → Z
+x = rand(rng, Epanechnikov(0.5, 1.0), n)
+y = rand(rng, Normal(0, 0.2), n) .+ x
+z = rand(rng, FDist(3, 2), n)
+est = EntropyDecomposition(CMIShannon(), Kraskov(k = 5))
+association(est, x, z, y)
+```
+
+Any [`DiscreteInfoEstimator`](@ref) that computes entropy can also be used to compute
+conditional mutual information using a sum of entropies. For that, we also
+use [`EntropyDecomposition`](@ref). In the discrete case, we additionally have to specify a
+discretization (an [`OutcomeSpace`](@ref)).
+
+### [[`EntropyDecomposition`](@ref) + [`ValueBinning`](@ref)](@id example_CMIShannon_EntropyDecomposition_ValueBinning)
+
+```@example
+using CausalityTools
+using Distributions
+using Random; rng = Xoshiro(1234)
+n = 500
+# A chain X → Y → Z
+x = rand(rng, Epanechnikov(0.5, 1.0), n)
+y = rand(rng, Normal(0, 0.2), n) .+ x
+z = rand(rng, FDist(3, 2), n)
+discretization = CodifyVariables(ValueBinning(RectangularBinning(5)))
+hest = PlugIn(Shannon())
+est = EntropyDecomposition(CMIShannon(), hest, discretization)
+association(est, x, y, z)
+```
+
+## [`CMIRenyiJizba`](@ref)
+
+### [[`JointProbabilities`](@ref) + [`BubbleSortSwaps`](@ref)](@id example_CMIRenyiJizba_JointProbabilities_BubbleSortSwaps)
+
+```@example example_CMIRenyiJizba
+using CausalityTools
+using Random; rng = Xoshiro(1234)
+x = rand(rng, 100)
+y = x .+ rand(rng, 100)
+z = y .+ rand(rng, 100)
+disc = CodifyVariables(BubbleSortSwaps(m = 4))
+est = JointProbabilities(CMIRenyiJizba(), disc)
+association(est, x, z, y)
+```
+
+
+### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_CMIRenyiJizba_EntropyDecomposition_LeonenkoProzantoSavani)
+
+```@example example_CMIRenyiJizba
+using CausalityTools
+using Random; rng = Xoshiro(1234)
+x, y, z = rand(rng, 1000), rand(rng, 1000), rand(rng, 1000)
+def = CMIRenyiJizba(q = 1.5)
+
+# Using a differential Rényi entropy estimator
+est = EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi(), k = 10))
+association(est, x, y, z)
+```
+
+
+### [[`EntropyDecomposition`](@ref) + [`OrdinalPatterns`](@ref)](@id example_CMIRenyiJizba_EntropyDecomposition_OrdinalPatterns)
+
+```@example example_CMIRenyiJizba
+using CausalityTools
+using Random; rng = Xoshiro(1234)
+x, y, z = rand(rng, 1000), rand(rng, 1000), rand(rng, 1000)
+def = CMIRenyiJizba(q = 1.5)
+
+# Using a plug-in Rényi entropy estimator, discretizing using ordinal patterns.
+est = EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(OrdinalPatterns(m=2)), RelativeAmount())
+association(est, x, y, z)
+```
+
+## [`TEShannon`](@ref)
+
+### [[`EntropyDecomposition`](@ref) + [`TransferOperator`](@ref)](@id example_TEShannon_EntropyDecomposition_TransferOperator)
+
+For transfer entropy examples, we'll construct some time series for which
+there is time-delayed forcing between variables.
+
+```@example transfer_entropy_examples
+using CausalityTools
+using DynamicalSystemsBase
+using StableRNGs
+using Random # needed for the default `rng` field of the struct below
+rng = StableRNG(123)
+
+Base.@kwdef struct Logistic4Chain{V, RX, RY, RZ, RW, C1, C2, C3, Σ1, Σ2, Σ3, RNG}
+    xi::V = [0.1, 0.2, 0.3, 0.4]
+    rx::RX = 3.9
+    ry::RY = 3.6
+    rz::RZ = 3.6
+    rw::RW = 3.8
+    c_xy::C1 = 0.4
+    c_yz::C2 = 0.4
+    c_zw::C3 = 0.35
+    σ_xy::Σ1 = 0.05
+    σ_yz::Σ2 = 0.05
+    σ_zw::Σ3 = 0.05
+    rng::RNG = Random.default_rng()
+end
+
+function eom_logistic4_chain(u, p::Logistic4Chain, t)
+    (; xi, rx, ry, rz, rw, c_xy, c_yz, c_zw, σ_xy, σ_yz, σ_zw, rng) = p
+    x, y, z, w = u
+    f_xy = (y +  c_xy*(x + σ_xy * rand(rng)) ) / (1 + c_xy*(1+σ_xy))
+    f_yz = (z +  c_yz*(y + σ_yz * rand(rng)) ) / (1 + c_yz*(1+σ_yz))
+    f_zw = (w +  c_zw*(z + σ_zw * rand(rng)) ) / (1 + c_zw*(1+σ_zw))
+    dx = rx * x * (1 - x)
+    dy = ry * (f_xy) * (1 - f_xy)
+    dz = rz * (f_yz) * (1 - f_yz)
+    dw = rw * (f_zw) * (1 - f_zw)
+    return SVector{4}(dx, dy, dz, dw)
+end
+
+function system(definition::Logistic4Chain)
+    return DiscreteDynamicalSystem(eom_logistic4_chain, definition.xi, definition)
+end
+
+# An example system where `X → Y → Z → W`.
+sys = system(Logistic4Chain(; rng))
+x, y, z, w = columns(first(trajectory(sys, 300, Ttr = 10000)))
+
+precise = true # precise bin edges
+discretization = CodifyVariables(TransferOperator(RectangularBinning(2, precise)))
+est_disc_to = EntropyDecomposition(TEShannon(), PlugIn(Shannon()), discretization);
+association(est_disc_to, x, y), association(est_disc_to, y, x)
+```
+
+The Shannon-type transfer entropy from `x` to `y` is stronger than from `y` to `x`,
+which is what we expect if `x` drives `y`.
+
+```@example transfer_entropy_examples
+association(est_disc_to, x, z), association(est_disc_to, x, z, y)
+```
+
+The Shannon-type transfer entropy from `x` to `z` is stronger than the transfer entropy from `x` to `z` given `y`. This is expected, because `x` drives `z` *through*
+`y`, so "conditioning away" the effect of `y` should decrease the estimated
+information transfer.
+
+### [[`SymbolicTransferEntropy`](@ref) estimator](@id example_TEShannon_SymbolicTransferEntropy)
+
+The [`SymbolicTransferEntropy`](@ref) estimator is just a convenience wrapper that uses
+[`CodifyVariables`](@ref) with the [`OrdinalPatterns`](@ref) outcome space to
+discretize the input time series before computing transfer entropy.
+
+We'll use coupled time series from the `Logistic4Chain` system above, where `x → y → z → w`.
+Thus, we expect that the association for the direction `x → y` is larger than for `y → x`. We also expect an association `x → z`, but the association should weaken when conditioning
+on the intermediate variable `y`.
+
+```@example transfer_entropy_examples
+using CausalityTools
+using DynamicalSystemsBase
+using Random; rng = Xoshiro(1234)
+sys = system(Logistic4Chain(; rng))
+x, y, z, w = columns(first(trajectory(sys, 300, Ttr = 10000)))
+est = SymbolicTransferEntropy(m = 5)
+association(est, x, y), association(est, y, x), association(est, x, z), association(est, x, z, y)
+```
+
+## [`TERenyiJizba`](@ref)
+
+### [[`EntropyDecomposition`](@ref) + [`TransferOperator`](@ref)](@id example_TERenyiJizba_EntropyDecomposition_TransferOperator)
+
+We can perform the same type of analysis as above using [`TERenyiJizba`](@ref)
+instead of [`TEShannon`](@ref).
+
+```@example transfer_entropy_examples
+using CausalityTools
+using DynamicalSystemsBase
+using StableRNGs; rng = StableRNG(123)
+
+# An example system where `X → Y → Z → W`.
+sys = system(Logistic4Chain(; rng))
+x, y, z, w = columns(first(trajectory(sys, 300, Ttr = 10000)))
+
+precise = true # precise bin edges
+discretization = CodifyVariables(TransferOperator(RectangularBinning(2, precise)))
+est_disc_to = EntropyDecomposition(TERenyiJizba(), PlugIn(Renyi()), discretization);
+association(est_disc_to, x, y), association(est_disc_to, y, x)
+```
+
+## [`ConvergentCrossMapping`](@ref)
+
+
+### [[`RandomVectors`](@ref) estimator](@id example_ConvergentCrossMapping_RandomVectors)
+
+When cross-mapping with the [`RandomVectors`](@ref) estimator, a single random subsample
+of time indices (i.e. not in any particular order) of length `l` is drawn for each library
+size `l`, and cross mapping is performed using the embedding vectors corresponding
+to those time indices.
+
+```@example example_ConvergentCrossMapping
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+x, y = randn(rng, 200), randn(rng, 200)
+
+# We'll draw a single sample at each `l ∈ libsizes`. Sampling with replacement is then
+# necessary, because our 200-pt timeseries will result in embeddings with
+# less than 200 points.
+est = RandomVectors(ConvergentCrossMapping(d = 3); libsizes = 50:25:200, replace = true, rng)
+crossmap(est, x, y)
+```
+
+To generate a distribution of cross-map estimates for each `l ∈ libsizes`, just call
+`crossmap` repeatedly, e.g.
+
+```@example example_ConvergentCrossMapping
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+using Statistics
+
+x, y = randn(rng, 300), randn(rng, 300)
+def = ConvergentCrossMapping(d = 3)
+libsizes = 25:25:200
+
+ρs = [[crossmap(RandomVectors(def; libsizes = L, replace = true, rng), x, y) for i = 1:50] for L in libsizes]
+
+using CairoMakie
+f = Figure(); ax = Axis(f[1, 1]);
+plot!(ax, libsizes, mean.(ρs))
+errorbars!(ax, libsizes, mean.(ρs), std.(ρs))
+f
+```
+
+Now, the `k`-th element of `ρs` contains `50` estimates of the correspondence measure `ρ`
+at library size `libsizes[k]`.
+
+### [[`RandomSegment`](@ref) estimator](@id example_ConvergentCrossMapping_RandomSegment)
+
+When cross-mapping with the [`RandomSegment`](@ref) estimator, a single random subsample
+of contiguous, ordered time indices of length `l` is drawn for each library
+size `l`, and cross mapping is performed using the embedding vectors corresponding
+to those time indices.
+
+```@example example_ConvergentCrossMapping
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+x, y = randn(rng, 200), randn(rng, 200)
+
+# We'll draw a single sample at each `l ∈ libsizes`. We limit the library size to 100,
+# because drawing segments of the data longer than half the available data doesn't make
+# much sense.
+est = RandomSegment(ConvergentCrossMapping(d = 3); libsizes = 50:25:100, rng)
+crossmap(est, x, y)
+```
+
+As above, to generate a distribution of cross-map estimates for each `l ∈ libsizes`, just call
+`crossmap` repeatedly, e.g.
+
+```@example example_ConvergentCrossMapping
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+using Statistics
+
+x, y = randn(rng, 200), randn(rng, 200)
+def = ConvergentCrossMapping(d = 3)
+libsizes = 25:25:100
+
+ρs = [[crossmap(RandomSegment(def; libsizes = L, rng), x, y) for i = 1:50] for L in libsizes]
+
+f = Figure(); ax = Axis(f[1, 1]);
+plot!(ax, libsizes, mean.(ρs))
+errorbars!(ax, libsizes, mean.(ρs), std.(ρs))
+f
+```
+
+
+## [`PairwiseAsymmetricInference`](@ref)
+
+We repeat the analyses above, but here use the pairwise asymmetric inference algorithm
+instead of the convergent cross map algorithm.
+
+### [[`RandomVectors`](@ref) estimator](@id example_PairwiseAsymmetricInference_RandomVectors)
+
+
+```@example example_PairwiseAsymmetricInference
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+x, y = randn(rng, 300), randn(rng, 300)
+
+# We'll draw a single sample at each `l ∈ libsizes`. Sampling with replacement is then
+# necessary, because our 300-pt timeseries will result in embeddings with
+# less than 300 points.
+est = RandomVectors(PairwiseAsymmetricInference(d = 3); libsizes = 50:25:200, replace = true, rng)
+crossmap(est, x, y)
+```
+
+To generate a distribution of cross-map estimates for each `l ∈ libsizes`, just call
+`crossmap` repeatedly, e.g.
+
+```@example example_PairwiseAsymmetricInference
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+using Statistics
+
+x, y = randn(rng, 300), randn(rng, 300)
+def = PairwiseAsymmetricInference(d = 3)
+libsizes = 25:25:200
+
+ρs = [[crossmap(RandomVectors(def; libsizes = L, replace = true, rng), x, y) for i = 1:50] for L in libsizes]
+
+using CairoMakie
+f = Figure(); ax = Axis(f[1, 1]);
+plot!(ax, libsizes, mean.(ρs))
+errorbars!(ax, libsizes, mean.(ρs), std.(ρs))
+f
+```
+
+### [[`RandomSegment`](@ref) estimator](@id example_PairwiseAsymmetricInference_RandomSegment)
+
+```@example example_PairwiseAsymmetricInference
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+x, y = randn(rng, 200), randn(rng, 200)
+
+# We'll draw a single sample at each `l ∈ libsizes`. We limit the library size to 100,
+# because drawing segments of the data longer than half the available data doesn't make
+# much sense.
+est = RandomSegment(PairwiseAsymmetricInference(d = 3); libsizes = 50:25:100, rng)
+crossmap(est, x, y)
+```
+
+As above, to generate a distribution of cross-map estimates for each `l ∈ libsizes`, just call
+`crossmap` repeatedly, e.g.
+
+```@example
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+using Statistics
+
+x, y = randn(rng, 300), randn(rng, 300)
+def = PairwiseAsymmetricInference(d = 3)
+libsizes = 25:25:100
+
+ρs = [[crossmap(RandomSegment(def; libsizes = L, rng), x, y) for i = 1:50] for L in libsizes]
+
+using CairoMakie
+f = Figure(); ax = Axis(f[1, 1]);
+plot!(ax, libsizes, mean.(ρs))
+errorbars!(ax, libsizes, mean.(ρs), std.(ρs))
+f
+```
+
+## [[`MCR`](@ref)](@id example_MCR)
+
+To quantify association by the mean conditional probability of recurrence (MCR),
+we'll create a chain of variables where `X` drives `Y`, which in turn drives
+`Z`. We then expect significant detectable association between `X` and `Y`,
+between `Y` and `Z`, and also between `X` and `Z` (because `Y` transfers information
+from `X` to `Z`). We expect the association between `X` and `Z` to disappear when
+conditioning on `Y` (since we're then "removing the effect" of `Y`).
+ +```@example example_mcr +using CausalityTools +using Random; rng = Xoshiro(1234); +x = rand(rng, 300); y = rand(rng, 300) .* sin.(x); z = rand(rng, 300) .* y; +est = MCR(r = 0.5) +association(est, x, y), association(est, x, z), association(est, y, z), association(est, x, z, y) +``` + +## [[`RMCD`](@ref)](@id example_RMCD) + +To quantify association by the recurrence measure of conditional dependence (RMCD), +we'll create a chain of variables where `X` drives `Y`, which in turn drives +`Z`. We then expect there to be significant detectable association between both +`X` and `Y`, `Y` and `Z` and also `X` and `Z` (because `Y` transfers information +from `X` to `Z`. We expect the association between `X` and `Z` to disappear when +conditioning on `Y` (since we're then "removing the effect" of `Y`). + +```@example example_mcr +using CausalityTools +using Random; rng = Xoshiro(1234); +x = rand(rng, 300); y = rand(rng, 300) .* sin.(x); z = rand(rng, 300) .* y; +est = RMCD(r = 0.5) +association(est, x, y), association(est, x, z), association(est, x, z, y) +``` \ No newline at end of file diff --git a/docs/src/examples/examples_closeness.md b/docs/src/examples/examples_closeness.md deleted file mode 100644 index fe975d0db..000000000 --- a/docs/src/examples/examples_closeness.md +++ /dev/null @@ -1,73 +0,0 @@ -# Closeness measures - -## [S-measure](@id quickstart_smeasure) - -### Computing the `s`-statistic - -```@example quickstart_smeasure -using CausalityTools -x, y = randn(3000), randn(3000) -measure = SMeasure(dx = 3, dy = 3) -s = s_measure(measure, x, y) -``` - -The `s` statistic is larger when there is stronger coupling and smaller -when there is weaker coupling. To check whether `s` is significant (i.e. large -enough to claim directional dependence), we can use a [`SurrogateTest`](@ref), -like [here](@ref examples_surrogatetest_smeasure). - -## [H-measure](@id quickstart_hmeasure) - -### Computing the `h`-statistic - -```@example quickstart_hmeasure -using CausalityTools -x, y = randn(3000), randn(3000) -measure = HMeasure(dx = 3, dy = 3) -h = h_measure(measure, x, y) -``` - -## [M-measure](@id quickstart_mmeasure) - -### Computing the `m`-statistic - -```@example quickstart_mmeasure -using CausalityTools -x, y = randn(3000), randn(3000) -measure = MMeasure(dx = 3, dy = 3) -m = m_measure(measure, x, y) -``` - -## [L-measure](@id quickstart_mmeasure) - -### Computing the `l`-statistic - -```@example quickstart_lmeasure -using CausalityTools -x, y = randn(3000), randn(3000) -measure = LMeasure(dx = 3, dy = 3) -l = l_measure(measure, x, y) -``` - -## [Joint distance distribution](@id quickstart_jdd) - -### Computing the `Δ`-distribution - -```@example quickstart_jdd -using CausalityTools -x, y = randn(3000), randn(3000) -measure = JointDistanceDistribution(D = 3, B = 5) -Δ = jdd(measure, x, y) -``` - -The joint distance distribution measure indicates directional coupling between -`x` and `y` if `Δ` is skewed towards positive values. We can use a [`JointDistanceDistributionTest`](@ref) to formally check this. - -```@example quickstart_jdd -test = JointDistanceDistributionTest(measure) -independence(test, x, y) -``` - -The p-value is fairly low, and depending on the significance level `1 - α`, we cannot -reject the null hypothesis that `Δ` is not skewed towards positive values, and hence -we cannot reject that the variables are independent. 
diff --git a/docs/src/examples/examples_conditional_entropy.md b/docs/src/examples/examples_conditional_entropy.md deleted file mode 100644 index a6e4de34f..000000000 --- a/docs/src/examples/examples_conditional_entropy.md +++ /dev/null @@ -1,50 +0,0 @@ -# [Conditional entropy](@id examples_condentropy) - -## Discrete: example from Cover & Thomas - -This is essentially example 2.2.1 in Cover & Thomas (2006), where they use the following -contingency table as an example. We'll take their example and manually construct -a [`ContingencyMatrix`](@ref) that we can use to compute the conditional entropy. -The [`ContingencyMatrix`](@ref) constructor takes the probabilities as the -first argument and the raw frequencies as the second argument. -Note also that Julia is column-major, so we need to transpose their example. Then their -`X` is in the first dimension of our contingency matrix (along columns) and their `Y` is -our second dimension (rows). - -```@example ce_contingency_table -using CausalityTools -freqs_yx = [1//8 1//16 1//32 1//32; - 1//16 1//8 1//32 1//32; - 1//16 1//16 1//16 1//16; - 1//4 0//1 0//1 0//1]; -freqs_xy = transpose(freqs_yx); -probs_xy = freqs_xy ./ sum(freqs_xy) -c_xy = ContingencyMatrix(probs_xy, freqs_xy) -``` - -The marginal distribution for `x` (first dimension) is - -```@example ce_contingency_table -probabilities(c_xy, 1) -``` - -The marginal distribution for `y` (second dimension) is - -```@example ce_contingency_table -probabilities(c_xy, 2) -``` - -And the Shannon conditional entropy ``H^S(X | Y)`` - -```@example ce_contingency_table -ce_x_given_y = entropy_conditional(CEShannon(), c_xy) |> Rational -``` - -This is the same as in their example. Hooray! To compute ``H^S(Y | X)``, we just need to -flip the contingency matrix. - -```@example ce_contingency_table -probs_yx = freqs_yx ./ sum(freqs_yx); -c_yx = ContingencyMatrix(probs_yx, freqs_yx); -ce_y_given_x = entropy_conditional(CEShannon(), c_yx) |> Rational -``` diff --git a/docs/src/examples/examples_conditional_mutual_information.md b/docs/src/examples/examples_conditional_mutual_information.md deleted file mode 100644 index 9222f4477..000000000 --- a/docs/src/examples/examples_conditional_mutual_information.md +++ /dev/null @@ -1,171 +0,0 @@ -# [Conditional mutual information](@id quickstart_mutualinfo) - -## [`CMIShannon`](@ref) - -### Estimation using [`ConditionalMutualInformationEstimator`](@ref)s - -When estimated using a [`ConditionalMutualInformationEstimator`](@ref), some form of bias -correction is usually applied. The [`FPVP`](@ref) estimator is a popular choice. - -#### [`CMIShannon`](@ref) with [`GaussianCMI`](@ref) - -```@example mi_demonstration -using CausalityTools -using Distributions -using Statistics - -n = 1000 -# A chain X → Y → Z -x = randn(1000) -y = randn(1000) .+ x -z = randn(1000) .+ y -condmutualinfo(GaussianCMI(), x, z, y) # defaults to `CMIShannon()` -``` - -#### [`CMIShannon`](@ref) with [`FPVP`](@ref) - -```@example mi_demonstration -using CausalityTools -using Distributions -using Statistics - -n = 1000 -# A chain X → Y → Z -x = rand(Normal(-1, 0.5), n) -y = rand(BetaPrime(0.5, 1.5), n) .+ x -z = rand(Chisq(100), n) -z = (z ./ std(z)) .+ y - -# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because -# the link between X and Z is exclusively through Y, so when observing Y, -# X and Z should appear independent. 
-condmutualinfo(FPVP(k = 5), x, z, y) # defaults to `CMIShannon()` -``` - -#### [`CMIShannon`](@ref) with [`MesnerShalizi`](@ref) - -```@example mi_demonstration -using CausalityTools -using Distributions -using Statistics - -n = 1000 -# A chain X → Y → Z -x = rand(Normal(-1, 0.5), n) -y = rand(BetaPrime(0.5, 1.5), n) .+ x -z = rand(Chisq(100), n) -z = (z ./ std(z)) .+ y - -# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because -# the link between X and Z is exclusively through Y, so when observing Y, -# X and Z should appear independent. -condmutualinfo(MesnerShalizi(k = 10), x, z, y) # defaults to `CMIShannon()` -``` - -#### [`CMIShannon`](@ref) with [`Rahimzamani`](@ref) - -```@example mi_demonstration -using CausalityTools -using Distributions -using Statistics - -n = 1000 -# A chain X → Y → Z -x = rand(Normal(-1, 0.5), n) -y = rand(BetaPrime(0.5, 1.5), n) .+ x -z = rand(Chisq(100), n) -z = (z ./ std(z)) .+ y - -# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because -# the link between X and Z is exclusively through Y, so when observing Y, -# X and Z should appear independent. -condmutualinfo(CMIShannon(base = 10), Rahimzamani(k = 10), x, z, y) -``` - -#### [`CMIRenyiPoczos`](@ref) with [`PoczosSchneiderCMI`](@ref) - -```@example mi_demonstration -using CausalityTools -using Distributions -using Statistics - -n = 1000 -# A chain X → Y → Z -x = rand(Normal(-1, 0.5), n) -y = rand(BetaPrime(0.5, 1.5), n) .+ x -z = rand(Chisq(100), n) -z = (z ./ std(z)) .+ y - -# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because -# the link between X and Z is exclusively through Y, so when observing Y, -# X and Z should appear independent. -condmutualinfo(CMIRenyiPoczos(base = 2, q = 1.2), PoczosSchneiderCMI(k = 5), x, z, y) -``` - -### Estimation using [`MutualInformationEstimator`](@ref)s - -Any [`MutualInformationEstimator`](@ref) can also be used to compute conditional -mutual information using the chain rule of mutual information. However, the naive -application of these estimators don't perform any bias correction when -taking the difference of mutual information terms. - -#### [`CMIShannon`](@ref) with [`KSG1`](@ref) - -```@example mi_demonstration -using CausalityTools -using Distributions -using Statistics - -n = 1000 -# A chain X → Y → Z -x = rand(Normal(-1, 0.5), n) -y = rand(BetaPrime(0.5, 1.5), n) .+ x -z = rand(Chisq(100), n) -z = (z ./ std(z)) .+ y - -# We expect zero (in practice: very low) CMI when computing I(X; Z | Y), because -# the link between X and Z is exclusively through Y, so when observing Y, -# X and Z should appear independent. -condmutualinfo(CMIShannon(base = 2), KSG1(k = 5), x, z, y) -``` - -### Estimation using [`DifferentialEntropyEstimator`](@ref)s - -Any [`DifferentialEntropyEstimator`](@ref) can also be used to compute conditional -mutual information using a sum of entropies. However, the naive -application of these estimators don't perform any bias application when -taking the sum of entropy terms. - -#### [`CMIShannon`](@ref) with [`Kraskov`](@ref) - -```@example -using CausalityTools -using Distributions -n = 1000 -# A chain X → Y → Z -x = rand(Epanechnikov(0.5, 1.0), n) -y = rand(Erlang(1), n) .+ x -z = rand(FDist(5, 2), n) -condmutualinfo(CMIShannon(), Kraskov(k = 5), x, z, y) -``` - -### Estimation using [`ProbabilitiesEstimator`](@ref)s - -Any [`ProbabilitiesEstimator`](@ref) can also be used to compute conditional -mutual information using a sum of entropies. 
However, the naive -application of these estimators don't perform any bias application when -taking the sum of entropy terms. - -#### [`CMIShannon`](@ref) with [`ValueHistogram`](@ref) - -```@example -using CausalityTools -using Distributions -n = 1000 -# A chain X → Y → Z -x = rand(Epanechnikov(0.5, 1.0), n) -y = rand(Erlang(1), n) .+ x -z = rand(FDist(5, 2), n) -est = ValueHistogram(RectangularBinning(5)) -condmutualinfo(CMIShannon(), est, x, z, y), condmutualinfo(CMIShannon(), est, x, y, z) -``` diff --git a/docs/src/examples/examples_entropy.md b/docs/src/examples/examples_entropy.md deleted file mode 100644 index 0cbdf6f12..000000000 --- a/docs/src/examples/examples_entropy.md +++ /dev/null @@ -1,180 +0,0 @@ -# [Entropy](@id examples_entropy) - -## Differential entropy: estimator comparison - -Here, we'll test the different nearest-neighbor based differential entropy estimators on a three-dimensional normal distribution -$\mathcal{N} (\mu, \Sigma)$ with zero means and covariance matrix $\Sigma = diag(r_1, r_2, r_3)$ with $r_1 = r_2 = r_3 = 0.5$. -The analytical entropy for multivariate Gaussian is $H(\mathcal{N} (\mu, \Sigma)) = \dfrac{1}{2}\log(\det(2\pi e \Sigma))$. In our case, $\Sigma$ is diagonal, so $\det(\Sigma) = (0.5)^3$ and $H = 0.5\log(2\pi e (0.5)^3)\approx 3.217$. - -Several of these estimators have been shown to convergence to the true entropy with an increasing number of samples. Therefore, we test the -estimators on samples of increasing size $N$, where $N$ ranges from -1000 to 30000. Since we're estimating entropy from *samples* of -a normal distribution, we don't expect the estimates to perfectly match the analytical entropy every time. -On *average*, however, they should hit the target when the sample size -gets large enough. - -### Analytical and estimated entropies - -We'll first make two helper functions. - -- **`analytical_entropy(estimators, Ls; d::Int, r, base = 2)`**: Computes the analytical - Shannon differential entropy to the given `base` of a multivariate normal distribution - with covariance matrix with diagonal elements `r` and zeros on the off-diagonal. - Does so for each of the given `estimators` for each - sample size in `Ls`. -- **`mvnormal_entropies(; d::Int, r, base = 2, kwargs...)`**: Estimates the Shannon - entropy to the given `base` of samples from a multivariate normal distribution as - specified as above. - -```@example ex_entropy_estimators -using CausalityTools -using Distributions: MvNormal -using LinearAlgebra -using Statistics: quantile -using Random; rng = MersenneTwister(12345678) -using CairoMakie - -analytical_entropy(; d::Int, r, base = 2) = - 0.5*log(det(2*pi*ℯ*diagm(repeat([r], d)))) / log(ℯ, base) # convert to desired base - -function mvnormal_entropies(estimators, Ls; - d = 3, - base = 2, - nreps = 50, - r = 0.5, - ) - μ = zeros(d) - Σ = diagm(repeat([r], d)) - N = MvNormal(μ, Σ) - Hs = [[zeros(nreps) for L in Ls] for est in estimators] - data = [StateSpaceSet([rand(rng, N) for i = 1:maximum(Ls)]) for i = 1:nreps] - for (e, est) in enumerate(estimators) - for (l, L) in enumerate(Ls) - for i = 1:nreps - Hs[e][l][i] = entropy(Shannon(; base), est, data[i][1:L]) - end - end - end - return Hs -end; -``` - -We'll also need a function to summarize the estimates. - -```@example ex_entropy_estimators -# A helper to get the estimator name for plotting. 
-getname(est::DifferentialEntropyEstimator) = typeof(est).name.name |> string -function medians_and_quantiles(Hs, Ls; q = 0.95) - medians = [zeros(length(Ls)) for est in estimators] - lb = [zeros(length(Ls)) for est in estimators] - ub = [zeros(length(Ls)) for est in estimators] - - for (e, est) in enumerate(estimators) - for (l, L) in enumerate(Ls) - ĥs = Hs[e][l] # nreps estimates for this combinations of e and l - medians[e][l] = quantile(ĥs, 0.5) - lb[e][l] = quantile(ĥs, (1 - q) / 2) - ub[e][l] = quantile(ĥs, 1 - ((1 - q) / 2)) - end - end - - return medians, lb, ub -end; -``` - -### Plotting utilities - -Now, make some plotting helper functions. - -```@example ex_entropy_estimators -struct Cyclor{T} <: AbstractVector{T} - c::Vector{T} - n::Int -end -Cyclor(c) = Cyclor(c, 0) - -Base.length(c::Cyclor) = length(c.c) -Base.size(c::Cyclor) = size(c.c) -Base.iterate(c::Cyclor, state=1) = Base.iterate(c.c, state) -Base.getindex(c::Cyclor, i) = c.c[(i-1)%length(c.c) + 1] -Base.getindex(c::Cyclor, i::AbstractArray) = c.c[i] -function Base.getindex(c::Cyclor) - c.n += 1 - c[c.n] -end -Base.iterate(c::Cyclor, i = 1) = iterate(c.c, i) - -COLORSCHEME = [ - "#D43F3AFF", "#EEA236FF", "#5CB85CFF", "#46B8DAFF", - "#357EBDFF", "#9632B8FF", "#B8B8B8FF", -] - -COLORS = Cyclor(COLORSCHEME) -LINESTYLES = Cyclor(string.(["--", ".-", ".", "--.", "---..."])) -MARKERS = Cyclor(string.([:circle, :rect, :utriangle, :dtriangle, :diamond, - :pentagon, :cross, :xcross])) - -function plot_entropy_estimates(Hs, Ls, Htrue) - # Summarize data (medians[e][l]) is the median of the e-th estimator for the - # l-th sample size). - medians, lbs, ubs = medians_and_quantiles(Hs, Ls); - - fig = Figure(resolution = (800, 1000)) - ymax = (vcat(Hs...) |> Iterators.flatten |> maximum) * 1.1 - ymin = (vcat(Hs...) |> Iterators.flatten |> minimum) * 0.9 - - # We have 9 estimators, so place them on a 5-by-2 grid - positions = (Tuple(c) for c in CartesianIndices((5, 2))) - for (i, (est, c)) in enumerate(zip(estimators, positions)) - ax = Axis(fig[first(c), last(c)], - xlabel = "Sample size (L)", - ylabel = "Ĥ (bits)", - title = getname(est) - ) - ylims!(ax, (ymin, ymax)) - # Ground truth - hlines!(ax, [Htrue], - linestyle = :dash, - color = :black, - linewidth = 2, - ) - # Estimates - band!(ax, Ls, lbs[i], ubs[i], color = (COLORS[i], 0.5)) - lines!(ax, Ls, medians[i], - label = getname(est), - linestyle = LINESTYLES[i], - color = COLORS[i], - marker = MARKERS[i], - linewidth = 2 - ) - end - fig -end; -``` - -### Results - -Now, we can finally run an ensemble of tests and plot the -confidence bands against the ground truth. 
This - -```@example ex_entropy_estimators -k = 4 -estimators = [ - Kraskov(; k), - KozachenkoLeonenko(), - Gao(; k), - ZhuSingh(; k), - Zhu(; k), - Goria(; k), - LeonenkoProzantoSavani(; k), - Lord(; k = k*5) -] - -Ls = [100:100:1000 |> collect; 2500:2500:5000 |> collect] -d = 3 -r = 0.5 -nreps = 30 -Hs = mvnormal_entropies(estimators, Ls; d, r, nreps) -Htrue = analytical_entropy(; d, r) -plot_entropy_estimates(Hs, Ls, Htrue) -``` diff --git a/docs/src/examples/examples_independence.md b/docs/src/examples/examples_independence.md deleted file mode 100644 index f3a784ec9..000000000 --- a/docs/src/examples/examples_independence.md +++ /dev/null @@ -1,345 +0,0 @@ -# [Independence testing](@id examples_independence) - -## [[`JointDistanceDistributionTest`](@ref)](@id quickstart_jddtest) - -### Bidirectionally coupled logistic maps - -Let's use the built-in `logistic2_bidir` discrete dynamical system to create a pair of -bidirectionally coupled time series and use the [`JointDistanceDistributionTest`](@ref) -to see if we can confirm from observed time series that these variables are -bidirectionally coupled. We'll use a significance level of `1 - α = 0.99`, i.e. `α = 0.01`. - -We start by generating some time series and configuring the test. - -```@example quickstart_jddtest_logistic -using CausalityTools -sys = system(Logistic2Bidir(c_xy = 0.5, c_yx = 0.4)) -x, y = columns(first(trajectory(sys, 2000, Ttr = 10000))) -measure = JointDistanceDistribution(D = 5, B = 5) -test = JointDistanceDistributionTest(measure) -``` - -Now, we test for independence in both directions. - -```@example quickstart_jddtest_logistic -independence(test, x, y) -``` - -```@example quickstart_jddtest_logistic -independence(test, y, x) -``` - -As expected, the null hypothesis is rejected in both directions at the pre-determined -significance level, and hence we detect directional coupling in both directions. - -### Non-coupled logistic maps - -What happens in the example above if there is no coupling? - -```@example quickstart_jddtest_logistic -sys = system(Logistic2Bidir(c_xy = 0.00, c_yx = 0.0)) -x, y = columns(first(trajectory(sys, 1000, Ttr = 10000))); -rxy = independence(test, x, y) -ryx = independence(test, y, x) -pvalue(rxy), pvalue(ryx) -``` - -At significance level `0.99`, we can't reject the null in either direction, hence there's not -enough evidence in the data to suggest directional coupling. - -## [`LocalPermutationTest`](@ref) - -### [Conditional mutual information (Shannon, differential)](@id example_localpermtest_cmishannon) - -#### Chain of random variables $X \to Y \to Z$ - -Here, we'll create a three-variable scenario where `X` and `Z` are connected through `Y`, -so that ``I(X; Z | Y) = 0`` and ``I(X; Y | Z) > 0``. We'll test for conditional -independence using Shannon conditional mutual information -([`CMIShannon`](@ref)). To estimate CMI, we'll use the [`Kraskov`](@ref) differential -entropy estimator, which naively computes CMI as a sum of entropy terms without guaranteed -bias cancellation. - -```@example LOCAL_PERMUTATION_TEST_CMISHANNON -using CausalityTools - -X = randn(1000) -Y = X .+ randn(1000) .* 0.4 -Z = randn(1000) .+ Y -x, y, z = StateSpaceSet.((X, Y, Z)) -test = LocalPermutationTest(CMIShannon(base = 2), Kraskov(k = 10), nshuffles = 30) -test_result = independence(test, x, y, z) -``` - -We expect there to be a detectable influence from ``X`` to -``Y``, if we condition on ``Z`` or not, because ``Z`` doesn't influence neither ``X`` nor ``Y``. 
-The null hypothesis is that the first two variables are conditionally independent given the third, which we reject with a very low p-value. Hence, we accept the alternative -hypothesis that the first two variables ``X`` and ``Y``. are conditionally *dependent* given ``Z``. - -```@example LOCAL_PERMUTATION_TEST_CMISHANNON -test_result = independence(test, x, z, y) -``` - -As expected, we cannot reject the null hypothesis that ``X`` and ``Z`` are conditionally independent given ``Y``, because ``Y`` is the variable that transmits information from -``X`` to ``Z``. - -### [Transfer entropy (Shannon, differential)](@id example_localpermtest_teshannon) - -#### Chain of random variables $X \to Y \to Z to W$ - -Here, we demonstrate [`LocalPermutationTest`](@ref) with the [`TEShannon`](@ref) measure -with default parameters and the [`FPVP`](@ref) estimator. We'll use a system -of four coupled logistic maps that are linked `X → Y → Z → W`. - -```@example LOCAL_PERMUTATION_TEST_TESHANNON -using CausalityTools -using Random; rng = Random.default_rng() -s = system(Logistic4Chain(; xi = rand(4))) -x, y, z, w = columns(first(trajectory(s, 2000))) -test = LocalPermutationTest(TEShannon(), FPVP(), nshuffles = 50) -test_result = independence(test, x, z) -``` - -There is significant transfer entropy from `X → Z`. We should expect this transfer entropy -to be non-significant when conditioning on `Y`, because all information from `X` to `Z` -is transferred through `Y`. - -```@example LOCAL_PERMUTATION_TEST_TESHANNON -test_result = independence(test, x, z, y) -``` - -As expected, we cannot reject the null hypothesis that `X` and `Z` are conditionally -independent given `Y`. - -The same goes for variables one step up the chain - -```@example LOCAL_PERMUTATION_TEST_TESHANNON -test_result = independence(test, y, w, z) -``` - -## [[`SurrogateTest`](@ref)](@id examples_surrogatetest) - -### [Distance correlation](@id examples_surrogatetest_distancecorrelation) - -```@example -using CausalityTools -x = randn(1000) -y = randn(1000) .+ 0.5x -independence(SurrogateTest(DistanceCorrelation()), x, y) -``` - -### [Partial correlation](@id examples_surrogatetest_partialcorrelation) - -```@example -using CausalityTools -x = randn(1000) -y = randn(1000) .+ 0.5x -z = randn(1000) .+ 0.8y -independence(SurrogateTest(PartialCorrelation()), x, z, y) -``` - -### [Mutual information ([`MIShannon`](@ref), categorical)](@id examples_surrogatetest_mishannon_categorical) - -In this example, we expect the `preference` and the `food` variables to be independent. - -```@example -using CausalityTools -# Simulate -n = 1000 -preference = rand(["yes", "no"], n) -food = rand(["veggies", "meat", "fish"], n) -test = SurrogateTest(MIShannon(), Contingency()) -independence(test, preference, food) -``` - -As expected, there's not enough evidence to reject the null hypothesis that the -variables are independent. - -### [Conditional mutual information ([`CMIShannon`](@ref), categorical)](@id examples_surrogatetest_cmishannon_categorical) - -Here, we simulate a survey at a ski resort. The data are such that the place a person -grew up is associated with how many times they fell while going skiing. The control -happens through an intermediate variable `preferred_equipment`, which indicates what -type of physical activity the person has engaged with in the past. Some activities -like skateboarding leads to better overall balance, so people that are good on -a skateboard also don't fall, and people that to less challenging activities fall -more often. 
- -We should be able to reject `places ⫫ experience`, but not reject -`places ⫫ experience | preferred_equipment`. Let's see if we can detect these -relationships using (conditional) mutual information. - -```@example indep_cmi -using CausalityTools -n = 10000 - -places = rand(["city", "countryside", "under a rock"], n); -preferred_equipment = map(places) do place - if cmp(place, "city") == 1 - return rand(["skateboard", "bmx bike"]) - elseif cmp(place, "countryside") == 1 - return rand(["sled", "snowcarpet"]) - else - return rand(["private jet", "ferris wheel"]) - end -end; -experience = map(preferred_equipment) do equipment - if equipment ∈ ["skateboard", "bmx bike"] - return "didn't fall" - elseif equipment ∈ ["sled", "snowcarpet"] - return "fell 3 times or less" - else - return "fell uncontably many times" - end -end; - -test_mi = independence(SurrogateTest(MIShannon(), Contingency()), places, experience) -``` - -As expected, the evidence favors the alternative hypothesis that `places` and -`experience` are dependent. - -```@example indep_cmi -test_cmi = independence(SurrogateTest(CMIShannon(), Contingency()), places, experience, preferred_equipment) -``` - -Again, as expected, when conditioning on the mediating variable, the dependence disappears, -and we can't reject the null hypothesis of independence. - -### Transfer entropy ([`TEShannon`](@ref)) - -#### [Pairwise](@id examples_surrogatetest_teshannon) - -We'll see if we can reject independence for two unidirectionally coupled timeseries -where `x` drives `y`. - -```@example surrogatecit_te -using CausalityTools -sys = system(Logistic2Unidir(c_xy = 0.5)) # x affects y, but not the other way around. -x, y = columns(first(trajectory(sys, 1000, Ttr = 10000))) - -test = SurrogateTest(TEShannon(), KSG1(k = 4)) -independence(test, x, y) -``` - -As expected, we can reject the null hypothesis that the future of `y` is independent of -`x`, because `x` does actually influence `y`. This doesn't change if we compute -partial (conditional) transfer entropy with respect to some random extra time series, -because it doesn't influence any of the other two variables. - -```@example surrogatecit_te -independence(test, x, y, rand(length(x))) -``` - -### [[`SMeasure`](@ref)](@id examples_surrogatetest_smeasure) - -```@example quickstart_smeasure -using CausalityTools -x, y = randn(3000), randn(3000) -measure = SMeasure(dx = 3, dy = 3) -s = s_measure(measure, x, y) -``` - -The `s` statistic is larger when there is stronger coupling and smaller -when there is weaker coupling. To check whether `s` is significant (i.e. large -enough to claim directional dependence), we can use a [`SurrogateTest`](@ref), -like [here](@ref examples_surrogatetest_smeasure). - -```@example quickstart_smeasure -test = SurrogateTest(measure) -independence(test, x, y) -``` - -The p-value is high, and we can't reject the null at any reasonable significance level. -Hence, there isn't evidence in the data to support directional coupling from `x` to `y`. - -What happens if we use coupled variables? - -```@example quickstart_smeasure -z = x .+ 0.1y -independence(test, x, z) -``` - -Now we can confidently reject the null (independence), and conclude that there is -evidence in the data to support directional dependence from `x` to `z`. - -## [[`PATest`](@ref)](@id examples_patest) - -The following example demonstrates how to compute the significance of the -[`PA`](@ref) directional dependence measure using a [`PATest`](@ref). 
-We'll use timeseries from a chain of unidirectionally coupled -logistic maps that are coupled $X \to Y \to Z \to W$. - -### Conditional analysis - -What happens if we compute$\Delta A_{X \to Z}$? We'd maybe expect there to be -some information transfer $X \to Z$, even though the variables are not directly linked, -because information is transferred through $Y$. - -```@example example_patest -using CausalityTools -using DelayEmbeddings -using Random -rng = MersenneTwister(1234) - -sys = system(Logistic4Chain(xi = [0.1, 0.2, 0.3, 0.4]; rng)) -x, y, z, w = columns(first(trajectory(sys, 1000))) -τx = estimate_delay(x, "mi_min") -τy = estimate_delay(y, "mi_min") -test = PATest(PA(ηT = 1:10, τS = estimate_delay(x, "mi_min")), FPVP()) -ΔA_xz = independence(test, x, z) -``` - -As expected, the distribution is still significantly skewed towards positive values. -To determine whether the information flow between $x$ and $z$ is mediated by $y$, we can compute -the conditional distribution $\Delta A_{X \to Z | Y}$. If these values are still positively -skewed, we conclude that $Y$ is not a mediating variable. If conditioning on $Y$ causes -$\Delta A_{X \to Z | Y}$ to not be skewed towards positive values any more, then -we conclude that $Y$ is a mediating variable and that $X$ and $Z$ are linked $X \to Y \to Z$. - -```@example example_patest -measure = PA(ηT = 1:10, τS = estimate_delay(x, "mi_min"), τC = estimate_delay(y, "mi_min")) -test = PATest(measure, FPVP()) -ΔA_xzy = independence(test, x, z, y) -``` - -We can't reject independence when conditioning on $Y$, so we conclude that $Y$ is a -variable responsible for transferring information from $X$ to $Z$. - -## [[`CorrTest`](@ref)](@id examples_corrtest) - -```@example corrtest_example -using CausalityTools -using StableRNGs -rng = StableRNG(1234) - -# Some normally distributed data -X = randn(rng, 1000) -Y = 0.5*randn(rng, 1000) .+ X -Z = 0.5*randn(rng, 1000) .+ Y -W = randn(rng, 1000); -``` - -Let's test a few independence relationships. For example, we expect that `X ⫫ W`. -We also expect dependence `X !⫫ Z`, but this dependence should vanish when -conditioning on the intermediate variable, so we expect `X ⫫ Z | Y`. - -```@example corrtest_example -independence(CorrTest(), X, W) -``` - -As expected, the outcome is that we can't reject the null hypothesis that `X ⫫ W`. - -```@example corrtest_example -independence(CorrTest(), X, Z) -``` - -However, we *can* reject the null hypothesis that `X ⫫ Z`, so the evidence favors -the alternative hypothesis `X !⫫ Z`. - -```@example corrtest_example -independence(CorrTest(), X, Z, Y) -``` - -As expected, the correlation between `X` and `Z` significantly vanishes when conditioning -on `Y`, because `Y` is solely responsible for the observed correlation between `X` and `Y`. diff --git a/docs/src/examples/examples_graphs.md b/docs/src/examples/examples_infer_graphs.md similarity index 55% rename from docs/src/examples/examples_graphs.md rename to docs/src/examples/examples_infer_graphs.md index 22dee9542..13d954e69 100644 --- a/docs/src/examples/examples_graphs.md +++ b/docs/src/examples/examples_infer_graphs.md @@ -6,12 +6,15 @@ directed graphs that we'll use below. 
```@example graph_examples using Graphs, CairoMakie, GraphMakie -function plotgraph(g) +function plotgraph(g; nlabels = repr.(1:nv(g))) f, ax, p = graphplot(g, - nlabels = repr.(1:nv(g)), - nlabels_color = [:red for i in 1:nv(g)], + ilabels = nlabels, + ilabels_color = [:white for i in 1:nv(g)], + node_color = :blue, + node_size = 80, + arrow_size = 15, ) - offsets = 0.05 * (p[:node_pos][] .- p[:node_pos][][1]) + offsets = 0.02 * (p[:node_pos][] .- p[:node_pos][][1]) offsets[1] = Point2f(0, 0.2) p.nlabels_offset[] = offsets autolimits!(ax) @@ -21,11 +24,51 @@ function plotgraph(g) return f end ``` +## Example data + +We'll implement a set of chained logistic maps with unidirectional coupling. + +```@example graph_examples +using DynamicalSystemsBase +Base.@kwdef struct Logistic4Chain{V, RX, RY, RZ, RW, C1, C2, C3, Σ1, Σ2, Σ3, RNG} + xi::V = [0.1, 0.2, 0.3, 0.4] + rx::RX = 3.9 + ry::RY = 3.6 + rz::RZ = 3.6 + rw::RW = 3.8 + c_xy::C1 = 0.4 + c_yz::C2 = 0.4 + c_zw::C3 = 0.35 + σ_xy::Σ1 = 0.05 + σ_yz::Σ2 = 0.05 + σ_zw::Σ3 = 0.05 + rng::RNG = Random.default_rng() +end + +function eom_logistic4_chain(u, p::Logistic4Chain, t) + (; xi, rx, ry, rz, rw, c_xy, c_yz, c_zw, σ_xy, σ_yz, σ_zw, rng) = p + x, y, z, w = u + f_xy = (y + c_xy*(x + σ_xy * rand(rng)) ) / (1 + c_xy*(1+σ_xy)) + f_yz = (z + c_yz*(y + σ_yz * rand(rng)) ) / (1 + c_yz*(1+σ_yz)) + f_zw = (w + c_zw*(z + σ_zw * rand(rng)) ) / (1 + c_zw*(1+σ_zw)) + dx = rx * x * (1 - x) + dy = ry * (f_xy) * (1 - f_xy) + dz = rz * (f_yz) * (1 - f_yz) + dw = rw * (f_zw) * (1 - f_zw) + return SVector{4}(dx, dy, dz, dw) +end + + +function system(definition::Logistic4Chain) + return DiscreteDynamicalSystem(eom_logistic4_chain, definition.xi, definition) +end +``` + ## [Optimal causation entropy](@id oce_example) Here, we use the [`OCE`](@ref) algorithm to infer a time series graph. We use a -[`SurrogateTest`](@ref) for the initial step, and a [`LocalPermutationTest`](@ref) +[`SurrogateAssociationTest`](@ref) for the initial step, and a [`LocalPermutationTest`](@ref) for the conditional steps. ```@example graph_examples @@ -35,11 +78,13 @@ rng = StableRNG(123) # An example system where `X → Y → Z → W`. sys = system(Logistic4Chain(; rng)) -x, y, z, w = columns(first(trajectory(sys, 400, Ttr = 10000))) +x, y, z, w = columns(first(trajectory(sys, 300, Ttr = 10000))) # Independence tests for unconditional and conditional stages. -utest = SurrogateTest(MIShannon(), KSG2(k = 3, w = 1); rng, nshuffles = 150) -ctest = LocalPermutationTest(CMIShannon(), MesnerShalizi(k = 3, w = 1); rng, nshuffles = 150) +uest = KSG2(MIShannon(); k = 3, w = 1) +utest = SurrogateAssociationTest(uest; rng, nshuffles = 19) +cest = MesnerShalizi(CMIShannon(); k = 3, w = 1) +ctest = LocalPermutationTest(cest; rng, nshuffles = 19) # Infer graph alg = OCE(; utest, ctest, α = 0.05, τmax = 1) @@ -54,9 +99,10 @@ The algorithm nicely recovers the true causal directions. We can also plot the g the function we made above. ```@example graph_examples -plotgraph(g) +plotgraph(g; nlabels = ["x", "y", "z", "w"]) ``` + ## [PC-algorithm](@id pc_examples) ### [Correlation-based tests](@id pc_examples_corr) @@ -74,7 +120,7 @@ normally distributed data. 
using CausalityTools using StableRNGs rng = StableRNG(123) -n = 500 +n = 300 v = randn(rng, n) x = v + randn(rng, n)*0.25 w = x + randn(rng, n)*0.25 @@ -87,7 +133,7 @@ alg = PC(CorrTest(), CorrTest(); α = 0.05) est_cpdag_parametric = infer_graph(alg, X; verbose = false) # Plot the graph -plotgraph(est_cpdag_parametric) +plotgraph(est_cpdag_parametric; nlabels = ["x", "v", "w", "z", "s"]) ``` ### [Nonparametric tests](@id pc_examples_nonparametric) @@ -97,7 +143,7 @@ CausalInference.jl is that our implementation automatically works with any compa and [`IndependenceTest`](@ref), and thus any combination of (nondirectional) [`AssociationMeasure`](@ref) and estimator. -Here, we replicate the example above, but using a nonparametric [`SurrogateTest`](@ref) +Here, we replicate the example above, but using a nonparametric [`SurrogateAssociationTest`](@ref) with the Shannon mutual information [`MIShannon`](@ref) measure and the [`GaoOhViswanath`](@ref) estimator for the pairwise independence tests, and a [`LocalPermutationTest`](@ref) with conditional mutual information [`CMIShannon`](@ref) @@ -107,7 +153,7 @@ and the [`MesnerShalizi`](@ref). rng = StableRNG(123) # Use fewer observations, because MI/CMI takes longer to estimate -n = 400 +n = 300 v = randn(rng, n) x = v + randn(rng, n)*0.25 w = x + randn(rng, n)*0.25 @@ -115,13 +161,16 @@ z = v + w + randn(rng, n)*0.25 s = z + randn(rng, n)*0.25 X = [x, v, w, z, s] -pairwise_test = SurrogateTest(MIShannon(), GaoOhViswanath(k = 10)) -cond_test = LocalPermutationTest(CMIShannon(), MesnerShalizi(k = 10)) +est_pairwise = JointProbabilities(MIShannon(), CodifyVariables(ValueBinning(3))) +est_cond = MesnerShalizi(CMIShannon(); k = 5) +pairwise_test = SurrogateAssociationTest(est_pairwise; rng, nshuffles = 50) +cond_test = LocalPermutationTest(est_cond; rng, nshuffles = 50) alg = PC(pairwise_test, cond_test; α = 0.05) est_cpdag_nonparametric = infer_graph(alg, X; verbose = false) plotgraph(est_cpdag_nonparametric) ``` -We get the same graph as with the parametric estimator. However, for general non-gaussian -data, the correlation-based tests (which assumes normally distributed data) -will *not* give the same results as other independence tests. +We get the same basic structure of the graph, but which directional associations +are correctly ruled out varies. In general, using different types of +association measures with different independence tests, applied to general +non-gaussian data, will not give the same results as the correlation-based tests. diff --git a/docs/src/examples/examples_mi.md b/docs/src/examples/examples_mi.md deleted file mode 100644 index da6c6f7c8..000000000 --- a/docs/src/examples/examples_mi.md +++ /dev/null @@ -1,567 +0,0 @@ -# [Mutual information](@id quickstart_mutualinfo) - -## [`MIShannon`](@ref) - -### [Estimation using [`MutualInformationEstimator`](@ref)s](@id example_mi_MutualInformationEstimator) - -When estimated using a [`MutualInformationEstimator`](@ref), some form of bias -correction is usually applied. The [`KraskovStögbauerGrassberger1`](@ref) and -[`KraskovStögbauerGrassberger2`](@ref) estimators are perhaps the most popular. -A common parametric estimator is [`GaussianMI`](@ref). 
- -#### [`MIShannon`](@ref) with [`GaussianMI`](@ref) - -```@example mi_demonstration -using CausalityTools -using Distributions -using Statistics - -n = 1000 -using CausalityTools -x = randn(1000) -y = rand(1000) .+ x -mutualinfo(KSG1(k = 5), x, y) -mutualinfo(GaussianMI(), x, y) # defaults to `MIShannon()` -``` - -#### [`MIShannon`](@ref) with [`KraskovStögbauerGrassberger1`](@ref) - -```@example mi_demonstration -using CausalityTools -x, y = rand(1000), rand(1000) -mutualinfo(KSG1(k = 5), x, y) -``` - -#### [`MIShannon`](@ref) with [`KraskovStögbauerGrassberger2`](@ref) - -```@example mi_demonstration -using CausalityTools -x, y = rand(1000), rand(1000) -mutualinfo(KSG2(k = 5), x, y) -``` - -#### [`MIShannon`](@ref) with [`GaoKannanOhViswanath`](@ref) - -```@example mi_demonstration -using CausalityTools -x, y = rand(1000), rand(1000) -mutualinfo(GaoKannanOhViswanath(k = 10), x, y) -``` - -#### [`MIShannon`](@ref) with [`GaoOhViswanath`](@ref) - -```@example mi_demonstration -using CausalityTools -x, y = rand(1000), rand(1000) -mutualinfo(GaoOhViswanath(k = 10), x, y) -``` - -#### Reproducing Kraskov et al. (2004) - -Here, we'll reproduce Figure 4 from Kraskov et al. (2004)'s seminal paper on the nearest-neighbor based mutual information estimator. We'll estimate the mutual information -between marginals of a bivariate Gaussian for a fixed time series length of 2000, -varying the number of neighbors. *Note: in the original paper, they show multiple -curves corresponding to different time series length. We only show two single curves: -one for the [`KSG1`](@ref) estimator and one for the [`KSG2`](@ref) estimator*. - -```@example ex_mutualinfo -using CausalityTools -using LinearAlgebra: det -using Distributions: MvNormal -using StateSpaceSets: StateSpaceSet -using CairoMakie -using Statistics - -N = 2000 -c = 0.9 -Σ = [1 c; c 1] -N2 = MvNormal([0, 0], Σ) -mitrue = -0.5*log(det(Σ)) # in nats -ks = [2; 5; 7; 10:10:70] .* 2 - -nreps = 30 -mis_ksg1 = zeros(nreps, length(ks)) -mis_ksg2 = zeros(nreps, length(ks)) -for i = 1:nreps - D2 = StateSpaceSet([rand(N2) for i = 1:N]) - X = D2[:, 1] |> StateSpaceSet - Y = D2[:, 2] |> StateSpaceSet - measure = MIShannon(; base = ℯ) - mis_ksg1[i, :] = map(k -> mutualinfo(measure, KSG1(; k), X, Y), ks) - mis_ksg2[i, :] = map(k -> mutualinfo(measure, KSG2(; k), X, Y), ks) -end -fig = Figure() -ax = Axis(fig[1, 1], xlabel = "k / N", ylabel = "Mutual infomation (nats)") -scatterlines!(ax, ks ./ N, mean(mis_ksg1, dims = 1) |> vec, label = "KSG1") -scatterlines!(ax, ks ./ N, mean(mis_ksg2, dims = 1) |> vec, label = "KSG2") -hlines!(ax, [mitrue], color = :black, linewidth = 3, label = "I (true)") -axislegend() -fig -``` - -#### [`MutualInformationEstimator`](@ref) comparison - -Most estimators suffer from significant bias when applied to discrete, finite data. One possible resolution is to add a small amount of noise to discrete variables, so that the data becomes continuous in practice. - -Instead of adding noise to your data, you can consider using an -estimator that is specifically designed to deal with continuous-discrete mixture data. One example is the [`GaoKannanOhViswanath`](@ref) estimator. - -Here, we compare its performance to [`KSG1`](@ref) on uniformly -distributed discrete multivariate data. The true mutual information is zero. 
- -```@example ex_mutualinfo -using CausalityTools -using Statistics -using StateSpaceSets: StateSpaceSet -using Statistics: mean -using CairoMakie - -function compare_ksg_gkov(; - k = 5, - base = 2, - nreps = 15, - Ls = [500:100:1000; 1500; 2000; 3000; 4000; 5000; 1000]) - - est_gkov = GaoKannanOhViswanath(; k) - est_ksg1 = KSG1(; k) - - mis_ksg1_mix = zeros(nreps, length(Ls)) - mis_ksg1_discrete = zeros(nreps, length(Ls)) - mis_ksg1_cont = zeros(nreps, length(Ls)) - mis_gkov_mix = zeros(nreps, length(Ls)) - mis_gkov_discrete = zeros(nreps, length(Ls)) - mis_gkov_cont = zeros(nreps, length(Ls)) - - for (j, L) in enumerate(Ls) - for i = 1:nreps - X = StateSpaceSet(float.(rand(1:8, L, 2))) - Y = StateSpaceSet(float.(rand(1:8, L, 2))) - Z = StateSpaceSet(rand(L, 2)) - W = StateSpaceSet(rand(L, 2)) - measure = MIShannon(; base = ℯ) - mis_ksg1_discrete[i, j] = mutualinfo(measure, est_ksg1, X, Y) - mis_gkov_discrete[i, j] = mutualinfo(measure, est_gkov, X, Y) - mis_ksg1_mix[i, j] = mutualinfo(measure, est_ksg1, X, Z) - mis_gkov_mix[i, j] = mutualinfo(measure, est_gkov, X, Z) - mis_ksg1_cont[i, j] = mutualinfo(measure, est_ksg1, Z, W) - mis_gkov_cont[i, j] = mutualinfo(measure, est_gkov, Z, W) - end - end - return mis_ksg1_mix, mis_ksg1_discrete, mis_ksg1_cont, - mis_gkov_mix, mis_gkov_discrete, mis_gkov_cont -end - -fig = Figure() -ax = Axis(fig[1, 1], - xlabel = "Sample size", - ylabel = "Mutual information (bits)") -Ls = [100; 200; 500; 1000; 2500; 5000; 10000] -nreps = 5 -k = 3 -mis_ksg1_mix, mis_ksg1_discrete, mis_ksg1_cont, - mis_gkov_mix, mis_gkov_discrete, mis_gkov_cont = - compare_ksg_gkov(; nreps, k, Ls) - -scatterlines!(ax, Ls, mean(mis_ksg1_mix, dims = 1) |> vec, - label = "KSG1 (mixed)", color = :black, - marker = :utriangle) -scatterlines!(ax, Ls, mean(mis_ksg1_discrete, dims = 1) |> vec, - label = "KSG1 (discrete)", color = :black, - linestyle = :dash, marker = '▲') -scatterlines!(ax, Ls, mean(mis_ksg1_cont, dims = 1) |> vec, - label = "KSG1 (continuous)", color = :black, - linestyle = :dot, marker = '●') -scatterlines!(ax, Ls, mean(mis_gkov_mix, dims = 1) |> vec, - label = "GaoKannanOhViswanath (mixed)", color = :red, - marker = :utriangle) -scatterlines!(ax, Ls, mean(mis_gkov_discrete, dims = 1) |> vec, - label = "GaoKannanOhViswanath (discrete)", color = :red, - linestyle = :dash, marker = '▲') -scatterlines!(ax, Ls, mean(mis_gkov_cont, dims = 1) |> vec, - label = "GaoKannanOhViswanath (continuous)", color = :red, - linestyle = :dot, marker = '●') -axislegend(position = :rb) -fig -``` - -### [Estimation using [`DifferentialEntropyEstimator`](@ref)s](@id example_mi_DifferentialEntropyEstimator) - -#### Simple example - -We can compute [`MIShannon`](@ref) by naively applying a [`DifferentialEntropyEstimator`](@ref). -Note that this doesn't apply any bias correction. - -```@example mi_demonstration -using CausalityTools -x, y = rand(1000), rand(1000) -mutualinfo(Kraskov(k = 3), x, y) -``` - -#### [`DifferentialEntropyEstimator`](@ref) comparison - -Let's compare the performance of a subset of the implemented mutual information estimators. We'll use example data from Lord et al., where the analytical mutual information is known. 
- -```@example ex_mutualinfo -using CausalityTools -using LinearAlgebra: det -using StateSpaceSets: StateSpaceSet -using Distributions: MvNormal -using LaTeXStrings -using CairoMakie - -# adapted from https://juliadatascience.io/makie_colors -function new_cycle_theme() - # https://nanx.me/ggsci/reference/pal_locuszoom.html - my_colors = ["#D43F3AFF", "#EEA236FF", "#5CB85CFF", "#46B8DAFF", - "#357EBDFF", "#9632B8FF", "#B8B8B8FF"] - cycle = Cycle([:color, :linestyle, :marker], covary=true) # alltogether - my_markers = [:circle, :rect, :utriangle, :dtriangle, :diamond, - :pentagon, :cross, :xcross] - my_linestyle = [nothing, :dash, :dot, :dashdot, :dashdotdot] - return Theme( - fontsize = 22, font="CMU Serif", - colormap = :linear_bmy_10_95_c78_n256, - palette = ( - color = my_colors, - marker = my_markers, - linestyle = my_linestyle, - ), - Axis = ( - backgroundcolor= (:white, 0.2), - xgridstyle = :dash, - ygridstyle = :dash - ), - Lines = ( - cycle= cycle, - ), - ScatterLines = ( - cycle = cycle, - ), - Scatter = ( - cycle = cycle, - ), - Legend = ( - bgcolor = (:grey, 0.05), - framecolor = (:white, 0.2), - labelsize = 13, - ) - ) -end - -run(est; f::Function, # function that generates data - base::Real = ℯ, - nreps::Int = 10, - αs = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1], - n::Int = 1000) = - map(α -> mutualinfo(MIShannon(; base), est, f(α, n)...), αs) - -function compute_results(f::Function; estimators, k = 5, k_lord = 20, - n = 1000, base = ℯ, nreps = 10, - as = 7:-1:0, - αs = [1/10^(a) for a in as]) - - is = [zeros(length(αs)) for est in estimators] - for (k, est) in enumerate(estimators) - tmp = zeros(length(αs)) - for i = 1:nreps - tmp .+= run(est; f = f, αs, base, n) - end - is[k] .= tmp ./ nreps - end - - return is -end - -function plot_results(f::Function, ftrue::Function; - base, estimators, k_lord, k, - as = 7:-1:0, αs = [1/10^(a) for a in as], kwargs... - ) - is = compute_results(f; - base, estimators, k_lord, k, as, αs, kwargs...) - itrue = [ftrue(α; base) for α in αs] - - xmin, xmax = minimum(αs), maximum(αs) - - ymin = floor(Int, min(minimum(itrue), minimum(Iterators.flatten(is)))) - ymax = ceil(Int, max(maximum(itrue), maximum(Iterators.flatten(is)))) - f = Figure() - ax = Axis(f[1, 1], - xlabel = "α", ylabel = "I (nats)", - xscale = log10, aspect = 1, - xticks = (αs, [latexstring("10^{$(-a)}") for a in as]), - yticks = (ymin:ymax) - ) - xlims!(ax, (1/10^first(as), 1/10^last(as))) - ylims!(ax, (ymin, ymax)) - lines!(ax, αs, itrue, - label = "I (true)", linewidth = 4, color = :black) - for (i, est) in enumerate(estimators) - es = string(typeof(est).name.name) - lbl = occursin("Lord", es) ? "$es (k = $k_lord)" : "$es (k = $k)" - scatter!(ax, αs, is[i], label = lbl) - lines!(ax, αs, is[i]) - - end - axislegend() - return f -end - -set_theme!(new_cycle_theme()) -k_lord = 20 -k = 5 -base = ℯ - -estimators = [ - Kraskov(; k), - KozachenkoLeonenko(), - Zhu(; k), - ZhuSingh(; k), - Gao(; k), - Lord(; k = k_lord), - KSG1(; k), - KSG2(; k), - GaoOhViswanath(; k), - GaoKannanOhViswanath(; k), - GaussianMI(), -] -``` - -#### Family 1 - -In this system, samples are concentrated around the diagonal $X = Y$, -and the strip of samples gets thinner as $\alpha \to 0$. 
- -```@example ex_mutualinfo -function family1(α, n::Int) - x = rand(n) - v = rand(n) - y = x + α * v - return StateSpaceSet(x), StateSpaceSet(y) -end - -# True mutual information values for these data -function ifamily1(α; base = ℯ) - mi = -log(α) - α - log(2) - return mi / log(base, ℯ) -end - -fig = plot_results(family1, ifamily1; - k_lord = k_lord, k = k, nreps = 10, - estimators = estimators, - base = base) -``` - -#### Family 2 - -```@example ex_mutualinfo -function family2(α, n::Int) - Σ = [1 α; α 1] - N2 = MvNormal(zeros(2), Σ) - D2 = StateSpaceSet([rand(N2) for i = 1:n]) - X = StateSpaceSet(D2[:, 1]) - Y = StateSpaceSet(D2[:, 2]) - return X, Y -end - -function ifamily2(α; base = ℯ) - return (-0.5 * log(1 - α^2)) / log(ℯ, base) -end - -αs = 0.05:0.05:0.95 -estimators = estimators -with_theme(new_cycle_theme()) do - f = Figure(); - ax = Axis(f[1, 1], xlabel = "α", ylabel = "I (nats)") - is_true = map(α -> ifamily2(α), αs) - is_est = map(est -> run(est; f = family2, αs, nreps = 20), estimators) - lines!(ax, αs, is_true, - label = "I (true)", color = :black, linewidth = 3) - for (i, est) in enumerate(estimators) - estname = typeof(est).name.name |> String - scatterlines!(ax, αs, is_est[i], label = estname) - end - axislegend(position = :lt) - return f -end -``` - -#### Family 3 - -In this system, we draw samples from a 4D Gaussian distribution distributed -as specified in the `ifamily3` function below. We let $X$ be the two first -variables, and $Y$ be the two last variables. - -```@example ex_mutualinfo -function ifamily3(α; base = ℯ) - Σ = [7 -5 -1 -3; -5 5 -1 3; -1 -1 3 -1; -3 3 -1 2+α] - Σx = Σ[1:2, 1:2]; Σy = Σ[3:4, 3:4] - mi = 0.5*log(det(Σx) * det(Σy) / det(Σ)) - return mi / log(ℯ, base) -end - -function family3(α, n::Int) - Σ = [7 -5 -1 -3; -5 5 -1 3; -1 -1 3 -1; -3 3 -1 2+α] - N4 = MvNormal(zeros(4), Σ) - D4 = StateSpaceSet([rand(N4) for i = 1:n]) - X = D4[:, 1:2] - Y = D4[:, 3:4] - return X, Y -end - -fig = plot_results(family3, ifamily3; - k_lord = k_lord, k = k, nreps = 10, - n = 2000, - estimators = estimators, base = base) -``` - -We see that the [`Lord`](@ref) estimator, which estimates local volume elements using a singular-value decomposition (SVD) of local neighborhoods, outperforms the other estimators by a large margin. - -### [Estimation using [`ProbabilitiesEstimator`](@ref)s](@id example_mi_ProbabilitiesEstimator) - -We can also use [`ProbabilitiesEstimator`](@ref) to estimate Shannon mutual information. -This does not apply any bias correction. - -#### Discrete [`MIShannon`](@ref) with [`ValueHistogram`](@ref) - -A [`ValueHistogram`](@ref) estimator can be used to bin the data and compute -discrete Shannon mutual information. - -```@example mi_demonstration -using CausalityTools -using Random; rng = MersenneTwister(1234) -x = rand(rng, 1000) -y = rand(rng, 1000) - -# Use the H3-estimation method with a discrete visitation frequency based -# probabilities estimator over a fixed grid covering the range of the data, -# which is on [0, 1]. -est = ValueHistogram(FixedRectangularBinning(0, 1, 5)) -mutualinfo(est, x, y) -``` - -#### Discrete [`MIShannon`](@ref) with [`Contingency`](@ref) (numerical) - -The above example is in fact equivalent to [`Contingency`](@ref). However, -using the [`Contingency`](@ref) estimator is more flexible, because it -can also be used on [categorical data](@ref discrete_mishannon_categorical). 
- -```@example mi_demonstration -using CausalityTools -using Random; rng = MersenneTwister(1234) -x = rand(rng, 1000) -y = rand(rng, 1000) -est = ValueHistogram(FixedRectangularBinning(0, 1, 5)) -mutualinfo(Contingency(est), x, y) -``` - -#### Discrete [`MIShannon`](@ref) with [`ContingencyMatrix`](@ref) (manual) - -If you need explicit access to the estimated joint probability mass function, -use a [`ContingencyMatrix`](@ref) directly. - -```@example mi_demonstration -using CausalityTools -using Random; rng = MersenneTwister(1234) -x = rand(rng, 1000) -y = rand(rng, 1000) -c = contingency_matrix(est, x, y) -est = ValueHistogram(FixedRectangularBinning(0, 1, 5)) -mutualinfo(c) -``` - -#### [Discrete [`MIShannon`](@ref) with [`Contingency`](@ref) (categorical)](@id discrete_mishannon_categorical) - -The [`ContingencyMatrix`](@ref) approach can also be used with categorical data. -For example, let's compare the Shannon mutual information between the preferences -of a population sample with regards to different foods. - -```@example mi_demonstration -using CausalityTools -n = 1000 -preferences = rand(["neutral", "like it", "hate it"], n); -random_foods = rand(["water", "flour", "bananas", "booze", "potatoes", "beans", "soup"], n) -biased_foods = map(preferences) do preference - if cmp(preference, "neutral") == 1 - return rand(["water", "flour"]) - elseif cmp(preference, "like it") == 1 - return rand(["bananas", "booze"]) - else - return rand(["potatoes", "beans", "soup"]) - end -end - -c_biased = contingency_matrix(preferences, biased_foods) -c_random = contingency_matrix(preferences, random_foods) -mutualinfo(c_biased), mutualinfo(c_random) -``` - -#### Longer example: AR1-system and unidirectionally coupled logistic maps - -In this example we generate realizations of two different systems where we know the strength of coupling between the variables. Our aim is to compute Shannon mutual information $I^S(X; Y)$ ([`MIShannon`](@ref)) between time series of each variable and assess how the magnitude of $I^S(X; Y)$ changes as we change the strength of coupling between $X$ and $Y$. We'll use two systems that ship with CausalityTools.jl: - -* A stochastic system consisting of two unidirectionally coupled first-order autoregressive processes ([`ar1_unidir`](@ref)) -* A deterministic, chaotic system consisting of two unidirectionally coupled logistic maps ([`logistic2_unidir`](@ref)) - -We use the default input parameter values (see [`AR1Unidir`](@ref) and [`Logistic2Unidir`](@ref) for details) and below we toggle only the random initial conditions and the coupling strength parameter `c_xy`. For each value of `c_xy` we generate 1,000 unique realizations of the system and obtain 500-point time series of the coupled variables. - -To estimate the mutual information, we use the binning-based [`ValueHistogram`](@ref) estimator. We summarize the distribution of $I(X; Y)$ values across all realizations using the median and quantiles encompassing 95 % of the values. 
- -```@example -using CausalityTools -using Statistics -using CairoMakie - -# Span a range of x-y coupling strengths -c = 0.0:0.1:1.0 - -# Number of observations in each time series -npts = 500 - -# Number of unique realizations of each system -n_realizations = 1000 - -# Get MI for multiple realizations of two systems, -# saving three quantiles for each c value -mi = zeros(length(c), 3, 2) - -# Define an estimator for MI -b = RectangularBinning(4) -estimator = ValueHistogram(b) - -for i in 1 : length(c) - - tmp = zeros(n_realizations, 2) - - for k in 1 : n_realizations - - # Obtain time series realizations of the two 2D systems - # for a given coupling strength and random initial conditions - s_logistic = system(Logistic2Unidir(; xi = rand(2), c_xy = c[i])) - s_ar = system(AR1Unidir(xi = rand(2), c_xy = c[i])) - lmap = first(trajectory(s_logistic, npts - 1, Ttr = 500)) - ar1 = first(trajectory(s_ar, npts - 1)) - - # Compute the MI between the two coupled components of each system - tmp[k, 1] = mutualinfo(MIShannon(), estimator, lmap[:, 1], lmap[:, 2]) - tmp[k, 2] = mutualinfo(MIShannon(), estimator, ar1[:, 1], ar1[:, 2]) - end - - # Compute lower, middle, and upper quantiles of MI for each coupling strength - mi[i, :, 1] = quantile(tmp[:, 1], [0.025, 0.5, 0.975]) - mi[i, :, 2] = quantile(tmp[:, 2], [0.025, 0.5, 0.975]) -end - -# Plot distribution of MI values as a function of coupling strength for both systems -fig = with_theme(theme_minimal()) do - fig = Figure() - ax = Axis(fig[1, 1], xlabel = "Coupling strength", ylabel = "Mutual information") - band!(ax, c, mi[:, 1, 1], mi[:, 3, 1], color = (:black, 0.3)) - lines!(ax, c, mi[:, 2, 1], label = "2D chaotic logistic maps", color = :black) - band!(ax, c, mi[:, 1, 2], mi[:, 3, 2], color = (:red, 0.3)) - lines!(ax, c, mi[:, 2, 2], label = "2D order-1 autoregressive", color = :red) - return fig -end -fig -``` - -As expected, $I(X; Y)$ increases with coupling strength in a system-specific manner. diff --git a/docs/src/examples/examples_predictive_asymmetry.md b/docs/src/examples/examples_predictive_asymmetry.md deleted file mode 100644 index c72407e35..000000000 --- a/docs/src/examples/examples_predictive_asymmetry.md +++ /dev/null @@ -1,63 +0,0 @@ -# [Predictive asymmetry](@id examples_predictive_asymmetry) - -## [Computing the asymmetry distribution](@id examples_pa_asymmetry_dist) - -The following example demonstrates how to compute the [`asymmetry`](@ref) distribution -from time series input. We'll use timeseries from a chain of unidirectionally coupled -logistic maps that are coupled $X \to Y \to Z \to W$. - -These examples compute the asymmetry distribution directly. Use the [`PA`](@ref) -measure with [`PATest`](@ref) for formal independence testing. - -### Pairwise analysis - -When considering only two variables $V_1$ and $V_2$, we expect the distribution -$\DeltaA_{X \to Y}$ to be skewed towards positive values if $V_1 \to V2$. - -Parameters are tuned by providing an instance of the [`PA`](@ref) -measure, which quantifies directional influence. We'll use the [`FPVP`](@ref) estimator, -and compute the asymmetry distribution over prediction lags `ηT = 1:10`. -In real applications, it is important to ensure proper embeddings for the source -(and conditional, if relevant) variables. We will optimize embedding parameters -using the "traditional" approach from -[DelayEmbeddings.jl](https://juliadynamics.github.io/DynamicalSystems.jl/dev/embedding/traditional/). 
- -```@example example_pa1 -using CausalityTools -using DelayEmbeddings -using Random -rng = MersenneTwister(1234) - -sys = system(Logistic4Chain(xi = [0.1, 0.2, 0.3, 0.4]; rng)) -x, y, z, w = columns(first(trajectory(sys, 1000))) -τx = estimate_delay(x, "mi_min") -τy = estimate_delay(y, "mi_min") -est = FPVP(; k = 3, w = 5) -ΔA_xy = asymmetry(PA(ηT = 1:10, τS = τx), est, x, y) -ΔA_yx = asymmetry(PA(ηT = 1:10, τS = τy), est, y, x) -ΔA_xy, ΔA_yx -``` - -As expected, since there is coupling $X \to Y$, $\Delta A_{X \to Y}$ is skewed -towards positive values, while $\Delta A_{Y \to X}$ is skewed towards negative values -because there is no coupling $Y \to X$. - -### Conditional analysis - -What happens if we compute$\Delta A_{X \to Z}$? We'd maybe expect there to be -some information transfer $X \to Z$, even though ther are not directly linked, because -information is transferred through $Y$. - -```@example example_pa1 -ΔA_xz = asymmetry(PA(ηT = 1:10, τS = estimate_delay(x, "mi_min")), est, x, z) -``` - -As expected, the distribution is still skewed towards positive values. To determine -whether the information flow between $x$ and $z$ is mediated by $y$, we can compute -the conditional distribution $\Delta A_{X \to Z | Y}$. If these values are still positively -skewed, we conclude that $Y$ is not a mediating variable. If conditioning on $Y$ causes -$\Delta A_{X \to Z | Y}$ to not be skewed towards positive values any more, then -we conclude that $Y$ is a mediating variable and that $X$ and $Z$ are linked $X \to Y \to Z$. - -In [these examples](@ref examples_patest), the same time series are formally tested -for independence using a [`PATest`](@ref). diff --git a/docs/src/examples/examples_recurrence.md b/docs/src/examples/examples_recurrence.md deleted file mode 100644 index 3e7f432b4..000000000 --- a/docs/src/examples/examples_recurrence.md +++ /dev/null @@ -1,47 +0,0 @@ -# [Inferring directional influence using conditional recurrence](@id examples_recurrence) - -## Computing the [`Recurrence`](@ref) measure for independent data - -The interpretation of the [`Recurrence`](@ref) measure is that if two variables are -symmetrically coupled, then the conditional recurrence in both directions is equal. -Two variables that are uncoupled are symmetrically coupled (i.e. no coupling). We -therefore expect the difference in conditional recurrence to be around zero. - -```@example -using CausalityTools -using StableRNGs -rng = StableRNG(1234) -x = rand(rng, 300) -y = rand(rng, 300) -m = Recurrence(r = 0.5) -Δ = conditional_recurrence(m, x, y) - conditional_recurrence(m, y, x) -``` - -This value is close to zero. To test if it is significantly indistinguishable from -zero, we can use a [`SurrogateTest`](@ref) (see example below). - -## Independence test - -```@example -using CausalityTools -using StableRNGs -rng = StableRNG(1234) -x = rand(rng, 300) -y = rand(rng, 300) -test = SurrogateTest(Recurrence(r = 0.5); rng, nshuffles = 100, surrogate = RandomShuffle()) -independence(test, x, y) -``` - -As expected, we can't reject independence. What happens if two variables are coupled? - -```@example -using CausalityTools -using StableRNGs -rng = StableRNG(1234) -x = rand(rng, 300) -z = x .+ rand(rng, 300) -test = SurrogateTest(Recurrence(r = 0.5); rng, nshuffles = 100, surrogate = RandomShuffle()) -independence(test, x, z) -``` - -Now, because the variables are coupled, the evidence in the data support dependence. 
diff --git a/docs/src/examples/examples_transferentropy.md b/docs/src/examples/examples_transferentropy.md deleted file mode 100644 index 37a7fd381..000000000 --- a/docs/src/examples/examples_transferentropy.md +++ /dev/null @@ -1,192 +0,0 @@ -# [Transfer entropy](@id examples_transferentropy) - -## [`TEShannon`](@ref) - -### Estimation using [`TransferEntropyEstimator`](@ref)s - -#### Estimator comparison - -Let's reproduce Figure 4 from Zhu et al (2015)[^Zhu2015], where they test some -dedicated transfer entropy estimators on a bivariate autoregressive system. -We will test - -- The [`Lindner`](@ref) and [`Zhu1`](@ref) dedicated transfer entropy estimators, - which try to eliminate bias. -- The [`KSG1`](@ref) estimator, which computes TE naively as a sum of mutual information - terms (without guaranteed cancellation of biases for the total sum). -- The [`Kraskov`](@ref) estimator, which computes TE naively as a sum of entropy - terms (without guaranteed cancellation of biases for the total sum). - -[^Zhu2015]: - Zhu, J., Bellanger, J. J., Shu, H., & Le Bouquin Jeannès, R. (2015). Contribution to transfer entropy estimation via the k-nearest-neighbors approach. Entropy, 17(6), 4173-4201. - -```@example -using CausalityTools -using CairoMakie -using Statistics -using Distributions: Normal - -function model2(n::Int) - 𝒩x = Normal(0, 0.1) - 𝒩y = Normal(0, 0.1) - x = zeros(n+2) - y = zeros(n+2) - x[1] = rand(𝒩x) - x[2] = rand(𝒩x) - y[1] = rand(𝒩y) - y[2] = rand(𝒩y) - - for i = 3:n+2 - x[i] = 0.45*sqrt(2)*x[i-1] - 0.9*x[i-2] - 0.6*y[i-2] + rand(𝒩x) - y[i] = 0.6*x[i-2] - 0.175*sqrt(2)*y[i-1] + 0.55*sqrt(2)*y[i-2] + rand(𝒩y) - end - return x[3:end], y[3:end] -end -te_true = 0.42 # eyeball the theoretical value from their Figure 4. - -m = TEShannon(embedding = EmbeddingTE(dT = 2, dS = 2), base = ℯ) -estimators = [Zhu1(k = 8), Lindner(k = 8), KSG1(k = 8), Kraskov(k = 8)] -Ls = [floor(Int, 2^i) for i in 8.0:0.5:11] -nreps = 8 -tes_xy = [[zeros(nreps) for i = 1:length(Ls)] for e in estimators] -tes_yx = [[zeros(nreps) for i = 1:length(Ls)] for e in estimators] -for (k, est) in enumerate(estimators) - for (i, L) in enumerate(Ls) - for j = 1:nreps - x, y = model2(L); - tes_xy[k][i][j] = transferentropy(m, est, x, y) - tes_yx[k][i][j] = transferentropy(m, est, y, x) - end - end -end - -ymin = minimum(map(x -> minimum(Iterators.flatten(Iterators.flatten(x))), (tes_xy, tes_yx))) -estimator_names = ["Zhu1", "Lindner", "KSG1", "Kraskov"] -ls = [:dash, :dot, :dash, :dot] -mr = [:rect, :hexagon, :xcross, :pentagon] - -fig = Figure(resolution = (800, 350)) -ax_xy = Axis(fig[1,1], xlabel = "Signal length", ylabel = "TE (nats)", title = "x → y") -ax_yx = Axis(fig[1,2], xlabel = "Signal length", ylabel = "TE (nats)", title = "y → x") -for (k, e) in enumerate(estimators) - label = estimator_names[k] - marker = mr[k] - scatterlines!(ax_xy, Ls, mean.(tes_xy[k]); label, marker) - scatterlines!(ax_yx, Ls, mean.(tes_yx[k]); label, marker) - hlines!(ax_xy, [te_true]; xmin = 0.0, xmax = 1.0, linestyle = :dash, color = :black) - hlines!(ax_yx, [te_true]; xmin = 0.0, xmax = 1.0, linestyle = :dash, color = :black) - linkaxes!(ax_xy, ax_yx) -end -axislegend(ax_xy, position = :rb) - -fig -``` - - -### Reproducing Schreiber (2000) - -Let's try to reproduce the results from Schreiber's original paper[^Schreiber2000] where -he introduced the transfer entropy. 
We'll use the [`ValueHistogram`](@ref) estimator, -which is visitation frequency based and computes entropies by counting visits of the -system's orbit to discrete portions of its reconstructed state space. - -```@example example_te_schreiber -using CausalityTools -using DynamicalSystemsBase -using CairoMakie -using Statistics -using Random; Random.seed!(12234); - -function ulam_system(dx, x, p, t) - f(x) = 2 - x^2 - ε = p[1] - dx[1] = f(ε*x[length(dx)] + (1-ε)*x[1]) - for i in 2:length(dx) - dx[i] = f(ε*x[i-1] + (1-ε)*x[i]) - end -end - -ds = DiscreteDynamicalSystem(ulam_system, rand(100) .- 0.5, [0.04]) -first(trajectory(ds, 1000; Ttr = 1000)); - -εs = 0.02:0.02:1.0 -base = 2 -te_x1x2 = zeros(length(εs)); te_x2x1 = zeros(length(εs)) -# Guess an appropriate bin width of 0.2 for the histogram -est = ValueHistogram(0.2) - -for (i, ε) in enumerate(εs) - set_parameter!(ds, 1, ε) - tr = first(trajectory(ds, 2000; Ttr = 5000)) - X1 = tr[:, 1]; X2 = tr[:, 2] - @assert !any(isnan, X1) - @assert !any(isnan, X2) - te_x1x2[i] = transferentropy(TEShannon(; base), est, X1, X2) - te_x2x1[i] = transferentropy(TEShannon(; base), est, X2, X1) -end - -fig = with_theme(theme_minimal(), markersize = 2) do - fig = Figure() - ax = Axis(fig[1, 1], xlabel = "epsilon", ylabel = "Transfer entropy (bits)") - scatterlines!(ax, εs, te_x1x2, label = "X1 to X2", color = :black, lw = 1.5) - scatterlines!(ax, εs, te_x2x1, label = "X2 to X1", color = :red, lw = 1.5) - axislegend(ax, position = :lt) - return fig -end -fig -``` - -As expected, transfer entropy from `X1` to `X2` is higher than from `X2` to `X1` across parameter values for `ε`. But, by our definition of the ulam system, dynamical coupling only occurs from `X1` to `X2`. The results, however, show nonzero transfer entropy in both directions. What does this mean? - -Computing transfer entropy from finite time series introduces bias, and so does any particular choice of entropy estimator used to calculate it. To determine whether a transfer entropy estimate should be trusted, we can employ surrogate testing. We'll generate surrogate using -[TimeseriesSurrogates.jl](https://github.com/JuliaDynamics/TimeseriesSurrogates.jl). -One possible way to do so is to use a [`SurrogateTest`](@ref) with [`independence`](@ref), but -here we'll do the surrogate resampling manually, so we can plot and inspect the results. - -In the example below, we continue with the same time series generated above. However, at each value of `ε`, we also compute transfer entropy for `nsurr = 50` different randomly shuffled (permuted) versions of the source process. If the original transfer entropy exceeds that of some percentile the transfer entropy estimates of the surrogate ensemble, we will take that as "significant" transfer entropy. 
- -```@example example_te_schreiber -nsurr = 25 # in real applications, you should use more surrogates -base = 2 -te_x1x2 = zeros(length(εs)); te_x2x1 = zeros(length(εs)) -te_x1x2_surr = zeros(length(εs), nsurr); te_x2x1_surr = zeros(length(εs), nsurr) -est = ValueHistogram(0.2) # use same bin-width as before - -for (i, ε) in enumerate(εs) - set_parameter!(ds, 1, ε) - tr = first(trajectory(ds, 500; Ttr = 5000)) - X1 = tr[:, 1]; X2 = tr[:, 2] - @assert !any(isnan, X1) - @assert !any(isnan, X2) - te_x1x2[i] = transferentropy(TEShannon(; base), est, X1, X2) - te_x2x1[i] = transferentropy(TEShannon(; base), est, X2, X1) - s1 = surrogenerator(X1, RandomShuffle()); s2 = surrogenerator(X2, RandomShuffle()) - - for j = 1:nsurr - te_x1x2_surr[i, j] = transferentropy(TEShannon(; base), est, s1(), X2) - te_x2x1_surr[i, j] = transferentropy(TEShannon(; base), est, s2(), X1) - end -end - -# Compute 95th percentiles of the surrogates for each ε -qs_x1x2 = [quantile(te_x1x2_surr[i, :], 0.95) for i = 1:length(εs)] -qs_x2x1 = [quantile(te_x2x1_surr[i, :], 0.95) for i = 1:length(εs)] - -fig = with_theme(theme_minimal(), markersize = 2) do - fig = Figure() - ax = Axis(fig[1, 1], xlabel = "epsilon", ylabel = "Transfer entropy (bits)") - scatterlines!(ax, εs, te_x1x2, label = "X1 to X2", color = :black, lw = 1.5) - scatterlines!(ax, εs, qs_x1x2, color = :black, linestyle = :dot, lw = 1.5) - scatterlines!(ax, εs, te_x2x1, label = "X2 to X1", color = :red) - scatterlines!(ax, εs, qs_x2x1, color = :red, linestyle = :dot) - axislegend(ax, position = :lt) - return fig -end -fig -``` - -The plot above shows the original transfer entropies (solid lines) and the 95th percentile transfer entropies of the surrogate ensembles (dotted lines). As expected, using the surrogate test, the transfer entropies from `X1` to `X2` are mostly significant (solid black line is above dashed black line). The transfer entropies from `X2` to `X1`, on the other hand, are mostly not significant (red solid line is below red dotted line). - -[^Schreiber2000]: - Schreiber, Thomas. "Measuring information transfer." Physical review letters 85.2 - (2000): 461. diff --git a/docs/src/examples/todo_pcrobust.txt b/docs/src/examples/todo_pcrobust.txt deleted file mode 100644 index ed0fa1c76..000000000 --- a/docs/src/examples/todo_pcrobust.txt +++ /dev/null @@ -1,44 +0,0 @@ - -## [The PB-robust algorithm](@id pc_robust_example) - -The PC algorithm is perhaps the most famous algorithm for inferring causal graphs. -Here, we demonstrate the [`PCRobust`](@ref) variant on some random (uncoupled) -variables. - -```@example causalgraph_corr -using CausalityTools -using Random -using Graphs -using CairoMakie, GraphMakie - -# A function that plots an `n`-variable directed graph. -function plotgraph(g; labels) - with_theme(theme_minimal(), resolution = (400, 350)) do - fig = Figure(); - ax = Axis(fig[1, 1]) - graphplot!(ax, g; nlabels = labels) - hidedecorations!(ax); hidespines!(ax) - return fig - end -end - -# Some example data. 
-rng = MersenneTwister(1234) - -# The true graph is X → Y → Z → W -sys = system(Logistic4Chain(; rng)) -X, Y, Z, W = columns(first(trajectory(sys, 1000, Ttr = 10000))) -data = [X, Y, Z, W] - -# Infer a directed graph using correlation-based independence tests -pairwise_test = SurrogateTest(MIShannon(), KSG2(k = 10, w = 5)) -conditional_test = SurrogateTest(CMIShannon(), FPVP(k = 10, w = 5)) -alg = PCRobust(pairwise_test, conditional_test; α = 0.05) -g = infer_graph(alg, data) -``` - -Let's plot the resulting graph: - -```@example causalgraph_corr -plotgraph(g; labels = ["a$i" for i = 1:5]) -``` \ No newline at end of file diff --git a/src/methods/crossmappings/ensemble.jl b/docs/src/examples_associations.md similarity index 100% rename from src/methods/crossmappings/ensemble.jl rename to docs/src/examples_associations.md diff --git a/docs/src/examples/examples_cross_mappings.md b/docs/src/extended_examples/cross_mapping.md similarity index 59% rename from docs/src/examples/examples_cross_mappings.md rename to docs/src/extended_examples/cross_mapping.md index 52355f4da..5724e65ee 100644 --- a/docs/src/examples/examples_cross_mappings.md +++ b/docs/src/extended_examples/cross_mapping.md @@ -1,89 +1,11 @@ -# [Cross mappings](@id examples_crossmappings) +# [`ConvergentCrossMapping`](@ref) -## [`ConvergentCrossMapping`](@ref) - -### [`ConvergentCrossMapping`](@ref) directly - -```@example -using CausalityTools -x, y = rand(200), rand(100) -crossmap(CCM(), x, y) -``` - -### [`ConvergentCrossMapping`](@ref) with [`RandomVectors`](@ref) - -When cross-mapping with the [`RandomVectors`](@ref) estimator, a single random subsample -of time indices (i.e. not in any particular order) of length `l` is drawn for each library -size `l`, and cross mapping is performed using the embedding vectors corresponding -to those time indices. - -```@example -using CausalityTools -using Random; rng = MersenneTwister(1234) -x, y = randn(rng, 200), randn(rng, 200) - -# We'll draw a single sample at each `l ∈ libsizes`. Sampling with replacement is then -# necessary, because our 200-pt timeseries will result in embeddings with -# less than 200 points. -est = RandomVectors(; libsizes = 50:10:200, replace = true, rng) -crossmap(CCM(), est, x, y) -``` - -To generate a distribution of cross-map estimates for each `l ∈ libsizes`, just call -crossmap repeatedly, e.g. - -```@example -using CausalityTools -using Random; rng = MersenneTwister(1234) -x, y = randn(rng, 200), randn(rng, 200) -est = RandomVectors(; libsizes = 50:10:200, replace = true, rng) -ρs = [crossmap(CCM(), est, x, y) for i = 1:80] -M = hcat(ρs...) -``` - -Now, the `k`-th row of `M` contains `80` estimates of the correspondence measure `ρ` -at library size `libsizes[k]`. - -### [`ConvergentCrossMapping`](@ref) with [`RandomSegments`](@ref) - -When cross-mapping with the [`RandomSegments`](@ref) estimator, a single random subsample -of continguous, ordered time indices of length `l` is drawn for each library -size `l`, and cross mapping is performed using the embedding vectors corresponding -to those time indices. - -```@example -using CausalityTools -using Random; rng = MersenneTwister(1234) -x, y = randn(rng, 200), randn(rng, 200) - -# We'll draw a single sample at each `l ∈ libsizes`. We limit the library size to 100, -# because drawing segments of the data longer than half the available data doesn't make -# much sense. 
-est = RandomSegment(; libsizes = 50:10:100, rng) -crossmap(CCM(), est, x, y) -``` - -As above, to generate a distribution of cross-map estimates for each `l ∈ libsizes`, just call -crossmap repeatedly, e.g. - -```@example -using CausalityTools -using Random; rng = MersenneTwister(1234) -x, y = randn(rng, 200), randn(rng, 200) -est = RandomSegment(; libsizes = 50:10:100, rng) -ρs = [crossmap(CCM(), est, x, y) for i = 1:80] -M = hcat(ρs...) -``` - -Now, the `k`-th row of `M` contains `80` estimates of the correspondence measure `ρ` -at library size `libsizes[k]`. - -### Reproducing Sugihara et al. (2012) +## [Reproducing Sugihara et al. (2012)](@id example_ConvergentCrossMapping_reproducing_sugihara) !!! note "Run blocks consecutively" If copying these examples and running them locally, make sure the relevant packages (given in the first block) are loaded first. -#### Figure 3A +### Figure 3A Let's reproduce figure 3A too, focusing only on [`ConvergentCrossMapping`](@ref) this time. In this figure, they compute the cross mapping for libraries of increasing size, always starting at time index 1. This approach - which we here call the [`ExpandingSegment`](@ref) estimator - is one of many ways of estimating the correspondence between observed and predicted value. @@ -129,13 +51,13 @@ function add_to_fig!(fig_pos, libsizes, ρs_x̂y, ρs_ŷx; title = "", quantile axislegend(ax, position = :rb) end -function reproduce_figure_3A_naive(measure::CrossmapMeasure) +function reproduce_figure_3A_naive(definition::CrossmapMeasure) sys_bidir = logistic_sugi(; u0 = [0.2, 0.4], rx = 3.7, ry = 3.700001, βxy = 0.02, βyx = 0.32); x, y = columns(first(trajectory(sys_bidir, 3100, Ttr = 10000))); libsizes = [20:2:50; 55:5:200; 300:50:500; 600:100:900; 1000:500:3000] - est = ExpandingSegment(; libsizes); - ρs_x̂y = crossmap(measure, est, x, y) - ρs_ŷx = crossmap(measure, est, y, x) + est = ExpandingSegment(definition; libsizes); + ρs_x̂y = crossmap(est, x, y) + ρs_ŷx = crossmap(est, y, x) with_theme(theme_minimal(), markersize = 5) do @@ -153,18 +75,18 @@ Hm. This looks a bit like the paper, but the curve is not smooth. We can do bett It is not clear from the paper exactly *what* they plot in their Figure 3A, if they plot an average of some kind, or precisely what parameters and initial conditions they use. However, we can get a smoother plot by using a [`Ensemble`](@ref). Combined with a [`CrossmapEstimator`](@ref), it uses Monte Carlo resampling on subsets of the input data to compute an ensemble of `ρ`s that we here use to compute the median and 90-th percentile range for each library size. ```@example MAIN_CCM -function reproduce_figure_3A_ensemble(measure::CrossmapMeasure) +function reproduce_figure_3A_ensemble(definition::CrossmapMeasure) sys_bidir = logistic_sugi(; u0 = [0.4, 0.2], rx = 3.8, ry = 3.5, βxy = 0.02, βyx = 0.1); - x, y = columns(first(trajectory(sys_bidir, 10000, Ttr = 10000))); + x, y = columns(first(trajectory(sys_bidir, 5000, Ttr = 10000))); # Note: our time series are 1000 points long. When embedding, some points are # lost, so we must use slightly less points for the segments than # there are points in the original time series. 
- libsizes = [20:5:50; 55:5:200; 300:50:500; 600:100:900; 1000:500:3000] + libsizes = [20:5:50; 55:5:200; 300:50:500; 600:100:900; 1000:500:2000] # No point in doing more than one rep, because there data are always the same # for `ExpandingSegment.` - ensemble_ev = Ensemble(measure, ExpandingSegment(; libsizes); nreps = 1) - ensemble_rs = Ensemble(measure, RandomSegment(; libsizes); nreps = 30) - ensemble_rv = Ensemble(measure, RandomVectors(; libsizes); nreps = 30) + ensemble_ev = Ensemble(ExpandingSegment(definition; libsizes); nreps = 1) + ensemble_rs = Ensemble(RandomSegment(definition; libsizes); nreps = 30) + ensemble_rv = Ensemble(RandomVectors(definition; libsizes); nreps = 30) ρs_x̂y_es = crossmap(ensemble_ev, x, y) ρs_ŷx_es = crossmap(ensemble_ev, y, x) ρs_x̂y_rs = crossmap(ensemble_rs, x, y) @@ -197,7 +119,7 @@ reproduce_figure_3A_ensemble(ConvergentCrossMapping(d = 3, τ = -1, w = 5)) There wasn't really that much of a difference, since for the logistic map, the autocorrelation function flips sign for every lag increase. However, for examples from other systems, tuning `w` may be important. -#### Figure 3B +### Figure 3B What about figure 3B? Here they generate time series of length 400 for a range of values for both coupling parameters, and plot the dominant direction $\Delta = \rho(\hat{x} | y) - \rho(\hat{y} | x)$. @@ -215,7 +137,8 @@ function reproduce_figure_3B() sys_bidir = logistic_sugi(; u0 = [0.2, 0.4], rx = 3.7, ry = 3.7, βxy, βyx); # Generate 1000 points. Randomly select a 400-pt long segment. x, y = columns(first(trajectory(sys_bidir, 400, Ttr = 10000))); - ensemble = Ensemble(CCM(d = 3, w = 5, τ = -1), RandomVectors(libsizes = 100), nreps = 50) + definition = CCM(d = 3, w = 5, τ = -1) + ensemble = Ensemble(RandomVectors(definition; libsizes = 100), nreps = 50) ρx̂ys[i, j] = mean(crossmap(ensemble, x, y)) ρŷxs[i, j] = mean(crossmap(ensemble, y, x)) end @@ -239,7 +162,7 @@ end reproduce_figure_3B() ``` -#### Figures 3C and 3D +### [Figures 3C and 3D](@id example_sugihara_figs3Cand3D) Let's reproduce figures 3C and 3D in Sugihara et al. (2012)[^Sugihara2012], which introduced the [`ConvergentCrossMapping`](@ref) measure. @@ -264,7 +187,6 @@ using DynamicalSystemsBase using StateSpaceSets using CairoMakie, Printf - # ----------------------------------------------------------------------------------------- # Create 500-point long time series for Sugihara et al. (2012)'s example for figure 3. # ----------------------------------------------------------------------------------------- @@ -313,103 +235,3 @@ with_theme(theme_minimal(), end ``` -## [`PairwiseAsymmetricInference`](@ref) - -### Reproducing McCracken & Weigel (2014) - -Let's try to reproduce figure 8 from McCracken & Weigel (2014)'s[^McCracken2014] -paper on [`PairwiseAsymmetricInference`](@ref) (PAI). We'll start by defining the their example B (equations 6-7). This system consists of two -variables ``X`` and ``Y``, where ``X`` drives ``Y``. - -After we have computed the PAI in both directions, we define a measure of directionality as the difference between PAI in the ``X \to Y`` direction and in the ``Y \to X`` direction, so that if ``X`` drives ``Y``, then ``\Delta < 0``. 
- -```@example MAIN_CCM -using CausalityTools -using LabelledArrays -using StaticArrays -using DynamicalSystemsBase -using StateSpaceSets -using CairoMakie, Printf -using Distributions: Normal -using Statistics: mean, std - -function eom_nonlinear_sindriver(dx, x, p, n) - a, b, c, t, Δt = (p...,) - x, y = x[1], x[2] - 𝒩 = Normal(0, 1) - - dx[1] = sin(t) - dx[2] = a*x * (1 - b*x) + c* rand(𝒩) - p[end-1] += 1 # update t - - return -end - -function nonlinear_sindriver(;u₀ = rand(2), a = 1.0, b = 1.0, c = 2.0, Δt = 1) - DiscreteDynamicalSystem(eom_nonlinear_sindriver, u₀, [a, b, c, 0, Δt]) -end - -function reproduce_figure_8_mccraken(; - c = 2.0, Δt = 0.2, - as = 0.25:0.25:5.0, - bs = 0.25:0.25:5.0) - # ----------------------------------------------------------------------------------------- - # Generate many time series for many different values of the parameters `a` and `b`, - # and compute PAI. This will replicate the upper right panel of - # figure 8 in McCracken & Weigel (2014). - # ----------------------------------------------------------------------------------------- - - measure = PairwiseAsymmetricInference(d = 3) - - # Manually resample `nreps` length-`L` time series and use mean ρ(x̂|X̄y) - ρ(ŷ|Ȳx) - # for each parameter combination. - nreps = 50 - L = 300 # length of timeseries - Δ = zeros(length(as), length(bs)) - for (i, a) in enumerate(as) - for (j, b) in enumerate(bs) - s = nonlinear_sindriver(; a, b, c, Δt) - x, y = columns(first(trajectory(s, 1000, Ttr = 10000))) - Δreps = zeros(nreps) - for i = 1:nreps - # Ensure we're subsampling at the same time indices. - ind_start = rand(1:(1000-L)) - r = ind_start:(ind_start + L) - Δreps[i] = @views crossmap(measure, y[r], x[r]) - - crossmap(measure, x[r], y[r]) - end - Δ[i, j] = mean(Δreps) - end - end - - # ----------------------------------------------------------------------------------------- - # An example time series for plotting. - # ----------------------------------------------------------------------------------------- - sys = nonlinear_sindriver(; a = 1.0, b = 1.0, c, Δt) - npts = 500 - orbit = first(trajectory(sys, npts, Ttr = 10000)) - x, y = columns(orbit) - with_theme(theme_minimal(), - markersize = 5) do - - X = x[1:300] - Y = y[1:300] - fig = Figure(); - ax_ts = Axis(fig[1, 1:2], xlabel = "Time (t)", ylabel = "Value") - scatterlines!(ax_ts, (X .- mean(X)) ./ std(X), label = "x") - scatterlines!(ax_ts, (Y .- mean(Y)) ./ std(Y), label = "y") - axislegend() - - ax_hm = Axis(fig[2, 1:2], xlabel = "a", ylabel = "b") - ax_hm.yticks = (1:length(as), string.([i % 2 == 0 ? as[i] : "" for i = 1:length(as)])) - ax_hm.xticks = (1:length(bs), string.([i % 2 == 0 ? bs[i] : "" for i = 1:length(bs)])) - hm = heatmap!(ax_hm, Δ, colormap = :viridis) - Colorbar(fig[2, 3], hm; label = "Δ' = ρ(ŷ | yx) - ρ(x̂ | xy)") - fig - end -end - -reproduce_figure_8_mccraken() -``` - -As expected, ``\Delta < 0`` for all parameter combinations, implying that ``X`` "PAI drives" ``Y``. diff --git a/docs/src/extended_examples/mutual_information.md b/docs/src/extended_examples/mutual_information.md new file mode 100644 index 000000000..3af847311 --- /dev/null +++ b/docs/src/extended_examples/mutual_information.md @@ -0,0 +1,362 @@ +# [`MIShannon`](@ref) + +## Reproducing Kraskov et al. (2004) + +Here, we'll reproduce Figure 4 from Kraskov et al. (2004)'s seminal paper on the nearest-neighbor based mutual information estimator. 
We'll estimate the mutual information
+between marginals of a bivariate Gaussian for a fixed time series length (`N = 800`
+points in the code below), varying the number of neighbors. *Note: in the original
+paper, they show multiple curves corresponding to different time series lengths. We
+only show two curves: one for the [`KSG1`](@ref) estimator and one for the
+[`KSG2`](@ref) estimator*.
+
+```@example ex_mutualinfo
+using CausalityTools
+using LinearAlgebra: det
+using Distributions: MvNormal
+using StateSpaceSets: StateSpaceSet
+using CairoMakie
+using Statistics
+
+N = 800
+c = 0.9
+Σ = [1 c; c 1]
+N2 = MvNormal([0, 0], Σ)
+mitrue = -0.5*log(det(Σ)) # in nats
+ks = [2; 5; 7; 10:10:70] .* 2
+
+nreps = 10 # plot average over 10 independent realizations
+mis_ksg1 = zeros(nreps, length(ks))
+mis_ksg2 = zeros(nreps, length(ks))
+for i = 1:nreps
+    D2 = StateSpaceSet([rand(N2) for i = 1:N])
+    X = D2[:, 1] |> StateSpaceSet
+    Y = D2[:, 2] |> StateSpaceSet
+    for (j, k) in enumerate(ks)
+        est1 = KSG1(MIShannon(; base = ℯ); k)
+        est2 = KSG2(MIShannon(; base = ℯ); k)
+        mis_ksg1[i, j] = association(est1, X, Y)
+        mis_ksg2[i, j] = association(est2, X, Y)
+    end
+end
+fig = Figure()
+ax = Axis(fig[1, 1], xlabel = "k / N", ylabel = "Mutual information (nats)")
+scatterlines!(ax, ks ./ N, mean(mis_ksg1, dims = 1) |> vec, label = "KSG1")
+scatterlines!(ax, ks ./ N, mean(mis_ksg2, dims = 1) |> vec, label = "KSG2")
+hlines!(ax, [mitrue], color = :black, linewidth = 3, label = "I (true)")
+axislegend()
+fig
+```
+
+## [`MutualInformationEstimator`](@ref) comparison
+
+Most estimators suffer from significant bias when applied to discrete, finite data. One possible resolution is to add a small amount of noise to discrete variables, so that the data becomes continuous in practice.
+
+But instead of adding noise to your data, you can also consider using an
+estimator that is specifically designed to deal with continuous-discrete mixture data.
+One example is the [`GaoKannanOhViswanath`](@ref) estimator. Below, we compare its
+performance to [`KSG1`](@ref) on uniformly distributed discrete multivariate data.
+The true mutual information is zero. While the "naive" [`KSG1`](@ref) estimator
+diverges from the true value for these data, the [`GaoKannanOhViswanath`](@ref)
+converges to the true value.
+
+```@example ex_mutualinfo
+using CausalityTools
+using Statistics
+using StateSpaceSets: StateSpaceSet
+using Statistics: mean
+using CairoMakie
+
+function compare_ksg_gkov(;
+    k = 5,
+    base = 2,
+    nreps = 10,
+    Ls = [500:100:1000; 1500; 2500; 5000; 7000])
+
+    mis_ksg1_mix = zeros(nreps, length(Ls))
+    mis_ksg1_discrete = zeros(nreps, length(Ls))
+    mis_ksg1_cont = zeros(nreps, length(Ls))
+    mis_gkov_mix = zeros(nreps, length(Ls))
+    mis_gkov_discrete = zeros(nreps, length(Ls))
+    mis_gkov_cont = zeros(nreps, length(Ls))
+
+    for (j, L) in enumerate(Ls)
+        for i = 1:nreps
+            X = StateSpaceSet(float.(rand(1:8, L, 2)))
+            Y = StateSpaceSet(float.(rand(1:8, L, 2)))
+            Z = StateSpaceSet(rand(L, 2))
+            W = StateSpaceSet(rand(L, 2))
+            # Use the `base` keyword here, so the values match the axis label
+            # below (bits for the default `base = 2`).
+            est_gkov = GaoKannanOhViswanath(MIShannon(; base); k)
+            est_ksg1 = KSG1(MIShannon(; base); k)
+            mis_ksg1_discrete[i, j] = association(est_ksg1, X, Y)
+            mis_gkov_discrete[i, j] = association(est_gkov, X, Y)
+            mis_ksg1_mix[i, j] = association(est_ksg1, X, Z)
+            mis_gkov_mix[i, j] = association(est_gkov, X, Z)
+            mis_ksg1_cont[i, j] = association(est_ksg1, Z, W)
+            mis_gkov_cont[i, j] = association(est_gkov, Z, W)
+        end
+    end
+    return mis_ksg1_mix, mis_ksg1_discrete, mis_ksg1_cont,
+        mis_gkov_mix, mis_gkov_discrete, mis_gkov_cont
+end
+
+fig = Figure()
+ax = Axis(fig[1, 1],
+    xlabel = "Sample size",
+    ylabel = "Mutual information (bits)")
+Ls = [100; 200; 500; 1000; 2500; 5000; 7000]
+nreps = 5
+k = 3
+mis_ksg1_mix, mis_ksg1_discrete, mis_ksg1_cont,
+    mis_gkov_mix, mis_gkov_discrete, mis_gkov_cont =
+    compare_ksg_gkov(; nreps, k, Ls)
+
+scatterlines!(ax, Ls, mean(mis_ksg1_mix, dims = 1) |> vec,
+    label = "KSG1 (mixed)", color = :black,
+    marker = :utriangle)
+scatterlines!(ax, Ls, mean(mis_ksg1_discrete, dims = 1) |> vec,
+    label = "KSG1 (discrete)", color = :black,
+    linestyle = :dash, marker = '▲')
+scatterlines!(ax, Ls, mean(mis_ksg1_cont, dims = 1) |> vec,
+    label = "KSG1 (continuous)", color = :black,
+    linestyle = :dot, marker = '●')
+scatterlines!(ax, Ls, mean(mis_gkov_mix, dims = 1) |> vec,
+    label = "GaoKannanOhViswanath (mixed)", color = :red,
+    marker = :utriangle)
+scatterlines!(ax, Ls, mean(mis_gkov_discrete, dims = 1) |> vec,
+    label = "GaoKannanOhViswanath (discrete)", color = :red,
+    linestyle = :dash, marker = '▲')
+scatterlines!(ax, Ls, mean(mis_gkov_cont, dims = 1) |> vec,
+    label = "GaoKannanOhViswanath (continuous)", color = :red,
+    linestyle = :dot, marker = '●')
+axislegend(position = :rb)
+fig
+```
+
+## Estimation using [`DifferentialEntropyEstimator`](@ref)s: a comparison
+
+Let's compare the performance of a subset of the implemented mutual information estimators. We'll use example data from Lord et al., where the analytical mutual information is known.
+ +```@example ex_mutualinfo +using CausalityTools +using LinearAlgebra: det +using StateSpaceSets: StateSpaceSet +using Distributions: MvNormal +using LaTeXStrings +using CairoMakie + +# adapted from https://juliadatascience.io/makie_colors +function new_cycle_theme() + # https://nanx.me/ggsci/reference/pal_locuszoom.html + my_colors = ["#D43F3AFF", "#EEA236FF", "#5CB85CFF", "#46B8DAFF", + "#357EBDFF", "#9632B8FF", "#B8B8B8FF"] + cycle = Cycle([:color, :linestyle, :marker], covary=true) # alltogether + my_markers = [:circle, :rect, :utriangle, :dtriangle, :diamond, + :pentagon, :cross, :xcross] + my_linestyle = [nothing, :dash, :dot, :dashdot, :dashdotdot] + return Theme( + fontsize = 22, font="CMU Serif", + colormap = :linear_bmy_10_95_c78_n256, + palette = ( + color = my_colors, + marker = my_markers, + linestyle = my_linestyle, + ), + Axis = ( + backgroundcolor= (:white, 0.2), + xgridstyle = :dash, + ygridstyle = :dash + ), + Lines = ( + cycle= cycle, + ), + ScatterLines = ( + cycle = cycle, + ), + Scatter = ( + cycle = cycle, + ), + Legend = ( + bgcolor = (:grey, 0.05), + framecolor = (:white, 0.2), + labelsize = 13, + ) + ) +end + +run(est; f::Function, # function that generates data + base::Real = ℯ, + nreps::Int = 10, + αs = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1], + n::Int = 1000) = + map(α -> association(est, f(α, n)...), αs) + +function compute_results(f::Function; estimators, k = 5, k_lord = 20, + n = 1000, base = ℯ, nreps = 10, + as = 7:-1:0, + αs = [1/10^(a) for a in as]) + + is = [zeros(length(αs)) for est in estimators] + for (k, est) in enumerate(estimators) + tmp = zeros(length(αs)) + for i = 1:nreps + tmp .+= run(est; f = f, αs, base, n) + end + is[k] .= tmp ./ nreps + end + + return is +end + +function plot_results(f::Function, ftrue::Function; + base, estimators, k_lord, k, + as = 7:-1:0, αs = [1/10^(a) for a in as], kwargs... + ) + is = compute_results(f; + base, estimators, k_lord, k, as, αs, kwargs...) + itrue = [ftrue(α; base) for α in αs] + + xmin, xmax = minimum(αs), maximum(αs) + + ymin = floor(Int, min(minimum(itrue), minimum(Iterators.flatten(is)))) + ymax = ceil(Int, max(maximum(itrue), maximum(Iterators.flatten(is)))) + f = Figure() + ax = Axis(f[1, 1], + xlabel = "α", ylabel = "I (nats)", + xscale = log10, aspect = 1, + xticks = (αs, [latexstring("10^{$(-a)}") for a in as]), + yticks = (ymin:ymax) + ) + xlims!(ax, (1/10^first(as), 1/10^last(as))) + ylims!(ax, (ymin, ymax)) + lines!(ax, αs, itrue, + label = "I (true)", linewidth = 4, color = :black) + for (i, est) in enumerate(estimators) + if est isa EntropyDecomposition + es = typeof(est.est).name.name |> String + else + es = typeof(est).name.name |> String + end + @show es + lbl = occursin("Lord", es) ? 
"$es (k = $k_lord)" : "$es (k = $k)" + scatter!(ax, αs, is[i], label = lbl) + lines!(ax, αs, is[i]) + + end + axislegend() + return f +end + +set_theme!(new_cycle_theme()) +k_lord = 20 +k = 5 +base = ℯ + +def = MIShannon(base = ℯ) +estimators = [ + EntropyDecomposition(def, Kraskov(; k)), + EntropyDecomposition(def, KozachenkoLeonenko()), + EntropyDecomposition(def, Zhu(; k)), + EntropyDecomposition(def, ZhuSingh(; k)), + EntropyDecomposition(def, Gao(; k)), + EntropyDecomposition(def, Lord(; k = k_lord)), + EntropyDecomposition(def, LeonenkoProzantoSavani(Shannon(); k)), + KSG1(def; k), + KSG2(def; k), + GaoOhViswanath(def; k), + GaoKannanOhViswanath(def; k), + GaussianMI(def), +]; +``` + +### Example system: family 1 + +In this system, samples are concentrated around the diagonal $X = Y$, +and the strip of samples gets thinner as $\alpha \to 0$. + +```@example ex_mutualinfo +function family1(α, n::Int) + x = rand(n) + v = rand(n) + y = x + α * v + return StateSpaceSet(x), StateSpaceSet(y) +end + +# True mutual information values for these data +function ifamily1(α; base = ℯ) + mi = -log(α) - α - log(2) + return mi / log(base, ℯ) +end + +fig = plot_results(family1, ifamily1; + k_lord = k_lord, k = k, nreps = 10, n = 800, + estimators = estimators, + base = base) +``` + +### Example system: family 2 + +```@example ex_mutualinfo +function family2(α, n::Int) + Σ = [1 α; α 1] + N2 = MvNormal(zeros(2), Σ) + D2 = StateSpaceSet([rand(N2) for i = 1:n]) + X = StateSpaceSet(D2[:, 1]) + Y = StateSpaceSet(D2[:, 2]) + return X, Y +end + +function ifamily2(α; base = ℯ) + return (-0.5 * log(1 - α^2)) / log(ℯ, base) +end + +αs = 0.05:0.05:0.95 +estimators = estimators +with_theme(new_cycle_theme()) do + f = Figure(); + ax = Axis(f[1, 1], xlabel = "α", ylabel = "I (nats)") + is_true = map(α -> ifamily2(α), αs) + is_est = map(est -> run(est; f = family2, αs, nreps = 20), estimators) + lines!(ax, αs, is_true, + label = "I (true)", color = :black, linewidth = 3) + for (i, est) in enumerate(estimators) + if est isa EntropyDecomposition + estname = typeof(est.est).name.name |> String + else + estname = typeof(est).name.name |> String + end + scatterlines!(ax, αs, is_est[i], label = estname) + end + axislegend(position = :lt) + return f +end +``` + +### Example system: family 3 + +In this system, we draw samples from a 4D Gaussian distribution distributed +as specified in the `ifamily3` function below. We let $X$ be the two first +variables, and $Y$ be the two last variables. + +```@example ex_mutualinfo +function ifamily3(α; base = ℯ) + Σ = [7 -5 -1 -3; -5 5 -1 3; -1 -1 3 -1; -3 3 -1 2+α] + Σx = Σ[1:2, 1:2]; Σy = Σ[3:4, 3:4] + mi = 0.5*log(det(Σx) * det(Σy) / det(Σ)) + return mi / log(ℯ, base) +end + +function family3(α, n::Int) + Σ = [7 -5 -1 -3; -5 5 -1 3; -1 -1 3 -1; -3 3 -1 2+α] + N4 = MvNormal(zeros(4), Σ) + D4 = StateSpaceSet([rand(N4) for i = 1:n]) + X = D4[:, 1:2] + Y = D4[:, 3:4] + return X, Y +end + +fig = plot_results(family3, ifamily3; + k_lord = k_lord, k = k, nreps = 5, n = 800, + estimators = estimators, base = base) +``` + +We see that the [`Lord`](@ref) estimator, which estimates local volume elements using a singular-value decomposition (SVD) of local neighborhoods, outperforms the other estimators by a large margin. 
diff --git a/docs/src/extended_examples/pairwise_asymmetric_inference.md b/docs/src/extended_examples/pairwise_asymmetric_inference.md new file mode 100644 index 000000000..8c06c62da --- /dev/null +++ b/docs/src/extended_examples/pairwise_asymmetric_inference.md @@ -0,0 +1,103 @@ +# [`PairwiseAsymmetricInference`](@ref) + +## [Reproducing McCracken & Weigel (2014)](@id example_PairwiseAsymmetricInference_reproduce_mccracken) + +Let's try to reproduce figure 8 from [McCracken2014](@citet)'s +paper on [`PairwiseAsymmetricInference`](@ref) (PAI). We'll start by defining the their example B (equations 6-7). This system consists of two +variables ``X`` and ``Y``, where ``X`` drives ``Y``. + +After we have computed the PAI in both directions, we define a measure of directionality as the difference between PAI in the ``X \to Y`` direction and in the ``Y \to X`` direction, so that if ``X`` drives ``Y``, then ``\Delta < 0``. + +```@example MAIN_CCM +using CausalityTools +using LabelledArrays +using StaticArrays +using DynamicalSystemsBase +using StateSpaceSets +using CairoMakie, Printf +using Distributions: Normal +using Statistics: mean, std + +function eom_nonlinear_sindriver(dx, x, p, n) + a, b, c, t, Δt = (p...,) + x, y = x[1], x[2] + 𝒩 = Normal(0, 1) + + dx[1] = sin(t) + dx[2] = a*x * (1 - b*x) + c* rand(𝒩) + p[end-1] += 1 # update t + + return +end + +function nonlinear_sindriver(;u₀ = rand(2), a = 1.0, b = 1.0, c = 2.0, Δt = 1) + DiscreteDynamicalSystem(eom_nonlinear_sindriver, u₀, [a, b, c, 0, Δt]) +end + +function reproduce_figure_8_mccraken(; + c = 2.0, Δt = 0.2, + as = 0.5:0.5:5.0, + bs = 0.5:0.5:5.0) + # ----------------------------------------------------------------------------------------- + # Generate many time series for many different values of the parameters `a` and `b`, + # and compute PAI. This will replicate the upper right panel of + # figure 8 in McCracken & Weigel (2014). + # ----------------------------------------------------------------------------------------- + + measure = PairwiseAsymmetricInference(d = 3) + + # Manually resample `nreps` length-`L` time series and use mean ρ(x̂|X̄y) - ρ(ŷ|Ȳx) + # for each parameter combination. + nreps = 50 + L = 200 # length of timeseries + Δ = zeros(length(as), length(bs)) + for (i, a) in enumerate(as) + for (j, b) in enumerate(bs) + s = nonlinear_sindriver(; a, b, c, Δt) + x, y = columns(first(trajectory(s, 1000, Ttr = 10000))) + Δreps = zeros(nreps) + for i = 1:nreps + # Ensure we're subsampling at the same time indices. + ind_start = rand(1:(1000-L)) + r = ind_start:(ind_start + L) + Δreps[i] = @views crossmap(measure, y[r], x[r]) - + crossmap(measure, x[r], y[r]) + end + Δ[i, j] = mean(Δreps) + end + end + + # ----------------------------------------------------------------------------------------- + # An example time series for plotting. + # ----------------------------------------------------------------------------------------- + sys = nonlinear_sindriver(; a = 1.0, b = 1.0, c, Δt) + npts = 500 + orbit = first(trajectory(sys, npts, Ttr = 10000)) + x, y = columns(orbit) + with_theme(theme_minimal(), + markersize = 5) do + + X = x[1:300] + Y = y[1:300] + fig = Figure(); + ax_ts = Axis(fig[1, 1:2], xlabel = "Time (t)", ylabel = "Value") + scatterlines!(ax_ts, (X .- mean(X)) ./ std(X), label = "x") + scatterlines!(ax_ts, (Y .- mean(Y)) ./ std(Y), label = "y") + axislegend() + + ax_hm = Axis(fig[2, 1:2], xlabel = "a", ylabel = "b") + ax_hm.yticks = (1:length(as), string.([i % 2 == 0 ? 
as[i] : "" for i = 1:length(as)])) + ax_hm.xticks = (1:length(bs), string.([i % 2 == 0 ? bs[i] : "" for i = 1:length(bs)])) + hm = heatmap!(ax_hm, Δ, colormap = :viridis) + Colorbar(fig[2, 3], hm; label = "Δ' = ρ(ŷ | yx) - ρ(x̂ | xy)") + fig + end +end + +reproduce_figure_8_mccraken() +``` + +We haven't used as many parameter combinations as [McCracken2014](@citet) did, +but we get a figure that looks roughly similar to theirs. + +As expected, ``\Delta < 0`` for all parameter combinations, implying that ``X`` "PAI drives" ``Y``. diff --git a/docs/src/independence.md b/docs/src/independence.md index 04a756b53..7010a1336 100644 --- a/docs/src/independence.md +++ b/docs/src/independence.md @@ -1,6 +1,13 @@ # [Independence testing](@id independence_testing) +For practical applications, it is often useful to determine whether variables are independent, possible conditioned upon +another set of variables. One way of doing so is to utilize an +association measure, and perform some sort of randomization-based +[independence testing](@ref independence_testing). + +For example, to test the dependence between time series, [time series surrogates testing](https://github.com/JuliaDynamics/TimeseriesSurrogates.jl) is used. Many other frameworks for independence exist too. Here, we've collected some independence testing frameworks, and made sure that they are compatible with as many of the implemented association measures as possible. + ## Independence testing API The independence test API is defined by @@ -13,11 +20,11 @@ independence IndependenceTest ``` -## [`SurrogateTest`](@ref) +## [`SurrogateAssociationTest`](@ref) ```@docs -SurrogateTest -SurrogateTestResult +SurrogateAssociationTest +SurrogateAssociationTestResult ``` ## [`LocalPermutationTest`](@ref) diff --git a/docs/src/index.md b/docs/src/index.md index 4ce8d55ee..da19ad4ab 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,35 +1,37 @@ +# CausalityTools.jl + ![CausalityTools.jl static logo](assets/logo-large.png) ```@docs CausalityTools ``` -## Goals +## Latest news + +CausalityTools.jl has been updated to v3! + +This update includes a number of breaking changes, several of which are *not* backwards compatible. +These are done to ensure compatibility with +[ComplexityMeasures.jl v3](https://juliadynamics.github.io/DynamicalSystemsDocs.jl/complexitymeasures/stable/), which provides discretization functionality that we use here. + +Important changes are: +- Convenience methods have been removed completely. Use [`association`](@ref) instead. +- Example systems have been removed. +- The syntax for computing an association has changed. Estimators now always *contain the definition it estimates*. For example, `association(MIShannon(), KSG1(), x, y)` is now `association(KSG1(MIShannon()), x, y)`. +- See the CHANGELOG.md for a complete list of changes. -Causal inference, and quantification of association in general, is fundamental to -most scientific disciplines. There exists a multitude of bivariate and multivariate -association measures in the scientific literature. However, beyond the most basic measures, -most methods aren't readily available for practical use. Most scientific papers don't -provide code, which makes reproducing them difficult or impossible, without -investing significant time and resources into deciphering and understanding the original -papers to the point where an implementation is possible. To make reliable inferences, -proper independence tests are also crucial. 
+## Documentation content -Our main goal with this package is to provide an easily extendible library of -association measures, a as-complete-as-possible set of their estimators. -We also want to lower the entry-point to the field of association -quantification, independence testing and causal inference, by providing well-documented -implementations of literature methods with runnable code examples. +- [Association measures](@ref) lists all implemented association measures and their estimators. +- [Independence testing](@ref independence_testing) lists all implemented ways of determining if an association between datasets is "significant". +- [Causal inference](@ref causal_graphs) lists all methods of inferring association networks + (also called "network graphs" and "causal graphs") between multiple variables. +- The [Examples](@ref) page contains numerous examples for association measure estimation, independence testing and network inference. -The core function for quantifying associations is [`independence`](@ref), -which performs either a parametric or nonparametric (conditional) -[`IndependenceTest`](@ref) using some form of -[association measure](@ref association_measure). These tests, in turn, can be -used with some [`GraphAlgorithm`](@ref) and [`infer_graph`](@ref) to infer causal graphs. -## Input data +## Input data for CausalityTools.jl -Input data for CausalityTools are given as: +Input data for CausalityTools.jl are given as: - Univariate *timeseries*, which are given as standard Julia `Vector`s. - Multivariate timeseries, *StateSpaceSets*, or *state space sets*, which are given as @@ -44,13 +46,6 @@ Input data for CausalityTools are given as: StateSpaceSet ``` -## Pull requests and issues - -This package has been and is under heavy development. Don't hesitate to submit an -issue if you find something that doesn't work or doesn't make sense, or if there's -some functionality that you're missing. -Pull requests are also very welcome! - ## Maintainers and contributors The CausalityTools.jl software is maintained by @@ -77,9 +72,3 @@ important contributions are: Many individuals has contributed code to other packages in the [JuliaDynamics](https://juliadynamics.github.io/JuliaDynamics/) ecosystem which we use here. Contributors are listed in the respective GitHub repos and webpages. - -## Related packages - -- [TransferEntropy.jl](https://github.com/JuliaDynamics/TransferEntropy.jl) previously - provided mutual infromation and transfer entropy estimators. These have been - re-implemented from scratch and moved here. diff --git a/docs/src/info_tutorial.md b/docs/src/info_tutorial.md new file mode 100644 index 000000000..88e79e54d --- /dev/null +++ b/docs/src/info_tutorial.md @@ -0,0 +1,141 @@ +# [Information measure tutorial](@id info_tutorial) + +CausalityTools.jl extends the single-variate information API in +[ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl) +to information measures of multiple variables. + +## Definitions + +We define **"information measure"** as some functional of probability +mass functions or probability densities. This definition may or may not agree with +literature usage, depending on the context. We made this choice pragmatically based on +user-friendlyness and coding-friendlyness, but still trying to maintain some +level of meaningful terminology. 
+ +## Basic strategy + +To *estimate* a multivariate information measure in practice, you must first specify +the [definition](@ref) of the measure, which is then used as input to an +[estimator](@ref). This estimator is then given to [`information`](@ref), or one +of the [convenience methods](@ref convenience_info). + +!!! note "Naming convention: The same name for different things" + Upon doing a literature review of the possible variants of information-theoretic measures, + it became painfully obvious that authors use *the same name for different concepts*. + For novices, and experienced practitioners too, this can be confusing. + Our API clearly distinguishes methods that are conceptually the same but named + differently in the literature due to differing *estimation* strategies from methods + that actually have different definitions. + + - Multiple, equivalent definitions occur for example for the Shannon mutual + information (MI; [`MIShannon`](@ref)), which has both a discrete and a continuous version, and there are multiple equivalent mathematical formulas for them: a direct sum/integral + over a joint probability mass function (pmf), a sum of three entropy terms, and + a Kullback-Leibler divergence between the joint pmf and the product of the marginal + distributions. Since these definitions are all equivalent, we only need one type + ([`MIShannon`](@ref)) to represent them. + - But Shannon MI is not the only type of mutual information! For example, "Tsallis mutual information" + has been proposed in different variants by various authors. Despite sharing the + same name, these are actually *nonequivalent definitions*. We've thus assigned + them entirely different measure names (e.g. [`MITsallisFuruichi`](@ref) and + [`MITsallisMartin`](@ref)), with the author name at the end. + + +## Distances/divergences + +There are many information measures in the literature that aim to quantify the +distance/divergence between two probability mass functions (pmf) or densities. You can +find those that we implement [here](@ref divergences_and_distances). + +As an example, let's quantify the [`KLDivergence`](@ref) between two probability +mass functions estimated by symbolizing two input vectors `x` and `y` using +[`OrdinalPatterns`](@ref). Since the discrete [`KLDivergence`](@ref) can be +expressed as a function of a joint pmf, we can use the [`JointProbabilities`](@ref) +estimator. + +```@example INFO_TUTORIAL +using CausalityTools +using Random; rng = MersenneTwister(1234) +x, y = rand(rng, 1000), rand(rng, 1000) +est = JointProbabilities(KLDivergence(), OrdinalPatterns(m=2)) +information(est, x, y) # should be close to 0 +``` + +Divergences are examples of *asymmetric* information measures, which we can see by +flipping the order of the input data. + +```@example INFO_TUTORIAL +information(est, y, x) +``` + +## Conditional entropies + +[Conditional entropies](@ref conditional_entropies) are another example of asymmetric +information measures. They all have in common that +they are functions of a joint pmf, and can therefore also be estimated using the +[`JointProbabilities`](@ref) estimator. This time, we'll use a rectangular binning +with 3 bins along each dimension to discretize the data. + +```@example INFO_TUTORIAL +x, y = randn(rng, 1000), randn(rng, 1000) +est = JointProbabilities(ConditionalEntropyShannon(base = 2), ValueBinning(3)) +information(est, x, y) +``` + +## Joint entropies + +[Joint entropies](@ref joint_entropies), on the other hand, are *symmetric*. 
Joint +entropies are functionals of a joint pmf, so we can still use the +[`JointProbabilities`](@ref) estimator. This time, we use a [`Dispersion`](@ref)-based +discretization. + +```@example INFO_TUTORIAL +x, y = randn(rng, 1000), randn(rng, 1000) +est = JointProbabilities(JointEntropyShannon(base = 2), Dispersion()) +information(est, x, y) == information(est, y, x) # should be true +``` + +## Mutual informations + +Mutual information, in particular [`MIShannon`](@ref), is an often-used symmetric +measure for quantifying the (possibly nonlinear) association between variables. It appears +in both discrete and differential form, and can be estimated in a multitude of ways. For +example, one can use dedicated [`MutualInformationEstimator`](@ref)s such as +[`KSG1`](@ref), [`KSG2`](@ref) or [`GaussianMI`](@ref): + +```@example INFO_TUTORIAL +x, y = randn(rng, 1000), randn(rng, 1000) +est = KSG1(MIShannon(base = 2), k = 10) +information(est, x, y) +``` + +The result should be symmetric: + +```@example INFO_TUTORIAL +information(est, x, y) == information(est, y, x) # should be true +``` + +One can also estimate mutual information using the [`EntropyDecomposition`](@ref) +estimator, or (like above) using the [`JointProbabilities`](@ref) estimator. +Let's construct a differential-entropy-based estimator that uses the [`Kraskov`](@ref) +entropy estimator. + +```@example INFO_TUTORIAL +est_diff = EntropyDecomposition(MIShannon(base = 2), Kraskov(Shannon(), k=10)) +``` + +We can also construct a discrete-entropy-based estimator, using e.g. the [`PlugIn`](@ref) +estimator of [`Shannon`](@ref) entropy. + +```@example INFO_TUTORIAL +est_disc = EntropyDecomposition(MIShannon(base = 2), PlugIn(Shannon()), ValueBinning(2)) +``` + +These estimators use different estimation methods, so they give different results: + +```@example INFO_TUTORIAL +information(est_diff, x, y) +``` + +```@example INFO_TUTORIAL +information(est_disc, x, y) +``` diff --git a/docs/src/measures.md b/docs/src/measures.md deleted file mode 100644 index bb5ad230e..000000000 --- a/docs/src/measures.md +++ /dev/null @@ -1,193 +0,0 @@ -# [Association measures](@id association_measure) - -```@docs -AssociationMeasure -``` - -## Overview - -| Type | Measure | Pairwise | Conditional | Function version | | ----------------------- | ------------------------------------- | :------: | :---------: | ------------------------------ | -| Correlation | [`PearsonCorrelation`](@ref) | ✓ | ✖ | [`pearson_correlation`](@ref) | -| Correlation | [`DistanceCorrelation`](@ref) | ✓ | ✓ | [`distance_correlation`](@ref) | -| Closeness | [`SMeasure`](@ref) | ✓ | ✖ | [`s_measure`](@ref) | -| Closeness | [`HMeasure`](@ref) | ✓ | ✖ | [`h_measure`](@ref) | -| Closeness | [`MMeasure`](@ref) | ✓ | ✖ | [`m_measure`](@ref) | -| Closeness (ranks) | [`LMeasure`](@ref) | ✓ | ✖ | [`l_measure`](@ref) | -| Closeness | [`JointDistanceDistribution`](@ref) | ✓ | ✖ | [`jdd`](@ref) | -| Cross-mapping | [`PairwiseAsymmetricInference`](@ref) | ✓ | ✖ | [`crossmap`](@ref) | -| Cross-mapping | [`ConvergentCrossMapping`](@ref) | ✓ | ✖ | [`crossmap`](@ref) | -| Conditional recurrence | [`MCR`](@ref) | ✓ | ✖ | [`mcr`](@ref) | -| Conditional recurrence | [`RMCD`](@ref) | ✓ | ✓ | [`rmcd`](@ref) | -| Shared information | [`MIShannon`](@ref) | ✓ | ✖ | [`mutualinfo`](@ref) | -| Shared information | [`MIRenyiJizba`](@ref) | ✓ | ✖ | [`mutualinfo`](@ref) | -| Shared information | [`MIRenyiSarbu`](@ref) | ✓ | ✖ | [`mutualinfo`](@ref) | -| Shared information | [`MITsallisFuruichi`](@ref) | ✓ | ✖ | [`mutualinfo`](@ref) | -| Shared 
information | [`PartialCorrelation`](@ref) | ✖ | ✓ | [`partial_correlation`](@ref) | -| Shared information | [`CMIShannon`](@ref) | ✖ | ✓ | [`condmutualinfo`](@ref) | -| Shared information | [`CMIRenyiSarbu`](@ref) | ✖ | ✓ | [`condmutualinfo`](@ref) | -| Shared information | [`CMIRenyiJizba`](@ref) | ✖ | ✓ | [`condmutualinfo`](@ref) | -| Information transfer | [`TEShannon`](@ref) | ✓ | ✓ | [`transferentropy`](@ref) | -| Information transfer | [`TERenyiJizba`](@ref) | ✓ | ✓ | [`transferentropy`](@ref) | -| Part mutual information | [`PMI`](@ref) | ✖ | ✓ | [`pmi`](@ref) | -| Information asymmetry | [`PA`](@ref) | ✓ | ✓ | [`asymmetry`](@ref) | - -## Correlation measures - -### Pearson correlation - -```@docs -PearsonCorrelation -pearson_correlation -``` - -### Partial correlation - -```@docs -PartialCorrelation -partial_correlation -``` - -### Distance correlation - -```@docs -DistanceCorrelation -distance_correlation -``` - -## Closeness measures - -### Joint distance distribution - -```@docs -JointDistanceDistribution -jdd -``` - -### S-measure - -```@docs -SMeasure -s_measure -``` - -### H-measure - -```@docs -HMeasure -h_measure -``` - -### M-measure - -```@docs -MMeasure -m_measure -``` - -### L-measure - -```@docs -LMeasure -l_measure -``` - -## Cross-map measures - -See also the [cross mapping API](@ref crossmap_api) for estimators. - -### Convergent cross mapping - -```@docs -ConvergentCrossMapping -``` - -### Pairwise asymmetric inference - -```@docs -PairwiseAsymmetricInference -``` - -## Recurrence-based - -```@docs -MCR -RMCD -``` - -## [Information measures](@id information_measures) - -Association measures that are information-based are listed here. Available estimators -are listed in the [information API](@ref information_api). - -### Mutual information (Shannon) - -```@docs -MIShannon -``` - -### Mutual information (Tsallis, Furuichi) - -```@docs -MITsallisFuruichi -``` - -### Mutual information (Tsallis, Martin) - -```@docs -MITsallisMartin -``` - -### Mutual information (Rényi, Sarbu) - -```@docs -MIRenyiSarbu -``` - -### Mutual information (Rényi, Jizba) - -```@docs -MIRenyiJizba -``` - -### Conditional mutual information (Shannon) - -```@docs -CMIShannon -``` - -### Conditional mutual information (Rényi, Jizba) - -```@docs -CMIRenyiJizba -``` - -### Conditional mutual information (Rényi, Poczos) - -```@docs -CMIRenyiPoczos -``` - -### Transfer entropy (Shannon) - -```@docs -TEShannon -``` - -### Transfer entropy (Rényi, Jizba) - -```@docs -TERenyiJizba -``` - -### Part mutual information - -```@docs -PMI -``` - -### Predictive asymmetry - -```@docs -PA -``` diff --git a/docs/src/probabilities_tutorial.md b/docs/src/probabilities_tutorial.md new file mode 100644 index 000000000..887cadd07 --- /dev/null +++ b/docs/src/probabilities_tutorial.md @@ -0,0 +1,46 @@ +# [Counts and probabilities](@id counts_and_probabilities_api) + +For counting and probabilities, CausalityTools.jl extends the single-variable machinery +in ComplexityMeasures.jl to multiple variables. + +## Counts + +```@docs +CausalityTools.Counts +CausalityTools.counts(::OutcomeSpace) +``` + +## Probabilities + +```@docs +CausalityTools.Probabilities +CausalityTools.probabilities(::OutcomeSpace) +``` + +## Utilities + +```@docs +marginal +``` + + +## Tutorial + +Estimating multivariate counts (contingency matrices) and PMFs is simple. 
If the data are pre-discretized, then +we can use [`UniqueElements`](@ref) to count how often each unique joint outcome occurs: + +```@example counts_probs_tutorial +using CausalityTools +n = 50 # the number of samples must be the same for each input variable +x = rand(["dog", "cat", "snake"], n) +y = rand(1:4, n) +z = rand([(2, 1), (0, 0), (1, 1)], n) +counts(UniqueElements(), x, y, z) +``` + +Probabilities are computed analogously, except that the counts are normalized. + +```@example counts_probs_tutorial +probabilities(UniqueElements(), x, y, z) +``` + diff --git a/docs/style.jl b/docs/style.jl new file mode 100644 index 000000000..168f16eef --- /dev/null +++ b/docs/style.jl @@ -0,0 +1,38 @@ +# Color theme definitions +struct CyclicContainer <: AbstractVector{String} + c::Vector{String} + n::Int +end +CyclicContainer(c) = CyclicContainer(c, 0) + +Base.length(c::CyclicContainer) = length(c.c) +Base.size(c::CyclicContainer) = size(c.c) +Base.iterate(c::CyclicContainer, state=1) = iterate(c.c, state) +Base.getindex(c::CyclicContainer, i) = c.c[(i-1)%length(c.c) + 1] +Base.getindex(c::CyclicContainer, i::AbstractArray) = c.c[i] +function Base.getindex(c::CyclicContainer) + c.n += 1 + c[c.n] +end +Base.iterate(c::CyclicContainer, i = 1) = iterate(c.c, i) + +COLORSCHEME = [ + "#7143E0", + "#0A9A84", + "#191E44", + "#AF9327", + "#701B80", + "#2E6137", +] + +COLORS = CyclicContainer(COLORSCHEME) +LINESTYLES = CyclicContainer(["-", ":", "--", "-."]) + +# other styling elements for Makie +set_theme!(; + palette = (color = COLORSCHEME,), + fontsize = 22, + figure_padding = 8, + size = (800, 400), + linewidth = 3.0, +) diff --git a/src/CausalityTools.jl b/src/CausalityTools.jl index 8082a97eb..507a16262 100644 --- a/src/CausalityTools.jl +++ b/src/CausalityTools.jl @@ -13,23 +13,21 @@ module CausalityTools using DelayEmbeddings: embed, genembed export embed, genembed - import DynamicalSystemsBase: trajectory - import DynamicalSystemsBase: DiscreteDynamicalSystem, ContinuousDynamicalSystem import HypothesisTests: pvalue export trajectory - export DiscreteDynamicalSystem, ContinuousDynamicalSystem @reexport using StateSpaceSets @reexport using ComplexityMeasures @reexport using TimeseriesSurrogates + include("utils/utils.jl") include("core.jl") - include("methods/infomeasures/infomeasures.jl") + + include("methods/information/information.jl") include("methods/crossmappings/crossmappings.jl") include("methods/closeness/closeness.jl") include("methods/correlation/correlation.jl") include("methods/recurrence/methods.jl") - include("utils/utils.jl") # Independence tests must be loaded after everything else has been defined. include("independence_tests/independence.jl") @@ -37,31 +35,18 @@ module CausalityTools # Causal graph API must be loaded after independence tests. include("causal_graphs/causal_graphs.jl") - include("example_systems/example_systems.jl") - include("deprecations/deprecations.jl") - #using Requires - #function __init__() - #@require UncertainData="dcd9ba68-c27b-5cea-ae21-829cd07325bf" begin - # include("integrations/uncertaindata.jl") - #end - - #@require Simplices="d5428e67-3037-59ba-9ab1-57a04f0a3b6a" begin - # # import PerronFrobenius: SimplexExact, SimplexPoint - # export SimplexExact, SimplexPoint - #end - #end - # Update messages: using Scratch display_update = true - version_number = "2.7.1" + version_number = "3.0.0" update_name = "update_v$(version_number)" update_message = """ \nUpdate message: CausalityTools v$(version_number)\n - - New association measure: `PMI` (part mutual information). - - Fixed an import warning. 
+ - Bivariate and multivariate information measure definitions and estimation is now based on the API in ComplexityMeasures.jl. + - Example systems have been removed from the package to avoid unnecessary package dependencies and improve compilation time. + - Convenience methods have been removed. Use `association` instead. """ if display_update diff --git a/src/causal_graphs/api.jl b/src/causal_graphs/api.jl index 1571f75b4..c9dd53856 100644 --- a/src/causal_graphs/api.jl +++ b/src/causal_graphs/api.jl @@ -9,6 +9,7 @@ The supertype of all causal graph inference algorithms. ## Concrete implementations - [`OCE`](@ref). The optimal causation entropy algorithm for time series graphs. +- [`PA`](@ref). """ abstract type GraphAlgorithm end diff --git a/src/causal_graphs/oce/OCE.jl b/src/causal_graphs/oce/OCE.jl index ae030bdbb..419b5ab0c 100644 --- a/src/causal_graphs/oce/OCE.jl +++ b/src/causal_graphs/oce/OCE.jl @@ -6,7 +6,7 @@ export OCESelectedParents """ OCE <: GraphAlgorithm - OCE(; utest::IndependenceTest = SurrogateTest(MIShannon(), KSG2(k = 3, w = 3)), + OCE(; utest::IndependenceTest = SurrogateAssociationTest(MIShannon(), KSG2(k = 3, w = 3)), ctest::C = LocalPermutationTest(CMIShannon(), MesnerShalizi(k = 3, w = 3)), τmax::T = 1, α = 0.05) @@ -44,7 +44,7 @@ Input data must either be a `Vector{Vector{<:Real}}`, or a `StateSpaceSet`. - [Inferring time series graph from a chain of logistic maps](@ref oce_example) """ Base.@kwdef struct OCE{U, C, T} <: GraphAlgorithm - utest::U = SurrogateTest(MIShannon(), KSG2(k = 3, w = 3), nshuffles = 100) + utest::U = SurrogateAssociationTest(MIShannon(), KSG2(k = 3, w = 3), nshuffles = 100) ctest::C = LocalPermutationTest(CMIShannon(), MesnerShalizi(k = 3, w = 3), nshuffles = 100) τmax::T = 1 α = 0.05 @@ -204,13 +204,15 @@ end # that `P` is always conditioned on when relevant. The two functions are returned. function rawmeasure_and_independencetest(alg, parents::OCESelectedParents) if pairwise_test(parents) - measure, est = alg.utest.measure, alg.utest.est - compute_raw_measure = (xᵢ, Pⱼ) -> estimate(measure, est, xᵢ, Pⱼ) + #@show alg.utest + #@show alg.ctest + est_or_measure = alg.utest.est_or_measure + compute_raw_measure = (xᵢ, Pⱼ) -> association(est_or_measure, xᵢ, Pⱼ) test_independence = (xᵢ, Pix) -> independence(alg.utest, xᵢ, Pix) else - measure, est = alg.ctest.measure, alg.ctest.est + est_or_measure = alg.ctest.est_or_measure P = StateSpaceSet(parents.parents...) - compute_raw_measure = (xᵢ, Pⱼ) -> estimate(measure, est, xᵢ, Pⱼ, P) + compute_raw_measure = (xᵢ, Pⱼ) -> association(est_or_measure, xᵢ, Pⱼ, P) test_independence = (xᵢ, Pix) -> independence(alg.ctest, xᵢ, Pix, P) end return compute_raw_measure, test_independence diff --git a/src/causal_graphs/pc/PC.jl b/src/causal_graphs/pc/PC.jl index 668c36a58..838aa378c 100644 --- a/src/causal_graphs/pc/PC.jl +++ b/src/causal_graphs/pc/PC.jl @@ -15,10 +15,10 @@ which is implemented as described in [Kalisch2008](@citet). - **`pairwise_test`**: An [`IndependenceTest`](@ref) that uses a pairwise, nondirectional [`AssociationMeasure`](@ref) measure (e.g. a parametric - [`CorrTest`](@ref), or [`SurrogateTest`](@ref) with the [`MIShannon`](@ref) measure). + [`CorrTest`](@ref), or [`SurrogateAssociationTest`](@ref) with the [`MIShannon`](@ref) measure). - **`conditional_test`**: An [`IndependenceTest`](@ref) that uses a conditional, nondirectional [`AssociationMeasure`](@ref) (e.g. [`CorrTest`](@ref), - or [`SurrogateTest`](@ref) with the [`CMIShannon`](@ref) measure). 
+ or [`SurrogateAssociationTest`](@ref) with the [`CMIShannon`](@ref) measure). ## Keyword arguments @@ -119,9 +119,10 @@ end function check_input(alg::PC) u = alg.pairwise_test c = alg.conditional_test - if u.measure isa DirectedAssociationMeasure || c.measure isa DirectedAssociationMeasure - s = "Directional measures will not give meaningful answers. See PC docstring"* - " for more information." - throw(ArgumentError(s)) - end + # TODO: implement is_directed for all measures + # if u.measure isa DirectedAssociationMeasure || c.measure isa DirectedAssociationMeasure + # s = "Directional measures will not give meaningful answers. See PC docstring"* + # " for more information." + # throw(ArgumentError(s)) + # end end diff --git a/src/causal_graphs/pc_mci/PCMCI.jl b/src/causal_graphs/pc_mci/PCMCI.jl index 48077a23e..eb1437524 100644 --- a/src/causal_graphs/pc_mci/PCMCI.jl +++ b/src/causal_graphs/pc_mci/PCMCI.jl @@ -3,8 +3,8 @@ export PCMCI """ PCMCI <: GraphAlgorithm PCMCI( - test_unconditional = SurrogateTest(PearsonCorrelation(); nshuffles = 30), - test_conditional= SurrogateTest(PartialCorrelation(); nshuffles = 30), + test_unconditional = SurrogateAssociationTest(PearsonCorrelation(); nshuffles = 30), + test_conditional= SurrogateAssociationTest(PartialCorrelation(); nshuffles = 30), τmax = 10, pmax = 10, qmax = 1, @@ -17,8 +17,8 @@ The maximum dimension of the condition is given by `pmax`, and `τmax` gives the embedding lag. """ Base.@kwdef struct PCMCI{U, C, L, P, Q} <: GraphAlgorithm - test_unconditional::U = SurrogateTest(PearsonCorrelation(); nshuffles = 30) - test_conditional::C = SurrogateTest(PartialCorrelation(); nshuffles = 30) + test_unconditional::U = SurrogateAssociationTest(PearsonCorrelation(); nshuffles = 30) + test_conditional::C = SurrogateAssociationTest(PartialCorrelation(); nshuffles = 30) τmax::L = 10 pmax::P = 10 qmax::Q = 2 diff --git a/src/contingency_matrices.jl b/src/contingency_matrices.jl deleted file mode 100644 index 65d72b181..000000000 --- a/src/contingency_matrices.jl +++ /dev/null @@ -1,362 +0,0 @@ -import Base: size, getindex, setindex -import ComplexityMeasures: probabilities, outcomes -import StatsBase: levelsmap - -export ContingencyMatrix -export probabilities, outcomes, frequencies -export contingency_matrix - -# This should be done so that the following can be added to the docs, but for now, -# we only need the table, not information about variables. -# Let `c` be a 2-dimensional `ContingencyMatrix` constructed from input data `x` and `y`. -# Let `Ωx = unique(x)` and `Ωy = unique(y)`. `c[i, j]` then corresponds to the -# outcome `(unique(Ωx)[i], unique(Ωy)[j]`). The generalization to higher dimensions is -# straight-forward. - -# This is not optimized for speed, but it *works* -""" - ContingencyMatrix{T, N} <: Probabilities{T, N} - ContingencyMatrix(frequencies::AbstractArray{Int, N}) - -A contingency matrix is essentially a multivariate analogue of [`Probabilities`](@ref) -that also keep track of raw frequencies. - -The contingency matrix can be constructed directyly from an `N`-dimensional `frequencies` -array. Alternatively, the [`contingency_matrix`](@ref) function performs counting for -you; this works on both raw categorical data, or by first discretizing data using a -a [`ProbabilitiesEstimator`](@ref). - -## Description - -A `ContingencyMatrix` `c` is just a simple wrapper around around `AbstractArray{T, N}`. -Indexing `c` with multiple indices `i, j, …` returns the `(i, j, …)`th -element of the empirical probability mass function (pmf). 
The following convencience -methods are defined: - -- `frequencies(c; dims)` returns the multivariate raw counts along the given `dims - (default to all available dimensions). -- `probabilities(c; dims)` returns a multidimensional empirical - probability mass function (pmf) along the given `dims` (defaults to all available - dimensions), i.e. the normalized counts. -- `probabilities(c, i::Int)` returns the marginal probabilities for the `i`-th dimension. -- `outcomes(c, i::Int)` returns the marginal outcomes for the `i`-th dimension. - -# Ordering - -The ordering of outcomes are internally consistent, but we make no promise on the -ordering of outcomes relative to the input data. This means that if your input -data are `x = rand(["yes", "no"], 100); y = rand(["small", "medium", "large"], 100)`, -you'll get a 2-by-3 contingency matrix, but there currently no easy way to -determine which outcome the i-j-th row/column of this matrix corresponds to. - -Since [`ContingencyMatrix`](@ref) is intended for use in information theoretic methods -that don't care about ordering, as long as the ordering is internally consistent, -this is not an issue for practical applications in this package. -This may change in future releases. - -## Usage - -Contingency matrices is used in the computation of discrete versions of the following -quantities: - -- [`entropy_joint`](@ref). -- [`mutualinfo`](@ref). -- [`condmutualinfo`](@ref). -""" -struct ContingencyMatrix{T, N, I} <: AbstractArray{T, N} - # We only need to keep track of the joint probabilities. Marginals are obtained through, - # unsurprisingly, marginalization of the joint probabilities. - probs::AbstractArray{T, N} - freqs::AbstractArray{I, N} -end - -function ContingencyMatrix(freqs::AbstractArray{Int, N}) where N - probs = freqs ./ sum(freqs) - return ContingencyMatrix(probs, freqs) -end - -Base.size(c::ContingencyMatrix) = size(c.probs) -Base.getindex(c::ContingencyMatrix, i) = getindex(c.probs, i) -Base.getindex(c::ContingencyMatrix, i::Int...) = getindex(c.probs, i...) -Base.setindex!(c::ContingencyMatrix, v, i) = setindex!(c.probs, v, i) - -function frequencies(c::ContingencyMatrix; dims = 1:ndims(c)) - alldims = 1:ndims(c) - reduce_dims = setdiff(alldims, dims) - marginal = dropdims(sum(c.freqs, dims = reduce_dims), dims = (reduce_dims...,)) - return marginal -end - -function probabilities(c::ContingencyMatrix; dims = 1:ndims(c)) - alldims = 1:ndims(c) - reduce_dims = (setdiff(alldims, dims)...,) - marginal = dropdims(sum(c.probs, dims = reduce_dims), dims = reduce_dims) - return Probabilities(marginal) -end - -""" - contingency_matrix(x, y, [z, ...]) → c::ContingencyMatrix - contingency_matrix(est::ProbabilitiesEstimator, x, y, [z, ...]) → c::ContingencyMatrix - -Estimate a multidimensional contingency matrix `c` from input data `x, y, …`, where the -input data can be of any and different types, as long as `length(x) == length(y) == …`. - -For already discretized data, use the first method. For continuous data, you want to -discretize the data before computing the contingency table. You can do -this manually and then use the first method. Alternatively, you can provide a -[`ProbabilitiesEstimator`](@ref) as the first -argument to the constructor. Then the input variables `x, y, …` are discretized -*separately* according to `est` (*enforcing the same outcome space for all variables*), -by calling [`marginal_encodings`](@ref). -""" -function contingency_matrix end - -function contingency_matrix(est::ProbabilitiesEstimator, x...) 
- # Enforce identical outcome space for all variables. - encodings = marginal_encodings(est, x...) - return contingency_matrix(encodings...) -end - -# For this to work generically, we must map unique elements to integers. -function contingency_matrix(x...) - Ls = length.(x); - if !all(Ls .== maximum(Ls)) - throw(ArgumentError("Input data must have equal lengths. Got lengths $Ls.")) - end - - # The frequency matrix dimensions are dictated by the number of unique occurring values - Ωs = unique_elements.(x) - matrix_dims = length.(Ωs); - - # Get marginal probabilities and outcomes - #pΩs = [probabilities_and_outcomes(CountOccurrences(), xᵢ) for xᵢ in x] - freqs, lmaps = freqtable_equallength(matrix_dims, x...) - - # TODO: Inverse map from integer-encoded outcomes to the original outcomes. - # marginal_outcomes = [map(k -> lmap[k], last(pΩ)) for (pΩ, lmap) in zip(pΩs, lmaps)] - probs = freqs ./ maximum(Ls) - return ContingencyMatrix( - probs, - freqs, - ) -end - -unique_elements(x) = unique(x) -unique_elements(x::AbstractStateSpaceSet) = unique(x.data) - - -function freqtable_equallength(matrix_dims, x...) - # Map the input data to integers. This ensures compatibility with *any* input type. - # Then, we can simply create a joint `StateSpaceSet{length(x), Int}` and use its elements - # as `CartesianIndex`es to update counts. - lvl = tolevels.(x) - levels = (first(l) for l in lvl) - lmaps = [last(l) for l in lvl] - X = StateSpaceSet(levels...) - - freqs = zeros(Int, matrix_dims) - for ix in to_cartesian(sort(X.data)) # sorted matrix access should be faster. - freqs[ix] += 1 - end - return freqs, lmaps -end - -function to_cartesian(x) - (CartesianIndex.(xᵢ...) for xᵢ in x) -end - -""" - tolevels!(levels, x) → levels, dict - tolevels(x) → levels, dict - -Apply the bijective map ``f : \\mathcal{Q} \\to \\mathbb{N}^+`` to each `x[i]` and store -the result in `levels[i]`, where `levels` is a pre-allocated integer vector such that -`length(x) == length(levels)`. - -``\\mathcal{Q}`` can be any space, and each ``q \\in \\mathcal{Q}`` is mapped to a unique -integer in the range `1, 2, …, length(unique(x))`. This is useful for integer-encoding -categorical data such as strings, or other complex data structures. - -The single-argument method allocated a `levels` vector internally. - -`dict` gives the inverse mapping. -""" -function tolevels!(levels, x) - @assert length(levels) == length(x) - lmap = _levelsmap(x) - for i in eachindex(x) - levels[i] = lmap[x[i]] - end - return levels, lmap -end - -function tolevels(x) - lmap = _levelsmap(x) - levels = zeros(Int, length(x)) - for i in eachindex(x) - levels[i] = lmap[x[i]] - end - return levels, lmap -end - -# Ugly hack, because levelsmap doesn't work out-of-the-box for statespacesets. -_levelsmap(x) = levelsmap(x) -_levelsmap(x::AbstractStateSpaceSet) = levelsmap(x.data) - -# The following commented-out code below is equivalent to theabove, but muuuch faster. -# I keep the comments here for, so when I revisit this, I understand *why* it works. -# This will be moved into some sort of tutorial or doc example at some point. - - -# TODO: actually dispatch on joint frequency method for all methods below. 
-# function ContingencyMatrix(X, Y) -# pX, ΩX = probabilities_and_outcomes(CountOccurrences(), X); lX = length(pX) -# pY, ΩY = probabilities_and_outcomes(CountOccurrences(), Y); lY = length(pY) -# p̂X = reshape(pX, lX, 1) -# p̂Y = reshape(pY, 1, lY) -# pXY = p̂X .* p̂Y -# return ContingencyMatrix(pXY, pXY, (pX, pY), (ΩX, ΩY)) -# end - -# function ContingencyMatrix(X, Y, Z) -# pX, ΩX = probabilities_and_outcomes(CountOccurrences(), X); lX = length(pX) -# pY, ΩY = probabilities_and_outcomes(CountOccurrences(), Y); lY = length(pY) -# pZ, ΩZ = probabilities_and_outcomes(CountOccurrences(), Z); lZ = length(pZ) -# p̂X = reshape(pX, lX, 1, 1) -# p̂Y = reshape(pY, 1, lY, 1) -# p̂Z = reshape(pZ, 1, 1, lZ) -# pXYZ = p̂X .* p̂Y .* p̂Z -# return ContingencyMatrix(pXYZ, (pX, pY, pZ), (ΩX, ΩY, ΩZ)) -# end - - - - - - -#function ContingencyMatrix(X, Y, Z) - # Ωx = sort(unique(X)) - # Ωy = sort(unique(Y)) - # Ωz = sort(unique(Z)) - # N = length(X)*length(Y)*length(Z) - # pXYZ = zeros(length(Ωx), length(Ωy), length(Ωz)) # enumerates the states - # for (h, ωzᵢ) in enumerate(Ωz) - # for (j, ωyᵢ) in enumerate(Ωy) - # for (i, ωxᵢ) in enumerate(Ωx) - # pXYZ[i, j, h] = count_occurrences(ωxᵢ, ωyᵢ, ωzᵢ, X, Y, Z) - # end - # end - # end - #return ContingencyMatrix(pXYZ / N) - - # The following is equivalent to the commented-out code above, but muuuch faster. - # I keep the above code, so when I revisit this, I understand *why* it works. -# pX = probabilities(CountOccurrences(), X); lX = length(pX) -# pY = probabilities(CountOccurrences(), Y); lY = length(pY) -# pZ = probabilities(CountOccurrences(), Z); lZ = length(pZ) - -# # # Reshape explicitly for 3D case to work. -# p̂X = reshape(pX, lX, 1, 1) -# p̂Y = reshape(pY, 1, lY, 1) -# p̂Z = reshape(pZ, 1, 1, lZ) -# pXYZ = p̂X .* p̂Y .* p̂Z - -# return pXYZ -# end - -# function count_occurrences(ωxᵢ, ωyᵢ, ωzᵢ, X, Y, Z) -# n = 0.0 -# for x in X -# for y in Y -# for z in Z -# if x == ωxᵢ && y == ωyᵢ && z == ωzᵢ -# n += 1 -# end -# end -# end -# end -# return n -# end - -# function count_occurrences(total_count, ωxᵢ, ωyᵢ, X, Y) -# n = 0.0 -# for x in X -# for y in Y -# if x == ωxᵢ && y == ωyᵢ -# n += 1 -# total_count[] += 1 -# end -# end -# end -# return n -# end - - -########################################################################################### -# The commented-out code below will be part of a later release. It is based on the -# fact that the way in which joint probabilities are estimated using the -# counts greatly affects the measure that is estimated based on the contingency table, -# like for mutual information (Fernandes et al., 2010) -########################################################################################### - -# Exaplanation of the algorithm -# First, we compute the counts of `fᵢⱼ` for each outcome `(Ωxᵢ, Ωyᵢ)` -# by counting how many times `X == Ωxᵢ` and `y == Ωyᵢ` occur simultaneously. - -# Then, the joint probabilities are computed according to `method`, which controls -# which prior assumptions about the joint relationship is assumed -# (Fernandes et al., 2010)[Fernandes2010](@cite). - -# [Fernandes2010](@cite): Fernandes, A. D., & Gloor, G. B. (2010). Mutual information is -# critically dependent on prior assumptions: would the correct estimate of mutual -# information please identify itself?. Bioinformatics, 26(9), 1135-1139. - -# """ -# JointFrequencyRelationship - -# The supertype for all ways of computing joint frequency relationships for -# a [`ContingencyMatrix`](@ref). 
- -# ## Motivation - -# As discussed in Fernandes et al. (2010), the mutual information estimated based on -# contingency matrices *strongly* depends on how joint probabilities are estimated -# for the contingency matrices. -# They present multiple alternatives, based of different prior assuptions of the data, -# which we here implement as subtypes: - -# - [`NaiveJointFrequency`](@ref). -# - [`MultiplicativeJointFrequency`](@ref). - -# [Fernandes2010](@cite): Fernandes, A. D., & Gloor, G. B. (2010). Mutual information is -# critically dependent on prior assumptions: would the correct estimate of mutual -# information please identify itself?. Bioinformatics, 26(9), 1135-1139. -# """ -# abstract type JointFrequencyRelationship end - -# """ -# NaiveJointFrequency <: JointFrequencyRelationship -# NaiveJointFrequency() - -# Takes the joint frequencies naively as the observed sample counts, divided by the -# total number of samples. - -# [Fernandes2010](@cite): Fernandes, A. D., & Gloor, G. B. (2010). Mutual information is -# critically dependent on prior assumptions: would the correct estimate of mutual -# information please identify itself?. Bioinformatics, 26(9), 1135-1139. -# """ -# struct NaiveJointFrequency <: JointFrequencyRelationship end - -# """ -# MultiplicativeJointFrequency <: JointFrequencyRelationship -# MultiplicativeJointFrequency () - -# Represents the assumption of independence of contingency table frequencies, -# such that the joint probabilities are given by the outer product of -# the marginal frequencies (e.g. row-sums and column sums in the 2D case)[Fernandes2010](@cite). - -# [Fernandes2010](@cite): Fernandes, A. D., & Gloor, G. B. (2010). Mutual information is -# critically dependent on prior assumptions: would the correct estimate of mutual -# information please identify itself?. Bioinformatics, 26(9), 1135-1139. -# """ -# struct MultiplicativeJointFrequency <: JointFrequencyRelationship end diff --git a/src/core.jl b/src/core.jl index c7a2b31ee..92c04f7f4 100644 --- a/src/core.jl +++ b/src/core.jl @@ -1,44 +1,171 @@ -using DelayEmbeddings: AbstractStateSpaceSet -using ComplexityMeasures: ProbabilitiesEstimator -const VectorOrStateSpaceSet{D, T} = Union{AbstractVector{T}, AbstractStateSpaceSet{D, T}} where {D, T} -const ArrayOrStateSpaceSet{D, T, N} = Union{AbstractArray{T, N}, AbstractStateSpaceSet{D, T}} where {D, T, N} +using ComplexityMeasures: DifferentialInfoEstimator, DiscreteInfoEstimator +using StateSpaceSets: AbstractStateSpaceSet export AssociationMeasure -export DirectedAssociationMeasure +export AssociationMeasureEstimator +export association +const VectorOr1DDataset{T} = Union{AbstractVector{T}, AbstractStateSpaceSet{1, T}} where T +const VectorOrStateSpaceSet{D, T} = Union{AbstractVector{T}, AbstractStateSpaceSet{D, T}} where {D, T} +const ArrayOrStateSpaceSet{D, T, N} = Union{AbstractArray{T, N}, AbstractStateSpaceSet{D, T}} where {D, T, N} +const INFO_ESTS = Union{DifferentialInfoEstimator, DiscreteInfoEstimator} -# Any non-bivariate association measures must implement: -# - [`min_inputs_vars`](@ref). -# - [`max_inputs_vars`](@ref). """ AssociationMeasure -The supertype of all association measures. +The supertype of all association measures. + +## Abstract implementations + +Currently, the association measures are classified by abstract classes listed below. +These abstract classes offer common functionality among association measures that are +conceptually similar. 
This makes maintenance and framework extension easier than +if each measure was implemented "in isolation". + +- [`MultivariateInformationMeasure`](@ref) +- [`CrossmapMeasure`](@ref) +- [`ClosenessMeasure`](@ref) +- [`CorrelationMeasure`](@ref) + +## Concrete implementations + +Concrete subtypes are given as input to [`association`](@ref). + +| Type | [`AssociationMeasure`](@ref) | Pairwise | Conditional | +| ----------------------- | ------------------------------------------- | :------: | :---------: | +| Correlation | [`PearsonCorrelation`](@ref) | ✓ | ✖ | +| Correlation | [`DistanceCorrelation`](@ref) | ✓ | ✓ | +| Closeness | [`SMeasure`](@ref) | ✓ | ✖ | +| Closeness | [`HMeasure`](@ref) | ✓ | ✖ | +| Closeness | [`MMeasure`](@ref) | ✓ | ✖ | +| Closeness (ranks) | [`LMeasure`](@ref) | ✓ | ✖ | +| Closeness | [`JointDistanceDistribution`](@ref) | ✓ | ✖ | +| Cross-mapping | [`PairwiseAsymmetricInference`](@ref) | ✓ | ✖ | +| Cross-mapping | [`ConvergentCrossMapping`](@ref) | ✓ | ✖ | +| Conditional recurrence | [`MCR`](@ref) | ✓ | ✖ | +| Conditional recurrence | [`RMCD`](@ref) | ✓ | ✓ | +| Shared information | [`MIShannon`](@ref) | ✓ | ✖ | +| Shared information | [`MIRenyiJizba`](@ref) | ✓ | ✖ | +| Shared information | [`MIRenyiSarbu`](@ref) | ✓ | ✖ | +| Shared information | [`MITsallisFuruichi`](@ref) | ✓ | ✖ | +| Shared information | [`PartialCorrelation`](@ref) | ✖ | ✓ | +| Shared information | [`CMIShannon`](@ref) | ✖ | ✓ | +| Shared information | [`CMIRenyiSarbu`](@ref) | ✖ | ✓ | +| Shared information | [`CMIRenyiJizba`](@ref) | ✖ | ✓ | +| Shared information | [`CMIRenyiPoczos`](@ref) | ✖ | ✓ | +| Shared information | [`CMITsallisPapapetrou`](@ref) | ✖ | ✓ | +| Information transfer | [`TEShannon`](@ref) | ✓ | ✓ | +| Information transfer | [`TERenyiJizba`](@ref) | ✓ | ✓ | +| Part mutual information | [`PMI`](@ref) | ✖ | ✓ | +| Information asymmetry | [`PA`](@ref) | ✓ | ✓ | +| Information measure | [`JointEntropyShannon`](@ref) | ✓ | ✖ | +| Information measure | [`JointEntropyRenyi`](@ref) | ✓ | ✖ | +| Information measure | [`JointEntropyTsallis`](@ref) | ✓ | ✖ | +| Information measure | [`ConditionalEntropyShannon`](@ref) | ✓ | ✖ | +| Information measure | [`ConditionalEntropyTsallisAbe`](@ref) | ✓ | ✖ | +| Information measure | [`ConditionalEntropyTsallisFuruichi`](@ref) | ✓ | ✖ | +| Divergence | [`HellingerDistance`](@ref) | ✓ | ✖ | +| Divergence | [`KLDivergence`](@ref) | ✓ | ✖ | +| Divergence | [`RenyiDivergence`](@ref) | ✓ | ✖ | +| Divergence | [`VariationDistance`](@ref) | ✓ | ✖ | + +See also: [`AssociationMeasureEstimator`](@ref). """ abstract type AssociationMeasure end -abstract type DirectedAssociationMeasure <: AssociationMeasure end +""" + AssociationMeasureEstimator + +The supertype of all association measure estimators. + +Concrete subtypes are given as input to [`association`](@ref). 
+ +## Abstract subtypes + +- [`MultivariateInformationMeasureEstimator`](@ref) +- [`CrossmapEstimator`](@ref) + +## Concrete implementations + +| AssociationMeasure | Estimators | +| :------------------------------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [`PearsonCorrelation`](@ref) | Not required | +| [`DistanceCorrelation`](@ref) | Not required | +| [`PartialCorrelation`](@ref) | Not required | +| [`SMeasure`](@ref) | Not required | +| [`HMeasure`](@ref) | Not required | +| [`MMeasure`](@ref) | Not required | +| [`LMeasure`](@ref) | Not required | +| [`JointDistanceDistribution`](@ref) | Not required | +| [`PairwiseAsymmetricInference`](@ref) | [`RandomVectors`](@ref), [`RandomSegment`](@ref) | +| [`ConvergentCrossMapping`](@ref) | [`RandomVectors`](@ref), [`RandomSegment`](@ref) | +| [`MCR`](@ref) | Not required | +| [`RMCD`](@ref) | Not required | +| [`MIShannon`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref), [`KSG1`](@ref), [`KSG2`](@ref), [`GaoOhViswanath`](@ref), [`GaoKannanOhViswanath`](@ref), [`GaussianMI`](@ref) | +| [`MIRenyiJizba`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref) | +| [`MIRenyiSarbu`](@ref) | [`JointProbabilities`](@ref) | +| [`MITsallisFuruichi`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref) | +| [`MITsallisMartin`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref) | +| [`CMIShannon`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref), [`MIDecomposition`](@ref), [`GaussianCMI`](@ref), [`FPVP`](@ref), [`MesnerShalizi`](@ref), [`Rahimzamani`](@ref) | +| [`CMIRenyiSarbu`](@ref) | [`JointProbabilities`](@ref) | +| [`CMIRenyiJizba`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref) | +| [`CMIRenyiPoczos`](@ref) | [`PoczosSchneiderCMI`](@ref) | +| [`CMITsallisPapapetrou`](@ref) | [`JointProbabilities`](@ref) | +| [`TEShannon`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref), [`Zhu1`](@ref), [`Lindner`](@ref) | +| [`TERenyiJizba`](@ref) | [`JointProbabilities`](@ref) | +| [`PMI`](@ref) | [`JointProbabilities`](@ref) | +| [`PA`](@ref) | [`JointProbabilities`](@ref) | +| [`JointEntropyShannon`](@ref) | [`JointProbabilities`](@ref) | +| [`JointEntropyRenyi`](@ref) | [`JointProbabilities`](@ref) | +| [`JointEntropyTsallis`](@ref) | [`JointProbabilities`](@ref) | +| [`ConditionalEntropyShannon`](@ref) | [`JointProbabilities`](@ref) | +| [`ConditionalEntropyTsallisAbe`](@ref) | [`JointProbabilities`](@ref) | +| [`ConditionalEntropyTsallisFuruichi`](@ref) | [`JointProbabilities`](@ref) | +| [`HellingerDistance`](@ref) | [`JointProbabilities`](@ref) | +| [`KLDivergence`](@ref) | [`JointProbabilities`](@ref) | +| [`RenyiDivergence`](@ref) | [`JointProbabilities`](@ref) | +| [`VariationDistance`](@ref) | [`JointProbabilities`](@ref) | -# For measures without dedicated estimators, skip the estimator. -function estimate(measure::M, est::Nothing, args...; kwargs...) where M - estimate(measure, args...; kwargs...) 
-end +""" +abstract type AssociationMeasureEstimator end -include("contingency_matrices.jl") +""" + association(estimator::AssociationMeasureEstimator, x, y, [z, ...]) → r + association(definition::AssociationMeasure, x, y, [z, ...]) → r +Estimate the (conditional) association between input variables `x, y, z, …` using +the given `estimator` (an [`AssociationMeasureEstimator`](@ref)) or `definition` +(an [`AssociationMeasure`](@ref)). -# Just use ComplexityMeasures.convert_logunit when it is released. -""" - _convert_logunit(h_a::Real, , to) → h_b +The type of the return value `r` depends on the `measure`/`estimator`. + +## Examples + +Some [`AssociationMeasure`](@ref)s have no estimators and are given directly. +For other association measures, you need to provide an [`AssociationMeasureEstimator`](@ref) + of some kind, which takes the definition as its first argument. + +```julia +using CausalityTools +x, y, z = rand(1000), rand(1000), rand(1000) -Convert a number `h_a` computed with logarithms to base `a` to an entropy `h_b` computed -with logarithms to base `b`. This can be used to convert the "unit" of an entropy. +# Pairwise association using different measures +association(DistanceCorrelation(), x, y) +association(PartialCorrelation(), x, y) +association(JointProbabilities(ConditionalEntropyTsallisAbe(), ValueBinning(3)), x, y) +association(JointProbabilities(JointEntropyShannon(), Dispersion(c = 3, m = 2)), x, y) +association(EntropyDecomposition(MIShannon(), PlugIn(Shannon()), OrdinalPatterns(m=3)), x, y) +association(KSG2(MIShannon(base = 2)), x, y) + +# Conditional association using different measures +association(JointProbabilities(PartialMutualInformation(), OrdinalPatterns(m=3)), x, y, z) +association(FPVP(CMIShannon(base = 2)), x, y, z) +``` """ -function _convert_logunit(h::Real, base_from, base_to) - h / log(base_from, base_to) +function association(est, x...) + throw(ArgumentError("`association` not implemented for `est = $est` for this input data")) end -# Default to bivariate measures. Other measures override it. """ min_inputs_vars(m::AssociationMeasure) → nmin::Int @@ -70,4 +197,4 @@ function verify_number_of_inputs_vars(measure::AssociationMeasure, n::Int) if n > nmax throw(ArgumentError("$T accepts a maximum of $nmax inputs. Got $n inputs.")) end -end +end \ No newline at end of file diff --git a/src/deprecations/crossmap.jl b/src/deprecations/crossmap.jl index 8280d868b..e69de29bb 100644 --- a/src/deprecations/crossmap.jl +++ b/src/deprecations/crossmap.jl @@ -1,121 +0,0 @@ -using Statistics -export pai - -""" - crossmap(x, y, d, τ; r = 0, correspondence_measure = Statistics.cor) → Float64 - crossmap(x, y, d, τ, bootstrap_method::Symbol; r = 0, correspondence_measure = Statistics.cor, - method = :segment, L = ceil(Int, (length(x)-d*τ)*0.2), nreps = 100) → Vector{Float64} - -!!! info "This syntax is deprecated" - This syntax is deprecated. It will continue to work for CausalityTools v1.X, but will - be removed in CausalityTools v2. See [here](@ref cross_mapping_api) for updated syntax. - -Compute the cross mapping [Sugihara2012](@cite) between `x` and `y`, which is the correspondence (computed using -`correspondence measure`) between the values ``y(t)`` and the cross-map estimated values ``ỹ(t) | M_x``. - -Returns the correspondence between original and cross mapped values (the default is -`ρ = correspondence_measure(y(t), ỹ(t) | M_x)`). 
- -Here, ``y(t)`` are the raw values of the time series `y`, and ``ỹ(t)`` are the predicted values -computed from the out-of-sample embedding ``M_X`` constructed from the time series `x` with -embedding dimension `d` and embedding lag `τ`. - -The Theiler window `r` indicates how many temporal neighbors of the predictee is to be excluded -during the nearest neighbors search (the default `r = 0` excludes only the predictee itself, while -`r = 2` excludes the point itself plus its two nearest neighbors in time). - -If `bootstrap_method` is specified, then `nreps` different bootstrapped estimates of -`correspondence_measure(y(t), ỹ(t) | M_x)` are returned. The following bootstrap methods are available: -- `bootstrap_method = :random` selects training sets of length `L` consisting of randomly selected - points from the embedding ``M_x`` (time ordering does not matter). This is method 3 from - [Luo2015](@citet), which critiqued the original [Sugihara2012](@cite)'s methodology. -- `bootstrap_method = :segment` selects training sets consisting of time-contiguous segments - (each of lenght `L`) of embedding vectors in ``M_x`` (time ordering matters). This is - method 2 from [Luo2015](@citet). -""" -function crossmap(x, y, d, τ; correspondence_measure = Statistics.cor, r = 0) - @warn """\ - `crossmap(x, y, d, τ; kwargs...)`d is deprecated. Use \ - `crossmap(CCM(; d, τ), x, y` instead. - """ - - measure = CCM(; d, τ, f = correspondence_measure, w = r) - return crossmap(measure, y, x) -end - -function crossmap(x, y, d, τ, bootstrap_method::Symbol; - L = ceil(Int, (length(x) - d * τ) * 0.2), nreps = 100, - r = 0, correspondence_measure = Statistics.cor) - @warn """\ - `crossmap(x, y, d, τ, bootstrap_method; kwargs...)` is deprecated. Use \ - `crossmap(Ensemble(PAI(; d, τ), est::CrossmapEstimator), x, y)` instead. - """ - measure = ConvergentCrossMapping(; d, τ, f = correspondence_measure, w = r) - if bootstrap_method == :random - est = RandomVectors(libsizes = L) - else bootstrap_method == :segment - est = RandomSegment(libsizes = L) - end - ensemble = Ensemble(measure, est, nreps = nreps) - return crossmap(ensemble, y, x) -end - -""" - pai(x, y, d, τ; w = 0, correspondence_measure = Statistics.cor) → Float64 - pai(x, y, d, τ, bootstrap_method::Symbol; w = 0, correspondence_measure = Statistics.cor, - method = :segment, L = ceil(Int, (length(x)-d*τ)*0.2), nreps = 100) → Vector{Float64} - -!!! info "This syntax is deprecated" - This syntax is deprecated. It will continue to work for CausalityTools v1.X, but will - be removed in CausalityTools v2. See [here](@ref cross_mapping_api) for updated syntax. - -Compute the pairwise asymmetric inference (PAI; [McCracken2014](@citet)) between `x` and `y`. -Returns the correspondence between original and cross mapped values (the default is -`ρ = correspondence_measure(y(t), ỹ(t) | M_xy)`). - -PAI is a modification to [Sugihara2012](@citet)'s CCM algorithm, where instead of -using completely out-of-sample prediction when trying to predict ``y(t)``, values about *both* variables -are included in the embedding used to make predictions. Specifically, PAI computes the -correspondence between the values ``y(t)`` and the cross-map estimated values ``ỹ(t) | M_xy``, -where the ``\\tilde{y}(t)`` are the values estimated using the embedding ``M_{xy} = \\{ ( x_t, x_{t-\\tau}, x_{t-2\\tau}, \\ldots, x_{t-(d - 1)\\tau} ) \\}``. -*Note: a `d+1`-dimensional embedding is used, rather than the `d`-dimensional embedding used for CCM. 
-Like for the CCM algorithm, the Theiler window `r` indicates how many temporal neighbors of the predictee is to be excluded -during the nearest neighbors search (the default `r = 0` excludes only the predictee itself, while -`r = 2` excludes the point itself plus its two nearest neighbors in time). - -If `bootstrap_method` is specified, then `nreps` different bootstrapped estimates of -`correspondence_measure(y(t), ỹ(t) | M_x)` are returned. The following bootstrap methods are available: - -- `bootstrap_method = :random` selects training sets of length `L` consisting of randomly selected - points from the embedding ``M_x`` (time ordering does not matter). This is method 3 from - [Luo2015](@cite), which critiqued the original [Sugihara2012](@citet) methodology. -- `bootstrap_method = :segment` selects training sets consisting of time-contiguous segments - (each of lenght `L`) of embedding vectors in ``M_x`` (time ordering matters). This is - method 2 from Luo et al. (2015)[Luo2015](@cite). -""" -function pai(x, y, d, τ; correspondence_measure = Statistics.cor, r = 0) - @warn """\ - `pai(x, y, d, τ; kwargs...)`d is deprecated. Use \ - `crossmap(PAI(; d, τ), x, y)` instead. - """ - - measure = PairwiseAsymmetricInference(; d, τ, f = correspondence_measure, w = r) - return crossmap(measure, y, x) -end - -function pai(x, y, d, τ, bootstrap_method::Symbol; - L = ceil(Int, (length(x) - d * τ) * 0.2), nreps = 100, - r = 0, correspondence_measure = Statistics.cor) - @warn """\ - `pai(x, y, d, τ; kwargs...)`d is deprecated. Use \ - `crossmap(Ensemble(PAI(; d, τ), est::CrossmapEstimator), x, y)` instead. - """ - measure = CCM(; d, τ, f = correspondence_measure, w = r) - if bootstrap_method == :random - est = RandomVectors(libsizes = L) - else bootstrap_method == :segment - est = RandomSegment(libsizes = L) - end - ensemble = Ensemble(measure, est, nreps = nreps) - return crossmap(ensemble, y, x) -end diff --git a/src/deprecations/deprecations.jl b/src/deprecations/deprecations.jl index 85207fdbe..641735510 100644 --- a/src/deprecations/deprecations.jl +++ b/src/deprecations/deprecations.jl @@ -1,5 +1,7 @@ -include("transferentropy.jl") # remove this file when 2.0 is released. +include("v3/deprecations_v3.jl") + +#include("transferentropy.jl") # remove this file when 2.0 is released. include("crossmap.jl") # remove this file when 2.0 is released. -include("predictive_asymmetry.jl") # will be reintroduced in some 2.X release -include("joint_distance_distribution.jl") -include("smeasure.jl") \ No newline at end of file +#include("predictive_asymmetry.jl") # will be reintroduced in some 2.X release + + diff --git a/src/deprecations/v3/crossmappings.jl b/src/deprecations/v3/crossmappings.jl new file mode 100644 index 000000000..7ac79a46d --- /dev/null +++ b/src/deprecations/v3/crossmappings.jl @@ -0,0 +1,41 @@ +export pai +export crossmap + + +# These are not actual deprecations, but breaking changes. Better to error with explicit +# change message. +function RandomVectors(; kwargs...) + msg = "RandomVectors now takes a `CrossmapMeasure` as the first argument. " * + "Do `RandomVectors(CCM(); kwargs...)` instead of `RandomVectors(; kwargs...)." + throw(ArgumentError(msg)) +end + +function RandomSegment(; kwargs...) + msg = "RandomSegment now takes a `CrossmapMeasure` as the first argument. " * + "Do `RandomSegment(CCM(); kwargs...)` instead of `RandomSegment(; kwargs...)." + throw(ArgumentError(msg)) +end + +function ExpandingSegment(; kwargs...) 
+ msg = "ExpandingSegment now takes a `CrossmapMeasure` as the first argument. " * + "Do `ExpandingSegment(CCM(); kwargs...)` instead of `ExpandingSegment(; kwargs...)." + throw(ArgumentError(msg)) +end + +function crossmap(measure::CCMLike, est::CrossmapEstimator, args...) + msg = "crossmap(measure::CrossmapMeasure, est::CrossmapEstimator, args...) is deprecated. " * + "Use `association(est::CrossmapEstiamator, x, y)` instead.`" + throw(ArgumentError(msg)) +end + +function crossmap(x::AbstractVector, y::AbstractVector, args...) + msg = "crossmap(x::AbstractVector, y::AbstractVector, args...) is deprecated" * + "Use `association(RandomSegment(CCM(); libsizes = 10:10:50), x, y)` instead.`" + throw(ArgumentError(msg)) +end + +function pai(x::AbstractVector, y::AbstractVector, args...) + msg = "pai(x::AbstractVector, y::AbstractVector, args...) is deprecated. " * + "Use `association(RandomSegment(CCM(); libsizes = 10:5:30), x, y)` instead.`" + throw(ArgumentError(msg)) +end diff --git a/src/deprecations/v3/deprecations_v3.jl b/src/deprecations/v3/deprecations_v3.jl new file mode 100644 index 000000000..78f30ad97 --- /dev/null +++ b/src/deprecations/v3/deprecations_v3.jl @@ -0,0 +1,17 @@ +# Cross mappings +include("crossmappings.jl") + +# Closeness +include("joint_distance_distribution.jl") +include("smeasure.jl") +include("hmeasure.jl") +include("mmeasure.jl") +include("lmeasure.jl") + +# Correlation +include("pearson_correlation.jl") +include("distance_correlation.jl") +include("partial_correlation.jl") + +# Full names +@deprecate PMI PartialMutualInformation diff --git a/src/deprecations/v3/distance_correlation.jl b/src/deprecations/v3/distance_correlation.jl new file mode 100644 index 000000000..5e9e728df --- /dev/null +++ b/src/deprecations/v3/distance_correlation.jl @@ -0,0 +1,17 @@ +export distance_correlation + +function distance_correlation(x, y; kwargs...) + @warn( + "Convenience function `distance_correlation` is deprecated. " * + "Use `association(DistanceCorrelation(), x, y)` instead." + ) + association(DistanceCorrelation(; kwargs...), x, y) +end + +function distance_correlation(x, y, z; kwargs...) + @warn( + "Convenience function `distance_correlation` is deprecated. " * + "Use `association(DistanceCorrelation(), x, y, z)` instead." + ) + association(DistanceCorrelation(; kwargs...), x, y, z) +end diff --git a/src/deprecations/v3/hmeasure.jl b/src/deprecations/v3/hmeasure.jl new file mode 100644 index 000000000..cddeb247d --- /dev/null +++ b/src/deprecations/v3/hmeasure.jl @@ -0,0 +1,17 @@ +export h_measure + +function h_measure(x, y; kwargs...) + @warn( + "Convenience function `h_measure` is deprecated. " * + "Use `h_measure(HMeasure(; kwargs...), source, target)` instead." + ) + return association(HMeasure(; kwargs...), x, y) +end + +function h_measure(measure::HMeasure, x, y; kwargs...) + @warn( + "Convenience function `h_measure` is deprecated. " * + "Use `association(HMeasure(; kwargs...), source, target) instead." 
+ ) + return association(HMeasure(; kwargs...), x, y) +end diff --git a/src/deprecations/joint_distance_distribution.jl b/src/deprecations/v3/joint_distance_distribution.jl similarity index 50% rename from src/deprecations/joint_distance_distribution.jl rename to src/deprecations/v3/joint_distance_distribution.jl index 4636ec983..5145971c5 100644 --- a/src/deprecations/joint_distance_distribution.jl +++ b/src/deprecations/v3/joint_distance_distribution.jl @@ -1,3 +1,4 @@ +export jdd """ jdd(measure::JointDistanceDistribution, source, target) → Δ @@ -9,28 +10,26 @@ Returns the distribution `Δ` from the paper directly ([example](@ref quickstart Use [`JointDistanceDistributionTest`](@ref) to perform a formal indepencence test. """ function jdd(source, target; kw...) - if !isempty(kw) - @warn( - "Providing keywords to `jdd` is deprecated. " * - "Use `jdd(JointDistanceDistribution(; kwargs...), source, target) instead of " * - "`jdd(source, target; kwargs...)`" - ) - end - estimate(JointDistanceDistribution(; kw...), source, target) + @warn( + "Convenience function `jdd` is deprecated. " * + "Use `association(JointDistanceDistribution(; kwargs...), x, y)` instead." + ) + association(JointDistanceDistribution(; kw...), source, target) end function jdd(measure::JointDistanceDistribution, source, target) - return estimate(measure, source, target) + @warn( + "Convenience function `jdd` is deprecated. " * + "Use `association(JointDistanceDistribution(; kwargs...), x, y)` instead." + ) + return association(measure, source, target) end function jdd(::Type{OneSampleTTest}, x, y; kwargs...) @warn( - "jdd(::OneSampleTTest, x, y; kwargs...) is deprecated. " * - "Instead, do\n" * - " measure = JointDistanceDistribution()\n" * - " independence(JointDistanceDistributionTest(measure), x, y)\n" + "jdd(::OneSampleTTest, x, y; kwargs...) is deprecated. Instead, do `measure = JointDistanceDistribution(); independence(JointDistanceDistributionTest(measure), x, y)`." ) measure = JointDistanceDistribution(; kwargs...) - Δjdd = jdd(measure, x, y) + Δjdd = association(measure, x, y) return OneSampleTTest(Δjdd, measure.μ) end diff --git a/src/deprecations/v3/lmeasure.jl b/src/deprecations/v3/lmeasure.jl new file mode 100644 index 000000000..95c68d325 --- /dev/null +++ b/src/deprecations/v3/lmeasure.jl @@ -0,0 +1,17 @@ +export l_measure + +function l_measure(x, y; kwargs...) + @warn( + "Convenience function `l_measure` is deprecated. " * + "Use `l_measure(LMeasure(; kwargs...), source, target)` instead." + ) + return association(LMeasure(; kwargs...), x, y) +end + +function l_measure(measure::LMeasure, x, y; kwargs...) + @warn( + "Convenience function `l_measure` is deprecated. " * + "Use `association(LMeasure(; kwargs...), source, target) instead." + ) + return association(LMeasure(; kwargs...), x, y) +end diff --git a/src/deprecations/v3/mmeasure.jl b/src/deprecations/v3/mmeasure.jl new file mode 100644 index 000000000..507c902e5 --- /dev/null +++ b/src/deprecations/v3/mmeasure.jl @@ -0,0 +1,17 @@ +export m_measure + +function m_measure(x, y; kwargs...) + @warn( + "Convenience function `m_measure` is deprecated. " * + "Use `m_measure(MMeasure(; kwargs...), source, target)` instead." + ) + return association(MMeasure(; kwargs...), x, y) +end + +function m_measure(measure::MMeasure, x, y; kwargs...) + @warn( + "Convenience function `m_measure` is deprecated. " * + "Use `association(MMeasure(; kwargs...), source, target) instead." 
+ ) + return association(MMeasure(; kwargs...), x, y) +end diff --git a/src/deprecations/v3/partial_correlation.jl b/src/deprecations/v3/partial_correlation.jl new file mode 100644 index 000000000..67502c7ea --- /dev/null +++ b/src/deprecations/v3/partial_correlation.jl @@ -0,0 +1,9 @@ +export partial_correlation + +function partial_correlation(x, y, z; kwargs...) + @warn( + "Convenience function `partial_correlation` is deprecated. " * + "Use `association(PartialCorrelation(), x, y, z)` instead." + ) + association(PartialCorrelation(; kwargs...), x, y, z) +end \ No newline at end of file diff --git a/src/deprecations/v3/pearson_correlation.jl b/src/deprecations/v3/pearson_correlation.jl new file mode 100644 index 000000000..4b9fc007a --- /dev/null +++ b/src/deprecations/v3/pearson_correlation.jl @@ -0,0 +1,9 @@ +export pearson_correlation + +function pearson_correlation(x, y; kwargs...) + @warn( + "Convenience function `pearson_correlation` is deprecated. " * + "Use `association(PearsonCorrelation(; kwargs...), source, target)` instead." + ) + association(PearsonCorrelation(; kwargs...), x, y) +end \ No newline at end of file diff --git a/src/deprecations/smeasure.jl b/src/deprecations/v3/smeasure.jl similarity index 50% rename from src/deprecations/smeasure.jl rename to src/deprecations/v3/smeasure.jl index 9adc0eecf..42c5ff26a 100644 --- a/src/deprecations/smeasure.jl +++ b/src/deprecations/v3/smeasure.jl @@ -16,26 +16,25 @@ synchronization for `s = 1.0`. ```julia using CausalityTools -# A two-dimensional Ulam lattice map -sys = ulam(2) - -# Sample 1000 points after discarding 5000 transients -orbit = trajectory(sys, 1000, Ttr = 5000) -x, y = orbit[:, 1], orbit[:, 2] +x, y = rand(1000), rand(1000) # 4-dimensional embedding for `x`, 5-dimensional embedding for `y` m = SMeasure(dx = 4, τx = 3, dy = 5, τy = 1) -s_measure(m, x, y) +association(m, x, y) ``` - """ -function s_measure(x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet; kwargs...) - if !isempty(kwargs) - @warn( - "Providing keywords to `s_measure` is deprecated. " * - "Use `s_measure(SMeasure(; kwargs...), source, target) instead of " * - "`s_measure(source, target; kwargs...)`" - ) - end - return estimate(SMeasure(; kwargs...), x, y) +function s_measure(x, y; kwargs...) + @warn( + "Convenience function `s_measure` is deprecated. " * + "Use `association(SMeasure(; kwargs...), x, y)` instead." + ) + return association(SMeasure(; kwargs...), x, y) end + +function s_measure(measure::SMeasure, x, y; kwargs...) + @warn( + "Convenience function `s_measure` is deprecated. " * + "Use `association(SMeasure(; kwargs...), x, y)` instead." + ) + return association(SMeasure(; kwargs...), x, y) +end \ No newline at end of file diff --git a/src/example_systems/api.jl b/src/example_systems/api.jl deleted file mode 100644 index e8bf022b8..000000000 --- a/src/example_systems/api.jl +++ /dev/null @@ -1,84 +0,0 @@ -export SystemDefinition, DiscreteDefinition, ContinuousDefinition, LaggedDiscreteDefinition -export system - -""" - SystemDefinition - -The abstract type of all system definitions. Abstract subtypes are [`DiscreteDefinition`](@ref) -and [`ContinuousSystem`](@ref). -""" -abstract type SystemDefinition end - -""" - DiscreteDefinition <: SystemDefinition - -The supertype of all discrete system definitions. -""" -abstract type DiscreteDefinition <: SystemDefinition end - -""" - ContinuousDefinition <: SystemDefinition - -The supertype of all continuous system definitions. 
-""" -abstract type ContinuousDefinition <: SystemDefinition end - -""" - system(definition::DiscreteDefinition) → s::DiscreteDynamicalSystem - system(definition::ContinuousDefinition) → s::ContinuousDynamicalSystem - -Initialize a dynamical system from `definition`. -""" -function system(d::SystemDefinition) end - -################################################################ -# Internal type for lagged-system-specific dispatch. -################################################################ -""" - LaggedDiscreteDefinition <: SystemDefinition - -The supertype of definitions for discrete systems with lag larger than 1. - -Why is this type needed? Ideally, an additional definition shouldn't -be needed, because we should in principle be able to use `DiscreteDynamicalSystem` directly -for all systems. However, `DiscreteDynamicalSystem` doesn't work -for systems with memory beyond a single time lag. For example, autoregressive systems -of order larger than one are not representable using `DiscreteDynamicalSystem`. - -Concrete subtypes of `DiscreteDefinition` are *parameter containers* that are passed -on to [`DiscreteDynamicalSystem`](@ref). They allocate mutable containers that keep -track of past states of state variables that require it. Use [`system`](@ref) to -generate a `DiscreteDynamicalSystem` that can be used with [`trajectory`](@ref). - -## Implementation details - -Concrete implementations must fulfill the below criteria. - -- Subtypes must implement a `past_states` field, which is - a `SVector{N, MVector{L, Int}}`, where `N` is the number of variables. For type stability, - `L` states are tracked for *all* variables, even though the maximum lag may only occur - for one of the variables. -- The first type parameter of subtypes must be `P`, which keeps track of the type of - `past_states`. - -For an example, see the source code for [`Peguin2`](@ref). -""" -abstract type LaggedDiscreteDefinition{P} <: SystemDefinition end - -""" - update_states!(s::LaggedDiscreteDefinition, xnew::SVector{D}) - -Given `xnew` (the new current state of a system), update the past states of `s`. -""" -function update_states!(def::LaggedDiscreteDefinition{SVector{N, MVector{D, T}}}, - xnew::SVector{N}) where {N, D, T} - D >= 2 || error("Memory vector for LaggedDiscreteDefinition must have length at least 2.") - for var in 1:N - # A LaggedDiscreteDefinition always has a memory vector of length at least 2. - # Otherwise, it should be a regular DiscreteDefinition. - for k in D:-1:2 - def.past_states[var][k] = def.past_states[var][k - 1] - end - def.past_states[var][1] = xnew[var] - end -end diff --git a/src/example_systems/continuous/ChuaCircuitsBidir6.jl b/src/example_systems/continuous/ChuaCircuitsBidir6.jl deleted file mode 100644 index dbd83b2be..000000000 --- a/src/example_systems/continuous/ChuaCircuitsBidir6.jl +++ /dev/null @@ -1,82 +0,0 @@ -using Random -using Distributions: Normal -using DynamicalSystemsBase: ContinuousDynamicalSystem -using StaticArrays: SVector - -export ChuaCircuitsBidir6 - -""" - ChuaCircuitsBidir6 <: ContinuousDefinition - ChuaCircuitsBidir6(;u₀ = [0.1, 0.1, 0.2, 0.15, 0.15, 0.22], - α₁ = 7.0, α₂ = 7.0, β₁ = 14.286, β₂ = 14.286, - F₁ = 1.5, F₂ = 1.5, ω₁ = 3.0, ω₂ = 3.0, - n1 = Normal(0, 0.1), - n2 = Normal(0, 0.1), - c12 = 0.1, c21 = 0.1, m₀ = -1/7, m₁ = 2/7) - -Initialize a bidirectionally coupled system consisting of two driven Chua -circuits, X₁ and X₂ [Murali1993](@citet). 
- -## Description - -The subsystems are mutually coupled by a linear resistor, where `ϵ12` controls the -influence of X₁ on X₂, and `c21` controls the influence of X₂ on X₁. The parameters for -the subsystems are set equal to each other, as in the original paper, but can here -be tuned individually for each subsystem. - -```math -\\begin{align*} -\\dfrac{dx_1}{dt} &= \\alpha_1(y_1, h(x_1)) - \\alpha_1 \\epsilon(x_1 - x_2) \\\\ -\\dfrac{dy_1}{dt} &= x_1 - y_1 + z_1 \\\\ -\\dfrac{dz_1}{dt} &= -\\beta_1 y_1 + F_1 sin(\\omega_1 t) + \\epsilon_1 \\\\ -\\dfrac{dx_2}{dt} &= \\alpha_2 (y_2, h(x_2)) - \\alpha_2 c_{12}(x_1 - x_2) \\\\ -\\dfrac{dy_2}{dt} &= x_2 - y_2 + z_2 \\\\ -\\dfrac{dz_2}{dt} &= -\\beta_2 y_2 + F_2 sin(\\omega_2 t) + \\epsilon_2, -\\end{align*} -``` - -where ``h(x) = M_1x + 0.5(M_0 - M_1)(|x+1| - |x - 1|)`` and ``\\epsilon_1, \\epsilon_2`` -are noise terms that at each integration step is drawn independently from the normal -distributions `n1` and `n2`, respectively. -""" -Base.@kwdef struct ChuaCircuitsBidir6{V,A1,A2,B1,B2,F1,F2,W1,W2,S1,S2,E1,E2,M0,M1,R}<: ContinuousDefinition - xi::V = [0.1, 0.1, 0.2, 0.15, 0.15, 0.22] - α₁::A1 = 7.0 - α₂::A2 = 7.0 - β₁::B1 = 14.286 - β₂::B2 = 14.286 - F₁::F1 = 1.5 - F₂::F2 = 1.5 - ω₁::W1 = 3.0 - ω₂::W2 = 3.0 - n1::S1 = Normal(0, 0.1) - n2::S2 = Normal(0, 0.1) - c12::E1 = 0.1 - c21::E2 = 0.1 - m₀::M0 = -1/7 - m₁::M1 = 2/7 - rng::R = Random.default_rng() -end - -function system(definition::ChuaCircuitsBidir6) - return ContinuousDynamicalSystem(eom_chuabidir6, definition.xi, definition) -end - -function eom_chuabidir6(u, p::ChuaCircuitsBidir6, t) - (; xi, α₁, α₂, β₁, β₂, F₁, F₂, ω₁, ω₂, n1, n2, c12, c21, m₀, m₁, rng) = p - x₁, y₁, z₁, x₂, y₂, z₂ = u - - ξ1 = rand(rng, n1) - ξ2 = rand(rng, n2) - hx₁ = m₁*x₁ + 0.5*(m₀ - m₁)*(abs(x₁+1) - abs(x₁-1)) - hx₂ = m₁*x₂ + 0.5*(m₀ - m₁)*(abs(x₂+1) - abs(x₂-1)) - - dx₁ = α₁*(y₁-hx₁) - α₁*c21*(x₁ - x₂) - dy₁ = x₁-y₁+z₁ - dz₁ = -β₁*y₁ + F₁*sin(ω₁*t) + ξ1 - - dx₂ = α₂*(y₂-hx₂) - α₂*c12*(x₁ - x₂) - dy₂ = x₂-y₂+z₂ - dz₂ = -β₂*y₂ + F₂*sin(ω₂*t) + ξ2 - SVector{6}(dx₁, dy₁, dz₁, dx₂, dy₂, dz₂) -end diff --git a/src/example_systems/continuous/ChuaScrollSine3.jl b/src/example_systems/continuous/ChuaScrollSine3.jl deleted file mode 100644 index 77e569cfd..000000000 --- a/src/example_systems/continuous/ChuaScrollSine3.jl +++ /dev/null @@ -1,87 +0,0 @@ -using Distributions: Normal -using DynamicalSystemsBase: ContinuousDynamicalSystem -using Random - -export ChuaScrollSine3 - -""" - ChuaScrollSine3 <: ContinuousDefinition - ChuaScrollSine3(; xi = [0.1, 0.2, 0.3], - α = 10.814, β = 14, γ = 0, a = 1.3, b = 0.11, c = 2, - nx = Normal(0.0, 0.01), - ny = Normal(0.0, 0.01) - nz = Normal(0.0, 0.01)) - -An adjusted Chua system giving rise to n-scroll attractors [Tang2001](@cite). - -## Description - -The dynamics is generated by the following vector field - -```math -\\begin{align*} -\\dot{x} &= \\alpha (y - fx) + \\eta x \\\\ -\\dot{y} &= x - y + z + \\eta y \\\\ -\\dot{z} &= -\\beta y - \\gamma z + \\eta z -\\end{align*} -``` - -where ``\\eta x``, ``\\eta z``, and ``\\eta z`` are drawn independently from -normal distributions `nx`, `ny` and `nz` each iteration. 
- -``fx`` is given by the following conditions: - -```julia -n::Int = c + 1 - -if x >= 2*a*c - fx = (b*pi/2*a)*(x - 2*a*c) -elseif -2*a*c < x < 2*a*c - d = ifelse(isodd(n), pi, 0) - fx = -b*sin((pi*x/2*a) + d) -elseif x <= -2*a*c - fx = (b*pi/2*a)*(x + 2*a*c) -end -``` -""" -Base.@kwdef struct ChuaScrollSine3{V,A,B,Y,Q,R,C,NX,NY,NZ,RNG} <: ContinuousDefinition - xi::V = [0.1, 0.2, 0.3] - α::A = 10.814 - β::B = 14 - γ::Y = 0 - a::Q = 1.3 - b::R = 0.11 - c::C = 2 - nx::NX = Normal(0.0, 0.01) - ny::NY = Normal(0.0, 0.01) - nz::NZ = Normal(0.0, 0.01) - rng::RNG = Random.default_rng() -end - -function system(definition::ChuaScrollSine3) - return ContinuousDynamicalSystem(eom_chuascrollsine3, definition.xi, definition) -end - -@inline @inbounds function eom_chuascrollsine3(u, p, t) - (; xi, α, β, γ, a, b, c, nx, ny, nz, rng) = p - x, y, z = u - - n::Int = c + 1 - if x >= 2*a*c - fx = (b*pi/2*a)*(x - 2*a*c) - elseif -2*a*c < x < 2*a*c - d = ifelse(isodd(n), pi, 0) - fx = -b*sin((pi*x/2*a) + d) - elseif x <= -2*a*c - fx = (b*pi/2*a)*(x + 2*a*c) - end - - ηx = rand(rng, nx) - ηy = rand(rng, ny) - ηz = rand(rng, nz) - - dx = α*(y - fx) + ηx - dy = x - y + z + ηy - dz = -β*y - γ*z + ηz - return SVector{3}(dx, dy, dz) -end diff --git a/src/example_systems/continuous/HindmarshRose3.jl b/src/example_systems/continuous/HindmarshRose3.jl deleted file mode 100644 index 0e161958f..000000000 --- a/src/example_systems/continuous/HindmarshRose3.jl +++ /dev/null @@ -1,60 +0,0 @@ -using StaticArrays: SVector -using DynamicalSystemsBase: ContinuousDynamicalSystem - -export HindmarshRose3 - -""" - HindmarshRose3 <: ContinuousDefinition - HindmarshRose3(; xi = [0.1, 0.2, 0.3, p) - -Initialise a Hindmarsh-Rose system, which is a model of neuronal -spiking. - -## Description - -```math -\\begin{align*} -\\dfrac{dx}{dt} &= y + \\phi(x) - z + I \\\\ -\\dfrac{dy}{dt} &= \\psi(x) - y \\\\ -\\dfrac{dz}{dt} &= r[s(x - x_R) - z], -\\end{align*} -``` -where - -```math -\\begin{aligned} -\\phi(x) &= -ax^3+bx^2 -\\psi(x) &= c - dx^2 -\\end{aligned} -``` - -If parameters other than the defaults are to be used, they must be -provided as a vector `[a, b, c, d, r, s, xᵣ, I]`. -""" -Base.@kwdef struct HindmarshRose3{V,A,B,C,D,R,S,X,II} <: ContinuousDefinition - xi::V = [0.1, 0.2, 0.3] - a::A = 1 - b::B = 3 - c::C = 1 - d::D = 5 - r::R = 1e-3 - s::S = 4 - xᵣ::X = -8/5 - I::II = -8 -end - -function system(definition::HindmarshRose3) - return ContinuousDynamicalSystem(eom_hindmarshrose, definition.xi, definition) -end - -function eom_hindmarshrose(u, p::HindmarshRose3, t) - (; xi, a, b, c, d, r, s, xᵣ, I) = p - x, y, z = u - - ϕ = -a*x^3 + b*x^2 - ψ = c - d*x^2 - dx = y + ϕ - z + I - dy = ψ - y - dz = r*(s*(x - xᵣ) - z) - return SVector{3}(dx, dy, dz) -end diff --git a/src/example_systems/continuous/LorenzBidir6.jl b/src/example_systems/continuous/LorenzBidir6.jl deleted file mode 100644 index 41554cce9..000000000 --- a/src/example_systems/continuous/LorenzBidir6.jl +++ /dev/null @@ -1,66 +0,0 @@ -using DynamicalSystemsBase: ContinuousDynamicalSystem -using StaticArrays: SVector - -export LorenzBidir6 - -""" - LorenzBidir6 <: ContinuousDefinition - LorenzBidir6(; xi = [0.1, 0.05, 0.2, 0.2, 0.25, 0.3], - c_xy = 0.2, c_yx = 0.2, - a₁ = 10, a₂ = 28, a₃ = 8/3, - b₁ = 10, b₂ = 28, b₃ = 9/3) - -A bidirectionally coupled Lorenz-Lorenz system, where each -subsystem is a 3D Lorenz system (Amigo & Hirata, 2018)[^Amigó2018]. 
- -## Description - -The dynamics is generated by the following vector field - -```math -\\begin{align*} -\\dot{x_1} &= -a_1 (x_1 - x_2) + c_{yx}(y_1 - x_1) \\\\ -\\dot{x_2} &= -x_1 x_3 + a_2 x_1 - x_2 \\\\ -\\dot{x_3} &= x_1 x_2 - a_3 x_3 \\\\ -\\dot{y_1} &= -b_1 (y_1 - y_2) + c_{xy} (x_1 - y_1) \\\\ -\\dot{y_2} &= -y_1 y_3 + b_2 y_1 - y_2 \\\\ -\\dot{y_3} &= y_1 y_2 - b_3 y_3 -\\end{align*} -``` - -Default values for the parameters `a₁`, `a₂`, `a₃`, `b₁`, `b₂`, `b₃` are as in [^Amigó2018]. - -[^Amigó2018]: - Amigó, José M., and Yoshito Hirata. "Detecting directional couplings from - multivariate flows by the joint distance distribution." Chaos: An - Interdisciplinary Journal of Nonlinear Science 28.7 (2018): 075302. -""" -Base.@kwdef struct LorenzBidir6{V, CXY, CYX, A1, A2, A3, B1, B2, B3} <: ContinuousDefinition - xi::V = [0.1, 0.05, 0.2, 0.2, 0.25, 0.3] - c_xy::CXY = 0.2 - c_yx::CYX = 0.2 - a₁::A1 = 10 - a₂::A2 = 28 - a₃::A3 = 8/3 - b₁::B1 = 10 - b₂::B2 = 28 - b₃::B3 = 9/3 -end - -function system(definition::LorenzBidir6) - return ContinuousDynamicalSystem(eom_lorenzlorenzbidir6, definition.xi, definition) -end - -@inline @inbounds function eom_lorenzlorenzbidir6(u, p, t) - (; xi, c_xy, c_yx, a₁, a₂, a₃, b₁, b₂, b₃) = p - x1, x2, x3, y1, y2, y3 = u - - dx1 = -a₁*(x1 - x2) + c_yx*(y1 - x1) - dx2 = -x1*x3 + a₂*x1 - x2 - dx3 = x1*x2 - a₃*x3 - dy1 = -b₁*(y1 - y2) + c_xy*(x1 - y1) - dy2 = -y1*y3 + b₂*y1 - y2 - dy3 = y1*y2 - b₃*y3 - - return SVector{6}(dx1, dx2, dx3, dy1, dy2, dy3) -end diff --git a/src/example_systems/continuous/LorenzForced9.jl b/src/example_systems/continuous/LorenzForced9.jl deleted file mode 100644 index da77a0a34..000000000 --- a/src/example_systems/continuous/LorenzForced9.jl +++ /dev/null @@ -1,82 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: trajectory -using DynamicalSystemsBase: ContinuousDynamicalSystem -using Distributions: Uniform - -export LorenzForced9 - -""" - LorenzForced9{V} <: ContinuousDefinition - LorenzForced9(; xi = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - c_xy = 0.1, c_yx = 0.1, - c_zx = 0.05, c_zy = 0.05, - a₁ = 10, a₂ = 28, a₃ = 8/3, - b₁ = 10, b₂ = 28, b₃ = 8/3, - c₁ = 10, c₂ = 28, c₃ = 8/3) - -A system consisting of two bidirectionally coupled 3D Lorenz -systems forced by an external 3D Lorenz system (Amigó & Hirata, 2018). - -## Description - -The dynamics is generated by the following vector field - -```math -\\begin{align*} -\\dot{x_1} &= - a_1 (x_1 - x_2) + c_{yx}(y_1 - x_1) + c_{zx}(z_1 - x_1) \\\\ -\\dot{x_2} &= - x_1 x_3 + a_2 x_1 - x_2 \\\\ -\\dot{x_3} &= x_1 x_2 - a_3 x_3 \\\\ -\\dot{y_1} &= -b_1 (y_1 - y_2) + c_{xy} (x_1 - y_1) + c_{zy}(z_1 - y_1) \\\\ -\\dot{y_2} &= - y_1 y_3 + b_2 y_1 - y_2 \\\\ -\\dot{y_3} &= y_1 y_2 - b_3 y_3 \\\\ -\\dot{z_1} &= - c_1 (z_1 - z_2) \\\\ -\\dot{z_2} &= - z_1 z_3 + c_2 z_1 - z_2 \\\\ -\\dot{z_3} &= z_1 z_2 - c_3 z_3 -\\end{align*} -``` - -[^Amigó2018]: - Amigó, José M., and Yoshito Hirata. "Detecting directional couplings from - multivariate flows by the joint distance distribution." Chaos: An - Interdisciplinary Journal of Nonlinear Science 28.7 (2018): 075302. 
-""" -Base.@kwdef struct LorenzForced9{V,CXY,CYX,CZX,CZY,A1,A2,A3,B1,B2,B3,C1,C2,C3} <: ContinuousDefinition - xi::V = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] - c_xy::CXY = 1.0 - c_yx::CYX = 1.0 - c_zx::CZX = 1.0 - c_zy::CZY = 1.0 # beyond c = 2, systems syncronize - a₁::A1 = 10 - a₂::A2 = 28 - a₃::A3 = 8/3 - b₁::B1 = 10 - b₂::B2 = 28 - b₃::B3 = 8/3 - c₁::C1 = 10 - c₂::C2 = 28 - c₃::C3 = 8/3 -end - -function system(definition::LorenzForced9) - return ContinuousDynamicalSystem(eom_lorenzforced9, definition.xi, definition) -end - -@inline @inbounds function eom_lorenzforced9(u, p::LorenzForced9, t) - (; xi, c_xy, c_yx, c_zx, c_zy, a₁, a₂, a₃, b₁, b₂, b₃, a₃, c₁, c₂, c₃) = p - x₁, x₂, x₃, y₁, y₂, y₃, z₁, z₂, z₃ = u - - dx₁ = -a₁*(x₁ - x₂) + c_yx*(y₁ - x₁) + c_zx*(z₁ - x₁) - dx₂ = -x₁*x₃ + a₂*x₁ - x₂ - dx₃ = x₁*x₂ - a₃*x₃ - - dy₁ = -b₁*(y₁ - y₂) + c_xy*(x₁ - y₁) + c_zy*(z₁ - y₁) - dy₂ = -y₁*y₃ + b₂*y₁ - y₂ - dy₃ = y₁*y₂ - b₃*y₃ - - dz₁ = -c₁*(z₁ - z₂) - dz₂ = -z₁*z₃ + c₂*z₁ - z₂ - dz₃ = z₁*z₂ - c₃*z₃ - - return SVector{9}(dx₁, dx₂, dx₃, dy₁, dy₁, dy₃, dz₁, dz₂, dz₃) -end diff --git a/src/example_systems/continuous/LorenzTransitive9.jl b/src/example_systems/continuous/LorenzTransitive9.jl deleted file mode 100644 index 543ff6d97..000000000 --- a/src/example_systems/continuous/LorenzTransitive9.jl +++ /dev/null @@ -1,85 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: ContinuousDynamicalSystem - -export LorenzTransitive9 - -""" - LorenzTransitive9 <: ContinuousDefinition - LorenzTransitive9(; xi = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.9], - σ₁ = 10.0, σ₂ = 10.0, σ₃ = 10.0, - ρ₁ = 28.0, ρ₂ = 28.0, ρ₃ = 28.0, - β₁ = 8/3, β₂ = 8/3, β₃ = 8.3, - c₁₂ = 1.0, c₂₃ = 1.0) - -Initalise a dynamical system consisting of three coupled Lorenz attractors with -a transitive causality chain where X₁ → X₂ and X₂ → X₃. In total, the three -3D-subsystems create a 9-dimensional dynamical system. - -The strength of the forcing X₁ → X₂ is controlled by the parameter `c₁`, and -the forcing from X₂ → X₃ by `c₂`. The remaining parameters are the usual -parameters for the Lorenz system, where the subscript `i` refers to the -subsystem Xᵢ. - -## Equations of motion - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -\\dot{x_1} &= \\sigma_1(y_1 - x_1) \\\\ -\\dot{y_1} &= \\rho_1 x_1 - y_1 - x_1 z_1 \\\\ -\\dot{z_1} &= x_1 y_1 - \\beta_1 z_1 \\\\ -\\dot{x_2} &= \\sigma_2 (y_2 - x_2) + c_{12}(x_1 - x_2) \\\\ -\\dot{y_2} &= \\rho_2 x_2 - y_2 - x_2 z_2 \\\\ -\\dot{z_2} &= x_2 y_2 - \\beta_2 z_2 \\\\ -\\dot{x_3} &= \\sigma_3 (y_3 - x_3) + c_{23} (x_2 - x_3) \\\\ -\\dot{y_3} &= \\rho_3 x_3 - y_3 - x_3 z_3 \\\\ -\\dot{z_3} &= x_3 y_3 - \\beta_3 z_3 -\\end{aligned} -``` - -## Usage in literature - -This system was studied by Papana et al. (2013) for coupling strengths -``c_{12} = 0, 1, 3, 5`` and ``c_{23} = 0, 1, 3, 5``. 
-""" -Base.@kwdef struct LorenzTransitive9{V,S1,S2,S3,P1,P2,P3,B1,B2,B3,C1,C2} <: ContinuousDefinition - xi::V = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.9] - σ₁::S1 = 10.0 - σ₂::S2 = 10.0 - σ₃::S3 = 10.0 - ρ₁::P1 = 28.0 - ρ₂::P2 = 28.0 - ρ₃::P3 = 28.0 - β₁::B1 = 8/3 - β₂::B2 = 8/3 - β₃::B3 = 8.3 - c₁₂::C1 = 1.0 - c₂₃::C2 = 1.0 -end - -function system(definition::LorenzTransitive9) - return ContinuousDynamicalSystem(eom_lorenztransitive9, definition.xi, definition) -end - -@inline @inbounds function eom_lorenztransitive9(u, p::LorenzTransitive9, t) - x₁, y₁, z₁, x₂, y₂, z₂, x₃, y₃, z₃ = u - (; xi, σ₁, σ₂, σ₃, ρ₁, ρ₂, ρ₃, β₁, β₂, β₃, c₁₂, c₂₃) = p - - # Subsystem 1 - dx₁ = σ₁*(y₁-x₁) - dy₁ = ρ₁*x₁ - y₁ - x₁*z₁ - dz₁ = x₁*y₁ - β₁*z₁ - - # Subsystem 2 - dx₂ = σ₂*(y₂-x₂) + c₁₂*(x₁ - x₂) - dy₂ = ρ₂*x₂ - y₂ - x₂*z₂ - dz₂ = x₂*y₂ - β₂*z₂ - - # Subsystem 3 - dx₃ = σ₃*(y₃-x₃) + c₂₃*(x₂ - x₃) - dy₃ = ρ₃*x₃ - y₃ - x₃*z₃ - dz₃ = x₃*y₃ - β₃*z₃ - return SVector{9}(dx₁, dy₁, dz₁, dx₂, dy₂,dz₂, dx₃, dy₃, dz₃) -end diff --git a/src/example_systems/continuous/MediatedLink9.jl b/src/example_systems/continuous/MediatedLink9.jl deleted file mode 100644 index 9d662862b..000000000 --- a/src/example_systems/continuous/MediatedLink9.jl +++ /dev/null @@ -1,75 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: ContinuousDynamicalSystem - -export MediatedLink9 - -""" - MediatedLink9 <: ContinuousDefinition - MediatedLink9(; xi = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - ωx = 1.0, ωy = 1.015, ωz = 0.985, - k = 0.15, l = 0.2, m = 10.0, - c = 0.06) → ContinuousDynamicalSystem - -A three-subsystem dynamical system where `X` and `Y` are driven by `Z` (Krakovská, -2018)[^Krakovská2018]. - -## Description - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -dx_1 &= -\\omega_x x_2 - x_3 + c*(z_1 - x_1) \\\\ -dx_2 &= \\omega_x x_1 + k*x_2 \\\\ -dx_3 &= l + x_3(x_1 - m) \\\\ -dy_1 &= -\\omega_y y_2 - y_3 + c*(z_1 - y_1) \\\\ -dy_2 &= \\omega_y y_1 + k*y_2 \\\\ -dy_3 &= l + y_3(y_1 - m) \\\\ -dz_1 &= -\\omega_z z_2 - z_3 \\\\ -dz_2 &= \\omega_z z_1 + k*z_2 \\\\ -dz_3 &= l + z_3(z_1 - m) -\\end{aligned} -``` - -At the default -value of the coupling constant `c = 0.06`, the responses `X` and `Y` are already -synchronized to the driver `Z`. - -[^Krakovská2018]: - Krakovská, Anna, et al. "Comparison of six methods for the detection of - causality in a bivariate time series." 
Physical Review E 97.4 (2018): 042207 -""" -Base.@kwdef struct MediatedLink9{V,W1,W2,W3,K,L,M,C} <: ContinuousDefinition - xi::V = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] - ωx::W1 = 1.0 - ωy::W2 = 1.015 - ωz::W3 = 0.985 - k::K = 0.15 - l::L = 0.2 - m::M = 10.0 - c::C = 0.06 -end - -function system(definition::MediatedLink9) - return ContinuousDynamicalSystem(eom_mediatedlink, definition.xi, definition) -end - -@inline @inbounds function eom_mediatedlink(u, p::MediatedLink9, t) - (; xi, ωx, ωy, ωz, k, l, m, c) = p - x₁, x₂, x₃, y₁, y₂, y₃, z₁, z₂, z₃ = u - - dx₁ = -ωx*x₂ - x₃ + c*(z₁ - x₁) - dx₂ = ωx*x₁ + k*x₂ - dx₃ = l + x₃*(x₁ - m) - - dy₁ = -ωy*y₂ - y₃ + c*(z₁ - y₁) - dy₂ = ωy*y₁ + k*y₂ - dy₃ = l + y₃*(y₁ - m) - - dz₁ = -ωz*z₂ - z₃ - dz₂ = ωz*z₁ + k*z₂ - dz₃ = l + z₃*(z₁ - m) - - SVector{9}(dx₁, dx₂, dx₃, dy₁, dy₂, dy₃, dz₁, dz₂, dz₃) -end diff --git a/src/example_systems/continuous/Repressilator6.jl b/src/example_systems/continuous/Repressilator6.jl deleted file mode 100644 index 115c1b336..000000000 --- a/src/example_systems/continuous/Repressilator6.jl +++ /dev/null @@ -1,60 +0,0 @@ -export repressilator - -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: trajectory -using DynamicalSystemsBase: ContinuousDynamicalSystem - -export Repressilator6 - -""" - Repressilator6 <: ContinuousDefinition - Repressilator6(; xi = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], α = 10.0, α₀ = 0.0, β = 100.0, - n = 2) → ContinuousDynamicalSystem - -A six-dimensional repressilator (or repression-driven oscillator) from -[Elowitz2000](@citet). - -Used in [Sun2014](@citet) to study the performance of the causation entropy -algorithm. - -## Description - -```math -\\begin{align*} -\\dfrac{dm_1}{dt} &= -m1 + \\dfrac{\\alpha}{1 + p_3^n} + \\alpha_0 \\\\ -\\dfrac{dm_2}{dt} &= -m2 + \\dfrac{\\alpha}{1 + p_1^n} + \\alpha_0 \\\\ -\\dfrac{dm_3}{dt} &= -m3 + \\dfrac{\\alpha}{1 + p_2^n} + \\alpha_0 \\\\ -\\dfrac{dp_1}{dt} &= -\\beta(p_1 - m_1) \\\\ -\\dfrac{dp_2}{dt} &= -\\beta(p_2 - m_2) \\\\ -\\dfrac{dp_3}{dt} &= -\\beta(p_3 - m_3) \\\\ -\\end{align*} -``` -""" -Base.@kwdef struct Repressilator6{V, A, A0, B, N} <: ContinuousDefinition - xi::V = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6] - α::A = 10.0 - α₀::A0 = 0.0 - β::B = 100.0 - n::N = 2 -end - -function system(definition::Repressilator6) - return ContinuousDynamicalSystem(eom_repressilator6, definition.xi, definition) -end - -@inline @inbounds function eom_repressilator6(u, p, t) - (; xi, α, α₀, n, β) = p - # pᵢ := concentration of protein repressor i - # mᵢ := concentration of mRNA associated with pᵢ - m₁, m₂, m₃, p₁, p₂, p₃ = u - - ṁ₁ = -m₁ + α/(1 + p₃^n) + α₀ - ṁ₂ = -m₂ + α/(1 + p₁^n) + α₀ - ṁ₃ = -m₃ + α/(1 + p₂^n) + α₀ - ṗ₁ = -β*(p₁ - m₁) - ṗ₂ = -β*(p₂ - m₂) - ṗ₃ = -β*(p₃ - m₃) - - return SVector{6}(ṁ₁, ṁ₂, ṁ₃, ṗ₁, ṗ₂, ṗ₃) -end diff --git a/src/example_systems/continuous/RosslerBidir6.jl b/src/example_systems/continuous/RosslerBidir6.jl deleted file mode 100644 index 1013c0fa7..000000000 --- a/src/example_systems/continuous/RosslerBidir6.jl +++ /dev/null @@ -1,66 +0,0 @@ -using DynamicalSystemsBase: ContinuousDynamicalSystem -using StaticArrays: SVector - -export RosslerBidir6 - -""" - RosslerBidir6 <: ContinuousDefinition - RosslerBidir6(; xi = [0.1, 0.1, 0.2, 0.3, 0.3, 0.4], - a = 0.1, b = 0.1, c = 14.0, ϵ₁ = 0.0, ϵ₂ = 0.0, - ω₁ = 1 + 0.015, ω₂ = 1 - 0.015) - -A bidirectionally coupled 6D Rossler system from Krakovská et al. (2018)[^Krakovská2018]. 
- -## Description - -The system consists of two separate subsystems, each being a 3D Rossler -attractor. The subsystems are bidirectionally coupled, influencing each other -through variables ``x_1`` and ``x_2`. - -```math -\\begin{align*} -\\dfrac{dx_1}{dt} &= \\omega_1 (-y_1) - z_1 + c_{21}*(x_1 - x_2) \\\\ -\\dfrac{dy_1}{dt} &= \\omega_1 x_1 + a y_1 \\\\ -\\dfrac{dz_1}{dt} &= b + z_1 (x_1 - c) \\\\ -\\dfrac{dx_2}{dt} &= \\omega_2 (-y_2) - z_2 + c_{12} (x_2 - x_1) \\\\ -\\dfrac{dy_2}{dt} &= \\omega_2*x_2 + a*y_2 \\\\ -\\dfrac{dz_2}{dt} &= b + z_2 (x_2 - c) \\\\ -\\end{align*} -``` - -with ``c_{12} \\geq 0`` and ``c_{21} \\geq 0``. - -[^Krakovská2018]: - Krakovská, A., Jakubík, J., Chvosteková, M., Coufal, D., Jajcay, N., & Paluš, M. - (2018). Comparison of six methods for the detection of causality in a bivariate time - series. Physical Review E, 97(4), 042207. -""" -Base.@kwdef struct RosslerBidir6{V, A, B, C, E1, E2, Ω1, Ω2} <: ContinuousDefinition - xi::V = [0.1, 0.1, 0.2, 0.3, 0.3, 0.4] - a::A = 0.1 - b::B = 0.1 - c::C = 14.0 - c12::E1 = 0.0 - c21::E2 = 0.0 - ω₁::Ω1 = 1 + 0.015 - ω₂::Ω2 = 1 - 0.015 -end - -function system(definition::RosslerBidir6) - return ContinuousDynamicalSystem(eom_rosslerrosslerbidir6, definition.xi, definition) -end - -function eom_rosslerrosslerbidir6(u, p, t) - (; xi, a, b, c, c12, c21, ω₁, ω₂) = p - x₁, y₁, z₁, x₂, y₂, z₂ = u - - # First Rössler system - dx₁ = ω₁*(-y₁) - z₁ + c21*(x₁ - x₂) - dy₁ = ω₁*x₁ + a*y₁ - dz₁ = b + z₁*(x₁ - c) - # Second Rössler system - dx₂ = ω₂*(-y₂) - z₂ + c12*(x₂ - x₁) - dy₂ = ω₂*x₂ + a*y₂ - dz₂ = b + z₂*(x₂ - c) - return SVector{6}(dx₁, dy₁, dz₁, dx₂, dy₂, dz₂) -end diff --git a/src/example_systems/continuous/RosslerForced9.jl b/src/example_systems/continuous/RosslerForced9.jl deleted file mode 100644 index 167a801bc..000000000 --- a/src/example_systems/continuous/RosslerForced9.jl +++ /dev/null @@ -1,94 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: ContinuousDynamicalSystem -using DynamicalSystemsBase: trajectory -using SimpleDiffEq: SimpleATsit5 -using Distributions: Uniform - -export RosslerForced9 - -""" - RosslerForced9 <: ContinuousDefinition - RosslerForced9(; xi = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - ω₁ = 1.015, ω₂ = 0.985, ω₃ = 0.95, - c_xy = 0.1, c_yx = 0.1, - c_zx = 0.05, c_zy = 0.05, - a₁ = 0.15, a₂ = 0.2, a₃ = 10, - b₁ = 0.15, b₂ = 0.2, b₃ = 10, - c₁ = 0.15, c₂ = 0.2, c₃ = 10) - -Equations of motion for a system consisting of three coupled 3D Rössler systems -(``X``, ``Y``, ``Z``), giving a 9D system (Amigó & Hirata, 2018)[^Amigó2018]. - -## Description - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -\\dot{x_1} &= -\\omega_1 (x_2 + x_3) + c_{yx}(y_1 - x_1) + c_{zx}(z_1 - x_1) \\\\ -\\dot{x_2} &= \\omega_1 x_1 + a_1 x_2 \\\\ -\\dot{x_3} &= a_2 + x_3 (x_1 - a_3) \\\\ -\\dot{y_1} &= -\\omega_1 (y_2 + y_3) + c_{xy}(x_1 - y_1) + c_{zy}(z_1 - y_1) \\\\ -\\dot{x_2} &= \\omega_2 y_1 + b_1 y_2 \\\\ -\\dot{x_3} &= b_2 + x_3 (y_1 - b_3) \\\\ -\\dot{y_1} &= -\\omega_2 (z_2 + z_3) \\\\ -\\dot{x_2} &= \\omega_2 z_1 + c_1 z_2 \\\\ -\\dot{x_3} &= c_2 + z_3 (z_1 - c_3). -\\end{aligned} -``` - -The external system ``Z`` influences both ``X`` and ``Y`` (controlled by `c_zx` and `c_zy`). -Simultaneously, the subsystems ``X`` and ``Y`` bidirectionally -influences each other (controlled by `c_xy` and `c_yx`). -The ``X`` and ``Y`` subsystems are mostly synchronized for `c_xy > 0.1` or -`c_yx > 0.1`. 
- -[^Amigó2018]: - Amigó, José M., and Yoshito Hirata. "Detecting directional couplings from - multivariate flows by the joint distance distribution." Chaos: An - Interdisciplinary Journal of Nonlinear Science 28.7 (2018): 075302. -""" -Base.@kwdef struct RosslerForced9{V,W1,W2,W3,CXY,CYX,CZX,CZY,A1,A2,A3,B1,B2,B3,C1,C2,C3} <: ContinuousDefinition - xi::V = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] - ω₁::W1 = 1.015 - ω₂::W2 = 0.985 - ω₃::W3 = 0.95 - c_xy::CXY = 0.1 - c_yx::CYX = 0.1 - c_zx::CZX = 0.05 - c_zy::CZY = 0.05 - a₁::A1 = 0.15 - a₂::A2 = 0.2 - a₃::A3 = 10 - b₁::B1 = 0.15 - b₂::B2 = 0.2 - b₃::B3 = 10 - c₁::C1 = 0.15 - c₂::C2 = 0.2 - c₃::C3 = 10 -end - - -function system(definition::RosslerForced9) - return ContinuousDynamicalSystem(eom_rosslerforced9, definition.xi, definition) -end - -@inline @inbounds function eom_rosslerforced9(u, p, t) - (; xi, ω₁, ω₂, ω₃, c_xy, c_yx, c_zx, c_zy, a₁, a₂, a₃, b₁, b₂, b₃, a₃, c₁, c₂, c₃) = p - x1, x2, x3, y1, y2, y3, z1, z2, z3 = u - - dx1 = -ω₁*(x2 + x3) + c_yx*(y1 - x1) + c_zx*(z1 - x1) - dx2 = ω₁*x1 + a₁*x2 - dx3 = a₂ + x3*(x1 - a₃) - - dy1 = -ω₂*(y2 + y3) + c_xy*(x1 - y1) + c_zy*(z1 - y1) - dy2 = ω₂*y1 + b₁*y2 - dy3 = b₂ + y3*(y1 - b₃) - - dz1 = -ω₂*(z2 + z3) - dz2 = ω₂*z1 + c₁*z2 - dz3 = c₂ + z3*(z1 - c₃) - - return SVector{9}(dx1, dx2, dx3, dy1, dy2, dy3, dz1, dz2, dz3) -end diff --git a/src/example_systems/continuous/RosslerLorenzUnidir6.jl b/src/example_systems/continuous/RosslerLorenzUnidir6.jl deleted file mode 100644 index c29d466bb..000000000 --- a/src/example_systems/continuous/RosslerLorenzUnidir6.jl +++ /dev/null @@ -1,68 +0,0 @@ -using DynamicalSystemsBase: ContinuousDynamicalSystem -using StaticArrays: SVector - -export RosslerLorenzUnidir6 - -""" - - RosslerLorenzUnidir6 <: ContinuousDefinition - RosslerLorenzUnidir6(; xi = [0.1, 0.2, 0.3, 0.05, 0.1, 0.15], - a₁ = 6, a₂ = 6, a₃ = 2.0, b₁ = 10, b₂ = 28, b₃ = 8/3, c_xy = 1.0) - -Initialise a Rössler-Lorenz system consisting of two independent 3D subsystems: -one Rössler system and one Lorenz system. They are coupled such that the -second component (`x₂`) of the Rössler system unidirectionally forces the -second component (`y₂`) of the Lorenz system. - -The parameter `c_xy` controls the coupling strength. The implementation here also -allows for tuning the parameters of each subsystem by introducing the constants -`a₁`, `a₂`, `a₃`, `b₁`, `b₂`, `b₃`. Default values for these parameters are -as in [1]. - -## Equations of motion - -The dynamics is generated by the following vector field - -## Description - -```math -\\begin{align*} -\\dot x_1 &= -a_1(x_2 + x_3) \\\\ -\\dot x_2 &= a_2(x_1 + a_2x_2) \\\\ -\\dot x_3 &= a_1(a_2 + x_3(x_1 - a_3)) \\\\ -\\dot y_1 &= b_1(y_2 - y_1) \\\\ -\\dot y_2 &= y_1(b_2 - y_3) - y_2 +c_{xy}(x_2)^2 \\\\ -\\dot y_3 &= y_1 y_2 - b_3y_3 -\\end{align*} -``` - -with the coupling constant ``c_{xy} \\geq 0``. 
-""" -Base.@kwdef struct RosslerLorenzUnidir6{V, A1, A2, A3, B1, B2, B3, C} <: ContinuousDefinition - xi::V = [0.1, 0.2, 0.3, 0.05, 0.1, 0.15] - a₁::A1 = 6 - a₂::A2 = 0.2 - a₃::A3 = 5.7 - b₁::B1 = 10 - b₂::B2 = 28 - b₃::B3 = 8/3 - c_xy::C = 1.0 -end - -function system(definition::RosslerLorenzUnidir6) - return ContinuousDynamicalSystem(eom_rosslerlorenzunidir6, definition.xi, definition) -end - -function eom_rosslerlorenzunidir6(u, p, t) - (; xi, a₁, a₂, a₃, b₁, b₂, b₃, c_xy) = p - x1, x2, x3, y1, y2, y3 = u - - dx1 = -a₁*(x2 + x3) - dx2 = a₁*(x1 + a₂*x2) - dx3 = a₁*(a₂ + x3*(x1 - a₃)) - dy1 = b₁*(-y1 + y2) - dy2 = b₂*y1 - y2 - y1*y3 + c_xy*(x2^2) - dy3 = y1*y2 - b₃*y3 - - return SVector{6}(dx1, dx2, dx3, dy1, dy2, dy3) -end diff --git a/src/example_systems/continuous/Thomas3.jl b/src/example_systems/continuous/Thomas3.jl deleted file mode 100644 index 7a3c33cf1..000000000 --- a/src/example_systems/continuous/Thomas3.jl +++ /dev/null @@ -1,41 +0,0 @@ -using DynamicalSystemsBase: ContinuousDynamicalSystem -using StaticArrays: SVector - -export Thomas3 - -""" - Thomas3 <: ContinuousDefinition - Thomas3(; xi = [0.11, 0.09, 0.10], b = 0.20) - -[Thomas' cyclically symmetric attractor](https://en.wikipedia.org/wiki/Thomas%27_cyclically_symmetric_attractor) -is a continuous dynamical system with three variables. It has a single free parameter -`b`, for which interesting behaviour occurs when `b ∈ (0, 1)`. In particular, -the system is chaotic whenever `b < 0.20`. - -## Definition - -```math -\\begin{align*} -\\dfrac{dx}{dt} &= sin(y) - bx \\\\ -\\dfrac{dy}{dt} &= sin(z) - by \\\\ -\\dfrac{dz}{dt} &= sin(x) - bz -\\end{align*} -``` -""" -Base.@kwdef struct Thomas3{V, B} <: ContinuousDefinition - xi::V = [0.11, 0.09, 0.10] - b::B = 0.20 -end - -function system(definition::Thomas3) - return ContinuousDynamicalSystem(eom_thomas3, definition.xi, definition) -end - -function eom_thomas3(u, p::Thomas3, t) - b = p.b - x, y, z = u - dx = sin(y) - b*x - dy = sin(z) - b*y - dz = sin(x) - b*z - return SVector{3}(dx, dy, dz) -end diff --git a/src/example_systems/continuous/deprecate.jl b/src/example_systems/continuous/deprecate.jl deleted file mode 100644 index 50ae1e28a..000000000 --- a/src/example_systems/continuous/deprecate.jl +++ /dev/null @@ -1,1159 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: trajectory -using DynamicalSystemsBase: ContinuousDynamicalSystem -using SimpleDiffEq: SimpleATsit5 - -export rossler_rossler -export rossler_lorenz -export lorenz_lorenz_bidir -export chuacircuits_driven -export lorenz_lorenz_lorenz_bidir_forced -export lorenz_lorenz_lorenz_transitive -export lorenzdiffusive -export rossler_rossler_rossler_bidir_forced -export mediated_link -export rossler_rossler_bidir -export chuacircuit_nscroll_sine - -@inline @inbounds function eom_chuacircuit_nscroll_sine(u, p, t) - α, β, γ, a, b, c, σx, σy, σz = (p...,) - x, y, z = u - - n::Int = c + 1 - if x >= 2*a*c - fx = (b*pi/2*a)*(x - 2*a*c) - elseif -2*a*c < x < 2*a*c - d = ifelse(isodd(n), pi, 0) - fx = -b*sin((pi*x/2*a) + d) - elseif x <= -2*a*c - fx = (b*pi/2*a)*(x + 2*a*c) - end - - ηx = σx == 0 ? 0 : rand(Normal(0, σx)) - ηy = σy == 0 ? 0 : rand(Normal(0, σy)) - ηz = σz == 0 ? 0 : rand(Normal(0, σz)) - - dx = α*(y - fx) + ηx - dy = x - y + z + ηy - dz = -β*y - γ*z + ηz - return SVector{3}(dx, dy, dz) -end - -function chuacircuit_nscroll_sine(u₀, α, β, γ, a, b, c::Int, σx, σy, σz) - @warn "`chuacircuit_nscroll_sine` is deprecated in CausalityTools v2. 
"* - "Use `system(ChuaScrollSine3())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - p = @LArray [α, β, γ, a, b, c, σx, σy, σz] (:α, :β, :γ, :a, :b, :c, :σx, :σy, :σz) - ContinuousDynamicalSystem(eom_chuacircuit_nscroll_sine, u₀, p) -end - -""" - chuacircuit_nscroll_sine(;u₀ = [0.0, 0.0, 0.28695], - α = 10.814, β = 14, γ = 0, a = 1.3, b = 0.11, c = 2, - σx = 0.0, σy = 0.0, σz = 0.0) - -Initialise an adjusted Chua system giving rise to n-scroll attractors [1]. - -## Equations of motion - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -\\dot{x} = \\alpha (y - fx) + \\eta x \\\\ -\\dot{y} = x - y + z + \\eta y \\\\ -\\dot{z} = -\\beta y - \\gamma z + \\eta z -\\end{aligned} -``` - -where ``\\eta x``, ``\\eta z``, and ``\\eta z`` are drawn independently from -normal distributions with zero mean and standard deviations `σx`, `σy` -and `σz` at each iteration. - -``fx`` is given by the following conditions: - -```julia -n::Int = c + 1 - -if x >= 2*a*c - fx = (b*pi/2*a)*(x - 2*a*c) -elseif -2*a*c < x < 2*a*c - d = ifelse(isodd(n), pi, 0) - fx = -b*sin((pi*x/2*a) + d) -elseif x <= -2*a*c - fx = (b*pi/2*a)*(x + 2*a*c) -end -``` - -## References - -1. Tang, Wallace KS, et al. "Generation of n-scroll attractors via - sine function." IEEE Transactions on Circuits and Systems I: - Fundamental Theory and Applications 48.11 (2001): 1369-1372. -""" -chuacircuit_nscroll_sine(;u₀ = [0.0, 0.0, 0.28695], - α = 10.814, β = 14, γ = 0, a = 1.3, b = 0.11, c = 2, - σx = 0.0, σy = 0.0, σz = 0.0) = - chuacircuit_nscroll_sine(u₀, α, β, γ, a, b, c, σx, σy, σz) - -""" - eom_rossler_rossler(u, p, t) → Function - -Equations of motions for a 6D dynamical system consisting -of two 3D Rössler attractors. -""" -function eom_rossler_rossler(u, p, t) - a, b, c, ϵ₁, ϵ₂, ω₁, ω₂ = ([p[i] for i = 1:7]...,) - x₁, y₁, z₁, x₂, y₂, z₂ = ([u[i] for i = 1:6]...,) - # First Rössler system - dx₁ = ω₁*(-y₁) - z₁ + ϵ₂*(x₁ - x₂) - dy₁ = ω₁*x₁ + a*y₁ - dz₁ = b + z₁*(x₁ - c) - - # Second Rössler system - dx₂ = ω₂*(-y₂) - z₂ + ϵ₁*(x₂ - x₁) - dy₂ = ω₂*x₂ + a*y₂ - dz₂ = b + z₂*(x₂ - c) - return SVector{6}(dx₁, dy₁, dz₁, dx₂, dy₂, dz₂) -end - -""" - rossler_rossler(u₀, a, b, c, ϵ₁, ϵ₂, ω₁, ω₂) → ContinuousDynamicalSystem - -A coupled 6D Rossler system from Krakovská et al. (2018). -The system consists of two separate subsystems, each being a 3D Rossler -attractor. The subsystems are bidirectionally coupled, influencing each other -through variables `x₁` and `x₂`. - -The parameters and default values (if calling the keyword version of `rrbi`) -are as follows: - -``` -a = 0.1 # parameter in the Rössler attractor -b = 0.1 # parameter in the Rössler attractor -c = 14.0 # parameter in the Rössler attractor -ϵ₁ = 0.0 # influence of subsystem 1 on subsystem 2 -ϵ₂ = 0.0 # influence of subsystem 2 on subsystem 1 -ω₁ = 1+0.015 # the frequency of the first system -ω₂ = 1-0.015 # the frequency of the second system -``` - -The dynamics is generated by the vector field: - -```math -\\begin{aligned} -\\dot x₁ = ω₁ (-y₁) - z₁ + ϵ₂(x₁ - x₂) -\\dot y₁ = ω₁ x₁ + ay₁ -\\dot z₁ = b + z₁(x₁ - c) - -\\dot x₂ = ω₂(-y₂) - z₂ + ϵ₁(x₂ - x₁) -\\dot y₂ = ω₂x₂ + ay₂ -\\dot z₂ = b + z₂(x₂ - c) -\\end{aligned} -``` - -with the coupling constant ``c \\geq 0``. - -# References -Krakovská, A., Jakubík, J., Chvosteková, M., Coufal, D., Jajcay, N., & Paluš, M. (2018). Comparison of six methods for the detection of causality in a bivariate time series. Physical Review E, 97(4), 042207. 
-""" -function rossler_rossler(u₀, a, b, c, ϵ₁, ϵ₂, ω₁, ω₂) - @warn "`rossler_rossler` is deprecated in CausalityTools v2. "* - "Use `system(RosslerRossler6())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - p = @LArray [a, b, c, ϵ₁, ϵ₂, ω₁, ω₂] (:a, :b, :c, :ϵ₁, :ϵ₂, :ω₁, :ω₂) - return ContinuousDynamicalSystem(eom_rossler_rossler, u₀, p) -end - -""" - rossler_rossler(;u₀ = rand(6), a = 0.1, b = 0.1, c = 14.0, ϵ₁ = 0.0, - ϵ₂ = 0.0, ω₁ = 1 + 0.015, ω₂ = 1 - 0.015) → ContinuousDynamicalSystem - -A coupled 6D Rossler system from Krakovská et al. (2018). -The system consists of two separate subsystems, each being a 3D Rossler -attractor. The subsystems are bidirectionally coupled, influencing each other -through variables `x₁` and `x₂`. - -The parameters and default values (if calling the keyword version of `rrbi`) -are as follows: - -``` -a = 0.1 # parameter in the Rössler attractor -b = 0.1 # parameter in the Rössler attractor -c = 14.0 # parameter in the Rössler attractor -ϵ₁ = 0.0 # influence of subsystem 1 on subsystem 2 -ϵ₂ = 0.0 # influence of subsystem 2 on subsystem 1 -ω₁ = 1+0.015 # the frequency of the first system -ω₂ = 1-0.015 # the frequency of the second system -``` - -The dynamics is generated by the vector field: - -```math -\\begin{aligned} -dx₁ = ω₁*(-y₁) - z₁ + ϵ₂*(x₁ - x₂) -dy₁ = ω₁*x₁ + a*y₁ -dz₁ = b + z₁*(x₁ - c) - -dx₂ = ω₂*(-y₂) - z₂ + ϵ₁*(x₂ - x₁) -dy₂ = ω₂*x₂ + a*y₂ -dz₂ = b + z₂*(x₂ - c) -\\end{aligned} -``` - -with the coupling constant ``c \\geq 0``. - -# References -Krakovská, A., Jakubík, J., Chvosteková, M., Coufal, D., Jajcay, N., & Paluš, M. (2018). Comparison of six methods for the detection of causality in a bivariate time series. Physical Review E, 97(4), 042207. -""" -rossler_rossler(;u₀ = rand(6), a = 0.1, b = 0.1, c = 14.0, - ϵ₁ = 0.0, ϵ₂ = 0.0, ω₁ = 1 + 0.015, ω₂ = 1 - 0.015) = - rossler_rossler(u₀, a, b, c, ϵ₁, ϵ₂, ω₁, ω₂) - -@inline @inbounds function eom_rossler_lorenz(u, p, t) - c_xy, a₁, a₂, a₃, b₁, b₂, b₃ = (p...,) - x1, x2, x3, y1, y2, y3 = u[1], u[2], u[3], u[4], u[5], u[6] - - dx1 = -a₁*(x2 + x3) - dx2 = a₁*(x1 + a₂*x2) - dx3 = a₁*(a₂ + x3*(x1 - a₃)) - dy1 = b₁*(-y1 + y2) - dy2 = b₂*y1 - y2 - y1*y3 + c_xy*(x2^2) - dy3 = y1*y2 - b₃*y3 - - return SVector{6}(dx1, dx2, dx3, dy1, dy2, dy3) -end - -""" - rossler_lorenz(;u₀ = rand(6), a₁ = 6, a₂ = 6, a₃ = 2.0, - b₁ = 10, b₂ = 28, b₃ = 8/3, c_xy = 1) → ContinuousDynamicalSystem - -Initialise a Rössler-Lorenz system consisting of two independent 3D subsystems: -one Rössler system and one Lorenz system. They are coupled such that the -second component (`x₂`) of the Rössler system unidirectionally forces the -second component (`y₂`) of the Lorenz system. - -The parameter `c_xy` controls the coupling strength. The implementation here also -allows for tuning the parameters of each subsystem by introducing the constants -`a₁`, `a₂`, `a₃`, `b₁`, `b₂`, `b₃`. Default values for these parameters are -as in [1]. - -## Equations of motion - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -\\dot x_1 &= -a_1(x_2 + x_3) \\\\ -\\dot x_2 &= a_2(x_1 + a_2x_2) \\\\ -\\dot x_3 &= a_1(a_2 + x_3(x_1 - a_3)) \\\\ -\\dot y_1 &= b_1(y_2 - y_1) \\\\ -\\dot y_2 &= y_1(b_2 - y_3) - y_2 +c_{xy}(x_2)^2 \\\\ -\\dot y_3 &= y_1 y_2 - b_3y_3 -\\end{aligned} -``` - -with the coupling constant ``c_{xy} \\geq 0``. - -## References - -1. Krakovská, Anna, et al. "Comparison of six methods for the detection of causality in a - bivariate time series." 
Physical Review E 97.4 (2018):042207. - [https://journals.aps.org/pre/abstract/10.1103/PhysRevE.97.042207](https://journals.aps.org/pre/abstract/10.1103/PhysRevE.97.042207) -""" -function rossler_lorenz(;u₀ = rand(6), a₁ = 6, a₂ = 0.2, a₃ = 5.7, - b₁ = 10, b₂ = 28, b₃ = 8/3, c_xy = 1) - @warn "`rossler_lorenz` is deprecated in CausalityTools v2. "* - "Use `system(RosslerLorenzUnidir6())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - - p = @LArray [c_xy, a₁, a₂, a₃, b₁, b₂, b₃] (:c_xy, :a₁, :a₂, :a₃, :b₁, :b₂, :b₃) - ContinuousDynamicalSystem(eom_rossler_lorenz, u₀, p) -end - -@inline @inbounds function eom_lorenz_lorenz_bidir(u, p, t) - c_xy, c_yx, a₁, a₂, a₃, b₁, b₂, b₃ = (p...,) - x1, x2, x3, y1, y2, y3 = (u...,) - - dx1 = -a₁*(x1 - x2) + c_yx*(y1 - x1) - dx2 = -x1*x3 + a₂*x1 - x2 - dx3 = x1*x2 - a₃*x3 - dy1 = -b₁*(y1 - y2) + c_xy*(x1 - y1) - dy2 = -y1*y3 + b₂*y1 - y2 - dy3 = y1*y2 - b₃*y3 - - return SVector{6}(dx1, dx2, dx3, dy1, dy2, dy3) -end - -""" - lorenz_lorenz_bidir(; u0 = rand(6), - c_xy = 0.2, c_yx = 0.2, - a₁ = 10, a₂ = 28, a₃ = 8/3, - b₁ = 10, b₂ = 28, b₃ = 9/3) → ContinuousDynamicalSystem - -Initialise a bidirectionally coupled Lorenz-Lorenz system, where each -subsystem is a 3D Lorenz system [1]. Default values for the parameters -`a₁`, `a₂`, `a₃`, `b₁`, `b₂`, `b₃` are as in [1]. - -## Equations of motion - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -\\dot{x_1} &= -a_1 (x_1 - x_2) + c_{yx}(y_1 - x_1) \\\\ -\\dot{x_2} &= -x_1 x_3 + a_2 x_1 - x_2 \\\\ -\\dot{x_3} &= x_1 x_2 - a_3 x_3 \\\\ -\\dot{y_1} &= -b_1 (y_1 - y_2) + c_{xy} (x_1 - y_1) \\\\ -\\dot{y_2} &= -y_1 y_3 + b_2 y_1 - y_2 \\\\ -\\dot{y_3} &= y_1 y_2 - b_3 y_3 -\\end{aligned} -``` - -## References - -1. Amigó, José M., and Yoshito Hirata. "Detecting directional couplings from - multivariate flows by the joint distance distribution." Chaos: An - Interdisciplinary Journal of Nonlinear Science 28.7 (2018): 075302. -""" -function lorenz_lorenz_bidir(; u0 = rand(6), - c_xy = 0.2, c_yx = 0.2, - a₁ = 10, a₂ = 28, a₃ = 8/3, - b₁ = 10, b₂ = 28, b₃ = 9/3) - @warn "`lorenz_lorenz_bidir` is deprecated in CausalityTools v2. "* - "Use `system(LorenzBidir6())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - p = @LArray [c_xy, c_yx, a₁, a₂, a₃, b₁, b₂, b₃] (:c_xy, :c_yx, :a₁, :a₂, :a₃, :b₁, :b₂, :b₃) - ContinuousDynamicalSystem(eom_lorenz_lorenz_bidir, u0, p) -end - -function lorenzlorenz_bidir_trajectory(npts; sample_dt = 1, Ttr = 1000, dt = 0.1, - c_xy = 0.1, c_yx = 0.1, - u0 = rand(6), - a₁ = 10, a₂ = 28, a₃ = 8/3, - b₁ = 10, b₂ = 28, b₃ = 9/3) - @warn "`lorenzlorenz_bidir_trajectory` is deprecated in CausalityTools v2. " - s = lorenz_lorenz_bidir(u0 = u0, c_xy = c_xy, c_yx = c_yx, a₁ = a₁, a₂ = a₂, a₃ = a₃, b₁ = b₁, b₂ = b₂, b₃ = b₃) - - # the system is recorded at times t0:dt:T - T = npts*dt*sample_dt - - o = trajectory(s, T, dt = dt, Ttr = Ttr*dt, alg = SimpleATsit5())[1:sample_dt:end-1, :] -end - -# For some initial conditions, the system wanders off and doesn't settle to an attractor. Create a function that loops until we get a good realization. 
-function good_lorenzlorenz_bidir_trajectory(npts; - sample_dt = 1, - dt = 0.1, - c_xy = 0.1, - c_yx = 0.1, - Da₁ = Uniform(9.5, 10.5), - Da₂ = Uniform(27, 29), - Da₃ = Uniform(7.5/3, 8.5/3), - Db₁ = Uniform(9.5, 10.5), - Db₂ = Uniform(27, 29), - Db₃ = Uniform(7.5/3, 8.5/3), - a₁ = nothing, - a₂ = nothing, - a₃ = nothing, - b₁ = nothing, - b₂ = nothing, - b₃ = nothing, - u0 = rand(6), - Ttr = 10000, - n_maxtries = 300) - - n_tries = 0 - while n_tries <= n_maxtries - a₁ == nothing ? a₁ = rand(Da₁) : nothing - a₂ == nothing ? a₂ = rand(Da₂) : nothing - a₃ == nothing ? a₃ = rand(Da₃) : nothing - b₁ == nothing ? b₁ = rand(Db₁) : nothing - b₂ == nothing ? b₂ = rand(Db₂) : nothing - b₃ == nothing ? b₃ = rand(Db₃) : nothing - - pts = lorenzlorenz_bidir_trajectory(npts, - sample_dt = sample_dt, dt = dt, - c_xy = c_xy, c_yx = c_yx, - Ttr = Ttr) - - M = Matrix(pts) - - if all(isfinite.(M)) && all(M .< 1e10) && count(M .≈ 0) < npts*0.1 && count(abs.(M) .< 1e-10) < npts*0.1 && - (count(abs.(M) .< 1e-12) < npts*0.1) - return pts - end - println("no attractor found. trying with new initial condition and parameters") - n_tries += 1 - end -end - -function eom_chuacircuits_driven(u, p, t) - α₁, α₂, β₁, β₂, F₁, F₂, ω₁, ω₂, ϵ₁, ϵ₂, m₀, m₁, σ = (p...,) - x₁, y₁, z₁ = (u[1:3]...,) - x₂, y₂, z₂ = (u[4:6]...,) - - # Dynamical noise - if σ == 0 - ξ = 0 - else - ξ = rand(Normal(0, σ)) - end - - hx₁ = m₁*x₁ + 0.5*(m₀ - m₁)*(abs(x₁+1) - abs(x₁-1)) - hx₂ = m₁*x₂ + 0.5*(m₀ - m₁)*(abs(x₂+1) - abs(x₂-1)) - - dx₁ = α₁*(y₁-hx₁) - α₁*ϵ₂*(x₁ - x₂) - dy₁ = x₁-y₁+z₁ - dz₁ = -β₁*y₁ + F₁*sin(ω₁*t) + ξ - - dx₂ = α₂*(y₂-hx₂) - α₂*ϵ₁*(x₁ - x₂) - dy₂ = x₂-y₂+z₂ - dz₂ = -β₂*y₂ + F₂*sin(ω₂*t) + ξ - SVector{6}(dx₁, dy₁, dz₁, dx₂, dy₂, dz₂) -end - -function chuacircuits_driven(u₀, α₁, α₂, β₁, β₂, F₁, F₂, - ω₁, ω₂, ϵ₁, ϵ₂, m₀, m₁, σ) - @warn "`chuacircuits_driven` is deprecated in CausalityTools v2. "* - "Use `system(ChuaCircuitsBidir6())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - p = @LArray [α₁, α₂, β₁, β₂, F₁, F₂, ω₁, ω₂, ϵ₁, ϵ₂, m₀, m₁, σ] (:α₁, :α₂, :β₁, :β₂, :F₁, :F₂, :ω₁, :ω₂, :ϵ₁, :ϵ₂, :m₀, :m₁, :σ) - ContinuousDynamicalSystem(eom_chuacircuits_driven, u₀, p) -end - -""" - chuacircuits_driven(;u₀ = [0.1, 0.1, 0.2, 0.15, 0.15, 0.22], - α₁ = 7.0, α₂ = 7.0, β₁ = 14.286, β₂ = 14.286, - F₁ = 1.5, F₂ = 1.5, ω₁ = 3.0, ω₂ = 3.0, - σ = 0.1, ϵ₁ = 0.1, ϵ₂ = 0.1, m₀ = -1/7, m₁ = 2/7) → ContinuousDynamicalSystem - -Initialize a bidirectionally coupled system consisting of two driven Chua -circuits [1], X₁ and X₂. The subsystems are mutually coupled by a linear -resistor, where `ϵ₁` controls the influence of X₁ on X₂, and `ϵ₂` controls the -influence of X₂ on X₁. The parameters for the subsystems are -set equal to each other, as in the original paper, but can be tuned -individually for each subsystem. - -## References - -1. Murali, K., and M. Lakshmanan. "Chaotic dynamics of the driven Chua's - circuit." IEEE Transactions on Circuits and Systems I Fundamental - Theory and Applications 40.11 (1993): 836-840. 
-""" -chuacircuits_driven(;u₀ = [0.1, 0.1, 0.2, 0.15, 0.15, 0.22], - α₁ = 7.0, α₂ = 7.0, - β₁ = 14.286, β₂ = 14.286, - F₁ = 1.5, F₂ = 1.5, - ω₁ = 3.0, ω₂ = 3.0, - σ = 0.1, - ϵ₁ = 0.1, ϵ₂ = 0.1, - m₀ = -1/7, m₁ = 2/7) = - chuacircuits_driven(u₀, α₁, α₂, β₁, β₂, F₁, F₂, ω₁, ω₂, ϵ₁, ϵ₂, m₀, m₁, σ) - - - -@inline @inbounds function eom_lorenztriple_forced(u, p, t) - c_xy, c_yx, c_zx, c_zy, a₁, a₂, a₃, b₁, b₂, b₃, a₃, c₁, c₂, c₃ = (p...,) - x₁, x₂, x₃, y₁, y₂, y₃, z₁, z₂, z₃ = (u...,) - - dx₁ = -a₁*(x₁ - x₂) + c_yx*(y₁ - x₁) + c_zx*(z₁ - x₁) - dx₂ = -x₁*x₃ + a₂*x₁ - x₂ - dx₃ = x₁*x₂ - a₃*x₃ - - dy₁ = -b₁*(y₁ - y₂) + c_xy*(x₁ - y₁) + c_zy*(z₁ - y₁) - dy₂ = -y₁*y₃ + b₂*y₁ - y₂ - dy₃ = y₁*y₂ - b₃*y₃ - - dz₁ = -c₁*(z₁ - z₂) - dz₂ = -z₁*z₃ + c₂*z₁ - z₂ - dz₃ = z₁*z₂ - c₃*z₃ - - return SVector{9}(dx₁, dx₂, dx₃, dy₁, dy₁, dy₃, dz₁, dz₂, dz₃) -end - -""" - lorenz_lorenz_lorenz_bidir_forced(; u0 = rand(9), - c_xy = 0.1, c_yx = 0.1, - c_zx = 0.05, c_zy = 0.05, - a₁ = 10, a₂ = 28, a₃ = 8/3, - b₁ = 10, b₂ = 28, b₃ = 8/3, - c₁ = 10, c₂ = 28, c₃ = 8/3) - -Initialise a system consisting of two bidirectionally coupled 3D Lorenz -systems forced by an external 3D Lorenz system, giving a 9D system. - -## Equations of motion - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -\\dot{x_1} &= - a_1 (x_1 - x_2) + c_{yx}(y_1 - x_1) + c_{zx}(z_1 - x_1) \\\\ -\\dot{x_2} &= - x_1 x_3 + a_2 x_1 - x_2 \\\\ -\\dot{x_3} &= x_1 x_2 - a_3 x_3 \\\\ -\\dot{y_1} &= -b_1 (y_1 - y_2) + c_{xy} (x_1 - y_1) + c_{zy}(z_1 - y_1) \\\\ -\\dot{y_2} &= - y_1 y_3 + b_2 y_1 - y_2 \\\\ -\\dot{y_3} &= y_1 y_2 - b_3 y_3 \\\\ -\\dot{z_1} &= - c_1 (z_1 - z_2) \\\\ -\\dot{z_2} &= - z_1 z_3 + c_2 z_1 - z_2 \\\\ -\\dot{z_3} &= z_1 z_2 - c_3 z_3 -\\end{aligned} -``` - -## References - -1. Amigó, José M., and Yoshito Hirata. "Detecting directional couplings from - multivariate flows by the joint distance distribution." Chaos: An - Interdisciplinary Journal of Nonlinear Science 28.7 (2018): 075302. -""" -function lorenz_lorenz_lorenz_bidir_forced(; u0 = rand(9), - c_xy = 0.1, c_yx = 0.1, - c_zx = 0.05, c_zy = 0.05, - a₁ = 10, a₂ = 28, a₃ = 8/3, - b₁ = 10, b₂ = 28, b₃ = 8/3, - c₁ = 10, c₂ = 28, c₃ = 8/3) - @warn "`lorenz_lorenz_lorenz_bidir_forced` is deprecated in CausalityTools v2. "* - "Use `system(LorenzForced9())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - - p = @LArray [c_xy, c_yx, c_zx, c_zy, a₁, a₂, a₃, b₁, b₂, b₃, a₃, c₁, c₂, c₃] (:c_xy, :c_yx, :c_zx, :c_zy, :a₁, :a₂, :a₃, :b₁, :b₂, :b₃, :a₃, :c₁, :c₂, :c₃) - ContinuousDynamicalSystem(eom_lorenz_lorenz_lorenz_bidir_forced, u0, p) -end - -function lorenz_lorenz_lorenz_bidir_forced_trajectory(npts; - n_transient = 2000, dt = 0.1, sample_dt = 1, - u0 = rand(9), - c_xy = 1.0, c_yx = 1.0, c_zx = 1.0, c_zy = 1.0, # beyond c = 2, systems syncronize - a₁ = 10, a₂ = 28, a₃ = 8/3, - b₁ = 10, b₂ = 28, b₃ = 8/3, - c₁ = 10, c₂ = 28, c₃ = 8/3) - @warn "`lorenz_lorenz_lorenz_bidir_forced_trajectory` is deprecated in CausalityTools v2." 
- s = lorenz_lorenz_lorenz_bidir_forced(u0 = u0, - c_xy = c_xy, c_yx = c_yx, - c_zx = c_zx, c_zy = c_zy, - a₁ = a₁, a₂ = a₂, a₃ = a₃, - b₁ = b₁, b₂ = b₂, b₃ = b₃, - c₁ = c₁, c₂ = c₂, c₃ = c₃) - - # the system is recorded at times t0:dt:T - T = npts*dt*sample_dt - o = trajectory(s, T, Δt = dt, Ttr = n_transient*dt, - alg = SimpleDiffEq.SimpleATsit5())[1:sample_dt:end-1, :] #alg = SimpleDiffEq.SimpleATsit5() -end - -function good_lorenz_lorenz_lorenz_bidir_forced_trajectory(npts; - sample_dt = 1, Ttr = 5000, dt = 0.1, - Da₁ = Uniform(9.5, 10.5), - Da₂ = Uniform(27.5, 28.5), - Da₃ = Uniform(7.5/3, 8.5/3), - Db₁ = Uniform(9.5, 10.5), - Db₂ = Uniform(27.5, 28.5), - Db₃ = Uniform(7.5/3, 8.5/3), - Dc₁ = Uniform(9.5, 10.5), - Dc₂ = Uniform(27.5, 28.5), - Dc₃ = Uniform(7.5/3, 8.5/3), - - a₁ = nothing, - a₂ = nothing, - a₃ = nothing, - b₁ = nothing, - b₂ = nothing, - b₃ = nothing, - c₁ = nothing, - c₂ = nothing, - c₃ = nothing, - c_xy = 0.2, c_yx = 0.2, - c_zx = 0.05, c_zy = 0.05, - u0 = [rand(Uniform(0, 10)) for i = 1:9], - n_maxtries = 300) - - n_tries = 0 - - while n_tries <= n_maxtries - a₁ == nothing ? a₁ = rand(Da₁) : a₁ = a₁ - a₂ == nothing ? a₂ = rand(Da₂) : a₂ = a₂ - a₃ == nothing ? a₃ = rand(Da₃) : a₃ = a₃ - b₁ == nothing ? b₁ = rand(Db₁) : b₁ = b₁ - b₂ == nothing ? b₂ = rand(Db₂) : b₂ = b₂ - b₃ == nothing ? b₃ = rand(Db₃) : b₃ = b₃ - c₁ == nothing ? c₁ = rand(Dc₁) : c₁ = c₁ - c₂ == nothing ? c₂ = rand(Dc₂) : c₂ = c₂ - c₃ == nothing ? c₃ = rand(Dc₃) : c₃ = c₃ - pts = lorenz_lorenz_lorenz_bidir_forced(npts, - sample_dt = sample_dt, dt = dt, n_transient = Ttr, - c_xy = c_xy, c_yx = c_yx, - c_zx = c_zx, c_zy = c_zy, - a₁ = a₁, a₂ = a₂, a₃ = a₃, - b₁ = b₁, b₂ = b₂, b₃ = b₃, - c₁ = c₁, c₂ = c₂, c₃ = c₃) - - - - if all(Matrix(pts) .< 1e9) #&& length(unique(pts)) < length(pts)*0.8 - return pts - end - println("no attractor found. trying with new initial condition and parameters") - n_tries += 1 - end -end - -@inline @inbounds function eom_lorenz_lorenz_lorenz_transitive(u, p, t) - x₁, y₁, z₁, x₂, y₂, z₂, x₃, y₃, z₃ = (u...,) - σ₁, σ₂, σ₃, ρ₁, ρ₂, ρ₃, β₁, β₂, β₃, c₁₂, c₂₃ = (p...,) - - # Subsystem 1 - dx₁ = σ₁*(y₁-x₁) - dy₁ = ρ₁*x₁ - y₁ - x₁*z₁ - dz₁ = x₁*y₁ - β₁*z₁ - - # Subsystem 2 - dx₂ = σ₂*(y₂-x₂) + c₁₂*(x₁ - x₂) - dy₂ = ρ₂*x₂ - y₂ - x₂*z₂ - dz₂ = x₂*y₂ - β₂*z₂ - - # Subsystem 3 - dx₃ = σ₃*(y₃-x₃) + c₂₃*(x₂ - x₃) - dy₃ = ρ₃*x₃ - y₃ - x₃*z₃ - dz₃ = x₃*y₃ - β₃*z₃ - return SVector{9}(dx₁, dy₁, dz₁, dx₂, dy₂,dz₂, dx₃, dy₃, dz₃) -end - -function lorenz_lorenz_lorenz_transitive(u₀, σ₁, σ₂, σ₃, ρ₁, ρ₂, ρ₃, β₁, β₂, β₃, c₁₂, c₂₃) - p = @LArray [σ₁, σ₂, σ₃, ρ₁, ρ₂, ρ₃, β₁, β₂, β₃, c₁₂, c₂₃] (:σ₁, :σ₂, :σ₃, :ρ₁, :ρ₂, :ρ₃, :β₁, :β₂, :β₃, :c₁₂, :c₂₃) - ContinuousDynamicalSystem(eom_lorenz_lorenz_lorenz_transitive, u₀, p) -end - -""" - lorenz_lorenz_lorenz_transitive(;u₀=rand(9), - σ₁ = 10.0, σ₂ = 10.0, σ₃ = 10.0, - ρ₁ = 28.0, ρ₂ = 28.0, ρ₃ = 28.0, - β₁ = 8/3, β₂ = 8/3, β₃ = 8.3, - c₁₂ = 1.0, c₂₃ = 1.0) → ContinuousDynamicalSystem - -Initalise a dynamical system consisting of three coupled Lorenz attractors with -a transitive causality chain where X₁ → X₂ and X₂ → X₃. In total, the three -3D-subsystems create a 9-dimensional dynamical system. - -The strength of the forcing X₁ → X₂ is controlled by the parameter `c₁`, and -the forcing from X₂ → X₃ by `c₂`. The remaining parameters are the usual -parameters for the Lorenz system, where the subscript `i` refers to the -subsystem Xᵢ. 
- -## Equations of motion - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -\\dot{x_1} &= \\sigma_1(y_1 - x_1) \\\\ -\\dot{y_1} &= \\rho_1 x_1 - y_1 - x_1 z_1 \\\\ -\\dot{z_1} &= x_1 y_1 - \\beta_1 z_1 \\\\ -\\dot{x_2} &= \\sigma_2 (y_2 - x_2) + c_{12}(x_1 - x_2) \\\\ -\\dot{y_2} &= \\rho_2 x_2 - y_2 - x_2 z_2 \\\\ -\\dot{z_2} &= x_2 y_2 - \\beta_2 z_2 \\\\ -\\dot{x_3} &= \\sigma_3 (y_3 - x_3) + c_{23} (x_2 - x_3) \\\\ -\\dot{y_3} &= \\rho_3 x_3 - y_3 - x_3 z_3 \\\\ -\\dot{z_3} &= x_3 y_3 - \\beta_3 z_3 -\\end{aligned} -``` - -## Usage in literature - -This system was studied by Papana et al. (2013) for coupling strengths -``c_{12} = 0, 1, 3, 5`` and ``c_{23} = 0, 1, 3, 5``. - -## References - -1. Papana et al., Simulation Study of Direct Causality Measures in Multivariate - Time Series. Entropy 2013, 15(7), 2635-2661; doi:10.3390/e15072635 -""" -lorenz_lorenz_lorenz_transitive(;u₀=rand(9), - σ₁ = 10.0, σ₂ = 10.0, σ₃ = 10.0, - ρ₁ = 28.0, ρ₂ = 28.0, ρ₃ = 28.0, - β₁ = 8/3, β₂ = 8/3, β₃ = 8.3, - c₁₂ = 1.0, c₂₃ = 1.0) = - lorenz_lorenz_lorenz_transitive(u₀, σ₁, σ₂, σ₃, ρ₁, ρ₂, ρ₃, β₁, β₂, β₃, c₁₂, c₂₃) - - -@inline @inbounds function eom_rossler_rossler_rossler_bidir_forced(u, p, t) - ω₁, ω₂, ω₃, c_xy, c_yx, c_zx, c_zy, a₁, a₂, a₃, b₁, b₂, b₃, a₃, c₁, c₂, c₃ = (p...,) - x1, x2, x3, y1, y2, y3, z1, z2, z3 = (u...,) - - dx1 = -ω₁*(x2 + x3) + c_yx*(y1 - x1) + c_zx*(z1 - x1) - dx2 = ω₁*x1 + a₁*x2 - dx3 = a₂ + x3*(x1 - a₃) - - dy1 = -ω₂*(y2 + y3) + c_xy*(x1 - y1) + c_zy*(z1 - y1) - dy2 = ω₂*y1 + b₁*y2 - dy3 = b₂ + y3*(y1 - b₃) - - dz1 = -ω₂*(z2 + z3) - dz2 = ω₂*z1 + c₁*z2 - dz3 = c₂ + z3*(z1 - c₃) - - return SVector{9}(dx1, dx2, dx3, dy1, dy2, dy3, dz1, dz2, dz3) -end - -""" - rossler_rossler_rossler_bidir_forced(; u0 = rand(9), - ω₁ = 1.015, ω₂ = 0.985, ω₃ = 0.95, - c_xy = 0.1, c_yx = 0.1, - c_zx = 0.05, c_zy = 0.05, - a₁ = 0.15, a₂ = 0.2, a₃ = 10, - b₁ = 0.15, b₂ = 0.2, b₃ = 10, - c₁ = 0.15, c₂ = 0.2, c₃ = 10) - -Equations of motion for a system consisting of three coupled 3D Rössler systems -(``X``, ``Y``, ``Z``), giving a 9D system [1]. The external system -``Z`` influences both ``X`` and ``Y`` (controlled by `c_zx` and `c_zy`). -Simultaneously, the subsystems ``X`` and ``Y`` bidirectionally -influences each other (controlled by `c_xy` and `c_yx`). - -The ``X`` and ``Y`` subsystems are mostly synchronized for `c_xy > 0.1` or -`c_yx > 0.1`. - -## Equations of motion - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -\\dot{x_1} &= -\\omega_1 (x_2 + x_3) + c_{yx}(y_1 - x_1) + c_{zx}(z_1 - x_1) \\\\ -\\dot{x_2} &= \\omega_1 x_1 + a_1 x_2 \\\\ -\\dot{x_3} &= a_2 + x_3 (x_1 - a_3) \\\\ -\\dot{y_1} &= -\\omega_1 (y_2 + y_3) + c_{xy}(x_1 - y_1) + c_{zy}(z_1 - y_1) \\\\ -\\dot{x_2} &= \\omega_2 y_1 + b_1 y_2 \\\\ -\\dot{x_3} &= b_2 + x_3 (y_1 - b_3) \\\\ -\\dot{y_1} &= -\\omega_2 (z_2 + z_3) \\\\ -\\dot{x_2} &= \\omega_2 z_1 + c_1 z_2 \\\\ -\\dot{x_3} &= c_2 + z_3 (z_1 - c_3) -\\end{aligned} -``` - -## References - -1. Amigó, José M., and Yoshito Hirata. "Detecting directional couplings from - multivariate flows by the joint distance distribution." Chaos: An - Interdisciplinary Journal of Nonlinear Science 28.7 (2018): 075302. 
-""" -function rossler_rossler_rossler_bidir_forced(; u0 = rand(9), - ω₁ = 1.015, ω₂ = 0.985, ω₃ = 0.95, - c_xy = 0.1, c_yx = 0.1, - c_zx = 0.05, c_zy = 0.05, - a₁ = 0.15, a₂ = 0.2, a₃ = 10, - b₁ = 0.15, b₂ = 0.2, b₃ = 10, - c₁ = 0.15, c₂ = 0.2, c₃ = 10) - @warn "`rossler_rossler_rossler_bidir_forced` is deprecated in CausalityTools v2. "* - "Use `system(RosslerForced9())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - - p = @LArray [ω₁, ω₂, ω₃, c_xy, c_yx, c_zx, c_zy, a₁, a₂, a₃, b₁, b₂, b₃, a₃, c₁, c₂, c₃] (:ω₁, :ω₂, :ω₃, :c_xy, :c_yx, :c_zx, :c_zy, :a₁, :a₂, :a₃, :b₁, :b₂, :b₃, :a₃, :c₁, :c₂, :c₃) - ContinuousDynamicalSystem(eom_rossler_rossler_rossler_bidir_forced, u0, p) -end - -function forced_rossler_rossler_bidir_trajectory(npts; - n_transient = 2000, dt = 0.6, sample_dt = 1, - u0 = rand(9), ω₁ = 1.015, ω₂ = 0.985, ω₃ = 0.95, - c_xy = 0.2, c_yx = 0.2, c_zx = 0.05, c_zy = 0.05, - a₁ = 0.15, a₂ = 0.2, a₃ = 10, - b₁ = 0.15, b₂ = 0.2, b₃ = 10, - c₁ = 0.15, c₂ = 0.2, c₃ = 10) - @warn "`forced_rossler_rossler_bidir_trajectory` is deprecated in CausalityTools v2." - - s = rossler_rossler_rossler_bidir_forced(u0 = u0, - c_xy = c_xy, c_yx = c_yx, - c_zx = c_zx, c_zy = c_zy, - ω₁ = ω₁, ω₂ = ω₂, ω₃ = ω₃, - a₁ = a₁, a₂ = a₂, a₃ = a₃, - b₁ = b₁, b₂ = b₂, b₃ = b₃, - c₁ = c₁, c₂ = c₂, c₃ = c₃) - - # the system is recorded at times t0:dt:T - T = npts*dt*sample_dt - o = trajectory(s, T, dt = dt, Ttr = n_transient*dt, alg = SimpleDiffEq.SimpleATsit5())[1:sample_dt:end-1, :] #alg = SimpleDiffEq.SimpleATsit5() -end - -function good_rossler_rossler_rossler_bidir_forced_trajectory(npts; - sample_dt = 1, Ttr = 5000, dt = 0.3, # dt = 0.6 about 10 samples per period - Da₁ = Uniform(0.12, 0.17), - Da₂ = Uniform(0.18, 0.22), - Da₃ = Uniform(9.0, 11.0), - Db₁ = Uniform(0.10, 0.20), - Db₂ = Uniform(0.18, 0.22), - Db₃ = Uniform(9.0, 11.0), - Dc₁ = Uniform(0.10, 0.20), - Dc₂ = Uniform(0.18, 0.22), - Dc₃ = Uniform(9.0, 11.0), - Dω₁ = Uniform(0.95, 0.999), #Uniform(0.97, 1.03) - Dω₂ = Uniform(1.001, 1.05), #Uniform(0.97, 1.03) - Dω₃ = Uniform(0.9, 0.95), #Uniform(0.97, 1.03) - - a₁ = nothing, - a₂ = nothing, - a₃ = nothing, - b₁ = nothing, - b₂ = nothing, - b₃ = nothing, - c₁ = nothing, - c₂ = nothing, - c₃ = nothing, - ω₁ = nothing, - ω₂ = nothing, - ω₃ = nothing, - c_xy = 0.2, c_yx = 0.2, - c_zx = 0.05, c_zy = 0.05, - u0 = rand(9), - n_maxtries = 300) - - n_tries = 0 - - while n_tries <= n_maxtries - ω₁ == nothing ? ω₁ = rand(Dω₁) : ω₁ = ω₁ - ω₂ == nothing ? ω₂ = rand(Dω₂) : ω₂ = ω₂ - ω₃ == nothing ? ω₃ = rand(Dω₃) : ω₃ = ω₃ - a₁ == nothing ? a₁ = rand(Da₁) : a₁ = a₁ - a₂ == nothing ? a₂ = rand(Da₂) : a₂ = a₂ - a₃ == nothing ? a₃ = rand(Da₃) : a₃ = a₃ - b₁ == nothing ? b₁ = rand(Db₁) : b₁ = b₁ - b₂ == nothing ? b₂ = rand(Db₂) : b₂ = b₂ - b₃ == nothing ? b₃ = rand(Db₃) : b₃ = b₃ - c₁ == nothing ? c₁ = rand(Dc₁) : c₁ = c₁ - c₂ == nothing ? c₂ = rand(Dc₂) : c₂ = c₂ - c₃ == nothing ? c₃ = rand(Dc₃) : c₃ = c₃ - pts = rossler_rossler_rossler_bidir_forced(npts, - sample_dt = sample_dt, dt = dt, n_transient = Ttr, - c_xy = c_xy, c_yx = c_yx, - c_zx = c_zx, c_zy = c_zy, - ω₁ = ω₁, ω₂ = ω₂, ω₃ = ω₃, - a₁ = a₁, a₂ = a₂, a₃ = a₃, - b₁ = b₁, b₂ = b₂, b₃ = b₃, - c₁ = c₁, c₂ = c₂, c₃ = c₃) - - if all(Matrix(pts) .< 1e10) && length(unique(pts)) > npts/2 - return pts - end - println("no attractor found. 
trying with new initial condition and parameters") - n_tries += 1 - end -end - -@inline @inbounds function eom_mediated_link(u, p, t) - (; xi, ωx, ωy, ωz, k, l, m, c) = p - x₁, x₂, x₃, y₁, y₂, y₃, z₁, z₂, z₃ = u - - dx₁ = -ωx*x₂ - x₃ + c*(z₁ - x₁) - dx₂ = ωx*x₁ + k*x₂ - dx₃ = l + x₃*(x₁ - m) - - dy₁ = -ωy*y₂ - y₃ + c*(z₁ - y₁) - dy₂ = ωy*y₁ + k*y₂ - dy₃ = l + y₃*(y₁ - m) - - dz₁ = -ωz*z₂ - z₃ - dz₂ = ωz*z₁ + k*z₂ - dz₃ = l + z₃*(z₁ - m) - - SVector{9}(dx₁, dx₂, dx₃, dy₁, dy₂, dy₃, dz₁, dz₂, dz₃) -end - -function mediated_link(u₀, ωx, ωy, ωz, k, l, m, c) - @warn "`mediated_link` is deprecated in CausalityTools v2. "* - "Use `system(MediatedLink9())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - p = @LArray [ωx, ωy, ωz, k, l, m, c] (:ωx, :ωy, :ωz, :k, :l, :m, :c) - ContinuousDynamicalSystem(eom_mediated_link, u₀, p) -end - -""" - mediated_link(;u₀ = rand(9), ωx = 1, ωy = 1.015, ωz = 0.985, - k = 0.15, l = 0.2, m = 10.0, - c = 0.06) → ContinuousDynamicalSystem - -Initialise a three-subsystem dynamical system where `X` and `Y` are -driven by `Z`. At the default value of the coupling constant `c = 0.06`, the -responses `X` and `Y` are already synchronized to the driver `Z`. - -## Equations of motion - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -dx_1 &= -\\omega_x x_2 - x_3 + c*(z_1 - x_1) \\\\ -dx_2 &= \\omega_x x_1 + k*x_2 \\\\ -dx_3 &= l + x_3(x_1 - m) \\\\ -dy_1 &= -\\omega_y y_2 - y_3 + c*(z_1 - y_1) \\\\ -dy_2 &= \\omega_y y_1 + k*y_2 \\\\ -dy_3 &= l + y_3(y_1 - m) \\\\ -dz_1 &= -\\omega_z z_2 - z_3 \\\\ -dz_2 &= \\omega_z z_1 + k*z_2 \\\\ -dz_3 &= l + z_3(z_1 - m) -\\end{aligned} -``` - -## References - -1. Krakovská, Anna, et al. "Comparison of six methods for the detection of - causality in a bivariate time series." Physical Review E 97.4 (2018): 042207 -""" -mediated_link(;u₀ = rand(9), ωx = 1, ωy = 1.015, ωz = 0.985, - k = 0.15, l = 0.2, m = 10.0, c = 0.06) = - mediated_link(u₀, ωx, ωy, ωz, k, l, m, c) - - -@inline @inbounds function eom_repressilator(u, p, t) - (; xi, α, α₀, n, β) = p - # pᵢ := concentration of protein repressor i - # mᵢ := concentration of mRNA associated with pᵢ - m₁, m₂, m₃, p₁, p₂, p₃ = u - - ṁ₁ = -m₁ + α/(1 + p₃^n) + α₀ - ṁ₂ = -m₂ + α/(1 + p₁^n) + α₀ - ṁ₃ = -m₃ + α/(1 + p₂^n) + α₀ - ṗ₁ = -β*(p₁ - m₁) - ṗ₂ = -β*(p₂ - m₂) - ṗ₃ = -β*(p₃ - m₃) - - return SVector{6}(ṁ₁, ṁ₂, ṁ₃, ṗ₁, ṗ₂, ṗ₃) -end - -""" - repressilator(;u₀ = rand(6), α = 10.0, α₀ = 0.0, β = 100.0, - n = 2) → ContinuousDynamicalSystem - -A six-dimensional repressilator (or repression-driven oscillator) [Elowitz2000](@citet). - -The equations are scaled to be non-dimensional. Used in [Sun2014](@cite) to study the -performance of the causation entropy algorithm. -""" -function repressilator(;u₀ = rand(6), α = 10.0, α₀ = 0.0, β = 100.0, n = 2) - @warn "`repressilator` is deprecated in CausalityTools v2. "* - "Use `system(Repressilator6())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - p = @LArray [α, α₀, β, n] (:α, :α₀, :β, :n) - ContinuousDynamicalSystem(eom_repressilator, u₀, p) -end - - -function eom_hindmarsh_rose(u, p, t) - a, b, c, d, r, s, xᵣ, I = (p...,) - x, y, z = u - - ϕ = -a*x^3 + b*x^2 - ψ = c - d*x^2 - dx = y + ϕ - z + I - dy = ψ - y - dz = r*(s*(x - xᵣ) - z) - return SVector{3}(dx, dy, dz) -end - -""" - hindmarsh_rose(u₀, p) - -Initialise a Hindmarsh-Rose system, which is a model of neuronal -spiking. 
- -```math -\\begin{aligned} -\\dfrac{dx}{dt} &= y + \\phi(x) - z + I -\\dfrac{dy}{dt} &= \\psi(x) - y -\\dfrac{dz}{dt} &= r[s(x - x_R) - z], -\\end{aligned} -``` -where - -```math -\\begin{aligned} -\\phi(x) &= -ax^3+bx^2 -\\psi(x) &= c - dx^2 -\\end{aligned} -``` - -If parameters other than the defaults are to be used, they must be -provided as a vector `[a, b, c, d, r, s, xᵣ, I]`. -""" -function hindmarsh_rose(u₀, p) - @warn "`hindmarsh_rose` is deprecated in CausalityTools v2. "* - "Use `system(HindmarshRose3())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - ContinuousDynamicalSystem(eom_hindmarsh_rose, u₀, p) -end -hindmarsh_rose(;u₀ = rand(3), a = 1, b = 3, c = 1, d = 5, r = 1e-3, s = 4, xᵣ = - 8/5, I = -8) = - hindmarsh_rose(u₀, [a, b, c, d, r, s, xᵣ, I]) - -@inline @inbounds function eom_lorenzdiffusive(u, p, t) - - C₁₂, C₂₁, R, ϵ₁, ϵ₂ = p[1], p[2], p[3], p[4], p[5] - x₁, x₂, x₃ = u[1], u[2], u[3] - y₁, y₂, y₃ = u[4], u[5], u[6] - - dx1 = 10*(x₂ - x₁) + C₂₁*(y₁-x₁) - dx2 = x₁*((R+ϵ₁) - x₃) - x₂ - dx3 = x₁*x₂ - 8/3*x₃ - - dy1 = 10*(y₂ - y₁) + C₁₂*(x₁-y₁) - dy2 = y₁*((R+ϵ₂) - y₃) - y₂ - dy3 = y₁*y₂ - 8/3*y₃ - return SVector{6}(dx1, dx2, dx3, dy1, dy2, dy3) -end - -""" - lorenzdiffusive(; ui = rand(6), C₁₂::Real = 5, C₂₁::Real = 0, - R::Real = 28.0, ϵ₁::Real = -0.02, ϵ₂::Real = 0.03) - -A dynamical system consisting of two diffusively coupled 3D Lorenz systems -[Martini2011](@cite). - -The coupling magnitude from subsystem 1 to subsystem 2 is controlled by `C₁₂`, and the -coupling from subsystem 2 to subsystem 1 is controlled by `C₂₁`. The parameters `ϵ₁` and `ϵ₂` -add small deviations to the control parameter `R`. - -## Equations of motion - -```math -\\begin{aligned} -\\dot{x_1} &= 10(x_2 - x_1) + C_{21}*(y_1-x_1) \\\\ -\\dot{x_2} &= x_1((R+ϵ₁) - x_3) - x_2 \\\\ -\\dot{x_3} &= x_1x_2 - 8/3x_3 \\\\ -\\dot{y_1} &= 10(y_2 - y_1) + C_{12}(x_1-y_1) \\\\ -\\dot{y_2} &= y_1((R+\\epsilon_2) - y_3) - y_2 \\\\ -\\dot{y_3} &= y_1y_2 - 8/3y_3 -\\end{aligned} -``` -""" -function lorenzdiffusive(; ui = rand(6), C₁₂::Real = 5, C₂₁::Real = 0, - R::Real = 28.0, ϵ₁::Real = -0.02, ϵ₂::Real = 0.03) - @warn "`lorenzdiffusive` is deprecated in CausalityTools v2. "* - "Use `system(LorenzBidir6())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - p = [C₁₂, C₂₁, R, ϵ₁, ϵ₂] - ContinuousDynamicalSystem(eom_lorenzdiffusive, ui, p) -end - -@inline @inbounds function eom_rossler_rossler_bidir(u, p, t) - ω₁, ω₂, c_xy, c_yx, a₁, a₂, a₃, b₁, b₂, b₃ = (p...,) - x1, x2, x3, y1, y2, y3 = (u...,) - - dx1 = -ω₁*(x2 + x3) + c_yx*(y1 - x1) - dx2 = ω₁*x1 + a₁*x2 - dx3 = a₂ + x3*(x1 - a₃) - - dy1 = -ω₂*(y2 + y3) + c_xy*(x1 - y1) - dy2 = ω₂*y1 + b₁*y2 - dy3 = b₂ + y3*(y1 - b₃) - - return SVector{6}(dx1, dx2, dx3, dy1, dy2, dy3) -end - -""" - rossler_rossler_bidir(; u0 = rand(6), - ω₁ = 1.015, ω₂ = 0.985, - c_xy = 0.1, c_yx = 0.1, - a₁ = 0.15, a₂ = 0.2, a₃ = 10, - b₁ = 0.15, b₂ = 0.2, b₃ = 10) - -Initialise a system of two bidirectionally coupled 3D Rössler systems. -This system has been modified from [1] to allow other parameterisations, -but default parameters are as in [1]. - -The ``X`` and ``Y`` subsystems are mostly synchronized for -`c_xy > 0.1` or `c_yx > 0.1`. 
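Since the example systems (including `lorenzdiffusive`) are removed in v3.0, users who still need such test data can integrate the equations of motion directly. Below is a minimal, hypothetical sketch that reproduces the diffusively coupled Lorenz system defined above with OrdinaryDiffEq.jl; the name `lorenzdiffusive_rule` and the solver/sampling choices are illustrative, not part of CausalityTools.

```julia
using OrdinaryDiffEq, StaticArrays

# Out-of-place rule for the two diffusively coupled 3D Lorenz systems above.
function lorenzdiffusive_rule(u, p, t)
    C₁₂, C₂₁, R, ϵ₁, ϵ₂ = p
    x₁, x₂, x₃, y₁, y₂, y₃ = u
    dx1 = 10 * (x₂ - x₁) + C₂₁ * (y₁ - x₁)
    dx2 = x₁ * ((R + ϵ₁) - x₃) - x₂
    dx3 = x₁ * x₂ - 8 / 3 * x₃
    dy1 = 10 * (y₂ - y₁) + C₁₂ * (x₁ - y₁)
    dy2 = y₁ * ((R + ϵ₂) - y₃) - y₂
    dy3 = y₁ * y₂ - 8 / 3 * y₃
    return SVector(dx1, dx2, dx3, dy1, dy2, dy3)
end

p = [5.0, 0.0, 28.0, -0.02, 0.03]         # C₁₂, C₂₁, R, ϵ₁, ϵ₂ (defaults from above)
prob = ODEProblem(lorenzdiffusive_rule, SVector{6}(rand(6)), (0.0, 100.0), p)
sol = solve(prob, Tsit5(); saveat = 0.1)  # regularly sampled orbit
```

Each saved state `sol[i]` is a 6-element `SVector` holding the `x` and `y` subsystem coordinates that the removed example system previously returned as a trajectory.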
- -## Equations of motion - -The dynamics is generated by the following vector field - -```math -\\begin{aligned} -\\dot{x_1} &= -\\omega_1(x_2 + x_3) + c_{yx}(y_1 - x_1) \\\\ -\\dot{x_2} &= \\omega_1 x_1 + a_1 x_2 \\\\ -\\dot{x_3} &= a_2 + x_3 (x_1 - a_3) \\\\ -\\dot{y_1} &= -\\omega_2 (y_2 + y_3) + c_{xy}(x_1 - y_1) \\\\ -\\dot{y_2} &= \\omega_2 y_1 + b_1 y_2 \\\\ -\\dot{y_3} &= b_2 + y_3 (y_1 - b_3) -\\end{aligned} -``` - -## References - -1. Amigó, José M., and Yoshito Hirata. "Detecting directional couplings from - multivariate flows by the joint distance distribution." Chaos: An - Interdisciplinary Journal of Nonlinear Science 28.7 (2018): 075302. -""" -function rossler_rossler_bidir(; u0 = rand(6), - ω₁ = 1.015, ω₂ = 0.985, - c_xy = 0.1, c_yx = 0.1, - a₁ = 0.15, a₂ = 0.2, a₃ = 10, - b₁ = 0.15, b₂ = 0.2, b₃ = 10) - @warn "`rossler_rossler_bidir` is deprecated in CausalityTools v2. "* - "Use `system(RosslerBidir6())` instead, which returns a "* - "`ContinuousDynamicalSystem` that can be iterated." - p = @LArray [ω₁, ω₂, c_xy, c_yx, a₁, a₂, a₃, b₁, b₂, b₃] (:ω₁, :ω₂, :c_xy, :c_yx, :a₁, :a₂, :a₃, :b₁, :b₂, :b₃) - ContinuousDynamicalSystem(eom_rossler_rossler_bidir, u0, p) -end - - -function rossler_rossler_bidir_trajectory(npts, sample_dt; n_transient = 2000, dt = 0.2, - u0 = rand(6), ω₁ = 1.015, ω₂ = 0.985, - c_xy = 0.2, c_yx = 0.2, - a₁ = 0.15, a₂ = 0.2, a₃ = 10, - b₁ = 0.15, b₂ = 0.2, b₃ = 10) - - s = rossler_rossler_bidir(u0 = u0, - c_xy = c_xy, c_yx = c_yx, - ω₁ = ω₁, ω₂ = ω₂, - a₁ = a₁, a₂ = a₂, a₃ = a₃, - b₁ = b₁, b₂ = b₂, b₃ = b₃) - @warn "`rossler_rossler_bidir_trajectory` is deprecated in CausalityTools v2." - - # the system is recorded at times t0:dt:T - T = npts*dt*sample_dt - o = trajectory(s, T, dt = dt, Ttr = n_transient*dt, alg = SimpleATsit5())[1:sample_dt:end-1, :] #alg = SimpleDiffEq.SimpleATsit5() -end - -function good_rossler_rossler_bidir_trajectory(npts; sample_dt = 1, - Da₁ = Uniform(0.12, 0.17), - Da₂ = Uniform(0.18, 0.22), - Da₃ = Uniform(9.0, 11.0), - Db₁ = Uniform(0.10, 0.20), - Db₂ = Uniform(0.18, 0.22), - Db₃ = Uniform(9.0, 11.0), - Dω₁ = Uniform(0.95, 0.999), #Uniform(0.97, 1.03) - Dω₂ = Uniform(1.001, 1.05), #Uniform(0.97, 1.03) - a₁ = nothing, - a₂ = nothing, - a₃ = nothing, - b₁ = nothing, - b₂ = nothing, - b₃ = nothing, - ω₁ = nothing, - ω₂ = nothing, - c_xy = 0.2, c_yx = 0.2, - Ttr = 5000, dt = 0.2, - u0 = rand(6), - n_maxtries = 300) - - n_tries = 0 - - while n_tries <= n_maxtries - ω₁ == nothing ? ω₁ = rand(Dω₁) : ω₁ = ω₁ - ω₂ == nothing ? ω₂ = rand(Dω₂) : ω₂ = ω₂ - a₁ == nothing ? a₁ = rand(Da₁) : a₁ = a₁ - a₂ == nothing ? a₂ = rand(Da₂) : a₂ = a₂ - a₃ == nothing ? a₃ = rand(Da₃) : a₃ = a₃ - b₁ == nothing ? b₁ = rand(Db₁) : b₁ = b₁ - b₂ == nothing ? b₂ = rand(Db₂) : b₂ = b₂ - b₃ == nothing ? b₃ = rand(Db₃) : b₃ = b₃ - - pts = rossler_rossler_bidir_trajectory(npts, sample_dt, dt = dt, n_transient = Ttr, - c_xy = c_xy, c_yx = c_yx, ω₁ = ω₁, ω₂ = ω₂, a₁ = a₁, a₂ = a₂,a₃ = a₃,b₁ = b₁, b₂ = b₂, b₃ = b₃) - - if all(Matrix(pts) .< 1e10) && length(unique(pts)) > npts/2 - return pts - end - println("no attractor found. 
trying with new initial condition and parameters") - n_tries += 1 - end -end diff --git a/src/example_systems/discrete/AR1Bidir.jl b/src/example_systems/discrete/AR1Bidir.jl deleted file mode 100644 index f9d53214c..000000000 --- a/src/example_systems/discrete/AR1Bidir.jl +++ /dev/null @@ -1,48 +0,0 @@ -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using Distributions: Normal - -export AR1Bidir - -""" - AR1Bidir <: DiscreteDefinition - AR1Bidir(;xi = [0.2, 0.3], a₁ = 0.5, b₁ = 0.7, c_xy = 0.1, c_yx = 0.2, - nx = Normal(0, 0.3), ny = Normal(0, 0.3), - rng::R = Random.default_rng()) - -A system consisting of two mutually coupled first order autoregressive processes. - -## Equations of motion - -```math -\\begin{aligned} -x(t+1) &= a_{1}x + c_{yx}y + \\epsilon_{x} \\\\ -y(t+1) &= b_{1}y + c_{xy}x + \\epsilon_{y} -\\end{aligned} -``` - -where at each time step, ``\\epsilon_{x}`` and ``\\epsilon_{y}`` are drawn -from independent normal distributions `nx` and `ny`, respectively. -""" -Base.@kwdef struct AR1Bidir{V, A, B, C1, C2, NX, NY, R} <: DiscreteDefinition - xi::V = [0.2, 0.3] - a₁::A = 0.5 - b₁::B = 0.7 - c_xy::C1 = 0.1 - c_yx::C2 = 0.2 - nx::NX = Normal(0, 0.3) - ny::NY = Normal(0, 0.3) - rng::R = Random.default_rng() -end - -function system(definition::AR1Bidir) - return DiscreteDynamicalSystem(eom_ar1_bidir, definition.xi, definition) -end - -function eom_ar1_bidir(u, p::AR1Bidir, t) - (; xi, a₁, b₁, c_xy, c_yx, nx, ny, rng) = p - x, y = u - dx = a₁*x + c_yx*y + rand(rng, nx) - dy = b₁*y + c_xy*x + rand(rng, ny) - return SVector{2}(dx, dy) -end diff --git a/src/example_systems/discrete/AR1Unidir.jl b/src/example_systems/discrete/AR1Unidir.jl deleted file mode 100644 index be27b52b0..000000000 --- a/src/example_systems/discrete/AR1Unidir.jl +++ /dev/null @@ -1,55 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using Distributions: Normal -using Random - -export AR1Unidir - -""" - AR1Unidir <: DiscreteDefinition - AR1Unidir(; ui = [0.2, 0.3], a₁ = 0.90693, b₁ = 0.40693, c_xy = 0.5, - nx = Normal(0, 0.40662), ny = Normal(0, 0.40662), - rng::R = Random.default_rng()) - -A bivariate, order one autoregressive model, where ``x \\to y`` (Paluš et al, -2018)[^Paluš2018]. - -## Equations of motion - -```math -\\begin{aligned} -x(t+1) &= a_1 x(t) + \\xi_{1} \\\\ -y(t+1) &= b_1 y(t) - c_{xy} x + \\xi_{2}, -\\end{aligned} -``` - -where ``\\xi_{1}`` and ``\\xi_{2}`` are drawn from normal distributions `nx` and `ny` -at each iteration. - -[^Paluš2018]: - Paluš, M., Krakovská, A., Jakubík, J., & Chvosteková, M. (2018). Causality, - dynamical systems and the arrow of time. Chaos: An Interdisciplinary Journal of - Nonlinear Science, 28(7), 075307. 
http://doi.org/10.1063/1.5019944 -""" -Base.@kwdef struct AR1Unidir{V, A, B, C, NX, NY, R} <: DiscreteDefinition - xi::V = [0.2, 0.3] - a₁::A = 0.90693 - b₁::B = 0.40693 - c_xy::C = 0.5 - nx::NX = Normal(0, 0.40662) - ny::NY = Normal(0, 0.40662) - rng::R = Random.default_rng() -end - -function system(definition::AR1Unidir) - return DiscreteDynamicalSystem(eom_ar1_unidir, definition.xi, definition) -end - -function eom_ar1_unidir(u, p::AR1Unidir, t) - (; xi, a₁, b₁, c_xy, nx, ny, rng) = p - x, y = u - dx = a₁*x + rand(rng, nx) - dy = b₁*y + c_xy*x + rand(rng, ny) - return SVector{2}(dx, dy) -end diff --git a/src/example_systems/discrete/Anishchenko.jl b/src/example_systems/discrete/Anishchenko.jl deleted file mode 100644 index 50b2e6f09..000000000 --- a/src/example_systems/discrete/Anishchenko.jl +++ /dev/null @@ -1,41 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem - -export Anishchenko - -""" - Anishchenko <: DiscreteDefinition - Anishchenko(;u₀ = rand(2), α =3.277, s=0.1, ω=0.5*(sqrt(5)-1)) → DiscreteDynamicalSystem - -Initialise the system defined by eq. 13 in [Anishchenko1998](@cite), -which can give strange, nonchaotic attractors. - -## Equations of motion - -```math -\\begin{aligned} -dx &= \\alpha (1-s \\cos (2 \\pi \\phi )) \\cdot x(1-x) \\\\ -dϕ &= (\\phi + \\omega ) \\mod{1} -\\end{aligned} -``` -""" -Base.@kwdef struct Anishchenko{V, A, S, Ω} <: DiscreteDefinition - xi::V = [0.2, 0.3] - α::A = 3.277 - s::S = 0.1 - ω::Ω = 0.5 * (sqrt(5) - 1) -end - -function system(definition::Anishchenko) - return DiscreteDynamicalSystem(eom_anischenko, definition.xi, definition) -end - -function eom_anischenko(u, p::Anishchenko, t) - x, ϕ = u - α, s, ω = p.α, p.s, p.ω - dx = α * (1 - s * cos(2*pi*ϕ)) * x * (1 - x) - dϕ = (ϕ + ω) % 1 - - return SVector{2}(dx, dϕ) -end diff --git a/src/example_systems/discrete/ChaoticMaps3.jl b/src/example_systems/discrete/ChaoticMaps3.jl deleted file mode 100644 index 3a5a67b01..000000000 --- a/src/example_systems/discrete/ChaoticMaps3.jl +++ /dev/null @@ -1,43 +0,0 @@ -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -export ChaoticMaps3 -""" - ChaoticMaps3() <: DiscreteDefinition - ChaoticMaps3(; ui = [0.2, 0.1, 0.3], r = 3.4, c_xy = 0.5, c_xz = 0.5, c_yz = 0.3) - -A model consisting of three coupled 1D maps, where ``x \\to y`` and ``x \\to z`` -[Chen2004](@cite). - -## Equations of motion - -```math -\\begin{aligned} -x(t) &= r x(t-1)( 1 - x(t-1)^2 ) e^{-x(t-1)^2} \\\\ -y(t) &= r y(t-1)( 1 - y(t-1)^2 ) e^{-y(t-1)^2} + c_{xy} x(t-1) \\\\ -z(t) &= r z(t-1)( 1 - z(t-1)^2 ) e^{-z(t-1)^2} + c_{xz} x(t-1) + c_{yz} y(t-1) -\\end{aligned} -``` - -The parameters `r`, `c_xy` and `c_yz` do not appear in the original paper, -but are added here for explorative purposes. 
-""" -Base.@kwdef struct ChaoticMaps3{R, V, C1 ,C2, C3} <: DiscreteDefinition - r::R = 3.4 - c_xy::C1 = 1.4 - c_xz::C2 = 0.3 - c_yz::C3 = 0.1 - xi::V = [0.2, 0.1, 0.3] -end - -function system(definition::ChaoticMaps3) - return DiscreteDynamicalSystem(eom_chaoticmaps3, definition.xi, definition) -end - -function eom_chaoticmaps3(x, p::ChaoticMaps3, t) - r, c_xy, c_xz, c_yz = p.r, p.c_xy, p.c_xz, p.c_yz - x, y, z = x - dx = r * x * (1 - x^2) * exp(-x^2) - dy = r * y * (1 - y^2) * exp(-y^2) + c_xy * x - dz = r * z * (1 - z^2) * exp(-z^2) + c_xz * x + c_yz * y - return SVector{3}(dx, dy, dz) -end diff --git a/src/example_systems/discrete/ChaoticNoisyLinear2.jl b/src/example_systems/discrete/ChaoticNoisyLinear2.jl deleted file mode 100644 index 59891c9da..000000000 --- a/src/example_systems/discrete/ChaoticNoisyLinear2.jl +++ /dev/null @@ -1,64 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using Random - -export ChaoticNoisyLinear2 - -""" - ChaoticNoisyLinear2 <: DiscreteDefinition - ChaoticNoisyLinear2(; xi = [0.1, 0.2], c = 0.5, - nx = Normal(0, 0.05), ny = Normal(0, 0.05), - rng = Random.default_rng()) - -A bivariate system of two chaotic maps that are linearly coupled from `x → y` -with coupling strength `c`. - -## Definition - -```math -\\begin{align*} -x(t+1) &= 3.4 x(t) (1 - x(t)^2) e^{-x(t)^2} + 0.8x(t-1) + \\xi_x \\\\ -y(t+1) &= 3.4 y(t) (1 - y(t)^2) e^{-y(t)^2} + 0.8y(t-1) + \\xi_y + c x(t-2) -\\end{align*} -``` -Process noise ``\\xi_x`` and ``\\xi_y`` -is drawn at each iteration from `nx` and `ny`. -""" -struct ChaoticNoisyLinear2{P, V, NX, NY, C, R} <: LaggedDiscreteDefinition{P} - past_states::P - xi::V - nx::NX - ny::NY - c::C - rng::R - - function ChaoticNoisyLinear2(; xi::V = [0.1, 0.2], c::C = 0.5, - nx::NX = Normal(0, 0.05), - ny::NY = Normal(0, 0.05), - rng::R = Random.default_rng()) where {V, C, NX, NY, R} - T = eltype(1.0) - mx = MVector{2, T}(repeat([xi[1]], 2)) - my = MVector{2, T}(repeat([xi[2]], 2)) - past_states = SVector{2, MVector{2, T}}(mx, my) - P = typeof(past_states) - return new{P, V, NX, NY, C, R}(past_states, xi, nx, ny, c, rng) - end -end - -function system(definition::ChaoticNoisyLinear2) - return DiscreteDynamicalSystem(eom_linearmap2, definition.xi, definition) -end - -function eom_linearmap2(u, p::ChaoticNoisyLinear2, t) - (; past_states, xi, nx, ny, c, rng) = p - # `u` is simply ignored here, because the state is stored in the memory vectors - mx, my = past_states - x₁, x₂ = mx[1], mx[2] - y₁, y₂ = my[1], my[2] - dx = 3.4 * x₁ * (1 - x₁^2) * exp(-x₁^2) + 0.8*x₂ + rand(rng, nx) - dy = 3.4 * y₁ * (1 - y₁^2) * exp(-y₁^2) + 0.5*y₂ + c*x₂ + rand(rng, ny) - new_state = SVector{2}(dx, dy) - update_states!(p, new_state) # Update memory vectors - return new_state -end diff --git a/src/example_systems/discrete/Henon2.jl b/src/example_systems/discrete/Henon2.jl deleted file mode 100644 index e5436dab7..000000000 --- a/src/example_systems/discrete/Henon2.jl +++ /dev/null @@ -1,48 +0,0 @@ -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -export Henon2 - -""" - Henon2() <: DiscreteDefinition - Henon2(;u₀ = [0.1, 0.2, 0.2, 0.3], c_xy = 2.0) - -A bivariate system consisting of two identical 1D Henon maps with -unidirectional forcing ``X \\to Y `` (Krakovská et al., 2018)[^Krakovská2018]. 
- -## Equations of motion - -The equations of motion are - -```math -\\begin{align*} -x_1(t+1) &= 1.4 - x_1^2(t) + 0.3x_2(t) \\\\ -x_2(t+1) &= x_1(t) \\\\ -y_1(t+1) &= 1.4 - [c_{xy} x_1(t) y_1(t) + (1-c_{xy}) y_1^2(t)] + 0.3 y_2(t) \\\\ -y_2(t+1) &= y_1(t) -\\end{align*} -``` - -[^Krakovská2018]: - Krakovská, A., Jakubík, J., Chvosteková, M., Coufal, D., Jajcay, N., & Paluš, M. (2018). - Comparison of six methods for the detection of causality in a bivariate time series. - Physical Review E, 97(4), 042207. -""" -Base.@kwdef struct Henon2{R, C, V} <: DiscreteDefinition - r::R = 3.4 - c_xy::C = 1.4 - xi::V = [0.1, 0.2, 0.2, 0.3] -end - -function system(definition::Henon2) - return DiscreteDynamicalSystem(eom_henon2, definition.xi, definition) -end - -function eom_henon2(x, p::Henon2, t) - c_xy = p.c_xy - x₁, x₂, y₁, y₂ = x - dx₁ = 1.4 - x₁^2 + 0.3*x₂ - dx₂ = x₁ - dy₁ = 1.4 - (c_xy * x₁ * y₁ + (1 - c_xy)*y₁^2) + 0.3*y₂ - dy₂ = y₁ - return SVector{4}(dx₁, dx₂, dy₁, dy₂) -end diff --git a/src/example_systems/discrete/Henon3.jl b/src/example_systems/discrete/Henon3.jl deleted file mode 100644 index a8ba521f4..000000000 --- a/src/example_systems/discrete/Henon3.jl +++ /dev/null @@ -1,67 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector, MVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using StateSpaceSets: StateSpaceSet - -export Henon3 - -""" - Henon3() <: DiscreteDefinition - Henon3(; a = 0.1, b = 0.3, c = 0.1, xi = [0.1, 0.2, 0.3]) - -`Henon3` is a [`DiscreteDefinition`](@ref) definition for a lagged discrete dynamical -system consisting of three coupled 1D Henon maps [Papana2013](@cite). - -## Equations of motion - -```math -\\begin{align*} -x_1(t+1) &= a - x_1(t)^2 + b x_1(t-2) \\\\ -x_2(t+1) &= a - c x_1(t) x_2(t)- (1 - c) x_2(t)^2 + b x_2(t-1) \\\\ -x_3(t+1) &= c x_2(t) x_3(t) - (1 - c) x_3(t)^2 + b x_3(t-1) -\\end{align*} -``` - -Here ``c`` is the coupling constant. The system becomes completely synchronized -for ``c >= 0.7``. The initial condition `xi` is repeated over the first two time steps -before iteration starts. 
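Because `Henon3` is removed together with the other example systems, a stand-alone iteration of the three coupled Hénon maps described above may be useful. The sketch below is illustrative only (the helper name `henon3_orbit` is not package API); it follows the update rule of `eom_henon3` below, keeping the two previous states explicitly.

```julia
# Minimal sketch: iterate the three coupled Hénon maps with two-step memory.
function henon3_orbit(n::Int; a = 1.4, b = 0.3, c = 0.1, xi = [0.4, 0.5, 0.6])
    X = zeros(n + 2, 3)
    X[1, :] .= xi              # initial condition repeated over the
    X[2, :] .= xi              # first two time steps, as described above
    for t in 3:(n + 2)
        x₁, x₂, x₃ = X[t - 1, :]        # states one step back
        px₁, px₂, px₃ = X[t - 2, :]     # states two steps back
        X[t, 1] = a - x₁^2 + b * px₁
        X[t, 2] = a - c * x₁ * x₂ - (1 - c) * x₂^2 + b * px₂
        X[t, 3] = a - c * x₂ * x₃ - (1 - c) * x₃^2 + b * px₃
    end
    return X[3:end, :]
end
```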
-""" -struct Henon3{P, T, S, A, B, C} <: LaggedDiscreteDefinition{P} - past_states::P - xi::S - a::A - b::B - c::C - - function Henon3(; a::A = 1.4, b::B = 0.3, c::C = 0.1, - xi::S = [0.4, 0.5, 0.6]) where {A, B, C, S} - T = eltype(1.0) - m₁ = MVector{2, T}(repeat([xi[1]], 2)) - m₂ = MVector{2, T}(repeat([xi[2]], 2)) - m₃ = MVector{2, T}(repeat([xi[3]], 2)) - past_states = SVector{3, MVector{2, T}}(m₁, m₂, m₃) - P = typeof(past_states) - return new{P, T, S, A, B, C}(past_states, xi, a, b, c) - end -end - -function system(definition::Henon3) - return DiscreteDynamicalSystem(eom_henon3, definition.xi, definition) -end - -function eom_henon3(u, p::Henon3, t) - # `u` is simply ignored here, because the state is stored in the memory vectors - m₁, m₂, m₃ = p.past_states - x₁₁, x₁₂ = m₁[1], m₁[2] - x₂₁, x₂₂ = m₂[1], m₂[2] - x₃₁, x₃₂ = m₃[1], m₃[2] - - a, b, c = p.a, p.b, p.c - dx₁= a - x₁₁^2 + b*x₁₂ - dx₂= a - c*x₁₁*x₂₁ - (1 - c)*x₂₁^2 + b*x₂₂ - dx₃= a - c*x₂₁*x₃₁ - (1 - c)*x₃₁^2 + b*x₃₂ - - new_state = SVector{3}(dx₁, dx₂, dx₃) - update_states!(p, new_state) # Update memory vectors - return new_state -end diff --git a/src/example_systems/discrete/Ikeda2.jl b/src/example_systems/discrete/Ikeda2.jl deleted file mode 100644 index 91a12491d..000000000 --- a/src/example_systems/discrete/Ikeda2.jl +++ /dev/null @@ -1,55 +0,0 @@ -import Distributions: Uniform -import StaticArrays: SVector -import DynamicalSystemsBase: DiscreteDynamicalSystem -using Random -export Ikeda2 - -""" - Ikeda2 <: DiscreteDefinition - Ikeda2(; xi = [0.19, 0.21], c_xy = 1.0, c_yx = 1.0, a = 0.8, b = 12, c = 0.9, - r₁ = 0.2, r₂ = 0.15, σ = 0.05, rng = Random.default_rng()) - -Initialise a discrete two-dimensional Ikeda map system, adapted from [Cao1997](@citet), by -adding a noise term and allowing the influences from ``x \\to y`` -(``c_{xy}``) and from ``y \\to x`` (``c_{yx}``) to be adjusted. 
- -The difference equations are - -```math -\\begin{align*} -x(t+1) &= 1 + \\mu(x \\cos{(\\theta)} - c_{yx} y \\sin{(\\theta)}) - -min( \\dfrac{\\sigma \\xi_{t}^{(1)}}{(1-x)}, \\xi_{t}^{(2)}) \\\\ -y(t+1) &= \\mu(y \\cos{(\\theta)} - c_{xy} x \\sin{(\\theta)}) - -min(\\dfrac{\\sigma \\zeta_{t}^{(1)}}{(1-y)}, \\zeta_{t}^{(2)}) -\\end{align*} -``` -""" -Base.@kwdef struct Ikeda2{V, C1, C2, A, B, C, R1, R2, Σ, R} <: DiscreteDefinition - xi::V = [0.19, 0.21] - c_xy::C1 = 1.0 - c_yx::C2 = 1.0 - a::A = 0.8 - b::B = 12 - c::C = 0.9 - r₁::R1 = 0.2 - r₂::R2 = 0.15 - σ::Σ = 0.05 - rng::R = Random.default_rng() -end - -function system(definition::Ikeda2) - return DiscreteDynamicalSystem(eom_ikeda, definition.xi, definition) -end - -function eom_ikeda(u, p::Ikeda2, t) - x, y = u - (; xi, c_xy, c_yx, a, b, c, r₁, r₂, σ, rng) = p - θ = a - b/(c + x^2 + y^2) - μ = r₁*sin(t) - r₂ - d = Uniform(0.1, 0.4) - - dx = 1 + μ*(x*cos(θ) - c_yx*y*sin(θ)) - min(σ*rand(rng, d)/(1-x), rand(rng, d)) - dy = μ*(y*cos(θ) + c_xy*x*sin(θ)) - min(σ*rand(rng, d)/(1-y), rand(rng, d)) - - return SVector{2}(dx, dy) -end diff --git a/src/example_systems/discrete/Logistic2Bidir.jl b/src/example_systems/discrete/Logistic2Bidir.jl deleted file mode 100644 index 073c2fd21..000000000 --- a/src/example_systems/discrete/Logistic2Bidir.jl +++ /dev/null @@ -1,61 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using Random - -export Logistic2Bidir - -""" - Logistic2Bidir() <: DiscreteDefinition - Logistic2Bidir(; ui = [0.5, 0.5], c_xy = 0.1, c_yx = 0.1, r₁ = 3.78, r₂ = 3.66, - σ_xy = 0.05, σ_yx = 0.05, - rng = Random.default_rng()) - -A bivariate system consisting of two 1D noisy logistic maps which are bidirectionally -interacting [Diego2019](@cite). - -## Equations of motion - -```math -\\begin{align*} -x(t+1) &= r_1 f_{yx}^{t}(1 - f_{yx}^{t}) \\\\ -y(t+1) &= r_2 f_{xy}^{t}(1 - f_{xy}^{t}) \\\\ -f_{xy}^t &= \\dfrac{y(t) + c_{xy}(x(t) + \\sigma_{xy} \\xi_{xy}^t )}{1 + c_{xy} (1 + \\sigma_{xy} )} \\\\ -f_{yx}^t &= \\dfrac{x(t) + c_{yx}(y(t) + \\sigma_{yx} \\xi_{yx}^t )}{1 + c_{yx} (1 + \\sigma_{yx} )}, -\\end{align*} -``` - -Here, the coupling strength ``c_{xy}`` controls how strongly species ``x`` influences species -``y``, and vice versa for ``c_{yx}``. To simulate time-varying influence of unobserved -processes, we use the dynamical noise terms ``\\xi_{xy}^t`` and ``\\xi_{yx}^t``, drawn from a -uniform distribution with support on ``[0, 1]``. If ``\\sigma_{xy} > 0``, then the influence -of ``x`` on ``y`` is masked by dynamical noise equivalent to ``\\sigma_{xy} \\xi_{xy}^{t}`` at -the ``t``-th iteration of the map, and vice versa for ``\\sigma_{yx}``. -""" -Base.@kwdef struct Logistic2Bidir{V, C1, C2, R1, R2, Σx, Σy, R} <: DiscreteDefinition - xi::V = [0.5, 0.5] - c_xy::C1 = 0.1 - c_yx::C2 = 0.1 - r₁::R1 = 3.78 - r₂::R2 = 3.66 - σ_xy::Σx = 0.05 - σ_yx::Σy = 0.05 - rng::R = Random.default_rng() -end - -function system(definition::Logistic2Bidir) - return DiscreteDynamicalSystem(eom_logistic2bidir, definition.xi, definition) -end - -# Note: Until the `eom_logistic2_bidir` function is deprecated, this function must -# be called something different; otherwise the DiscreteDynamicalSystem constructor -# doesn't work. 
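For reference, the bidirectional noisy logistic map specified above is straightforward to reproduce without the removed example-system machinery. The following sketch uses the same parameter names as the docstring; `logistic2_bidir_step` and `logistic2_bidir_orbit` are hypothetical helpers, not part of the package.

```julia
using Random

# One iteration of the bidirectional noisy logistic map defined above.
function logistic2_bidir_step(x, y; c_xy = 0.1, c_yx = 0.1, r₁ = 3.78, r₂ = 3.66,
        σ_xy = 0.05, σ_yx = 0.05, rng = Random.default_rng())
    f_xy = (y + c_xy * (x + σ_xy * rand(rng))) / (1 + c_xy * (1 + σ_xy))
    f_yx = (x + c_yx * (y + σ_yx * rand(rng))) / (1 + c_yx * (1 + σ_yx))
    return r₁ * f_yx * (1 - f_yx), r₂ * f_xy * (1 - f_xy)
end

# Generate a length-`n` orbit from the initial condition (x0, y0).
function logistic2_bidir_orbit(n; x0 = 0.5, y0 = 0.5, kwargs...)
    xs, ys = zeros(n), zeros(n)
    x, y = x0, y0
    for t in 1:n
        x, y = logistic2_bidir_step(x, y; kwargs...)
        xs[t], ys[t] = x, y
    end
    return xs, ys
end

# e.g. xs, ys = logistic2_bidir_orbit(500; c_xy = 0.5, c_yx = 0.1)
```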
-function eom_logistic2bidir(u, p::Logistic2Bidir, t) - (; xi, c_xy, c_yx, r₁, r₂, σ_xy, σ_yx, rng) = p - x, y = u - f_xy = (y + c_xy*(x + σ_xy * rand(rng)) ) / (1 + c_xy*(1+σ_xy)) - f_yx = (x + c_yx*(y + σ_yx * rand(rng)) ) / (1 + c_yx*(1+σ_yx)) - dx = r₁ * (f_yx) * (1 - f_yx) - dy = r₂ * (f_xy) * (1 - f_xy) - return SVector{2}(dx, dy) -end diff --git a/src/example_systems/discrete/Logistic2Unidir.jl b/src/example_systems/discrete/Logistic2Unidir.jl deleted file mode 100644 index 75953185a..000000000 --- a/src/example_systems/discrete/Logistic2Unidir.jl +++ /dev/null @@ -1,68 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using DynamicalSystemsBase: trajectory -using Distributions: Uniform - -export Logistic2Unidir - - -""" - Logistic2Unidir <: DiscreteDefinition - Logistic2Unidir(; xi = [0.5, 0.5], c_xy = 0.1, σ_xy = 0.05, r₁ = 3.78, r₂ = 3.66, - rng = Random.default_rng()) - -A bivariate system consisting of two 1D noisy logistic maps which are undirectionally -coupled `x → y` [Diego2019](@cite). - -## Equations of motion - -The equations of motion are - -```math -\\begin{align*} -x(t+1) &= r_1 x(t)(1 - x(t)) \\\\ -y(t+1) &= r_2 f(x,y)(1 - f(x,y)), -\\end{align*} -``` - -with - -```math -\\begin{aligned} -f(x,y) = \\dfrac{y + \\frac{c_{xy}(x \\xi )}{2}}{1 + \\frac{c_{xy}}{2}(1+ \\sigma_{xy} )} -\\end{aligned} -``` - -The parameter `c_xy` controls how strong the dynamical forcing is. If `σ > 0`, -dynamical noise masking the influence of `x` on `y` equivalent to -``\\sigma_{xy} \\cdot \\xi`` is added at each iteration. Here,``\\xi`` is a draw from a -flat distribution on ``[0, 1]``. Thus, setting `σ_xy = 0.05` is equivalent to -add dynamical noise corresponding to a maximum of ``5 \\%`` of the possible -range of values of the logistic map. -""" -Base.@kwdef struct Logistic2Unidir{V, C, R1, R2, Σy, R} <: DiscreteDefinition - xi::V = [0.5, 0.5] - c_xy::C = 0.1 - r₁::R1 = 3.78 - r₂::R2 = 3.66 - σ_xy::Σy = 0.05 - rng::R = Random.default_rng() -end - -function system(definition::Logistic2Unidir) - return DiscreteDynamicalSystem(eom_logistic2uni, definition.xi, definition) -end - -# Note: Until the `eom_logistic2_bidir` function is deprecated, this function must -# be called something different; otherwise the DiscreteDynamicalSystem constructor -# doesn't work. -function eom_logistic2uni(u, p::Logistic2Unidir, t) - (; xi, c_xy, r₁, r₂, σ_xy, rng) = p - x, y = u - f_xy = (y + (c_xy*(x + σ_xy * rand(rng))/2) ) / (1 + (c_xy/2)*(1+σ_xy)) - - dx = r₁ * x * (1 - x) - dy = r₂ * (f_xy) * (1 - f_xy) - return SVector{2}(dx, dy) -end diff --git a/src/example_systems/discrete/Logistic3CommonDriver.jl b/src/example_systems/discrete/Logistic3CommonDriver.jl deleted file mode 100644 index 52138a1c9..000000000 --- a/src/example_systems/discrete/Logistic3CommonDriver.jl +++ /dev/null @@ -1,62 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem - - -export Logistic3CommonDriver - -""" - Logistic3CommonDriver() <: DiscreteDefinition - Logistic3CommonDriver(; u₀ = [0.1, 0.2, 0.3], - r = 4.0, σx = 0.05, σy = 0.05, σz = 0.05, - rng = Random.default_rng()) - -A discrete dynamical system consisting of three coupled 1D logistic maps -representing the response of two independent dynamical variables to the -forcing from a common driver [Runge2018](@cite). -The dynamical influence goes in the directions ``Z \\to X`` and ``Z \\to Y``. 
- -## Equations of motion - -The equations of motion are - -```math -\\begin{align*} -x(t+1) &= (x(t)(r - r x(t) - z(t) + \\sigma_x \\eta_x)) \\mod 1 \\\\ -y(t+1) &= (y(t)(r - r y(t) - z(t) + \\sigma_y \\eta_y)) \\mod 1 \\\\ -z(t+1) &= (z(t)(r - r z(t) + \\sigma_z \\eta_z)) \\mod 1 -\\end{align*} -``` - -Dynamical noise may be added to each of the dynamical variables by tuning the -parameters `σz`, `σx` and `σz`. Default values for the parameters -`r₁`, `r₂` and `r₃` are set such that the system exhibits chaotic behaviour, -with `r₁ = r₂ = r₃ = 4`. -""" -Base.@kwdef struct Logistic3CommonDriver{V, R, Σx, Σy, Σz, RNG} <: DiscreteDefinition - xi::V = [0.1, 0.2, 0.3] - r::R = 4.0 - σx::Σx = 0.05 - σy::Σy = 0.05 - σz::Σz = 0.05 - rng::RNG = Random.default_rng() -end - -function system(definition::Logistic3CommonDriver) - return DiscreteDynamicalSystem(eom_logistic3_commondriver, definition.xi, definition) -end - -# Note: Until the `eom_logistic2_bidir` function is deprecated, this function must -# be called something different; otherwise the DiscreteDynamicalSystem constructor -# doesn't work. -function eom_logistic3_commondriver(u, p::Logistic3CommonDriver, t) - (; xi, r, σx, σy, σz, rng) = p - x, y, z = u - ηx = rand(rng) - ηy = rand(rng) - ηz = rand(rng) - dx = (x*(r - r*x - z + σx*ηx)) % 1 - dy = (y*(r - r*y - z + σy*ηy)) % 1 - dz = (z*(r - r*z + σz*ηz)) % 1 - return SVector{3}(dx, dy, dz) -end diff --git a/src/example_systems/discrete/Logistic4Chain.jl b/src/example_systems/discrete/Logistic4Chain.jl deleted file mode 100644 index 3e4360100..000000000 --- a/src/example_systems/discrete/Logistic4Chain.jl +++ /dev/null @@ -1,74 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using Random - -export Logistic4Chain - -""" - Logistic4Chain <: DiscreteDefinition - Logistic4Chain(; xi = rand(4), - rx = 3.9, ry = 3.6, rz = 3.6, rw = 3.8, - cxy = 0.4, cyz = 0.4, cyw = 0.35, - rng = Random.default_rng()) - -A variant of [`Logistic2Bidir`](@ref) where four variables `X`, `Y`, `Z`, `W` -are coupled in a chain `X → Y → Z → W` with dynamical noise. - -## Description - -The equations of motion are - -```math -\\begin{align*} -x(t+1) &= r_x x(t)(1 - x(t)) \\\\ -y(t+1) &= r_y f_{xy}^{t}(1 - f_{xy}^{t}) \\\\ -z(t+1) &= r_z f_{yz}^{t}(1 - f_{yz}^{t}) \\\\ -w(t+1) &= r_w f_{zw}^{t}(1 - f_{zw}^{t}) \\\\ -f_{xy}^t &= \\dfrac{y(t) + c_{xy}(x(t) + \\sigma_{xy} \\xi_{xy}^t )}{1 + c_{xy} (1 + \\sigma_{xy} )} \\\\ -f_{yz}^t &= \\dfrac{z(t) + c_{yz}(y(t) + \\sigma_{yz} \\xi_{yz}^t )}{1 + c_{yz} (1 + \\sigma_{yz} )}, \\\\ -f_{zw}^t &= \\dfrac{w(t) + c_{zw}(z(t) + \\sigma_{zw} \\xi_{zw}^t )}{1 + c_{zw} (1 + \\sigma_{zw} )}, -\\end{align*} -``` - -where `c_{xy}`, `c_{yz}`, `c_{zw}` controls the coupling strength from `x` to `y`, `y` to -`z`, and `z` to `w`, respectively. The magnitude of dynamical noise in these couplings -are controlled by ``\\sigma_{xy}``, ``\\sigma_{yz}``, and ``\\sigma_{zw}``, respectively. -``\\xi_{xy}``, ``\\xi_{yz}``, and ``\\xi_{zw}`` are noise terms that each iteration -are drawn from independent uniform distributions over the unit interval. - -With default parameters and all dynamical noise terms set to zero, this is the system -from [Ye2015](@cite) (but note that for some initial conditions, -this system wanders off to ``\\pm \\infty`` for some of the variables). 
-""" -Base.@kwdef struct Logistic4Chain{V, RX, RY, RZ, RW, C1, C2, C3, Σ1, Σ2, Σ3, RNG} <: DiscreteDefinition - xi::V = [0.1, 0.2, 0.3, 0.4] - rx::RX = 3.9 - ry::RY = 3.6 - rz::RZ = 3.6 - rw::RW = 3.8 - c_xy::C1 = 0.4 - c_yz::C2 = 0.4 - c_zw::C3 = 0.35 - σ_xy::Σ1 = 0.05 - σ_yz::Σ2 = 0.05 - σ_zw::Σ3 = 0.05 - rng::RNG = Random.default_rng() -end - -function system(definition::Logistic4Chain) - return DiscreteDynamicalSystem(eom_logistic4_chain, definition.xi, definition) -end - -function eom_logistic4_chain(u, p::Logistic4Chain, t) - (; xi, rx, ry, rz, rw, c_xy, c_yz, c_zw, σ_xy, σ_yz, σ_zw, rng) = p - x, y, z, w = u - f_xy = (y + c_xy*(x + σ_xy * rand(rng)) ) / (1 + c_xy*(1+σ_xy)) - f_yz = (z + c_yz*(y + σ_yz * rand(rng)) ) / (1 + c_yz*(1+σ_yz)) - f_zw = (w + c_zw*(z + σ_zw * rand(rng)) ) / (1 + c_zw*(1+σ_zw)) - dx = rx * x * (1 - x) - dy = ry * (f_xy) * (1 - f_xy) - dz = rz * (f_yz) * (1 - f_yz) - dw = rw * (f_zw) * (1 - f_zw) - return SVector{4}(dx, dy, dz, dw) -end diff --git a/src/example_systems/discrete/Nonlinear3.jl b/src/example_systems/discrete/Nonlinear3.jl deleted file mode 100644 index 56ef53693..000000000 --- a/src/example_systems/discrete/Nonlinear3.jl +++ /dev/null @@ -1,68 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using Distributions: Normal -using Random - -export Nonlinear3 - -""" - Nonlinear3 <: DiscreteDefinition - Nonlinear3(; xi = rand(3), - σ₁ = 1.0, σ₂ = 1.0, σ₃ = 1.0, - a₁ = 3.4, a₂ = 3.4, a₃ = 3.4, - b₁ = 0.4, b₂ = 0.4, b₃ = 0.4, - c₁₂ = 0.5, c₂₃ = 0.3, c₁₃ = 0.5, - rng = Random.default_rng()) - -A 3d nonlinear system with nonlinear couplings ``x_1 \\to x_2``, -``x_2 \\to x_3`` and ``x_1 \\to x_3``. Modified from Gourévitch et al. -(2006)[Gourévitch2006]. - -## Equations of motion - -```math -\\begin{aligned} -x_1(t+1) &= a_1 x_1 (1-x_1(t))^2 e^{-x_2(t)^2} + 0.4 \\xi_{1}(t) \\\\ -x_2(t+1) &= a_1 x_2 (1-x_2(t))^2 e^{-x_2(t)^2} + 0.4 \\xi_{2}(t) + b x_1 x_2 \\\\ -x_3(t+1) &= a_3 x_3 (1-x_3(t))^2 e^{-x_3(t)^2} + 0.4 \\xi_{3}(t) + c x_{2}(t) + d x_{1}(t)^2. -\\end{aligned} -``` - -[Gourévitch2006]: - Gourévitch, B., Le Bouquin-Jeannès, R., & Faucon, G. (2006). Linear and nonlinear - causality between signals: methods, examples and neurophysiological - applications. Biological Cybernetics, 95(4), 349–369. 
-""" -Base.@kwdef struct Nonlinear3{V, Σx, Σy, Σz, AX, AY, AZ, BX, BY, BZ, C1, C2, C3, RNG} <: DiscreteDefinition - xi::V = [0.1, 0.2, 0.3] - σx::Σx = Normal(0, 1.0) - σy::Σy = Normal(0, 1.0) - σz::Σz = Normal(0, 1.0) - ax::AX = 3.4 - ay::AY = 3.4 - az::AZ = 3.4 - bx::BX = 0.4 - by::BY = 0.4 - bz::BZ = 0.4 - c_xy::C1 = 0.5 - c_xz::C2 = 0.3 - c_yz::C3 = 0.5 - rng::RNG = Random.default_rng() -end - -function system(definition::Nonlinear3) - return DiscreteDynamicalSystem(eom_nonlinear3, definition.xi, definition) -end - -function eom_nonlinear3(u, p, n) - x, y, z = u - (; xi, σx, σy, σz, ax, ay, az, bx, by, bz, c_xy, c_xz, c_yz, rng) = p - ξ₁ = rand(rng, σx) - ξ₂ = rand(rng, σy) - ξ₃ = rand(rng, σz) - dx = ax*x*(1-x)^2 * exp(-x^2) + bx*ξ₁ - dy = ay*y*(1-y)^2 * exp(-y^2) + by*ξ₂ + c_xy*x*y - dz = az*z*(1-z)^2 * exp(-z^2) + bz*ξ₃ + c_yz*y + c_xz*x^2 - return SVector{3}(dx, dy, dz) -end diff --git a/src/example_systems/discrete/Peguin2.jl b/src/example_systems/discrete/Peguin2.jl deleted file mode 100644 index ea60be4b1..000000000 --- a/src/example_systems/discrete/Peguin2.jl +++ /dev/null @@ -1,98 +0,0 @@ -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using DynamicalSystemsBase: trajectory -using Distributions: Normal -using Statistics: mean, std - -export Peguin2 - -""" - Peguin2 <: DiscreteDefinition - Peguin2(; xi = [0.5, 0.4], σ₁ = 0.1, σ₂ = 0.1, - p₁ = 0.7, p₂ = 0.1, p₃ = 0.4, p₄ = 2.4, p₅ = 0.9, p₆ = 4) - -A 2D discrete autoregressive system with nonlinear, nontrivial coupling from [1] . -This system is from Péguin-Feissolle & Teräsvirta (1999)[^Péguin-Feissolle1999], and -was also studied in Chávez et al. (2003)[^Chávez2003]. - -## Description - -The system is defined by the equations - -```math -\\begin{align*} -x(t+1) &= p_2 + p_3 x(t-2) + c_{yx}\\dfrac{p_4 - p_5 y(t-3)}{1 + e^{-p_6 y(t-3)}} + \\xi_1(t) \\\\ -y(t+1) &= p_1 y(t) + \\xi_2(t). -\\end{align*} -``` - -Here, ``\\xi_{1,2}(t)`` are two independent normally distributed noise processes -with zero mean and standard deviations ``\\sigma_1`` and ``\\sigma_2``. The -``\\xi_{1,2}(t)`` terms represent dynamical noise. The parameters of the original system -are here tweakable. - -[^Péguin-Feissolle1999]: - Péguin-Feissolle, A., & Teräsvirta, T. (1999). A General Framework for - Testing the Granger Noncausaality Hypothesis. Universites d’Aix-Marseille II - et III. [https://www.amse-aixmarseille.fr/sites/default/files/_dt/greqam/99a42.pdf](https://www.amse-aixmarseille.fr/sites/default/files/_dt/greqam/99a42.pdf) - -[^Chávez2003]: - Chávez, M., Martinerie, J., & Le Van Quyen, M. (2003). Statistical - assessment of nonlinear causality: application to epileptic EEG signals. - Journal of Neuroscience Methods, 124(2), 113–128. - doi:10.1016/s0165-0270(02)00367-9 - [https://www.sciencedirect.com/science/article/pii/S0165027002003679](https://www.sciencedirect.com/science/article/pii/S0165027002003679) -""" -struct Peguin2{P,V,T,Nx,Ny,P1,P2,P3,P4,P5,P6,R} <: LaggedDiscreteDefinition{P} - # `past_states[i]` := past states of the i-th variable of the system. 
- past_states::P - xi::V - nx::Nx # a distribution to sample noise from for x - ny::Ny # a distribution to sample noise from for y - p₁::P1 - p₂::P2 - p₃::P3 - p₄::P4 - p₅::P5 - p₆::P6 - rng::R - - function Peguin2(; - xi::V = [0.4, 0.5], - nx::Nx = Normal(0, 0.1), - ny::Ny = Normal(0, 0.1), - p₁::P1 = 0.7, - p₂::P2 = 0.1, - p₃::P3 = 0.4, - p₄::P4 = 2.4, - p₅::P5 = 0.9, - p₆::P6 = 4.0, - rng::R = Random.default_rng()) where {V,Nx,Ny,P1,P2,P3,P4,P5,P6,R} - T = eltype(1.0) - - mx = MVector{3, T}(repeat([xi[1]], 3)) - my = MVector{3, T}(repeat([xi[2]], 3)) - past_states = SVector{2, MVector{3, T}}(mx, my) - P = typeof(past_states) - return new{P, V,T,Nx,Ny,P1,P2,P3,P4,P5,P6,R}( - past_states, xi, nx, ny, p₁, p₂, p₃, p₄, p₅, p₆, rng) - end -end - -function system(definition::Peguin2) - return DiscreteDynamicalSystem(eom_peguin2, definition.xi, definition) -end - -function eom_peguin2(u, p, t) - (; xi, nx, ny, p₁, p₂, p₃, p₄, p₅, p₆, rng) = p - # `u` is simply ignored here, because the state is stored in the memory vectors - mx, my = p.past_states - x₂ = mx[2] - y₁, y₃ = my[1], my[3] - dx = p₁*y₁ + rand(rng, nx) - dy = p₂ + p₃*x₂ + (p₄ - p₅*y₃)/(1 + exp(-p₆*y₃)) + rand(rng, ny) - new_state = SVector{2}(dx, dy) - @inbounds update_states!(p, new_state) - return new_state -end diff --git a/src/example_systems/discrete/UlamLattice.jl b/src/example_systems/discrete/UlamLattice.jl deleted file mode 100644 index e3eb3341f..000000000 --- a/src/example_systems/discrete/UlamLattice.jl +++ /dev/null @@ -1,35 +0,0 @@ -using DynamicalSystemsBase: DiscreteDynamicalSystem - -export UlamLattice - -""" - UlamLattice <: DiscreteDefinition - UlamLattice(; D::Int = 10; ui = sin.(1:10), ε::Real = 0.10) - -A lattice of `D` unidirectionally coupled ulam maps [Schreiber2000](@cite) defined as - -```math -x^{m}_{t+1} = f(\\epsilon x^{m-1}_{t} + (1 - \\epsilon) x_{t}^{m}), -``` - -where ``m = 1, 2, \\ldots, D`` and ``f(x) = 2 - x^2``. In this system, information transfer -happens only in the direction of increasing ``m``. -""" -Base.@kwdef struct UlamLattice{V, E, F} <: DiscreteDefinition - xi::V = sin.(1:10) - ε::E = 0.1 - f::F = x -> 2 - x^2 -end - -function system(definition::UlamLattice) - return DiscreteDynamicalSystem(eom_ulamlattice, definition.xi, definition) -end - -function eom_ulamlattice(dx, x, p::UlamLattice, t) - (; xi, ε, f) = p - # `u` is simply ignored here, because the state is stored in the memory vectors - dx[1] = f(ε*x[length(dx)] + (1-ε)*x[1]) - for i in 2:length(dx) - dx[i] = f(ε*x[i-1] + (1-ε)*x[i]) - end -end diff --git a/src/example_systems/discrete/Var1.jl b/src/example_systems/discrete/Var1.jl deleted file mode 100644 index b77895f29..000000000 --- a/src/example_systems/discrete/Var1.jl +++ /dev/null @@ -1,35 +0,0 @@ -using DynamicalSystemsBase: DiscreteDynamicalSystem -using Distributions: Normal -using Random - -export Var1 - -""" - Var1 <: DiscreteDefinition - Var1(; xi = [0.5, 0.5, 0.5], - a = 0.5, θ = Normal(0, 1.0), η = Normal(0, 0.2), ϵ = Normal(0, 0.3), - rng = Random.default_rng()) - -A discrete vector autoregressive system where X₁ → X₂ → X₃. 
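As with the other removed example systems, the vector autoregressive system described above can be reproduced in a few lines. The sketch below is illustrative (the helper `var1_orbit` is not package API) and mirrors the update rule of `eom_var1system` below.

```julia
using Distributions, Random

# Stand-alone sketch of the VAR system X₁ → X₂ → X₃ defined above.
function var1_orbit(n; a = 0.5, θ = Normal(0, 1.0), η = Normal(0, 0.2),
        ϵ = Normal(0, 0.3), rng = Random.default_rng())
    X = zeros(n, 3)
    x₁, x₂, x₃ = 0.5, 0.5, 0.5          # initial condition, as in the definition
    for t in 1:n
        x₁n = rand(rng, θ)
        x₂n = x₁ * rand(rng, η)
        x₃n = a * x₃ * x₂ + rand(rng, ϵ)
        x₁, x₂, x₃ = x₁n, x₂n, x₃n
        X[t, :] .= (x₁, x₂, x₃)
    end
    return X
end
```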
-""" -Base.@kwdef struct Var1{V, A, Σ, N, E, R} <: DiscreteDefinition - xi::V = [0.5, 0.5, 0.5] - a::A = 0.5 - θ::Σ = Normal(0, 1.0) - η::N = Normal(0, 0.2) - ϵ::E = Normal(0, 0.3) - rng::R = Random.default_rng() -end - -function system(definition::Var1) - return DiscreteDynamicalSystem(eom_var1system, definition.xi, definition) -end - -function eom_var1system(u, p::Var1, n) - x₁, x₂, x₃ = u - (; a, θ, η, ϵ, rng) = p - dx₁ = rand(rng, θ) - dx₂ = x₁ * rand(rng, η) - dx₃ = a*x₃ * x₂ + rand(rng, ϵ) - return SVector{3}(dx₁, dx₂, dx₃) -end diff --git a/src/example_systems/discrete/Verdes3.jl b/src/example_systems/discrete/Verdes3.jl deleted file mode 100644 index 2ee5de947..000000000 --- a/src/example_systems/discrete/Verdes3.jl +++ /dev/null @@ -1,46 +0,0 @@ -using DynamicalSystemsBase: DiscreteDynamicalSystem -using Distributions: Normal - -export Verdes3 - -""" - Verdes3 <: DiscreteDefinition - Verdes3(; ui = [0.1, 0.15, 0.2], ωy = 315, ωz = 80, σx = 0.0, σy = 0.0, σz = 0.0) - -A 3D system where the response X is a highly nonlinear combination -of Y and Z [Verdes2005](@cite). The forcings Y and Z involve sines and cosines, and -have different periods, which controlled by `ωy` and `ωz`. - -```math -\\begin{align*} -x(t+1) &= \\dfrac{y(t)(18y(t) - 27y(t)^2 + 10)}{2} + z(t)(1-z(t)) + ηx \\\\ -y(t+1) &= \\dfrac{(1 - \\dfrac{\\cos(2\\pi)}{\\omega y}t)}{2} + ηy \\\\ -z(t+1) &= \\dfrac{(1 - \\dfrac{\\sin(2\\pi)}{\\omega z}t)}{2} + ηz -\\end{align*} -``` -where ηx, ηy, ηz is gaussian noise with mean 0 and standard deviation `σx`, `σy` -and `σz`. -""" -Base.@kwdef struct Verdes3{V, Ωy, Ωz, N1, N2, N3, R} <: DiscreteDefinition - xi::V = [0.1, 0.15, 0.2] - ωy::Ωy = 315 - ωz::Ωz = 80 - σx::N1 = Normal(0, 0.01) - σy::N2 = Normal(0, 0.01) - σz::N3 = Normal(0, 0.01) - rng::R = Random.default_rng() -end - -function system(definition::Verdes3) - return DiscreteDynamicalSystem(eom_verdes3, definition.xi, definition) -end - -function eom_verdes3(u, p::Verdes3, t) - x, y, z = u - (; xi, ωy, ωz, σx, σy, σz, rng) = p - - dx = y*(18y - 27y^2 + 10)/2 + z*(1-z) + rand(rng, σx) - dy = (1 - cos((2*pi/ωy) * t))/2 + rand(rng, σy) - dz = (1 - sin((2*pi/ωz) * t))/2 + rand(rng, σz) - return SVector{3}(dx, dy, dz) -end diff --git a/src/example_systems/discrete/deprecate.jl b/src/example_systems/discrete/deprecate.jl deleted file mode 100644 index 17aee33c1..000000000 --- a/src/example_systems/discrete/deprecate.jl +++ /dev/null @@ -1,1045 +0,0 @@ - -using LabelledArrays: @LArray -using StaticArrays: SVector -using DynamicalSystemsBase: DiscreteDynamicalSystem -using DynamicalSystemsBase: trajectory -using StateSpaceSets: StateSpaceSet -using Distributions: Normal, Uniform - -export ar1_unidir -export henon_triple -export henon2 -export ar1_bidir -export ikeda -export linearmap1 -export logistic2_unidir -export logistic2_bidir -export logistic3 -export logistic4 -export nonlinear_3d -export nontrivial_pegiun -export ulam -export var1 -export verdes - -function eom_ar1_unidir(x, p, n) - a₁, b₁, c_xy, σ = (p...,) - x, y = (x...,) - ξ₁ = rand(Normal(0, σ)) - ξ₂ = rand(Normal(0, σ)) - - dx = a₁*x + ξ₁ - dy = b₁*y + c_xy*x + ξ₂ - return SVector{2}(dx, dy) -end - -function ar1_unidir(u₀, a₁, b₁, c_xy, σ) - @warn "`ar1_unidir` is deprecated in CausalityTools v2. 
"* - "Use `system(AR1Unidir())` instead, which returns a `DiscreteDynamicalSystem` "* - "that can be iterated" - p = @LArray [a₁, b₁, c_xy, σ] (:a₁, :b₁, :c_xy, :σ) - DiscreteDynamicalSystem(eom_ar1_unidir, u₀, p) -end - -""" - ar1_unidir(u₀, a₁ = 0.90693, b₁ = 0.40693, c_xy = 0.5, - σ = 0.40662) → DiscreteDynamicalSystem - -A bivariate, order one autoregressive model, where ``x \\to y`` (Paluš et al, -2018)[^Paluš2018]. - -## Equations of motion - -```math -\\begin{aligned} -x(t+1) &= a_1 x(t) + \\xi_{1} \\\\ -y(t+1) &= b_1 y(t) - c_{xy} x + \\xi_{2}, -\\end{aligned} -``` - -where ``\\xi_{1}`` and ``\\xi_{2}`` are drawn from normal distributions -with zero mean and standard deviation `σ` at each iteration. - -[^Paluš2018]: - Paluš, M., Krakovská, A., Jakubík, J., & Chvosteková, M. (2018). Causality, - dynamical systems and the arrow of time. Chaos: An Interdisciplinary Journal of - Nonlinear Science, 28(7), 075307. http://doi.org/10.1063/1.5019944 -""" -ar1_unidir(;u₀ = rand(2), a₁ = 0.90693, b₁ = 0.40693, c_xy = 0.5, σ = 0.40662) = - ar1_unidir(u₀, a₁, b₁, c_xy, σ) - -function eom_henon_triple(u, p, n) - O = zeros(Float64, n + 3, 3) - x₁, x₂, x₃ = (u...,) - a, b, c = (p...,) - - # Propagate initial condition to the three first time steps. - for i = 1:3 - O[i, 1] = x₁ - O[i, 2] = x₂ - O[i, 3] = x₃ - end - for i = 4:n+3 - x₁1 = O[i-1, 1] - x₁2 = O[i-2, 1] - x₂1 = O[i-1, 2] - x₂2 = O[i-2, 2] - x₃1 = O[i-1, 3] - x₃2 = O[i-2, 3] - - x₁new = a - x₁1^2 + b*x₁2 - x₂new = a - c*x₁1*x₂1 - (1 - c)*x₂1^2 + b*x₂2 - x₃new = a - c*x₂1*x₃1 - (1 - c)*x₃1^2 + b*x₃2 - - O[i, 1] = x₁new - O[i, 2] = x₂new - O[i, 3] = x₃new - end - - return O[4:end, :] -end - - -function henon_triple(u₀, a, b, c, n::Int, n_transient::Int) - @warn "`henon_triple` is deprecated in CausalityTools v2. "* - "Use `system(HenonTriple())` instead, which returns a `DiscreteDynamicalSystem` "* - "that can be iterated." - p = @LArray [a, b, c] (:a, :b, :c) - o = eom_henon_triple(u₀, p, n + n_transient) - x, y, z = o[n_transient+1:end, 1], o[n_transient+1:end, 2], o[n_transient+1:end, 3] - return StateSpaceSet(x, y, z) -end - -""" - henon_triple(x, p, n) → Function - -Iterate a 3D discrete system consisting of coupled Henon maps where the coupling -is x1 → x2 → x3 [1]. This version allows for tweaking the parameters of the -equations. - -The difference equations are: - -```math -\\begin{aligned} -x_1(t+1) &= a - x_1(t)^2 + b x_1(t-2) \\ -x_2(t+1) &= a - c x_1(t) x_2(t)- (1 - c) x_2(t)^2 + b x_2(t-1) \\ -x_3(t+1) &= c x_2(t) x_3(t) - (1 - c) x_3(t)^2 + b x_3(t-1) -\\end{aligned} -``` - -Here ``c`` is the coupling constant. The system becomes completely synchronized -for ``c >= 0.7``. - -# References -1. Papana, A., Kyrtsou, C., Kugiumtzis, D., & Diks, C. (2013). Simulation study of -direct causality measures in multivariate time series. Entropy, 15(7), 2635–2661. -""" -function henon_triple(;u₀ = rand(3), a = 1.4, b = 0.3, c = 0.0, n::Int = 100, n_transient::Int = 100) - henon_triple(u₀, a, b, c, n, n_transient) -end - - -export henon2 - -function eom_henon2(x, p, n) - c_xy = p[1] - x₁, x₂, y₁, y₂ = x - dx₁ = 1.4 - x₁^2 + 0.3*x₂ - dx₂ = x₁ - dy₁ = 1.4 - (c_xy * x₁ * y₁ + (1 - c_xy)*y₁^2) + 0.3*y₂ - dy₂ = y₁ - return SVector{4}(dx₁, dx₂, dy₁, dy₂) -end - -function henon2(u₀, c_xy) - @warn "`henon2` is deprecated in CausalityTools v2. "* - "Use `system(Henon2())` instead, which returns a `DiscreteDynamicalSystem` "* - "that can be iterated." 
- p = (c_xy,) - DiscreteDynamicalSystem(eom_henon2, u₀, p) -end - -""" - henon2(;u₀ = [0.1, 0.2, 0.2, 0.3], c_xy = 2.0) → DiscreteDynamicalSystem - -A bivariate system consisting of two identical 1D Henon maps with -unidirectional forcing ``X \\to Y `` (Krakovská et al., 2018)[^Krakovská2018]. - -## Equations of motion - -The equations of motion are - -```math -\\begin{aligned} -x_1(t+1) &= 1.4 - x_1^2(t) + 0.3x_2(t) \\\\ -x_2(t+1) &= x_1(t) \\\\ -y_1(t+1) &= 1.4 - [c_{xy} x_1(t) y_1(t) + (1-c_{xy}) y_1^2(t)] + 0.3 y_2(t) \\\\ -y_2(t+1) &= y_1(t) -\\end{aligned} -``` - -[^Krakovská2018]: - Krakovská, A., Jakubík, J., Chvosteková, M., Coufal, D., Jajcay, N., & Paluš, M. (2018). - Comparison of six methods for the detection of causality in a bivariate time series. - Physical Review E, 97(4), 042207. -""" -henon2(;u₀ = [0.1, 0.2, 0.2, 0.3], c_xy = 2.0) = henon2(u₀, c_xy) - - -function eom_ar1_bidir(x, p, t) - a₁, b₁, c_xy, c_yx, ϵx, ϵy = p - x, y = (x...,) - dx = a₁*x + c_yx*y + rand(ϵx) - dy = b₁*y + c_xy*x + rand(ϵy) - return SVector{2}(dx, dy) -end - -function ar1_bidir(u₀,a₁, b₁, c_xy, c_yx, σx, σy) - @warn "`ar1_bidir` is deprecated in CausalityTools v2. "* - "Use `system(AR1Bidir())` instead, which returns a `DiscreteDynamicalSystem` "* - "that can be iterated." - ϵx = Normal(0, σx) - ϵy = Normal(0, σy) - p = (a₁, b₁, c_xy, c_yx, ϵx, ϵy) - return DiscreteDynamicalSystem(eom_ar1_bidir, u₀, p) -end - -""" - ar1_bidir(;u₀ = rand(2), a₁ = 0.5, b₁ = 0.7, c_xy = 0.1, c_yx = 0.2, - σx = 0.3, σy = 0.3) → DiscreteDynamicalSystem - -A system consisting of two mutually coupled first order autoregressive processes. - -## Equations of motion - -```math -\\begin{aligned} -x(t+1) &= a_{1}x + c_{yx}y + \\epsilon_{x} \\\\ -y(t+1) &= b_{1}y + c_{xy}x + \\epsilon_{y} -\\end{aligned} -``` - -where at each time step, ``\\epsilon_{x}`` and ``\\epsilon_{y}`` are drawn -from independent normal distributions with zero mean and standard deviations `σx` and `σy`, -respectively. -""" -ar1_bidir(;a₁ = 0.5, b₁ = 0.7, u₀ = rand(2), c_xy = 0.1, c_yx = 0.2, σx = 0.3, σy = 0.3) = - ar1_bidir(u₀, a₁, b₁, c_xy, c_yx, σx, σy) - -export anishchenko1 - -function eom_anishchenko1(u, p, t) - x, ϕ = (u...,) - α, s, ω = (p...,) - dx = α*(1 - s*cos(2*pi*ϕ))*x*(1 - x) - dϕ = (ϕ + ω) % 1 - - return SVector{2}(dx, dϕ) -end - -function anishchenko1(u₀, α, s, ω) - @warn "`anishchenko1` is deprecated in CausalityTools v2. "* - "Use `system(Anishchenko())` instead, which returns a `DiscreteDynamicalSystem` "* - "that can be iterated." - p = @LArray [α, s, ω] (:α, :s, :ω) - DiscreteDynamicalSystem(eom_anishchenko1, u₀, p) -end - -""" - anishchenko1(;u₀ = rand(2), α =3.277, s=0.1, ω=0.5*(sqrt(5)-1)) → DiscreteDynamicalSystem - -Initialise the system defined by eq. 13 in [Anishchenko1998](@cite), -which can give strange, nonchaotic attractors. 
- -## Equations of motion - -```math -\\begin{aligned} -dx &= \\alpha (1-s \\cos (2 \\pi \\phi )) \\cdot x(1-x) \\\\ -dϕ &= (\\phi + \\omega ) \\mod{1} -\\end{aligned} -``` -""" -anishchenko1(;u₀ = rand(2), α =3.277, s=0.1, ω=0.5*(sqrt(5)-1)) = - anishchenko1(u₀, α, s, ω) - - -export ikeda - -function eom_ikeda(u, p, t) - x, y = u[1], u[2] - c_xy, c_yx, a, b, c, r₁, r₂, σ = p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8] - - θ = a - b/(c + x^2 + y^2) - μ = r₁*sin(t) - r₂ - d = Uniform(0.1, 0.4) - - dx = 1 + μ*(x*cos(θ) - c_yx*y*sin(θ)) - min(σ*rand(d)/(1-x), rand(d)) - dy = μ*(y*cos(θ) + c_xy*x*sin(θ)) - min(σ*rand(d)/(1-y), rand(d)) - - SVector{2}(dx, dy) -end - -""" - ikeda(; u₀ = rand(2), c_xy = 1.0, c_yx = 1.0, a = 0.8, b = 12, c = 0.9, - r₁ = rand(Uniform(0.01, 0.3)), r₂ = rand(Uniform(0.01, 0.3)), σ = 0.05) - -Initialise a discrete two-dimensional Ikeda map system, adapted from [1] -by adding a noise term and allowing the influences from ``x \\to y`` (``c_{xy}``) and -from ``y \\to x`` (``c_{yx}``) to be adjusted. - -As a rule-of-thumb, if parameters `a`, `b`, and `c` are drawn from uniform -distributions on `[0.8, 1.5]`, `[10, 14]` and `[0.1, 0.9]`. - -The difference equations are - -```math -\\begin{aligned} -x(t+1) = 1 + \\mu(x \\cos{(\\theta)} - c_{yx} y \\sin{(\\theta)}) - min(\\dfrac{\\sigma \\xi_{t}^{(1)})}{(1-x)}, \\xi_{t}^{(2)} \\\\ -y(t+1) = \\mu(y \\cos{(\\theta)} - c_{xy} x \\sin{(\\theta)}) - min(\\dfrac{\\sigma \\zeta_{t}^{(1)})}{(1-y)}, \\zeta_{t}^{(2)} -\\end{aligned} -``` - -## References - -1. Cao, Liangyue, Alistair Mees, and Kevin Judd. "Modeling and predicting - non-stationary time series." International Journal of Bifurcation and - Chaos 7.08 (1997): 1823-1831. -""" -function ikeda(u₀, c_xy, c_yx, a, b, c, r₁, r₂, σ) - @warn "`ikeda` is deprecated in CausalityTools v2. "* - "Use `system(Ikeda())` instead, which returns a `DiscreteDynamicalSystem` "* - "that can be iterated." - p = @LArray [c_xy, c_yx, a, b, c, r₁, r₂, σ] (:c_xy, :c_yx, :a, :b, :c, :r₁, :r₂, :σ) - DiscreteDynamicalSystem(eom_ikeda, u₀, p) -end - -function ikeda(; u₀ = rand(2), c_xy = 1.0, c_yx = 1.0, a = 0.8, b = 12, c = 0.9, - r₁ = rand(Uniform(0.01, 0.3)), r₂ = rand(Uniform(0.01, 0.3)), σ = 0.05) - ikeda(u₀, c_xy, c_yx, a, b, c, r₁, r₂, σ) -end - -function eom_linearmap1(x, p, t) - c = p[1] - x, y = (x...,) - t = t + 3 - dx = 3.4*x*(t - 1)*(1 - x^2*(t - 1))*exp(-x^2*(t - 1)) + 0.8*x*(t - 2) + rand(Normal(0, 0.05)) - dy = 3.4*y*(t - 1)*(1 - y^2*(t - 1))*exp(-y^2*(t - 1)) + 0.5*y*(t - 2) + c*x*(t - 2) + rand(Normal(0, 0.05)) - return SVector{2}(dx, dy) -end - -function linearmap1(u₀, c) - @warn "`linearmap1` is deprecated in CausalityTools v2. "* - "Use `system(ChaoticNoisyLinear2())` instead, which returns a `DiscreteDynamicalSystem` "* - "that can be iterated." - p = @LArray [c] (:c) - DiscreteDynamicalSystem(eom_linearmap2, u₀, p) -end - -""" - linearmap1(;u₀ = [1, rand(2)], c = 0.5) → DiscreteDynamicalSystem - -Linear map from [Chen2004](@citet). -""" -linearmap1(;u₀ = rand(2), c = 0.5) = linearmap1(u₀, c) - - -export logistic2_bidir - -""" - logistic2_bidir(u₀, c_xy, c_yx, r₁, r₂, σ_xy, σ_yx) - -Equations of motion for a bidirectional logistic model for the chaotic -population dynamics of two interacting species. 
This system is from [1], -and is given by - -```math -\\begin{align} -x(t+1) &= r_1 f_{yx}^{t}(1 - f_{yx}^{t}) \\ -y(t+1) &= r_2 f_{xy}^{t}(1 - f_{xy}^{t}) \\ -f_{xy}^t &= \\dfrac{y(t) + c_{xy}(x(t) + \\sigma_{xy} \\xi_{xy}^t )}{1 + c_{xy} (1 + \\sigma_{xy} )} \\ -f_{yx}^t &= \\dfrac{x(t) + c_{yx}(y(t) + \\sigma_{yx} \\xi_{yx}^t )}{1 + c_{yx} (1 + \\sigma_{yx} )}, -\\end{align} -``` - -where the coupling strength ``c_{xy}`` controls how strongly species ``x`` influences species -``y``, and vice versa for ``c_{yx}``. To simulate time-varying influence of unobserved -processes, we use the dynamical noise terms ``\\xi_{xy}^t`` and ``\\xi_{yx}^t``, drawn from a -uniform distribution with support on ``[0, 1]``. If ``\\sigma_{xy} > 0``, then the influence -of ``x`` on ``y`` is masked by dynamical noise equivalent to ``\\sigma_{xy} \\xi_{xy}^{t}`` at -the ``t``-th iteration of the map, and vice versa for ``\\sigma_{yx}``. - -## References - -1. Diego, David, Kristian Agasøster Haaga, and Bjarte Hannisdal. "Transfer entropy computation - using the Perron-Frobenius operator." Physical Review E 99.4 (2019): 042212. -""" -function eom_logistic2_bidir(dx, x, p, n) - - # c_xy is the coupling from x to y - # c_yx is the coupling from y to x - # σ_yx is the dynamical noise from y to x - # σ_xy is the dynamical noise from y to x - c_xy, c_yx, r₁, r₂, σ_xy, σ_yx = (p...,) - - ξ₁ = rand() # random number from flat distribution on [0, 1] - ξ₂ = rand() # random number from flat distribution on [0, 1] - x, y = x[1], x[2] - - f_xy = (y + c_xy*(x + σ_xy*ξ₁) ) / (1 + c_xy*(1+σ_xy)) - f_yx = (x + c_yx*(y + σ_yx*ξ₂) ) / (1 + c_yx*(1+σ_yx)) - - dx[1] = r₁ * (f_yx) * (1 - f_yx) - dx[2] = r₂ * (f_xy) * (1 - f_xy) - return -end - -function logistic2_bidir(u₀, c_xy, c_yx, r₁, r₂, σ_xy, σ_yx) - @warn "`logistic2_bidir` is deprecated in CausalityTools v2. "* - "Use `system(Logistic2Bidir())` instead, which returns a "* - "`DiscreteDynamicalSystem` that can be iterated." - p = @LArray [c_xy, c_yx, r₁, r₂, σ_xy, σ_yx] (:c_xy, :c_yx, :r₁, :r₂, :σ_xy, :σ_yx) - DiscreteDynamicalSystem(eom_logistic2_bidir, u₀, p) -end - -""" - logistic2_bidir(;u₀ = rand(2), c_xy = 0.1, c_yx = 0.1, - r₁ = 3.78, r₂ = 3.66, σ_xy = 0.05, σ_yx = 0.05) - -A bidirectional logistic model for the chaotic population dynamics of two interacting -species [1]. - -## Equations of motion - -The equations of motion are - -```math -\\begin{align} -x(t+1) &= r_1 f_{yx}^{t}(1 - f_{yx}^{t}) \\\\ -y(t+1) &= r_2 f_{xy}^{t}(1 - f_{xy}^{t}) \\\\ -f_{xy}^t &= \\dfrac{y(t) + c_{xy}(x(t) + \\sigma_{xy} \\xi_{xy}^t )}{1 + c_{xy} (1 + \\sigma_{xy} )} \\\\ -f_{yx}^t &= \\dfrac{x(t) + c_{yx}(y(t) + \\sigma_{yx} \\xi_{yx}^t )}{1 + c_{yx} (1 + \\sigma_{yx} )}, -\\end{align} -``` - -where the coupling strength ``c_{xy}`` controls how strongly species ``x`` influences species -``y``, and vice versa for ``c_{yx}``. To simulate time-varying influence of unobserved -processes, we use the dynamical noise terms ``\\xi_{xy}^t`` and ``\\xi_{yx}^t``, drawn from a -uniform distribution with support on ``[0, 1]``. If ``\\sigma_{xy} > 0``, then the influence -of ``x`` on ``y`` is masked by dynamical noise equivalent to ``\\sigma_{xy} \\xi_{xy}^{t}`` at -the ``t``-th iteration of the map, and vice versa for ``\\sigma_{yx}``. 
-""" -logistic2_bidir(;u₀ = rand(2), c_xy = 0.1, c_yx = 0.1, - r₁ = 3.78, r₂ = 3.66, σ_xy = 0.05, σ_yx = 0.05) = - logistic2_bidir(u₀, c_xy, c_yx, r₁, r₂, σ_xy, σ_yx) - - export logistic2_unidir - -""" - eom_logistic2(dx, x, p, n) → function - -Equations of motions for a system consisting of two coupled logistic maps where -X unidirectionally influences Y [1]. - - -## Equations of motion - -The equations of motion are - -```math -\\begin{aligned} -x(t+1) &= r_1 x(t)(1 - x(t)) \\\\ -y(t+1) &= r_2 f(x,y)(1 - f(x,y)), -\\end{aligned} -``` - -with - -```math -\\begin{aligned} -f(x,y) = \\dfrac{y + \\frac{c_{xy}(x \\xi )}{2}}{1 + \\frac{c_{xy}}{2}(1+ \\sigma )} -\\end{aligned} -``` - -The parameter `c_xy` controls how strong the dynamical forcing is. If `σ > 0`, -dynamical noise masking the influence of `x` on `y` equivalent to -``\\sigma \\cdot \\xi`` is added at each iteration. Here,``\\xi`` is a draw from a -flat distribution on ``[0, 1]``. Thus, setting `σ = 0.05` is equivalent to -add dynamical noise corresponding to a maximum of ``5 \\%`` of the possible -range of values of the logistic map. - -1. Diego, David, Kristian Agasøster Haaga, and Bjarte Hannisdal. "Transfer entropy computation - using the Perron-Frobenius operator." Physical Review E 99.4 (2019): 042212. -""" - -function eom_logistic2_unidir(dx, x, p, n) - c_xy, r₁, r₂, σ = (p...,) - ξ = rand() # random number from flat distribution on [0, 1] - x, y = x[1], x[2] - f_xy = (y + (c_xy*(x + σ*ξ)/2) ) / (1 + (c_xy/2)*(1+σ)) - - dx[1] = r₁ * x * (1 - x) - dx[2] = r₂ * (f_xy) * (1 - f_xy) - return -end - -function logistic2_unidir(u₀, c_xy, r₁, r₂, σ) - @warn "`logistic2_unidir` is deprecated in CausalityTools v2. "* - "Use `system(Logistic2Unidir())` instead, which returns a "* - "`DiscreteDynamicalSystem` that can be iterated." - p = @LArray [c_xy, r₁, r₂, σ] (:c_xy, :r₁, :r₂, :σ) - DiscreteDynamicalSystem(eom_logistic2_unidir, u₀, p) -end - -""" - logistic2(;u₀ = rand(2), c_xy = 0.1, σ = 0.05, - r₁ = 3.78, r₂ = 3.66) → DiscreteDynamicalSystem - -Initialise a system consisting of two coupled logistic maps where X -unidirectionally influences Y. By default, the parameters `r₁` and `r₂` are set -to values yielding chaotic behaviour. - -## Equations of motion - -The equations of motion are - -```math -\\begin{aligned} -x(t+1) &= r_1 x(t)(1 - x(t)) \\\\ -y(t+1) &= r_2 f(x,y)(1 - f(x,y)), -\\end{aligned} -``` - -with - -```math -\\begin{aligned} -f(x,y) = \\dfrac{y + \\frac{c_{xy}(x \\xi )}{2}}{1 + \\frac{c_{xy}}{2}(1+ \\sigma )} -\\end{aligned} -``` - -The parameter `c_xy` controls how strong the dynamical forcing is. If `σ > 0`, -dynamical noise masking the influence of `x` on `y` equivalent to -``\\sigma \\cdot \\xi`` is added at each iteration. Here,``\\xi`` is a draw from a -flat distribution on ``[0, 1]``. Thus, setting `σ = 0.05` is equivalent to -add dynamical noise corresponding to a maximum of ``5 \\%`` of the possible -range of values of the logistic map. - -## References - -1. Diego, David, Kristian Agasøster Haaga, and Bjarte Hannisdal. "Transfer entropy computation - using the Perron-Frobenius operator." Physical Review E 99.4 (2019): 042212. 
-""" -logistic2_unidir(;u₀ = rand(2), c_xy = 0.1, r₁ = 3.78, r₂ = 3.66, σ = 0.05) = - logistic2_unidir(u₀, c_xy, r₁, r₂, σ) - -#To get chaotic realisation, check that the orbit doesn't settle to a few unique values -function good_logistic_unidir_trajectory(npts::Int; - Ttr = 1000, dt = 1, - c_xy = 0.5, - Dr₁ = Uniform(3.6, 4.0), - Dr₂ = Uniform(3.6, 4.0), - σ = 0.0, - n_maxtries = 300) - - n_tries = 0 - while n_tries <= n_maxtries - s = logistic2_unidir(u₀ = rand(2), - c_xy = c_xy, - σ = σ, - r₁ = rand(Dr₁), - r₂ = rand(Dr₂)) - - o = trajectory(s, npts * dt - 1, Ttr = Ttr, dt = dt) - - # Ensure there are not too many repeated values, so we don't have trivial behaviour - - if length(unique(o[:, 1])) > npts * 0.9 && length(unique(o[:, 2])) > npts * 0.9 - return o - end - - n_tries += 1 - end -end - - -export logistic3 - -""" - eom_logistic3(u, p, t) - -Equations of motion for a system consisting of three coupled logistic map -representing the response of two independent dynamical variables to the -forcing from a common driver. The dynamical influence goes in the directions -Z → X and Z → Y. - -## Equations of motion - -The equations of motion are - -```math -\\begin{aligned} -x(t+1) = (x(t)(r - r_1 x(t) - z(t) + σ_x η_x)) \\mod 1 \\\\ -y(t+1) = (y(t)(r - r_2 y(t) - z(t) + σ_y η_y)) \\mod 1 \\\\ -z(t+1) = (z(t)(r - r_3 z(t) + σ_z η_z)) \\mod 1 -\\end{aligned} -``` - -Dynamical noise may be added to each of the dynamical variables by tuning the -parameters `σz`, `σx` and `σz`. Default values for the parameters -`r₁`, `r₂` and `r₃` are set such that the system exhibits chaotic behaviour, -with `r₁ = r₂ = r₃ = 4`. - -## References - -1. Runge, Jakob. Causal network reconstruction from time series: From theoretical - assumptions to practical estimation, Chaos 28, 075310 (2018); - doi: 10.1063/1.5025050 -""" -function eom_logistic3(u, p, t) - r₁, r₂, r₃, σx, σy, σz = (p...,) - x, y, z = (u...,) - - # Independent dynamical noise for each variable. - ηx = rand() - ηy = rand() - ηz = rand() - - dx = (x*(r₁ - r₁*x - z + σx*ηx)) % 1 - dy = (y*(r₂ - r₂*y - z + σy*ηy)) % 1 - dz = (z*(r₃ - r₃*z + σz*ηz)) % 1 - return SVector{3}(dx, dy, dz) -end - -function logistic3(u₀, r₁, r₂, r₃, σx, σy, σz) - @warn "`logistic3` is deprecated in CausalityTools v2. "* - "Use `system(Logistic3CommonDriver())` instead, which returns a "* - "`DiscreteDynamicalSystem` that can be iterated." - p = @LArray [r₁, r₂, r₃, σx, σy, σz] (:r₁, :r₂, :r₃, :σx, :σy, :σz) - DiscreteDynamicalSystem(eom_logistic3, u₀, p) -end - -""" - logistic3(;u₀ = rand(3), r = 4, - σx = 0.05, σy = 0.05, σz = 0.05) → DiscreteDynamicalSystem - -Initialise a dynamical system consisting of three coupled logistic map -representing the response of two independent dynamical variables to the -forcing from a common driver. The dynamical influence goes in the directions -``Z \\to X`` and ``Z \\to Y``. - -## Equations of motion - -The equations of motion are - -```math -\\begin{aligned} -x(t+1) = (x(t)(r - r_1 x(t) - z(t) + σ_x η_x)) \\mod 1 \\\\ -y(t+1) = (y(t)(r - r_2 y(t) - z(t) + σ_y η_y)) \\mod 1 \\\\ -z(t+1) = (z(t)(r - r_3 z(t) + σ_z η_z)) \\mod 1 -\\end{aligned} -``` - -Dynamical noise may be added to each of the dynamical variables by tuning the -parameters `σz`, `σx` and `σz`. Default values for the parameters -`r₁`, `r₂` and `r₃` are set such that the system exhibits chaotic behaviour, -with `r₁ = r₂ = r₃ = 4`. - -## References - -1. Runge, Jakob. 
Causal network reconstruction from time series: From theoretical - assumptions to practical estimation, Chaos 28, 075310 (2018); - doi: 10.1063/1.5025050 -""" -logistic3(;u₀ = rand(3), r₁ = 4, r₂ = 4, r₃ = 4, - σx = 0.05, σy = 0.05, σz = 0.05) = logistic3(u₀, r₁, r₂, r₃, σx, σy, σz) - -function eom_logistic4(u, p, t) - r₁, r₂, r₃, r₄, c₁₂, c₂₃, c₃₄ = (p...,) - y₁, y₂, y₃, y₄ = (u...,) - - dy₁ = y₁*(r₁ - r₁*y₁) - dy₂ = y₂*(r₂ - c₁₂*y₁ - r₂*y₂) - dy₃ = y₃*(r₃ - c₂₃*y₂ - r₃*y₃) - dy₄ = y₄*(r₄ - c₃₄*y₃ - r₄*y₄) - return SVector{4}(dy₁, dy₂, dy₃, dy₄) -end - -function logistic4(u₀, r₁, r₂, r₃, r₄, c₁₂, c₂₃, c₃₄) - @warn "`logistic4` is deprecated in CausalityTools v2. "* - "Use `system(Logistic4Chain())` instead, which returns a "* - "`DiscreteDynamicalSystem` that can be iterated." - p = @LArray [r₁, r₂, r₃, r₄, c₁₂, c₂₃, c₃₄] (:r₁, :r₂, :r₃, :r₄, :c₁₂, :c₂₃, :c₃₄) - DiscreteDynamicalSystem(eom_logistic4, u₀, p) -end - -""" - logistic4(;u₀ = rand(4), r₁ = 3.9, r₂ = 3.6, r₃ = 3.6, r₄ = 3.8, - c₁₂ = 0.4, c₂₃ = 0.4, c₃₄ = 0.35) → DiscreteDynamicalSystem - -Initialise a system of a transitive chain of four unidirectionally coupled -logistic maps, where ``y_1 \\to y_2 \\to y_3 \\to y_4`` [1]. Default -parameters are as in [1]. - -*Note: With the default parameters which are as in [1], for some initial conditions, -this system wanders off to ``\\pm \\infty`` for some of the variables. Make sure that -you have a good realisation before using the orbit for anything.* - -## Equations of motion - -```math -\\begin{aligned} -y_1(t+1) &= y_1(t)(r_1 - r_1 y_1) \\\\ -y_2(t+1) &= y_2(t)(r_2 - c_{12} y_1 - r_2 y_2) \\\\ -y_3(t+1) &= y_3(t)(r_3 - c_{23} y_2 - r_3 y_3) \\\\ -y_4(t+1) &= y_4(t)(r_4 - c_{34} y_3 - r_4 y_4) -\\end{aligned} -``` - -## References - -1. Ye, Hao, et al. "Distinguishing time-delayed causal interactions using - convergent cross mapping." Scientific reports 5 (2015): 14750 -""" -logistic4(;u₀ = rand(4), - r₁ = 3.9, r₂ = 3.6, r₃ = 3.6, r₄ = 3.8, - c₁₂ = 0.4, c₂₃ = 0.4, c₃₄ = 0.35) = - logistic4(u₀, r₁, r₂, r₃, r₄, c₁₂, c₂₃, c₃₄) - - -export nonlinear3d - -""" - eom_nonlinear3d(u₀, a₁, a₂, a₃, b₁, b₂, b₃, - c₁₂, c₂₃, c₁₃, σ₁, σ₂, σ₃) → DiscreteDynamicalSystem - -Equations of motion for a 3d nonlinear system with nonlinear couplings -``x_1 \\to x_2``, ``x_2 \\to x_3`` and ``x_1 \\to x_3``. Modified from [1]. - -## Equations of motion - -The equations of motion are - -```math -\\begin{aligned} -x_1(t+1) &= a_1 x_1 (1-x_1(t))^2 e^{-x_2(t)^2} + 0.4 \\xi_{1}(t) \\\\ -x_2(t+1) &= a_1 x_2 (1-x_2(t))^2 e^{-x_2(t)^2} + 0.4 \\xi_{2}(t) + b x_1 x_2 \\\\ -x_3(t+1) &= a_3 x_3 (1-x_3(t))^2 e^{-x_3(t)^2} + 0.4 \\xi_{3}(t) + c x_{2}(t) + d x_{1}(t)^2. -\\end{aligned} -``` - -## References - -1. Gourévitch, B., Le Bouquin-Jeannès, R., & Faucon, G. (2006). Linear and nonlinear - causality between signals: methods, examples and neurophysiological - applications. Biological Cybernetics, 95(4), 349–369. -""" -function eom_nonlinear3d(x, p, n) - x₁, x₂, x₃ = (x...,) - a₁, a₂, a₃, b₁, b₂, b₃, c₁₂, c₂₃, c₁₃, σ₁, σ₂, σ₃ = (p...,) - ξ₁ = rand(Normal(0, σ₁)) - ξ₂ = rand(Normal(0, σ₂)) - ξ₃ = rand(Normal(0, σ₃)) - - dx₁ = a₁*x₁*(1-x₁)^2 * exp(-x₁^2) + b₁*ξ₁ - dx₂ = a₂*x₂*(1-x₂)^2 * exp(-x₂^2) + b₂*ξ₂ + c₁₂*x₁*x₂ - dx₃ = a₃*x₃*(1-x₃)^2 * exp(-x₃^2) + b₃*ξ₃ + c₂₃*x₂ + c₁₃*x₁^2 - - return SVector{3}(dx₁, dx₂, dx₃) -end - -function nonlinear3d(u₀, a₁, a₂, a₃, b₁, b₂, b₃, c₁₂, c₂₃, c₁₃, σ₁, σ₂, σ₃) - @warn "`nonlinear3d` is deprecated in CausalityTools v2. 
"* - "Use `system(Nonlinear3())` instead, which returns a "* - "`DiscreteDynamicalSystem` that can be iterated." - p = @LArray [a₁, a₂, a₃, b₁, b₂, b₃, c₁₂, c₂₃, c₁₃, σ₁, σ₂, σ₃] (:a₁, :a₂, :a₃, :b₁, :b₂, :b₃, :c₁₂, :c₂₃, :c₁₃, :σ₁, :σ₂, :σ₃) - s = DiscreteDynamicalSystem(eom_nonlinear3d, u₀, p) - return s -end - -""" - nonlinear3d(;u₀ = rand(3), - σ₁ = 1.0, σ₂ = 1.0, σ₃ = 1.0, - a₁ = 3.4, a₂ = 3.4, a₃ = 3.4, - b₁ = 0.4, b₂ = 0.4, b₃ = 0.4, - c₁₂ = 0.5, c₂₃ = 0.3, c₁₃ = 0.5) → DiscreteDynamicalSystem - -A 3d nonlinear system with nonlinear couplings ``x_1 \\to x_2``, -``x_2 \\to x_3`` and ``x_1 \\to x_3``. Modified from [1]. - -## Equations of motion - -The equations of motion are - -```math -\\begin{aligned} -x_1(t+1) &= a_1 x_1 (1-x_1(t))^2 e^{-x_2(t)^2} + 0.4 \\xi_{1}(t) \\\\ -x_2(t+1) &= a_1 x_2 (1-x_2(t))^2 e^{-x_2(t)^2} + 0.4 \\xi_{2}(t) + b x_1 x_2 \\\\ -x_3(t+1) &= a_3 x_3 (1-x_3(t))^2 e^{-x_3(t)^2} + 0.4 \\xi_{3}(t) + c x_{2}(t) + d x_{1}(t)^2. -\\end{aligned} -``` - -## References - -1. Gourévitch, B., Le Bouquin-Jeannès, R., & Faucon, G. (2006). Linear and nonlinear - causality between signals: methods, examples and neurophysiological - applications. Biological Cybernetics, 95(4), 349–369. -""" -nonlinear3d(;u₀ = rand(3), - σ₁ = 1.0, σ₂ = 1.0, σ₃ = 1.0, - a₁ = 3.4, a₂ = 3.4, a₃ = 3.4, - b₁ = 0.4, b₂ = 0.4, b₃ = 0.4, - c₁₂ = 0.5, c₂₃ = 0.3, c₁₃ = 0.5) = - nonlinear3d(u₀, a₁, a₂, a₃, b₁, b₂, b₃, c₁₂, c₂₃, c₁₃, σ₁, σ₂, σ₃) - - -function eom_nontrivial_pegiun(u, p, n) - n = n + 10 - O = zeros(Float64, n + 3, 2) - x, y = (u...,) - p₁, p₂, p₃, p₄, p₅, p₆, σ₁, σ₂ = (p...,) - - # Propagate initial condition to the three first time steps. - for i = 1:3 - O[i, 1] = x - O[i, 2] = y - end - for i = 4:n - y1 = O[i-1, 2] - x2 = O[i-2, 1] - y3 = O[i-3, 2] - - ξ₁ = rand(Normal(0, σ₁)) - ξ₂ = rand(Normal(0, σ₂)) - ynew = p₁*y1 + ξ₁ - xnew = p₂ + p₃*x2 + (p₄ - p₅*y3)/(1 + exp(-p₆*y3)) + ξ₂ - O[i, 1] = xnew - O[i, 2] = ynew - end - O = O[10+3:end-10, :] - O[:, 1] .= O[:, 1] .- mean(O[:, 1]) - O[:, 2] .= O[:, 2] .- mean(O[:, 2]) - O[:, 1] .= O[:, 1] ./ std(O[:, 1]) - O[:, 2] .= O[:, 2] ./ std(O[:, 2]) - return StateSpaceSet(O) -end - -function nontrivial_pegiun(u₀, p₁, p₂, p₃, p₄, p₅, p₆, σ₁, σ₂, n::Int) - @warn "`nontrivial_pegiun` is deprecated in CausalityTools v2. "* - "Use `system(Peguin2())` instead, which returns a "* - "`DiscreteDynamicalSystem` that can be iterated." - p = @LArray [p₁, p₂, p₃, p₄, p₅, p₆, σ₁, σ₂] (:p₁, :p₂, :p₃, :p₄, :p₅, :p₆, :σ₁, :σ₂) - eom_nontrivial_pegiun(u₀, p, n) -end - - -""" - nontrivial_pegiun(;u₀ = rand(2), σ₁ = 0.1, σ₂ = 0.1, - p₁ = 0.7, p₂ = 0.1, p₃ = 0.4, p₄ = 2.4, p₅ = 0.9, p₆ = 4, n = 100) → StateSpaceSet - -A 2D discrete autoregressive system with nonlinear, nontrivial coupling from [1] . -This system is from [1](https://www.amse-aixmarseille.fr/sites/default/files/_dt/greqam/99a42.pdf), and -was also studied in [2](https://www.sciencedirect.com/science/article/pii/S0165027002003679). -The version implemented here allows for tweaking the parameters of the equations. -The difference equations are - -```math -\\begin{aligned} -x(t+1) &= p_2 + p_3 x(t-2) + c_{yx}\\dfrac{p_4 - p_5 y(t-3)}{1 + e^{-p_6 y(t-3)}} + \\xi_1(t) \\ -y(t+1) &= p_1 y(t) + \\xi_2(t). -\\end{aligned} -``` -Here, ``\\xi_{1,2}(t)`` are two independent normally distributed noise processes -with zero mean and standard deviations ``\\sigma_1`` and ``\\sigma_2``. The -``\\xi_{1,2}(t)`` terms represent dynamical noise. - -# References - -[1] Péguin-Feissolle, A., & Teräsvirta, T. (1999). 
A General Framework for -Testing the Granger Noncausaality Hypothesis. Universites d’Aix-Marseille II -et III. [https://www.amse-aixmarseille.fr/sites/default/files/_dt/greqam/99a42.pdf](https://www.amse-aixmarseille.fr/sites/default/files/_dt/greqam/99a42.pdf) - -[2] Chávez, M., Martinerie, J., & Le Van Quyen, M. (2003). Statistical -assessment of nonlinear causality: application to epileptic EEG signals. -Journal of Neuroscience Methods, 124(2), 113–128. -doi:10.1016/s0165-0270(02)00367-9 -[https://www.sciencedirect.com/science/article/pii/S0165027002003679](https://www.sciencedirect.com/science/article/pii/S0165027002003679) -""" -function nontrivial_pegiun(;u₀ = rand(2), σ₁ = 0.1, σ₂ = 0.1, - p₁ = 0.7, p₂ = 0.1, p₃ = 0.4, p₄ = 2.4, p₅ = 0.9, p₆ = 4, n = 100) - eom_nontrivial_pegiun(u₀, [p₁, p₂, p₃, p₄, p₅, p₆, σ₁, σ₂], n) -end - -function eom_ulam(dx, x, p, t) - ε = p[:ε] - f = x -> 2 - x^2 - dx[1] = f(ε*x[length(dx)] + (1-ε)*x[1]) - for i in 2:length(dx) - dx[i] = f(ε*x[i-1] + (1-ε)*x[i]) - end -end - -""" - ulam(D::Int = 10; u₀ = rand(D), ε::Real = 0.10) → DiscreteDynamicalSystem - -A lattice of `D` unidirectionally coupled ulam maps [Schreiber2000](@cite) defined as - -```math -x^{m}_{t+1} = f(\\epsilon x^{m-1}_{t} + (1 - \\epsilon) x_{t}^{m}), -``` - -where ``m = 1, 2, \\ldots, D`` and ``f(x) = 2 - x^2``. In this system, information transfer -happens only in the direction of increasing ``m``. - -""" -function ulam(D::Int = 10; u₀ = rand(D), ε::Real = 0.10) - @warn "`ulam` is deprecated in CausalityTools v2. "* - "Use `system(UlamLattice())` instead, which returns a "* - "`DiscreteDynamicalSystem` that can be iterated." - p = LVector(ε = ε) - - DiscreteDynamicalSystem(eom_ulam, u₀, p) -end - -function eom_var1(x, p, n) - σ₁, σ₂, σ₃ = p[1], p[2], p[3] - x₁, x₂, x₃ = x[1], x[2], x[3] - θ = rand(Normal(0, σ₁)) - η = rand(Normal(0, σ₂)) - ϵ = rand(Normal(0, σ₃)) - - dx₁ = θ - dx₂ = x₁ * η - dx₃ = 0.5*x₃ * x₂ + ϵ - return SVector{3}(dx₁, dx₂, dx₃) -end - -function var1(u₀, σ₁, σ₂, σ₃) - @warn "`var1` is deprecated in CausalityTools v2. "* - "Use `system(Var1())` instead, which returns a "* - "`DiscreteDynamicalSystem` that can be iterated." - p = LVector(ε = ε) - p = @LArray [σ₁, σ₂, σ₃] (:σ₁, :σ₂, :σ₃) - - DiscreteDynamicalSystem(eom_var1, u₀, p) -end - -""" - var1(x, p, n) → DiscreteDynamicalSystem - -Initialise a discrete vector autoregressive system where X₁ → X₂ → X₃. -""" -var1(;u₀ = rand(3), σ₁ = 1.0, σ₂ = 0.2, σ₃ = 0.3) = var1(u₀, σ₁, σ₂, σ₃) - -function eom_verdes(u, p, t) - x, y, z = (u...,) - ωy, ωz, σx, σy, σz = (p...,) - - ηx = σx == 0 ? 0 : rand(Normal(0, σx)) - ηy = σy == 0 ? 0 : rand(Normal(0, σy)) - ηz = σz == 0 ? 0 : rand(Normal(0, σz)) - - dx = y*(18y - 27y^2 + 10)/2 + z*(1-z) + ηx - dy = (1 - cos((2*pi/ωy) * t))/2 + ηy - dz = (1 - sin((2*pi/ωz) * t))/2 + ηz - return SVector{3}(dx, dy, dz) -end - -function verdes(u₀, ωy, ωz, σx, σy, σz) - @warn "`verdes` is deprecated in CausalityTools v2. "* - "Use `system(Verdes())` instead, which returns a "* - "`DiscreteDynamicalSystem` that can be iterated." - p = LVector(ωy = ωy, ωz = ωz, σx = σx, σy = σy, σz = σz) - - DiscreteDynamicalSystem(eom_verdes, u₀, p) -end - -""" - verdes(;u₀ = rand(3), ωy = 315, ωz = 80, - σx = 0.0, σy = 0.0, σz = 0.0) → DiscreteDynamicalSystem - -Intitialise a 3D system where the response X is a highly nonlinear combination -of Y and Z [Verdes2005](@cite). The forcings Y and Z involve sines and cosines, and -have different periods, which controlled by `ωy` and `ωz`. 
- -The equations of motion are - -```math -\\begin{aligned} -x(t+1) &= \\dfrac{y(t)(18y(t) - 27y(t)^2 + 10)}{2} + z(t)(1-z(t)) + ηx \\ -y(t+1) &= \\dfrac{(1 - \\dfrac{\\cos(2\\pi)}{\\omega y}t)}{2} + ηy \\ -z(t+1) &= \\dfrac{(1 - \\dfrac{\\sin(2\\pi)}{\\omega z}t)}{2} + ηz -\\end{aligned} -``` -where ηx, ηy, ηz is gaussian noise with mean 0 and standard deviation `σx`, `σy` -and `σz`. -""" -verdes(;u₀ = rand(3), - ωy = 315, ωz = 80, - σx = 0.01, σy = 0.01, σz = 0.01) = - verdes(u₀, ωy, ωz, σx, σy, σz) - -import Distributions: Distribution, Uniform, Normal - -""" - noise_uu(n::Int, lo = - 1, hi = 1) - -Generate a signal consisting of `n` steps of uncorrelated uniform noise from -a uniform distribution on `[lo, hi]`. -""" -function noise_uu(n::Int; lo = - 1, hi = 1) - @warn "`noise_uu` is deprecated in CausalityTools v2. " - u = Uniform(-lo, hi) - rand(u, n) -end - - -""" - noise_ug(n::Int; μ = 0, σ = 1) - -Generate a signal consisting of `n` steps of uncorrelated Gaussian noise from -a normal distribution with mean `μ` and standard deviation `σ`. -""" -function noise_ug(n::Int; μ = 0, σ = 1) - @warn "`noise_ug` is deprecated in CausalityTools v2. " - d = Normal(μ, σ) - rand(d, n) -end - -""" - noise_brownian(n::Int; lo = - 1, hi = 1) - noise_brownian(d::Distribution, n::Int) - -Generate a signal consisting of `n` steps of Brownian noise, generated as -the zero-mean and unit standard deviation normalised cumulative sum of noise -generated from a uniform distribution on `[lo, hi]`. Optionally, a distribution -`d` from which to sample can be provided. - -## Examples - -```julia -# Based on uncorrelated uniform noise -noise_brownian(100) -noise_brownian(100, lo = -2, hi = 2) -noise_brownian(Uniform(-3, 3), 100) - -# Based on uncorrelated Gaussian noise -μ, σ = 0, 2 -noise_brownian(Normal(μ, σ), 100) -``` -""" -function noise_brownian(n::Int; lo = - 1, hi = 1) - @warn "`noise_brownian` is deprecated in CausalityTools v2." - - u = Uniform(lo, hi) - xs = cumsum(rand(u, n)) - (xs .- mean(xs)) ./ std(xs) -end - -function noise_brownian(d::Distribution, n::Int) - @warn "`noise_brownian` is deprecated in CausalityTools v2." 
- xs = cumsum(rand(d, n)) - (xs .- mean(xs)) ./ (std(xs)) -end - -export noise_uu, noise_ug, noise_brownian diff --git a/src/example_systems/discrete/discrete_systems.ipynb b/src/example_systems/discrete/discrete_systems.ipynb deleted file mode 100644 index 9fe7d835f..000000000 --- a/src/example_systems/discrete/discrete_systems.ipynb +++ /dev/null @@ -1,198 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m project at `~/Code/Repos/Temp/CausalityTools.jl`\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: using NearestNeighbors.inrangecount in module CausalityTools conflicts with an existing identifier.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Method definition eom_henon_triple(Any, Any, Any) in module CausalityTools at /Users/work/Code/Repos/Temp/CausalityTools.jl/src/example_systems/discretemaps/henontriple.jl:19 overwritten at /Users/work/Code/Repos/Temp/CausalityTools.jl/src/example_systems/discretemaps/henontriple.jl:23.\n", - " ** incremental compilation may be fatally broken for this module **\n", - "\n" - ] - } - ], - "source": [ - "#using Pkg; Pkg.activate(\"../../../\")\n", - "using Revise, CausalityTools" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "update_state! (generic function with 2 methods)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "using StaticArrays: MVector\n", - "\"\"\"\n", - " Henon3() <: DiscreteDefinition\n", - " Henon3(; a = 0.1, b = 0.3, c = 0.1, xi = [0.1, 0.2, 0.3])\n", - "\n", - "The `Henon3` system is a lagged discrete dynamical system. 
The initial condition\n", - "`xi` is repeated over the three first time steps before iteration starts.\n", - "\"\"\"\n", - "struct Henon33333{T, S, A, B, C} <: DiscreteDefinition\n", - " m₁::MVector{2, T} # holds past states of x1\n", - " m₂::MVector{2, T} # holds past states of x2\n", - " m₃::MVector{2, T} # holds past states of x3\n", - " xi::S\n", - " a::A\n", - " b::B\n", - " c::C\n", - "\n", - " function Henon33333(; a::A = 1.4, b::B = 0.3, c::C = 0.1, \n", - " xi::S = [0.4, 0.5, 0.6]) where {A, B, C, S}\n", - " T = eltype(1.0)\n", - " m₁ = MVector{2, T}(repeat([xi[1]], 2)) \n", - " m₂ = MVector{2, T}(repeat([xi[2]], 2)) \n", - " m₃ = MVector{2, T}(repeat([xi[3]], 2)) \n", - " return new{T, S, A, B, C}(m₁, m₂, m₃, xi, a, b, c)\n", - " end\n", - "end\n", - "\n", - "function system(s::Henon33333)\n", - " return DiscreteDynamicalSystem(eom_henon33333, s.xi, s)\n", - "end\n", - "\n", - "function eom_henon33333(u, p::Henon33333, t)\n", - " # `u` is simply ignored here, because the state is stored in the memory vectors\n", - " m₁, m₂, m₃ = p.m₁, p.m₂, p.m₃\n", - " x₁₁, x₁₂ = m₁[1], m₁[2]\n", - " x₂₁, x₂₂ = m₂[1], m₂[2]\n", - " x₃₁, x₃₂ = m₃[1], m₃[2]\n", - "\n", - " a, b, c = p.a, p.b, p.c\n", - " dx₁= a - x₁₁^2 + b*x₁₂\n", - " dx₂= a - c*x₁₁*x₂₁ - (1 - c)*x₂₁^2 + b*x₂₂\n", - " dx₃= a - c*x₂₁*x₃₁ - (1 - c)*x₃₁^2 + b*x₃₂\n", - "\n", - " new_state = SVector{3}(dx₁, dx₂, dx₃)\n", - " update_state!(p, new_state) # Update memory vectors\n", - " return new_state\n", - "end\n", - "\n", - "function update_state!(p::Henon33333, xnew::SVector{3})\n", - " p.m₁[2] = p.m₁[1]\n", - " p.m₁[1] = xnew[1]\n", - " p.m₂[2] = p.m₂[1]\n", - " p.m₂[1] = xnew[2]\n", - " p.m₃[2] = p.m₃[1]\n", - " p.m₃[1] = xnew[3]\n", - "end\n" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3-dimensional discrete dynamical system\n", - " state: [0.4, 0.5, 0.6]\n", - " rule f: eom_henon33333\n", - " in-place? 
false\n", - " jacobian: ForwardDiff\n", - " parameters: Henon33333{Float64, Vector{Float64}, Float64, Float64, Float64}([-0.3296, 1.36], [-0.160202, 1.305], [0.0672386, 1.226], [0.4, 0.5, 0.6], 1.4, 0.3, 0.1)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sys = system(Henon33333());" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3-dimensional StateSpaceSet{Float64} with 1001 points\n", - " 0.4 0.5 0.6\n", - " 1.62032 0.733367 1.61086\n", - " -1.12199 1.06447 -1.22586\n", - " 0.627231 0.719658 0.661283\n", - " 0.669983 1.20809 0.591086\n", - " 1.13929 0.221434 1.21253\n", - " 0.30301 1.69307 0.227264\n", - " 1.64997 -1.1647 1.6788\n", - " -1.23151 0.879215 -0.872819\n", - " 0.378384 0.463148 1.29475\n", - " ⋮ \n", - " 0.781281 0.533008 1.78612\n", - " 1.07028 1.31244 -1.53514\n", - " 0.488876 -0.130807 0.0163228\n", - " 1.48209 1.78473 0.939432\n", - " -0.649913 -1.77048 0.442955\n", - " 1.42224 -1.00078 1.58367\n", - " -0.817736 0.109789 -0.565819\n", - " 1.15798 1.0979 1.59318\n", - " -0.186237 0.220965 -1.22905" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "trajectory(sys, 1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Julia 1.8.4", - "language": "julia", - "name": "julia-1.8" - }, - "language_info": { - "file_extension": ".jl", - "mimetype": "application/julia", - "name": "julia", - "version": "1.8.4" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/example_systems/example_systems.jl b/src/example_systems/example_systems.jl deleted file mode 100644 index a19d522f6..000000000 --- a/src/example_systems/example_systems.jl +++ /dev/null @@ -1,33 +0,0 @@ -include("api.jl") - -include("discrete/deprecate.jl") -include("discrete/AR1Unidir.jl") -include("discrete/AR1Bidir.jl") -include("discrete/Anishchenko.jl") -include("discrete/ChaoticMaps3.jl") -include("discrete/Henon2.jl") -include("discrete/Henon3.jl") -include("discrete/Ikeda2.jl") -include("discrete/ChaoticNoisyLinear2.jl") -include("discrete/Nonlinear3.jl") -include("discrete/Peguin2.jl") -include("discrete/Logistic2Unidir.jl") -include("discrete/Logistic2Bidir.jl") -include("discrete/Logistic3CommonDriver.jl") -include("discrete/Logistic4Chain.jl") -include("discrete/UlamLattice.jl") -include("discrete/Var1.jl") -include("discrete/Verdes3.jl") - -include("continuous/ChuaCircuitsBidir6.jl") -include("continuous/ChuaScrollSine3.jl") -include("continuous/HindmarshRose3.jl") -include("continuous/MediatedLink9.jl") -include("continuous/LorenzForced9.jl") -include("continuous/LorenzTransitive9.jl") -include("continuous/LorenzBidir6.jl") -include("continuous/Repressilator6.jl") -include("continuous/RosslerBidir6.jl") -include("continuous/RosslerForced9.jl") -include("continuous/RosslerLorenzUnidir6.jl") -include("continuous/Thomas3.jl") diff --git a/src/independence_tests/bbnue/bbnue.jl b/src/independence_tests/bbnue/bbnue.jl new file mode 100644 index 000000000..21eeb1b4d --- /dev/null +++ b/src/independence_tests/bbnue/bbnue.jl @@ -0,0 +1,165 @@ +# TODO: test how this works with +export BootstrapBasedNonUniformEmbeddingTest +export MostInformativeForwardSearch +export forward_search + +""" + BootstrapBasedNonUniformEmbeddingTest(; + measure_pairwise = MIShannon(); + measure_cond + ) + +""" +Base.@kwdef 
struct BootstrapBasedNonUniformEmbeddingTest{M} <: IndependenceTest{M} + measure_pairwise::M = MIShannon() + measure_cond::M = CMIShannon() + max_dim::Int = 2 + max_τ::Int = 3 + include_zerolag::Bool = false +end + +function prepare_embedding_lags_and_indices(test, x, y, 𝒵::AbstractStateSpaceSet) + min_τ = test.include_zerolag ? 0 : 1 + τs_x = min_τ:test.max_τ + τs_y = min_τ:test.max_τ + τs = min_τ:test.max_τ + n_lags = length(τs ) + τs_z = vcat([collect(τs) for i = 3:3+dimension(𝒵)-1]...,) + + # Variables are enumerated according to their input order. + # e.g. x = 1, y = 2, z[:, 1] = 3, z[:, 2] = 4, and so on. + js_x = repeat([1], n_lags) + js_y = repeat([2], n_lags) + js_z = vcat([repeat([i], n_lags) for i = 3:3+dimension(𝒵)-1]...,) + + τs = [τs_x; τs_y; τs_z] + js = [js_x; js_y; js_z] + return τs, js +end + +function joint_embedding(test::BootstrapBasedNonUniformEmbeddingTest, x, y, 𝒵) + joint_dataset = [x y Matrix(z)] + τs, js = prepare_embedding_lags_and_indices(test, x, y, 𝒵) + + # Construct the candidate set. + 𝒞 = genembed(Dataset(joint_dataset), τs, js) +end + +function nue(test::BootstrapBasedNonUniformEmbeddingTest, + x::AbstractVector{T}, + y::AbstractVector{T}, + 𝒵::AbstractDataset{D, T}) where {D, T} + + 𝒮 = Vector{T}[] + +end + +# Notation: +# 𝒮: selected variables +# 𝒞: candidate variables + +function k_forward_search!(test::BootstrapBasedNonUniformEmbeddingTest, 𝒞, 𝒮, k::Int) + if k == 0 + end +end + +function forward_search!(test::BootstrapBasedNonUniformEmbeddingTest, 𝒞, 𝒮) + +end + +# TODO: provide estimator, not measure +""" + MostInformativeForwardSearch(; + measure_pairwise = MIShannon(), + measure_cond = CMIShannon(), + n_reps = 100, + α = 0.05 + ) + +The forward search part of the BBNUE algorithm from Baboukani et al. (2020). +""" +Base.@kwdef struct MostInformativeForwardSearch{MP, MC} + measure_pairwise::MP = GaussianMI(MIShannon()) + measure_cond::MC = GaussianCMI(CMIShannon()) + n_reps = 100 + α = 0.05 +end + +abstract type StoppingCriterion end +struct BootstrapBasedStopping <: StoppingCriterion end + +""" + forward_search!(test::MostInformativeSearch, target, 𝒞, 𝒮) + +Given a `target` variable, a set of candidate variables `𝒞` and a set of +(initially empty) selected variables, apply the `test` and do the search. + +`idxs_𝒞` and `idxs_𝒮` keep track of the indices of the selected/candidate variables, +in terms of their input order. +""" +function forward_search(search::MostInformativeForwardSearch, target, candidates) + # Initialize empty set of selected variables + 𝒮 = eltype(candidates)[] + + # Make a copy of the input candidates, so that we don't remove any of the original + # data. 
+ 𝒞 = StateSpaceSet.(deepcopy(candidates)) + + k = 0 + terminate = false + idxs_𝒞 = collect(1:length(𝒞)) + idxs_𝒮 = Int[] + @show idxs_𝒞, idxs_𝒮 + + while !terminate + kth_forward_search!(search, target, 𝒞, 𝒮, idxs_𝒞, idxs_𝒮, k) + # Shuffle target and shuffle the *last* selected variable + k += 1 + if k >= length(candidates) + terminate = true + end + end + + return idxs_𝒞, idxs_𝒮 +end + +function kth_forward_search!(search::MostInformativeForwardSearch, target, 𝒞, 𝒮, idxs_𝒞, idxs_𝒮, k) + if k == 0 + forward_search_pairwise!(search, target, 𝒞, 𝒮, idxs_𝒞, idxs_𝒮) + else + forward_search_conditional!(search, target, 𝒞, 𝒮, idxs_𝒞, idxs_𝒮) + end +end + +# Perform a single forward search with the pairwise measure +function forward_search_pairwise!(search::MostInformativeForwardSearch, target, + 𝒞, 𝒮, idxs_𝒞, idxs_𝒮) + ℳ = search.measure_pairwise + associations = [CausalityTools.estimate(ℳ, target, 𝒞[i]) for i in eachindex(𝒞)] + @show associations + + 𝒜, idx_most_informative_variable = findmax(associations) + + update_variables!(idx_most_informative_variable, 𝒞, 𝒮, idxs_𝒞, idxs_𝒮) +end + +# Perform a single forward search with the conditional measure +function forward_search_conditional!(search::MostInformativeForwardSearch, target, + 𝒞, 𝒮, idxs_𝒞, idxs_𝒮) + ℳ = search.measure_cond + s_concatenated = StateSpaceSet(𝒮...) + + associations = [CausalityTools.estimate(ℳ, target, 𝒞[i], s_concatenated) for i in eachindex(𝒞)] + @show associations + 𝒜, idx_most_informative_variable = findmax(associations) + + return update_variables!(idx_most_informative_variable, 𝒞, 𝒮, idxs_𝒞, idxs_𝒮) +end + +function update_variables!(idx_most_informative_variable, 𝒞, 𝒮, idxs_𝒞, idxs_𝒮) + push!(𝒮, 𝒞[idx_most_informative_variable]) + push!(idxs_𝒮, idx_most_informative_variable) + deleteat!(𝒞, idx_most_informative_variable) + deleteat!(idxs_𝒞, idx_most_informative_variable) + @show idxs_𝒞, idxs_𝒮 +end \ No newline at end of file diff --git a/src/independence_tests/independence.jl b/src/independence_tests/independence.jl index 6d3face39..2a727cc88 100644 --- a/src/independence_tests/independence.jl +++ b/src/independence_tests/independence.jl @@ -1,6 +1,7 @@ export independence, cit export IndependenceTest +using Statistics: quantile """ IndependenceTest <: IndependenceTest @@ -23,9 +24,10 @@ Returns a test `summary`, whose type depends on `test`. ## Compatible independence tests -- [`SurrogateTest`](@ref). -- [`LocalPermutationTest`](@ref). -- [`JointDistanceDistributionTest`](@ref). +- [`SurrogateAssociationTest`](@ref) +- [`LocalPermutationTest`](@ref) +- [`JointDistanceDistributionTest`](@ref) +- [`CorrTest`](@ref) """ function independence(test::IndependenceTest, x...) L = length(x) @@ -65,8 +67,8 @@ function null_hypothesis_text(test::IndependenceTestResult) if test.n_vars == 2 return """\ --------------------------------------------------------------------- - H₀: "The first two variables are independent" - Hₐ: "The first two variables are dependent" + H₀: "The variables are independent" + Hₐ: "The variables are dependent" ---------------------------------------------------------------------\ """ elseif test.n_vars == 3 @@ -81,7 +83,6 @@ function null_hypothesis_text(test::IndependenceTestResult) end end include("parametric/parametric.jl") +include("surrogate/SurrogateAssociationTest.jl") include("local_permutation/LocalPermutationTest.jl") -include("surrogate/SurrogateTest.jl") -# TODO: rename/find suitable generic name before including -# include("correlation/correlation.jl). Perhaps `ParametricTest`? 
+#include("parametric/parametric.jl") diff --git a/src/independence_tests/local_permutation/LocalPermutationTest.jl b/src/independence_tests/local_permutation/LocalPermutationTest.jl index 6f193c06a..1137acfb4 100644 --- a/src/independence_tests/local_permutation/LocalPermutationTest.jl +++ b/src/independence_tests/local_permutation/LocalPermutationTest.jl @@ -95,9 +95,8 @@ The nearest-neighbor approach in Runge (2018) can be reproduced by using the - [Example using `CMIShannon`](@ref example_localpermtest_cmishannon). - [Example using `TEShannon`](@ref example_localpermtest_teshannon). """ -struct LocalPermutationTest{M, EST, C, R} <: IndependenceTest{M} - measure::M - est::EST +struct LocalPermutationTest{M, C, R} <: IndependenceTest{M} + est_or_measure::M rng::R kperm::Int nshuffles::Int @@ -106,22 +105,21 @@ struct LocalPermutationTest{M, EST, C, R} <: IndependenceTest{M} w::Int # Theiler window show_progress::Bool end -function LocalPermutationTest(measure::M, est::EST = nothing; +function LocalPermutationTest(est_or_measure::M; rng::R = Random.default_rng(), kperm::Int = 10, replace::Bool = true, nshuffles::Int = 100, closeness_search::C = NeighborCloseness(), - w::Int = 0, show_progress = false) where {M, EST, C, R} - return LocalPermutationTest{M, EST, C, R}(measure, est, rng, kperm, nshuffles, replace, closeness_search, w, show_progress) + w::Int = 0, show_progress = false) where {M, C, R} + return LocalPermutationTest{M, C, R}(est_or_measure, rng, kperm, nshuffles, replace, closeness_search, w, show_progress) end Base.show(io::IO, test::LocalPermutationTest) = print(io, """ `LocalPermutationTest` independence test. ------------------------------------- - measure: $(test.measure) - estimator: $(test.est) + measure/est:$(test.est_or_measure) rng: $(test.rng) # shuffles: $(test.nshuffles) k (perm) $(test.kperm) @@ -161,15 +159,14 @@ end # should be done for the NN-based CMI methods, so we don't have to reconstruct # KD-trees and do marginal searches for all marginals all the time. function independence(test::LocalPermutationTest, x, y, z) - measure, est, nshuffles = test.measure, test.est, test.nshuffles - + est_or_measure, nshuffles = test.est_or_measure, test.nshuffles # Make sure that the measure is compatible with the input data. - verify_number_of_inputs_vars(measure, 3) + verify_number_of_inputs_vars(est_or_measure, 3) X, Y, Z = StateSpaceSet(x), StateSpaceSet(y), StateSpaceSet(z) @assert length(X) == length(Y) == length(Z) - Î = estimate(measure, est, X, Y, Z) - Îs = permuted_Îs(X, Y, Z, measure, est, test) + Î = association(est_or_measure, X, Y, Z) + Îs = permuted_Îs(X, Y, Z, est_or_measure, test) p = count(Î .<= Îs) / nshuffles return LocalPermutationTestResult(3, Î, Îs, p, nshuffles) end @@ -177,7 +174,7 @@ end # This method takes `measure` and `est` explicitly, because for some measures # like `TEShannon`, `test.measure` may be converted to some other measure before # computing the test statistic. 
-function permuted_Îs(X, Y, Z, measure, est, test) +function permuted_Îs(X, Y, Z, est_or_measure, test) rng, kperm, nshuffles, replace, w = test.rng, test.kperm, test.nshuffles, test.replace, test.w progress = ProgressMeter.Progress(nshuffles; desc = "LocalPermutationTest:", @@ -198,7 +195,7 @@ function permuted_Îs(X, Y, Z, measure, est, test) else shuffle_without_replacement!(X̂, X, idxs_z, kperm, rng, Nᵢ, πs) end - Îs[n] = estimate(measure, est, X̂, Y, Z) + Îs[n] = association(est_or_measure, X̂, Y, Z) ProgressMeter.next!(progress) end @@ -228,5 +225,9 @@ function shuffle_without_replacement!(X̂, X, idxs, kperm, rng, Nᵢ, πs) end end -# Concrete implementations + +function LocalPermutationTest(m::MultivariateInformationMeasure; kwargs...) + throw(ArgumentError("You need to provide an estimator for the multivariate information measure $(typeof(m)), not only the definition.")) +end +# TODO: fix this include("transferentropy.jl") diff --git a/src/independence_tests/local_permutation/transferentropy.jl b/src/independence_tests/local_permutation/transferentropy.jl index ad6c02a66..436378e05 100644 --- a/src/independence_tests/local_permutation/transferentropy.jl +++ b/src/independence_tests/local_permutation/transferentropy.jl @@ -1,38 +1,51 @@ using Random: shuffle using StatsBase: sample +using Setfield -function LocalPermutationTest(measure::TransferEntropy, est::Nothing, args...; kwargs...) - txt = "A valid estimator must be provided as second argument to "* - "`LocalPermutationTest` when using the `TEShannon` measure.\n" * - "Do e.g. LocalPermutationTest(TEShannon(), FPVP())" - throw(ArgumentError(txt)) -end - -function independence(test::LocalPermutationTest{<:TransferEntropy{<:E}}, x::AbstractVector...) where E - measure, est, nshuffles = test.measure, test.est, test.nshuffles +# function LocalPermutationTest(measure::TransferEntropy, est::Nothing, args...; kwargs...) +# txt = "A valid estimator must be provided as second argument to "* +# "`LocalPermutationTest` when using the `TEShannon` measure.\n" * +# "Do e.g. LocalPermutationTest(TEShannon(), FPVP())" +# throw(ArgumentError(txt)) +# end +function independence(test::LocalPermutationTest{<:MultivariateInformationMeasureEstimator{<:TransferEntropy}}, x::AbstractVector...) + est_or_measure, nshuffles = deepcopy(test.est_or_measure), test.nshuffles if !(length(x) == 3) && est isa TransferEntropyEstimator msg = "`LocalPermutationTest` is not defined for pairwise transfer entropy with " * " `TransferEntropyEstimators`. " * "Either provide a third timeseries to condition on, or use some other estimator." throw(ArgumentError(msg)) end + + def = est_or_measure.definition # Below, the T variable also includes any conditional variables. - S, T, T⁺, C = individual_marginals_te(measure.embedding, x...) + S, T, T⁺, C = individual_marginals_te(def.embedding, x...) TC = StateSpaceSet(T, C) @assert length(T⁺) == length(S) == length(TC) N = length(x) - if est isa TransferEntropyEstimator - Î = estimate(measure, est, S, T, T⁺, C) - Îs = permuted_Îs_te(S, T, T⁺, C, measure, est, test) - else - X, Y = S, T⁺ # The source marginal `S` is the one being shuffled. - Z = TC # The conditional variable - cmi = te_to_cmi(measure) - Î = estimate(cmi, est, X, Y, Z) - Îs = permuted_Îs(X, Y, Z, cmi, est, test) - end + X, Y = S, T⁺ # The source marginal `S` is the one being shuffled. + Z = TC # The conditional variable + est = convert_to_cmi_estimator(est_or_measure) + + Î = association(est, X, Y, Z) + # works until here. 
+ + Îs = permuted_Îs_te(S, T, T⁺, C, est, test) + # TODO: make compatible with TransferEntropyEstimators. + # THis requires a new permuted_Îs_te dedicated for that. + # if est_or_measure isa TransferEntropyEstimator + # # @show "lll" + # # @show "heyo" + # # Î = association(est_or_measure, S, T, T⁺, C) + # #Îs = permuted_Îs_te(S, T, T⁺, C, est_or_measure, test) + + # X, Y = S, T⁺ # The source marginal `S` is the one being shuffled. + # Z = TC # The conditional variable + # Îs = permuted_Îs(X, Y, Z, est_or_measure, test) + + # end p = count(Î .<= Îs) / nshuffles return LocalPermutationTestResult(length(x), Î, Îs, p, nshuffles) @@ -43,7 +56,7 @@ end # the source marginal `S` is shuffled according to local closeness in the # conditional marginal `C`. The `T` and `T⁺` marginals (i.e. all information) # about the target variable is left untouched. -function permuted_Îs_te(S, T, T⁺, C, measure::TransferEntropy, est, test) +function permuted_Îs_te(S, T, T⁺, C, est_or_measure, test) rng, kperm, nshuffles, replace, w = test.rng, test.kperm, test.nshuffles, test.replace, test.w progress = ProgressMeter.Progress(nshuffles; desc = "LocalPermutationTest:", @@ -67,7 +80,7 @@ function permuted_Îs_te(S, T, T⁺, C, measure::TransferEntropy, est, test) else shuffle_without_replacement!(Ŝ, S, idxs_C, kperm, rng, Nᵢ, πs) end - Îs[n] = estimate(measure, est, Ŝ, T, T⁺, C) + Îs[n] = association(est_or_measure, Ŝ, T, T⁺, C) ProgressMeter.next!(progress) end return Îs diff --git a/src/independence_tests/parametric/CorrTest.jl b/src/independence_tests/parametric/CorrTest.jl index 961525007..f5233d5dd 100644 --- a/src/independence_tests/parametric/CorrTest.jl +++ b/src/independence_tests/parametric/CorrTest.jl @@ -1,6 +1,7 @@ export CorrTest export CorrTestResult - +using Distributions: Normal +using StateSpaceSets import HypothesisTests: pvalue # Note: HypothesisTests already defines CorrelationTest. @@ -99,7 +100,7 @@ end const VectorOr1D{D} = Union{AbstractVector, AbstractDataset{D}} where D function independence(test::CorrTest, x::VectorOr1D, y::VectorOr1D, z::ArrayOrStateSpaceSet...) if isempty(z) - ρ = estimate(PearsonCorrelation(), x, y) + ρ = association(PearsonCorrelation(), x, y) z = fishers_z(ρ) pval = pvalue(test, z, 0, length(x)) return CorrTestResult(ρ, z, pval) @@ -108,8 +109,8 @@ function independence(test::CorrTest, x::VectorOr1D, y::VectorOr1D, z::ArrayOrSt # redundant, but we need the dimension of Z, so some code duplication occurs here. X, Y, Z = construct_partialcor_datasets(x, y, z...) 
         D = StateSpaceSet(X, Y, Z)
-        cov = fastcov(D)
-        precision_matrix = invert_cov(cov)
+        cov_matrix = cov(D)
+        precision_matrix = invert_cov(cov_matrix)
         ρ = partial_correlation_from_precision(precision_matrix, 1, 2)
         z = fishers_z(ρ)
         pval = pvalue(test, z, dimension(Z), length(x))
diff --git a/src/independence_tests/parametric/JointDistanceDistributionTest.jl b/src/independence_tests/parametric/JointDistanceDistributionTest.jl
index f953152a4..8a38056b6 100644
--- a/src/independence_tests/parametric/JointDistanceDistributionTest.jl
+++ b/src/independence_tests/parametric/JointDistanceDistributionTest.jl
@@ -97,7 +97,7 @@ end
 
 function independence(test::JointDistanceDistributionTest, x, y)
-    Δjdd = jdd(test.measure, x, y)
+    Δjdd = association(test.measure, x, y)
 
     # Right-sided t-test
     t = t_statistic(Δjdd, hypothetical_μ = test.measure.μ)
diff --git a/src/independence_tests/parametric/PATest.jl b/src/independence_tests/parametric/PATest.jl
index cd7d4ea2b..725ae88e5 100644
--- a/src/independence_tests/parametric/PATest.jl
+++ b/src/independence_tests/parametric/PATest.jl
@@ -23,7 +23,7 @@ end
 """
     PATestResult(n_vars, ΔA, ttest, pvalue)
 
-Holds the result of a [`SurrogateTest`](@ref). `n_vars` is the number of variables
+Holds the result of a [`SurrogateAssociationTest`](@ref). `n_vars` is the number of variables
 used for the test (2 for pairwise, 3 for conditional). `ΔA` is the distribution of
 asymmetries, one for each `η`. `ttest` is a one-sample t-test, and `pvalue` is the
 right-tailed p-value for the test.
diff --git a/src/independence_tests/parametric/parametric.jl b/src/independence_tests/parametric/parametric.jl
index 5c158de97..f2a110cdb 100644
--- a/src/independence_tests/parametric/parametric.jl
+++ b/src/independence_tests/parametric/parametric.jl
@@ -27,6 +27,6 @@ function fishers_z(p̂)
 end
 
 include("JointDistanceDistributionTest.jl")
-include("PredictiveAsymmetryTest.jl")
-include("PATest.jl")
+#include("PredictiveAsymmetryTest.jl")
+#include("PATest.jl")
 include("CorrTest.jl")
diff --git a/src/independence_tests/surrogate/SurrogateAssociationTest.jl b/src/independence_tests/surrogate/SurrogateAssociationTest.jl
new file mode 100644
index 000000000..e63daed34
--- /dev/null
+++ b/src/independence_tests/surrogate/SurrogateAssociationTest.jl
@@ -0,0 +1,191 @@
+using Random
+using TimeseriesSurrogates
+import ProgressMeter
+export SurrogateAssociationTest
+export SurrogateAssociationTestResult
+import Statistics: quantile
+
+"""
+    SurrogateAssociationTest <: IndependenceTest
+    SurrogateAssociationTest(est_or_measure;
+        nshuffles::Int = 100,
+        surrogate = RandomShuffle(),
+        rng = Random.default_rng(),
+        show_progress = false,
+    )
+
+A generic (conditional) independence test for assessing whether two variables `X` and `Y`
+are independent, potentially conditioned on a third variable `Z`, based on
+surrogate data.
+
+## Usage
+
+- Use with [`independence`](@ref) to perform a surrogate test with input data. This will
+  return a [`SurrogateAssociationTestResult`](@ref).
+
+## Description
+
+This is a generic one-sided hypothesis test that checks whether `x` and `y`
+are independent (given `z`, if provided) based on resampling from a null distribution
+assumed to represent independence between the variables. The null distribution is generated
+by repeatedly shuffling the input data in some way that is intended
+to break any dependence between the input variables.
+
+The test first estimates the desired statistic using `est_or_measure` on the input data.
+Then, the first input variable is shuffled `nshuffles` times according to the given
+`surrogate` method (each type of `surrogate` represents a distinct null hypothesis).
+For each shuffle, `est_or_measure` is recomputed and the results are stored.
+
+- If `est_or_measure` is a [`TransferEntropy`](@ref) measure such as [`TEShannon`](@ref),
+  then the source variable is always shuffled, and the target and conditional
+  variable are left unshuffled.
+
+## Compatible estimators/measures
+
+| Measure | Pairwise | Conditional |
+| ----------------------------- | :------: | :---------: |
+| [`PearsonCorrelation`](@ref) | ✓ | ✖ |
+| [`DistanceCorrelation`](@ref) | ✓ | ✓ |
+| [`SMeasure`](@ref) | ✓ | ✖ |
+| [`HMeasure`](@ref) | ✓ | ✖ |
+| [`MMeasure`](@ref) | ✓ | ✖ |
+| [`LMeasure`](@ref) | ✓ | ✖ |
+| [`PairwiseAsymmetricInference`](@ref) | ✓ | ✖ |
+| [`ConvergentCrossMapping`](@ref) | ✓ | ✖ |
+| [`MIShannon`](@ref) | ✓ | ✖ |
+| [`MIRenyiJizba`](@ref) | ✓ | ✖ |
+| [`MIRenyiSarbu`](@ref) | ✓ | ✖ |
+| [`MITsallisMartin`](@ref) | ✓ | ✖ |
+| [`MITsallisFuruichi`](@ref) | ✓ | ✖ |
+| [`PartialCorrelation`](@ref) | ✖ | ✓ |
+| [`CMIShannon`](@ref) | ✖ | ✓ |
+| [`CMIRenyiJizba`](@ref) | ✖ | ✓ |
+| [`PMI`](@ref) | ✖ | ✓ |
+| [`TEShannon`](@ref) | ✓ | ✓ |
+| [`TERenyiJizba`](@ref) | ✓ | ✓ |
+
+## Examples
+
+```julia
+using CausalityTools
+using Random
+rng = Xoshiro(1234)
+x = rand(rng, 3000);
+y = rand(rng, 3000) .* circshift(x, 1);
+z = rand(rng, 3000) .* circshift(y, 1);
+
+est = EntropyDecomposition(TEShannon(base = 2), Lord(k = 20))
+test = SurrogateAssociationTest(est)
+independence(test, x, z) # Should indicate dependence
+independence(test, x, z, y) # Should indicate independence
+
+d = CodifyVariables(OrdinalPatterns(m=3))
+est = JointProbabilities(PartialMutualInformation(), d)
+test = SurrogateAssociationTest(est)
+independence(test, x, z, y)
+```
+
+## Extended examples
+
+- [Pairwise test, `DistanceCorrelation`](@ref examples_SurrogateAssociationTest_distancecorrelation).
+- [Pairwise test, `TEShannon`](@ref examples_SurrogateAssociationTest_teshannon).
+- [Conditional test, `PartialCorrelation`](@ref examples_SurrogateAssociationTest_partialcorrelation).
+- [Pairwise test, `MIShannon`, categorical](@ref examples_SurrogateAssociationTest_mishannon_categorical).
+- [Conditional test, `CMIShannon`, categorical](@ref examples_SurrogateAssociationTest_cmishannon_categorical).
+"""
+struct SurrogateAssociationTest{E, R, S} <: IndependenceTest{E}
+    est_or_measure::E
+    rng::R
+    surrogate::S
+    nshuffles::Int
+    show_progress::Bool
+end
+function SurrogateAssociationTest(
+    est_or_measure::E;
+    rng::R = Random.default_rng(),
+    surrogate::S = RandomShuffle(),
+    nshuffles::Int = 100,
+    show_progress = false
+    ) where {E, R, S}
+    SurrogateAssociationTest{E, R, S}(est_or_measure, rng, surrogate, nshuffles, show_progress)
+end
+
+
+Base.show(io::IO, test::SurrogateAssociationTest) = print(io,
+    """
+    `SurrogateAssociationTest` independence test.
+    -------------------------------------
+    estimator/measure: $(test.est_or_measure)
+    rng: $(test.rng)
+    # shuffles: $(test.nshuffles)
+    surrogate: $(test.surrogate)
+    """
+)
+
+"""
+    SurrogateAssociationTestResult(m, m_surr, pvalue)
+
+Holds the result of a [`SurrogateAssociationTest`](@ref). `m` is the measure computed on
+the original data. `m_surr` is a vector of the measure computed on permuted data, where
+`m_surr[i]` is the measure computed on the `i`-th permutation. `pvalue` is the one-sided
+`p`-value for the test.
+""" +struct SurrogateAssociationTestResult{M, MS, P} <: IndependenceTestResult + n_vars::Int # 2 vars = pairwise, 3 vars = conditional + m::M + m_surr::MS + pvalue::P + nshuffles::Int +end +pvalue(r::SurrogateAssociationTestResult) = r.pvalue +quantile(r::SurrogateAssociationTestResult, q) = quantile(r.m_surr, q) + +function Base.show(io::IO, test::SurrogateAssociationTestResult) + print(io, + """\ + `SurrogateAssociationTest` independence test + $(null_hypothesis_text(test)) + $(quantiles_text(test)) + $(pvalue_text_summary(test)) + """ + ) +end + +# Generic dispatch for any three-argument conditional independence measure where the +# third argument is to be conditioned on. This works naturally with e.g. +# conditional mutual information. +function independence(test::SurrogateAssociationTest, x, args...) + # Setup (`args...` is either `y` or `y, z`) + (; est_or_measure, rng, surrogate, nshuffles, show_progress) = test + verify_number_of_inputs_vars(est_or_measure, 1+length(args)) + SSSets = map(w -> StateSpaceSet(w), args) + estimation = x -> association(est_or_measure, x, SSSets...) + progress = ProgressMeter.Progress(nshuffles; + desc="SurrogateAssociationTest:", enabled=show_progress + ) + + # Estimate + Î = estimation(StateSpaceSet(x)) + s = surrogenerator(x, surrogate, rng) + Îs = zeros(nshuffles) + for b in 1:nshuffles + Îs[b] = estimation(s()) + ProgressMeter.next!(progress) + end + p = count(Î .<= Îs) / nshuffles + return SurrogateAssociationTestResult(1+length(args), Î, Îs, p, nshuffles) +end + +# Concrete implementations +include("transferentropy.jl") +include("crossmapping.jl") +include("hlms_measure.jl") + +# Input checks +function SurrogateAssociationTest(measure::T) where T <: MultivariateInformationMeasure + str = "`SurrogateAssociationTest` can't be constructed using the information measure `$T` definition directly. " * + "Give a valid estimator as the first argument instead and give the " * + "definition to the estimator, e.g. " * + "FPVP(CMIShannon())" + throw(ArgumentError(str)) +end \ No newline at end of file diff --git a/src/independence_tests/surrogate/SurrogateTest.jl b/src/independence_tests/surrogate/SurrogateTest.jl deleted file mode 100644 index 4b325f458..000000000 --- a/src/independence_tests/surrogate/SurrogateTest.jl +++ /dev/null @@ -1,161 +0,0 @@ -using Random -using TimeseriesSurrogates -import ProgressMeter -export SurrogateTest -export SurrogateTestResult - -""" - SurrogateTest <: IndependenceTest - SurrogateTest(measure, [est]; - nshuffles::Int = 100, - surrogate = RandomShuffle(), - rng = Random.default_rng(), - show_progress = false, - ) - -A generic (conditional) independence test for assessing whether two variables `X` and `Y` -are independendent, potentially conditioned on a third variable `Z`, based on -surrogate data. - -When used with [`independence`](@ref), a [`SurrogateTestResult`](@ref) is returned. - -## Description - -This is a generic one-sided hypothesis test that checks whether `x` and `y` -are independent (given `z`, if provided) based on resampling from a null distribution -assumed to represent independence between the variables. The null distribution is generated -by repeatedly shuffling the input data in some way that is intended -to break any dependence between the input variables. - -There are different ways of shuffling, dictated by `surrogate`, each representing a -distinct null hypothesis. For each shuffle, the provided `measure` is computed (using `est`, -if relevant). 
This procedure is repeated `nshuffles` times, and a test summary is returned. -The shuffled variable is always the first variable (`X`). Exceptions are: - -- If [`TransferEntropy`](@ref) measure such as [`TEShannon`](@ref), - then the source variable is always shuffled, and the target and conditional - variable are left unshuffled. - -## Compatible measures - -| Measure | Pairwise | Conditional | Requires `est` | -| ------------------------------------- | :------: | :---------: | :------------: | -| [`PearsonCorrelation`](@ref) | ✓ | ✖ | No | -| [`DistanceCorrelation`](@ref) | ✓ | ✓ | No | -| [`SMeasure`](@ref) | ✓ | ✖ | No | -| [`HMeasure`](@ref) | ✓ | ✖ | No | -| [`MMeasure`](@ref) | ✓ | ✖ | No | -| [`LMeasure`](@ref) | ✓ | ✖ | No | -| [`PairwiseAsymmetricInference`](@ref) | ✓ | ✖ | Yes | -| [`ConvergentCrossMapping`](@ref) | ✓ | ✖ | Yes | -| [`MIShannon`](@ref) | ✓ | ✖ | Yes | -| [`MIRenyiJizba`](@ref) | ✓ | ✖ | Yes | -| [`MIRenyiSarbu`](@ref) | ✓ | ✖ | Yes | -| [`MITsallisMartin`](@ref) | ✓ | ✖ | Yes | -| [`MITsallisFuruichi`](@ref) | ✓ | ✖ | Yes | -| [`PartialCorrelation`](@ref) | ✖ | ✓ | Yes | -| [`CMIShannon`](@ref) | ✖ | ✓ | Yes | -| [`CMIRenyiJizba`](@ref) | ✖ | ✓ | Yes | -| [`TEShannon`](@ref) | ✓ | ✓ | Yes | -| [`TERenyiJizba`](@ref) | ✓ | ✓ | Yes | -| [`PMI`](@ref) | ✖ | ✓ | Yes | - -## Examples - -- [Pairwise test, `DistanceCorrelation`](@ref examples_surrogatetest_distancecorrelation). -- [Pairwise test, `TEShannon`](@ref examples_surrogatetest_teshannon). -- [Conditional test, `PartialCorrelation`](@ref examples_surrogatetest_partialcorrelation). -- [Pairwise test, `MIShannon`, categorical](@ref examples_surrogatetest_mishannon_categorical). -- [Conditional test, `CMIShannon`, categorical](@ref examples_surrogatetest_cmishannon_categorical). -""" -struct SurrogateTest{M, E, R, S} <: IndependenceTest{M} - measure::M - est::E - rng::R - surrogate::S - nshuffles::Int - show_progress::Bool -end -function SurrogateTest(measure::M, est::E = nothing; - rng::R = Random.default_rng(), - surrogate::S = RandomShuffle(), - nshuffles::Int = 100, show_progress = false - ) where {M, E, R, S} - SurrogateTest{M, E, R, S}(measure, est, rng, surrogate, nshuffles, show_progress) -end - - -Base.show(io::IO, test::SurrogateTest) = print(io, - """ - `SurrogateTest` independence test. - ------------------------------------- - measure: $(test.measure) - estimator: $(test.est) - rng: $(test.rng) - # shuffles: $(test.nshuffles) - surrogate: $(test.surrogate) - """ -) - -""" - SurrogateTestResult(m, m_surr, pvalue) - -Holds the result of a [`SurrogateTest`](@ref). `m` is the measure computed on -the original data. `m_surr` is a vector of the measure computed on permuted data, where -`m_surr[i]` is the measure compute on the `i`-th permutation. `pvalue` is the one-sided -`p`-value for the test. -""" -struct SurrogateTestResult{M, MS, P} <: IndependenceTestResult - n_vars::Int # 2 vars = pairwise, 3 vars = conditional - m::M - m_surr::MS - pvalue::P - nshuffles::Int -end -pvalue(r::SurrogateTestResult) = r.pvalue -quantile(r::SurrogateTestResult, q) = quantile(r.m_surr, q) - -function Base.show(io::IO, test::SurrogateTestResult) - print(io, - """\ - `SurrogateTest` independence test - $(null_hypothesis_text(test)) - $(quantiles_text(test)) - $(pvalue_text_summary(test)) - """ - ) -end - -# Generic dispatch for any three-argument conditional independence measure where the -# third argument is to be conditioned on. This works naturally with e.g. -# conditional mutual information. 
-function independence(test::SurrogateTest, x, args...) - # Setup (`args...` is either `y` or `y, z`) - (; measure, est, rng, surrogate, nshuffles, show_progress) = test - verify_number_of_inputs_vars(measure, 1+length(args)) - SSSets = map(w -> StateSpaceSet(w), args) - estimation = x -> estimate(measure, est, x, SSSets...) - progress = ProgressMeter.Progress(nshuffles; - desc="SurrogateTest:", enabled=show_progress - ) - - # Estimate - Î = estimation(StateSpaceSet(x)) - s = surrogenerator(x, surrogate, rng) - Îs = zeros(nshuffles) - for b in 1:nshuffles - Îs[b] = estimation(s()) - ProgressMeter.next!(progress) - end - p = count(Î .<= Îs) / nshuffles - return SurrogateTestResult(3, Î, Îs, p, nshuffles) -end - -# Concrete implementations -include("contingency.jl") -include("transferentropy.jl") -include("hlms_measure.jl") -include("crossmapping.jl") -include("mutualinfo.jl") -include("condmutualinfo.jl") -include("pmi.jl") diff --git a/src/independence_tests/surrogate/condmutualinfo.jl b/src/independence_tests/surrogate/condmutualinfo.jl deleted file mode 100644 index fc8e98d16..000000000 --- a/src/independence_tests/surrogate/condmutualinfo.jl +++ /dev/null @@ -1,7 +0,0 @@ -function SurrogateTest(measure::ConditionalMutualInformation, est::Nothing, args...; - kwargs...) - T = typeof(measure) - txt = "Estimator not provided for measure $T. Cannot construct `SurrogateTest`\n" * - "Do e.g. `SurrogateTest(CMIShannon(), FPVP())`" - throw(ArgumentError(txt)) -end diff --git a/src/independence_tests/surrogate/contingency.jl b/src/independence_tests/surrogate/contingency.jl deleted file mode 100644 index b16fea575..000000000 --- a/src/independence_tests/surrogate/contingency.jl +++ /dev/null @@ -1,33 +0,0 @@ - -# Explicit dispatch for independence for `Contingency` estimator, because we don't -# want to convert categorical input data to `StateSpaceSets`. 
-function independence(test::SurrogateTest{MEASURE, <:Contingency}, x, y) where MEASURE - (; measure, est, rng, surrogate, nshuffles) = test - @assert length(x) == length(y) - N = length(x) - Î = estimate(measure, est, x, y) - sx = surrogenerator(x, surrogate, rng) - sy = surrogenerator(y, surrogate, rng) - Îs = zeros(nshuffles) - for b in 1:nshuffles - Îs[b] = estimate(measure, est, sx(), sy()) - end - p = count(Î .<= Îs) / nshuffles - - return SurrogateTestResult(2, Î, Îs, p, nshuffles) -end - -function independence(test::SurrogateTest{MEASURE, <:Contingency}, x, y, z) where MEASURE - (; measure, est, rng, surrogate, nshuffles) = test - @assert length(x) == length(y) == length(z) - N = length(x) - Î = estimate(measure, est, x, y, z) - s = surrogenerator(x, surrogate, rng) - Îs = zeros(nshuffles) - for b in 1:nshuffles - Îs[b] = estimate(measure, est, s(), y, z) - end - p = count(Î .<= Îs) / nshuffles - - return SurrogateTestResult(3, Î, Îs, p, nshuffles) -end diff --git a/src/independence_tests/surrogate/crossmapping.jl b/src/independence_tests/surrogate/crossmapping.jl index 56f056b6a..91bd35c00 100644 --- a/src/independence_tests/surrogate/crossmapping.jl +++ b/src/independence_tests/surrogate/crossmapping.jl @@ -1,61 +1,61 @@ using Statistics: mean -function independence(test::SurrogateTest{<:CrossmapMeasure, <:CrossmapEstimator{Int}}, x, y) - (; measure, est, rng, surrogate, nshuffles) = test - Î = crossmap(measure, est, x, y) +function independence(test::SurrogateAssociationTest{<:CrossmapEstimator}, x, y) + (; est_or_measure, rng, surrogate, nshuffles, show_progress) = test + Î = crossmap(est_or_measure, x, y) sx = surrogenerator(x, surrogate, rng) sy = surrogenerator(y, surrogate, rng) Îs = zeros(nshuffles) for b in 1:nshuffles - Îs[b] = crossmap(measure, est, sx(), sy()) + Îs[b] = crossmap(est_or_measure, sx(), sy()) end p = count(Î .<= Îs) / nshuffles - return SurrogateTestResult(2, Î, Îs, p, nshuffles) + return SurrogateAssociationTestResult(2, Î, Îs, p, nshuffles) end -function independence(test::SurrogateTest{<:Ensemble{<:CrossmapMeasure, <:RandomVectors{Int}}}, x, y) - (; measure, est, rng, surrogate, nshuffles) = test - Î = crossmap(measure, x, y) # A vector of length `measure.nreps` +function independence(test::SurrogateAssociationTest{<:Ensemble}, x, y) + (; est_or_measure, rng, surrogate, nshuffles, show_progress) = test + Î = crossmap(est_or_measure, x, y) # A vector of length `measure.nreps` sx = surrogenerator(x, surrogate, rng) sy = surrogenerator(y, surrogate, rng) Îs = Vector{eltype(1.0)}(undef, 0) - sizehint!(Îs, nshuffles * measure.nreps) + sizehint!(Îs, nshuffles * est_or_measure.nreps) for b in 1:nshuffles - append!(Îs, crossmap(measure, sx(), sy())) + append!(Îs, crossmap(est_or_measure, sx(), sy())) end - p = count(mean(Î) .<= Îs) / (nshuffles * measure.nreps) - return SurrogateTestResult(2, mean(Î), Îs, p, nshuffles) + p = count(mean(Î) .<= Îs) / (nshuffles * est_or_measure.nreps) + return SurrogateAssociationTestResult(2, mean(Î), Îs, p, nshuffles) end -# Independence tests are currently only defined for estimators operating on a single -# library size. -const INVALID_ENSEMBLE = Ensemble{ - <:CrossmapMeasure, - <:CrossmapEstimator{<:Union{AbstractVector, AbstractRange} - }} -const INVALID_CM_TEST = SurrogateTest{<:INVALID_ENSEMBLE} - -function SurrogateTest(measure::CrossmapMeasure, est::CrossmapEstimator{<:Union{AbstractVector, AbstractRange}}, args...; kwargs...) - T = typeof(est) - txt = "\n`SurrogateTest` not implemented for estimator $T. 
Specifically,\n" * - "`SurrogateTest(CCM(), RandomVectors(libsizes = 100:200:500, replace = true)))`" * - " will not work.\n" * - "The estimator must operate on a single library size, e.g.\n" * - "`SurrogateTest(CCM(), RandomVectors(libsizes = 100, replace = true))`.\n" - - throw(ArgumentError(txt)) -end - -function SurrogateTest(e::INVALID_ENSEMBLE, args...; kwargs...) - T = typeof(e.est) - txt = "\n`SurrogateTest` not implemented for estimator $T. Specifically,\n" * - "`SurrogateTest(CCM(), RandomVectors(libsizes = 100:200:500, replace = true)))`" * - " will not work.\n" * - "The estimator must operate on a single library size, e.g.\n" * - "`SurrogateTest(CCM(), RandomVectors(libsizes = 100, replace = true))`.\n" - - throw(ArgumentError(txt)) -end +# # Independence tests are currently only defined for estimators operating on a single +# # library size. +# const INVALID_ENSEMBLE = Ensemble{ +# <:CrossmapMeasure, +# <:CrossmapEstimator{<:Union{AbstractVector, AbstractRange} +# }} +# const INVALID_CM_TEST = SurrogateAssociationTest{<:INVALID_ENSEMBLE} + +# function SurrogateAssociationTest(measure::CrossmapMeasure, est::CrossmapEstimator{<:Union{AbstractVector, AbstractRange}}, args...; kwargs...) +# T = typeof(est) +# txt = "\n`SurrogateAssociationTest` not implemented for estimator $T. Specifically,\n" * +# "`SurrogateAssociationTest(CCM(), RandomVectors(libsizes = 100:200:500, replace = true)))`" * +# " will not work.\n" * +# "The estimator must operate on a single library size, e.g.\n" * +# "`SurrogateAssociationTest(CCM(), RandomVectors(libsizes = 100, replace = true))`.\n" + +# throw(ArgumentError(txt)) +# end + +# function SurrogateAssociationTest(e::INVALID_ENSEMBLE, args...; kwargs...) +# T = typeof(e.est) +# txt = "\n`SurrogateAssociationTest` not implemented for estimator $T. Specifically,\n" * +# "`SurrogateAssociationTest(CCM(), RandomVectors(libsizes = 100:200:500, replace = true)))`" * +# " will not work.\n" * +# "The estimator must operate on a single library size, e.g.\n" * +# "`SurrogateAssociationTest(CCM(), RandomVectors(libsizes = 100, replace = true))`.\n" + +# throw(ArgumentError(txt)) +# end diff --git a/src/independence_tests/surrogate/hlms_measure.jl b/src/independence_tests/surrogate/hlms_measure.jl index bcd8e930d..7ee86446a 100644 --- a/src/independence_tests/surrogate/hlms_measure.jl +++ b/src/independence_tests/surrogate/hlms_measure.jl @@ -2,16 +2,16 @@ # so we need to dispatch explicitly and call `s_measure` manually # to avoid automatic conversion to `StateSpaceSet`s (which would ignore # embedding parameters if input data are `Vector`s). 
-function independence(test::SurrogateTest{<:HLMS}, x, y) - (; measure, est, rng, surrogate, nshuffles) = test - Î = estimate(measure, x, y) +function independence(test::SurrogateAssociationTest{<:HLMS}, x, y) + (; est_or_measure, rng, surrogate, nshuffles) = test + Î = association(est_or_measure, x, y) sx = surrogenerator(x, surrogate, rng) sy = surrogenerator(y, surrogate, rng) Îs = zeros(nshuffles) for b in 1:nshuffles - Îs[b] = estimate(measure, sx(), sy()) + Îs[b] = association(est_or_measure, sx(), sy()) end p = count(Î .<= Îs) / nshuffles - return SurrogateTestResult(2, Î, Îs, p, nshuffles) + return SurrogateAssociationTestResult(2, Î, Îs, p, nshuffles) end diff --git a/src/independence_tests/surrogate/mutualinfo.jl b/src/independence_tests/surrogate/mutualinfo.jl deleted file mode 100644 index dddd39af9..000000000 --- a/src/independence_tests/surrogate/mutualinfo.jl +++ /dev/null @@ -1,6 +0,0 @@ -function SurrogateTest(measure::MutualInformation, est::Nothing, args...; kwargs...) - T = typeof(measure) - txt = "Estimator not provided for measure $T. Cannot construct `SurrogateTest`\n" * - "Do e.g. `SurrogateTest(MIShannon(), KSG2())`" - throw(ArgumentError(txt)) -end diff --git a/src/independence_tests/surrogate/pmi.jl b/src/independence_tests/surrogate/pmi.jl deleted file mode 100644 index 7ab4ea296..000000000 --- a/src/independence_tests/surrogate/pmi.jl +++ /dev/null @@ -1,7 +0,0 @@ -function SurrogateTest(measure::PMI, est::Nothing, args...; - kwargs...) - T = typeof(measure) - txt = "Estimator not provided for measure $T. Cannot construct `SurrogateTest`\n" * - "Do e.g. `SurrogateTest(PMI(), MesnerShalizi())`" - throw(ArgumentError(txt)) -end diff --git a/src/independence_tests/surrogate/transferentropy.jl b/src/independence_tests/surrogate/transferentropy.jl index e10a8ea1d..08dedffea 100644 --- a/src/independence_tests/surrogate/transferentropy.jl +++ b/src/independence_tests/surrogate/transferentropy.jl @@ -20,43 +20,30 @@ function marginals_and_surrogenerator(opt::OptimiseTraditional, surrogate::Surro return Ŝ, T⁺, S, T, C end -function independence(test::SurrogateTest{<:TransferEntropy{<:E, <:EmbeddingTypes}}, x::AbstractVector...) where {E} - (; measure, est, rng, surrogate, nshuffles) = test +function independence(test::SurrogateAssociationTest{<:EntropyDecomposition{<:TransferEntropy}}, x, args...) + (; est_or_measure, rng, surrogate, nshuffles) = test + embedding = est_or_measure.definition.embedding - cmi = te_to_cmi(measure) - Ŝ, T⁺, S, T, C = marginals_and_surrogenerator(measure.embedding, surrogate, x...; rng) + cmi_est = convert_to_cmi_estimator(est_or_measure) + Ŝ, T⁺, S, T, C = marginals_and_surrogenerator(embedding, surrogate, x, args...; rng) TC = StateSpaceSet(T, C) @assert length(T⁺) == length(S) == length(TC) - Î = estimate(cmi, est, T⁺, S, TC) + Î = association(cmi_est, T⁺, S, TC) Îs = zeros(nshuffles) for b in 1:nshuffles # TE(ŝ -> t) := I(t⁺; ŝ⁻ | t⁻, c⁻) - Îs[b] = estimate(cmi, est, T⁺, Ŝ(), TC) + Îs[b] = association(cmi_est, T⁺, Ŝ(), TC) end p = count(Î .<= Îs) / nshuffles - return SurrogateTestResult(length(x), Î, Îs, p, nshuffles) + return SurrogateAssociationTestResult(length(x), Î, Îs, p, nshuffles) end -function independence(test::SurrogateTest{<:TransferEntropy{<:E, <:EmbeddingTypes}, <:TransferEntropyEstimator}, x::AbstractVector...) 
where {E} - (; measure, est, rng, surrogate, nshuffles) = test - Ŝ, T⁺, S, T, C = marginals_and_surrogenerator(measure.embedding, surrogate, x...; rng) - @assert length(T⁺) == length(S) == length(T) == length(C) - Î = estimate(measure, est, S, T, T⁺, C) - Îs = zeros(nshuffles) - for b in 1:nshuffles - # TE(ŝ -> t) := I(t⁺; ŝ⁻ | t⁻, c⁻) - Îs[b] = estimate(measure, est, StateSpaceSet(Ŝ()), T, T⁺, C) - end - p = count(Î .<= Îs) / nshuffles - - return SurrogateTestResult(length(x), Î, Îs, p, nshuffles) -end - -function SurrogateTest(measure::TEShannon, est::Nothing, args...; kwargs...) - txt = "A valid estimator must be provided as second argument to `SurrogateTest` " * - "when using the `TEShannon` measure.\n" * - "Do e.g. SurrogateTest(TEShannon(), FPVP())" - throw(ArgumentError(txt)) -end +function SurrogateAssociationTest(est::T) where T <: TransferEntropy + str = "`SurrogateAssociationTest` can't be constructed using `$T` definition directly. " * + "Give a valid estimator as the first argument instead and give the " * + "definition to the estimator, e.g. " * + "Lindner(TEShannon()), or CMIDecomposition(TEShannon())" + throw(ArgumentError(str)) +end \ No newline at end of file diff --git a/src/integrations/uncertaindata.jl b/src/integrations/uncertaindata.jl deleted file mode 100644 index 856aa947d..000000000 --- a/src/integrations/uncertaindata.jl +++ /dev/null @@ -1,125 +0,0 @@ -# The uncertainty handling framework in this file will be added -# as part of a 2.X release. Can be ignored for now. - -import UncertainData: - resample, - UncertainStateSpaceSet, - UncertainIndexStateSpaceSet, - UncertainValueStateSpaceSet, - UncertainIndexValueStateSpaceSet -import .s_measure -import .jdd -import .transferentropy; export transferentropy -import .crossmap -import .ccm -import .predictive_asymmetry -import HypothesisTests.OneSampleTTest - -################################################## -# Basic resampling for `UncertainStateSpaceSet`s -################################################## -const UT = Union{UncertainValueStateSpaceSet, UncertainIndexStateSpaceSet, UncertainStateSpaceSet} - -s_measure(s::UT, t::UT, args...; kwargs...) = - s_measure(resample(s), resample(t), args...; kwargs...) - -jdd(s::UT, t::UT; kwargs...) = - jdd(resample(s), resample(t); kwargs...) - -jdd(test::OneSampleTTest, s::UT, t::UT; kwargs...) = - jdd(test, resample(s), resample(t); kwargs...) - -mutualinfo(s::UT, t::UT, method; kwargs...) = - mutualinfo(resample(s), resample(t), method; kwargs...) - -info_methods = [ - :VisitationFrequency, :TransferOperator, - :SymbolicPermutation, :SymbolicAmplitudeAwarePermutation, :SymbolicWeightedPermutation, - :NaiveKernel, - :Kraskov, - :Kraskov1, - :Kraskov2, - :KozachenkoLeonenko, - :Hilbert, - :TimeScaleMODWT -] - -for method in info_methods - @eval transferentropy($(method), s::UT, t::UT; kwargs...) = - transferentropy(method, resample(s), resample(t); kwargs...) - - @eval transferentropy($(method), s::UT, t::UT, c::UT; kwargs...) = - transferentropy(method, resample(s), resample(t), resample(c); kwargs...) -end - -# transferentropy(s::UT, t::UT, method; kwargs...) = -# transferentropy(resample(s), resample(t), method; kwargs...) - -# transferentropy(s::UT, t::UT, c::UT, method; kwargs...) = -# transferentropy(resample(s), resample(t), resample(c), method; kwargs...) - -predictive_asymmetry(method, s::UT, t::UT; kwargs...) = - predictive_asymmetry(method, resample(s), resample(t); kwargs...) - -predictive_asymmetry(method, s::UT, t::UT, c::UT; kwargs...) 
= - predictive_asymmetry(method, resample(s), resample(t), resample(c); kwargs...) - -crossmap(s::UT, t::UT, args...; kwargs...) = - crossmap(resample(s), resample(t), args...; kwargs...) - -ccm(s::UT, t::UT, args...; kwargs...) = - ccm(resample(s), resample(t), args...; kwargs...) - -########################################################################## -# Basic resampling for `UncertainIndexValueStateSpaceSet` (no constraints) -########################################################################## -const UIVD = UncertainIndexValueStateSpaceSet - -# TODO: warn about potential index reversals? -# -# function warn_about_sampling(s::V, t::W) -# if s isa UIVD -# @warn "`s` isa UncertainIndexValueStateSpaceSet. Index reversals may occur. Consider constrained resampling." -# end - -# if t isa UIVD -# @warn "`t` isa UncertainIndexValueStateSpaceSet. Index reversals may occur. Consider constrained resampling." -# end -# end - -# function warn_about_sampling(s::V, t::W, c::X) -# warn_about_sampling(s, t) -# if c isa UIVD -# @warn "`c` isa UncertainIndexValueStateSpaceSet. Index reversals may occur. Consider constrained resampling." -# end -# end - -s_measure(s::UIVD, t::UIVD, args...; kwargs...) = - s_measure(resample(s.values), resample(t.values), args...; kwargs...) - -jdd(s::UIVD, t::UIVD; kwargs...) = - jdd(resample(s.values), resample(t.values); kwargs...) - -jdd(test::OneSampleTTest, s::UIVD, t::UIVD; kwargs...) = - jdd(test, resample(s), resample(t); kwargs...) - -mutualinfo(method, s::UIVD, t::UIVD; kwargs...) = - mutualinfo(method, resample(s.values), resample(t.values); kwargs...) - -transferentropy(method, s::UIVD, t::UIVD; kwargs...) = - transferentropy(method, resample(s.values), resample(t.values); kwargs...) - -transferentropy(method, s::UIVD, t::UIVD, c::UIVD; kwargs...) = - transferentropy(method, resample(s.values), resample(t.values), resample(c.values); kwargs...) - -predictive_asymmetry(method, s::UIVD, t::UIVD; kwargs...) = - predictive_asymmetry(method, resample(s.values), resample(t.values); kwargs...) - -predictive_asymmetry(s::UIVD, t::UIVD, c::UIVD, method; kwargs...) = - predictive_asymmetry(resample(s.values), resample(t.values), resample(c.values), method; kwargs...) - -crossmap(s::UIVD, t::UIVD, args...; kwargs...) = - crossmap(resample(s.values), resample(t.values), args...; kwargs...) - -ccm(s::UIVD, t::UIVD, args...; kwargs...) = - ccm(resample(s.values), resample(t.values), args...; kwargs...) diff --git a/src/methods/closeness/HMeasure.jl b/src/methods/closeness/HMeasure.jl index 548d03eed..282f1393c 100644 --- a/src/methods/closeness/HMeasure.jl +++ b/src/methods/closeness/HMeasure.jl @@ -4,7 +4,6 @@ using StateSpaceSets: AbstractStateSpaceSet using Distances: SqEuclidean, Euclidean using Distances: pairwise, evaluate -export h_measure export HMeasure """ @@ -20,8 +19,8 @@ for an explanation. ## Usage +- Use with [`association`](@ref) to compute the raw h-measure statistic. - Use with [`independence`](@ref) to perform a formal hypothesis test for directional dependence. -- Use with [`h_measure`](@ref) to compute the raw h-measure statistic. ## Description @@ -36,8 +35,10 @@ H^{(k)}(x|y) = \\dfrac{1}{N} \\sum_{i=1}^{N} ``` Parameters are the same and ``R_i^{(k)}(x|y)`` is computed as for [`SMeasure`](@ref). + +See also: [`ClosenessMeasure`](@ref). 
""" -Base.@kwdef struct HMeasure{M, TM} <: AssociationMeasure +Base.@kwdef struct HMeasure{M, TM} <: ClosenessMeasure K::Int = 2 metric::M = SqEuclidean() tree_metric::TM = Euclidean() @@ -48,17 +49,8 @@ Base.@kwdef struct HMeasure{M, TM} <: AssociationMeasure w::Int = 0 end -""" - h_measure(measure::HMeasure, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - -Compute the [`HMeasure`](@ref) from source `x` to target `y`. -""" -function h_measure(measure::HMeasure, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - return estimate(measure, x, y) -end - # Internal method for use with `independence` -function estimate(measure::HMeasure, x::AbstractStateSpaceSet, y::AbstractStateSpaceSet) +function association(measure::HMeasure, x::AbstractStateSpaceSet, y::AbstractStateSpaceSet) (; K, metric, tree_metric, τx, τy, dx, dy, w) = measure # Match length of StateSpaceSets by excluding end points. diff --git a/src/methods/closeness/JointDistanceDistribution.jl b/src/methods/closeness/JointDistanceDistribution.jl index 10d3dc00d..69db794c2 100644 --- a/src/methods/closeness/JointDistanceDistribution.jl +++ b/src/methods/closeness/JointDistanceDistribution.jl @@ -6,7 +6,6 @@ using DelayEmbeddings: genembed import HypothesisTests: OneSampleTTest, pvalue export OneSampleTTest, pvalue export JointDistanceDistribution -export jdd function normalise_minmax(x::T, vmin, vmax) where T if x == zero(T) @@ -20,14 +19,14 @@ end JointDistanceDistribution <: AssociationMeasure end JointDistanceDistribution(; metric = Euclidean(), B = 10, D = 2, τ = -1, μ = 0.0) -The joint distance distribution (JDD) measure [Amigo2018](@citet). +The joint distance distribution (JDD) measure [Amigo2018](@cite). ## Usage +- Use with [`association`](@ref) to compute the joint distance distribution measure `Δ` from + [Amigo2018](@citet). - Use with [`independence`](@ref) to perform a formal hypothesis test for directional dependence. -- Use with [`jdd`](@ref) to compute the joint distance distribution `Δ` from - [Amigo2018](@citet) ## Keyword arguments @@ -61,7 +60,7 @@ of these embeddings, as described in [Amigo2018](@citet). * [Computing the JDD](@ref quickstart_jdd) * [Independence testing using JDD](@ref quickstart_jddtest) """ -Base.@kwdef struct JointDistanceDistribution{M, T} <: AssociationMeasure +Base.@kwdef struct JointDistanceDistribution{M, T} <: ClosenessMeasure metric::M = Euclidean() B::Int = 5 D::Int = 3 @@ -69,14 +68,7 @@ Base.@kwdef struct JointDistanceDistribution{M, T} <: AssociationMeasure μ::T = 0.0 end -# The convenience wrapper `jdd`` is in deprecations folder for now. - -function estimate(measure::JointDistanceDistribution, est::Nothing, source, target) - return estimate(measure, source, target) -end - -# Internal method for compatibility with independence tests. -function estimate(measure::JointDistanceDistribution, source, target) +function association(measure::JointDistanceDistribution, source, target) (; metric, B, D, τ) = measure length(source) == length(target) || error("lengths of inputs must match") js = ([1 for i = 1:D]...,) @@ -128,3 +120,54 @@ function jdd_step3(Dx, Dy, bmin, bmax) return bmin end end + +""" + rank_transformation(x::AbstractVector) + rank_transformation(x::AbstractStateSpaceSet) → ranks::NTuple{D, Vector} + +Rank-transform each variable/column of the length-`n` `D`-dimensional StateSpaceSet `x` and return the +rank-transformed variables as a `D`-tuple of length-`n` vectors. + +Returns the unscaled `ranks`. 
Divide by `n` to get an *approximation* to the +empirical cumulative distribution function (ECDF) `x`. + +## Description + +Modulo division by `n`, `rank_transformation` does *roughly* the same as naively computing the ECDF as +```julia +[count(xᵢ .<= x) for xᵢ in x] / length(x) +``` + +but an order of magnitude faster and with roughly three orders of magnitude less +allocations. The increased efficiency of this function relative to naively computing the +ECDF is +because it uses sorting of the input data to determine ranks, +arbitrarily breaking ties according to the sorting algorithm. Rank ties can therefore +never occur, and equal values are assigned different but close ranks. To preserve +ties, which you might want to do for example when dealing with +categorical or integer-valued data, use (the much slower) [`empcdf`](@ref). +""" +function rank_transformation(x::AbstractStateSpaceSet) + s = zeros(Int, length(x)) # re-use for each marginal + [rank_transformation!(s, xⱼ) for xⱼ in columns(x)] +end + +function rank_transformation(x::AbstractVector{T}) where T + N = length(x) + s = zeros(Int, N) + return rank_transformation!(s, x) +end + +function rank_transformation!( + s::AbstractVector{Int}, + x::AbstractVector{T}) where T <: Real + N = length(x) + r = zeros(N) + # Break ties arbitrarily by sorting. This means that ties are broken according to the + # sorting algorithm used, and equal values are assigned different ranks. + sortperm!(s, x) + for j in 1:N + r[s[j]] = j + end + return r +end \ No newline at end of file diff --git a/src/methods/closeness/LMeasure.jl b/src/methods/closeness/LMeasure.jl index d996422ff..767b1304b 100644 --- a/src/methods/closeness/LMeasure.jl +++ b/src/methods/closeness/LMeasure.jl @@ -5,10 +5,9 @@ using Distances: SqEuclidean, Euclidean using Distances: pairwise, evaluate export LMeasure -export l_measure """ - LMeasure <: AssociationMeasure + LMeasure <: ClosenessMeasure LMeasure(; K::Int = 2, dx = 2, dy = 2, τx = - 1, τy = -1, w = 0) The `LMeasure` [Chicharro2009](@cite) is a pairwise association @@ -20,8 +19,8 @@ for an explanation. ## Usage +- Use with [`association`](@ref) to compute the raw L-measure statistic. - Use with [`independence`](@ref) to perform a formal hypothesis test for directional dependence. -- Use with [`l_measure`](@ref) to compute the raw l-measure statistic. ## Description @@ -50,8 +49,10 @@ G_i^{(k)}(x|y) = \\dfrac{1}{K}\\sum_{j=1}^{K} g_{i,w_{i, j}}, ``` where ``w_{i,j}`` is the index of the ``j``-th nearest neighbor of ``\\bf{y_i}``. + +See also: [`ClosenessMeasure`](@ref). """ -Base.@kwdef struct LMeasure{M, TM} <: AssociationMeasure +Base.@kwdef struct LMeasure{M, TM} <: ClosenessMeasure K::Int = 2 metric::M = Euclidean() tree_metric::TM = Euclidean() @@ -62,21 +63,12 @@ Base.@kwdef struct LMeasure{M, TM} <: AssociationMeasure w::Int = 0 end -""" - l_measure(measure::LMeasure, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - -Compute the [`LMeasure`](@ref) from source `x` to target `y`. -""" -function l_measure(measure::LMeasure, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - return estimate(measure, x, y) -end - function getrank(x, p) xmin, xmax = minimum(x), maximum end # Internal method for use with `independence` -function estimate(measure::LMeasure, x::AbstractStateSpaceSet, y::AbstractStateSpaceSet) +function association(measure::LMeasure, x::AbstractStateSpaceSet, y::AbstractStateSpaceSet) (; K, metric, tree_metric, τx, τy, dx, dy, w) = measure # Match length of StateSpaceSets by excluding end points. 
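Because the closeness measures now use the same `association` interface (see the `Usage` sections in the docstrings above), computing a raw statistic and wrapping the same measure in a surrogate independence test follow one pattern. A small sketch with illustrative data and parameter values:

```julia
using CausalityTools

x, y = rand(1000), rand(1000)

# Raw closeness statistics; embedding parameters are illustrative.
association(HMeasure(K = 3, dx = 3, dy = 3, τx = -1, τy = -1), x, y)
association(LMeasure(K = 3), x, y)

# Formal one-sided test for directional dependence using the same measure.
test = SurrogateAssociationTest(HMeasure(); nshuffles = 100)
independence(test, x, y)
```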
diff --git a/src/methods/closeness/MMeasure.jl b/src/methods/closeness/MMeasure.jl index 6c98c2069..686e53802 100644 --- a/src/methods/closeness/MMeasure.jl +++ b/src/methods/closeness/MMeasure.jl @@ -4,11 +4,10 @@ using StateSpaceSets: AbstractStateSpaceSet using Distances: SqEuclidean, Euclidean using Distances: pairwise, evaluate -export m_measure export MMeasure """ - MMeasure <: AssociationMeasure + MMeasure <: ClosenessMeasure MMeasure(; K::Int = 2, dx = 2, dy = 2, τx = - 1, τy = -1, w = 0) The `MMeasure` [Andrzejak2003](@cite) is a pairwise association @@ -20,8 +19,8 @@ for an explanation. ## Usage +- Use with [`association`](@ref) to compute the raw m-measure statistic. - Use with [`independence`](@ref) to perform a formal hypothesis test for directional dependence. -- Use with [`m_measure`](@ref) to compute the raw m-measure statistic. ## Description @@ -35,8 +34,10 @@ M^{(k)}(x|y) = \\dfrac{1}{N} \\sum_{i=1}^{N} where ``R_i(x)`` is computed as for [`HMeasure`](@ref), while ``R_i^k(x)`` and ``R_i^{(k)}(x|y)`` is computed as for [`SMeasure`](@ref). Parameters also have the same meaning as for [`SMeasure`](@ref)/[`HMeasure`](@ref). + +See also: [`ClosenessMeasure`](@ref). """ -Base.@kwdef struct MMeasure{M, TM} <: AssociationMeasure +Base.@kwdef struct MMeasure{M, TM} <: ClosenessMeasure K::Int = 2 metric::M = SqEuclidean() tree_metric::TM = Euclidean() @@ -47,17 +48,8 @@ Base.@kwdef struct MMeasure{M, TM} <: AssociationMeasure w::Int = 0 end -""" - m_measure(measure::MMeasure, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - -Compute the [`MMeasure`](@ref) from source `x` to target `y`. -""" -function m_measure(measure::MMeasure, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - return estimate(measure, x, y) -end - # Internal method for use with `independence` -function estimate(measure::MMeasure, x::AbstractStateSpaceSet, y::AbstractStateSpaceSet) +function association(measure::MMeasure, x::AbstractStateSpaceSet, y::AbstractStateSpaceSet) (; K, metric, tree_metric, τx, τy, dx, dy, w) = measure # Match length of StateSpaceSets by excluding end points. diff --git a/src/methods/closeness/SMeasure.jl b/src/methods/closeness/SMeasure.jl index a835f1f18..e0b7840db 100644 --- a/src/methods/closeness/SMeasure.jl +++ b/src/methods/closeness/SMeasure.jl @@ -5,10 +5,9 @@ using Distances: SqEuclidean, Euclidean using Distances: pairwise, evaluate export SMeasure -export s_measure """ - SMeasure < AssociationMeasure + SMeasure < ClosenessMeasure SMeasure(; K::Int = 2, dx = 2, dy = 2, τx = - 1, τy = -1, w = 0) `SMeasure` is a bivariate association measure from [Arnhold1999](@citet) @@ -19,8 +18,8 @@ Note that `τx` and `τy` are negative; see explanation below. ## Usage +- Use with [`association`](@ref) to compute the raw s-measure statistic. - Use with [`independence`](@ref) to perform a formal hypothesis test for directional dependence. -- Use with [`s_measure`](@ref) to compute the raw s-measure statistic. ## Description @@ -78,8 +77,10 @@ The algorithm is slightly modified from [Grassberger1999](@cite) to allow univar In all three cases, input StateSpaceSets are length-matched by eliminating points at the end of the longest StateSpaceSet (after the embedding step, if relevant) before analysis. + +See also: [`ClosenessMeasure`](@ref). 
""" -Base.@kwdef struct SMeasure{M, TM} <: AssociationMeasure +Base.@kwdef struct SMeasure{M, TM} <: ClosenessMeasure K::Int = 2 metric::M = SqEuclidean() tree_metric::TM = Euclidean() @@ -90,17 +91,8 @@ Base.@kwdef struct SMeasure{M, TM} <: AssociationMeasure w::Int = 0 end -""" - s_measure(measure::SMeasure, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - -Compute the [`SMeasure`](@ref) from source `x` to target `y`. -""" -function s_measure(measure::SMeasure, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - return estimate(measure, x, y) -end - # Internal method for use with `independence` -function estimate(measure::SMeasure, x::AbstractStateSpaceSet, y::AbstractStateSpaceSet) +function association(measure::SMeasure, x::AbstractStateSpaceSet, y::AbstractStateSpaceSet) (; K, metric, tree_metric, τx, τy, dx, dy, w) = measure # Match length of StateSpaceSets by excluding end points. diff --git a/src/methods/closeness/closeness.jl b/src/methods/closeness/closeness.jl index 258cb3409..ef0b50c24 100644 --- a/src/methods/closeness/closeness.jl +++ b/src/methods/closeness/closeness.jl @@ -1,5 +1,21 @@ -include("JointDistanceDistribution.jl") +export ClosenessMeasure +""" + ClosenessMeasure <: AssociationMeasure + +The supertype for all multivariate information-based measure definitions. + +## Implementations +- [`JointDistanceDistribution`](@ref) +- [`SMeasure`](@ref) +- [`HMeasure`](@ref) +- [`MMeasure`](@ref) +- [`LMeasure`](@ref) + +""" +abstract type ClosenessMeasure <: AssociationMeasure end + +include("JointDistanceDistribution.jl") include("SMeasure.jl") include("HMeasure.jl") include("MMeasure.jl") diff --git a/src/methods/closeness/common.jl b/src/methods/closeness/common.jl index 7d182081e..967d7cded 100644 --- a/src/methods/closeness/common.jl +++ b/src/methods/closeness/common.jl @@ -1,6 +1,6 @@ const HLMS = Union{HMeasure, LMeasure, MMeasure, SMeasure} -function estimate(measure::HLMS, x::AbstractVector{T}, y::AbstractVector{T}) where T +function association(measure::HLMS, x::AbstractVector{T}, y::AbstractVector{T}) where T (; K, metric, tree_metric, τx, τy, dx, dy, w) = measure jsx = ([1 for i = 1:dx]...,) @@ -14,21 +14,21 @@ function estimate(measure::HLMS, x::AbstractVector{T}, y::AbstractVector{T}) whe # TODO: cut the last points of the shortest resulting embedding. x̂ = lX > lY ? X[1:lY, :] : X ŷ = lY > lX ? Y[1:lX, :] : Y - return estimate(measure, x̂, ŷ) + return association(measure, x̂, ŷ) end -function estimate(measure::HLMS, x::AbstractStateSpaceSet{D}, y::AbstractVector{T}) where {D, T} +function association(measure::HLMS, x::AbstractStateSpaceSet{D}, y::AbstractVector{T}) where {D, T} (; K, metric, tree_metric, τx, τy, dx, dy, w) = measure Y = embed(y, dy, τy) X = x[1:length(Y), :] - return estimate(measure, X, Y) + return association(measure, X, Y) end -function estimate(measure::HLMS, x::AbstractVector{T}, y::AbstractStateSpaceSet{D}) where {D, T} +function association(measure::HLMS, x::AbstractVector{T}, y::AbstractStateSpaceSet{D}) where {D, T} (; K, metric, tree_metric, τx, τy, dx, dy, w) = measure X = embed(x, dx, τx) Y = y[1:length(X), :] - return estimate(measure, X, Y) + return association(measure, X, Y) end diff --git a/src/methods/correlation/correlation.jl b/src/methods/correlation/correlation.jl index a835ff395..fd8c3b603 100644 --- a/src/methods/correlation/correlation.jl +++ b/src/methods/correlation/correlation.jl @@ -1,3 +1,20 @@ +""" + CorrelationMeasure <: AssociationMeasure end + +The supertype for correlation measures. 
+ +## Concrete implementations + +- [`PearsonCorrelation`](@ref) +- [`PartialCorrelation`](@ref) +- [`DistanceCorrelation`](@ref) +""" +abstract type CorrelationMeasure <: AssociationMeasure end + +# Future proof definition, to obey the overall API ("estimator contains measure"). +# Implementations must have `definition` as the first field. +abstract type CorrelationMeasureEstimator{M} <: AssociationMeasure end + include("pearson_correlation.jl") include("partial_correlation.jl") include("distance_correlation.jl") diff --git a/src/methods/correlation/distance_correlation.jl b/src/methods/correlation/distance_correlation.jl index 9cd88662b..3e37fd77b 100644 --- a/src/methods/correlation/distance_correlation.jl +++ b/src/methods/correlation/distance_correlation.jl @@ -1,73 +1,52 @@ using StateSpaceSets: AbstractStateSpaceSet -using Distances +using Distances: Euclidean +using Distances: pairwise using LinearAlgebra export DistanceCorrelation -export distance_correlation """ DistanceCorrelation -The distance correlation (Székely et al., 2007)[^Székely2007] measure quantifies +The distance correlation [Szekely2007](@cite) measure quantifies potentially nonlinear associations between pairs of variables. If applied to -three variables, the partial distance correlation (Székely and Rizzo, 2014)[^Székely2014] +three variables, the partial distance correlation [Szekely2014](@cite) is computed. ## Usage +- Use with [`association`](@ref) to compute the raw (partial) distance correlation + coefficient. - Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`distance_correlation`](@ref) to compute the raw distance correlation - coefficient. - -!!! warn - A partial distance correlation `distance_correlation(X, Y, Z) = 0` doesn't - always guarantee conditional independence `X ⫫ Y | Z`. See Székely and Rizzo (2014) - for in-depth discussion. - -[^Székely2007]: - Székely, G. J., Rizzo, M. L., & Bakirov, N. K. (2007). Measuring and testing - dependence by correlation of distances. The annals of statistics, 35(6), 2769-2794. -[^Székely2014]: - Székely, G. J., & Rizzo, M. L. (2014). Partial distance correlation with methods for - dissimilarities. -""" -struct DistanceCorrelation <: AssociationMeasure end -max_inputs_vars(::DistanceCorrelation) = 3 +## Description -""" - distance_correlation(x, y) → dcor ∈ [0, 1] - distance_correlation(x, y, z) → pdcor +The distance correlation can be used to compute the association between two variables, +or the conditional association between three variables, like so: -Compute the empirical/sample distance correlation (Székely et al., 2007)[^Székely2007], -here called `dcor`, between StateSpaceSets `x` and `y`. Alternatively, compute the -partial distance correlation `pdcor` (Székely and Rizzo, 2014)[^Székely2014]. + association(DistanceCorrelation(), x, y) → dcor ∈ [0, 1] + association(DistanceCorrelation(), x, y, z) → pdcor -See also: [`DistanceCorrelation`](@ref). +With two variables, we compute `dcor`, which is called the empirical/sample distance +correlation [Szekely2007](@cite). With three variables, the +partial distance correlation `pdcor` is computed [Szekely2014](@cite). -[^Székely2007]: - Székely, G. J., Rizzo, M. L., & Bakirov, N. K. (2007). Measuring and testing - dependence by correlation of distances. The annals of statistics, 35(6), 2769-2794. -[^Székely2014]: - Székely, G. J., & Rizzo, M. L. (2014). Partial distance correlation with methods for - dissimilarities. +!!! 
warn + A partial distance correlation `distance_correlation(X, Y, Z) = 0` doesn't + always guarantee conditional independence `X ⫫ Y | Z`. See [Szekely2014](@citet) + for an in-depth discussion. """ -function distance_correlation(x::ArrayOrStateSpaceSet, y::ArrayOrStateSpaceSet) - return estimate(DistanceCorrelation(), x, y) -end +struct DistanceCorrelation <: CorrelationMeasure end -function distance_correlation(x::ArrayOrStateSpaceSet, y::ArrayOrStateSpaceSet, - z::ArrayOrStateSpaceSet) - return estimate(DistanceCorrelation(), x, y, z) -end +max_inputs_vars(::DistanceCorrelation) = 3 -function estimate(m::DistanceCorrelation, est::Nothing, args...) - return estimate(m, args...) +function association(m::DistanceCorrelation, est::Nothing, args...) + return association(m, args...) end # Common interface for higher-level methods. -function estimate(measure::DistanceCorrelation, X, Y) +function association(measure::DistanceCorrelation, X, Y) # TODO: Future optimization: this could be quicker if we only compute distances once # for X and once for Y. Currently, they are computed twice each. 𝒱ₙ²xy = distance_covariance(X, Y) @@ -216,7 +195,7 @@ end # Common interface for higher-level methods. -function estimate(measure::DistanceCorrelation, X, Y, Z) +function association(measure::DistanceCorrelation, X, Y, Z) Lx, Ly, Lz = length(X), length(Y), length(Z) Lx == Ly == Lz || throw(ArgumentError("Input X, Y and Z must have same lengths.")) N = Lx diff --git a/src/methods/correlation/partial_correlation.jl b/src/methods/correlation/partial_correlation.jl index 56183d778..98eddd8ce 100644 --- a/src/methods/correlation/partial_correlation.jl +++ b/src/methods/correlation/partial_correlation.jl @@ -1,4 +1,3 @@ -export partial_correlation export PartialCorrelation """ PartialCorrelation @@ -9,9 +8,9 @@ variables removed. ## Usage +- Use with [`association`](@ref) to compute the raw partial correlation coefficient. - Use with [`independence`](@ref) to perform a formal hypothesis test for - conditional dependence. -- Use with [`partial_correlation`](@ref) to compute the raw correlation coefficient. + correlation-based conditional independence. ## Description @@ -38,28 +37,18 @@ In practice, we compute the estimate where ``\\hat{P} = \\hat{\\Sigma}^{-1}`` is the sample precision matrix. """ -struct PartialCorrelation <: AssociationMeasure end +struct PartialCorrelation <: CorrelationMeasure end min_inputs_vars(::PartialCorrelation) = 3 max_inputs_vars(::PartialCorrelation) = Inf -""" - partial_correlation(x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet, - z::VectorOrStateSpaceSet...) - -Compute the [`PartialCorrelation`](@ref) between `x` and `y`, given `z`. -""" -function partial_correlation(x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet, z::ArrayOrStateSpaceSet...) - return estimate(PartialCorrelation(), x, y, z...) -end - # Compatibility with `independence` -function estimate(::PartialCorrelation, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet, +function association(::PartialCorrelation, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet, conds::ArrayOrStateSpaceSet...) X, Y, Z = construct_partialcor_datasets(x, y, conds...) 
D = StateSpaceSet(X, Y, Z) - cov = fastcov(D) - precision_matrix = invert_cov(cov) + cov_matrix = cov(D) + precision_matrix = invert_cov(cov_matrix) return partial_correlation_from_precision(precision_matrix, 1, 2) end @@ -72,19 +61,14 @@ function construct_partialcor_datasets(x::VectorOrStateSpaceSet, y::VectorOrStat return X, Y, Z end -function estimate(measure::PartialCorrelation, est::Nothing, x, y, z) - return estimate(measure, x, y, z) -end - - -function invert_cov(cov::AbstractMatrix) - if det(cov) ≈ 0.0 +function invert_cov(cov_matrix::AbstractMatrix) + if det(cov_matrix) ≈ 0.0 # If the determinant of the covariance matrix is zero, then the # Moore-Penrose pseudo-inverse is used. - rtol = sqrt(eps(real(float(one(eltype(cov)))))) - return pinv(cov; rtol) + rtol = sqrt(eps(real(float(one(eltype(cov_matrix)))))) + return pinv(cov_matrix; rtol) else - return inv(cov) + return inv(cov_matrix) end end diff --git a/src/methods/correlation/pearson_correlation.jl b/src/methods/correlation/pearson_correlation.jl index 20bae6b12..a17891e3a 100644 --- a/src/methods/correlation/pearson_correlation.jl +++ b/src/methods/correlation/pearson_correlation.jl @@ -1,5 +1,4 @@ export PearsonCorrelation -export pearson_correlation """ PearsonCorrelation @@ -8,8 +7,9 @@ The Pearson correlation of two variables. ## Usage -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`pearson_correlation`](@ref) to compute the raw correlation coefficient. +- Use with [`association`](@ref) to compute the raw Pearson correlation coefficient. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence + using the Pearson correlation coefficient. ## Description @@ -24,20 +24,10 @@ for real-valued random variables ``X`` and ``Y`` with associated samples where ``\\bar{x}`` and ``\\bar{y}`` are the means of the observations ``x_k`` and ``y_k``, respectively. """ -struct PearsonCorrelation <: AssociationMeasure end - -""" - pearson_correlation(x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - -Compute the [`PearsonCorrelation`](@ref) between `x` and `y`, which must each be -1-dimensional. -""" -function pearson_correlation(x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - return estimate(PearsonCorrelation(), x, y) -end +struct PearsonCorrelation <: CorrelationMeasure end # Common interface for higher-level methods. -function estimate(measure::PearsonCorrelation, +function association(measure::PearsonCorrelation, x::VectorOrStateSpaceSet{1, T}, y::VectorOrStateSpaceSet{1, T}) where T Lx, Ly = length(x), length(y) @@ -56,10 +46,6 @@ function estimate(measure::PearsonCorrelation, return ρ end -function estimate(measure::PearsonCorrelation, est::Nothing, x, y) - return estimate(measure, x, y) -end - # Silly, but 1-dimensional StateSpaceSets needs special indexing (because each point is a vector, # not a value). pt_generator(x::AbstractStateSpaceSet{1}) = (x[1] for x in x) diff --git a/src/methods/crossmappings/ccm-like/ConvergentCrossMapping.jl b/src/methods/crossmappings/ccm-like/ConvergentCrossMapping.jl index 728e425b6..9188edd1e 100644 --- a/src/methods/crossmappings/ccm-like/ConvergentCrossMapping.jl +++ b/src/methods/crossmappings/ccm-like/ConvergentCrossMapping.jl @@ -8,12 +8,23 @@ export ConvergentCrossMapping, CCM ConvergentCrossMapping(; d::Int = 2, τ::Int = -1, w::Int = 0, f = Statistics.cor, embed_warn = true) -The convergent [cross mapping](@ref cross_mapping_api) (CCM) measure [Sugihara2012](@cite)). 
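The correlation measures follow the same calling convention; the `DistanceCorrelation` docstring above already shows it explicitly. A short sketch of the pairwise and conditional variants (data and variable names are illustrative):

```julia
using CausalityTools

x, y, z = randn(800), randn(800), randn(800)

association(PearsonCorrelation(), x, y)      # pairwise linear correlation
association(DistanceCorrelation(), x, y)     # pairwise, possibly nonlinear
association(PartialCorrelation(), x, y, z)   # correlation between x and y, given z

# The same measures plug directly into the independence-testing API:
independence(SurrogateAssociationTest(PartialCorrelation()), x, y, z)
```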
+The convergent cross mapping measure [Sugihara2012](@cite). -Specifies embedding dimension `d`, embedding lag `τ` to be used, as described below, -with [`predict`](@ref) or [`crossmap`](@ref). The Theiler window `w` controls how many -temporal neighbors are excluded during neighbor searches (`w = 0` means that only the -point itself is excluded). +## Usage + +- Use with [`association`](@ref) together with a [`CrossmapEstimator`](@ref) to compute the + cross-map correlation between input variables. + +## Compatible estimators + +- [`RandomSegment`](@ref) +- [`RandomVectors`](@ref) +- [`ExpandingSegment`](@ref) + +## Description + +The Theiler window `w` controls how many temporal neighbors are excluded during neighbor +searches (`w = 0` means that only the point itself is excluded). `f` is a function that computes the agreement between observations and predictions (the default, `f = Statistics.cor`, gives the Pearson correlation coefficient). @@ -35,6 +46,15 @@ With this convention, `τ < 0` implies "past/present values of source used to pr target", and `τ > 0` implies "future/present values of source used to predict target". The latter case may not be meaningful for many applications, so by default, a warning will be given if `τ > 0` (`embed_warn = false` turns off warnings). + +## Estimation + +- [Example 1](@ref example_ConvergentCrossMapping_RandomVectors). + Estimation with [`RandomVectors`](@ref) estimator. +- [Example 2](@ref example_ConvergentCrossMapping_RandomSegment). + Estimation with [`RandomSegment`](@ref) estimator. +- [Example 3](@ref example_ConvergentCrossMapping_reproducing_sugihara): Reproducing + figures from [Sugihara2012](@citet). """ Base.@kwdef struct ConvergentCrossMapping <: CrossmapMeasure d::Int = 2 @@ -45,14 +65,14 @@ Base.@kwdef struct ConvergentCrossMapping <: CrossmapMeasure end const CCM = ConvergentCrossMapping -n_neighbors_simplex(measure::ConvergentCrossMapping) = measure.d + 1 -max_segmentlength(measure::ConvergentCrossMapping, x::AbstractVector) = - length(x) - measure.d + 1 +n_neighbors_simplex(definition::ConvergentCrossMapping) = definition.d + 1 +max_segmentlength(definition::ConvergentCrossMapping, x::AbstractVector) = + length(x) - definition.d + 1 # TODO: version that takes into consideration prediction lag -function embed(measure::ConvergentCrossMapping, t::AbstractVector, s::AbstractVector) - (; d, τ, w, f) = measure - if τ > 0 && measure.embed_warn +function embed(definition::ConvergentCrossMapping, t::AbstractVector, s::AbstractVector) + (; d, τ, w, f) = definition + if τ > 0 && definition.embed_warn @warn """τ > 0. You're using future values of source to predict the target. 
Turn \ off this warning by setting `embed_warn = false` in the \ `PairwiseAsymmetricInference` constructor.""" diff --git a/src/methods/crossmappings/ccm-like/PairwiseAsymmetricInference.jl b/src/methods/crossmappings/ccm-like/PairwiseAsymmetricInference.jl index bed6b119e..49760f507 100644 --- a/src/methods/crossmappings/ccm-like/PairwiseAsymmetricInference.jl +++ b/src/methods/crossmappings/ccm-like/PairwiseAsymmetricInference.jl @@ -2,21 +2,32 @@ import DelayEmbeddings: embed using Statistics: cor export PairwiseAsymmetricInference, PAI + """ PairwiseAsymmetricInference <: CrossmapMeasure PairwiseAsymmetricInference(; d::Int = 2, τ::Int = -1, w::Int = 0, f = Statistics.cor, embed_warn = true) -The pairwise asymmetric inference (PAI) [cross mapping](@ref cross_mapping_api) -measure [McCracken2014](@cite)) is a version of -[`ConvergentCrossMapping`](@ref) that searches for neighbors in +The pairwise asymmetric inference (PAI) measure [McCracken2014](@cite) +is a version of [`ConvergentCrossMapping`](@ref) that searches for neighbors in *mixed* embeddings (i.e. both source and target variables included); otherwise, the algorithms are identical. -Specifies embedding dimension `d`, embedding lag `τ` to be used, as described below, -with [`predict`](@ref) or [`crossmap`](@ref). The Theiler window `w` controls how many -temporal neighbors are excluded during neighbor searches (`w = 0` means that only the -point itself is excluded). +## Usage + +- Use with [`association`](@ref) to compute the pairwise asymmetric inference measure + between variables. + +## Compatible estimators + +- [`RandomSegment`](@ref) +- [`RandomVectors`](@ref) +- [`ExpandingSegment`](@ref) + +## Description + +The Theiler window `w` controls how many temporal neighbors are excluded during neighbor +searches (`w = 0` means that only the point itself is excluded). `f` is a function that computes the agreement between observations and predictions (the default, `f = Statistics.cor`, gives the Pearson correlation coefficient). @@ -41,6 +52,15 @@ With this convention, `τ < 0` implies "past/present values of source used to pr target", and `τ > 0` implies "future/present values of source used to predict target". The latter case may not be meaningful for many applications, so by default, a warning will be given if `τ > 0` (`embed_warn = false` turns off warnings). + +## Estimation + +- [Example 1](@ref example_PairwiseAsymmetricInference_RandomVectors). + Estimation with [`RandomVectors`](@ref) estimator. +- [Example 2](@ref example_PairwiseAsymmetricInference_RandomSegment). + Estimation with [`RandomSegment`](@ref) estimator. +- [Example 3](@ref example_PairwiseAsymmetricInference_reproduce_mccracken). Reproducing + McCracken & Weigel's results from the original paper. """ Base.@kwdef struct PairwiseAsymmetricInference <: CrossmapMeasure d::Int = 2 @@ -51,16 +71,16 @@ Base.@kwdef struct PairwiseAsymmetricInference <: CrossmapMeasure end const PAI = PairwiseAsymmetricInference -n_neighbors_simplex(measure::PairwiseAsymmetricInference) = - (measure.d + 1) + 1 # one extra coordinate included, due to the inclusion of the target. -max_segmentlength(measure::PairwiseAsymmetricInference, x::AbstractVector) = - length(x) - measure.d + 1 +n_neighbors_simplex(definition::PairwiseAsymmetricInference) = + (definition.d + 1) + 1 # one extra coordinate included, due to the inclusion of the target. 
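In the overhauled crossmap API the measure definition is stored inside the estimator, so estimation goes through `association` with a `CrossmapEstimator`. A sketch under that assumption; the `RandomVectors(CCM(); ...)` and `RandomSegment(PAI(); ...)` constructor forms are inferred from the estimator-wraps-definition pattern used elsewhere in this changeset and are not taken verbatim from it:

```julia
using CausalityTools, Random

rng = Xoshiro(1)
x, y = randn(rng, 600), randn(rng, 600)

# Cross-map estimate at a single library size (returns a scalar ρ).
est_ccm = RandomVectors(CCM(d = 3, τ = -2); libsizes = 200, replace = true, rng = rng)
association(est_ccm, x, y)

# Same pattern for pairwise asymmetric inference.
est_pai = RandomSegment(PAI(d = 3); libsizes = 100, rng = rng)
association(est_pai, x, y)
```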
+max_segmentlength(definition::PairwiseAsymmetricInference, x::AbstractVector) = + length(x) - definition.d + 1 # TODO: version that takes into consideration prediction lag -function embed(measure::PairwiseAsymmetricInference, t::AbstractVector, s::AbstractVector) - (; d, τ, w) = measure +function embed(definition::PairwiseAsymmetricInference, t::AbstractVector, s::AbstractVector) + (; d, τ, w) = definition @assert τ != 0 - if τ > 0 && measure.embed_warn + if τ > 0 && definition.embed_warn @warn """τ > 0. You're using future values of source to predict the target. Turn \ off this warning by setting `embed_warn = false` in the \ `PairwiseAsymmetricInference` constructor.""" @@ -70,7 +90,7 @@ function embed(measure::PairwiseAsymmetricInference, t::AbstractVector, s::Abstr # - Positive τ := embedding vectors (s(i), t(i), t(i+1), ...), "future predicts present" τs = [0; reverse(range(start=0, step=τ, stop=(d-1)*τ))] js = [2; repeat([1], d)] - idxs_S̄ = 1:measure.d - idx_t̄ = measure.d + 1 # column index of time series to be predict + idxs_S̄ = 1:definition.d + idx_t̄ = definition.d + 1 # column index of time series to be predict return genembed(StateSpaceSet(t, s), τs, js), idx_t̄, idxs_S̄ end diff --git a/src/methods/crossmappings/ccm-like/common.jl b/src/methods/crossmappings/ccm-like/common.jl index 7e609755b..c39b27c9f 100644 --- a/src/methods/crossmappings/ccm-like/common.jl +++ b/src/methods/crossmappings/ccm-like/common.jl @@ -3,6 +3,7 @@ using StateSpaceSets: dimension using StateSpaceSets: AbstractStateSpaceSet CCMLike = Union{ConvergentCrossMapping, PairwiseAsymmetricInference} +export predict, crossmap # ----------------------------------------------------------------------------------------- # Generic methods that operates on the entire input. Useful for reconstructing figures @@ -23,50 +24,58 @@ end # Wrappers for timeseries inputs that ensure embeddings are done correctly. # ========================================================================================= -function crossmap(measure::CCMLike, est::CrossmapEstimator, target, source) - return last.(predict(measure, est, target, source)) +function crossmap(est::CrossmapEstimator{<:CCMLike}, target, source) + return last.(predict(est, target, source)) end -function crossmap(measure::CCMLike, est::CrossmapEstimator{<:Integer}, target, source) - return last(predict(measure, est, target, source)) +function crossmap(est::CrossmapEstimator{<:CCMLike, <:Integer}, target, source) + return last(predict(est, target, source)) end -function predict(measure::CCMLike, est::CrossmapEstimator, target::AbstractVector, source::AbstractVector) - emb, idx_t̄, idxs_S̄ = embed(measure, target, source) +function predict(est::CrossmapEstimator{<:CCMLike}, target::AbstractVector, source::AbstractVector) + emb, idx_t̄, idxs_S̄ = embed(est.definition, target, source) S̄ = emb[:, idxs_S̄] t̄ = emb[:, idx_t̄] - return predict(measure, est, t̄, S̄) + return predict(est, t̄, S̄) end # The following methods assume pre-embedded data. 
# ========================================================================================= -function predict(measure::CCMLike, est::CrossmapEstimator, target::AbstractVector, source::AbstractStateSpaceSet) +function predict(est::CrossmapEstimator{<:CCMLike}, target::AbstractVector, source::AbstractStateSpaceSet) # Ensure equal-length input - input_check(measure, target, source) + input_check(est.definition, target, source) n_libraries = length(est.libsizes) ρs = Vector{Tuple{Vector{<:Real}, <:Real}}(undef, n_libraries) for i = 1:n_libraries # Randomly or deterministically determined indices for the library points. - inds = library_indices(measure, est, i, target, source) + inds = library_indices(est, i, target, source) # Predict on the library (i.e. on selected subset of points). - ρs[i] = subset_predict(measure, target, source, inds) + ρs[i] = subset_predict(est, target, source, inds) end return ρs end -function predict(measure::CCMLike, est::CrossmapEstimator{<:Integer}, target::AbstractVector, source::AbstractStateSpaceSet) +function predict( + est::CrossmapEstimator{<:CCMLike, <:Integer}, + target::AbstractVector, + source::AbstractStateSpaceSet + ) + definition = est.definition + # Ensure equal-length input - input_check(measure, target, source) - inds = library_indices(measure, est, 1, target, source) - ρ = subset_predict(measure, target, source, inds) + input_check(definition, target, source) + inds = library_indices(est, 1, target, source) + ρ = subset_predict(est, target, source, inds) return ρ end -function subset_predict(measure::CCMLike, target, source, inds) +function subset_predict(est::CrossmapEstimator{<:CCMLike}, target, source, inds) + definition = est.definition + S̄ = @views source[inds] t̄ = @views target[inds] - t̂ₛ = predict(measure, t̄, S̄) - ρ = measure.f(t̄, t̂ₛ) + t̂ₛ = predict(definition, t̄, S̄) + ρ = definition.f(t̄, t̂ₛ) return t̂ₛ, ρ end @@ -78,38 +87,28 @@ that is being applied to `target` and `source`. """ function library_indices end -function library_indices(measure::CCMLike, est::RandomVectors, i::Int, target, args...) +function library_indices(est::RandomVectors{<:CCMLike}, i::Int, target, args...) N = length(target) L = est.libsizes[i] - return library_indices(measure, est, N, L) + return library_indices(est, N, L) end -function library_indices(measure::CCMLike, est::RandomVectors, N::Int, L::Int) +function library_indices(est::RandomVectors{<:CCMLike}, N::Int, L::Int) return sample(est.rng, 1:N, L; replace = est.replace) end -function library_indices(measure::CCMLike, est::RandomSegment, i::Int, target, args...) +function library_indices(est::RandomSegment{<:CCMLike}, i::Int, target, args...) N = length(target) L = est.libsizes[i] - Lmax = max_segmentlength(measure, target) + Lmax = max_segmentlength(est.definition, target) L <= Lmax || throw(ArgumentError("L ($L) > Lmax ($Lmax). Use a smaller segment length (some points are lost when embedding).")) - library_indices(measure, est, N, L) + return library_indices(est, N, L) end -function library_indices(measure::CCMLike, est::RandomSegment, N::Int, L::Int) +function library_indices(est::RandomSegment{<:CCMLike}, N::Int, L::Int) startidx = sample(est.rng, 1:(N - L)) # random segment starting point return startidx:startidx+L-1 end -function library_indices(measure::CCMLike, est::ExpandingSegment, i::Int, target, args...) - Lmax = max_segmentlength(measure, target) - L = est.libsizes[i] - L <= Lmax || throw(ArgumentError("L ($L) > Lmax ($Lmax). 
Use a smaller segment length (some points are lost when embedding).")) - return library_indices(measure, est, length(target), L) -end -function library_indices(measure::CCMLike, est::ExpandingSegment, N::Int, L::Int) - return 1:L -end - function input_check(measure::CCMLike, args...) ns = length.(args) all(ns .== maximum(ns)) || throw(ArgumentError("""\ @@ -121,27 +120,34 @@ end # # ----------------------------------------------------------------------------------------- # # Ensemble analysis. Repeats an analysis ensemble.nreps times. Takes care of the embedding. # # ----------------------------------------------------------------------------------------- -function crossmap(ensemble::Ensemble{<:CCMLike, <:CrossmapEstimator{<:Integer, R}}, +function crossmap(ensemble::Ensemble{<:CrossmapEstimator{<:CCMLike, <:Integer, R}}, target::AbstractVector, source::AbstractVector) where R - (; measure, est, nreps) = ensemble - emb, idx_t̄, idxs_S̄ = embed(measure, target, source) + (; est, nreps) = ensemble + definition = est.definition + + emb, idx_t̄, idxs_S̄ = embed(definition, target, source) S̄ = emb[:, idxs_S̄] t̄ = emb[:, idx_t̄] ρs = zeros(nreps) for i = 1:nreps - inds = library_indices(measure, est, 1, t̄, S̄) - ρ = last(subset_predict(measure, target, S̄, inds)) + inds = library_indices(est, 1, t̄, S̄) + ρ = last(subset_predict(est, target, S̄, inds)) ρs[i] = ρ end return ρs end -function crossmap(ensemble::Ensemble{<:CCMLike, <:CrossmapEstimator}, - target::AbstractVector, source::AbstractVector) - (; measure, est, nreps) = ensemble +function crossmap( + ensemble::Ensemble{<:CrossmapEstimator}, + target::AbstractVector, + source::AbstractVector + ) + (; est, nreps) = ensemble + definition = est.definition + libsizes = est.libsizes - emb, idx_t̄, idxs_S̄ = embed(measure, target, source) + emb, idx_t̄, idxs_S̄ = embed(definition, target, source) S̄ = emb[:, idxs_S̄] t̄ = emb[:, idx_t̄] N = length(t̄) @@ -149,8 +155,8 @@ function crossmap(ensemble::Ensemble{<:CCMLike, <:CrossmapEstimator}, ρs = [zeros(nreps) for j in eachindex(libsizes)] for (j, L) in enumerate(libsizes) for i = 1:nreps - inds = library_indices(measure, est, N, L) - ρs[j][i] = last(subset_predict(measure, target, S̄, inds)) + inds = library_indices(est, N, L) + ρs[j][i] = last(subset_predict(est, target, S̄, inds)) end end return ρs diff --git a/src/methods/crossmappings/crossmappings.jl b/src/methods/crossmappings/crossmappings.jl index 0b353044e..a67ad70ce 100644 --- a/src/methods/crossmappings/crossmappings.jl +++ b/src/methods/crossmappings/crossmappings.jl @@ -6,11 +6,8 @@ using Neighborhood: bulksearch using StaticArrays: MVector using StateSpaceSets: AbstractStateSpaceSet -export predict -export crossmap export CrossmapMeasure export CrossmapEstimator -export Ensemble """ CrossmapMeasure <: AssociationMeasure @@ -19,14 +16,23 @@ The supertype for all cross-map measures. Concrete subtypes are - [`ConvergentCrossMapping`](@ref), or [`CCM`](@ref) for short. - [`PairwiseAsymmetricInference`](@ref), or [`PAI`](@ref) for short. + +See also: [`CrossmapEstimator`](@ref). """ abstract type CrossmapMeasure <: AssociationMeasure end """ - CrossmapEstimator{LIBSIZES, RNG} + CrossmapEstimator{M<:CrossmapMeasure, LIBSIZES, RNG} + +The abstract supertype for all cross-map estimators. + +## Concrete subtypes -A parametric supertype for all cross-map estimators, which are used with [`predict`](@ref) and -[`crossmap`](@ref). 
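The ensemble methods above redraw the library `nreps` times for a fixed estimator. Based on the `Ensemble(est; nreps)` constructor introduced later in this changeset, a usage sketch could look like this (the `RandomVectors` constructor form is assumed, as above; data are illustrative):

```julia
using CausalityTools, Random

rng = Xoshiro(42)
x, y = randn(rng, 500), randn(rng, 500)

# Repeat the CCM cross map 80 times, each time drawing a fresh random
# library of 150 points; returns a vector of 80 cross-map correlations.
est = RandomVectors(CCM(); libsizes = 150, replace = true, rng = rng)
ensemble = Ensemble(est; nreps = 80)
ρs = crossmap(ensemble, y, x)   # predict target y from the source-x embedding
```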
+- [`RandomVectors`](@ref) +- [`RandomSegment`](@ref) +- [`ExpandingSegment`](@ref) + +## Description Because the type of the library may differ between estimators, and because RNGs from different packages may be used, subtypes must implement the `LIBSIZES` and `RNG` @@ -35,25 +41,25 @@ type parameters. For efficiency purposes, subtypes may contain mutable containers that can be re-used for ensemble analysis (see [`Ensemble`](@ref)). -## Libraries +!!! info "Libraries" -A cross-map estimator uses the concept of "libraries". A library is essentially just -a reference to a set of points, and usually, a library refers to *indices* of points, -not the actual points themselves. + A cross-map estimator uses the concept of "libraries". A library is essentially just + a reference to a set of points, and usually, a library refers to *indices* of points, + not the actual points themselves. -For example, for timeseries, `RandomVectors(libsizes = 50:25:100)` produces three -separate libraries, where the first contains 50 randomly selected time indices, -the second contains 75 randomly selected time indices, and the third contains -100 randomly selected time indices. This of course assumes that all quantities involved -can be indexed using the same time indices, meaning that the concept of "library" -only makes sense *after* relevant quantities have been *jointly* embedded, so that they -can be jointly indexed. For non-instantaneous prediction, the maximum possible library -size shrinks with the magnitude of the index/time-offset for the prediction. + For example, for timeseries, `RandomVectors(libsizes = 50:25:100)` produces three + separate libraries, where the first contains 50 randomly selected time indices, + the second contains 75 randomly selected time indices, and the third contains + 100 randomly selected time indices. This of course assumes that all quantities involved + can be indexed using the same time indices, meaning that the concept of "library" + only makes sense *after* relevant quantities have been *jointly* embedded, so that they + can be jointly indexed. For non-instantaneous prediction, the maximum possible library + size shrinks with the magnitude of the index/time-offset for the prediction. -For spatial analyses (not yet implemented), indices could be more complex and involve -multi-indices. + For spatial analyses (not yet implemented), indices could be more complex and involve + multi-indices. """ -abstract type CrossmapEstimator{LIBSIZES, RNG} end +abstract type CrossmapEstimator{M, LIBSIZES, RNG} end segment_length_error() = "Segment lengths can be inferred only if both a cross-map " * "measure and an input time series is provided. " * @@ -85,9 +91,9 @@ This produces `emb`, a `D`-dimensional `StateSpaceSet` where function embed(measure::CrossmapMeasure, args...) end """ - crossmap(measure::CrossmapMeasure, t::AbstractVector, s::AbstractVector) → ρ::Real - crossmap(measure::CrossmapMeasure, est, t::AbstractVector, s::AbstractVector) → ρ::Vector - crossmap(measure::CrossmapMeasure, t̄::AbstractVector, S̄::AbstractStateSpaceSet) → ρ + crossmap(measure::CrossmapEstimator, t::AbstractVector, s::AbstractVector) → ρ::Real + crossmap(measure::CrossmapEstimator, est, t::AbstractVector, s::AbstractVector) → ρ::Vector + crossmap(measure::CrossmapEstimator, t̄::AbstractVector, S̄::AbstractStateSpaceSet) → ρ Compute the cross map estimates between between raw time series `t` and `s` (and return the real-valued cross-map statistic `ρ`). 
If a [`CrossmapEstimator`](@ref) `est` is provided, @@ -107,8 +113,8 @@ function crossmap end # implementations go in a relevant `measures/CustomMeasure.jl` file. """ - predict(measure::CrossmapMeasure, t::AbstractVector, s::AbstractVector) → t̂ₛ, t̄, ρ - predict(measure::CrossmapMeasure, t̄::AbstractVector, S̄::AbstractStateSpaceSet) → t̂ₛ + predict(measure::CrossmapEstimator, t::AbstractVector, s::AbstractVector) → t̂ₛ, t̄, ρ + predict(measure::CrossmapEstimator, t̄::AbstractVector, S̄::AbstractStateSpaceSet) → t̂ₛ Perform point-wise cross mappings between source embeddings and target time series according to the algorithm specified by the given cross-map `measure` (e.g. @@ -176,29 +182,13 @@ function predict(measure::CrossmapMeasure, t::AbstractVector, S̄::AbstractState return t̂ₛ end -""" - Ensemble(; measure::CrossmapMeasure, est::CrossmapEstimator, nreps::Int = 100) - -A directive to compute an ensemble analysis, where `measure` (e.g. -[`ConvergentCrossMapping`](@ref)) is computed -using the given estimator `est` (e.g. [`RandomVectors`](@ref)) -""" -Base.@kwdef struct Ensemble{M, E} # todo: perhaps just use a more general Ensemble? - measure::M - est::E - nreps::Int = 100 - function Ensemble(measure::M, est::E; nreps = 100) where {M, E} - new{M, E}(measure, est, nreps) - end -end - include("estimators/estimators.jl") include("ccm-like/ccm-like.jl") # Internal methods for compatibility with `independence` -function estimate(measure::CrossmapMeasure, args...) - return crossmap(measure, args...) +function association(est::CrossmapEstimator{<:CrossmapMeasure}, args...) + return crossmap(est, args...) end # Experimental diff --git a/src/methods/crossmappings/estimators/Ensemble.jl b/src/methods/crossmappings/estimators/Ensemble.jl new file mode 100644 index 000000000..cce0ad8cf --- /dev/null +++ b/src/methods/crossmappings/estimators/Ensemble.jl @@ -0,0 +1,16 @@ +export Ensemble + +""" + Ensemble(; measure::CrossmapMeasure, est::CrossmapEstimator, nreps::Int = 100) + +A directive to compute an ensemble analysis, where `measure` (e.g. +[`ConvergentCrossMapping`](@ref)) is computed +using the given estimator `est` (e.g. [`RandomVectors`](@ref)) +""" +Base.@kwdef struct Ensemble{M <: CrossmapEstimator{<:CrossmapMeasure}} # todo: perhaps just use a more general Ensemble? + est::M + nreps::Int = 100 + function Ensemble(est::M; nreps = 100) where {M} + new{M}(est, nreps) + end +end \ No newline at end of file diff --git a/src/methods/crossmappings/estimators/ExpandingSegment.jl b/src/methods/crossmappings/estimators/ExpandingSegment.jl index cf5aec926..245256a83 100644 --- a/src/methods/crossmappings/estimators/ExpandingSegment.jl +++ b/src/methods/crossmappings/estimators/ExpandingSegment.jl @@ -2,15 +2,31 @@ export ExpandingSegment """ ExpandingSegment <: CrossmapEstimator - ExpandingSegment(; libsizes::Int, rng = Random.default_rng()) + ExpandingSegment(definition::CrossmapMeasure; libsizes, rng = Random.default_rng()) -Indicatates that cross mapping is performed on a contiguous time series segment/window, -starting from the first available data point up to the `L`th data point. +Cross map *once* over `N = length(libsizes)` different "point libraries", where +point indices are selected as time-contiguous segments/windows. + +This is the method from [Sugihara2012](@cite). See [`CrossmapEstimator`](@ref) for an in-depth +explanation of what "library" means in this context. 
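As a minimal sketch of the new estimator-centric calling pattern (assuming the v3.0 constructors introduced above, where every `CrossmapEstimator` wraps a `CrossmapMeasure` definition and `Ensemble` wraps an estimator; the data, seeds and parameter values below are arbitrary illustration values):

```julia
# Hedged sketch of the v3.0 cross-map API. Names follow the diff above;
# the exact call signatures are assumptions, not guaranteed by this changeset.
using CausalityTools
using Random: Xoshiro

x, y = rand(Xoshiro(1), 500), rand(Xoshiro(2), 500)

definition = ConvergentCrossMapping(d = 2, τ = -1)   # the measure definition
est = RandomVectors(definition; libsizes = 100, rng = Xoshiro(3))
ρ = association(est, x, y)                           # single `libsizes::Int` → a single estimate

ensemble = Ensemble(est; nreps = 50)                 # repeat the random library sampling 50 times
ρs = crossmap(ensemble, x, y)                        # 50 cross-map estimates
```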
+
+## Description
+
+Point index segments are selected from the first available data point index up to the `L`-th data point index.
+This results in one library of contiguous time indices per `L ∈ libsizes`. If used in an ensemble setting, the estimator is applied to time indices `Lmin:step:Lmax` of the joint embedding.
+
+## Returns
+
+The return type when used with [`association`](@ref) depends on the type of `libsizes`.
+- If `libsizes` is an `Int` (a single library), then a single cross-map estimate is returned.
+- If `libsizes` is an `AbstractVector{Int}` (multiple libraries), then a vector of cross-map
+    estimates is returned --- one per library.
 """
-struct ExpandingSegment{I, R} <: CrossmapEstimator{I, R}
+struct ExpandingSegment{M <: CrossmapMeasure, I, R} <: CrossmapEstimator{M, I, R}
+    definition::M
     libsizes::I
     # For other estimators, `rng` is used for ensemble analyses. For `ExpandingSegment`,
     # an ensemble doesn't make sense, because there is no random sampling involved.
@@ -18,7 +34,17 @@ struct ExpandingSegment{I, R} <: CrossmapEstimator{I, R}
     # it here for convenience.
     rng::R
 
-    function ExpandingSegment(; libsizes::I, rng::R = Random.default_rng()) where {I, R}
-        new{I, R}(libsizes, rng)
+    function ExpandingSegment(definition::M; libsizes::I, rng::R = Random.default_rng()) where {M <: CrossmapMeasure, I, R}
+        new{M, I, R}(definition, libsizes, rng)
     end
 end
+
+function library_indices(est::ExpandingSegment, i::Int, target, args...)
+    Lmax = max_segmentlength(est.definition, target)
+    L = est.libsizes[i]
+    L <= Lmax || throw(ArgumentError("L ($L) > Lmax ($Lmax). Use a smaller segment length (some points are lost when embedding)."))
+    return library_indices(est, length(target), L)
+end
+function library_indices(est::ExpandingSegment, N::Int, L::Int)
+    return 1:L
+end
diff --git a/src/methods/crossmappings/estimators/RandomSegment.jl b/src/methods/crossmappings/estimators/RandomSegment.jl
index ef529a784..e0b97ddfa 100644
--- a/src/methods/crossmappings/estimators/RandomSegment.jl
+++ b/src/methods/crossmappings/estimators/RandomSegment.jl
@@ -4,17 +4,56 @@ export RandomSegment
 
 """
     RandomSegment <: CrossmapEstimator
-    RandomSegment(; libsizes::Int, rng = Random.default_rng())
+    RandomSegment(definition::CrossmapMeasure; libsizes::Int, rng = Random.default_rng())
 
-Indicatates that cross mapping is performed on contiguous time series
-segments/windows of length `L` with a randomly selected starting point.
+Cross map *once* over `N = length(libsizes)` different "point libraries", where
+point indices are selected as time-contiguous segments with random starting points.
 
-This is method 2 from [Luo2015](@cite).
+This is method 2 from [Luo2015](@citet). See [`CrossmapEstimator`](@ref) for an in-depth
+explanation of what "library" means in this context.
+
+## Description
+
+The cardinalities of the point index segments are given by `libsizes`. One segment
+with a randomly selected starting point is picked per `L ∈ libsizes`, and the `i`-th
+point index segment has cardinality `k = libsizes[i]`.
+
+The starting point for each library is selected independently of other libraries.
+A user-specified `rng` may be specified for reproducibility. If any `Lᵢ ∈ libsizes`
+exceeds the maximum segment length permitted by the embedding, an error is thrown.
+
+## Returns
+
+The return type when used with [`association`](@ref) depends on the type of `libsizes`.
+- If `libsizes` is an `Int` (a single library), then a single cross-map estimate is returned.
+- If `libsizes` is an `AbstractVector{Int}` (multiple libraries), then a vector of cross-map
+    estimates is returned --- one per library.
+
+See also: [`CrossmapEstimator`](@ref).
 """
-struct RandomSegment{I, R} <: CrossmapEstimator{I, R}
+struct RandomSegment{M <: CrossmapMeasure, I, R} <: CrossmapEstimator{M, I, R}
+    definition::M
     libsizes::I
     rng::R
 
-    function RandomSegment(; libsizes::I, rng::R = Random.default_rng()) where {I, R}
-        new{I, R}(libsizes, rng)
+    function RandomSegment(definition::M; libsizes::I, rng::R = Random.default_rng()) where {M <: CrossmapMeasure, I, R}
+        new{M, I, R}(definition, libsizes, rng)
     end
 end
+
+function library_indices(est::RandomSegment, i::Int, target, args...)
+    definition = est.definition
+    N = length(target)
+    L = est.libsizes[i]
+    Lmax = max_segmentlength(definition, target)
+    L <= Lmax ||
+        throw(ArgumentError("L ($L) > Lmax ($Lmax). Use a smaller segment length (some points are lost when embedding)."))
+    return library_indices(est, N, L)
+end
+
+function library_indices(est::RandomSegment, N::Int, L::Int)
+    startidx = sample(est.rng, 1:(N - L)) # random segment starting point
+    return startidx:startidx+L-1
+end
\ No newline at end of file
diff --git a/src/methods/crossmappings/estimators/RandomVectors.jl b/src/methods/crossmappings/estimators/RandomVectors.jl
index 973cd6fdf..8dbfa616f 100644
--- a/src/methods/crossmappings/estimators/RandomVectors.jl
+++ b/src/methods/crossmappings/estimators/RandomVectors.jl
@@ -4,23 +4,53 @@ export RandomVectors
 
 """
     RandomVectors <: CrossmapEstimator
-    RandomVectors(; libsizes, replace = false, rng = Random.default_rng())
+    RandomVectors(definition::CrossmapMeasure; libsizes, replace = false,
+        rng = Random.default_rng())
 
-Cross-map over `N` different libraries, where `N = length(libsizes)`, and the `i`-th
-library has cardinality `k = libsizes[i]`. Points within each library are randomly
-selected, independently of other libraries, and `replace` controls whether or not to
-sample with replacement. A user-specified `rng` may be specified for reproducibility.
+Cross map *once* over `N = length(libsizes)` different "point libraries", where
+point indices are selected randomly (not considering time ordering).
 
-This is method 3 from [Luo2015](@citet).
+This is method 3 from [Luo2015](@citet). See [`CrossmapEstimator`](@ref) for an in-depth
+explanation of what "library" means in this context.
+
+## Description
+
+The cardinalities of the point libraries are given by `libsizes`. One set of
+random point indices is selected per `L ∈ libsizes`, and the `i`-th
+library has cardinality `k = libsizes[i]`.
+
+Point indices within each library are randomly selected, independently of other libraries.
+A user-specified `rng` may be specified for reproducibility. The `replace` argument
+controls whether sampling is done with or without replacement. If the time series
+you're cross mapping between have length `M`, and `Lᵢ > M` for any `Lᵢ ∈ libsizes`,
+then you must set `replace = true`.
+
+## Returns
+
+The return type when used with [`association`](@ref) depends on the type of `libsizes`.
+- If `libsizes` is an `Int` (a single library), then a single cross-map estimate is returned.
+- If `libsizes` is an `AbstractVector{Int}` (multiple libraries), then a vector of cross-map
+    estimates is returned --- one per library.
 
 See also: [`CrossmapEstimator`](@ref).
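As a rough sketch of the `Returns` convention described above (assuming the v3.0 constructors; the data, seeds and library sizes are arbitrary), passing a vector of library sizes yields one estimate per library:

```julia
# Hedged sketch: one cross-map estimate per library size. Signatures follow the
# docstrings above and are assumptions, not guaranteed by this changeset.
using CausalityTools
using Random: Xoshiro

target, source = rand(Xoshiro(1), 400), rand(Xoshiro(2), 400)
def = CCM(d = 3, τ = -1)

est_rv = RandomVectors(def; libsizes = 50:50:200, rng = Xoshiro(3))
est_rs = RandomSegment(def; libsizes = 50:50:200, rng = Xoshiro(3))

ρs_rv = association(est_rv, target, source)   # 4-element vector: one estimate per library size
ρs_rs = association(est_rs, target, source)   # same, but each library is a contiguous segment
```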
""" -struct RandomVectors{I, R} <: CrossmapEstimator{I, R} +struct RandomVectors{M <: CrossmapMeasure, I, R} <: CrossmapEstimator{M, I, R} + definition::M libsizes::I rng::R replace::Bool - function RandomVectors(; libsizes::I, replace::Bool = false, - rng::R = Random.default_rng()) where {I,R} - new{I, R}(libsizes, rng, replace) + function RandomVectors(definition::M; libsizes::I, replace::Bool = false, + rng::R = Random.default_rng()) where {M<:CrossmapMeasure, I, R} + new{M, I, R}(definition, libsizes, rng, replace) end end + +function library_indices(est::RandomVectors, i::Int, target, args...) + N = length(target) + L = est.libsizes[i] + return library_indices(measure, est, N, L) +end + +function library_indices(est::RandomVectors, N::Int, L::Int) + return sample(est.rng, 1:N, L; replace = est.replace) +end diff --git a/src/methods/crossmappings/estimators/estimators.jl b/src/methods/crossmappings/estimators/estimators.jl index fbedcc145..5c97d0a9b 100644 --- a/src/methods/crossmappings/estimators/estimators.jl +++ b/src/methods/crossmappings/estimators/estimators.jl @@ -1,3 +1,4 @@ +include("Ensemble.jl") include("ExpandingSegment.jl") include("RandomSegment.jl") include("RandomVectors.jl") diff --git a/src/methods/crossmappings/measures/ccm-like/ConvergentCrossMapping.jl b/src/methods/crossmappings/measures/ccm-like/ConvergentCrossMapping.jl deleted file mode 100644 index 63863135a..000000000 --- a/src/methods/crossmappings/measures/ccm-like/ConvergentCrossMapping.jl +++ /dev/null @@ -1,69 +0,0 @@ -import DelayEmbeddings: embed -using DelayEmbeddings: genembed -using Statistics: cor - -export ConvergentCrossMapping, CCM -""" - ConvergentCrossMapping <: CrossmapMeasure - ConvergentCrossMapping(; d::Int = 2, τ::Int = -1, w::Int = 0, - f = Statistics.cor, embed_warn = true) - -The convergent cross mapping measure [Sugihara2012](@cite). - -Specifies embedding dimension `d`, embedding lag `τ` to be used, as described below, -with [`predict`](@ref) or [`crossmap`](@ref). The Theiler window `w` controls how many -temporal neighbors are excluded during neighbor searches (`w = 0` means that only the -point itself is excluded). -`f` is a function that computes the agreement between observations and -predictions (the default, `f = Statistics.cor`, gives the Pearson correlation -coefficient). - -## Embedding - -Let `S(i)` be the source time series variable and `T(i)` be the target time series variable. -This version produces regular embeddings with fixed dimension `d` and embedding lag -`τ` as follows: - -```math -( S(i), S(i+\\tau), S(i+2\\tau), \\ldots, S(i+(d-1)\\tau, T(i))_{i=1}^{N-(d-1)\\tau}. -``` - -In this joint embedding, neighbor searches are performed in the subspace spanned by -the first `D-1` variables, while the last (`D`-th) variable is to be predicted. - -With this convention, `τ < 0` implies "past/present values of source used to predict -target", and `τ > 0` implies "future/present values of source used to predict target". -The latter case may not be meaningful for many applications, so by default, a warning -will be given if `τ > 0` (`embed_warn = false` turns off warnings). 
-""" -Base.@kwdef struct ConvergentCrossMapping <: CrossmapMeasure - d::Int = 2 - τ::Int = -1 - w::Int = 0 - f::Function = cor - embed_warn::Bool = true -end -const CCM = ConvergentCrossMapping - -n_neighbors_simplex(measure::ConvergentCrossMapping) = measure.d + 1 -max_segmentlength(measure::ConvergentCrossMapping, x::AbstractVector) = - length(x) - measure.d + 1 -# TODO: version that takes into consideration prediction lag - -function embed(measure::ConvergentCrossMapping, t::AbstractVector, s::AbstractVector) - (; d, τ, w, f) = measure - if τ > 0 && measure.embed_warn - @warn """τ > 0. You're using future values of source to predict the target. Turn \ - off this warning by setting `embed_warn = false` in the \ - `PairwiseAsymmetricInference` constructor.""" - end - @assert τ != 0 - # Convention: - # - Negative τ := embedding vectors (s(i), s(i-1), ..., t(i)), "past predicts present" - # - Positive τ := embedding vectors (s(i), s(i+1), ..., t(i)), "future predicts present" - τs = [0:τ:(d-1)*τ; 0] - js = [repeat([1], d); 2] - idxs_S̄ = 1:length(js) - 1 - idx_t̄ = length(js)# column index of time series to be predict - genembed(StateSpaceSet(s, t), τs, js), idx_t̄, idxs_S̄ -end diff --git a/src/methods/crossmappings/measures/ccm-like/PairwiseAsymmetricInference.jl b/src/methods/crossmappings/measures/ccm-like/PairwiseAsymmetricInference.jl deleted file mode 100644 index 4114f6d86..000000000 --- a/src/methods/crossmappings/measures/ccm-like/PairwiseAsymmetricInference.jl +++ /dev/null @@ -1,75 +0,0 @@ -import DelayEmbeddings: embed -using Statistics: cor - -export PairwiseAsymmetricInference, PAI -""" - PairwiseAsymmetricInference <: CrossmapMeasure - PairwiseAsymmetricInference(; d::Int = 2, τ::Int = -1, w::Int = 0, - f = Statistics.cor, embed_warn = true) - -The pairwise asymmetric inference (PAI) measure [McCracken2014](@cite) -is a version of [`ConvergentCrossMapping`](@ref) that searches for neighbors in -*mixed* embeddings (i.e. both source and target variables included); otherwise, the -algorithms are identical. - -Specifies embedding dimension `d`, embedding lag `τ` to be used, as described below, -with [`predict`](@ref) or [`crossmap`](@ref). The Theiler window `w` controls how many -temporal neighbors are excluded during neighbor searches (`w = 0` means that only the -point itself is excluded). -`f` is a function that computes the agreement between observations and -predictions (the default, `f = Statistics.cor`, gives the Pearson correlation -coefficient). - -## Embedding - -There are many possible ways of defining the embedding for PAI. Currently, we only -implement the *"add one non-lagged source timeseries to an embedding of the target"* -approach, which is used as an example in McCracken & Weigel's paper. Specifically: -Let `S(i)` be the source time series variable and `T(i)` be the target time series variable. -`PairwiseAsymmetricInference` produces regular embeddings with fixed dimension `d` and -embedding lag `τ` as follows: - -```math -(S(i), T(i+(d-1)\\tau, \\ldots, T(i+2\\tau), T(i+\\tau), T(i)))_{i=1}^{N-(d-1)\\tau}. -``` - -In this joint embedding, neighbor searches are performed in the subspace spanned by -the first `D` variables, while the last variable is to be predicted. - -With this convention, `τ < 0` implies "past/present values of source used to predict -target", and `τ > 0` implies "future/present values of source used to predict target". 
-The latter case may not be meaningful for many applications, so by default, a warning -will be given if `τ > 0` (`embed_warn = false` turns off warnings). -""" -Base.@kwdef struct PairwiseAsymmetricInference <: CrossmapMeasure - d::Int = 2 - τ::Int = -1 - w::Int = 0 - f::Function = cor - embed_warn::Bool = true -end -const PAI = PairwiseAsymmetricInference - -n_neighbors_simplex(measure::PairwiseAsymmetricInference) = - (measure.d + 1) + 1 # one extra coordinate included, due to the inclusion of the target. -max_segmentlength(measure::PairwiseAsymmetricInference, x::AbstractVector) = - length(x) - measure.d + 1 -# TODO: version that takes into consideration prediction lag - -function embed(measure::PairwiseAsymmetricInference, t::AbstractVector, s::AbstractVector) - (; d, τ, w) = measure - @assert τ != 0 - if τ > 0 && measure.embed_warn - @warn """τ > 0. You're using future values of source to predict the target. Turn \ - off this warning by setting `embed_warn = false` in the \ - `PairwiseAsymmetricInference` constructor.""" - end - # Convention: - # - Negative τ := embedding vectors (s(i), t(i), t(i-1), ...), "past predicts present" - # - Positive τ := embedding vectors (s(i), t(i), t(i+1), ...), "future predicts present" - τs = [0; reverse(range(start=0, step=τ, stop=(d-1)*τ))] - js = [2; repeat([1], d)] - idxs_S̄ = 1:measure.d - idx_t̄ = measure.d + 1 # column index of time series to be predict - return genembed(StateSpaceSet(t, s), τs, js), idx_t̄, idxs_S̄ -end diff --git a/src/methods/crossmappings/measures/ccm-like/common.jl b/src/methods/crossmappings/measures/ccm-like/common.jl deleted file mode 100644 index 7e609755b..000000000 --- a/src/methods/crossmappings/measures/ccm-like/common.jl +++ /dev/null @@ -1,157 +0,0 @@ -using StatsBase: sample, sample! -using StateSpaceSets: dimension -using StateSpaceSets: AbstractStateSpaceSet - -CCMLike = Union{ConvergentCrossMapping, PairwiseAsymmetricInference} - -# ----------------------------------------------------------------------------------------- -# Generic methods that operates on the entire input. Useful for reconstructing figures -# from e.g. Sugihara et al. -# ----------------------------------------------------------------------------------------- -crossmap(measure::CCMLike, target, source) = last(predict(measure, target, source)) -function predict(measure::CrossmapMeasure, target::AbstractVector, source::AbstractVector) - emb, idx_t̄, idxs_S̄ = embed(measure, target, source) - S̄ = emb[:, idxs_S̄] - t̄ = emb[:, idx_t̄] - t̄ₛ = predict(measure, t̄, S̄); - return t̄ₛ, t̄, measure.f(t̄, t̄ₛ) -end - -# ----------------------------------------------------------------------------------------- -# Estimator-specific implementations. Assumed pre-embedded data. -# ----------------------------------------------------------------------------------------- - -# Wrappers for timeseries inputs that ensure embeddings are done correctly. 
-# ========================================================================================= -function crossmap(measure::CCMLike, est::CrossmapEstimator, target, source) - return last.(predict(measure, est, target, source)) -end -function crossmap(measure::CCMLike, est::CrossmapEstimator{<:Integer}, target, source) - return last(predict(measure, est, target, source)) -end - -function predict(measure::CCMLike, est::CrossmapEstimator, target::AbstractVector, source::AbstractVector) - emb, idx_t̄, idxs_S̄ = embed(measure, target, source) - S̄ = emb[:, idxs_S̄] - t̄ = emb[:, idx_t̄] - return predict(measure, est, t̄, S̄) -end - -# The following methods assume pre-embedded data. -# ========================================================================================= -function predict(measure::CCMLike, est::CrossmapEstimator, target::AbstractVector, source::AbstractStateSpaceSet) - # Ensure equal-length input - input_check(measure, target, source) - - n_libraries = length(est.libsizes) - ρs = Vector{Tuple{Vector{<:Real}, <:Real}}(undef, n_libraries) - for i = 1:n_libraries - # Randomly or deterministically determined indices for the library points. - inds = library_indices(measure, est, i, target, source) - # Predict on the library (i.e. on selected subset of points). - ρs[i] = subset_predict(measure, target, source, inds) - end - return ρs -end - -function predict(measure::CCMLike, est::CrossmapEstimator{<:Integer}, target::AbstractVector, source::AbstractStateSpaceSet) - # Ensure equal-length input - input_check(measure, target, source) - inds = library_indices(measure, est, 1, target, source) - ρ = subset_predict(measure, target, source, inds) - return ρ -end - -function subset_predict(measure::CCMLike, target, source, inds) - S̄ = @views source[inds] - t̄ = @views target[inds] - t̂ₛ = predict(measure, t̄, S̄) - ρ = measure.f(t̄, t̂ₛ) - return t̂ₛ, ρ -end - -""" - library_indices(measure::CCMLike, est::CrossmapEstimator, i::Int, target, source) - -Produce (randomly, if relevant) the `i`-th subset of indices for a `CrossmapEstimator` -that is being applied to `target` and `source`. -""" -function library_indices end - -function library_indices(measure::CCMLike, est::RandomVectors, i::Int, target, args...) - N = length(target) - L = est.libsizes[i] - return library_indices(measure, est, N, L) -end -function library_indices(measure::CCMLike, est::RandomVectors, N::Int, L::Int) - return sample(est.rng, 1:N, L; replace = est.replace) -end - -function library_indices(measure::CCMLike, est::RandomSegment, i::Int, target, args...) - N = length(target) - L = est.libsizes[i] - Lmax = max_segmentlength(measure, target) - L <= Lmax || - throw(ArgumentError("L ($L) > Lmax ($Lmax). Use a smaller segment length (some points are lost when embedding).")) - library_indices(measure, est, N, L) -end -function library_indices(measure::CCMLike, est::RandomSegment, N::Int, L::Int) - startidx = sample(est.rng, 1:(N - L)) # random segment starting point - return startidx:startidx+L-1 -end - -function library_indices(measure::CCMLike, est::ExpandingSegment, i::Int, target, args...) - Lmax = max_segmentlength(measure, target) - L = est.libsizes[i] - L <= Lmax || throw(ArgumentError("L ($L) > Lmax ($Lmax). Use a smaller segment length (some points are lost when embedding).")) - return library_indices(measure, est, length(target), L) -end -function library_indices(measure::CCMLike, est::ExpandingSegment, N::Int, L::Int) - return 1:L -end - -function input_check(measure::CCMLike, args...) 
- ns = length.(args) - all(ns .== maximum(ns)) || throw(ArgumentError("""\ - All inputs must have same lengths. \ - Use `embed` to ensure target time series and embedding are aligned.\ - """)) -end - -# # ----------------------------------------------------------------------------------------- -# # Ensemble analysis. Repeats an analysis ensemble.nreps times. Takes care of the embedding. -# # ----------------------------------------------------------------------------------------- -function crossmap(ensemble::Ensemble{<:CCMLike, <:CrossmapEstimator{<:Integer, R}}, - target::AbstractVector, source::AbstractVector) where R - (; measure, est, nreps) = ensemble - emb, idx_t̄, idxs_S̄ = embed(measure, target, source) - S̄ = emb[:, idxs_S̄] - t̄ = emb[:, idx_t̄] - - ρs = zeros(nreps) - for i = 1:nreps - inds = library_indices(measure, est, 1, t̄, S̄) - ρ = last(subset_predict(measure, target, S̄, inds)) - ρs[i] = ρ - end - return ρs -end - -function crossmap(ensemble::Ensemble{<:CCMLike, <:CrossmapEstimator}, - target::AbstractVector, source::AbstractVector) - (; measure, est, nreps) = ensemble - libsizes = est.libsizes - emb, idx_t̄, idxs_S̄ = embed(measure, target, source) - S̄ = emb[:, idxs_S̄] - t̄ = emb[:, idx_t̄] - N = length(t̄) - - ρs = [zeros(nreps) for j in eachindex(libsizes)] - for (j, L) in enumerate(libsizes) - for i = 1:nreps - inds = library_indices(measure, est, N, L) - ρs[j][i] = last(subset_predict(measure, target, S̄, inds)) - end - end - return ρs -end diff --git a/src/methods/crossmappings/measures/ccm-like/measures.jl b/src/methods/crossmappings/measures/ccm-like/measures.jl deleted file mode 100644 index 835e60f1f..000000000 --- a/src/methods/crossmappings/measures/ccm-like/measures.jl +++ /dev/null @@ -1,3 +0,0 @@ -include("ConvergentCrossMapping.jl") -include("PairwiseAsymmetricInference.jl") -include("common.jl") # need to come after definitions diff --git a/src/methods/infomeasures/condmutualinfo/CMIRenyiJizba.jl b/src/methods/infomeasures/condmutualinfo/CMIRenyiJizba.jl deleted file mode 100644 index 085aeefc3..000000000 --- a/src/methods/infomeasures/condmutualinfo/CMIRenyiJizba.jl +++ /dev/null @@ -1,81 +0,0 @@ -using Accessors - -export CMIRenyiJizba -""" - CMIRenyiJizba <: ConditionalMutualInformation - -The Rényi conditional mutual information ``I_q^{R_{J}}(X; Y | Z`` defined in -[Jizba2012](@citet). - -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`condmutualinfo`](@ref) to compute the raw conditional mutual information. - -## Definition - -```math -I_q^{R_{J}}(X; Y | Z) = I_q^{R_{J}}(X; Y, Z) - I_q^{R_{J}}(X; Z), -``` - -where ``I_q^{R_{J}}(X; Z)`` is the [`MIRenyiJizba`](@ref) mutual information. 
-""" -struct CMIRenyiJizba{E <: Renyi} <: ConditionalMutualInformation{E} - e::E - function CMIRenyiJizba(; base = 2, q = 1.5) - e = Renyi(; base, q) - new{typeof(e)}(e) - end - function CMIRenyiJizba(e::E) where E <: Renyi - new{E}(e) - end -end - -min_inputs_vars(::CMIRenyiJizba) = 3 -max_inputs_vars(::CMIRenyiJizba) = 3 - -function estimate(measure::CMIRenyiJizba, est::Contingency, x, y, z) - c = _contingency_matrix(measure, est, x, y, z) - pxz = probabilities(c, dims = [1, 3]) - pyz = probabilities(c, dims = [2, 3]) - pz = probabilities(c, dims = 3) - pxyz = probabilities(c) - e = measure.e - return entropy(e, pxz) + entropy(e, pyz) - entropy(e, pz) - entropy(e, pxyz) -end - -function _contingency_matrix(measure::CMIRenyiJizba, - est::Contingency{<:ProbabilitiesEstimator}, x, y, z) - return contingency_matrix(est.est, x, y, z) -end -function _contingency_matrix(measure::CMIRenyiJizba, est::Contingency{<:Nothing}, x, y, z) - return contingency_matrix(x, y, z) -end - -function estimate(measure::CMIRenyiJizba, est::ProbabilitiesEstimator, x, y, z) - HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h(measure, est, x, y, z) - return HXZ + HYZ - HXYZ - HZ -end - -function estimate(measure::CMIRenyiJizba, est::DifferentialEntropyEstimator, x, y, z) - # Due to inconsistent API in ComplexityMeasures.jl, we have to treat - # DifferentialEntropyEstimator here. Because all measures in this package - # have their own `base` field, it will conflict with `est.base` for - # `DifferentialEntropyEstimator`s. In these cases, we use `measure.base`, - # and override the estimator base, by simply creating a copy of the - # estimator with one field modified. - if est isa DifferentialEntropyEstimator && :base in fieldnames(typeof(est)) - if est.base != measure.e.base - mb = measure.e.base - eb = est.base - modified_est = Accessors.@set est.base = measure.e.base - HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h(measure, modified_est, x, y, z) - else - HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h(measure, est, x, y, z) - end - else - HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h(measure, est, x, y, z) - end - cmi = HXZ + HYZ - HXYZ - HZ - return cmi -end diff --git a/src/methods/infomeasures/condmutualinfo/CMIRenyiPoczos.jl b/src/methods/infomeasures/condmutualinfo/CMIRenyiPoczos.jl deleted file mode 100644 index 3622e5e21..000000000 --- a/src/methods/infomeasures/condmutualinfo/CMIRenyiPoczos.jl +++ /dev/null @@ -1,42 +0,0 @@ -export CMIRenyiPoczos - -""" - CMIRenyiPoczos <: ConditionalMutualInformation - -The differential Rényi conditional mutual information ``I_q^{R_{P}}(X; Y | Z)`` -defined in (Póczos & Schneider, 2012)[^Póczos2012]. - -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`condmutualinfo`](@ref) to compute the raw conditional mutual information. - -## Definition - -```math -\\begin{align*} -I_q^{R_{P}}(X; Y | Z) = \\dfrac{1}{q-1} -\\int \\int \\int \\dfrac{p_Z(z) p_{X, Y | Z}^q}{( p_{X|Z}(x|z) p_{Y|Z}(y|z) )^{q-1}} \\\\ -\\mathbb{E}_{X, Y, Z} \\sim p_{X, Y, Z} -\\left[ \\dfrac{p_{X, Z}^{1-q}(X, Z) p_{Y, Z}^{1-q}(Y, Z) }{p_{X, Y, Z}^{1-q}(X, Y, Z) p_Z^{1-q}(Z)} \\right] -\\end{align*} -``` - -[^Póczos2012]: - Póczos, B., & Schneider, J. (2012, March). Nonparametric estimation of conditional - information and divergences. In Artificial Intelligence and Statistics (pp. 914-923). - PMLR. 
-""" -struct CMIRenyiPoczos{E <: Renyi} <: ConditionalMutualInformation{E} - e::E - function CMIRenyiPoczos(; base = 2, q = 1.5) - e = Renyi(; base, q) - new{typeof(e)}(e) - end - function CMIRenyiPoczos(e::E) where E <: Renyi - new{E}(e) - end -end - -min_inputs_vars(::CMIRenyiPoczos) = 3 -max_inputs_vars(::CMIRenyiPoczos) = 3 diff --git a/src/methods/infomeasures/condmutualinfo/CMIShannon.jl b/src/methods/infomeasures/condmutualinfo/CMIShannon.jl deleted file mode 100644 index 334fafd4f..000000000 --- a/src/methods/infomeasures/condmutualinfo/CMIShannon.jl +++ /dev/null @@ -1,118 +0,0 @@ -using Accessors - -export CMIShannon -import ComplexityMeasures: log_with_base - -""" - CMIShannon <: ConditionalMutualInformation - CMIShannon(; base = 2) - -The Shannon conditional mutual information (CMI) ``I^S(X; Y | Z)``. - -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`condmutualinfo`](@ref) to compute the raw conditional mutual information. - -## Supported definitions - -Consider random variables ``X \\in \\mathbb{R}^{d_X}`` and -``Y \\in \\mathbb{R}^{d_Y}``, given ``Z \\in \\mathbb{R}^{d_Z}``. The Shannon -conditional mutual information is defined as - -```math -\\begin{align*} -I(X; Y | Z) -&= H^S(X, Z) + H^S(Y, z) - H^S(X, Y, Z) - H^S(Z) \\\\ -&= I^S(X; Y, Z) + I^S(X; Y) -\\end{align*}, -``` - -where ``I^S(\\cdot; \\cdot)`` is the Shannon mutual information [`MIShannon`](@ref), -and ``H^S(\\cdot)`` is the [`Shannon`](@ref) entropy. - -Differential Shannon CMI is obtained by replacing the entropies by -differential entropies. - -See also: [`condmutualinfo`](@ref). -""" -struct CMIShannon{E <: Shannon} <: ConditionalMutualInformation{E} - e::E - function CMIShannon(; base::T = 2) where {T <: Real} - e = Shannon(; base) - new{typeof(e)}(e) - end - function CMIShannon(e::E) where E <: Shannon - new{E}(e) - end -end - -min_inputs_vars(::CMIShannon) = 3 -max_inputs_vars(::CMIShannon) = 3 - - -function estimate(measure::CMIShannon, est::ProbabilitiesEstimator, x, y, z) - HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h(measure, est, x, y, z) - return HXZ + HYZ - HXYZ - HZ -end - - -function estimate(measure::CMIShannon, est::DifferentialEntropyEstimator, x, y, z) - # Due to inconsistent API in ComplexityMeasures.jl, we have to treat - # DifferentialEntropyEstimator here. Because all measures in this package - # have their own `base` field, it will conflict with `est.base` for - # `DifferentialEntropyEstimator`s. In these cases, we use `measure.base`, - # and override the estimator base, by simply creating a copy of the - # estimator with one field modified. 
- if est isa DifferentialEntropyEstimator && :base in fieldnames(typeof(est)) - if est.base != measure.e.base - mb = measure.e.base - eb = est.base - modified_est = Accessors.@set est.base = measure.e.base - HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h(measure, modified_est, x, y, z) - else - HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h(measure, est, x, y, z) - end - else - HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h(measure, est, x, y, z) - end - cmi = HXZ + HYZ - HXYZ - HZ - return cmi -end - -function estimate(measure::CMIShannon, est::Contingency{<:ProbabilitiesEstimator}, x, y, z) - return estimate(measure, contingency_matrix(est.est, x, y, z)) -end - -function estimate(measure::CMIShannon, est::Contingency{<:Nothing}, x, y, z) - return estimate(measure, contingency_matrix(x, y, z)) -end - -function estimate( - measure::CMIShannon, - pxyz::ContingencyMatrix{T, 3}) where T - e = measure.e - dx, dy, dz = size(pxyz) - pxz = probabilities(pxyz, dims = [1, 3]) - pyz = probabilities(pxyz, dims = [2, 3]) - pz = probabilities(pxyz, dims = 3) - cmi = 0.0 - log0 = log_with_base(e.base) - for k in 1:dz - pzₖ = pz[k] - for j in 1:dy - pyⱼzₖ = pyz[j, k] - pyⱼzₖ > 0 || continue # leads to NaN - for i in 1:dx - pxᵢzₖ = pxz[i, k] - pxᵢzₖ > 0 || continue # leads to NaN - pxᵢyⱼzₖ = pxyz[i, j, k] - inner = (pzₖ * pxᵢyⱼzₖ) / (pxᵢzₖ * pyⱼzₖ) - if inner != 0.0 - cmi += pxᵢyⱼzₖ * log0(inner) - end - end - end - end - return cmi -end diff --git a/src/methods/infomeasures/condmutualinfo/CMITsallis.jl b/src/methods/infomeasures/condmutualinfo/CMITsallis.jl deleted file mode 100644 index 85a2d5b84..000000000 --- a/src/methods/infomeasures/condmutualinfo/CMITsallis.jl +++ /dev/null @@ -1,48 +0,0 @@ -""" - CMITsallis <: ConditionalMutualInformation - CMITsallis(; q = 1.5, base = 2) -""" -struct CMITsallis{E <: Tsallis} <: ConditionalMutualInformation{E} - e::E - function CMITsallis(; base::T = 2, q = 1.5) where {T <: Real} - e = Tsallis(; base, q) - new{typeof(e)}(e) - end -end - -min_inputs_vars(::CMITsallis) = 3 -max_inputs_vars(::CMITsallis) = 3 - -function estimate(measure::CMITsallis, est::Contingency{<:ProbabilitiesEstimator}, x, y, z) - return estimate(measure, contingency_matrix(est.est, x, y, z)) -end - -function estimate(measure::CMITsallis, est::Contingency{<:Nothing}, x, y, z) - return estimate(measure, contingency_matrix(x, y, z)) -end - -function estimate( - measure::CMITsallis, - pxyz::ContingencyMatrix{T, 3}) where T - e = measure.e - dx, dy, dz = size(pxyz) - pxz = probabilities(pxyz, dims = [1, 3]) - pyz = probabilities(pxyz, dims = [2, 3]) - pz = probabilities(pxyz, dims = 3) - cmi = 0.0 - log0 = log_with_base(e.base) - for k in 1:dz - pzₖ = pz[k] - for j in 1:dy - pyⱼzₖ = pyz[j, k] - for i in 1:dx - pxᵢzₖ = pxz[i, k] - pxᵢyⱼzₖ = pxyz[i, j, k] - if pxᵢyⱼzₖ != 0.0 - cmi += pxᵢyⱼzₖ * log0((pzₖ * pxᵢyⱼzₖ) / (pxᵢzₖ * pyⱼzₖ)) - end - end - end - end - return cmi -end diff --git a/src/methods/infomeasures/condmutualinfo/condmutualinfo.jl b/src/methods/infomeasures/condmutualinfo/condmutualinfo.jl deleted file mode 100644 index 4453e5797..000000000 --- a/src/methods/infomeasures/condmutualinfo/condmutualinfo.jl +++ /dev/null @@ -1,235 +0,0 @@ - -export ConditionalMutualInformationEstimator -export ConditionalMutualInformation -export condmutualinfo - -""" - ConditionalMutualInformation <: AssociationMeasure - CMI # alias - -The supertype of all conditional mutual information measures. 
Concrete subtypes are - -- [`CMIShannon`](@ref) -- [`CMIRenyiJizba`](@ref) -- [`CMIRenyiPoczos`](@ref) -""" -abstract type ConditionalMutualInformation{E} <: InformationMeasure end -const CMI{E} = ConditionalMutualInformation{E} - -min_inputs_vars(::CMI) = 3 -max_inputs_vars(::CMI) = 3 - -""" - ConditionalMutualInformationEstimator <: InformationEstimator - CMIEstimator # alias - -The supertype of all conditional mutual information estimators. - -## Subtypes - -- [`FPVP`](@ref). -- [`PoczosSchneiderCMI`](@ref). -- [`Rahimzamani`](@ref). -- [`MesnerShalizi`](@ref). -""" -abstract type ConditionalMutualInformationEstimator end -const CMIEstimator = ConditionalMutualInformationEstimator - -condmutualinfo(args...; kwargs...) = estimate(args...; kwargs...) - -const CMI_ESTIMATORS = Union{ - ProbabilitiesEstimator, - DifferentialEntropyEstimator, - MutualInformationEstimator, - ConditionalMutualInformationEstimator -} -function estimate(measure::CMI, est::CMI_ESTIMATORS, x) - txt = "`condmutualinfo` takes three input vectors/StateSpaceSets. Only one was given." - throw(ArgumentError(txt)) -end -function estimate(measure::CMI, est::CMI_ESTIMATORS, x, y) - txt = "`condmutualinfo` takes three input vectors/StateSpaceSets. Only two were given." - throw(ArgumentError(txt)) -end - -""" - condmutualinfo([measure::CMI], est::CMIEstimator, x, y, z) → cmi::Real - -Estimate a conditional mutual information (CMI) of some kind (specified by `measure`), -between `x` and `y`, given `z`, using the given dedicated -[`ConditionalMutualInformationEstimator`](@ref), which may be discrete, continuous or -mixed. - -## Estimators - -| Estimator | Principle | [`CMIShannon`](@ref) | [`CMIRenyiPoczos`](@ref) | -| ---------------------------- | ----------------- | :------------------: | :----------------------: | -| [`FPVP`](@ref) | Nearest neighbors | ✓ | x | -| [`MesnerShalizi`](@ref) | Nearest neighbors | ✓ | x | -| [`Rahimzamani`](@ref) | Nearest neighbors | ✓ | x | -| [`PoczosSchneiderCMI`](@ref) | Nearest neighbors | x | ✓ | -""" -function condmutualinfo(measure::CMI, est::ConditionalMutualInformationEstimator, x, y, z) - return estimate(measure, est, x, y, z) -end - -function estimate(est::ConditionalMutualInformationEstimator, x, y, z) - return estimate(CMIShannon(), est, x, y, z) -end - - -include("CMIShannon.jl") -include("CMIRenyiSarbu.jl") -include("CMIRenyiJizba.jl") -include("CMIRenyiPoczos.jl") -include("estimators/estimators.jl") - -# Default to Shannon mutual information. -""" - condmutualinfo([measure::CMI], est::ProbabilitiesEstimator, x, y, z) → cmi::Real ∈ [0, a) - -Estimate the conditional mutual information (CMI) `measure` between `x` and `y` given `z` -using a sum of entropy terms, without any bias correction, using the provided -[`ProbabilitiesEstimator`](@ref) `est`. -If `measure` is not given, then the default is `CMIShannon()`. - -With a [`ProbabilitiesEstimator`](@ref), the returned `cmi` is guaranteed to be -non-negative. 
- -## Estimators - -| Estimator | Principle | [`CMIShannon`](@ref) | [`CMIRenyiSarbu`](@ref) | -| ---------------------------- | ------------------- | :------------------: | :---------------------: | -| [`CountOccurrences`](@ref) | Frequencies | ✓ | ✓ | -| [`ValueHistogram`](@ref) | Binning (histogram) | ✓ | ✓ | -| [`SymbolicPermutation`](@ref) | Ordinal patterns | ✓ | ✓ | -| [`Dispersion`](@ref) | Dispersion patterns | ✓ | ✓ | -""" -function condmutualinfo(measure::CMI, est::ProbabilitiesEstimator, x, y, z) - return estimate(measure, est, x, y, z) -end - -function estimate(est::ProbabilitiesEstimator, x, y, z) - return estimate(CMIShannon(), est, x, y, z) -end - -""" - condmutualinfo([measure::CMI], est::DifferentialEntropyEstimator, x, y, z) → cmi::Real - -Estimate the mutual information between `x` and `y` conditioned on `z`, using -the differential version of the given conditional mutual information (CMI) `measure`. -The [`DifferentialEntropyEstimator`](@ref) `est` must must support multivariate data. -No bias correction is performed. If `measure` is not given, then the default is -`CMIShannon()`. - -!!! note - [`DifferentialEntropyEstimator`](@ref)s have their own `base` field which is not - used here. Instead, this method creates a copy of `est` internally, - where `est.base` is replaced by `measure.e.base`. Therefore, use `measure` to - control the "unit" of the mutual information. - -## Estimators - -| Estimator | Principle | [`CMIShannon`](@ref) | -| -------------------------------- | ----------------- | :------------------: | -| [`Kraskov`](@ref) | Nearest neighbors | ✓ | -| [`Zhu`](@ref) | Nearest neighbors | ✓ | -| [`Gao`](@ref) | Nearest neighbors | ✓ | -| [`Goria`](@ref) | Nearest neighbors | ✓ | -| [`Lord`](@ref) | Nearest neighbors | ✓ | -| [`LeonenkoProzantoSavani`](@ref) | Nearest neighbors | ✓ | -""" -function condmutualinfo(measure::CMI, est::DifferentialEntropyEstimator, x, y, z) - return estimate(measure, est, x, y, z) -end - -function estimate(est::DifferentialEntropyEstimator, x, y, z) - return estimate(CMIShannon(), est, x, y, z) -end - -""" - condmutualinfo([measure::CMI], est::MutualInformationEstimator, x, y, z) → cmi::Real - -Estimate the mutual information between `x` and `y` conditioned on `z`, using the -given conditional mutual information (CMI) `measure`, computed as a -a difference of mutual information terms (just the chain rule of mutual information) - -```math -\\hat{I}(X; Y | Z) = \\hat{I}(X; Y, Z) - \\hat{I}(X; Z). -``` - -The [`MutualInformationEstimator`](@ref) `est` may be continuous/differential, -discrete or mixed. No bias correction in performed, except the bias correction -that occurs for each individual mutual information term. -If `measure` is not given, then the default is `CMIShannon()`. 
- -## Estimators - -| Estimator | Type | Principle | [`CMIShannon`](@ref) | -| -------------------------------------- | :--------: | :---------------: | :------------------: | -| [`KraskovStögbauerGrassberger1`](@ref) | Continuous | Nearest neighbors | ✓ | -| [`KraskovStögbauerGrassberger2`](@ref) | Continuous | Nearest neighbors | ✓ | -| [`GaoKannanOhViswanath`](@ref) | Mixed | Nearest neighbors | ✓ | -| [`GaoOhViswanath`](@ref) | Continuous | Nearest neighbors | ✓ | -""" -function condmutualinfo(measure::CMI, est::MutualInformationEstimator, x, y, z) - return estimate(measure, est, x, y, z) -end - -mi_measure(m::CMIShannon) = MIShannon(m.e) -# Internal methods for `independence` -function estimate(measure::CMI, est::MutualInformationEstimator, x, y, z) - X = StateSpaceSet(x) - Y = StateSpaceSet(y) - Z = StateSpaceSet(z) - YZ = StateSpaceSet(Y, Z) - m = mi_measure(measure) - return mutualinfo(m, est, X, YZ) - mutualinfo(m, est, X, Z) -end - -function estimate(est::MutualInformationEstimator, x, y, z) - return estimate(CMIShannon(), est, x, y, z) -end - - -# Generic H4-formulation of CMI -function marginal_entropies_cmi4h(measure::ConditionalMutualInformation, est, x, y, z) - e = measure.e - Z = StateSpaceSet(z) - Y = StateSpaceSet(y) - X = StateSpaceSet(x) - XZ = StateSpaceSet(X, Z) - YZ = StateSpaceSet(Y, Z) - XYZ = StateSpaceSet(X, Y, Z) - - HXZ = entropy(e, est, XZ) - HYZ = entropy(e, est,YZ) - HXYZ = entropy(e, est, XYZ) - HZ = entropy(e, est, Z) - return HXZ, HYZ, HXYZ, HZ -end - -# Override some definitions, because the estimator behaviour need to be adjusted -# for multiple input variables. -const WellDefinedCMIShannonProbEsts{m, D} = Union{ - SymbolicPermutation{m}, - ValueHistogram{<:FixedRectangularBinning{D}}, - ValueHistogram{<:RectangularBinning{T}}, - Dispersion -} where {m, D, T} - -function marginal_entropies_cmi4h(measure::Union{CMIShannon, CMIRenyiSarbu}, - est::WellDefinedCMIShannonProbEsts{m, D}, - x, y, z) where {m, D} - e = measure.e - eX, eY, eZ = marginal_encodings(est, x, y, z) - eXZ = StateSpaceSet(eX, eZ) - eYZ = StateSpaceSet(eY, eZ) - eXYZ = StateSpaceSet(eX, eY, eZ) - - HXZ = entropy(e, CountOccurrences(), eXZ) - HYZ = entropy(e, CountOccurrences(), eYZ) - HXYZ = entropy(e, CountOccurrences(), eXYZ) - HZ = entropy(e, CountOccurrences(), eZ) - return HXZ, HYZ, HXYZ, HZ -end diff --git a/src/methods/infomeasures/condmutualinfo/estimators/FPVP.jl b/src/methods/infomeasures/condmutualinfo/estimators/FPVP.jl deleted file mode 100644 index f688436d7..000000000 --- a/src/methods/infomeasures/condmutualinfo/estimators/FPVP.jl +++ /dev/null @@ -1,60 +0,0 @@ - -using Neighborhood: bulkisearch, inrangecount -using Neighborhood: Theiler, NeighborNumber, KDTree, Chebyshev -using SpecialFunctions: digamma - -export FPVP - -""" - FPVP <: ConditionalMutualInformationEstimator - FPVP(k = 1, w = 0) - -The Frenzel-Pompe-Vejmelka-Paluš (or `FPVP` for short) estimator is used to estimate the -differential conditional mutual information using a `k`-th nearest neighbor approach that is -analogous to that of the [`KraskovStögbauerGrassberger1`](@ref) mutual information estimator -([Frenzel2007](@citet); [Vejmelka2008](@citet)). - -`w` is the Theiler window, which controls the number of temporal neighbors that are excluded -during neighbor searches. 
-""" -Base.@kwdef struct FPVP{MJ, MM} <: ConditionalMutualInformationEstimator - k::Int = 1 - w::Int = 0 - metric_joint::MJ = Chebyshev() - metric_marginals::MM = Chebyshev() -end - -function estimate(measure::CMIShannon, est::FPVP, x, y, z) - e = measure.e - (; k, w, metric_joint, metric_marginals) = est - # Ensures that vector-valued inputs are converted to StateSpaceSets, so that - # building the marginal/joint spaces and neighbor searches are fast. - X = StateSpaceSet(x) - Y = StateSpaceSet(y) - Z = StateSpaceSet(z) - @assert length(X) == length(Y) == length(Z) - N = length(X) - joint = StateSpaceSet(X, Y, Z) - XZ = StateSpaceSet(X, Z) - YZ = StateSpaceSet(Y, Z) - - tree_joint = KDTree(joint, metric_joint) - ds_joint = last.(bulksearch(tree_joint, joint, NeighborNumber(k), Theiler(w))[2]) - tree_xz = KDTree(XZ, metric_marginals) - tree_yz = KDTree(YZ, metric_marginals) - tree_z = KDTree(Z, metric_marginals) - - condmi = 0.0 - for (i, dᵢ) in enumerate(ds_joint) - # Usually, we subtract 1 because inrangecount includes the point itself, - # but we'll have to add it again inside the digamma, so just skip it. - condmi += digamma(k) - condmi -= digamma(inrangecount(tree_xz, XZ[i], dᵢ)) - condmi -= digamma(inrangecount(tree_yz, YZ[i], dᵢ)) - condmi += digamma(inrangecount(tree_z, Z[i], dᵢ)) - end - # The "unit" is nats. - condmi /= N - - return _convert_logunit(condmi, ℯ, e.base) -end diff --git a/src/methods/infomeasures/condmutualinfo/estimators/GaussianCMI.jl b/src/methods/infomeasures/condmutualinfo/estimators/GaussianCMI.jl deleted file mode 100644 index 4eb648380..000000000 --- a/src/methods/infomeasures/condmutualinfo/estimators/GaussianCMI.jl +++ /dev/null @@ -1,33 +0,0 @@ -export GaussianCMI -using StateSpaceSets: StateSpaceSet - -""" - GaussianCMI <: MutualInformationEstimator - GaussianCMI(; normalize::Bool = false) - -`GaussianCMI` is a parametric estimator for Shannon conditional mutual information (CMI) -[Vejmelka2008](@cite). - -## Description - -`GaussianCMI` estimates Shannon CMI through a sum of two mutual information terms that -each are estimated using [`GaussianMI`](@ref) (the `normalize` keyword is the same as -for [`GaussianMI`](@ref)): - -```math -\\hat{I}_{Gaussian}(X; Y | Z) = \\hat{I}_{Gaussian}(X; Y, Z) - \\hat{I}_{Gaussian}(X; Z) -``` -""" -Base.@kwdef struct GaussianCMI <: MutualInformationEstimator - normalize::Bool = false -end - -function estimate(measure::CMIShannon, est::GaussianCMI, x, y, z) - YZ = StateSpaceSet(y, z) - - mi_est = GaussianMI() - MI_x_yz = estimate(MIShannon(measure.e), mi_est, x, YZ) - MI_x_z = estimate(MIShannon(measure.e), mi_est, x, z) - - return MI_x_yz - MI_x_z -end diff --git a/src/methods/infomeasures/condmutualinfo/estimators/Rahimzamani.jl b/src/methods/infomeasures/condmutualinfo/estimators/Rahimzamani.jl deleted file mode 100644 index 84381173a..000000000 --- a/src/methods/infomeasures/condmutualinfo/estimators/Rahimzamani.jl +++ /dev/null @@ -1,55 +0,0 @@ -export Rahimzamani - -""" - Rahimzamani <: ConditionalMutualInformationEstimator - Rahimzamani(k = 1, w = 0) - -The `Rahimzamani` estimator, short for Rahimzamani-Asnani-Viswanath-Kannan, -is an estimator for Shannon conditional mutual information for data that can be mixtures of -discrete and continuous data [Rahimzamani2018](@cite). - -This is very similar to the [`GaoKannanOhViswanath`](@ref) mutual information estimator, -but has been expanded to the conditional case. 
-""" -Base.@kwdef struct Rahimzamani{M} <: ConditionalMutualInformationEstimator - k::Int = 1 - w::Int = 0 - metric::M = Chebyshev() -end - -function estimate(measure::CMIShannon, est::Rahimzamani, x, y, z) - e = measure.e - (; k, w, metric) = est - X = StateSpaceSet(x) - Y = StateSpaceSet(y) - Z = StateSpaceSet(z) - joint = StateSpaceSet(X, Y, Z) - XZ = StateSpaceSet(x, z) - YZ = StateSpaceSet(y, z) - Z = StateSpaceSet(z) - N = length(joint) - M = 3 - tree_joint = KDTree(joint, metric) - ds_joint = last.(bulksearch(tree_joint, joint, NeighborNumber(k), Theiler(w))[2]) - tree_xz = KDTree(XZ, metric) - tree_yz = KDTree(YZ, metric) - tree_z = KDTree(Z, metric) - - condmi = 0.0 - for i = 1:N - # The notation for ρ_{i, xy} in the paper in unclear. They claim in the paper that - # the estimator reduces to the KSG1 estimator when k̂ == k. Therefore, - # I assume ρ_{i, xy} is the distance in the *joint* space. - # ... but isn't this just the FPVP estimator? - dmax = ds_joint[i] - k̂ = dmax == 0 ? inrangecount(tree_joint, joint[i], 0.0) - 1 : k - condmi += digamma(k̂) - condmi -= log(inrangecount(tree_xz, XZ[i], dmax)) - condmi -= log(inrangecount(tree_yz, YZ[i], dmax)) - condmi += log(inrangecount(tree_z, Z[i], dmax)) - end - # The "unit" is nats - condmi /= N - - return _convert_logunit(condmi, ℯ, e.base) -end diff --git a/src/methods/infomeasures/condmutualinfo/estimators/estimators.jl b/src/methods/infomeasures/condmutualinfo/estimators/estimators.jl deleted file mode 100644 index 893bf6927..000000000 --- a/src/methods/infomeasures/condmutualinfo/estimators/estimators.jl +++ /dev/null @@ -1,12 +0,0 @@ -include("FPVP.jl") -include("Rahimzamani.jl") -include("PoczosSchneiderCMI.jl") -include("MesnerShalizi.jl") -include("GaussianCMI.jl") - -# Definition is actually never used, but we need to define it, so that calling `estimate` -# within independence tests work. -estimate(measure::CMIShannon, est::ConditionalMutualInformationEstimator, - x, y, z) = estimate(measure, est, x, y, z) - -#include("TsallisCMIFuruichi.jl") diff --git a/src/methods/infomeasures/entropy_conditional/CEShannon.jl b/src/methods/infomeasures/entropy_conditional/CEShannon.jl deleted file mode 100644 index 237bbae4e..000000000 --- a/src/methods/infomeasures/entropy_conditional/CEShannon.jl +++ /dev/null @@ -1,79 +0,0 @@ -export CEShannon - -""" - CEShannon <: ConditionalEntropy - CEShannon(; base = 2,) - -The[`Shannon`](@ref) conditional entropy measure. - -## Discrete definition - -### Sum formulation - -The conditional entropy between discrete random variables -``X`` and ``Y`` with finite ranges ``\\mathcal{X}`` and ``\\mathcal{Y}`` is defined as - -```math -H^{S}(X | Y) = -\\sum_{x \\in \\mathcal{X}, y \\in \\mathcal{Y}} = p(x, y) \\log(p(x | y)). -``` - -This is the definition used when calling [`entropy_conditional`](@ref) with a -[`ContingencyMatrix`](@ref). - -### Two-entropies formulation - -Equivalently, the following difference of entropies hold - -```math -H^S(X | Y) = H^S(X, Y) - H^S(Y), -``` - -where ``H^S(\\cdot`` and ``H^S(\\cdot | \\cdot)`` are the [`Shannon`](@ref) entropy and -Shannon joint entropy, respectively. This is the definition used when calling -[`entropy_conditional`](@ref) with a [`ProbabilitiesEstimator`](@ref). 
- -## Differential definition - -The differential conditional Shannon entropy is analogously defined as - -```math -H^S(X | Y) = h^S(X, Y) - h^S(Y), -``` - -where ``h^S(\\cdot`` and ``h^S(\\cdot | \\cdot)`` are the [`Shannon`](@ref) -differential entropy and Shannon joint differential entropy, respectively. This is the -definition used when calling [`entropy_conditional`](@ref) with a -[`DifferentialEntropyEstimator`](@ref). -""" -struct CEShannon{E} <: ConditionalEntropy - e::E - function CEShannon(; base = 2) - e = MLEntropy(Shannon(; base)) - new{typeof(e)}(e) - end -end - -function estimate(measure::CEShannon, c::ContingencyMatrix{T, 2}) where {T} - e = measure.e.definition - Nx, Ny = size(c) - - py = probabilities(c, dims = 2) - pxy = probabilities(c) - ce = 0.0 - log0 = log_with_base(e.base) - for j in 1:Ny - pyⱼ = py[j] - for i in 1:Nx - pxyᵢⱼ = pxy[i, j] - if pxyᵢⱼ != 0.0 - ce += pxyᵢⱼ * log0(pxyᵢⱼ / pyⱼ) - end - end - end - return -ce -end - -function estimate(measure::CEShannon, est::ProbOrDiffEst, x, y) - HY, HXY = marginal_entropies_ce2h(measure, est, x, y) - return HXY - HY -end diff --git a/src/methods/infomeasures/entropy_conditional/CETsallisAbe.jl b/src/methods/infomeasures/entropy_conditional/CETsallisAbe.jl deleted file mode 100644 index 431c1b302..000000000 --- a/src/methods/infomeasures/entropy_conditional/CETsallisAbe.jl +++ /dev/null @@ -1,67 +0,0 @@ -export CETsallisAbe - -""" - CETsallisAbe <: ConditionalEntropy - CETsallisAbe(; base = 2, q = 1.5) - -[Abe2001](@citet)'s discrete Tsallis conditional entropy measure. - -## Definition - -Abe & Rajagopal's Tsallis conditional entropy between discrete random variables -``X`` and ``Y`` with finite ranges ``\\mathcal{X}`` and ``\\mathcal{Y}`` is defined as - -```math -H_q^{T_A}(X | Y) = \\dfrac{H_q^T(X, Y) - H_q^T(Y)}{1 + (1-q)H_q^T(Y)}, -``` - -where ``H_q^T(\\cdot)`` and ``H_q^T(\\cdot, \\cdot)`` is the [`Tsallis`](@ref) -entropy and the joint Tsallis entropy. 
-""" -struct CETsallisAbe{E} <: ConditionalEntropy - e::E - function CETsallisAbe(; q = 1.5, base = 2) - e = MLEntropy(Tsallis(; q, base)) - new{typeof(e)}(e) - end -end - -function estimate(measure::CETsallisAbe, c::ContingencyMatrix{T, 2}) where {T} - e = measure.e.definition - Nx, Ny = size(c) - base, q = e.base, e.q - - py = probabilities(c, dims = 2) - pxy = probabilities(c) - # Definition 7 in Abe & Rajagopal (2001) - hjoint = 1 / (1 - q) * (sum(pxy .^ 2) - 1) - - # The marginal Tsallis entropy for the second variable - hy = entropy(Tsallis(; q, base), py) - - # Equation 13 in Abe & Rajagopal (2001) - ce = (hjoint - hy) / (1 + (1 - q)*hy) - - if q == 1 # if shannon, normalize - return _convert_logunit(ce, ℯ, base) - else - return ce - end -end - -function estimate(measure::CETsallisAbe, est::ProbabilitiesEstimator, x, y) - e = measure.e.definition - q, base = e.q, e.base - - HY, HXY = marginal_entropies_ce2h(measure, est, x, y) - ce = (HXY - HY) / (1 + (1 - q)*HY) - if q == 1 # if shannon, normalize - return _convert_logunit(ce, ℯ, e.base) - else - return ce - end -end - -function estimate(measure::CETsallisAbe, est::DifferentialEntropyEstimator, x, y) - throw(ArgumentError("CETsallisAbe not implemented for $(typeof(est))")) -end diff --git a/src/methods/infomeasures/entropy_conditional/CETsallisFuruichi.jl b/src/methods/infomeasures/entropy_conditional/CETsallisFuruichi.jl deleted file mode 100644 index 6b2c8c452..000000000 --- a/src/methods/infomeasures/entropy_conditional/CETsallisFuruichi.jl +++ /dev/null @@ -1,61 +0,0 @@ -export CETsallisFuruichi - -""" - CETsallisFuruichi <: ConditionalEntropy - CETsallisFuruichi(; base = 2, q = 1.5) - -Furuichi (2006)'s discrete Tsallis conditional entropy measure. - -## Definition - -Furuichi's Tsallis conditional entropy between discrete random variables -``X`` and ``Y`` with finite ranges ``\\mathcal{X}`` and ``\\mathcal{Y}`` is defined as - -```math -H_q^T(X | Y) = -\\sum_{x \\in \\mathcal{X}, y \\in \\mathcal{Y}} -p(x, y)^q \\log_q(p(x | y)), -``` - -when ``q \\neq 1``. For ``q = 1``, ``H_q^T(X | Y)`` reduces to the Shannon conditional -entropy: - -```math -H_{q=1}^T(X | Y) = -\\sum_{x \\in \\mathcal{X}, y \\in \\mathcal{Y}} = -p(x, y) \\log(p(x | y)) -``` -""" -struct CETsallisFuruichi{E} <: ConditionalEntropy - e::E - function CETsallisFuruichi(; q = 1.5, base = 2) - e = MLEntropy(Tsallis(; q, base)) - new{typeof(e)}(e) - end -end - -function estimate(measure::CETsallisFuruichi, c::ContingencyMatrix{T, 2}) where {T} - e = measure.e.definition - Nx, Ny = size(c) - q = e.q - if q == 1 - return estimate(CEShannon(;base=measure.e.base), pxy) - end - py = probabilities(c, dims = 2) - pxy = probabilities(c) - ce = 0.0 - qlog = logq0(q) - for j in 1:Ny - pyⱼ = py[j] - for i in 1:Nx - pxyᵢⱼ = pxy[i, j] - ce += pxyᵢⱼ^q * qlog(pxyᵢⱼ / pyⱼ) - end - end - ce *= -1.0 - - return ce -end - - -function estimate(measure::CETsallisFuruichi, est::ProbOrDiffEst, x, y) - throw(ArgumentError("CETsallisFurichi not implemented for $(typeof(est))")) -end diff --git a/src/methods/infomeasures/entropy_conditional/entropy_conditional.jl b/src/methods/infomeasures/entropy_conditional/entropy_conditional.jl deleted file mode 100644 index aaf5e44be..000000000 --- a/src/methods/infomeasures/entropy_conditional/entropy_conditional.jl +++ /dev/null @@ -1,140 +0,0 @@ -export entropy_conditional -export ConditionalEntropy -export ConditionalEntropyDefinition - -""" -The supertype for all conditional entropies. 
-""" -abstract type ConditionalEntropy <: InformationMeasure end - -""" -The supertype for all conditional entropy definitions. -""" -abstract type ConditionalEntropyDefinition <: Definition end - -# Measures -include("CEShannon.jl") -include("CETsallisFuruichi.jl") -include("CETsallisAbe.jl") - -entropy_conditional(measure::ConditionalEntropy, args...; kwargs...) = - estimate(measure, args...; kwargs...) - - -""" - entropy_conditional(measure::ConditionalEntropy, c::ContingencyMatrix{T, 2}) where T - -Estimate the discrete version of the given [`ConditionalEntropy`](@ref) `measure` from -its direct (sum) definition, using the probabilities from a pre-computed -[`ContingencyMatrix`](@ref), constructed from two input variables `x` and `y`. -This estimation method works for both numerical and categorical data. -If `measure` is not given, then the default is `CEShannon()`. - -The convention is to compute the entropy of the variable in the *first* column of `c` -conditioned on the variable in the *second* column of `c`. To do the opposite, call this -function with a new contingency matrix where the order of the variables is reversed. - -## Compatible measures - -| | [`ContingencyMatrix`](@ref) | -| --------------------------- | :-------------------------: | -| [`CEShannon`](@ref) | ✓ | -| [`CETsallisFuruichi`](@ref) | ✓ | -| [`CETsallisAbe`](@ref) | ✓ | -""" -function entropy_conditional(measure::ConditionalEntropy, c::ContingencyMatrix) - return estimate(measure, c) -end - -""" - entropy_conditional([measure::ConditionalEntropy], est::ProbabilitiesEstimator, x, y) - -Estimate the entropy of `x` conditioned on `y`, using the discrete version of the given -conditional entropy (CE) `measure`. The CE is computed the difference of -the joint entropy and the marginal entropy of `y`, using -the [`ProbabilitiesEstimator`](@ref) `est`, which must compatible with multivariate data -(that is, have an implementation for [`marginal_encodings`](@ref)). -No bias correction is applied. If `measure` is not given, then the default is `CEShannon()`. - -## Estimators - -Joint and marginal probabilities are computed by jointly discretizing `x` and `y` using -the approach given by `est`, and obtaining the marginal distribution for `y` from the joint -distribution. - -| Estimator | Principle | [`CEShannon`](@ref) | [`CETsallisAbe`](@ref) | [`CETsallisFuruichi`](@ref) | -| ---------------------------- | ------------------- | :-----------------: | :--------------------: | :-------------------------: | -| [`CountOccurrences`](@ref) | Frequencies | ✓ | ✓ | x | -| [`ValueHistogram`](@ref) | Binning (histogram) | ✓ | ✓ | x | -| [`SymbolicPermutation`](@ref) | Ordinal patterns | ✓ | ✓ | x | -| [`Dispersion`](@ref) | Dispersion patterns | ✓ | ✓ | x | -""" -function entropy_conditional(measure::ConditionalEntropy, est::ProbabilitiesEstimator, x, y) - return estimate(measure, est, x, y) -end - -""" - entropy_conditional([measure::ConditionalEntropy], est::DifferentialEntropyEstimator, x, y) - -Estimate the entropy of `x` conditioned on `y`, using the differential/continuous -version of the given conditional entropy (CE) `measure`. The CE is computed the difference of -the joint entropy and the marginal entropy of `y`, using -the [`DifferentialEntropyEstimator`](@ref) `est`, which must be compatible with multivariate data. -No bias correction is applied. -If `measure` is not given, then the default is `CEShannon()`. 
- -## Estimators - -| Estimator | Principle | [`CEShannon`](@ref) | [`CETsallisAbe`](@ref) | [`CETsallisFuruichi`](@ref) | -| -------------------------------- | ----------------- | :-----------------: | :--------------------: | :-------------------------: | -| [`Kraskov`](@ref) | Nearest neighbors | ✓ | x | x | -| [`Zhu`](@ref) | Nearest neighbors | ✓ | x | x | -| [`ZhuSingh`](@ref) | Nearest neighbors | ✓ | x | x | -| [`Gao`](@ref) | Nearest neighbors | ✓ | x | x | -| [`Goria`](@ref) | Nearest neighbors | ✓ | x | x | -| [`Lord`](@ref) | Nearest neighbors | ✓ | x | x | -| [`LeonenkoProzantoSavani`](@ref) | Nearest neighbors | ✓ | x | x | -""" -function entropy_conditional(measure::ConditionalEntropy, est::DifferentialEntropyEstimator, x, y) - return estimate(measure, est, x, y) -end - -# Generic 3H-formulation of mutual information. -function marginal_entropies_ce2h(measure::ConditionalEntropy, est::ProbabilitiesEstimator, x, y) - e = measure.e.definition - X = StateSpaceSet(x) - Y = StateSpaceSet(y) - XY = StateSpaceSet(X, Y) - HY = entropy(e, est, Y) - HXY = entropy(e, est, XY) - return HY, HXY -end - -# Override some definitions, because the estimator behaviour need to be adjusted -# for multiple input variables. -const WellDefinedCEProbEsts{m, D} = Union{ - SymbolicPermutation{m}, - ValueHistogram{<:FixedRectangularBinning{D}}, - ValueHistogram{<:RectangularBinning{T}}, - Dispersion -} where {m, D, T} - -function marginal_entropies_ce2h(measure::ConditionalEntropy, - est::WellDefinedCEProbEsts{m, D}, x, y) where {m, D} - eX, eY = marginal_encodings(est, x, y) - eXY = StateSpaceSet(eX, eY) - e = measure.e - HY = entropy(e, CountOccurrences(), eY) - HXY = entropy(e, CountOccurrences(), eXY) - return HY, HXY -end - -function marginal_entropies_ce2h(measure::ConditionalEntropy, est::DifferentialEntropyEstimator, x, y) - e = measure.e.definition - X = StateSpaceSet(x) - Y = StateSpaceSet(y) - XY = StateSpaceSet(X, Y) - HY = entropy(est, Y) - HXY = entropy(est, XY) - return HY, HXY -end diff --git a/src/methods/infomeasures/entropy_joint.jl b/src/methods/infomeasures/entropy_joint.jl deleted file mode 100644 index 637f1c761..000000000 --- a/src/methods/infomeasures/entropy_joint.jl +++ /dev/null @@ -1,154 +0,0 @@ -using ComplexityMeasures: EntropyDefinition - -export entropy_joint -export JointEntropy -export JointEntropyRenyi -export JointEntropyShannon -export JointEntropyTsallis - -""" - entropy_joint(e::EntropyDefinition, x, y) - entropy_joint(e::EntropyDefinition, c::ContingencyMatrix) - -Compute the joint entropy of type `e` (e.g. [`Shannon`](@ref)) of the input variables -`x` and `y`, or from the pre-computed contingency matrix `c` (see -[`ContingencyMatrix`](@ref)). - -## Discrete definitions - -Given two two discrete random variables ``X`` and ``Y`` with ranges ``\\mathcal{X}`` and -``\\mathcal{X}``, we define the following discrete joint entropies: - -- [`JointEntropyShannon`](@ref): - ``H^S(X, Y) = -\\sum_{x\\in \\mathcal{X}, y \\in \\mathcal{Y}} p(x, y) \\log p(x, y)`` - (Cover & Thomas, 2006)[CoverThomas2006](@cite). -- [`JointEntropyRenyi`](@ref): - ``H_q^R(X, Y) = -\\dfrac{1}{1-\\alpha} \\log \\sum_{i = 1}^N p_i^q`` - (Golshani et al., 2009)[Golshani2009](@cite). -- [`JointEntropyTsallis`](@ref): - ``H_q^T(X, Y) = -\\sum_{x\\in \\mathcal{X}, y \\in \\mathcal{Y}} p(x, y)^q \\log_q p(x, y)`` - (Furuichi, 2006)[Furuichi2006](@cite), - where ``log_q(x, q) = \\dfrac{x^{1-q} - 1}{1-q}`` is the q-logarithm. 
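Each of the three discrete definitions above is a direct sum over a joint pmf. As an illustration of the Tsallis case and the q-logarithm, a plain-Julia sketch with hypothetical probabilities (not the package API):

```julia
# q-logarithm and discrete Tsallis joint entropy:
#   H_q^T(X,Y) = -Σ p(x,y)^q log_q p(x,y),  log_q(x) = (x^(1-q) - 1) / (1 - q).
logq(x, q) = q == 1 ? log(x) : (x^(1 - q) - 1) / (1 - q)
q   = 1.5
pxy = [0.2 0.1; 0.3 0.4]        # hypothetical joint pmf
H_T = -sum(p^q * logq(p, q) for p in pxy if p > 0)
```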
- -The expressions for Shannon joint entropy is from [CoverThomas2006](@citet), -for Rényi joint entropy from [Golshani2009](@citet), and for Tsallis joint entropy -from [Furuichi2006](@citet). -""" -function entropy_joint(e::EntropyDefinition, args...) - throw(ArgumentError("Joint entropy not defined and/or implemented for $e with $(args)")) -end - -################################################################ -# Types of joint entropy. Each type of joint entropy is its -# own type, so it can be used as a "module" in other measures. -################################################################ -""" The supertype of all joint entropy measures. """ -abstract type JointEntropy end - -""" - JointEntropyShannon <: JointEntropy - JointEntropyShannon(; base = 2) - -The Shannon joint entropy measure. See docstring of [`entropy_joint`](@ref) for definition. -""" -struct JointEntropyShannon{E<:Shannon} <: JointEntropy - e::E - function JointEntropyShannon(; base = 2) - e = Shannon(; base) - new{typeof(e)}(e) - end -end - -""" - JointEntropyTsallis <: JointEntropy - JointEntropyTsallis(; base = 2, q = 1.5) - -The Tsallis joint entropy measure. See docstring of [`entropy_joint`](@ref) for definition. -""" -struct JointEntropyTsallis{E<:Tsallis} <: JointEntropy - e::E - function JointEntropyTsallis(; base = 2, q = 1.5) - e = Tsallis(; base, q) - new{typeof(e)}(e) - end -end - -""" - JointEntropyRenyi <: JointEntropy - JointEntropyRenyi(; base = 2, q = 1.5) - -The Tsallis joint entropy measure. See docstring of [`entropy_joint`](@ref) for definition. -""" -struct JointEntropyRenyi{E<:Renyi} <: JointEntropy - e::E - function JointEntropyRenyi(; base = 2, q = 1.5) - e = Renyi(; base, q) - new{typeof(e)}(e) - end -end - -################################################################ -# Discrete implementations -################################################################ -function entropy_joint(measure::JointEntropyShannon, x...) - # Define p(x...) log p(x...) = 0 if p(x....) = 0; (Cover & Thomas, 2006) - # We circumvent this definition by directly counting *occurring pairs*. - # Any non-occurring pair then gets probability zero automatically. - X = StateSpaceSet(x...) - return entropy(measure.e, CountOccurrences(), X) -end - -function entropy_joint(measure::JointEntropyShannon, est::DifferentialEntropyEstimator, x...) - X = StateSpaceSet(x...) - return entropy(measure.e, est, X) -end - -function entropy_joint(measure::JointEntropyRenyi, x...) - # Direct analogue of Shannon version, - #Golshani, L., Pasha, E., & Yari, G. (2009). Some properties of Rényi entropy and Rényi entropy rate. Information Sciences, 179(14), 2426-2433. - X = StateSpaceSet(x...) - return entropy(measure.e, CountOccurrences(), X) -end - -function entropy_joint(measure::JointEntropyTsallis, x...) - X = StateSpaceSet(x...) - return entropy(measure.e, CountOccurrences(), X) -end - -function entropy_joint(measure::JointEntropyShannon, c::ContingencyMatrix{T, 2}) where {T} - base = measure.e.base - h = 0.0 - for pij in c - h += pij * log(pij) - end - h = -h - return _convert_logunit(h, ℯ, base) -end - - -# ``H_q^T(X, Y) = -\\sum_{x\\in \\mathcal{X}, y \\in \\mathcal{Y}} p(x, y)^q \\log_q p(x, y)`` -# (Furuichi, 2006)[Furuichi2006](@cite), -# where ``log_q(x, q) = \\dfrac{x^{1-q} - 1}{1-q}`` is the q-logarithm. 
- -function entropy_joint(measure::JointEntropyTsallis, c::ContingencyMatrix{T, 2}) where {T} - base = measure.e.base - q = measure.e.q - h = 0.0 - for pij in c - h += pij^q * logq(pij, q) - end - h = -h - return _convert_logunit(h, ℯ, base) -end - - -function entropy_joint(measure::JointEntropyRenyi, c::ContingencyMatrix{T, 2}) where {T} - base = measure.e.base - q = measure.e.q - h = 0.0 - for pij in c - h += pij^q * logq(pij, q) - end - h = -h - return _convert_logunit(h, ℯ, base) -end diff --git a/src/methods/infomeasures/infomeasures.jl b/src/methods/infomeasures/infomeasures.jl deleted file mode 100644 index 5e86af75e..000000000 --- a/src/methods/infomeasures/infomeasures.jl +++ /dev/null @@ -1,71 +0,0 @@ -export estimate -export InformationMeasure -export InformationMeasureEstimator -export InformationMeasureDefinition # Actually, InformationMeasure and InformationMeasureDefinition could be identical - -const ProbOrDiffEst = Union{ProbabilitiesEstimator, DifferentialEntropyEstimator} -""" -The supertype for all estimators of information-based measures. -""" -abstract type InformationMeasureEstimator end - -""" -A generic supertype for definitions of information measures (one measure may have -multiple definitions). -""" -abstract type Definition end - -""" - InformationMeasure <: AssociationMeasure - -The supertype for all definitions of information-based measures. - -## Why use definitions? - -Several information measures, such as mutual information, come in several forms -depending on what type of generalized entropy they are defined with respect to. -For example, there are at least three forms of Rényi mutual informations. - -In CausalityTools.jl, each unique variant of a measure is a subtype of `InformationMeasure`. -For example, [`MITsallisFuruichi`](@ref) gives the formula for Furuichi (2006)'s -Rényi-based mutual information. - -## Implemented measures - -### Mutual informations - -- [`MIShannon`](@ref). Discrete Shannon mutual information. -- [`MITsallisFuruichi`](@ref). Discrete Tsallis mutual information, as defined by - Furuichi (2006). - -### Conditional mutual information (CMI) - -- [`CMIRenyiSarbu`](@ref). Discrete Rényi CMI. -""" -abstract type InformationMeasure <: AssociationMeasure end - -""" - estimate(e::EntropyDefinition, est::InformationMeasureEstimator, input::VectorOrStateSpaceSet...) - -Given some `input` data, estimate some information measure using the given -[`InformationMeasureEstimator`](@ref), with respect to the generalized entropy `e`. -""" -function estimate(measure::InformationMeasure, args...; kwargs...) 
end - -# Contingency matrices and its computation based on various probabilites -# estimators -include("marginal_encodings.jl") - -# Things that will be eventually moved to ComplexityMeasures.jl -include("various/probabilities.jl") -include("various/entropies.jl") - -# Higher-level measures -include("entropy_conditional/entropy_conditional.jl") -include("entropy_joint.jl") -include("mutualinfo/mutualinfo.jl") -include("condmutualinfo/condmutualinfo.jl") -include("transferentropy/transferentropy.jl") -include("predictive_asymmetry/predictive_asymmetry.jl") # old (TE-based) -include("predictive_asymmetry/PA.jl") # new -include("pmi.jl") diff --git a/src/methods/infomeasures/marginal_encodings.jl b/src/methods/infomeasures/marginal_encodings.jl deleted file mode 100644 index 98624406c..000000000 --- a/src/methods/infomeasures/marginal_encodings.jl +++ /dev/null @@ -1,102 +0,0 @@ -import ComplexityMeasures: symbolize_for_dispersion -export marginal_encodings - -""" - marginal_encodings(est::ProbabilitiesEstimator, x::VectorOrStateSpaceSet...) - -Encode/discretize each input vector `xᵢ ∈ x` according to a procedure determined by `est`. -Any `xᵢ ∈ X` that are multidimensional ([`StateSpaceSet`](@ref)s) will be encoded column-wise, -i.e. each column of `xᵢ` is treated as a timeseries and is encoded separately. - -This is useful for computing any discrete information theoretic quantity, and is -used internally by [`contingency_matrix`](@ref). - -## Supported estimators - -- [`ValueHistogram`](@ref). Bin visitation frequencies are counted in the joint space `XY`, - then marginal visitations are obtained from the joint bin visits. - This behaviour is the same for both [`FixedRectangularBinning`](@ref) and - [`RectangularBinning`](@ref) (which adapts the grid to the data). - When using [`FixedRectangularBinning`](@ref), the range along the first dimension - is used as a template for all other dimensions. -- [`SymbolicPermutation`](@ref). Each timeseries is separately [`encode`](@ref)d according - to its ordinal pattern. -- [`Dispersion`](@ref). Each timeseries is separately [`encode`](@ref)d according to its - dispersion pattern. - -Many more implementations are possible. Each new implementation gives one new -way of estimating the [`ContingencyMatrix`](@ref) -""" -function marginal_encodings end - -function marginal_encodings(est, x::VectorOrStateSpaceSet...) - return marginally_encode_variable.(Ref(est), x) -end - -function marginally_encode_variable(est, x::AbstractStateSpaceSet) - return StateSpaceSet(marginally_encode_variable.(Ref(est), columns(x))...) -end - -function marginally_encode_variable(est::CountOccurrences, x::AbstractVector) - return x -end - -function marginally_encode_variable(est::SymbolicPermutation{m}, x::AbstractVector) where {m} - emb = embed(x, m, est.τ).data - return encode.(Ref(est.encoding), emb) -end - -function marginally_encode_variable(est::Dispersion, x::AbstractVector) - return symbolize_for_dispersion(est, x) -end - -function marginally_encode_variable( - est::ValueHistogram{<:FixedRectangularBinning{D}}, - x::AbstractVector) where D - range = first(est.binning.ranges) - ϵmin = minimum(range) - ϵmax = maximum(range) - N = length(range) - encoder = RectangularBinEncoding(FixedRectangularBinning(ϵmin, ϵmax, N, 1)) - return encode.(Ref(encoder), x) -end - -# Special treatment for RectangularBinning. We create the joint embedding, then -# extract marginals from that. This could probably be faster, -# but it *works*. 
I'd rather things be a bit slower than having marginals -# that are not derived from the same joint distribution, which would hugely increase -# bias, because we're not guaranteed cancellation between entropy terms -# in higher-level methods. -function marginal_encodings(est::ValueHistogram{<:RectangularBinning}, x::VectorOrStateSpaceSet...) - X = StateSpaceSet(StateSpaceSet.(x)...) - encoder = RectangularBinEncoding(est.binning, X) - - bins = [vec(encode_as_tuple(encoder, pt))' for pt in X] - joint_bins = reduce(vcat, bins) - idxs = size.(x, 2) #each input can have different dimensions - s = 1 - encodings = Vector{Vector}(undef, 0) - for (i, cidx) in enumerate(idxs) - variable_subset = s:(s + cidx - 1) - s += cidx - y = @views joint_bins[:, variable_subset] - for j in size(y, 2) - push!(encodings, y[:, j]) - end - end - - return encodings -end - -# A version of `cartesian_bin_index` that directly returns the joint bin encoding -# instead of converting it to a cartesian index. -function encode_as_tuple(e::RectangularBinEncoding, point::SVector{D, T}) where {D, T} - ranges = e.ranges - if e.precise - # Don't know how to make this faster unfurtunately... - bin = map(searchsortedlast, ranges, point) - else - bin = floor.(Int, (point .- e.mini) ./ e.widths) .+ 1 - end - return bin -end diff --git a/src/methods/infomeasures/mutualinfo/MIRenyiJizba.jl b/src/methods/infomeasures/mutualinfo/MIRenyiJizba.jl deleted file mode 100644 index caacb8cac..000000000 --- a/src/methods/infomeasures/mutualinfo/MIRenyiJizba.jl +++ /dev/null @@ -1,71 +0,0 @@ -export MIRenyiJizba - -""" - MIRenyiJizba <: MutualInformation - -The Rényi mutual information ``I_q^{R_{J}}(X; Y)`` defined in [Jizba2012](@cite). - -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`mutualinfo`](@ref) to compute the raw mutual information. - -## Definition - -```math -I_q^{R_{J}}(X; Y) = S_q^{R}(X) + S_q^{R}(Y) - S_q^{R}(X, Y), -``` - -where ``S_q^{R}(\\cdot)`` and ``S_q^{R}(\\cdot, \\cdot)`` the [`Rényi`](@ref) entropy and -the joint Rényi entropy. -""" -struct MIRenyiJizba{E <: Renyi} <: MutualInformation{E} - e::E - function MIRenyiJizba(; q = 1.5, base = 2) - e = Renyi(; q, base) - new{typeof(e)}(e) - end - function MIRenyiJizba(e::E) where E <: Renyi - new{E}(e) - end -end - -function estimate(measure::MIRenyiJizba, est::Contingency{<:ProbabilitiesEstimator}, x, y) - return estimate(measure, contingency_matrix(est.est, x, y)) -end - -function estimate(measure::MIRenyiJizba, est::Contingency{<:Nothing}, x, y) - return estimate(measure, contingency_matrix(x, y)) -end - -function estimate(measure::MIRenyiJizba, pxy::ContingencyMatrix{T, 2}) where {T} - e = measure.e - q = e.q - px = probabilities(pxy, dims = 1) - py = probabilities(pxy, dims = 2) - logb = log_with_base(e.base) - num = 0.0 - den = 0.0 - for i in eachindex(px.p) - for j in eachindex(py.p) - num += px[i]^q * py[j]^q - den += pxy[i, j]^q - end - end - if den != 0 - mi = logb(num / den) - else - mi = 0.0 - end - - return (1 / (1 / q)) * mi -end - -function estimate(measure::MIRenyiJizba, est::ProbabilitiesEstimator, x, y) - HX, HY, HXY = marginal_entropies_mi3h(measure, est, x, y) - return HX + HY - HXY -end - -function estimate(::MIRenyiJizba, est::DifferentialEntropyEstimator, args...) 
- throw(ArgumentError("MIRenyiJizba not implemented for $(typeof(est))")) -end diff --git a/src/methods/infomeasures/mutualinfo/MIRenyiSarbu.jl b/src/methods/infomeasures/mutualinfo/MIRenyiSarbu.jl deleted file mode 100644 index 41915f873..000000000 --- a/src/methods/infomeasures/mutualinfo/MIRenyiSarbu.jl +++ /dev/null @@ -1,74 +0,0 @@ -export MIRenyiSarbu - -""" - MIRenyiSarbu <: MutualInformation - MIRenyiSarbu(; base = 2, q = 1.5) - -The discrete Rényi mutual information from [Sarbu2014](@citet). - -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`mutualinfo`](@ref) to compute the raw mutual information. - -## Description - -Sarbu (2014) defines discrete Rényi mutual information as the -Rényi ``\\alpha``-divergence between the conditional joint probability mass function -``p(x, y)`` and the product of the conditional marginals, ``p(x) \\cdot p(y)``: - -```math -I(X, Y)^R_q = -\\dfrac{1}{q-1} -\\log \\left( - \\sum_{x \\in X, y \\in Y} - \\dfrac{p(x, y)^q}{\\left( p(x)\\cdot p(y) \\right)^{q-1}} -\\right) -``` -See also: [`mutualinfo`](@ref). -""" -struct MIRenyiSarbu{E <: Renyi} <: MutualInformation{E} - e::E - function MIRenyiSarbu(; q = 1.5, base = 2) - e = Renyi(; q, base) - new{typeof(e)}(e) - end -end - -function estimate(measure::MIRenyiSarbu, est::Contingency{<:ProbabilitiesEstimator}, x, y) - return estimate(measure, contingency_matrix(est.est, x, y)) -end - -function estimate(measure::MIRenyiSarbu, est::Contingency{<:Nothing}, x, y) - return estimate(measure, contingency_matrix(x, y)) -end - -function estimate(measure::MIRenyiSarbu, pxy::ContingencyMatrix{T, 2}) where {T} - px = probabilities(pxy, dims = 1) - py = probabilities(pxy, dims = 2) - e = measure.e - q = e.q - - mi = 0.0 - for i in eachindex(px.p) - for j in eachindex(py.p) - pxyᵢⱼ = pxy[i, j] - mi += pxyᵢⱼ^q / ((px[i] * py[j])^(q - 1)) - end - end - if mi == 0 - return 0.0 - else - return _convert_logunit(1 / (q - 1) * log(mi), ℯ, e.base) - end -end - - -function estimate(::MIRenyiSarbu, est::ProbabilitiesEstimator, x, y) - throw(ArgumentError("MIRenyiSarbu not implemented for $(typeof(est))")) -end - - -function estimate(::MIRenyiSarbu, est::DifferentialEntropyEstimator, args...) - throw(ArgumentError("MIRenyiSarbu not implemented for $(typeof(est))")) -end diff --git a/src/methods/infomeasures/mutualinfo/MIShannon.jl b/src/methods/infomeasures/mutualinfo/MIShannon.jl deleted file mode 100644 index 0e7d90fb0..000000000 --- a/src/methods/infomeasures/mutualinfo/MIShannon.jl +++ /dev/null @@ -1,128 +0,0 @@ -using ComplexityMeasures: log_with_base -using Accessors - -export MIShannon - -""" - MIShannon <: MutualInformation - MIShannon(; base = 2) - -The Shannon mutual information ``I^S(X; Y)``. - -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`mutualinfo`](@ref) to compute the raw mutual information. - -## Discrete definition - -There are many equivalent formulations of discrete Shannon mutual information. In this -package, we currently use the double-sum and the three-entropies formulations. 
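Both formulations boil down to simple operations on the joint distribution. The double-sum form spelled out in the next subsection amounts to the following plain-Julia computation (a sketch with a hypothetical joint pmf, not the package API):

```julia
# Double-sum Shannon mutual information (bits) from a hypothetical joint pmf:
#   I(X;Y) = Σ_{x,y} p(x,y) log2( p(x,y) / (p(x) p(y)) )
pxy = [0.2 0.1; 0.3 0.4]
px  = vec(sum(pxy, dims = 2))   # marginal p(x)
py  = vec(sum(pxy, dims = 1))   # marginal p(y)
I_XY = sum(pxy[i, j] * log2(pxy[i, j] / (px[i] * py[j]))
           for i in eachindex(px) for j in eachindex(py) if pxy[i, j] > 0)
```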
- -### Double sum formulation - -Assume we observe samples -``\\bar{\\bf{X}}_{1:N_y} = \\{\\bar{\\bf{X}}_1, \\ldots, \\bar{\\bf{X}}_n \\}`` and -``\\bar{\\bf{Y}}_{1:N_x} = \\{\\bar{\\bf{Y}}_1, \\ldots, \\bar{\\bf{Y}}_n \\}`` from -two discrete random variables ``X`` and ``Y`` with finite supports -``\\mathcal{X} = \\{ x_1, x_2, \\ldots, x_{M_x} \\}`` and -``\\mathcal{Y} = y_1, y_2, \\ldots, x_{M_y}``. -The double-sum estimate is obtained by replacing the double sum - -```math -\\hat{I}_{DS}(X; Y) = - \\sum_{x_i \\in \\mathcal{X}, y_i \\in \\mathcal{Y}} p(x_i, y_j) \\log \\left( \\dfrac{p(x_i, y_i)}{p(x_i)p(y_j)} \\right) -``` - -where ``\\hat{p}(x_i) = \\frac{n(x_i)}{N_x}``, ``\\hat{p}(y_i) = \\frac{n(y_j)}{N_y}``, -and ``\\hat{p}(x_i, x_j) = \\frac{n(x_i)}{N}``, and ``N = N_x N_y``. -This definition is used by [`mutualinfo`](@ref) when called with a -[`ContingencyMatrix`](@ref). - -### Three-entropies formulation - -An equivalent formulation of discrete Shannon mutual information is - -```math -I^S(X; Y) = H^S(X) + H_q^S(Y) - H^S(X, Y), -``` - -where ``H^S(\\cdot)`` and ``H^S(\\cdot, \\cdot)`` are the marginal and joint discrete -Shannon entropies. This definition is used by [`mutualinfo`](@ref) when called with a -[`ProbabilitiesEstimator`](@ref). - -## Differential mutual information - -One possible formulation of differential Shannon mutual information is - -```math -I^S(X; Y) = h^S(X) + h_q^S(Y) - h^S(X, Y), -``` - -where ``h^S(\\cdot)`` and ``h^S(\\cdot, \\cdot)`` are the marginal and joint -differential Shannon entropies. This definition is used by [`mutualinfo`](@ref) when -called with a [`DifferentialEntropyEstimator`](@ref). - -See also: [`mutualinfo`](@ref). -""" -struct MIShannon{E <: Shannon} <: MutualInformation{E} - e::E - function MIShannon(; base::Real = 2) - e = Shannon(; base) - new{typeof(e)}(e) - end - function MIShannon(e::Shannon) - new{typeof(e)}(e) - end -end - -function estimate(measure::MIShannon, est::Contingency{<:ProbabilitiesEstimator}, x, y) - return estimate(measure, contingency_matrix(est.est, x, y)) -end - -function estimate(measure::MIShannon, est::Contingency{<:Nothing}, x, y) - return estimate(measure, contingency_matrix(x, y)) -end - -function estimate(measure::MIShannon, pxy::ContingencyMatrix{T, 2}) where {T} - e = measure.e - px = probabilities(pxy, dims = 1) - py = probabilities(pxy, dims = 2) - mi = 0.0 - logb = log_with_base(e.base) - for i in eachindex(px.p) - pxᵢ = px[i] - for j in eachindex(py.p) - pyⱼ = py[j] - pxyᵢⱼ = pxy[i, j] - inner = pxyᵢⱼ / (pxᵢ * pyⱼ) - if inner != 0.0 - mi += pxyᵢⱼ * logb(inner) - end - end - end - return mi -end - -function estimate(measure::MIShannon, est::ProbOrDiffEst, x, y) - # Due to inconsistent API in ComplexityMeasures.jl, we have to treat - # DifferentialEntropyEstimator here. Because all measures in this package - # have their own `base` field, it will conflict with `est.base` for - # `DifferentialEntropyEstimator`s. In these cases, we use `measure.base`, - # and override the estimator base, by simply creating a copy of the - # estimator with one field modified. 
- if est isa DifferentialEntropyEstimator && :base in fieldnames(typeof(est)) - if est.base != measure.e.base - mb = measure.e.base - eb = est.base - modified_est = Accessors.@set est.base = measure.e.base - HX, HY, HXY = marginal_entropies_mi3h(measure, modified_est, x, y) - else - HX, HY, HXY = marginal_entropies_mi3h(measure, est, x, y) - end - else - HX, HY, HXY = marginal_entropies_mi3h(measure, est, x, y) - end - mi = HX + HY - HXY - return mi -end diff --git a/src/methods/infomeasures/mutualinfo/MITsallisFuruichi.jl b/src/methods/infomeasures/mutualinfo/MITsallisFuruichi.jl deleted file mode 100644 index 73cddfdfe..000000000 --- a/src/methods/infomeasures/mutualinfo/MITsallisFuruichi.jl +++ /dev/null @@ -1,75 +0,0 @@ -export MITsallisFuruichi - -""" - MITsallisFuruichi <: MutualInformation - MITsallisFuruichi(; base = 2, q = 1.5) - -The discrete Tsallis mutual information from Furuichi (2006)[Furuichi2006](@cite), which -in that paper is called the *mutual entropy*. - -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`mutualinfo`](@ref) to compute the raw mutual information. - - -## Description - -Furuichi's Tsallis mutual entropy between variables ``X \\in \\mathbb{R}^{d_X}`` and -``Y \\in \\mathbb{R}^{d_Y}`` is defined as - -```math -I_q^T(X; Y) = H_q^T(X) - H_q^T(X | Y) = H_q^T(X) + H_q^T(Y) - H_q^T(X, Y), -``` - -where ``H^T(\\cdot)`` and ``H^T(\\cdot, \\cdot)`` are the marginal and joint Tsallis -entropies, and `q` is the [`Tsallis`](@ref)-parameter. -``` - -See also: [`mutualinfo`](@ref). -""" -struct MITsallisFuruichi{E <: Tsallis} <: MutualInformation{E} - e::E - function MITsallisFuruichi(; q = 1.5, base = 2) - e = Tsallis(; q, base) - new{typeof(e)}(e) - end -end - -function estimate(measure::MITsallisFuruichi, est::Contingency{<:ProbabilitiesEstimator}, x, y) - return estimate(measure, contingency_matrix(est.est, x, y)) -end - -function estimate(measure::MITsallisFuruichi, est::Contingency{<:Nothing}, x, y) - return estimate(measure, contingency_matrix(x, y)) -end - -function estimate( - measure::MITsallisFuruichi, - pxy::ContingencyMatrix{T, 2}) where T - e = measure.e - q = measure.e.q - px = probabilities(pxy, dims = 1) - py = probabilities(pxy, dims = 2) - - mi = 0.0 - for i in eachindex(px.p) - for j in eachindex(py.p) - pxyᵢⱼ = pxy[i, j] - mi += pxyᵢⱼ^q / (px[i]^(q - 1) * py[j]^(q - 1)) - end - end - mi = (1 / (q - 1) * (1 - mi) / (1-q)) - return _convert_logunit(mi, ℯ, e.base) -end - -function estimate(measure::MITsallisFuruichi, est::ProbabilitiesEstimator, x, y) - HX, HY, HXY = marginal_entropies_mi3h(measure, est, x, y) - q = measure.e.q - return HX + HY - HXY -end - - -function estimate(::MITsallisFuruichi, est::DifferentialEntropyEstimator, args...) - throw(ArgumentError("MITsallisFuruichi not implemented for $(typeof(est))")) -end diff --git a/src/methods/infomeasures/mutualinfo/MITsallisMartin.jl b/src/methods/infomeasures/mutualinfo/MITsallisMartin.jl deleted file mode 100644 index 5b66b3512..000000000 --- a/src/methods/infomeasures/mutualinfo/MITsallisMartin.jl +++ /dev/null @@ -1,76 +0,0 @@ -export MITsallisMartin - -""" - MITsallisMartin <: MutualInformation - MITsallisMartin(; base = 2, q = 1.5) - -The discrete Tsallis mutual information from [Martin2004](@citet). - -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise - dependence. -- Use with [`mutualinfo`](@ref) to compute the raw mutual information. 
- -## Description - -Martin et al.'s Tsallis mutual information between variables ``X \\in \\mathbb{R}^{d_X}`` -and ``Y \\in \\mathbb{R}^{d_Y}`` is defined as - -```math -I_{\\text{Martin}}^T(X, Y, q) := H_q^T(X) + H_q^T(Y) - (1 - q) H_q^T(X) H_q^T(Y) - H_q(X, Y), -``` - -where ``H^S(\\cdot)`` and ``H^S(\\cdot, \\cdot)`` are the marginal and joint Shannon -entropies, and `q` is the [`Tsallis`](@ref)-parameter. - -See also: [`mutualinfo`](@ref). -""" -struct MITsallisMartin{E <: Tsallis} <: MutualInformation{E} - e::E - function MITsallisMartin(; q = 1.5, base = 2) - e = Tsallis(; q, base) - new{typeof(e)}(e) - end -end - -function estimate(measure::MITsallisMartin, est::Contingency{<:ProbabilitiesEstimator}, x, y) - return estimate(measure, contingency_matrix(est.est, x, y)) -end - -function estimate(measure::MITsallisMartin, est::Contingency{<:Nothing}, x, y) - return estimate(measure, contingency_matrix(x, y)) -end - -# This is definition 3 in Martin et al. (2004), but with pᵢ replaced by the joint -# distribution and qᵢ replaced by the product of the marginal distributions. -function estimate( - measure::MITsallisMartin, - pxy::ContingencyMatrix{T, 2}) where T - e = measure.e - q = measure.e.q - q != 1 || throw(ArgumentError("MITsallisMartin for q=$(q) not defined with estimator ContingencyMatrix")) - px = probabilities(pxy, dims = 1) - py = probabilities(pxy, dims = 2) - - mi = 0.0 - for (i, pxᵢ) in enumerate(px.p) - for (j, pyⱼ) in enumerate(py.p) - pxyᵢⱼ = pxy[i, j] - mi += pxyᵢⱼ^q / (pxᵢ^(q - 1) * pyⱼ^(q - 1)) - end - end - f = 1 / (q - 1) - return f * (1 - mi) -end - -function estimate(measure::MITsallisMartin, est::ProbabilitiesEstimator, x, y) - HX, HY, HXY = marginal_entropies_mi3h(measure, est, x, y) - q = measure.e.q - return HX + HY - (1 - q) * HX * HY - HXY -end - - -function estimate(::MITsallisMartin, est::DifferentialEntropyEstimator, args...) - throw(ArgumentError("MITsallisMartin not implemented for $(typeof(est))")) -end diff --git a/src/methods/infomeasures/mutualinfo/estimators/nearest_neighbors/tests.ipynb b/src/methods/infomeasures/mutualinfo/estimators/nearest_neighbors/tests.ipynb deleted file mode 100644 index 4747e1683..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/nearest_neighbors/tests.ipynb +++ /dev/null @@ -1,91 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import scipy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let the joint StateSpaceSet $X := \\{\\bf{X}_1, \\bf{X_2}, \\ldots, \\bf{X}_m \\}$ be defined by the\n", - "concatenation of the marginal StateSpaceSets $\\{ \\bf{X}_k \\}_{k=1}^m$, where each $\\bf{X}_k$ \n", - "is potentially multivariate. Let $\\bf{x}_1, \\bf{x}_2, \\ldots, \\bf{x}_N$ be the points \n", - "in the joint space $X$.\n", - "\n", - "The `KraskovStögbauerGrassberger2` estimator first locates, for each $\\bf{x}_i \\in X$, the\n", - "point $\\bf{n}_i \\in X$, the `k`-th nearest neighbor to $\\bf{x}_i$, according to the \n", - "maximum norm (`Chebyshev` metric). Let $\\epsilon_i$ be the\n", - "distance $d(\\bf{x}_i, \\bf{n}_i)$.\n", - "\n", - "Consider $x_i^m \\in \\bf{X}_m$, the $i$-th point in the marginal space $\\bf{X}_m$. For each \n", - "$\\bf{x}_i^m$, we determine $\\theta_i^m$ := the number of points $\\bf{x}_k^m \\in \\bf{X}_m$ that \n", - "are a distance less than $\\epsilon_i$ away from $\\bf{x}_i^m$. 
That is, we use the \n", - "distance from a query point $\\bf{x}_i \\in X$ (in the *joint* space) to count neighbors of \n", - "$x_i^m \\in \\bf{X}_m$ (in the marginal space).\n", - "\n", - "Mutual information between the is the variables $\\bf{X}_1, \\bf{X_2}, \\ldots, \\bf{X}_m$ is\n", - "the estimated as \n", - "\n", - "$$\n", - "\\hat{I}_{KSG2}(\\bf{X}) = \n", - " \\psi{(k)} - \n", - " \\dfrac{m - 1}{k} + \n", - " (m - 1)\\psi{(N)} - \n", - " \\dfrac{1}{N} \\sum_{i = 1}^N \\sum_{j = 1}^m \\psi{(\\theta_i^j + 1)}\n", - "$$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.8.9 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.9" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/methods/infomeasures/mutualinfo/estimators/utils.jl b/src/methods/infomeasures/mutualinfo/estimators/utils.jl deleted file mode 100644 index 3c764294f..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/utils.jl +++ /dev/null @@ -1,21 +0,0 @@ -using Distances: evaluate - -# In the Kraskov1 estimator, ϵs are the distances in the Z = (X, Y) joint space -# In the Kraskov2 estimator, ϵs are the distances in the X and Y marginal spaces -function count_within_radius!(p, x, metric, ϵs, N) - @inbounds for i in 1:N - ϵ = ϵs[i] / 2 - xᵢ = x[i] - p[i] = count(evaluate(metric, xᵢ, x[j]) < ϵ for j in 1:N) - end - - return p -end - -function eval_dists_to_knns!(ds, pts, knn_idxs, metric) - @inbounds for i in eachindex(pts) - ds[i] = evaluate(metric, pts[i], pts[knn_idxs[i]]) - end - - return ds -end diff --git a/src/methods/infomeasures/mutualinfo/estimators/wip/Evans.jl b/src/methods/infomeasures/mutualinfo/estimators/wip/Evans.jl deleted file mode 100644 index f51b1f7e4..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/wip/Evans.jl +++ /dev/null @@ -1,86 +0,0 @@ -using SpecialFunctions: gamma, digamma -using Neighborhood: bulksearch -using Neighborhood: KDTree, Euclidean, Theiler, NeighborNumber -using StateSpaceSets: StateSpaceSet -using StateSpaceSets: dimension - -export Evans - -""" - Evans <: MutualInformationEstimator - Evans(k = 1, w = 0) - -The `Evans` mutual information estimator is based on `k`-th nearest neighbors -""" -Base.@kwdef struct Evans{M} <: MutualInformationEstimator - k::Int = 5 - w::Int = 0 - metric::M = Chebyshev() -end - -function estimate(def::MIShannonDifferential, est::Evans, x::VectorOrStateSpaceSet...) - e = def.e - @assert length(x) >= 2 || - error("Need at leats two input StateSpaceSets to compute mutual information between them.") - - (; k, w, metric) = est - - joint = StateSpaceSet(x...) - marginals = StateSpaceSet.(x) - N = length(joint) - D = dimension(joint) - tree_joint = KDTree(joint, metric) - # The ball radii of interest are just the `k`-th nearest neighbor distances in the - # joint and marginal spaces. 
- rs_joint = last.(bulksearch(tree_joint, joint, NeighborNumber(k), Theiler(w))[2]) - rs = [zeros(N) for m in eachindex(marginals)] - for (m, xₘ) in enumerate(marginals) - distance_to_kth_neighbors!(est, rs[m], xₘ) - end - - mi = 0.0 - marginal_rs = StateSpaceSet(rs...) # so we can index all marginals at once - for (i, rs) in enumerate(marginal_rs) - vⱼ = pball_volume(est, rs_joint[i]; d = D) - vprod = prod(pball_volume(est, r; d = D) for r in rs) - mi += log(vⱼ / vprod) - end - - I = -digamma(k + 1) + digamma(N) - (mi / N) - #@show -digamma(k) - #@show digamma(N) - #@show -(mi / N) - return _convert_logunit(I, ℯ, e.base) -end -mutualinfo(est::Evans, args...; base = 2, kwargs...) = - mutualinfo(Shannon(; base), est, args...; kwargs...) - -# For the marginal StateSpaceSet `xₘ`, find the distance to the `k`-th nearest neighbor -# of each point `xₘ[i]`, where `i = 1, 2, ..., N = length(xₘ)`. -function distance_to_kth_neighbors!(est::Evans, rs, xₘ) - (; k, w, metric) = est - tree = KDTree(xₘ, metric) - rs[:] = last.(bulksearch(tree, xₘ, NeighborNumber(k), Theiler(w))[2]) -end - -# TODO: we could also implement this for Chebyshev distance. -""" -Compute the volume of a `d`-dimensional ball with radius `r` respecting the -Lₚ-norm. -""" -function pball_volume(p, r::Real = 1.0; d::Int) - # https://link.springer.com/article/10.1007/s00013-019-01394-7 - if p == Inf - return r^d - end - return (2*r)^d * gamma(1 + 1/p)^d / gamma(1 + d/p) -end - -function pball_volume(est::Evans{M}, r::Real; d::Int) where M - if M <: Euclidean - p = 2 - elseif M <: Chebyshev - p = Inf - end - return pball_volume(p, r; d) -end diff --git a/src/methods/infomeasures/mutualinfo/estimators/wip/Gao2015.jl b/src/methods/infomeasures/mutualinfo/estimators/wip/Gao2015.jl deleted file mode 100644 index dbf8f8cd3..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/wip/Gao2015.jl +++ /dev/null @@ -1,52 +0,0 @@ - -""" - Gao2015 <: MutualInformationEstimator - Gao2015(k = 1, w = 0, base = 2) - -`Gao2015` estimates the [`Shannon`](@ref) mutual information using a nearest neighbor -approach with a local nonuniformity correction (LNC). - -[Gao2015] - Gao, S., Ver Steeg, G. & Galstyan, A.. (2015). Efficient Estimation of Mutual - Information for Strongly Dependent Variables. *Proceedings of the Eighteenth - International Conference on Artificial Intelligence and Statistics*, in - *Proceedings of Machine Learning Research* 38:277-286. - Available from https://proceedings.mlr.press/v38/gao15.html. -""" -Base.@kwdef struct Gao2015{M} <: MutualInformationEstimator - k::Int = 1 - w::Int = 0 - metric::M = Euclidean() -end - -function mutualinfo(e::Renyi, est::Gao2015{B, Chebyshev}, x) where {B} - (; k, w) = est - - joint = StateSpaceSet(x...) - N = length(joint) - M = length(x) - - # For each point `xᵢ ∈ joint`, find the index of its k-th nearest neighbor. - tree_joint = KDTree(joint, metric_joint) - idxs_joint = bulkisearch(tree_joint, joint, NeighborNumber(k + 1), Theiler(w)) - - -end - - -function lnc_correction(est::Gao2015, x::AbstractStateSpaceSet{D}, idxs_neighbors) - (; k, w, base) = est - -end - -using Statistics -using LinearAlgebra - -function pca(xᵢ, neighbors::SubStateSpaceSet{D}) where D - μ = xᵢ # manually set mean to be xᵢ, so that it is at the center of rotated rectangle. 
- M = Matrix(x) - C = @SMatrix cov(x) - E = eigen(C) - E.vectors[:, sortperm(E.values)] - -end diff --git a/src/methods/infomeasures/mutualinfo/estimators/wip/Gao2017.jl b/src/methods/infomeasures/mutualinfo/estimators/wip/Gao2017.jl deleted file mode 100644 index 3c682fcb9..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/wip/Gao2017.jl +++ /dev/null @@ -1,91 +0,0 @@ -using Neighborhood: Chebyshev, KDTree, Theiler, NeighborNumber -using Neighborhood: bulksearch -using Distances: evaluate -using DelayEmbeddings.StateSpaceSets: SubStateSpaceSet -using LinearAlgebra: det - -""" - Gao2017 <: DifferentialEntropyEstimator - Gao2017(k = 1, w = 1) - -The `Gao2017` estimator [Gao2017](@cite) can be used to estimate -[`Renyi`](@ref) differential entropy. - -It does so by considering, for ``\\alpha \\neq 1`` the integral - -```math -J_{\\alpha}(X) = \\int_{\\mathbb{R}^D} f^{\\alpha}(x) dx -``` -""" -Base.@kwdef struct Gao2017{B, M} <: InformationEstimator - k::Int = 1 - w::Int = 0 - metric::M = Euclidean() -end - -function Î(q, est::Gao2017, x::AbstractStateSpaceSet{D}) where D - (; k, w, metric) = est - N = length(x) - tree = KDTree(x, metric) - Bk,d,α,K = bias(est) - idxs, ds = bulksearch(tree, x, NeighborNumber(k), Theiler(w)) - -end - -# TODO: implement -multiplicative_bias(est::Gao2017) = 1.0 - -Base.@kwdef struct LocalLikelihood{M} <: ProbabilitiesEstimator - k::Int = 1 - w::Int = 0 - metric::M = Euclidean() -end - -function point_densities(est::LocalLikelihood, x::AbstractStateSpaceSet{D}) where D - - (; k, w, metric) = est - - N = length(x) - kmax = max(floor(Int, log(N)), k) - - tree = KDTree(x) - # The bandwidth `ds[i]` for the point `x[i]` is the distance to the `k`-th nearest - # neighbor of `x[i]`. - idxs, ds = bulksearch(tree, x, NeighborNumber(kmax), Theiler(w)) - hs = [d[k] for d in ds] - - densities = zeros(N) - for i = 1:N - xᵢ = x[i] - hᵢ = hs[i] - neighborsᵢ = @view x[idxs[i]] - densities[i] = point_density(est, xᵢ, hᵢ, neighborsᵢ) - end - return densities -end - -# Compute the local density around point xᵢ, given its `neighborsᵢ` -function point_density(est::LocalLikelihood, xᵢ, hᵢ, neighborsᵢ::SubStateSpaceSet{D}) where D - S₀ = 0.0 - S₁ = zeros(MVector{D, Float64}) - S₂ = zeros(MMatrix{D, D, Float64}) - # Gao et al, in the original paper, only loops through the floor(Int, log(N)) nearest - # neighbors of x[i]. No need to go through all. 
- hᵢsq = hᵢ^2 - twicehᵢsq = 2*hᵢsq - for (k, nⱼ) in enumerate(neighborsᵢ) - dᵢ = evaluate(est.metric, xᵢ, nⱼ) - eᵢ = exp(-dᵢ / twicehᵢsq) - xdiff = (nⱼ - xᵢ) - S₀ += eᵢ - S₁ .+= xdiff * (eᵢ / hᵢ) - S₂ .+= (xdiff * transpose(xdiff)) .* (eᵢ / twicehᵢsq) - end - - μ = S₁ / S₀ - Σ = (S₂ / S₀) - (S₁ * transpose(S₁) / (S₀^2)) - - num = (S₀ * exp(-0.5*transpose(μ) * inv(Σ) * μ)) - den = (N * (2π)^(D/2) * (hᵢ^D) * det(Σ)^(1/2)) - return num / den -end diff --git a/src/methods/infomeasures/mutualinfo/estimators/wip/copula/copula_mi.ipynb b/src/methods/infomeasures/mutualinfo/estimators/wip/copula/copula_mi.ipynb deleted file mode 100644 index 6d856d9e3..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/wip/copula/copula_mi.ipynb +++ /dev/null @@ -1,101 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m new project at `~/Code/Repos/Temp/CausalityTools.jl/src/methods/infomeasures`\n" - ] - } - ], - "source": [ - "using Pkg; Pkg.activate(\"../../../\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using Revise, CausalityTools" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using CairoMakie\n", - "using Distributions\n", - "using Statistics: quantile\n", - "usin\n", - "Rx = Exponential(0.7);\n", - "n = 500\n", - "x = rand(Rx, n)\n", - "y = x + randn(n)\n", - "\n", - "function empcdf(x)\n", - " N = length(x)\n", - " F̂ = [count(xᵢ .<= x) for xᵢ in x] / N\n", - "end\n", - "\n", - "function inv_normal_copula_transform(x)\n", - " ex = empcdf(x)\n", - " t = zeros(length(ex))\n", - " N = Normal(0, 1)\n", - " for (i, eᵢ) in enumerate(ex)\n", - " if eᵢ == 1.0 # Something weird happens when x = 1 or x = 0. Check why.\n", - " t[i] = quantile(N, 1-eps())\n", - " elseif eᵢ == 0.0\n", - " t[i] = quantile(N, eps())\n", - " else\n", - " t[i] = quantile(N, eᵢ)\n", - " end\n", - " end\n", - "\n", - " return t\n", - "end\n", - "\n", - "ex = empcdf(x)\n", - "ey = empcdf(y)\n", - "# Apply transformation (ok, as long as we don't change the ecdf). 
Use inverse normal cdf\n", - "#, ie.e quantile, so that we can use parametric estimator of MI.\n", - "X = inv_normal_copula_transform(x)\n", - "Y = inv_normal_copula_transform(y)\n", - "\n", - "f = Figure(resolution = (600, 300))\n", - "ax = Axis(f[ 1, 1])\n", - "scatter!(ax, x, y)\n", - "ax2 = Axis(f[1, 2])\n", - "scatter!(ax2, X, Y)\n", - "xlims!(ax2, (-3 , 3))\n", - "ylims!(ax2, (-3, 3))\n", - "f\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Julia 1.8.0", - "language": "julia", - "name": "julia-1.8" - }, - "language_info": { - "file_extension": ".jl", - "mimetype": "application/julia", - "name": "julia", - "version": "1.8.0" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/methods/infomeasures/mutualinfo/estimators/wip/copula/copula_nonparametric.jl b/src/methods/infomeasures/mutualinfo/estimators/wip/copula/copula_nonparametric.jl deleted file mode 100644 index 9bc3f10a8..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/wip/copula/copula_nonparametric.jl +++ /dev/null @@ -1,116 +0,0 @@ -import ComplexityMeasures: ProbabilitiesEstimator, DifferentialEntropyEstimator -import StateSpaceSets: AbstractStateSpaceSet - -export Copula -export empirical_copula_transformation - -""" - Copula <: MutualInformationEstimator - Copula(; est = Kraskov(k = 5), exact = false) - -A non-parametric copula-based mutual information estimator. - -It is typically many times faster to compute mutual information using `Copula` than -with other [`MutualInformationEstimator`](@ref)s, [`DifferentialEntropyEstimator`](@ref)s, -or [`ProbabilitiesEstimator`](@ref)s, because `Copula` only needs to compute the -entropy of a single (multivariate) variable, whereas the other methods explicitly -computes the entropy of several variables. - -If `exact == true`, then the exact empirical cumulative distribution function (ECDF) is -used to compute the empirical copula. If `exact == false`, then a fast sorting-based -approximation to the exact ECDF is computed instead (this breaks ties arbitrarily, -so be careful when applying it to categorical/integer-valued data). - -## Description - -Assume we have two `Dy`-dimensional and `Dy`-dimensional input [`StateSpaceSet`](@ref)s `x` and -`y`, both containing `N` observations. We can define the `Dx + Dy`-dimensional joint -StateSpaceSet `D = [Dx Dy]`. `Copula` returns the negative *copula entropy* of `D`, -which is equal to the mutual information between `Dx` and `Dy` ([Ma2011](@citet); -[Pál2010](@citet)). -""" -Base.@kwdef struct Copula <: MutualInformationEstimator - est::Union{ProbabilitiesEstimator, DifferentialEntropyEstimator} = Kraskov(k = 5) - exact::Bool = false -end - -function estimate(measure::MIShannon, est::Copula, x, y) - X = StateSpaceSet(x) - Y = StateSpaceSet(y) - D = StateSpaceSet(X, Y) - -entropy(measure.e, est.est, empirical_copula_transformation(D)) -end - -""" - - empirical_copula_transformation(x::AbstractVector) → empirical_copula::Vector{<:Real} - empirical_copula_transformation(x::AbstractStateSpaceSet{D, T}) → empirical_copula::StateSpaceSet{D, T} - -Apply the empirical copula transformation (as described in [Pal2010](@cite); -see a summary below) to the each point `xᵢ ∈ x`, where -the `xᵢ` can be either univariate (`x::AbstractVector`) or multivariate -(`x::AbstractStateSpaceSet`) to compute the empirical copula (here called `empirical_copula)`. 
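In practice, the transformation described below is a per-marginal rank normalization: each value is replaced by its empirical CDF value. A plain-Julia illustration of that step for a single marginal (standard ≤ counting convention; the package's rank-based version breaks ties arbitrarily, as noted above):

```julia
# Empirical CDF / copula transform of one marginal (illustrative sketch).
empirical_cdf(x) = [count(≤(xᵢ), x) for xᵢ in x] ./ length(x)
x = [0.3, 1.2, 0.7, 2.5]
u = empirical_cdf(x)     # [0.25, 0.75, 0.5, 1.0] — values in (0, 1]
```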
- -## Description - -## Empirical copula transformation - -Assume we have a length-`n` sample of data points ``\\bf{X}_{1:n} = \\{\\bf{X}_1, \\bf{X}_2, \\ldots, \\bf{X}_n \\}`` where ``\\bf{X}_i \\in \\mathbb{R}^d``, which is assumed sampled from some distribution ``\\mu`` with density function ``f``. Let ``X_i^j \\in \\mathbb{R}`` denote the j-th coordinate of ``\\bf{X}_i``. Assume these points are represented as the `d`-dimensional [`StateSpaceSet`](@ref) which we call `S` (indexed like a matrix where rows are samples). - -The *empirical* cumulative distribution function (CDF) for the j-th column of `S`, based on the sample ``\\bf{X}_{1:n}``, is defined as -```math -\\hat{F}_j(y) = \\dfrac{\\left| \\{ 1 \\leq i \\leq n, y \\leq X^j_i \\} \\right|}{n}, -``` - -for any input value ``y \\in \\mathbb{R}`` (which is in general completely unrelated to the j-th column of our sample points). Given the samples ``\\bf{X}_{1:n}``, we can also define a "multivariate empirical CDF" for `S`, ``\\bf{\\hat{F}} : \\mathbb{R}^d \\to [0, 1]^d``, as - -```math -\\hat{\\bf{F}}(\\bf{y}) = (\\hat{F}_j(x^1), \\hat{F}_j(x^2), \\ldots, \\hat{F}_j(x^d)), -``` - -for any point ``\\bf{y} \\in \\mathbb{R}^d`` (which is in general completely unrelated to our sample points, except sharing the property of being `d`-dimensional). Think of this as checking, for each coordinate ``y^j \\in \\bf{y}``, how this coordinate ranks among values in `S[:, j]`. -The map ``\\hat{\\bf{F}}`` is called the *empirical copula transformation*. - -Sidenote: Given only our sample, don't actually *know* what the underlying distribution ``\\mu`` is, nor what its cumulative distribution function ``F`` is. But if we did, the analogous map (the *copula transformation*) ``\\bf{F} : \\mathbb{R}^d \\to [0, 1]^d`` would be - -```math -\\bf{F}(\\bf{y}) = (F_j(x^1), F_j(x^2), \\ldots, F_j(x^d)). -``` - -In summary, we've defined the empirical copula *transformation* ``\\hat{\\bf{F}}`` as a map from some `d`-dimensional space to the `d`-dimensional unit square. The j-th axis of ``\\hat{\\bf{F}}``'s domain and the j-th axis of ``\\hat{\\bf{F}}``'s codomain (i.e. the hypersquare) are linked through the *ranks* of `S[:, j]`. - -## Empirical copula - -The *copula* of ``\\mu`` is the joint distribution ``\\bf{F}(\\bf{X}) = (F_1(X^1), F_2(X^2), \\ldots, F_d(X^d))``. The *empirical copula* (note the lack of "transformation" here) is the set of `d`-dimensional empirical-copula-transformed points ``\\hat{\\bf{Z}} = \\{\\bf{Z}_1, \\bf{Z}_2, \\ldots, \\bf{Z}_n \\} = \\{ \\hat{\\bf{F}}(\\bf{X_1}), \\hat{\\bf{F}}(\\bf{X_2}), \\ldots, \\hat{\\bf{F}}(\\bf{X_n}) \\}``. Note that ``\\hat{\\bf{Z}}`` is an *approximation* of a sample ``\\{\\bf{Z}_1,\\bf{Z}_2, \\ldots, \\bf{Z}_n\\} = \\{\\bf{F}(\\bf{X}_1), \\bf{F}(\\bf{X}_2), \\ldots, \\bf{F}(\\bf{X}_n)\\}`` from the true copula of ``\\mu`` (which we in general don't know, given only some sample points). -""" -function empirical_copula_transformation(x::AbstractStateSpaceSet{D, T}) where {D, T} - c = rank_transformation(x) ./ length(x) - C = StateSpaceSet(c...) -end - -function empirical_copula_transformation(x::AbstractVector{<:Real}) - rank_transformation(x) ./ length(x) -end - -# # An example worked out by hand. 
-# X = [ -# 1 8; -# 2 2; -# 3 6; -# 1 5; -# 2 2; -# 3 1; -# 1 8; -# 2 9; -# ] -# analytical_copula = StateSpaceSet([ -# 0.125 0.75; -# 0.5 0.25; -# 0.875 0.625; -# 0.25 0.5; -# 0.625 0.375; -# 1.0 0.125; -# 0.375 0.875; -# 0.75 1.0]) - -# @test copula_transform(StateSpaceSet(X)) == analytical_copula diff --git a/src/methods/infomeasures/mutualinfo/estimators/wip/copula/copula_parametric.jl b/src/methods/infomeasures/mutualinfo/estimators/wip/copula/copula_parametric.jl deleted file mode 100644 index 9060d86e9..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/wip/copula/copula_parametric.jl +++ /dev/null @@ -1,82 +0,0 @@ -using Statistics: quantile - -import ComplexityMeasures: entropy -using LinearAlgebra: det -using SpecialFunctions: digamma -using Distributions: Distribution, UnivariateDistribution, quantile, MvNormal, Normal - -export ParametricCopula - -""" - ParametricCopula <: MutualInformationEstimator - ParametricCopula(d = Normal()) - -A parametric copula-based mutual information estimator. - -Robin et al. (2016) A statistical framework for neuroimaging data analysis based on -mutual information estimated via a gaussian copula. -""" -Base.@kwdef struct ParametricCopula{D} <: MutualInformationEstimator - d::D = Normal() - debias = true -end - -function estimate(measure::MIShannon, est::ParametricCopula, x, y) - X = StateSpaceSet(x) - Y = StateSpaceSet(y) - D = StateSpaceSet(X, Y) - Tp = StateSpaceSet((copula_transform(c) for c in columns(D))...) - -entropy(est.d, Tp; debias = est.debias) -end - -function entropy(d::Normal, x::AbstractStateSpaceSet{D}; debias = true, base = 2) where D - N = length(x) - Σ = fastcov(x) - h = 1 / (2 * log(2)) - if debias - bias = D * log(2/(N-1)) - sum(map(i -> digamma((N - i) / 2), 1:D)) - h *= log((2*π*ℯ)^D * det(Σ)) - bias - else - h *= log((2*π*ℯ)^D * det(Σ)) - end - return _convert_logunit(h, ℯ, e.base) -end - - - -""" - inv_cdf_transform(x::AbstractVector, d::Distribution) → tx::Vector - -Map each value `xᵢ ∈ x` to the transformed value `t(xᵢ) ∈ [0, 1]` using -the inverse cumulative distribution function (CDF) (i.e.e quantile function) -of the distribution `d`. - -This function is meant to be used marginal empirical copula transforms (which -are uniformly distributed). Since the inverse CDF is a strictly increasing function, -the marginal copulas are preserved by the transformation. -""" -function inv_cdf_transform(x::AbstractVector, d::Distribution) - ex = empcdf(x) - t = zeros(length(ex)) - for (i, eᵢ) in enumerate(ex) - if eᵢ == 1.0 - t[i] = quantile(d, 1-eps()) - elseif eᵢ == 0.0 - t[i] = quantile(d, eps()) - else - t[i] = quantile(d, eᵢ) - end - end - - return t -end - -function entropy_debiased(d::Normal, x::AbstractStateSpaceSet{D}) where D - # Strictly speaking, `d` should be a MvNormal, but it doesn't matter here, - # because the marginal data have already been transformed to be normally distributed. - # `d` is purely for dispatch. 
- N = length(x) - Σ = fastcov(x) - h = 1 / (2 * log(2)) - h *= log(2*π*ℯ^D * det(Σ)) - D * log(2/(N-1)) - sum(map(i -> digamma((N - i) / 2), 1:D)) -end diff --git a/src/methods/infomeasures/mutualinfo/estimators/wip/gao2017.ipynb b/src/methods/infomeasures/mutualinfo/estimators/wip/gao2017.ipynb deleted file mode 100644 index d46488351..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/wip/gao2017.ipynb +++ /dev/null @@ -1,225 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m project at `~/Code/Repos/Temp/CausalityTools.jl`\n" - ] - } - ], - "source": [ - "using Pkg; Pkg.activate(\"/Users/work/Code/Repos/Temp/CausalityTools.jl\")\n", - "using Revise, CausalityTools" - ] - }, - { - "cell_type": "code", - "execution_count": 281, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "bias (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "using Neighborhood: Euclidean, Chebyshev, KDTree, Theiler, NeighborNumber\n", - "using Neighborhood: bulksearch\n", - "using Distances: evaluate\n", - "using DelayEmbeddings.StateSpaceSets: SubStateSpaceSet\n", - "using LinearAlgebra: det, norm\n", - "using StateSpaceSets: StateSpaceSet\n", - "using StaticArrays: MVector, MMatrix, SVector, SMatrix\n", - "\n", - "import Entropies: entropy\n", - "\n", - "\"\"\"\n", - " Gao2017 <: EntropyEstimator\n", - " Gao2017(k = 1, w = 1, base = 2)\n", - "\n", - "A resubstitution estimator from Gao et al. (2017). Can be used both for entropy\n", - "estimation and\n", - "\n", - "[Gao2017](@cite): Gao, W., Oh, S., & Viswanath, P. (2017, June). Density functional estimators\n", - " with k-nearest neighbor bandwidths. In 2017 IEEE International Symposium on Information\n", - " Theory (ISIT) (pp. 1351-1355). IEEE.\n", - "\"\"\"\n", - "Base.@kwdef struct Gao2017{B, M} #<: CausalityTools.InformationEstimator\n", - " k::Int = 1\n", - " w::Int = 0\n", - " base::B = 2\n", - " metric::M = Euclidean()\n", - "end\n", - "\n", - "function Î(q, est::Gao2017, x::AbstractStateSpaceSet{D}) where D\n", - " (; k, w, base, metric) = est\n", - " N = length(x)\n", - " tree = KDTree(x, metric)\n", - " Bk,d,α,K = bias(est)\n", - " idxs, ds = bulksearch(tree, x, NeighborNumber(k), Theiler(w))\n", - "\n", - "end\n", - "\n", - "# TODO: implement\n", - "multiplicative_bias(est::Gao2017) = 1.0\n", - "\n", - "Base.@kwdef struct LocalLikelihood{M} <: ProbabilitiesEstimator\n", - " k::Int = 5\n", - " w::Int = 0\n", - " metric::M = Euclidean()\n", - "end\n", - "\n", - "function point_densities(est::LocalLikelihood, x::AbstractStateSpaceSet{D}) where D\n", - " (; k, w, metric) = est\n", - " N = length(x)\n", - " # Modified heuristic from Gao et al. (2017): it is sufficient to consider the \n", - " # `K = max(floor(Int, log(N), k)` nearest neighbors neighbors of `x[i]` when \n", - " # estimating the local density. 
A global point-search is pointless and expensive.\n", - " kmax = max(floor(Int, log(N)), k)\n", - " tree = KDTree(x, Euclidean())\n", - " \n", - " # The bandwidth `bw[i]` for the point `x[i]` is the distance to the `k`-th nearest\n", - " # neighbor of `x[i]`.\n", - " idxs, ds = bulksearch(tree, x, NeighborNumber(kmax), Theiler(w))\n", - " bws = [d[k] for d in ds]\n", - " densities = zeros(N)\n", - "\n", - " S₁ = zeros(MVector{D, Float64})\n", - " S₂ = zeros(MMatrix{D, D, Float64})\n", - "\n", - " for i = 1:N\n", - " xᵢ = x[i]\n", - " bwᵢ = bws[i]\n", - " neighborsᵢ = @views x[idxs[i]]\n", - " densities[i] = point_density!(S₁, S₂, est, xᵢ, bwᵢ, neighborsᵢ)\n", - " end\n", - " return densities\n", - "end\n", - "\n", - "\"\"\"\n", - " point_density!(S₁, S₂, est::LocalLikelihood, xᵢ, bwᵢ, \n", - " neighborsᵢ::AbstractStateSpaceSet{D}) where D\n", - "\n", - "Estimate the density around point `xᵢ` using a local likehood estimator, which is \n", - "a generalization of kernel density estimation. This is done by fitting a local gaussian \n", - "distribution around `xᵢ` from its local neighborhood (represented the points `neighborsᵢ`).\n", - "The bandwidth `bwᵢ` is given by the distance from `xᵢ` to its `k`-th nearest neighbor. \n", - "\n", - "`S₁` is a pre-allocated length-`D` vector which holds the means, and `S₂` is a pre-allocated\n", - "`D`-by-`D` matrix which holds the covariances. Both `S₁` and `S₂` are zeroed every time\n", - "`point_density!` is called.\n", - "\"\"\"\n", - "function point_density!(S₁, S₂, est::LocalLikelihood, xᵢ, bwᵢ, \n", - " neighborsᵢ::AbstractStateSpaceSet{D}) where D\n", - " N = length(neighborsᵢ)\n", - " S₀ = 0.0\n", - " S₁ .= 0.0\n", - " S₂ .= 0.0 \n", - " \n", - " bwᵢ_sq = bwᵢ^2\n", - " twice_bwᵢ_sq = 2*bwᵢ_sq\n", - " for (k, nⱼ) in enumerate(neighborsᵢ)\n", - " dᵢ = evaluate(est.metric, nⱼ, xᵢ)\n", - " eᵢ = exp(-dᵢ / twice_bwᵢ_sq)\n", - " Δⱼ = (nⱼ - xᵢ)\n", - " S₀ += eᵢ\n", - " S₁ += eᵢ * (Δⱼ / bwᵢ)\n", - " S₂ += eᵢ * (Δⱼ * transpose(Δⱼ)) / bwᵢ_sq\n", - " end\n", - " # Weighted sample mean and sample variance\n", - " μ = S₁ / S₀\n", - " Σ = S₂ / S₀ - S₁*transpose(S₁) / S₀^2\n", - " \n", - " detΣ = det(Σ)\n", - " # if Σ is singular, we can't take its inverse either, so just return 0.0\n", - " # density straight away.\n", - " if det(Σ) ≈ 0\n", - " return 0.0\n", - " end\n", - " \n", - " num = S₀ * exp((-1/(2*S₀^2))*transpose(μ)*inv(Σ)*μ) \n", - " den = N*(2π)^(D/2)*(bwᵢ^D) * det(Σ)^(1/2)\n", - " return num/den\n", - "end\n", - "\n", - "function probabilities_and_outcomes(est::LocalLikelihood, x)\n", - " return Probabilities(point_densities(est, x)), x\n", - "end\n", - "probabilities(est::LocalLikelihood, x) = Probabilities(point_densities(est, x))\n", - "outcomes(est::LocalLikelihood, x) = x\n", - "total_outcomes(x, est::LocalLikelihood) = length(x)\n", - "\n", - "# TODO: implement. not sure how, though. 
Gao (2017) is not very clear...\n", - "bias(q, est::LocalLikelihood, x) = 1.0 " - ] - }, - { - "cell_type": "code", - "execution_count": 283, - "metadata": {}, - "outputs": [ - { - "ename": "MethodError", - "evalue": "MethodError: no method matching bias()\nClosest candidates are:\n bias(!Matched::Any, !Matched::LocalLikelihood, !Matched::Any) at ~/Code/Repos/Temp/CausalityTools.jl/src/methods/infomeasures/mutualinfo/estimators/nearest_neighbors/gao2017.ipynb:129", - "output_type": "error", - "traceback": [ - "MethodError: no method matching bias()\n", - "Closest candidates are:\n", - " bias(!Matched::Any, !Matched::LocalLikelihood, !Matched::Any) at ~/Code/Repos/Temp/CausalityTools.jl/src/methods/infomeasures/mutualinfo/estimators/nearest_neighbors/gao2017.ipynb:129\n", - "\n", - "Stacktrace:\n", - " [1] entropy(e::Renyi{Float64, Int64}, est::LocalLikelihood{Euclidean}, x::StateSpaceSet{5, Float64})\n", - " @ Main ~/Code/Repos/Temp/CausalityTools.jl/src/methods/infomeasures/mutualinfo/estimators/nearest_neighbors/gao2017.ipynb:129\n", - " [2] top-level scope\n", - " @ ./timing.jl:263 [inlined]\n", - " [3] top-level scope\n", - " @ ~/Code/Repos/Temp/CausalityTools.jl/src/methods/infomeasures/mutualinfo/estimators/nearest_neighbors/gao2017.ipynb:0" - ] - } - ], - "source": [ - "using CairoMakie\n", - "\n", - "#x = StateSpaceSet(rand(1.0:1.0:5.0, 30, 2) .+ randn(30, 2)*0.0001 )\n", - "x = StateSpaceSet(rand(10000, 5))\n", - "est = LocalLikelihood()\n", - "#x = StateSpaceSet([0.79, 0.5, 0.45, 0.46, 0.5, 0.46, 0.03, 0.11, 0.02, 0.2, 0.03, 0.5, 0.61])\n", - "@time entropy(Shannon(), est, x)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Julia 1.8.0", - "language": "julia", - "name": "julia-1.8" - }, - "language_info": { - "file_extension": ".jl", - "mimetype": "application/julia", - "name": "julia", - "version": "1.8.0" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/methods/infomeasures/mutualinfo/estimators/wip/mi_tests.ipynb b/src/methods/infomeasures/mutualinfo/estimators/wip/mi_tests.ipynb deleted file mode 100644 index 94db3a58e..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/wip/mi_tests.ipynb +++ /dev/null @@ -1,125 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "ename": "UndefVarError", - "evalue": "UndefVarError: StateSpaceSet not defined", - "output_type": "error", - "traceback": [ - "UndefVarError: StateSpaceSet not defined\n", - "\n", - "Stacktrace:\n", - " [1] top-level scope\n", - " @ ~/Code/Repos/Temp/CausalityTools.jl/src/methods/infomeasures/mutualinfo/mi_tests.ipynb:11" - ] - } - ], - "source": [ - "\n", - "using LinearAlgebra\n", - "using Distributions\n", - "μ₁ = [1, 1, 1]\n", - "μ₂ = [0, 0, 0]\n", - "Σ₁ = diagm(repeat([0.5], 3))\n", - "Σ₂ = diagm(repeat([0.5], 3))\n", - "N₁ = MvNormal(μ₁, Σ₁)\n", - "N₂ = MvNormal(μ₂, Σ₂)\n", - "n = 10000\n", - "D₁ = StateSpaceSet([rand(N₁) for i = 1:n])\n", - "D₂ = StateSpaceSet([rand(N₂) for i = 1:n])\n", - "\n", - "function kl_divergence(𝒩₁::MvNormal, 𝒩₂::MvNormal; base = 2)\n", - " μ = 𝒩₁.μ\n", - " Σ = 𝒩₁.Σ\n", - " μ̃ = 𝒩₂.μ\n", - " Σ̃ = 𝒩₂.Σ\n", - "\n", - " d = length(μ)\n", - " @assert length(μ) == length(μ̃) # dimensions must match\n", - " return 0.5 * (\n", - " transpose(μ̃ - μ) * inv(Σ̃) * (μ̃ - μ) + \n", - " tr(inv(Σ̃)*Σ) - \n", - " log(det(Σ̃) / det(Σ)) - \n", - " d\n", - " ) / 
log(ℯ, base)\n", - "end\n", - "\n", - "\n", - "function mutualinfo(𝒩₁::MvNormal, 𝒩₂::MvNormal; base = 2)\n", - " @assert length(𝒩₁.μ) == length(𝒩₁.μ) # dimensions must match\n", - " d = length(𝒩₁.μ)\n", - "\n", - " H1 = 0.5 * log(det(𝒩₁.Σ))+ d/2. * (1 + log(2π))\n", - " H2 = 0.5 * log(det(𝒩₂.Σ))+ d/2. * (1 + log(2π))\n", - " M = [𝒩₁.Σ zeros(d, d); zeros(d, d) 𝒩₁.Σ]\n", - " H12 = 0.5 * log(det(M))+ (2*d)/2 * (1 + log(2π))\n", - "\n", - " return H1 + H2 - H12\n", - "end\n", - "\n", - "MI = kl_divergence(N₁, N₂)\n", - "\n", - "\n", - "mutualinfo(Shannon(), Kraskov1(), D₁, D₂)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "ename": "UndefVarError", - "evalue": "UndefVarError: kl_divergence not defined", - "output_type": "error", - "traceback": [ - "UndefVarError: kl_divergence not defined\n", - "\n", - "Stacktrace:\n", - " [1] top-level scope\n", - " @ ~/Code/Repos/Temp/CausalityTools.jl/src/methods/infomeasures/mutualinfo/mi_tests.ipynb:1" - ] - } - ], - "source": [ - "MI = kl_divergence(N₁, N₂)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using CairoMakie" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Julia 1.6.4", - "language": "julia", - "name": "julia-1.6" - }, - "language_info": { - "file_extension": ".jl", - "mimetype": "application/julia", - "name": "julia", - "version": "1.8.0" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/methods/infomeasures/mutualinfo/estimators/wip/tests.ipynb b/src/methods/infomeasures/mutualinfo/estimators/wip/tests.ipynb deleted file mode 100644 index 884b8a762..000000000 --- a/src/methods/infomeasures/mutualinfo/estimators/wip/tests.ipynb +++ /dev/null @@ -1,570 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let the joint StateSpaceSet $X := \\{\\bf{X}_1, \\bf{X_2}, \\ldots, \\bf{X}_m \\}$ be defined by the\n", - "concatenation of the marginal StateSpaceSets $\\{ \\bf{X}_k \\}_{k=1}^m$, where each $\\bf{X}_k$ \n", - "is potentially multivariate. Let $\\bf{x}_1, \\bf{x}_2, \\ldots, \\bf{x}_N$ be the points \n", - "in the joint space $X$.\n", - "\n", - "The `KraskovStögbauerGrassberger2` estimator first locates, for each $\\bf{x}_i \\in X$, the\n", - "point $\\bf{n}_i \\in X$, the `k`-th nearest neighbor to $\\bf{x}_i$, according to the \n", - "maximum norm (`Chebyshev` metric). Let $\\epsilon_i$ be the\n", - "distance $d(\\bf{x}_i, \\bf{n}_i)$.\n", - "\n", - "Consider $x_i^m \\in \\bf{X}_m$, the $i$-th point in the marginal space $\\bf{X}_m$. For each \n", - "$\\bf{x}_i^m$, we determine $\\theta_i^m$ := the number of points $\\bf{x}_k^m \\in \\bf{X}_m$ that \n", - "are a distance less than $\\epsilon_i$ away from $\\bf{x}_i^m$. 
That is, we use the \n", - "distance from a query point $\\bf{x}_i \\in X$ (in the *joint* space) to count neighbors of \n", - "$x_i^m \\in \\bf{X}_m$ (in the marginal space).\n", - "\n", - "Mutual information between the is the variables $\\bf{X}_1, \\bf{X_2}, \\ldots, \\bf{X}_m$ is\n", - "the estimated as \n", - "\n", - "$$\n", - "\\hat{I}_{KSG2}(\\bf{X}) = \n", - " \\psi{(k)} - \n", - " \\dfrac{m - 1}{k} + \n", - " (m - 1)\\psi{(N)} - \n", - " \\dfrac{1}{N} \\sum_{i = 1}^N \\sum_{j = 1}^m \\psi{(\\theta_i^j + 1)}\n", - "$$" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m project at `~/Code/Repos/Temp/CausalityTools.jl`\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Method definition estimate(CausalityTools.CMI{Nothing}, Entropies.Renyi{Q, B} where B where Q, CausalityTools.FrenzelPompe{MJ, MM} where MM where MJ, Any, Any, Any) in module CausalityTools at /Users/work/Code/Repos/Temp/CausalityTools.jl/src/methods/infomeasures/conditional_mutualinfo/estimators/FrenzelPompe.jl:34 overwritten at /Users/work/Code/Repos/Temp/CausalityTools.jl/src/methods/infomeasures/conditional_mutualinfo/estimators/MesnerShalisi.jl:22.\n", - " ** incremental compilation may be fatally broken for this module **\n", - "\n" - ] - } - ], - "source": [ - "using Pkg; Pkg.activate(\"../../../../../../\")\n", - "using Revise, CairoMakie, CausalityTools, StateSpaceSets\n", - "using Distributions: MvNormal\n", - "using Statistics\n", - "using LinearAlgebra\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "ρ = 0.1\n", - "N2 = MvNormal([0, 0], [1.0 ρ; ρ 1.0])\n", - "X, Y = columns(StateSpaceSet([rand(N2) for i = 1:100]));" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "new_cycle_theme (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "function new_cycle_theme()\n", - " # https://nanx.me/ggsci/reference/pal_locuszoom.html\n", - " my_colors = [\"#D43F3AFF\", \"#EEA236FF\", \"#5CB85CFF\", \"#46B8DAFF\",\n", - " \"#357EBDFF\", \"#9632B8FF\", \"#B8B8B8FF\"]\n", - " cycle = Cycle([:color, :linestyle, :marker], covary=true) # alltogether\n", - " my_markers = [:circle, :rect, :utriangle, :dtriangle, :diamond,\n", - " :pentagon, :cross, :xcross]\n", - " my_linestyle = [nothing, :dash, :dot, :dashdot, :dashdotdot]\n", - " Theme(\n", - " fontsize = 16, font=\"CMU Serif\",\n", - " colormap = :linear_bmy_10_95_c78_n256,\n", - " palette = (\n", - " color = my_colors, \n", - " marker = my_markers, \n", - " linestyle = my_linestyle,\n", - " ),\n", - " Axis = (\n", - " backgroundcolor= (:white, 0.2), \n", - " xgridstyle = :dash, \n", - " ygridstyle = :dash\n", - " ),\n", - " Lines = (\n", - " cycle= cycle,\n", - " ), \n", - " Scatter = (\n", - " cycle = cycle,\n", - " ),\n", - " Legend = (\n", - " bgcolor = (:white, 0.1), \n", - " framecolor = (:white, 0.2),\n", - " labelsize = 13,\n", - " )\n", - " )\n", - "end" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "getellipsepoints (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "function getellipsepoints(cx, cy, rx, ry, θ)\n", - 
"\tt = range(0, 2*pi, length=100)\n", - "\tellipse_x_r = @. rx * cos(t)\n", - "\tellipse_y_r = @. ry * sin(t)\n", - "\tR = [cos(θ) sin(θ); -sin(θ) cos(θ)]\n", - "\tr_ellipse = [ellipse_x_r ellipse_y_r] * R\n", - "\tx = @. cx + r_ellipse[:,1]\n", - "\ty = @. cy + r_ellipse[:,2]\n", - "\t(x,y)\n", - "end" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "anim_entropy (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "using StaticArrays, Neighborhood, SpecialFunctions\n", - "function anim_entropy(e::Renyi, est::Lord, x::AbstractStateSpaceSet{D}; j = 1) where {D}\n", - " # TODO: only for Shannon()\n", - " (; k, w, metric) = est\n", - " N = length(x)\n", - " tree = KDTree(x, metric)\n", - " knn_idxs, ds = bulksearch(tree, x, NeighborNumber(k), Theiler(w))\n", - "\n", - " # Decrease allocations and speed up computations by pre-allocating.\n", - " # We're only dealing with matrices which in either axis has a maximum dimension of `k`,\n", - " # so this is still far within the realm of where StaticArrays shines.\n", - " # -------------------------------------------------------------------------------------\n", - " # Contains neighborhood-centroid-centered vectors, where\n", - " # `C[1]` := the centered query point\n", - " # `C[1 + j]` := the centered `j`-th neighbor of the query point.\n", - " C = MVector{k + 1, MVector{D}}(@SVector zeros(D) for i = 1:k+1)\n", - "\n", - " # Centered neighbors need to be ordered row-wise in a matrix. We re-fill this matrix\n", - " # for every query point `xᵢ`\n", - " A = @MMatrix zeros(k+1, D)\n", - "\n", - " h = 0.0\n", - " rs = zeros(D)\n", - " g2 = gamma(D/2 + 1)\n", - " for (i, xᵢ) in enumerate(x)\n", - " neighborsᵢ = x[knn_idxs[i]]\n", - "\n", - " # Center neighborhood around mean.\n", - " c = CausalityTools.centroid(xᵢ, neighborsᵢ, C)\n", - " CausalityTools.center_neighborhood!(c, C, xᵢ, neighborsᵢ) # put centered vectors in `M`\n", - " CausalityTools.fill_A!(A, C, D)\n", - "\n", - " # SVD\n", - " U, Σ, Vt = svd(A)\n", - "\n", - " # How many of the `k` neighbors of `xᵢ` are within the ellipsoid\n", - " # whose semi-axes have lengths rs[1], rs[2], ..., rs[D]?\n", - " σ₁ = Σ[1]\n", - " ϵᵢ = last(ds[i])\n", - " rs .= ϵᵢ .* (Σ ./ σ₁) # Scale semi-axis lengths to k-th neighbor distance\n", - "\n", - " # Matrix representation of the ellipse, relative to origin.\n", - " Λ = CausalityTools.hyperellipsoid_matrix(Vt, rs)\n", - " nns_centered = (pt - xᵢ for pt in neighborsᵢ)\n", - " # After the ellipse is found e must now center on `xᵢ`\n", - " inside = [transpose(centered_p) * Λ * (centered_p) <= 1 for centered_p in nns_centered]\n", - " k_inside = count(inside)\n", - "\n", - " if i == j\n", - " return xᵢ, C, A, U, Σ, Vt, rs, neighborsᵢ, c, k_inside, ϵᵢ\n", - " end\n", - "\n", - " ϵᵢᵈ = last(ds[i])^D\n", - "\n", - " # σ₁ = Σ[1]\n", - " h += 1 / (prod(rs) * N * ϵᵢᵈ * π^(D/2))\n", - " end\n", - "\n", - " return h / log(ℯ, e.base)\n", - "end\n" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAyAAAAJYCAIAAAAVFBUnAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nOzde1xUdf4/8DMXYLjDDAPD/SqCoIIi3gXBSyaKWppmmUapWblu319Za+3Wttvu1n7dtra+1aqJrqmZ5jUv4QUVEEQQUUHu9zvDZWBgruf3x7TTxAwmzOGcmeH1/KOH856ZM29D4eXn8zmfD4skSQIAAAAAqMNmugEAAAAAS4OABQAAAEAxBCwAAAAAiiFgAQAAAFAMAQsAAACAYghYAAAAABRDwAIAAACgGAIWAAAAAMUQsAAAAAAohoAFAAAAQDEELAAAAACKIWABAAAAUAwBCwAAAIBiCFgAAAAAFOMy3cAwffzxx7dv3w4ICGC6EQAAALBwVVVVUVFR27Zte/S3mOsI1u3bt6uqqpjuYiCZTCaTyZjuAn5BKpUqFAqmu4BfkEgkarWa6S7gZyRJdnd3M90F/IJKperp6WG6C/hJVVXV7du3h/QWcx3BCggICAgIePfdd5lu5Be6uroIgnB2dma6EfhZa2urvb29nZ0d043AzxoaGtzd3blcc/3+Y3lIkqyrq/P19WW6EfiZXC4Xi8UikYjpRoAgCGIYecNcR7AAAAAATBYCFgAAAADFELAAAAAAKIaABQAAAEAxBCwAAAAAiiFgAQAAAFAMAQsAAACAYghYAAAAABRDwAIAAACgGAIWAAAAAMUQsAAAAAAohoAFAAAAQDEELAAAAACKIWABAAAAUAwBCwAAAIBiCFgAAAAAFEPAAgAAAKAYAhYAAAAAxbhMNwAAAAAWiCTJkpKSuro6mUzm4eERERHB4/GYboo+CFgAAABAMYVCcfDgwcrKSm3l8uXLTz/9tJeXF4Nd0QlThAAAAECxy5cv66YrgiB6e3uPHDmiUqmYaolmCFgAAABAsYKCAv1iZ2dnTU0N/c0wAgELAAAAqKRUKqVSqcGnurq6aG6GKQhYAAAAQCUul2ttbW3wKTs7O5qbYQoCFgAAAFAsLCxMv2hraxsQEEB7L8xAwAIAAACKzZ8/n8/n61Y4HM7SpUsHG9myPNimAQAAACjm4OCwcePGmzdv1tTUyOVyd3f3mJgYd3d3pvuiDwIWAAAAUM/GxmbWrFlMd8EYTBECAAAAUAwBCwAAAIBiCFgAAAAAFEPAAgAAAKAYAhYAAAAAxRCwAAAAACiGbRoAAADA5LS3t7e0tNjY2Hh6etra2jLdzpAhYAEAAIAJkUqlR48eraio0Dzkcrlz5syZPXs2s10NFaYIAQAAwFSQJPntt99q0xVBEEql8tKlS3l5eQx2NQzUjGCVl5eXlpZGR0d7eHjoPyuXy8VisX7d1dXVxsZG/1k7OzsnJydKGgMAAAAz0tjYWF1drV/PysqaNGkS/f0Mm7EjWDKZLDk5OSQkZPny5SKR6J133tF/zdWrVz0NOXv2LEEQx48fH1B/7bXXjOwKAAAAzFFra6vBeltbm0qlorkZYxg7gvXee+9dvnw5MzNz6tSpqampKSkpMTExycnJuq+Jioo6deqUbuXChQuHDx+ePn06QRBlZWW+vr6ff/659lk/Pz8juwIAAADjqdXqrq4uBwcHKysrej6Rw+EYrLPZbDbbnNY1GRWwVCrV3r17N23apIlKGzZsSE1N3bNnz4CA5ebmlpSUpH3Y2tq6cePGffv2aeYTy8rKJk6cqPsCAAAAYFZ/f/+5c+fu3r2rUqlYLJaPj8/jjz8uEolG+nP9/PzYbLZarR5QDwgIYLFYI/3pFDIqDFZXVzc2NiYmJmoriYmJmZmZD3/X5s2bly9fvnDhQs3D0tLS0NDQ8+fPf/rpp2fOnOnr6zOmJQAAADASSZIHDhwoKCjQzMqRJFlbW7t3796Ojo6R/mgnJ6cZM2YMKHK53Hnz5o30R1PLqBGspqYmgiB0F7aLRKL29nalUsnlGr7ylStX0tLSysrKtJWysrK8vLzdu3d7eXmVlZX5+fmdOnUqPDxc910tLS3Nzc26ld7eXltbW4VCYUz/lNP0Y2pdjXKK/2K6EfiZUqlUKBQkSTLdCPyEJEnNF4XpRuBnzH7vKikpqaurG1CUyWRXr159/PHHR/rTZ8+e7eDgkJub297ebmVl5ePjEx8f7+bmxuAfUbVaPdQJSqMCVmdnJ0EQjo6O2oqjoyNJkh0dHUKhUP/1JEn+v//3/9544w3ts/39/S4uLqtWrdq5cyeHw6msrExMTExJSRkwDHb06FHdRVoEQfj7+4eFhbW1tRnTP+W6u7sJBCwTIxaL+/v7pVIp043Az8RiMYvFGuyfYUA/kiTFYjGPx2O6EfiZQqHo6Ohg6q9JaWmpwXpNTQ09P3n9/Pz8/PzUajWLxdLMDDL7E18qlTo4OAzpLUZ95QQCAUEQEolEW+nq6mKxWC4uLgZfn5aWVlhYeO7cOW2Fx+MVFRVpHwYGBm7fvn3z5s0dHR2urq7a+ksvvfTSSy/pXurdd98lCMLT09OY/ilnZ2dHEISzszPTjcDPuFyuvb295ksDJoIkSXd3dwQs00GSpEqlMrXvqKOcXC63tramYc2TQfb29gbrHA5ndP45GWq6Ioxcg6X5wmsmCjWampqEQuFg9xp8/vnnycnJbm5uD7lmYGAgwXRQBQAAGM0GS1GjM10Nj1EBy8/PLzAwMC0tTVtJS0ubM2eOwRe3tLScOnVq7dq1usW0tDRPT8/c3Fxt5c6dOzweLygoyJjGAAAAYNjCw8P1l/pwudyZM2cy0o85MipgsVisjRs3fvnll9evX1cqlf/+978zMjK0c3lfffXV6tWrZTKZ5uG5c+fUavWAs4Rmz57N5XI3b9589epViURy8uTJDz74YNu2bYNtgwEAAAAjjcPhPPvss8HBwdoKn89fu3atu7s7g12ZF2PXQLzxxhtVVVVxcXEcDofNZn/22WcJCQmap3Jycg4fPrxr1y4bGxuCIM6ePRsZGcnn83XfbmNjc/LkybVr18bFxREEwWazt27dqllfBQAAAExxdHR85plnpFJpW1ubo6Oji4uLeW1DxThjAxabzf7iiy8+/PDD8vLycePGabKUxq5du3bt2qV9ePDgQYNXiI6Ovnv3bllZmUQiCQsLG2xhHQAAANDMzs4Ox6sMDzV38Tg5OUVHRw/77Ww2OzQ0lJJOAAAAABhnTsf6AAAAAJgFBCwAAAAAiiFgAQAAAFAMOykDAADACFIoFJWVlWKx2NHR0c/PT/eEPQuGgAUAAAAjpaam5siRIz09PZqHHA5nwYIFsbGxzHZFA0wRAgAAwIiQSqUHDx7UpiuCIFQq1dmzZysqKhjsih4IWAAAADAi7t6929/fr1+/efMm/c3QDFOEAAAAMCLEYrHBent7u+7De/fu5ebmtra28ng8f3//uLg4JycnWhocQQhYAAAAMCKsra0N1nXPfUlPT79y5Yrm1729ve3t7Q8ePHjhhRdcXFxo6HDkYIoQAAAARoTucdEG611dXVevXh3wbG9v76VLl0a2s5GHgAUAAAAjwt/ff8KECQOKQqFw+vTpml
9XVVWp1Wr9N5aVlY14cyMMU4QAAAAwUpYtW+bt7V1YWNje3u7k5BQUFBQXF6edIpTL5QbfNVjdjCBgAQAAwEhhsVixsbGDbXzF5/MN1gUCwUg2RQdMEQIAAAAzAgMD3dzc9OtTpkyhvxlqIWABAAAAM9hs9urVq3UzFovFmjFjxuTJkxnsihKYIgQAAADGCASCl156qbq6uqWlxdbW1tfX19XVlemmKICABQAAAExis9mBgYGBgYFMN0IlTBECAACMUiqViukWLBZGsAAAAEYFmUyWk5NTW1vb398vl8t7e3t7enocHBzGjBkzb948Ozs7phu0KAhYAAAAlq+3t/frr78ecAggQRA9PT35+fm1tbUvvvjiYCfbwDAgYAEAANBELpdfvny5pKSkq6vL2dk5LCwsPj7eysqKho++ePGifrrSamtru3nz5syZM2noZJTAGiwAAAA6qFSq1NTUGzduiMVilUolFoszMzP37dtn8KwYyj148ODhL6iqqqKhjZGgUCh6e3uZ7mIgjGABAADQIT8/v6GhYUCxrq6uoKAgOjp6RD+aJMm+vr6Hv0apVI5oDyOhoqLiwoULzc3NBEHY2dnFxsbOnj2bzTaJwSOTaAIAAMDiVVZWDqlOIRaL5eTk9PDXuLu7j3Qb1Kqqqjpw4IAmXREEIZVKr1y5cu7cOWa70kLAAgAAoIxMJmtvbzc46zfYEBE9BxtPnDjxIc9yuVyzO53m0qVL+v+fc3NzOzs7GelnAEwRAgAAUKChoeHMmTOaSUAOhxMREbFo0SIej6d9gZubW0lJif4bhUIhDe3NmTOnqanJYAMODg7JyckGzwQ0WSRJ1tfXG6zX1ta6uLjQ39IACFgAAADGam9vT01N1Y5FqVSqO3fuiMXi559/nsViaYqTJ0/OyckZMI5lZWVFz7l7HA5n9erV5eXlNTU1MplMKBQKBIKenh5nZ2cvLy8u18zyAEmSJEkafIqemwZ+lZn9DwUAADBB169f15/pq6urKykpGTt2rOYhn89/6qmnvv/+e6lUqqnY29uvWLGCtuEWFosVEhISEhJCz8eNKDab7e7url2ApcvT05P+fvQhYAEAABjL4HQVQRB1dXXagEUQREhIyLZt22prazs7O11dXX19fc1u6Mh0zJ49+7vvvhtQHDt2rIms1sfXFQAAwFiDTVfp162srIKCgka+I8sXERHR39+flpbW399PEASLxRo/fvzixYuZ7usnCFgAAADG8vT0bGtr0697eXnR38zoMXny5KioqNbWVplM5u7ubmtry3RHP8M2DQAAAMaaMmWKdjG7lkgkCgsLY6Sf0YPD4YhEIn9/f5NKVwQCFgAAgJHkcvmpU6d0ZwNJkrS3t3/66adNZFdxoB++8AAAAEbJyMhobW3VrbBYrN7e3rq6OqZaAsZhDRYAAMBwkCR548aNjIyMwU4arqioCA8Pp7krMBEIWAAAAMORnp6enp7+kBcoFAramgFTgylCAACAIZPJZBkZGQ9/DT1n4IBpQsACAAAYssbGxsEOb9bg8XgPP18ZLBumCAEAAIZssJ1FNZycnJ544gkHBwfa+gFTg4AFAAAwZB4eHiwWSz9m2djYrFq1yt/fn8PhMNIYmAhMEQIAAAyZnZ1dTEyMfj0hISEoKAjpCjCCBQAAMByPPfYYm83Ozc1VqVQEQVhbW8fFxcXGxjLdF5gEBCwAAIDhYLPZjz32WEJCQmtrK5vNFgqFXK5l/lRVqVS5ubmVlZU9PT18Pn/ChAkhISFMN2XqLPOPAgAAAD2sra29vb2Z7mIEKZXK/fv319TUaB7W19cXFhbGxcXFx8cz2pepwxosAAAAGFROTo42XWldvXq1ubmZkX7MBQIWAAAADKq4uFi/SJLkgwcP6G/GjCBgAQAAwKD6+voM1qVSKc2dmBcELAAAABiUs7OzwbqLiwvNnZgXBCwAAAAYVFRUlH7RysoqIiKC/mbMCAIWAAAADCoyMnLatGm6FSsrqxUrVjg6OjLVklnANg0AAADwMAsXLhw/fnxFRYVEInFzcwsLC0O6+lUIWAAAAPArvLy8vLy8mO7CnGCKEAAAAIBiCFgAAAAAFEPAAgAAAKAYAhYAAAAAxRCwAAAAACiGgAUAAABAMQQsAAAAAIohYAEAAABQDAELAAAAgGIIWAAAAAAUQ8ACAAAAoBgCFgAAAADFELAAAAAAKIaABQAAAEAxBCwAAAAAiiFgAQAAAFAMAQsAAACAYghYAAAAABRDwAIAAACgGAIWAAAAAMUQsAAAAMCkqVQqplsYMi7TDQAAAAAYIBaL09LSqqqq+vv7XV1do6KiZs6cyWabx9gQAhYAAACYnM7Ozt27d0ulUs1DsVh86dKl1tbWFStWMNvYIzKPGAgAAACjytWrV7XpSquwsLC+vp6RfoYKAQsAAAAeRq1Wt7e3i8VikiRp+9Cqqqoh1U0NpggBAADAMJIkr127lpGRIZfLCYKwtbWNi4ubOnUqDR+tVCqHVDc1GMECAAAAw65cuXL58mVNuiIIoq+v79y5c9nZ2TR8tLu7+5DqpgYBCwAAAAyQy+WZmZn69atXr6rV6pH+9GnTpukXBQJBaGjoSH80JRCwAAAAwICWlhaD83FSqbSzs3OkPz0kJGTp0qXW1tbaiqen59q1azkczkh/NCWwBgsAAAAMYLFYzDYQHR09bty4hoaG3t5eNzc3Dw8Pxlt6dAhYAAAAYIC7uzuXy9UfxLKzs3N1daWnBxsbm8DAQHo+i1qYIgQAAAADrKysZs2apV+Pj483o5EkpmAECwAAAAybM2cOh8O5fv26TCYjCMLOzi4+Pn7KlClM92UGELAAAADAMBaLNWvWrJkzZ3Z0dLBYLBcXF4xdPSIELAAAAHgYFovF5/OZ7sLMIGABAADAL8hksuvXr5eXl0skEoFAMH78+EmTJmHsakioCVjl5eWlpaXR0dEeHh4GXyCXy8VisW7Fzs7Oycnp0a8AAAAANJDL5Xv27GlpadE87Onpqa6urq2tXbZsGbONmRdj7yKUyWTJyckhISHLly8XiUTvvPOOwZcdP37c85dee+21IV0BAAAAaHDjxg1tutIqKCiorq5mpB8zZewI1nvvvXf58uXMzMypU6empqampKTExMQkJycPeFlZWZmvr+/nn3+urfj5+Q3pCgAAAMOmOdqFzcbmRL+urKxssLq/vz/NzZgvowKWSqXau3fvpk2bpk+fThDEhg0bUlNT9+zZYzBgTZw4MSkpadhXAAAAGIaSkpL09PTm5maSJD08PGbPnh0eHs50UyZNe7TzAJqdGkyHVCrt7u7m8/m6x+mYDqMCVnV1dWNjY2JioraSmJj4ySef6L+ytLQ0Njb2/PnzJSUlQUFBCQkJtra2Q7oCAADAUN29e/fo0aPah42Njd9++21ycnJUVBSDXZk4Pp/f3NysXxcIBMO+ZktLS1lZmSYPjR071tnZ2YgGifr6+lOnTmmbHDt2bFJSkoODgzHXpJxRAaupqYkgCN1l6SKRqL29XalUcrm/uHJZWVleXt7u3bu9vLzKysr8/
PxOnToVHh7+6FcAAAAYEpIkL1y4oF//8ccfJ0yYgOnCwUyePLmoqGhAkcfjRUZGDu+C6enpV65c0T68cOHCkiVLJk6cOLyricXiffv26Q6zPXjwoKOjY+PGjSZ1DrRRIUZzmLajo6O24ujoSJJkR0eHUCjUFvv7+11cXFatWrVz504Oh1NZWZmYmJiSkpKZmfmIV/joo48+/PBD3Y+Ojo6OjIysq6szpn/KSSQS7X/BRLS3t9vZ2WlGTMFENDc3y+Vy/CPKdJAk2djYaHk34Xd3dxv8hiyVSgsLC40Zj6GBQqHo7OzUPweQBjY2NtOmTcvOziZJUlOxtbWdO3duR0dHR0fHUK9WV1enm64IglCpVCdOnOByucMbx8rIyNCfxGxpabl+/XpwcPAwLvgouru7dbc+eBRGfYPT/OnU/ePb1dWl2elV92U8Hk83CwcGBm7fvn3z5s0dHR2PeIWXX355/fr1upWPP/6Yy+V6eXkZ0z/lurq6CIIwcuQTqGVlZWVvb29nZ8d0I/ALmkNkme4CfkKSpFqtNrXvqMZ7yJ8xPp9v4r9fuVxuY2MjEokY+XQvL6+pU6dWV1d3d3cLBIKgoKBhr3O6ceOGfpEkyebm5uEthtP8qNUnlUpH7muqOxL0iIz6Bqf5wmum+TSampqEQqGVldXD36g5Gbutre0Rr2BnZzfgB6TmBaY2wKvpx9S6GuXY/8V0I/AzfFFMDUmSFvkVEQgEXC5XfxCIzWYLhUIT//0y/tfExcVlwGDH8AyWh7q6uqj93Y3o/65hjO8a1Yqfn19gYGBaWpq2kpaWNmfOnAEvS0tL8/T0zM3N1Vbu3LnD4/GCgoIe8QoAAABDZWVlZfBY4smTJ/N4PPr7GZ0Gm0AY9sSCt7e3wbqpDUkaFbBYLNbGjRu//PLL69evK5XKf//73xkZGS+99JLm2a+++mr16tUymWz27NlcLnfz5s1Xr16VSCQnT5784IMPtm3bxuFwHn4FAAAAY8ybNy8mJkY7/MBisaKjoxcuXMhsV6PK2LFjDdbDwsKGd8EZM2bY2NgMKIpEooiIiOFdcIQYuwbijTfeqKqqiouL43A4bDb7s88+S0hI0DyVk5Nz+PDhXbt2OTg4nDx5cu3atXFxcQRBsNnsrVu3vvvuu796BQAAAGOw2ezFixfPmTOnqamJJElPT89hLKYBY0RFRRUVFQ3YvDQ2NnbYe5a6uro+99xzp06damxsJAiCxWKFhYUtXrzY1OZ8Wdp7BIzR3d1dXl4+btw4/VCppVary8rKJBJJWFiYvb39MK6gS5PPtCnNRGCRuwlqbW3FIndT09DQgEXuJoUkybq6Ol9fX6YbgZ9pzvBlapE7tUiSvHXrVllZWVdXF5/Pj4yMpGSv176+Ps3GWr+68tt4w0gd1HyDc3Jyio6Ofvhr2Gx2aGioMVcAAAAAs8NisWJiYmJiYqi9rK2trSlvwWNa42kAAAAAFgABCwAAGCCXyylZowJgmrAGAgAA6KNUKtPT0wsKCiQSibW1dUBAwMKFC/l8PtN9AVAMAQsAAOhz5MiRkpISza/lcnlJSUldXd3GjRtxexBYGEwRAgAATaqqqrTpSksqlV67do2RfgBGDgIWAADQpLq6ekh1APOFKUIAAKCJSqUaUn1U6evry8vL02ye6enpaWr7ksNQIWABAABN3N3dh1Q3EVVVVffv3+/o6HB2dg4JCRn2GS8P0d7enpqaKpFINA/v3buXlZW1ePFiy9hodHRCwAIAoJhCoSgtLW1paeHxeH5+fkydQVtTU3P16tWGhga1Wu3p6Tlr1qzg4GBGOtEKCwvj8/lisVi3yGKxZsyYwVRLv+rKlSvp6enah7du3ZowYcKyZcu05xtS4uTJk9p0pdHb23vlyhVKdjwHRiBgAQBQqbm5+Ztvvunu7tZWJk6cmJycTO3P419VVlZ28OBBtVqteVhVVVVVVbVixYrx48fT2cYAXC73mWee+e677xoaGjQVW1vbxYsX+/n5MdjVQ9TX1+umK407d+6MGTMmMjKSqk+RSCQ1NTX69ZaWls7OThcXF6o+COiEgAUAQBmVSnX48GHddEUQREFBgVAonDlzJp2dnDt3TpuudIsRERHMnonr6ur64osvtra2tre3Ozg4eHh40HCQ3LDdv3/fYP3evXsUBqze3t7Bnurp6WEqYLW2tqanp9fX18vlcpFING3atDFjxjDSiZlCwAIAoExlZWVHR4d+PS8vj86A1d3d3d7erl+XSqXNzc2enp60dTIYoVAoFAqZ7uLX9fT0DKk+PA4ODoM95ejoSOEHPbqGhoa9e/cqFArNw4qKioqKiqSkpMmTJzPSjznCNg0AAJTp7Ow0WO/o6KDzWBjtz8UhPQX6nJychlQfHgcHh4CAAP26SCRiav/V8+fP6/9RuXDhglwuZ6Qfc4SABQBAGR6PN1idzjVYzs7OBufdWCyWQCCgrQ0LEBERYfALR/lStqVLlw6YCnR0dIyPj6f2Ux6RUqmsra3Vr8vlcoN1MAhThAAAlAkMDORyuUqlckA9NDSUzja4XO7kyZNv3LgxoD5+/Hh7e3s6OzF3IpFo/vz5P/74o+4AZGxsLOU7Nbi6um7evLmgoKCxsZEkSU9Pz3Hjxj1kbdaIesg53BjBenQIWAAAlLG3t1+wYMEPP/ygW3R2dk5MTKS5k3nz5vX19RUUFGgrYWFhSUlJNLdhAaZPnx4YGKjZB8vJyWnMmDEGp/OMZ2NjExsbq30ol8uZCli2trZ2dnZSqVT/KTc3N/1id3d3RUVFd3c3n88PDg62tbUd+R7NAAIWAACVpkyZ4ubmlpubq90Ha+bMmXZ2djS3weFwli1bNmfOnMbGRs0+WAZ/NMKjEIlEo2rDTxaLFRMTc/Xq1QH1gIAA/VsTbt68qXvLqo2NzYoVK2gesjVNCFgAABQLDAwMDAxkuguCIAg+n8/n85nuAsxPXFycRCLJz8/XVnx9fZ988skBL6uurh4wXiuTyY4cOfLKK68wtTzfdCBgAQAAwC+w2eylS5fOmDGjvr5eoVCIRCJvb2/99f55eXn671UqlQUFBXPmzKGlU9OFgAUAAAAGuLm5PXxm2eCubwRBDDgNaXTCNg0AAAAwHDY2Ngbrg+1XMqogYAEAAMBwhISEGKzjUB0CAQsAAACGJyYmxtfXd0Bx/PjxwcHBD3+jRCIpKirKy8urqamh85ADOmENFgAAAAwHh8NZt25dVlZWaWlpV1eXQCCIiIiYNGnSw9/1448/ZmVlaXOVQCB4+umnLe92VwQsAACA0aKvry8vL6+hoYEgCJFINHnyZCM3aeNyubNnz549e/Yjvv769euZmZm6lfb29n379m3dupXNtqhZNQQsAACAUaG9vT01NVUikWge3r9/Pzs7e926de7u7rT1cO3aNf1iV1dXWVmZhW1PalFpEQAAAAZz8uRJbbrS6O3tPXHiBG0NSKXSwU4zrK6upq0NeiBgAQAAWL6enp6amhr9ekNDQ2dnJz09GDzfUEN72I7FwBQhAAD8RC6Xl5SUaE9R9PHxYbojoMxD
jo6WSCQuLi409ODo6DjYUxY2P0ggYAEAgEZTU9M333yjO4U0fvz4ZcuWWdjS41HLwcFhsKecnJzo6cHGxmbMmDGlpaX6DZjI8Z0Uwl8bAAAg1Gr14cOHByzQKSwszMjIYKoloJa9vX1QUJB+3c/Pj86DmZctWzZgTb2Dg8OGDRtoa4A2GMECAIBBF+Lk5+c/+h34YOKWLFmyb98+3QMEnZ2dk5OT6ezBzs5u06ZNxcXFDQ0NarVaJBKNGzeOy7XANGKBvyUAABiqnp4eg/XOzk6SJFksFs39wEhwcXHZvHlzQUFBY2MjSZKenp4TJ04c7DzBkcNms8eNGzdu3OgOvXYAACAASURBVDiaP5dmCFgAAPCwU3uRriyJtbX1lClTmO5iVMAaLAAAIDw9Pa2srPTrlndvFwA9ELAAAEwISZKVlZWZmZlZWVl0noPL4/Eee+yxAUUXF5d58+bR0wCAhcEUIQCAqZBKpQcPHqyrq9NWAgMDn3rqKXpWyUyaNEkgEOTm5mr3wZoxY4atrS0NHw1geRCwAABMxYkTJ3TTFUEQlZWVP/zww/Lly+lpwN/f39/fn57PArBsmCIEADAJPT09JSUl+vW7d+/KZDL6+wEAYyBgAQCYBN3diXSp1equri6amwEAIyFgAQCYhIcstOLxeHR2AgDGQ8ACADAJQqHQ4IG7Hh4etB0VBwBUQcACADAJLBYrKSmJw+HoFq2srJKSkphqCQCGDXcRAoDlaGxszMrKampqYrPZXl5eM2fOFAgETDc1BMHBwS+88EJmZmZjY6P2t+Dm5sZ0XwAwZAhYAGAhSktLDx8+rFKpNA+bm5vv3r373HPPeXt7M9vYkIhEohUrVjDdBQAYC1OEAGAJSJI8ffq0Nl1pKBSKM2fOMNUSAIxmCFgAYAmam5u7u7v1642NjT09PfT3AwCjHAIWAFiCh2zF2d/fT2cnAAAEAhYAWICenp7GxkaDT3E4HOxxAAD0wyJ3ADBvd+7cOXXqlFKpNPjs+PHjra2taW4JAAABCwDMWHNz8/Hjx0mSNPhsUFDQokWLaG4JAIBAwAIAs3b79m2D6crPzy8hIcHPz4/FYtHfFQAAAhYAmDGxWGywbmNj4+/vT3MzAABaWOQOAGZssFOQcToyADALAQsAzNiYMWOGVAcAoAcCFgCYsYiICP0sNWbMmMjISEb6AQDQwBosADBjLBZr9erVN2/efPDggVgs5vP5Y8eOnTJlijFr20mSLCwsLCsr6+rqEggE48aNCwkJobBnABgNELAAwLyx2eypU6dOnTqVkquRJHnkyJGioiLNw5qamvz8/BkzZsyfP5+S6wPAKIEpQgCAn925c0ebrrQyMzNra2sZ6cfUDLblGAAMgBEsAICf6acrbd3X15fmZkwHSZK3bt3Kzc1ta2uzsbHRbDMmFAqZ7gvAdCFgAQD8TCqVGqz39vbS3IlJSUtLy8zM1PxaKpUWFxdXVlampKQgYwEMBlOEAAA/G+xkaGdnZ5o7MR2dnZ1ZWVkDijKZ7OLFi4z0A2AWELAAAH42YcIE/SKbzR4/fjz9zZiI6upqg0uvKisr6W8GwFxgihAA4GehoaEzZ87MyMjQVths9uOPP24Zc2H9/f2dnZ0uLi5D2uleoVAYrCuVSpIkR9tpjz09Pfn5+c3NzVwu19vbOyoqysrKiummwBQhYAEA/MK8efPCw8PLyso6OzsFAkF4eLhAIGC6KWO1tbWdPHlSey+kv7//0qVL+Xz+o7x3sHDp5uY22tJVfX39f/7zn/7+fs3DgoKCnJyc5557zsHBgdnGwARhihAAYCBvb++4uLjk5ORZs2ZZQLqSSqWpqam6O01UV1fv3btXGxQezs/Pz8vLS78+ffp0ylo0ByRJHjt2bMD/tLa2trNnzzLVEpgyBCwAAAuXm5vb09MzoCiRSG7duvUob9dsl+/j46OtcDichISEqKgoKrs0eQ0NDWKxWL/+4MEDpVJJfz9g4jBFCABg4err6w3WMzIyhEJhaGjor17B0dExJSWlqamppaWFx+N5e3vb29tT3aap0w+pGiqVSiqVDnb/KYxaGMECADOmUCjEYrFarWa6EZOmu1JK937Avr6+gwcP3r179xGvIxKJJkyYEBoaOgrTFUEQjo6OBuscDsfOzo7mZsD0YQQLAMxSa2vrmTNnampqSJJks9nh4eGLFi0anT/4f5W3t/eDBw80v9Zfln7+/Plx48bR3pT58fT05PP5+rOEYWFhXC5+mMJAGMECAPMjkUj27t2r3Z9JrVbfu3dv//79KpWK6dZMUUxMzGCjLwRB9PT0tLa20tmPmWKxWCtWrLC1tdUturm5LVq0iKmWwJQhdAOA+cnKytI/06a5ufnu3bsTJ05kpCVTZmtru379+mPHjg22GAtrtB+Rt7f3yy+/nJ+f39TUZGVl5e3tPXHiROyDBQYhYAGA+amrqxusjoBlEJ/PX7du3Ycffqg/yMfhcAQCAQaxHpG9vf2sWbOY7gLMAKYIAcD8GDy55SF1IAjC2to6JiZGvz5p0iQbGxv6+wGwbAhYAGB+DO57+ZA6aMyfPz86Olq7zp3FYkVFRS1cuJDZrgAsEqYIAcD8TJs2raCgQCaT6Rb5fL7Bo5pBi8PhLF26ND4+vqGhgSAILy8vze5N9Iz8KRSK3Nzc2tpaqVQqFAonTZrk6elJw+cCMAIBCwDMj6ur67PPPnvixAntyqGgoKClS5fibvlH4eTkRP+umP39/Xv37m1ubtY8rK6uzsvLW7p0KdbMgaXCNyMAMEve3t5btmzp7u7u6uoSCATY6dHEpaena9OVhlqtPnPmTEhICHYvA4uENVgAYMacnJx8fX2RrkxfUVGRflGhUJSXl9PfDAANELAAAGDE6e9bptHb20tzJwD0QMACAIARN9iqL2dnZ5o7AaAHAhYAAIw4gzd42tvbh4SE0N8MAA0QsAAAYMTNnDlz7NixuhUej/fkk09aW1sz1RLAiMJdhAAAMOI4HM5TTz1VXl5eXV3d19cnFAojIyNx/yBYMAQsAACgA4vFCgkJwZzgQ8jl8sbGxr6+Pjc3N/r3KgNqIWABAAAw79atWxcuXJDL5ZqHvr6+cXFxzLYExsAaLAAAAIY9ePDg9OnT2nRFEERtbe3Zs2fVajWDXYExELAAAAAYlpmZqV9sb2+vqKigvxmgBAIWAAAAw7Snag7Q0tJCcydAFWrWYJWXl5eWlkZHR3t4eAz2GpVKVVhYWFtbGxAQEBERwWb/lO3kcrlYLNZ9pZ2dHRb3AQDA6MHhcIZUB9Nn7AiWTCZLTk4OCQlZvny5SCR65513DL6ssrJyypQp0dHR69atmzBhwvTp06uqqjRPHT9+3POXXnvtNSO7AgAAMCP+/v4G6wEBAfQ2ApQxdgTrvffeu3z5cmZm5tSpU1NTU1NSUmJiYpKTkwe8bMuWLZ2dnWVlZcHBwUVFRUlJSc8+++y1a9cIgigrK/P19f3888+1L/bz8zOyKwAAADMSHx9fVlYmk8l0i2FhYQ+ZFwITZ1TAUqlUe/fu3bRp0/Tp0wmC2LB
hQ2pq6p49ewYErL6+vgsXLvzrX/8KDg4mCCI8PPydd97ZsGFDe3u7QCAoKyubOHFiUlKSMZ0AAACYLzc3t5SUlAsXLtTU1MjlcldX14kTJ44ZM4bpvmD4jApY1dXVjY2NiYmJ2kpiYuInn3wy4GXd3d0vvvii7ss0x6crlUqCIEpLS2NjY8+fP19SUhIUFJSQkGBra2tMVwAAAGZHKBSuXbuWJEmlUmllZaW/QBnMi1EBq6mpiSAI3QFMkUjU3t6uVCq53J+v7OHh8cUXX2gf1tXVffLJJ7NmzdK8saysLC8vb/fu3V5eXmVlZX5+fqdOnQoPD9f9oO++++7IkSO6FZIkAwIC2trajOmfct3d3QRBKBQKphuBn4nF4v7+fqlUynQj8DOxWMxms3W/SwCzSJIUi8X4x61JkcvlnZ2d+GtiIqRSqZ2d3ZDeYtRXrrOzkyAIR0dHbcXR0ZEkyY6ODqFQaPAthw4deu2112xtbf/zn/8QBNHf3+/i4rJq1aqdO3dyOJzKysrExMSUlJQBO4KEh4evWLFCt3L16lUrK6uh/m5HmmZMztS6GuWkUqmtrS2+KCZF8xXBTw7TQZIk/pqYGi6XK5PJ8EUxEVZWVkN9i1Hf4AQCAUEQEolEW+nq6mKxWC4uLvovLi8vT0lJuXHjxiuvvPLuu+86ODgQBMHj8YqKirSvCQwM3L59++bNmzs6OlxdXbX1iIiIiIgI3atp3mVqf/I0Y1em1tUop/mxgS+KSUHAMjUIWCaIy+X29/fji2Ii6A5YIpGI+O9EoUZTU5NQKNTvIz8/Pz4+fvr06UVFRYGBgQ+5pubZtrY23YAFAACjE0mSxcXFFRUVvb29AoFg/Pjx7u7uTDcF8OuMClh+fn6BgYFpaWmLFi3SVNLS0ubMmTPgZWq1etWqVYmJiUePHmWxWLpPpaWlPfvss6dOnYqJidFU7ty5w+PxgoKCjGkMAEYPkiTz8/Nzc3NbW1t5PJ6fn19CQoJmfB3MnVqtPnLkSHFxsbaSmZm5ePHiSZMmMdgVwKMwKmCxWKyNGzf+6U9/Wr58+bRp077++uuMjIy0tDTNs1999dWlS5dSU1Ozs7PLysqWLl26e/du3bevXbt29uzZXC538+bNO3fujI6Ovnz58gcffLBt2zbsXQsAj+jixYsZGRmaX/f09Ny/f7+ioiIlJcXNzY3ZxsB4ubm5uumKIAi1Wv3DDz8EBQUZXIsCYDqMXQPxxhtvVFVVxcXFcTgcNpv92WefJSQkaJ7Kyck5fPjwrl27NH89du7cOeC9SUlJIpHo5MmTa9eujYuLIwiCzWZv3br13XffNbIrAHgUEokkPz+/ubmZy+X6+PhERUUNY50Bszo7O/VPye3v77906dKqVasYaQkodPfuXf2iSqUqKirS7L8IYLKMDVhsNvuLL7748MMPy8vLx40bZ2Njo31q165du3btIghi48aNGzduHOwK0dHRd+/eLSsrk0gkYWFh9vb2RrYEAI+itrb2m2++6e/v1zy8c+dOdnb2+vXrNTegmIuqqiqSJPXrFRUV9DcDlOvp6TFY1725CsA0GXsWoYaTk1N0dLRuuhpaE2x2aGjo5MmTka4A6EGS5LFjx7TpSqO9vf3cuXNMtTQ8g207p1AoDAYvMC+DxX3d7YEATBM1AQsAzEt9fb1mH7sBiouLVSoV/f0M22Bb7gmFwgG31IA5ioyM1C9yOJwBm1EDmCAELIDRaLCZF5VKZV673vv7+2v2ixlg2rRp9DcDlIuJiRmQpdhs9uLFi7HCHUwfNvoDGI0Gm2HhcDjmtbEhi8VavXr1t99+29DQoKlwOJzZs2dHRUUx2xhQgs1mr1y5UrMPVk9Pj5ubG/bBAnOBgAUwGnl5efH5fP2jZMeNG2d2m6Q4Ozu/8MILjY2NLS0tPB7Px8fHvNbpw8OxWKzw8HDMCYLZwRQhwGjEYrFWrFgx4HBfoVC4cOFCployBovF8vLyioqKCgsLQ7oCAFOAESyAUcrb2/vll1++fft2Y2OjtbW1j4/PhAkTcD4gAAAl8M0UYPSyt7efOXMm010AAFggTBECAAAAUAwjWACWQy6Xp6enl5aWdnV18fn88PDwmTNnmt2idQAAC4CABWAhlErl3r17GxsbNQ+bmpqampqqq6ufeeYZbLkJAEAzTBECWIhbt25p05VWRUXF/fv3GemHIAilUonzagBgdMIIFoCFGOx444qKioiICDo7IUkyJycnJyeno6ODw+F4e3vPmzfPx8eHzh4AAJiFESwAaqjV6uPHP8jLi21rExYXjzt9ekNjYwOdDcjl8iHVR87FixfPnTsnFotJklQqldXV1Xv37q2rq6O5DSAIQq1Wl5SUXL169dq1a2VlZRhQBKANRrAAqHH+/DPLlh3U/NrNrS0srKiy8mpj4zVPTy96GnBzc6uqqjJYp6cBDYlEkpWVNaCoUqnS0tLWr19PZyfQ09Nz4MCBpqYmbcXX1/fpp5/m8XgMdgUwSmAEC4ACGRkXFi06OKAYGFhx8+Z7tPUQExPDZg/8G21lZUXzqXy1tbVqtVq/XlNTY7AOI+fEiRO66YogiNra2tOnTzPVD8CogoAFQIHW1gsG62PGZNDWg4eHx4oVK2xsbLQVe3v71atXOzs709YDQRBKpdJgnSRJBCw6SSSSsrIy/XpRUVF/fz/9/QCMNpgiBKAAiyU1WLez66GzjYiIiODg4Orqas0+WP7+/lZWVnQ2QBCEu7u7wbpAIMA5PHTq7Ow0WFer1V1dXZglBBhp+H4HQIkgg9W6uiB/f1r74PF4Y8eOpfUjf0kkEgUFBenf0ogzeWj2kAg14JBvABgJCFgwSrX/V29vb3d3t0Kh6Orqksvlvb29UqlUJpM95L02NjZ2dnZ2dnY2NjZOTk5cLtfGJrSlxd3dvWXAK7u7143kb8JEPfnkk8ePHy8pKdE85HK58fHx0dHRzHY12ri5ubm6unZ0dAyoi0QiJycngiD6+/srKyu7urqcnZ0DAgIYaBHAoiFggWXq7u6ura2tra0tLi5ubW1tampqbW1t10H5/eoLFszcvdvKx6de81Aut/7885VZWWfT0gr9/f39/f19fX39/PxovqePEba2tmvWrOns7Gxubra2tvb09MSEFP1YLNbSpUsPHDiguyrO2tp6yZIlBEEUFRUdP35cu4WHjY3NzJkzfX19mekVwBIhYIF5I0mypqamtLS05L+qq6tra2slEgnNnVy4kBEe7rBs2cKxY+2ampRpaUUPHhzQf5mdnV1AQEBgYOC4cePCwsIiIiLCwsJoXodODxcXFxcXF6a7GNUCAgI2btyYlZXV2NjIYrG8vb2nT5/O5/Pb2tq+++473XsOZDLZlStXwsPDR8M/AADogYAF5kStVpeVlRUUFBQUFBQXF2ty1fBuiXJ2dhYKhQKBwNHR0cHBwcrKytXVlcvlOjo68ni8hy9S6e/v7+vr6+npUSgUnZ2dSqWyu7u7s7OztbX1+PGMnp6HLWyXSqX379+/f//+mTNntEVvb+/w8PCIiIjJkydPmTIlNDRUf8
MFgGEQCoVLly4dUMzLy9O/o1OtVufl5S1YsICu1gAsHAIWmLTe3t7CwsKCgoLbt28XFBQUFhY+PL5o2dra+vr6ent7C4VCzYiRp6en4L/4fP7I3dHW39/f3t7e1tbW3t7e2NhYV1dXU1NTU1NTXV1dU1PT1dWl/5b6+vr6+vq0tDTNQycnJ03SmjJlSmxsrJ+f3wi1OqqoVKo7d+7U1dXJZDIPD4+JEydqliKNQu3t7QbrbW1tNHcCYMEQsMDklJeXZ2Vl3bhxIzMzs7CwcLB9lbSEQmFoaOjYsWPHjBkTGhoaHBzs7e2tnelobW21t7e3s7Mb+cZ/wuPxvL29vb29DT7b1dVVXV394MGDoqKi+/fvFxcXFxcXD1hT393dffny5cuXL2seBgYGzp07d+7cuQkJCV5eNO0Lb2H6+/v37dunPQz73r17GRkZq1evHp2Lu62trQ3WdTdRAwAjIWAB85RKZXZ29rVr127cuJGVldXSMvBePF0ikWjixIlRUVGRkZGhoaGhoaHmtdDH2dl5woQJEyZM0FZUKlVlZWVRUVF+fn5ubu7NmzcH7L5dWVlZWVm5Z88egiDCwsLmzp2bmJi4YMECR0dHurs3W2lpadp0pSGTyY4dO7Z169ZRuDtXSEjI3bt3DdbpbwbAUo267yxgIkiSvHPnzsWLFy9dupSenj7YxB+bzQ4LC4uKitKEqokTJ3p4eNDc6kjjcDghISEhISGa27sIgqitrb1582Zubm5OTk52drbu/xzNiNf//d//2djYJCQkJCcnL1myBMNav8pgnpBIJDU1NUFBhvcws2Djx4+/ffv2gJMrPT09x48fz1BHABYIAQtoVV1dff78+YsXL16+fLm1tdXga1xdXadNmzZt2rTp06dPnTp1FC6U8fX19fX1XbFiBUEQSqUyJydHM2OYmZnZ19eneY1MJjt79uzZs2dfeumlKVOmJCcnr1y5csyYMYw2bqIUCsVgG5vRf7epKWCz2WvXrs3MzCwuLu7s7HRxcRk7dmxAQABurQCgEAIWjDi1Wn3z5s2TJ0+ePn36zp07Bl8TFBQ0d+7cGTNmTJ8+PSwsjMVi0dykyeJyuTNmzJgxY8aOHTtkMllWVtalS5dOnz6dn5+veQFJkjk5OTk5OTt27Jg+ffqzzz67evVqV1dXZts2KVZWVjY2NgYzloODA/39mAIulztnzpw5c+ZoHpIkWVdXx2xLABYGAQtGilQq/fHHH0+dOnX69Onm5mb9F3h4eCQkJCQmJiYmJo7OtcZDZWNjEx8fHx8f/8c//rGmpubkyZMnTpxIT09XKBSaF2RlZWVlZf32t79NSkpat27dokWL6D+L0DRFRETk5eUNKDo6OvrTfJIRAIwaCFhAsb6+vh9++OHQoUNnzpzRzmdp8Xi8uXPnLly4MDExMSIiAiNVw+bn5/fKK6+88sornZ2dZ8+ePXLkyJkzZzQbc8tksqNHjx49etTDw2PTpk2bN2/29PRkul+GzZs3r6GhQffuAWtr6+XLl4/CFe4AQA98cwFqKBSKCxcuHDp06MSJE/rrWtzd3ZOSkpKSkhYsWGBvb89Ih5bKxcVlzZo1a9asEYvFhw4d2r9//40bNzRPNTc3//GPf/zrX/+6cuXKrVu3xsbGMtsqg2xtbVNSUgoKCmpra+VyuYeHR1RUlEVuoA8AJgIBC4xCkuS1a9f2799/7NgxsVg84NnIyMglS5YsXbo0NjYW62dHGp/P37Jly5YtW0pKSvbv35+amlpbW0sQhFwuP3DgwIEDB6ZOnfo///M/TzzxxOj8WnC53MmTJ0+ePJnpRmBoSJLs6elxcHDAgDeYFwQsGKbGxsbU1NQ9e/aUlpYOeGrs2LGrV69evXp1WFgYI72NcqGhoe+///4f/vCH77///pNPPrl+/bqmnp2dvWrVqsjIyN///vejNmZZnr6+vsLCwqamJg6H4+PjExkZyeFwmG6KGhKJ5OzZsyUlJSqVisvlhoaGPvbYY9j+DcwFAhYMjVqtvnTp0ldffXX8+HHt2moNX1/f5cuXr1y5ctasWUy1B1pcLnflypUrV67Mz8//4osv9u/fr1kSd/fu3VWrVkVERLzxxhvPPPMMYpZZa25u/s9//qPdKS03NzcrK2vdunV0Hl0wQuRy+ddff93R0aF5qFQq79+/39TUtGnTpsF2ogcwKfjeCo+qvr5+x44dPj4+8+fPP3LkiDZdubi4bNmyJTMzs7q6+p///CfSlamJjo7+8ssvq6qqduzYof3X/71795577rlJkyb9+OOPzLYHw0aS5LFjxwZs0tvc3Hz27FmmWqLQrVu3tOlKSywW37p1i5F+AIYKAQt+XXZ29po1awIDAz/44APteSMsFis+Pn7//v0NDQ2fffbZ9OnTsULClLm7u//pT3+qrKx86623tDGroKBgwYIFy5cvr6ioYLY9GIaWlhaD50oVFRX96gmepk+zgvDR6wCmBgELBqVQKA4dOqTZVP3QoUPaISsvL6+33nqrpKTk8uXLzzzzjK2tLbN9wqMTCAQffPBBRUXFm2++qd1j8/jx4xEREe+8845UKmW2PbOjWX9NkiQjn97d3W2wrlKpent7aW6GcoP9X1Wr1TR3AjA8WIMFBkgkks8///zTTz+tr6/Xrc+dO3fr1q1LliyxmFW0o5Obm9tf/vKXbdu2/e53v9u7d69are7v7//Tn/6Umpr66aefJicnM92gGZBKpRcuXCgqKpLL5VwuNyQk5LHHHqN534fBtqFns9kWsAbLw8OjuLhYvy4SiehvBmAYMIIFv9DR0fHee+8FBAS8+eab2nTF4/Gef/7527dvX7p0admyZTSnq/qK7j8sSXsl+MjWccf+sSlDIcO/X6nh4eGxe/fuGzduTJ06VVOpra1dtmzZ+vXru7q6mO3NxKlUqn379hUUFGh2dlUqlcXFxV9//bX+zrrD09bWlp+ff+PGjfLy8ocM2IhEIoNnIo0ZM8YCdvCPiYnRHx23tbWdMmUKI/0ADBVGsOAnra2t//jHPz777DPdeQdPT88tW7Zs2rRJKBQy0tXp3Q/SXzztQPZoPr67qHDb/rvvFK0V+Y/SI+QoN2XKlKysrNTU1Lfeekuz0XlqauqlS5f27Nkzb948prszUXfu3NE//amrqysnJycuLs6YK5Mkefbs2Zs3b2orAoFgzZo1AoFA/8UsFmv58uUHDhzQPWbRxcVl0aJFxvRgIhwcHNatW/f9999r15m5u7svX74cOxWDuUDAAqKpqemjjz768ssvdddthIaGvvXWW08//TSDd0QrFeqLm8+7kL+4Scq9r+n9Oac/q17NVFeWh8VirV+/ftmyZVu3bt2/fz9BELW1tQsWLNi6devf/vY3Gxsbphs0OYOtsy4vLzcyYGVnZ+umK4Ig2tvbDx069NJLLxncUMPX13fLli23bt1qbGzkcrk+Pj6TJ0+2mC+ZSCTavHlzZ2dnR0eHq6uri4sL7qQBM4KANapJJJKPPvpo586dutEqMjLyd7/73apVqxhfaHX0kyIX5cD7tAmCcK6pVMjUV
jaY4KaSi4vLvn37li9fvnnz5paWFpIk9+7dm56e/v333+Mo7gFUKpXBel1dXWVlZWBg4LCvnJubq19sa2urrq4e7LJOTk5z584d9ieaOBaL5erqanAmFMDE4UfUKCWXyz/99NPg4OD3339fm64mTZp09OjRgoKCNWvWMJ6uCIKoKRx49o6GDSGvKu6kuZlRgsfj+fr6an7d1dV1+/btmJiYtLQ0ZrsyNYOtsyZJ8sSJE8O+qZAkSf3zpjTa29uHd00AYAoC1qhDkuShQ4fCw8O3bt3a2tqqKUZFRZ0+fTo3N3fFihWms7W3wNfwYgsVwfEJdqK5GYuXlpY2bdq0xx9//NatW25ubkuWLNHMNLW3tz/22GN///vfmW7QhERFRQ12B19XV5d2r7ihYrFYg83IW8ysH8DoYSo/SoEe2dnZsbGxa9as0W4sGRAQsH///lu3bi1evNjU1jesfC2yl2XgbvNWV19bB8xuU0YTrebPn5+dne3m5vaHP/yhvLz85MmTV65c8fb2JghCpVK9/vrr69atG3A40qhla2u7bNmywZ41ZjuxkJAQ/SKHwzFm2hEAGIGANVq0tbW9+OKLM2bM0C7yEAgEO3fuctRYqAAAIABJREFULC4uNtkD6Rxdrf1enaP85UpBCdvx+W/nM9WShdGNVkKh8K9//Wt1dfW7777r5OREEMS0adNyc3O1Zx/t37//iSee6O/vZ7RlU+Hv7z/YNLqLi8uwLztv3jz9u+QSEhIGGzAzUkdHR2Fh4c2bN6urq5naLhXAUmEYwPKp1eqvvvpqx44d2uUdtra227Zt2759O837Ig7DK/+c+uNU4fG3c4nmTpLD5Y1x2/ZNnN9YU2/b9KWlpb399tvZ2dkEQQiFwv/5n/959dVX9XenFIlEFy9efPXVV7/66iuCIE6dOvX444+fOHFCe9iOZZDL5bdu3aqrq1MoFB4eHpMmTfrVVdVcLjciIuLOnTsD6t7e3m5ubsPuxMXFZePGjVevXq2pqenv73d3d58yZcrYsWOHfcHBkCR54cKF7Oxsba7i8/nPPvusMekQAHQhYFm4nJycl19+WffWpOTk5I8//tiM7gub/3TQ/KeDmO7CcjxitNKytrb+8ssv3dzcPvjgA4IgLl++nJiYePbsWYM7M5mjnp6evXv3aleRl5aWZmdnr169OijoV/7ULVq0SCwW19XVaSt8Pv+JJ54wsh8nJ6ekpCQjL/KrsrOzb9y4oVsRi8WHDx/euHGjqS0VADBTCFgWq6+v7+233/7444+1O0EHBwd/8sknjz/+OLONAVPS0tJ27NiRk5NDPFq00vXnP//ZxcVl+/btJEnevHlz/vz5V65c0cwkmrvz588PuEdPoVB8//3327Zte/i9tDweb8OGDaWlpXV1dSqVSiQSjRs3jss1j2+qA3bb0mhqaqqpqfH396e/HwDLYx7fC2CosrKyNmzY8ODBA81DW1vbN99884033uDxeMw2BowwJlppvf76687Ozi+99JJarc7Pz1+xYsUPP/zA4D60lCBJ0uCBdz09PTU1Nb+6tJzNZo8dO3YkpvBGFEmSHR0GdpgjCKKtrQ0BC4ASpri0GYzR39//+uuvz549W5uuFi5ceO/evd///vdIV6NQWlra1KlT58+fn5OTo1nGXlVVtX379uEdBrxx48avvvpKM4V08eLF9evXP+SkPLMgk8mUSqXBp3R337UwLBZrsJE27AcBQBWMYFmU7Ozs9evXa/9F7uTk9L//+78pKSlYVGGaSvLF335wp62008XPcfHW8CnzvCi8OCWjVvpSUlKamprefvttgiAOHjzo4eHxj3/8g4J2GWJjY2Ntba05s3kAy5gAHUxwcLD+0B2HwzGj1ZkAJg4jWBaCJMkPP/xw1qxZ2m+aCxYsKCwsfOGFF5CuTNPfU66nTvpS9V26a0EB69T1U/N375h/npIrUztqpW/Hjh1btmzR/Prjjz/eu3cvJZdlBIvFioyM1K+7urr6+PjQ3w9t5s2bZ2trO6A4d+7cEdoPAmAUQsCyBG1tbUlJSdu3b9dMdjg5OX355Zfnzp3z8/NjujUwLON0TdeeK9bEzwMnHEJtlXZj/59vG3PZkY5WWp988on2drlXXnnl/v371F6fTvPnz9fsp6plZ2f35JNPmubmcFQRCAQvvvjixIkTXV1dbW1tAwICEhISZs6cyXRfAJYDU4Rm79q1a08//bT2XvEZM2Z88803WKZq4r57J9+FGHhgMIsgMv9179kdUQRB7Hv/dt7RKrm4z9HfefmOCdMe+5XRlLS0tN/97neaW8Pc3d1fe+21rVu36g9RUIXD4ezbt6+oqOj+/fu9vb1PPfVUdnY25TGOHjwe7/nnn793715dXZ1cLvfw8JgwYYKZ/l6GxNXVVbsfPUmSuvtNAIDxELDMGEmSf/nLX/7whz9oBq5YLNbrr7/+5z//2VxuFB/N+hslBuvszh6VSv1q4GGP2pKfdrqsJU4vyk9/Zs72/bMNvmWo0aq5tuf/XsluvdPG4rBF0cLffDXD0XU4dwLa2dl9++23sbGxUqn07t27v/nNb/79738P4zqmgM1mjx8/fvz48Uw3Qpnm5uacnJympiZra2tvb+9p06Zh7g+AZvhJbK56e3ufe+65o0ePah66ubmlpqZijytzwbEznGnU1lYfrEr3qC3RLVoRSsl/ruY9HzJprqdufRijVhmna44tO+ak6nInCIIgVOXE28cLX7z6VOR092H8LiIiIj799NOUlBSCIHbt2pWcnEzDDpm0IUnSTNcvlpSUfPvttyrVT0OkVVVVt2/ffv755/l8PrONAYwqlrzIwILV1dXNmTNHm65mz56dn5+PdGVGxi0zPIcrmObTeL5cv25FKA/9Pk/7MC0tLTY2dv78+Tdv3nR3d9eutfrVOcFvVp51UnXpVvhK8eeP/zD038FPnn/++bVr12p+/Zvf/Kavr2/YlzIRPT09J06c+Mc//vH+++//61//Sk9P1yYVs6BSqU6ePDmg597e3rNnzzLVEsDohIBlfnJycmJjY/Pyfvpx+9vf/vbSpUuWfceT5dmyc2qTcOBJLC12nm9+G2/db3j7JWmjlDAiWhEEceNcnXt/k35d2FlT86BLv/6I/vnPf2qOzamoqPjb3/427OuYgr6+vt27d9++fbu7u5skyfb29itXrhw+fNiMDkKur683uINXeXn5YDt+AcBIQMAyM0eOHJk7d25jYyNBEFwu91//+tfOnTux6MocfVq/VrloerO9dzfbqcXOUzJt8l/r1js4WyusDO8Hq+Kphx2tNEputhmsswny3o2WYf42CEIgEPz5z3/W/Pqvf/1rSUnJw19vyrKysjo7OwcUS0tLy8sNDCuaJqlUarBOkmR/fz/NzQCMZvjBbE7+/ve/v/HGG5p/TAsEgqNHj8bFxTHdFAwT14r9/g8L9OsOMb7E9YHjTGqCdbEutbTrpjF3CHqNcaoc5KmAca5DvZquF198cc+ePTk5OTKZ7M033zx27JgxV2NQZaXh/0OVlZUhISE0NzM8Li4uBuvW
1taj4dZIANOBESyz8fbbb7/++uuadBUeHp6dnY10ZZHePjmvxf4X2zKRBHHb1aHL5tYwRq10zVsdJOYaWObcYisKn+I2zHYJgiAINpv92WefaTaOOn78uPluizXYJJpCoaC5k2Hz8PDw9PTUr0+YMMGyd/YCMDX4+2YG1Gr1q6++qp2FiY+Pz8rKCg4OZrYrGCGOrtb/27JePCOiyt6zyUpY6uCR5t06Y7vCmGilNeujBBnrF1OQUpbdsl3zjWuZIAgiJiZm6dKlxH8PFTD+goxwczMcNIVCIc2dDBuLxXryySddXX8xJBkYGLhggYHhUgAYOZgiNHUqlWrjxo179uzRPFy8ePGRI0dGbgNJYFxaWtpbb72Vm5tL/HfzhX9v/Yiqr/jKbRHBE/l7X8lS1nSQLJZNEH/z7llhk40avtJ66623jh8/ThDEN998895775njbrdTp069d+/egCXt9vb25rVFFp/P37JlS3FxcVNTk5WVlY+PT1BQkJluOQFgvhCwTJpSqXzqqae0K1rWrFmTmppqZWXFbFcwQvSj1Ujsxj5pruekeyuovaZGbGxsfHz8lStXFArFxx9/bI6HQPv4+KxYseL06dMymUxT4fP5K1eu5PEM33lgsrhcbmRkpMFjFgH+P3t3Hk9l9j8A/NyFayd79qXFGqEkDUqLFqImGS3TMmHa+fbVTMu3ZWr6tUzUVGgqbVOJlMKoUZRcaRUiskR22bebu/3+eGaeucMl2Z57+bz/mNe55z7LB831cZ5zPgcMDkiwBBeHw1m5ciWeXXl7e586dQpmUQw9XC43Ojp67969WGqloqLi6+s7oBvdDJytW7cmJiYihC5dunTo0CFh/GPAxMRk9OjRpaWljY2NCgoK6urq8D8dAKAXIMESXBs2bLh8+TLW/s9//nP48GEY5B9isNRqz549L168QEKeWmFmzZqlra1dVFRUU1MTHx8/e/ZsoiPqDRqNpqfXsUoZAAB8EfjLTEBt27bt1KlTWHvlypWQXQ0xXC73zp07EyZMcHFxefHihYqKSh9XCAoIEom0ePFirB0WFkZsMAAAQCBIsATRoUOHDhw4gLU9PT3PnDkD2dWQ0SG10tTUDAwMxFIroZvowxeeYN26dQsqWwIAhi1IsATO9evXf/jhB6zt7Ox8/vx5mAIyNPBNrXJzczdt2jQ0UiuMhYXFmDFjEEINDQ33798nOhwAACAG/OYWLK9evVq5ciW2Snzq1KnXr18XxmnCoINhklrh5s2bhzWSk5OJjQQAAIgCCZYAqaysnD9/PraV2NixYyMjI4fkb99hBUutsCKcwyG1wkyePBlrpKSkEBsJAAAQBVYRCgomk7l48eIPHz4ghGRkZCIjI7vaUwwIBWyF4O7du1++fIkQ0tTU/M9//uPt7T2E8yqcjY0N1nj69CmLxYLNyAEAwxCMYAmKDRs2PHz4ECFEoVDCwsKMjIyIjgj0Eu+o1cuXL4fJqBUvNTU1rIx7a2vr69eviQ4HAAAIAAmWQLh582ZISAjWPnTokJOTE7HxgN6B1Ao3ceJErPHmzRtiIwEAAELA0D3xysrKvLy8sPY333zj5+dHbDw9x2hl/bo2pZheyW5jyo0d8V3gJD2TYfpYczg/EOQL34iwpKSE2EgAAIAQkGARjMvlrlmz5uPHjwghDQ2NkydPEh1RT5UWNP5sfFWZUfHXRsElKGRcutXRuYs2GxMb2CDrkFppaWn5+fkN59QKo6GhgTVKS0uJjQQAAAgBjwgJduzYsdjYWIQQmUy+ePHiiBEjiI6op36e9ocyo4K3R4Lb9mTLn23NLKJCGmQdHghqaWkFBgbm5OQMwweCneEJFoxgAQCGJ0iwiFRQUPDjjz9ibX9//6lTpxIbT8+x2RzZ4ved+2XYDWFHMgc9nMGGpVaWlpaQWnUFRrAAAMMcPCIkkq+vL7aXiIWFxZ49e4gO5wtUl7TSuPx3QSnNrhvkYAYTh8OJiYnZtWvXq1evEDwQ7JqUlBTWgN1yAADDEyRYhLl3797t27cRQmQyOSgoSFRUlOiIvoCimgQTiYggZue3FLSkBj+eQQCp1RfBdiNACME2mgCA4QkSLGK0t7dv2LABa3/33Xf4mnZhQRUh1yhrqVbld+hvIUks/q8pISENHL6plY+PD41GIzo0AAAAAgrmYBHjyJEjubm5CCF5efn9+/cTHU5vrLs1s57yr6IMTETV3mg3QnnojOhwOBx8GvurV6+wuVZYXSvIrgQBl8tlMvkMowIAAOFgBIsA9fX1hw4dwto//fSToqIisfH0jomN8pbclceWJTVlV5PamWS1EQt/tpz6tS7RcfUPbNTqf//7X1paGoJRqy/X3t6ONSgUykBcv6Ki4s8///zw4QOTyZSTk7O0tJw8eTKZDH8xAgAEBSRYBDh+/HhDQwNCyMjIyNvbm+hwek9dT+ZQ8lyio+hnHVIrbW1tX19fSK2+FL54UFVVtd8vXlVVFRoaiudw9fX19+/fr6urc3Z27vd7AQBA70CCNdhaWlp+/fVXrL19+/be/X3f1sw68m1S+dMKbhtTTFPWfb+lzRyNfg1zOILUqh9h25YjhLS0tPr94omJiXh2hXv58uWkSZOUlJT6/XYAANALkGANthMnTmB12/X19d3d3XtxhYqi5n0GvysxKlSw1zUoZu6bFxsd1x+z7v7EzJSqi/7PmvIbKFKiOl+pbg6eTBWBRyoIQWo1APD6onhBrH5UVFTUVT8kWAAAAQEJ1qBqa2sLCAjA2tu2baNSe/P93z8jTunfJdRFELPk18TSTYbqejJdnXVu58t3++5Jok+SCCGEWt692Rj2dn/B0qE0J70XILUaIHiCpa6u3u8XZ7PZfPtZrOGyiwAAQPD1zwBGfn5+XFxcZWVlrw/r4RWE3a1bt7CvUVtbe9myZb27iFh+cedOcS7j6v70rk75WNb6dv99MfSJt1OlpXSXQ2zvYhgCOBxOeHi4iYmJi4tLWlqatrY2Xo0dsqu+y8z8q6C/np5ev19cWVmZb7+Kikq/3wsAAHqnrwnWp0+f5s+fP2rUKDc3N1VV1Z07d37pYT28wtAQGhqKNby8vERERHp3EQlOM9/+2iL+/Qihyz+lSXJbO/eL5rzvXQxCpKKioq2tjbcHT63c3d2zs7Mhtep3ra2tGRkZCCEymWxpadnv1588eXLnTnV1dR0dnX6/FwAA9E5fE6w9e/YkJCTQ6fSWlpZz587t378/Kirqiw7r4RWGgJKSkgcPHiCEyGRyr4evEEKtZEm+/fJdl1Cvzm/i2y/FaWazOb2ORJCxWKw1a9bIysqOGzdOWVlZT08vPDwcS62MjY0htRpQz549w57WGRsby8h0+di61wwMDObNm8e7+YGurq6HhwdUjQcACI4+zcFis9nnz5/39va2sbFBCK1cufLChQvnzp2bP39+Dw/r4RWGhvPnz2NzR2bMmKGpqdnr6zD0taXevenQ2UYS+27HuK5OkVOX4DN+hVArSZxCGZrz3F1dXWNiYrA2h8MpLCz85ptvjI2N09PTEULa2trbtm1btWp
V76bBge49efIEa0yaNGmAbmFpaWlqaooNTyopKcnLyw/QjQAAoHf69Mu1qKiovLzc0dER73F0dKTT6T0/rIdXGBouXbqENVasWNGX62z/06la7F9zTZhIRGO9Qzcz3Bf/aMpAfEZoWrT7fwm9IHj16lVsbMfpZWw2u6qqSkdHJyQkJC8vz8vLC7KrAYL/L2xt/ZmVrX0hKiqqpaU1duxYyK4AAAKoT79gKioq0L8nlqqqqtbU1LBYLN5fXd0c1sMrvHjx4sWLF7y3Li8vV1BQaG7uctYRIVpaWlAXpasLCwuxvXEkJSWnT5/el8ilFNCuQo8TXs+qXlSjtnZRDVnX3aYTZqp2c015NarMUuu2y8kU9M/yqxoRhfU37ATte9gvoqOj8c2GeVVXV1dWVtJoNAaDMfhRDQGMVtbBBY/bnn+QbG9uE5Hk6iv73bRXUhfnPaalpeXPP//E2hMmTOD7D6ylpaW5uRkSXMHB5XKxHwrRgYB/tLe3ww9FcLS3t/NOS+iJPn3A1dfXI4SkpaXxHmlpaS6XW1dXx1uNppvDeniFsrKy58+f8966ublZTk6uc7FBYmHx8I3q7t27WGPKlClkMrmPkVNE0abz/5o7/NkLeh8zS7RVuH8km1vRxKGJyJgo+p2xklEQEbTvYb/oKn/icDhMJhNm6vQOm83ZOfbOyPpibK6fRHsryq4+Oq7qP5kLZRT+WbERExODrSowMTHR0NDg+w+MyWS2t7dzOENz/p8wwnZ1HJKfBsKrvb0dfiiCo6vqMN3oU4KloKCAEGpq+mcCdUNDA4lEkpOT6+FhPbyCs7Nzh00wdu/ejRAStEcD2NiVrKxs57fwhyZz5swhKuwFPvILfLqcpzWUTJ069eeff+7cr6qqqqKiAglW75zYmDqyvmOJEMX26qBVLw4mzcF78L8l3N3du/qnzmAw5OXlYQRLcHC53NbWVkH7RB3msNQKfigCQlxc/PMH/Vuf5mBhu4xhj/kwFRUVSkpKHQoQdHNYD68g7NhsdmJiItaeOXMmobEMC9OnTx83jk8qaWtrGxYW1qFqA+ihtzF8CrAhhJrSyvA2g8H4448/sPbChQsHIywAABBIfUqwtLS0dHV14+Pj8Z74+Hg7O7ueH9bDKwi7V69e1dXVIYQ0NTUNDAyIDmdYuHv3Lu8SNjExsbVr106YMCEnJyc4OLi4mH+uALrBbmPy7Scz/6mfHhERgQ1IGxgYGBkZDVJkAAAgePqUYJFIJC8vr5CQkMePH7NYrN9++y05Ofn777/H3j19+rSHh8enT5+6Oaz7KwwZeFXrgVu1PtBObExdrx/+vcKFjUY3bp7KJjqcz1NVVU1JScnLyzt16tTNmzerqqpOnjzp7e2tqanZ2Nh44cKFxMREvhPhQVckNaX59nNG/NN//PhxrLF8+fLBiAkAAARVX+dA+Pv7v3//3t7enkKhkMnkkydPTps2DXvr6dOnYWFhZ86codFo3RzWzVtDRlZWFtYQxr/pWUzOBq2rqhV5fy06qEWv171JDZvwfw9nExtYT+jr63/99deSkpISEhIIIVlZ2RUrVjx69OjRo0cPHz4sKipasGAB7xoL0A3Pg1Y3pmaIon+NY3ER6Svfv57GJicnP3v2DCEkLi6+Zs0aAkIEAACB0dcik2QyOTg4uK6uLjU1taGhgXfw6cyZM1wuV0pKqvvDunlryMATLGNjY2Ij6YmKoubrAZl/XHjX3NCOEDq49KFqRR7vASTEpT169seFdwQF2CdkMtnBwWHZsmVSUlLv378PDg7Oy8v7/GkAIQuHkVr/ndlG+md3cCaiIhfbb/xNsZf48NWSJUsUFRUJCBEAAARG/6zikZGRGT9+fF8O6+EVhBSeYBkaGhIbSfea6tq3mkcqFr+jIA5C6BGJRnYcX/esrPMOumTEvXs0c/a3owc/yH6hq6vr4+Nz69atvLy833//3draeubMmWTy0Cxq34+8D1lVrDO4+vPriqwGeV0p9w1GhhP+SqQ+fPhw8+ZNrL1hwwbiYgQAAIEAy6QHHIfDKSoqQgiRyeQxY8YQHU53thpHqJT/My4lxv2E4p+Ik/k/QWuvEe61eJKSkp6enk+fPr13715qampJScnChQtHjBhBdFyCTlVbyjfEtnP/7t27mUwmQsjBwYHvEk4AABhW4E/2AdfY2IgVVJSWlv7SOrCD6WVCuXI5n6d+4hz+RTtFFb64KIigIZFI1tbWq1atkpOTKy0tDQkJefOm4yaPoCfevn178eJFrL1z505igwEAAEEACdaAa2xsxBp8C5AKjqTw93zrb4ogPovzuYg0c7PJQIc0ONTV1X18fIyMjD59+hQREXHz5k1sJAb0nL+/P4vFQgjNnTt36C1SAQCAXoAEa8A1NDRgDRmZLjdjFgQUapf/GCoUdXlfchGpdYrlnJXCOgGrMxqNtmjRonnz5lEolPT09N9++626uprooITG48eP79y5gxAik8n79u0jOhwAABAIkGANOGEZwXJcps/h9++hjjoiqHq53Pczq7QNKmS1asaaGAcsPJQ0d/AjHGiWlparV6+Wl5evrq4+ffr0y5cviY5ICLBYrM2bN2PtpUuXmpubExsPAAAICJjkPuDwapYCvgWe4QTFurFGCjmZvJ1chHRWWyGENp2yQciGoNAGz8iRI729vWNiYtLT0+/cuVNUVDR37lxBnjlHuKNHj7548QIhJC4uvnfvXqLDAQAAQQEjWAMOqwSG/r2ntWD65bVbo9V4xt+FjuopclIrHTcHTyY2qkEmKirq5ubm6uoqIiKSnp5++vRp3r0yhVruq9pja1N2Ocdf2JvGaGV9/oTPXjA3F9t2HSG0Z88ebW3tvl8TAACGBhjBGnD41CvBT7BEaORfnrkg5PIsvkxBVULPRI7oiAhjZmamrq4eERFRWVl59uzZ6dOnT5w4UcDHILu3fcZdcvxzKmKREXofjX7c+3DGb3P6MpGOw+GsWbMG2znb3Nwcf1AIAAAAwQjWIMATLHwyluCbMF1tOGdXGEVFxe+++87a2prFYsXFxV2/fh1LJoTRiU2povFPqOifUSs5dn3Cd3c+lrX2+prBwcGPHj1CCImIiJw/f15ERKQfAgUAgKECEqwBh29119DQwGaziQ0GfBEqlerk5LR48WIxMbG3b9+GhIR8+PCB6KB6400on/peUpym4E1PsfbVwxnfK13aLPLrRtqpddrXnsSVdH/B169fb9myBWv7+/ubmZn1b8AAACDsIMEacDQaTU1NDSHEZDILCgqIDgd8MQMDAx8fH01NzYaGhvPnzycmJuILF4SFeEsD3/6qrFqE0OEVSW/9b6p+LBjBqlVor1Yuzrk95/fY0C73mmxoaPj666+x8bxx48ZBZVEAAOgMEqzBYGRkhDXwTQmBcJGVlV2xYoW9vT2Xy3348OGlS5eam5uJDuoLsMn8Z1uKSFCb6tprLqaQ0b9SRhqXEb0+ke8pXC539erV2A7ZUlJSYWFhNBqtv+MFAAChBwnWYDA2NsYasBOL8CKTyQ4ODkuXLpWSkiosLAwODs7Pzy
c6qJ5ia6ny7Z/ooR/5a5Y4l8/cMuXW8soPfJLIgICAGzduYO3Q0FADA4N+jBMAAIYMSLAGA55gwQiWsNPT0/Px8dHX129pabl8+XJcXBy20aSAW3dtalOnTbsrRo5e/B/j+gr+M/dJiFte2DHB+vPPP3/44QesvWnTpq+//rrfQwUAgKEBEqzBMG7cOKyRlJREbCSg7yQlJZcsWeLk5EQmk1NTU8+dO1dfX090UJ9hOEFxxeOllWqjG8gyLEStEVVkTJ14vNADIWRgo8z3lE9IdKyFIm/P69evv/76a2yjxkmTJh06dGgQIgcAACEFdbAGg5WVlby8fG1tbXFxcXZ2tqGhIdERgT4hkUjW1taqqqo3btwoLS0NDg52cXHBZ9oJJhMb5VOlnp37Zy3Tv+OlqsToWEy1UW+UuNQ/nw+lpaXOzs5YqRF1dfXr169DgXsAAOgGjGANBgqF4ujoiLX/+OMPYoMB/UVbW9vHx2fMmDGfPn0KDw+Pjo4W0jIcq2JdaqkKvD2Vslo/pTjjLxsbG+fMmYOVqJCRkYmNjdXU1BzsKAEAQKhAgjVInJycsEZcXByxkYB+JCEh4eHh4eTkRKFQXrx4cfbs2draWqKD+mIWU0cebPAZsW5mo9X41ikTRv3sdqp+5Qjlv3ZMamtrmz9/fnp6OkKIRqNFRUXhj7wBAAB0BR4RDhInJycSiYQt8q+urlZSUiI6ItA/sMeFWlpaERER5eXlISEh8+bNMzU1JTquLyMmQd14gs9m3m1tbS4uLomJiQghEol07tw5BweHQY4NAACEESRYg0RNTc3GxoZOp7e3t4eGhvr7+xMdEehPI0eO9Pb2jo6OzsjIiIyMzMvLmzdvnrDvHoNlV/Hx8djLQ4f/stcIAAAgAElEQVQOeXrymcUFAOgXiYmJ2B8zGDab3dbWJiUlRVxEw46Dg0M//g0JjwgHj7e3N9YICQkRirX94IuIioouWLDA1dVVREQkPT399OnTlZWVRAfVex2yq59++gnfGwcAMBA6JFgUCgWyq8HU4fvfdzCCNXgWLVrk6+tbW1tbUFAQHx8/c+ZMoiMC/c/MzExdXT0iIqKysvLMmTPTp0+3trYmOqgv1tra6uLicv/+fezlgQMH8PJXAICB4+DgsHv3bqKjGKb6/TsPI1iDR1xc/Ntvv8Xax44dIzYYMHAUFRW/++47a2trFosVFxcXFhbGYDCIDuoLVFdXT5s2DbIrAADoC0iwBpW3tzeZTEYIxcbGPn78mOhwBhCLydnmGLeBdmoH6Wc/SsBatStpjzpWWhrCqFSqk5OTu7u7mJjY27dvg4ODsRoHgi8vL2/y5MmpqanYS8iuAACgdyDBGlRjx45dsmQJ1t6yZQuXy+3+eOG1Sf867UGqYnu1CGLKchpVyt/97vD7sMqxEEKGhoY+Pj4aGhoNDQ3nz59PTEwc5J84o5W19avYdeIh/yX/sl4s+D8Tbtd/7G4s7dmzZ7a2tthGzhQK5eTJk5BdAQBA70CCNdj27t1Lo9EQQqmpqXfu3CE6nAERFZyj/CGnQ6cUt/m3ZYlEhEMkWVnZlStX2tvbYxU6Ll++3NzMZwflgcBicvzULkk8fqbMqJDiNit9qpR5/mq71oW2Zhbf4+/cuePg4FBVVYUQkpCQiIyMXLt27eCECgAAQw8kWINNR0fHx8cHa//444+fPn0iNp6BQP89n28/rXR4jWBhyGSyg4PD0qVLJSUlCwoKgoOD8/P5f3/6V6A3XaWhuEOnclvFAfeEDp1cLvfnn392c3NrbW1FCCkqKt6/f9/FxWUQggQAgKEKEiwCbN++XVpaGiGUlZW1d+9eosPpf+0tTL79VA7/sZPhQE9P7/vvv9fT02tpabl8+XJcXNxAl+oouMd/1tfHJ2W8LxsbGxcsWLB9+3Zsnx99fX06nT5p0qQBjQ0AAIY8SLAIoKSk9PPPP2PtQ4cOPXv2jNh4+p2qyQi+/c3isoMciUCRlJRcunSpvb09iURKTU29ePFiU1PTAN7vUxfp7Kd2vJmVlTVx4sRbt25hL+3s7Oh0+ujRowcwKgAAGB4gwSLG2rVrp06dihBisVgrVqwYYg8KvY5MbCTLdO4fs8x48IMRKCQSycHBYfny5dLS0kVFRcHBwbm5uQN0LxHVLkoUKvzVHxERMWnSpJycv2bLbd68+f79+8rKygMUDwAADCuQYBGDTCafPXsWq9KblZW1detWoiPqTyOUxWZecK6n/jOOxUGUai2Dr7eYEBiV4MDm4Y0ePbq1tfXq1atxcXHY47n+tfAnCxaidOjkINJ0f/PW1ta1a9e6u7tjQ2iSkpJXrlwJCAigUqHyMAAA9A9IsAijq6t7+PBhrH3s2LHz588TGk4/m7V01KHm9cyZkxgkMYQQGbGVit+eHH3q4LIkokMTCBISEt98842TkxOFQklNTT179mxtbW3/3sLOVVvea+onRMN7mEiEunCKtk2rpaVlUFAQVjMCm3T1zTff9O/dAQBgmIMEi0je3t6urq5Y28fHJyUlhdh4+ld+Zi3nzzQx7j+Fl8TQp9bLiRHH3xAYleAgkUjW1tarVq0aMWJEeXl5SEhIRkZG/97CN8R2zRsfsWVTGyeMF/3G4Zsn34lYPZ08efLbt2+xAxYsWPDs2bNx48b1730BAILPy8sL32yU2CvzHo+32Wz2rVu3Xr58ORARDg5IsIhEIpEuXbqE/Xr79OnTggULSkpKiA6q34RufkrjdixrSUacB//3mpB4+t353Wm+46PW6YT9OPWPXtdQVVNT8/b2NjExaW9vj4yMvHnzJpPJfw1m7+gayW29aPfLUxfPAzrrtrr/+OOP7e3tCCEpKamzZ8/euHFjxAj+KxIAAENbZGQkVlWY8CvzHo+329ra3Nzcjh8/PhARDg5IsAgmJSUVFRWlqKiIEKqoqJg3b16/PyoiSlNhPd9+Sm3jIEfS71hMzvdqV4r2RMmlpSkXvRVLfBpuHxr4Pb13V6PRaAsXLnR1dRUREUlPTz99+nRlZWU/RsvhcI4fP25qavrw4UOsZ9KkSa9evVq1alU/3gUAAPqRmJjYpUuXvL29iQ6k92BOK/F0dHQiIiJmzJjBZDJfv349c+bM+Ph4OTk5ouPqK5Io/39dbErHmddCZ7dzvGr5O94eUdReFfzw7XdjDCwVe3dNMzMzdXX18PDwqqqqM2fOTJ8+3drauu+hpqene3l54XsLUqnU7du379ixA+azAwB45eTkREVFlZSUGBoaenp6ysr+U1XnzZs3t2/fLi8vHz9+/NKlS0VERLB+Op0eExNTU1Ojrq7u4eHRkwovZWVlEREROTk5urq6y5cv72bZMpVKpdFosrKyzc3NERERixcvTkxMfPDggYKCwoIFC8aMGYMdxmazw8PDU1NTKRSKnZ0db4Xknt9rgMCHrECwt7c/d+7ct99+y+FwXrx44eTkdO/ePRkZPpUOhMjomRoNIdmd+8WNVAY/mP5V/7BQqVMnDbVf2flqb+yMXl9WUVFxzZo18fHxqampcXFxRUVFLi4uYmJivbtaW1vbT
QMjg4qJs6jQHDvmC4GrACB4ihUffzf//+/f37958+fZrH4+mkQGPEvFPa2tpu3769cuVKQohMJiOEbN68OT09XWelggYQsDQkFAoJIc7OzooWFxeXtrY2pT8Mzs7OR44c8ff3ly82NDT89NNPERERQzeEMWLYFwxXA1bgADE0an3+nz9/vm7durS0tNmzZ+uuROPDvFMaGxsJISKRKCIiYtKkSa6url999ZVEItFltaAuBCwNdXZ2EkKsra0VLdbW1jRNd3R0jLTJmTNnwsLCBgcHs7OzdVGi0WDYFxp0GWgMB4ihUatHtm/f7uHhsWvXLt3VZ5SYd4o8im3fvn358uV//vnn7t27MzIyEhMTdVktqAsBi5GbN2+avpCSkkIIcXBwIISIRCLFOl1dXRRFya+EUlJTUxMZGbl58+Z169Y9fPgQT4NmF8O+UKvLYIxwgBga5j1y7ty5S5cu/f777xwO/kBoF/NOMTc3J4Ts2bNn9+7dixYtSkxMTElJ+fXXX3t7e3VZMKjFVN8FjA/h4eGlpaXyn52cnAghLi4u5MW3CjmhUOjk5GRmZqa07YMHDyIjI+fOnVtRUeHt7a2rko0Iw75g3mUwdjhADA3zHrl9+3ZHR4eXl5d8USqVFhcXnzlz5vz58/IbCgBbmHeKq6srISQsLEzREhoaKpPJnj59GhgYqJNiQW34gsIIj8eb/oJ8vNzT09Pb2zs3N1exTm5u7sKFC5U2lMlkq1atioqKunr1Kv54aAnDvmC4GrACB4ihYf7537Zt25UrV3JeCAwMXLx4cU5Ozpw5c3RYr1Fg3ine3t4ODg6PHj1StFRUVJiYmChyMBgifV7COM59++23PB6vsLBwYGAgMzOToqgbN27QNH306NHVq1dLJBKapgsKCgghSUlJv75KLBbru/wJZaS+oF/tjlFWA9bhADE0DA8TJeHh4bhNg/Yw75Rdu3Y5OjpeuXJFJBJdvXrVwcFh6F0bwAAhYGlOKpVu3bqVw+GYmZlxuVzFvXzi4+MJISKRiKbpo0ePDptrBQKBXmufaEbqC/rV7hhlNWAdDhBDw/AwUYKApVXMO6Wvr2/jxo2Kx+Ns2LChu7tbT1UDIxRN06yfFTMq3d3dNTU1QUFB8scXgB4x7At0mS7ht21o0CMGiHmndHd3V1VV+fj42NnZ6aY20BgCFgAAAADLMMkdAAAAgGUIWAAAAAAsQ8ACAAAAYBkCFgAAAADLELAAAAAAWIaABQAAAMAyBCwAAAAAliFgAQAAALAMAQsAAACAZQhYAAAAACxDwAIAAABgGQIWAAAAAMsQsAAAAABYhoAFAAAAwDIELAAAAACWIWABAAAAsAwBCwAAAIBl/wMR43ozzLItowAAAABJRU5ErkJggg==" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "j = 5\n", - "α = 0.5\n", - "n = 100\n", - "X, V = rand(n), rand(n)\n", - "Y = X + α * V\n", - "data = StateSpaceSet(X, Y)\n", - "xᵢ, C, A, U, Σ, Vt, rs, neighborsᵢ, c, k_inside, ϵᵢ = \n", - " anim_entropy(Shannon(), Lord(k = 20), data; j);\n", - "X1, X2 = columns(data)\n", - "pts = [Point2f(pt - xᵢ) for pt in data]\n", - "nns = [Point2f(nn - xᵢ) for nn in neighborsᵢ]\n", - "centered_xᵢ = Point2f(xᵢ - xᵢ)\n", - "\n", - "Λ = CausalityTools.hyperellipsoid_matrix(Vt, rs)\n", - "#rotated_nns = [inv(Vt) * pt for pt in nns]\n", - "inside = [transpose(pt) * Λ * pt <= 1 for pt in nns]\n", - "outside = [transpose(pt) * Λ * pt > 1 for pt in nns]\n", - "\n", - "let\t\n", - " f = Figure()\n", - " ax = Axis(f[1, 1])\n", - "\n", - " # Plot the ellipse and its semi-axes, from the origin\n", - " θ = atan(Vt[:, 1][2], Vt[:, 1][1]) # angle of the first direction relative to x-axis\n", - " ellipse = getellipsepoints(centered_xᵢ[1], centered_xᵢ[2], rs[1], rs[2], θ) # everything is shifted\n", - "\tfirst_axis = [Point2f(centered_xᵢ), Point2f(centered_xᵢ + rs[1] * Vt[:, 1])]\n", - " second_axis = [Point2f(centered_xᵢ), Point2f(centered_xᵢ + rs[2] * Vt[:, 2])]\n", - " lines!(ax, first_axis, linewidth = 2, color = :black)\n", - " lines!(ax, second_axis, linewidth = 2, color = :grey)\n", - " lines!(ax, ellipse[1], ellipse[2], color = :black, linewidth = 3, label = \"local ellipse\")\n", - " \n", - "\n", - " # # Shift all data to `xᵢ` (so that `xᵢ` is in the origin), and \n", - " # # plot it.\n", - " scatter!(ax, pts, label = \"data\",\n", - " color = :grey)\n", - " scatter!(ax, nns, label = \"neighbors\", \n", - " color = :blue)\n", - " scatter!(ax, nns[inside], label = \"inside\", \n", - " color = :purple)\n", - " scatter!(ax, nns[outside], label = \"outside\", \n", - " color = :yellow)\n", - " #scatter!(ax, data[:, 1], data[:, 2])\n", - " #scatter!(ax, neighborsᵢ[:, 1], neighborsᵢ[:, 2])\n", - " scatter!(ax, centered_xᵢ, color = :black, label = \"xi\")\n", - "\n", - " axislegend(position = :rb)\n", - " return f\n", - "end\n" - ] - }, - { - "cell_type": "code", - 
"execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ifamily3 (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Different data with known mutual information from Lord et al.\n", - "\n", - "function family1(α, n::Int)\n", - " x = rand(n)\n", - " v = rand(n)\n", - " y = x + α * v\n", - " return StateSpaceSet(x), StateSpaceSet(y)\n", - "end\n", - "\n", - "function family3(α, n::Int)\n", - " Σ = [\n", - " 7 -5 -1 -3;\n", - " -5 5 -1 3;\n", - " -1 -1 3 -1;\n", - " -3 3 -1 2+α]\n", - " N4 = MvNormal(zeros(4), Σ)\n", - " D4 = StateSpaceSet([rand(N4) for i = 1:n])\n", - " X = D4[:, 1:2]\n", - " Y = D4[:, 3:4]\n", - " return X, Y\n", - "end\n", - "\n", - "# True mutual information values for these data\n", - "using LinearAlgebra\n", - "function ifamily1(α; base = ℯ)\n", - " mi = -log(α) - α - log(2)\n", - " return mi / log(ℯ, base)\n", - "end\n", - "\n", - "function ifamily3(α; base = ℯ)\n", - " Σ = [\n", - " 7 -5 -1 -3;\n", - " -5 5 -1 3;\n", - " -1 -1 3 -1;\n", - " -3 3 -1 2+α]\n", - " Σx = Σ[1:2, 1:2]\n", - " Σx = Σ[3:4, 3:4]\n", - " mi = 0.5*log(det(Σx) * det(Σx) / det(Σ))\n", - " return mi / log(ℯ, base)\n", - "end\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "plot_results (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "using LaTeXStrings\n", - "function new_cycle_theme()\n", - " # https://nanx.me/ggsci/reference/pal_locuszoom.html\n", - " my_colors = [\"#D43F3AFF\", \"#EEA236FF\", \"#5CB85CFF\", \"#46B8DAFF\",\n", - " \"#357EBDFF\", \"#9632B8FF\", \"#B8B8B8FF\"]\n", - " cycle = Cycle([:color, :linestyle, :marker], covary=true) # alltogether\n", - " my_markers = [:circle, :rect, :utriangle, :dtriangle, :diamond,\n", - " :pentagon, :cross, :xcross]\n", - " my_linestyle = [nothing, :dash, :dot, :dashdot, :dashdotdot]\n", - " return Theme(\n", - " fontsize = 22, font=\"CMU Serif\",\n", - " colormap = :linear_bmy_10_95_c78_n256,\n", - " palette = (\n", - " color = my_colors, \n", - " marker = my_markers, \n", - " linestyle = my_linestyle,\n", - " ),\n", - " Axis = (\n", - " backgroundcolor= (:white, 0.2), \n", - " xgridstyle = :dash, \n", - " ygridstyle = :dash\n", - " ),\n", - " Lines = (\n", - " cycle= cycle,\n", - " ), \n", - " ScatterLines = (\n", - " cycle = cycle,\n", - " ),\n", - " Scatter = (\n", - " cycle = cycle,\n", - " ),\n", - " Legend = (\n", - " bgcolor = (:grey, 0.2), \n", - " framecolor = (:white, 0.2),\n", - " labelsize = 13,\n", - " )\n", - " )\n", - "end\n", - "\n", - "run(est; f::Function, # function that generates data\n", - " base::Real = ℯ, \n", - " nreps::Int = 10, \n", - " αs = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1], \n", - " n::Int = 500) =\n", - " map(α -> mutualinfo(Shannon(; base), est, f(α, n)...), αs)\n", - "\n", - "function compute_results(f::Function; estimators, k = 5, k_lord = 20,\n", - " n = 1000, base = ℯ, nreps = 3)\n", - " as = 7:-1:0\n", - " αs = [1/10^(a) for a in as]\n", - " is = [zeros(length(αs)) for est in estimators]\n", - " for (k, est) in enumerate(estimators)\n", - " tmp = zeros(length(αs))\n", - " for i = 1:nreps\n", - " tmp .+= run(est; f = f, αs, base, n)\n", - " end\n", - " is[k] .= tmp ./ nreps\n", - " end\n", - "\n", - " return αs, as, is\n", - "end\n", - "\n", - "function plot_results(f::Function, ftrue::Function; \n", - " base, estimators, k_lord, k, kwargs...)\n", - " αs, as, is 
= compute_results(f; base, estimators, k_lord, k, kwargs...)\n", - " set_theme!(new_cycle_theme())\n", - "\n", - " ymin = floor(Int, minimum(Iterators.flatten(is)))\n", - " ymax = ceil(Int, maximum(Iterators.flatten(is)))\n", - " f = Figure()\n", - " ax = Axis(f[1, 1],\n", - " xlabel = \"α\", ylabel = \"I (nats)\",\n", - " xscale = log10, aspect = 1,\n", - " xticks = (αs, [latexstring(\"10^{$(-a)}\") for a in as]),\n", - " yticks = (ymin:ymax)\n", - " )\n", - " xlims!(ax, (1e-7, 1e-0),)\n", - " ylims!(ax, (ymin, ymax))\n", - " lines!(ax, αs, [ftrue(α; base) for α in αs], \n", - " label = \"I (true)\", linewidth = 4, color = :black)\n", - " for (i, est) in enumerate(estimators)\n", - " es = string(typeof(est).name.name)\n", - " lbl = est isa Lord ? \"$es (k = $k_lord)\" : \"$es (k = $k)\"\n", - " scatter!(ax, αs, is[i], label = lbl)\n", - " lines!(ax, αs, is[i])\n", - "\n", - " end\n", - " axislegend()\n", - " return f\n", - "end" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "fig = with_theme(new_cycle_theme()) do \n", - "\n", - " k_lord = 20\n", - " k = 5\n", - " base = ℯ\n", - "\n", - " estimators = [\n", - " Lord(; k = k_lord),\n", - " Kraskov(; k), \n", - " KozachenkoLeonenko(),\n", - " Zhu(; k), \n", - " ZhuSingh(; k),\n", - " GaoNaive(; k),\n", - " GaoNaiveCorrected(; k),\n", - " KSG1(; k), \n", - " KSG2(; k),\n", - " ]\n", - " return plot_results(family1, ifamily1; \n", - " k_lord = k_lord, k = k, \n", - " estimators = estimators,\n", - " base = base)\n", - "end" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(3.8085002797604375, 3.623143098568017, 5.159445273146853)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "htrue = 0.5*(1 + log(2π*det([14 7; 7 12]))) \n", - "N2 = MvNormal([1, 4], [14 7; 7 12])\n", - "X = StateSpaceSet([rand(N2) for i = 1:100000])\n", - "h_krask = entropy(Shannon(; base = ℯ), Kraskov(k = 5), X)\n", - "h_lord = entropy(Shannon(; base = ℯ), Lord(k = 30), X)\n", - "htrue, h_krask, h_lord" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3.8085002797604375" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Julia 1.8.0", - "language": "julia", - "name": "julia-1.8" - }, - "language_info": { - "file_extension": ".jl", - "mimetype": "application/julia", - "name": "julia", - "version": "1.8.0" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/methods/infomeasures/mutualinfo/mutualinfo.jl b/src/methods/infomeasures/mutualinfo/mutualinfo.jl deleted file mode 100644 index c60923d0d..000000000 --- a/src/methods/infomeasures/mutualinfo/mutualinfo.jl +++ /dev/null @@ -1,209 +0,0 @@ -export MutualInformation -export MutualInformationEstimator -export MutualInformationDefinition -export mutualinfo - -""" - MutualInformation <: AssociationMeasure - -The supertype of all mutual information measures. 
Concrete subtypes are - -- [`MIShannon`](@ref) -- [`MITsallisFuruichi`](@ref) -- [`MITsallisMartin`](@ref) -- [`MIRenyiJizba`](@ref) -- [`MIRenyiSarbu`](@ref) -""" -abstract type MutualInformation{E} <: InformationMeasure end - -""" - MutualInformationEstimator - -The supertype of all dedicated mutual information estimators. - -[`MutualInformationEstimator`](@ref)s can be either mixed, discrete or a combination of -both. Each estimator uses a specialized technique to approximate relevant -densities/integrals and/or probabilities, and is typically tailored to a specific -type of [`MutualInformation`](@ref) (mostly [`MIShannon`](@ref)). -""" -abstract type MutualInformationEstimator <: InformationMeasureEstimator end - -# There are many ways of defining mutual information. Moreover, the definitions -# differ for different types of base `EntropyDefinition`s. Therefore, we dispatch -# on subtypes of `MutualInformationDefinition`. -""" The supertype for mutual information definitions. """ -abstract type MutualInformationDefinition <: Definition end - -""" The supertype for all H3-type (three entropies) decomposition of mutual information. """ -abstract type MIH3 <: MutualInformationDefinition end - -#= """ - mutualinfo(measure::MutualInformation, est::MutualInformationEstimator, x, y) - mutualinfo(measure::MutualInformation, est::DifferentialEntropyEstimator, x, y) - mutualinfo(measure::MutualInformation, est::ProbabilitiesEstimator, x, y) - mutualinfo(measure::MutualInformation, c::ContingencyMatrix) - -Estimate the mutual information `measure` (either [`MIShannon`](@ref) or -[`MITsallis`](@ref), ) between `x` and `y` using the provided estimator `est`. -Alternatively, compute mutual information from a pre-computed [`ContingencyMatrix`](@ref). - -Compatible measures/definitions and estimators are listed in the -[online documentation](@ref mutualinfo_overview). -""" =# -mutualinfo(args...; kwargs...) = estimate(args...; kwargs...) - -include("MIShannon.jl") -include("MITsallisFuruichi.jl") -include("MITsallisMartin.jl") -include("MIRenyiSarbu.jl") -include("MIRenyiJizba.jl") - -include("estimators/estimators.jl") - -# Default to Shannon mutual information. - -""" - mutualinfo([measure::MutualInformation], m::ContingencyMatrix) → mi::Real - -Estimate the mutual information between `x` and `y`, the variables corresponding to -the columns and rows of the 2-dimensional contingency matrix `m`, respectively. - -Estimates the discrete version of the given [`MutualInformation`](@ref) `measure` from -its direct definition (double-sum), using the probabilities from a pre-computed -[`ContingencyMatrix`](@ref). If `measure` is not given, then the default -is `MIShannon()`. -""" -mutualinfo(c::ContingencyMatrix) = estimate(MIShannon(), c) - -""" - mutualinfo([measure::MutualInformation], est::ProbabilitiesEstimator, x, y) → mi::Real ∈ [0, a] - -Estimate the mutual information between `x` and `y` using the discrete version of the -given `measure`, using the given [`ProbabilitiesEstimator`](@ref) `est` (which must accept -multivariate data and have an implementation for [`marginal_encodings`](@ref)). -See examples [here](@ref example_mi_ProbabilitiesEstimator). -If `measure` is not given, then the default is `MIShannon()`. - -## Estimators - -The mutual information is computed as sum of three entropy terms, without any bias correction. -The exception is when using [`Contingency`](@ref); then the mutual information -is computed using a [`ContingencyMatrix`](@ref). 
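For the discrete case, a minimal illustrative sketch (the unit-uniform inputs and the `SymbolicPermutation` discretization with `m = 3` are assumptions made for the example, not requirements):

```julia
using CausalityTools
x, y = rand(2000), rand(2000)
# Jointly discretize `x` and `y` with ordinal patterns, then compute MI
# as a sum of three (discrete) entropy terms.
mutualinfo(MIShannon(; base = 2), SymbolicPermutation(m = 3), x, y)
```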
- -Joint and marginal probabilities are computed by jointly discretizing `x` and `y` using -the approach given by `est` (using [`marginal_encodings`](@ref)), and obtaining marginal -distributions from the joint distribution. - -| Estimator | Principle | [`MIShannon`](@ref) | [`MITsallisFuruichi`](@ref) | [`MITsallisMartin`](@ref) | [`MIRenyiJizba`](@ref) | [`MIRenyiSarbu`](@ref) | -| ---------------------------- | ------------------- | :-----------------: | :-------------------------: | :-----------------------: | :--------------------: | :--------------------: | -| [`Contingency`](@ref) | Contingency table | ✓ | ✓ | ✓ | ✓ | ✓ | -| [`CountOccurrences`](@ref) | Frequencies | ✓ | ✓ | ✓ | ✓ | ✖ | -| [`ValueHistogram`](@ref) | Binning (histogram) | ✓ | ✓ | ✓ | ✓ | ✖ | -| [`SymbolicPermutation`](@ref) | Ordinal patterns | ✓ | ✓ | ✓ | ✓ | ✖ | -| [`Dispersion`](@ref) | Dispersion patterns | ✓ | ✓ | ✓ | ✓ | ✖ | -""" -function mutualinfo(measure::MutualInformation, est::ProbabilitiesEstimator, x, y) - return estimate(measure, est, x, y) -end - -function estimate(est::ProbabilitiesEstimator, x, y) - estimate(MIShannon(), est, x, y) -end - -""" - mutualinfo([measure::MutualInformation], est::DifferentialEntropyEstimator, x, y) - -Estimate the mutual information `measure` between `x` and `y` by a sum of three -entropy terms, without any bias correction, using any [`DifferentialEntropyEstimator`](@ref) -compatible with multivariate data. See examples -[here](@ref example_mi_DifferentialEntropyEstimator). If `measure` is not given, then the -default is `MIShannon()`. - -!!! note - [`DifferentialEntropyEstimator`](@ref)s have their own `base` field which is not - used here. Instead, this method creates a copy of `est` internally, - where `est.base` is replaced by `measure.e.base`. Therefore, use `measure` to - control the "unit" of the mutual information. - -## Estimators - -Some [`MutualInformation`](@ref) measures can be computed using a [`DifferentialEntropyEstimator`](@ref), -provided it supports multivariate input data. These estimators compute mutual information as a sum of -of entropy terms (with different dimensions), without any bias correction. - -| Estimator | Principle | [`MIShannon`](@ref) | [`MITsallisFuruichi`](@ref) | [`MITsallisMartin`](@ref) | [`MIRenyiJizba`](@ref) | [`MIRenyiSurbu`](@ref) | -| -------------------------------- | ----------------- | :-----------------: | :-------------------------: | :-----------------------: | :--------------------: | :--------------------: | -| [`Kraskov`](@ref) | Nearest neighbors | ✓ | x | x | x | x | -| [`Zhu`](@ref) | Nearest neighbors | ✓ | x | x | x | x | -| [`ZhuSingh`](@ref) | Nearest neighbors | ✓ | x | x | x | x | -| [`Gao`](@ref) | Nearest neighbors | ✓ | x | x | x | x | -| [`Goria`](@ref) | Nearest neighbors | ✓ | x | x | x | x | -| [`Lord`](@ref) | Nearest neighbors | ✓ | x | x | x | x | -| [`LeonenkoProzantoSavani`](@ref) | Nearest neighbors | ✓ | x | x | x | x | -""" -function mutualinfo(est::DifferentialEntropyEstimator, x, y) - return estimate(est, x, y) -end - -# Internal method for compatibility with `independence` -estimate(est::DifferentialEntropyEstimator, x, y) = estimate(MIShannon(), est, x, y) - -""" - mutualinfo([measure::MutualInformation], est::MutualInformationEstimator, x, y) - -Estimate the mutual information `measure` between `x` and `y` using the -dedicated [`MutualInformationEstimator`](@ref) `est`. -See examples [here](@ref example_mi_MutualInformationEstimator). 
-If `measure` is not given, then the default is `MIShannon()`. - -## Estimators - -Dedicated [`MutualInformationEstimator`](@ref)s are either discrete, continuous, -or a mixture of both. Typically, these estimators apply bias correction. - -| Estimator | Type | [`MIShannon`](@ref) | -| ------------------------------ | :--------: | :-----------------: | -| [`GaussanMI`](@ref) | Parametric | ✓ | -| [`KSG1`](@ref) | Continuous | ✓ | -| [`KSG2`](@ref) | Continuous | ✓ | -| [`GaoKannanOhViswanath`](@ref) | Mixed | ✓ | -| [`GaoOhViswanath`](@ref) | Continuous | ✓ | -""" -function mutualinfo(measure::MIShannon, est::MutualInformationEstimator, x, y) - return estimate(MIShannon(measure.e), est, x, y) -end - -# Internal method for compatibility with `independence` -estimate(est::MutualInformationEstimator, x, y) = estimate(MIShannon(), est, x, y) - -# Generic 3H-formulation of mutual information. -function marginal_entropies_mi3h(measure::MutualInformation, est, x, y) - e = measure.e - X = StateSpaceSet(x) - Y = StateSpaceSet(y) - XY = StateSpaceSet(X, Y) - HX = entropy(e, est, X) - HY = entropy(e, est, Y) - HXY = entropy(e, est, XY) - return HX, HY, HXY -end - -# Override some definitions, because the estimator behaviour need to be adjusted -# for multiple input variables. -const WellDefinedMIShannonProbEsts{m, D} = Union{ - SymbolicPermutation{m}, - ValueHistogram{<:FixedRectangularBinning{D}}, - ValueHistogram{<:RectangularBinning{T}}, - Dispersion -} where {m, D, T} - -function marginal_entropies_mi3h(measure::MutualInformation, - est::WellDefinedMIShannonProbEsts{m, D}, x, y) where {m, D} - eX, eY = marginal_encodings(est, x, y) - eXY = StateSpaceSet(eX, eY) - e = measure.e - HX = entropy(e, CountOccurrences(), eX) - HY = entropy(e, CountOccurrences(), eY) - HXY = entropy(e, CountOccurrences(), eXY) - return HX, HY, HXY -end diff --git a/src/methods/infomeasures/pmi.jl b/src/methods/infomeasures/pmi.jl deleted file mode 100644 index 30ea951eb..000000000 --- a/src/methods/infomeasures/pmi.jl +++ /dev/null @@ -1,120 +0,0 @@ -export PMI -export pmi - -""" - PMI <: AssociationMeasure - PMI(; base = 2) - -The partial mutual information (PMI) measure of association [Zhao2016](@cite). - -## Definition - -PMI is defined as for variables ``X``, ``Y`` and ``Z`` as - -```math -PMI(X; Y | Z) = D(p(x, y, z) || p^{*}(x|z) p^{*}(y|z) p(z)), -``` - -where ``p(x, y, z)`` is the joint distribution for ``X``, ``Y`` and ``Z``, and -``D(\\cdot, \\cdot)`` is the extended Kullback-Leibler divergence from -``p(x, y, z)`` to ``p^{*}(x|z) p^{*}(y|z) p(z)``. See Zhao et al. (2016) for details. - -## Estimation - -PMI can be estimated using any [`ProbabilitiesEstimator`](@ref) that implements -[`marginal_encodings`](@ref). This allows estimation of 3D contingency matrices, from -which relevant probabilities for the PMI formula are extracted. See also [`pmi`](@ref). - -## Properties - -For the discrete case, the following identities hold in theory (when estimating PMI, they -may not). - -- `PMI(X, Y, Z) >= CMI(X, Y, Z)` (where CMI is the Shannon CMI). Holds in theory, but - when estimating PMI, the identity may not hold. -- `PMI(X, Y, Z) >= 0`. Holds both in theory and for estimation using - [`ProbabilitiesEstimator`](@ref)s. -- `X ⫫ Y | Z => PMI(X, Y, Z) = CMI(X, Y, Z) = 0` (in theory, but not necessarily for - estimation). 
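## Example

A hypothetical usage sketch (the unit-uniform inputs and the `ValueHistogram` binning with four bins per dimension are assumptions made for illustration):

```julia
using CausalityTools
x, y, z = rand(1000), rand(1000), rand(1000)
est = ValueHistogram(RectangularBinning(4))
# A 3D contingency matrix is constructed internally; the estimate is non-negative.
pmi(PMI(; base = 2), est, x, y, z)
```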
-""" -Base.@kwdef struct PMI <: AssociationMeasure - base::Real = 2 -end - -min_inputs_vars(::PMI) = 3 -max_inputs_vars(::PMI) = 3 - -""" - pmi([measure::CMI], est::ProbabilitiesEstimator, x, y, z) → pmi_est::Real ∈ [0, a) - -Estimate the part mutual information ([`PMI`](@ref); [Zhao2016](@citet)). - -If `measure` is not given, then the default is `PMI(; base = 2)`. -With a [`ProbabilitiesEstimator`](@ref), the returned `pmi_est` is guaranteed to be -non-negative. - -## Estimators - -| Estimator | Principle | [`PMI`](@ref) | -| ----------------------------- | ------------------- | :-----------: | -| [`CountOccurrences`](@ref) | Frequencies | ✓ | -| [`ValueHistogram`](@ref) | Binning (histogram) | ✓ | -| [`SymbolicPermutation`](@ref) | Ordinal patterns | ✓ | -| [`Dispersion`](@ref) | Dispersion patterns | ✓ | -""" -function pmi(measure::PMI, args...) - return estimate(measure, args...) -end - -function pmi(args...) - return estimate(PMI(), args...) -end - -function estimate(measure::PMI, est::Contingency{<:ProbabilitiesEstimator}, x, y, z) - return estimate(measure, contingency_matrix(est.est, x, y, z)) -end - -function estimate(measure::PMI, est::Contingency{<:Nothing}, x, y, z) - return estimate(measure, contingency_matrix(CountOccurrences(), x, y, z)) -end - -# We explicitly need to construct a contingency matrix, because unlike for e.g. CMI, -# there's no obvious way to rewrite PMI in terms of sums of entropies. -function estimate(measure::PMI, est::ProbabilitiesEstimator, x, y, z) - return estimate(measure, Contingency(est), x, y, z) -end - -function estimate( - measure::PMI, - pxyz::ContingencyMatrix{T, 3}) where T - - # The sums go over *states*, so these are what we iterate over. - dx, dy, dz = size(pxyz) - px = probabilities(pxyz, dims = [1]) - py = probabilities(pxyz, dims = [2]) - pz = probabilities(pxyz, dims = [3]) - pyz = probabilities(pxyz, dims = [2, 3]) - pxz = probabilities(pxyz, dims = [1, 3]) - - pmi = 0.0 - logb = log_with_base(measure.base) - for i in 1:dx - for j in 1:dy - for k in 1:dz - pzₖ = pz[k] - pyzⱼₖ = pyz[j, k] - pxzᵢₖ = pxz[i, k] - pxyzᵢⱼₖ = pxyz[i, j, k] - sy = sum(pyzⱼₖ > 0 ? py[j] * (pxyz[i, j, k]/ pyz[j, k]) : 0 for j = 1:dy) - sx = sum(pxzᵢₖ > 0 ? px[i] * (pxyz[i, j, k] / pxz[i, k]) : 0 for i = 1:dx) - - sxy = sy * sx - pxy_z = pxyzᵢⱼₖ / pzₖ - if sxy > 0 && pxy_z > 0 - pmi += pxyzᵢⱼₖ * logb(pxy_z / (sy * sx)) - end - end - end - end - return pmi -end diff --git a/src/methods/infomeasures/predictive_asymmetry/PA.jl b/src/methods/infomeasures/predictive_asymmetry/PA.jl index 57b953a60..69df322fa 100644 --- a/src/methods/infomeasures/predictive_asymmetry/PA.jl +++ b/src/methods/infomeasures/predictive_asymmetry/PA.jl @@ -37,8 +37,8 @@ of the other dedicated conditional mutual information estimators. 
| Estimator | Type | Principle | Pairwise | Conditional | | -------------------------------- | ----------------------------------------------- | ------------------- | :------: | :---------: | -| [`CountOccurrences`](@ref) | [`ProbabilitiesEstimator`](@ref) | Frequencies | ✓ | ✓ | -| [`ValueHistogram`](@ref) | [`ProbabilitiesEstimator`](@ref) | Binning (histogram) | ✓ | ✓ | +| [`UniqueElements`](@ref) | [`ProbabilitiesEstimator`](@ref) | Frequencies | ✓ | ✓ | +| [`ValueBinning`](@ref) | [`ProbabilitiesEstimator`](@ref) | Binning (histogram) | ✓ | ✓ | | [`Dispersion`](@ref) | [`ProbabilitiesEstimator`](@ref) | Dispersion patterns | ✓ | ✓ | | [`Kraskov`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✓ | | [`Zhu`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✓ | @@ -88,7 +88,7 @@ given. Returns the distribution of asymmetry values. The docstring for [`PA`](@ref) lists compatible estimators. """ function asymmetry(args...; kwargs...) - return estimate(args...; kwargs...) + return association(args...; kwargs...) end const PA_ESTS = Union{ @@ -102,7 +102,7 @@ function asymmetry(est::PA_ESTS, args...) throw(ArgumentError("A valid measure must be provided as the second argument; do `PA()`.")) end -function estimate(measure::PA, x::AbstractVector...) +function association(measure::PA, x::AbstractVector...) throw(ArgumentError("A valid estimator must be provided as the second argument, try for example `FPVP()`.")) end @@ -194,7 +194,7 @@ end as_vector(x::Union{AbstractVector{T}}) where T = vec(x) as_vector(x::AbstractStateSpaceSet{1, T}) where T = x[:, 1] -function estimate(measure::PA, est::PA_ESTS, x, y) +function association(measure::PA, est::PA_ESTS, x, y) X = as_vector(x) Y = as_vector(y) measure = redefine_lags(measure, x) @@ -225,7 +225,7 @@ function estimate(measure::PA, est::PA_ESTS, x, y) return ΔA end -function estimate(measure::PA, est::PA_ESTIMATORS, x, y, z) +function association(measure::PA, est::PA_ESTIMATORS, x, y, z) measure = redefine_lags(measure, x, z) (; ηT, τS, τC) = measure X = as_vector(x) diff --git a/src/methods/infomeasures/predictive_asymmetry/predictive_asymmetry.jl b/src/methods/infomeasures/predictive_asymmetry/predictive_asymmetry.jl index 34aa6d12c..1df9b6ed7 100644 --- a/src/methods/infomeasures/predictive_asymmetry/predictive_asymmetry.jl +++ b/src/methods/infomeasures/predictive_asymmetry/predictive_asymmetry.jl @@ -36,7 +36,7 @@ const PA_ESTIMATORS = Union{ TransferEntropyEstimator } -function estimate(measure::PredictiveAsymmetry, est::PA_ESTIMATORS, source, target) +function association(measure::PredictiveAsymmetry, est::PA_ESTIMATORS, source, target) (; ηs, normalize, f, base, dTf, dT, dS, dC, τT, τS, τC) = measure check_ηs(ηs) @@ -66,7 +66,7 @@ function estimate(measure::PredictiveAsymmetry, est::PA_ESTIMATORS, source, targ return 𝔸s end -function estimate(measure::PredictiveAsymmetry, est::PA_ESTIMATORS, source, target, cond) +function association(measure::PredictiveAsymmetry, est::PA_ESTIMATORS, source, target, cond) (; ηs, normalize, f, base, dTf, dT, dS, dC, τT, τS, τC) = measure check_ηs(ηs) diff --git a/src/methods/infomeasures/transferentropy/TERenyiJizba.jl b/src/methods/infomeasures/transferentropy/TERenyiJizba.jl deleted file mode 100644 index be998621e..000000000 --- a/src/methods/infomeasures/transferentropy/TERenyiJizba.jl +++ /dev/null @@ -1,55 +0,0 @@ -export TERenyiJizba - -""" - TERenyiJizba() <: TransferEntropy - -The Rényi transfer entropy from [Jizba2012](@citet). 
- -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise - and conditional dependence. -- Use with [`transferentropy`](@ref) to compute the raw transfer entropy. - -## Description - -The transfer entropy from source ``S`` to target ``T``, potentially -conditioned on ``C`` is defined as - -```math -\\begin{align*} -TE(S \\to T) &:= I_q^{R_J}(T^+; S^- | T^-) \\\\ -TE(S \\to T | C) &:= I_q^{R_J}(T^+; S^- | T^-, C^-), -\\end{align*}, -``` -where ``I_q^{R_J}(T^+; S^- | T^-)`` is Jizba et al. (2012)'s definition of -conditional mutual information ([`CMIRenyiJizba`](@ref)). -The variables ``T^+``, ``T^-``, -``S^-`` and ``C^-`` are described in the docstring for [`transferentropy`](@ref). - -## Compatible estimators - -Jizba's formulation of Renyi-type transfer entropy can currently be estimated using -selected probabilities estimators and differential entropy estimators, which -under the hood compute the transfer entropy as Jizba's formulation of Rényi conditional -mutual information. - -| Estimator | Type | Principle | [`TERenyiJizba`](@ref) | -| -------------------------------- | -------------------------------------- | ------------------- | :--------------------: | -| [`CountOccurrences`](@ref) | [`ProbabilitiesEstimator`](@ref) | Frequencies | ✓ | -| [`ValueHistogram`](@ref) | [`ProbabilitiesEstimator`](@ref) | Binning (histogram) | ✓ | -| [`LeonenkoProzantoSavani`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -""" -struct TERenyiJizba{E <: Renyi, EMB} <: TransferEntropy{E, EMB} - e::E - embedding::EMB - function TERenyiJizba(; base = 2, q = 1.5, embedding::EMB = EmbeddingTE()) where EMB - e = Renyi(; base = base, q = q) - return new{typeof(e), EMB}(e, embedding) - end - function TERenyiJizba(e::E; embedding::EMB = EmbeddingTE()) where {E <: Renyi, EMB} - return new{E, EMB}(e, embedding) - end -end - -max_inputs_vars(::TERenyiJizba) = 3 diff --git a/src/methods/infomeasures/transferentropy/TEShannon.jl b/src/methods/infomeasures/transferentropy/TEShannon.jl deleted file mode 100644 index 2aa668769..000000000 --- a/src/methods/infomeasures/transferentropy/TEShannon.jl +++ /dev/null @@ -1,105 +0,0 @@ -using DelayEmbeddings: delay_f1nn -export TEShannon - -""" - TEShannon <: TransferEntropy - TEShannon(; base = 2; embedding = EmbeddingTE()) <: TransferEntropy - -The Shannon-type transfer entropy measure. - -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise - and conditional dependence. -- Use with [`transferentropy`](@ref) to compute the raw transfer entropy. - -## Description - -The transfer entropy from source ``S`` to target ``T``, potentially -conditioned on ``C`` is defined as - -```math -\\begin{align*} -TE(S \\to T) &:= I^S(T^+; S^- | T^-) \\\\ -TE(S \\to T | C) &:= I^S(T^+; S^- | T^-, C^-) -\\end{align*} -``` - -where ``I(T^+; S^- | T^-)`` is the Shannon conditional mutual information -([`CMIShannon`](@ref)). The variables ``T^+``, ``T^-``, -``S^-`` and ``C^-`` are described in the docstring for [`transferentropy`](@ref). - -## Compatible estimators - -Shannon-type transfer entropy can be estimated using a range of different estimators, -which all boil down to computing conditional mutual information, except for -[`TransferEntropyEstimator`](@ref), which compute transfer entropy using some direct method. 
- -| Estimator | Type | Principle | [`TEShannon`](@ref) | -| -------------------------------- | ----------------------------------------------- | ------------------- | :-----------------: | -| [`CountOccurrences`](@ref) | [`ProbabilitiesEstimator`](@ref) | Frequencies | ✓ | -| [`ValueHistogram`](@ref) | [`ProbabilitiesEstimator`](@ref) | Binning (histogram) | ✓ | -| [`SymbolicPermuation`](@ref) | [`ProbabilitiesEstimator`](@ref) | Ordinal patterns | ✓ | -| [`Dispersion`](@ref) | [`ProbabilitiesEstimator`](@ref) | Dispersion patterns | ✓ | -| [`Kraskov`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -| [`Zhu`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -| [`ZhuSingh`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -| [`Gao`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -| [`Goria`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -| [`Lord`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -| [`LeonenkoProzantoSavani`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -| [`GaussanMI`](@ref) | [`MutualInformationEstimator`](@ref) | Parametric | ✓ | -| [`KSG1`](@ref) | [`MutualInformationEstimator`](@ref) | Continuous | ✓ | -| [`KSG2`](@ref) | [`MutualInformationEstimator`](@ref) | Continuous | ✓ | -| [`GaoKannanOhViswanath`](@ref) | [`MutualInformationEstimator`](@ref) | Mixed | ✓ | -| [`GaoOhViswanath`](@ref) | [`MutualInformationEstimator`](@ref) | Continuous | ✓ | -| [`FPVP`](@ref) | [`ConditionalMutualInformationEstimator`](@ref) | Nearest neighbors | ✓ | -| [`MesnerShalizi`](@ref) | [`ConditionalMutualInformationEstimator`](@ref) | Nearest neighbors | ✓ | -| [`Rahimzamani`](@ref) | [`ConditionalMutualInformationEstimator`](@ref) | Nearest neighbors | ✓ | -| [`Zhu1`](@ref) | [`TransferEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -| [`Lindner`](@ref) | [`TransferEntropyEstimator`](@ref) | Nearest neighbors | ✓ | -""" -struct TEShannon{E <: Shannon, EMB} <: TransferEntropy{E, EMB} - e::E - embedding::EMB - function TEShannon(; base = 2, embedding::EMB = EmbeddingTE()) where EMB - e = Shannon(; base = base) - return new{typeof(e), EMB}(e, embedding) - end - function TEShannon(e::E; embedding::EMB = EmbeddingTE()) where {E <:Shannon, EMB} - return new{E, EMB}(e, embedding) - end - # TODO: add constructor that automatically determines the embedding. -end - -max_inputs_vars(::TEShannon) = 3 - -# function transferentropy( -# est::Union{ -# ConditionalMutualInformationEstimator, -# MutualInformationEstimator, -# DifferentialEntropyEstimator, -# ProbabilitiesEstimator -# }, -# x...; kwargs...) -# N = length(first(x)) - -# # A very naive heuristic to avoid too high dimensions. *All* marginals are optimised, -# # so in the worst case, the dimension triples. -# maxdim = floor(Int, N^(1/7)) -# # The maxlag should also scale with the length the input. -# maxlag = min(floor(Int, N ÷ 50), 100) -# dmethod = "mi_min" -# method = delay_f1nn -# opt = OptimiseTraditional(; maxdim, maxlag, method, dmethod) -# m = TEShannon(; base = 2, embedding = EmbeddingTE(opt, x...)) -# return transferentropy(m, est, x...; kwargs...) -# end - -# If a pre-computed [`ContingencyMatrix`](@ref) `c` is provided, then we just compute -# the conditional mutual information directly from it, assuming the contingency matrix -# was constructed from a meaningful embedding. 
-function transferentropy(measure::TEShannon, c::ContingencyMatrix) - cmi = CMIShannon(; base = measure.base) - return condmutualinfo(cmi, c) -end diff --git a/src/methods/infomeasures/transferentropy/convenience/SymbolicTransferEntropy.jl b/src/methods/infomeasures/transferentropy/convenience/SymbolicTransferEntropy.jl deleted file mode 100644 index 05fb736df..000000000 --- a/src/methods/infomeasures/transferentropy/convenience/SymbolicTransferEntropy.jl +++ /dev/null @@ -1,33 +0,0 @@ -export SymbolicTransferEntropy - -""" - SymbolicTransferEntropy <: TransferEntropyEstimator - SymbolicTransferEntropy(; m = 3, τ = 1, lt = ComplexityMeasures.isless_rand - -A convenience estimator for symbolic transfer entropy [Staniek2008](@cite). - -## Description - -[Symbolic transfer entropy](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.100.158101) -consists of two simple steps. First, the input time series are embedded with embedding -lag `m` and delay `τ`. The ordinal patterns of the embedding vectors are then encoded -using [`SymbolicPermutation`](@ref) with [`marginal_encodings`](@ref). This transforms the -input time series into integer time series using [`OrdinalPatternEncoding`](@ref). - -Transfer entropy is then estimated as usual on the encoded timeseries with -[`transferentropy`](@ref) and the [`CountOccurrences`](@ref) naive frequency estimator. -""" -Base.@kwdef struct SymbolicTransferEntropy <: TransferEntropyEstimator - m::Int = 3 - τ::Int = 1 - lt::Function = ComplexityMeasures.isless_rand -end - - -function estimate(measure::TransferEntropy, est::SymbolicTransferEntropy, - x::AbstractVector...) - (; m, τ, lt) = est - est = SymbolicPermutation(; m, τ, lt) - s = marginal_encodings(est, x...) - transferentropy(measure, CountOccurrences(), s...) -end diff --git a/src/methods/infomeasures/transferentropy/convenience/convenience.jl b/src/methods/infomeasures/transferentropy/convenience/convenience.jl deleted file mode 100644 index 072cb8d34..000000000 --- a/src/methods/infomeasures/transferentropy/convenience/convenience.jl +++ /dev/null @@ -1,2 +0,0 @@ -include("Hilbert.jl") -include("SymbolicTransferEntropy.jl") diff --git a/src/methods/infomeasures/transferentropy/estimators/estimators.jl b/src/methods/infomeasures/transferentropy/estimators/estimators.jl deleted file mode 100644 index 99b03b246..000000000 --- a/src/methods/infomeasures/transferentropy/estimators/estimators.jl +++ /dev/null @@ -1,4 +0,0 @@ -include("Lindner.jl") -include("Zhu1.jl") - -include("transferoperator.jl") diff --git a/src/methods/infomeasures/transferentropy/optimization/optimization.jl b/src/methods/infomeasures/transferentropy/optimization/optimization.jl deleted file mode 100644 index 4cc29ac24..000000000 --- a/src/methods/infomeasures/transferentropy/optimization/optimization.jl +++ /dev/null @@ -1 +0,0 @@ -include("traditional_optimal_embedding.jl") diff --git a/src/methods/infomeasures/transferentropy/transferentropy.jl b/src/methods/infomeasures/transferentropy/transferentropy.jl deleted file mode 100644 index d934cc66d..000000000 --- a/src/methods/infomeasures/transferentropy/transferentropy.jl +++ /dev/null @@ -1,177 +0,0 @@ -using StateSpaceSets: AbstractStateSpaceSet, StateSpaceSet -using StateSpaceSets: dimension - -export transferentropy -export TransferEntropy -export TransferEntropyEstimator - -include("embedding.jl") -include("utils.jl") - -""" - TransferEntropy <: AssociationMeasure - -The supertype of all transfer entropy measures. 
Concrete subtypes are -- [`TEShannon`](@ref) -- [`TERenyiJizba`](@ref) -""" -abstract type TransferEntropy{E, EMB} <: DirectedAssociationMeasure end - -max_inputs_vars(::TransferEntropy) = 3 - -""" -The supertype of all dedicated transfer entropy estimators. -""" -abstract type TransferEntropyEstimator end - -""" - transferentropy([measure::TEShannon], est, s, t, [c]) - transferentropy(measure::TERenyiJizba, est, s, t, [c]) - -Estimate the transfer entropy ``TE^*(S \\to T)`` or ``TE^*(S \\to T | C)`` if `c` is given, -using the provided estimator `est`, where ``*`` indicates the given `measure`. -If `measure` is not given, then `TEShannon(; base = 2)` is the default. - -## Arguments - -- **`measure`**: The transfer entropy measure, e.g. [`TEShannon`](@ref) or - [`TERenyi`](@ref), which dictates which formula is computed. - Embedding parameters are stored in `measure.embedding`, and - is represented by an [`EmbeddingTE`](@ref) instance. If calling `transferentropy` - without giving `measure`, then the embedding is optimized by finding - suitable delay embedding parameters using the ["traditional"](https://juliadynamics.github.io/DynamicalSystems.jl/dev/embedding/traditional/) - approach from DynamicalSystems.jl. -- **`s`**: The source timeseries. -- **`t`**: The target timeseries. -- **`c`**: Optional. A conditional timeseries. - -## Description - -The Shannon transfer entropy is defined as ``TE^S(S \\to T | C) := I^S(T^+; S^- | T^-, C^-)``, -where ``I^S(T^+; S^- | T^-, C^-)`` is [`CMIShannon`](@ref), and marginals for -the CMI are constructed as described in [`EmbeddingTE`](@ref). The definition is -analogous for [`TERenyiJizba`](@ref). - -If `s`, `t`, and `c` are univariate timeseries, then the -the marginal embedding variables ``T^+`` (target future), ``T^-`` (target present/past), -``S^-`` (source present/past) and ``C^-`` (present/past of conditioning variables) -are constructed by first jointly embedding `s`, `t` and `c` with relevant delay -embedding parameters, then subsetting relevant columns of the embedding. - -Since estimates of ``TE^*(S \\to T)`` and ``TE^*(S \\to T | C)`` are just a special cases of -conditional mutual information where input data are marginals of a particular form of -[delay embedding](https://juliadynamics.github.io/DynamicalSystems.jl/dev/embedding/reconstruction/), -*any* combination of variables, e.g. ``S = (A, B)``, ``T = (C, D)``, -``C = (D, E, F)`` are valid inputs (given as `StateSpaceSet`s). -In practice, however, `s`, `t` and `c` are most often timeseries, and if - `s`, `t` and `c` are [`StateSpaceSet`](@ref)s, it is assumed that the data are -pre-embedded and the embedding step is skipped. - -## Compatible estimators - - -`transferentropy` is just a simple wrapper around [`condmutualinfo`](@ref) that constructs -an appropriate delay embedding from the input data before CMI is estimated. Consequently, -any estimator that can be used for [`ConditionalMutualInformation`](@ref) is, in principle, -also a valid transfer entropy estimator. [`TransferEntropyEstimator`](@ref)s are the -exception - they compute transfer entropy directly. 
- -| Estimator | Type | Principle | [`TEShannon`](@ref) | [`TERenyiJizba`](@ref) | -| -------------------------------- | ----------------------------------------------- | ------------------- | :-----------------: | :--------------------: | -| [`CountOccurrences`](@ref) | [`ProbabilitiesEstimator`](@ref) | Frequencies | ✓ | ✓ | -| [`ValueHistogram`](@ref) | [`ProbabilitiesEstimator`](@ref) | Binning (histogram) | ✓ | ✓ | -| [`Dispersion`](@ref) | [`ProbabilitiesEstimator`](@ref) | Dispersion patterns | ✓ | ✖ | -| [`Kraskov`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`Zhu`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`ZhuSingh`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`Gao`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`Goria`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`Lord`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`LeonenkoProzantoSavani`](@ref) | [`DifferentialEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✓ | -| [`GaussanMI`](@ref) | [`MutualInformationEstimator`](@ref) | Parametric | ✓ | ✖ | -| [`KSG1`](@ref) | [`MutualInformationEstimator`](@ref) | Continuous | ✓ | ✖ | -| [`KSG2`](@ref) | [`MutualInformationEstimator`](@ref) | Continuous | ✓ | ✖ | -| [`GaoKannanOhViswanath`](@ref) | [`MutualInformationEstimator`](@ref) | Mixed | ✓ | ✖ | -| [`GaoOhViswanath`](@ref) | [`MutualInformationEstimator`](@ref) | Continuous | ✓ | ✖ | -| [`FPVP`](@ref) | [`ConditionalMutualInformationEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`MesnerShalizi`](@ref) | [`ConditionalMutualInformationEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`Rahimzamani`](@ref) | [`ConditionalMutualInformationEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`Zhu1`](@ref) | [`TransferEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`Lindner`](@ref) | [`TransferEntropyEstimator`](@ref) | Nearest neighbors | ✓ | ✖ | -| [`Hilbert`](@ref) | [`TransferEntropyEstimator`](@ref) | Hilbert transform | ✓ | ✖ | -| [`SymbolicTransferEntropy`](@ref)| [`TransferEntropyEstimator`](@ref) | Hilbert transform | ✓ | ✖ | - -""" -function transferentropy end - - -const TE_ESTIMATORS = Union{ - TransferEntropyEstimator, - ConditionalMutualInformationEstimator, - MutualInformationEstimator, - DifferentialEntropyEstimator, - ProbabilitiesEstimator, -} - -# Embedding optimization -include("optimization/optimization.jl") - -include("TEShannon.jl") -include("TERenyiJizba.jl") - -function transferentropy(measure::TransferEntropy, est::TE_ESTIMATORS, args...; kwargs...) - return estimate(measure, est, args...; kwargs...) -end -function transferentropy(est::TE_ESTIMATORS, args...; kwargs...) - return estimate(TEShannon(), est, args...; kwargs...) -end - -function estimate(est::TE_ESTIMATORS, args...; kwargs...) - return estimate(TEShannon(), est, args...; kwargs...) -end - -function estimate(measure::TransferEntropy, est::TE_ESTIMATORS, x...) - # If a conditional input (x[3]) is not provided, then C is just a 0-dimensional - # StateSpaceSet. The horizontal concatenation of C with T then just returns T. - # We therefore don't need separate methods for the conditional and non-conditional - # cases. - S, T, T⁺, C = individual_marginals_te(measure.embedding, x...) - cmi = te_to_cmi(measure) - # TE(s -> t) := I(t⁺; s⁻ | t⁻, c⁻). 
- return condmutualinfo(cmi, est, T⁺, S, StateSpaceSet(T, C)) -end - -# When using any estimator except dedicatd `TransferEntropyEstimator`s, -# we use the conditional mutual information decomposition, so we need -# to change the measure for dispatch to work. -te_to_cmi(measure::TEShannon) = CMIShannon(measure.e) -te_to_cmi(measure::TERenyiJizba) = CMIRenyiJizba(measure.e) - -function individual_marginals_te(emb::EmbeddingTE, x::AbstractVector...) - joint, vars, τs, js = te_embed(emb, x...) - S = joint[:, vars.S] - T = joint[:, vars.T] - Tf = joint[:, vars.Tf] - C = joint[:, vars.C] - return S, T, Tf, C -end - -function h4_marginals(measure::TransferEntropy, x...) - S, T, T⁺, C = individual_marginals_te(measure.embedding, x...) - joint = StateSpaceSet(S, T, T⁺, C) - ST = StateSpaceSet(S, T, C) - TT⁺ = StateSpaceSet(T, T⁺, C) - T = StateSpaceSet(T, C) - return joint, ST, TT⁺, T -end - -include("estimators/estimators.jl") -include("convenience/convenience.jl") - -# Default to Shannon-type base 2 transfer entropy -function estimate(est::TransferEntropyEstimator, x...) - estimate(TEShannon(base = 2), est, x...) -end - -transferentropy(emb::EmbeddingTE, args...; kwargs...) = - transferentropy(TEShannon(; embedding = emb), args...; kwargs...) diff --git a/src/methods/infomeasures/various/entropies.jl b/src/methods/infomeasures/various/entropies.jl deleted file mode 100644 index 5a9d33edb..000000000 --- a/src/methods/infomeasures/various/entropies.jl +++ /dev/null @@ -1,4 +0,0 @@ -include("entropies/LeonenkoProzantoSavani.jl") -include("entropies/GenericKernel.jl") -include("entropies/Faivishevsky.jl") -include("entropies/Pal.jl") diff --git a/src/methods/infomeasures/various/entropies/Faivishevsky.jl b/src/methods/infomeasures/various/entropies/Faivishevsky.jl deleted file mode 100644 index 72211aab9..000000000 --- a/src/methods/infomeasures/various/entropies/Faivishevsky.jl +++ /dev/null @@ -1,28 +0,0 @@ -export Faivishevsky -#https://proceedings.neurips.cc/paper/2008/file/3dc4876f3f08201c7c76cb71fa1da439-Paper.pdf -Base.@kwdef struct Faivishevsky{M} <: DifferentialEntropyEstimator - k::Int = 1 # todo: remove. it isn't used. - w::Int = 0 - metric::M = Euclidean() -end -import ComplexityMeasures.ball_volume -using Neighborhood: search -function entropy(e::Shannon, est::Faivishevsky, x::AbstractStateSpaceSet{D}) where D - (; k, w, metric) = est - N = length(x) - tree = KDTree(x, metric) - idxs, ϵs = bulksearch(tree, x, NeighborNumber(N-1), Theiler(w)) - - f = 0.0 - for k = 1:N-1 # loop over neighbor numbers - f -= digamma(k) - c = 0.0 - for i = 1:N # loop over points - c += D/N * log(ϵs[i][k]) - end - end - f *= 1 / (N - 1) - # The unit is nats - h = digamma(N) + ball_volume(D) + f - return _convert_logunit(h, ℯ, e.base) -end diff --git a/src/methods/infomeasures/various/entropies/Gao2017.jl b/src/methods/infomeasures/various/entropies/Gao2017.jl deleted file mode 100644 index 5b388e5d8..000000000 --- a/src/methods/infomeasures/various/entropies/Gao2017.jl +++ /dev/null @@ -1,56 +0,0 @@ -import Statistics.cov -using StaticArrays: SMatrix, @MMatrix, @MVector - -""" - Gao2017 <: DifferentialEntropyEstimator - Gao2017(k = 1, w = 1, base = 2) - -A resubstitution Shannon entropy estimator from [Gao2017](@citet). 
-""" -Base.@kwdef struct Gao2017{B, M} #<: CausalityTools.InformationEstimator - k::Int = 1 - w::Int = 0 - metric::M = Euclidean() -end - -function Î(q, est::Gao2017, x::AbstractStateSpaceSet{D}) where D - (; k, w, metric) = est - N = length(x) - tree = KDTree(x, metric) - Bk,d,α,K = bias(est) - idxs, ds = bulksearch(tree, x, NeighborNumber(k), Theiler(w)) - - # In the case of a multivariate Gaussian, maximum likehood estimation simply - # amounts to finding the sample means and sample covariance matrices. - for xᵢ in x - μ, Σ = mean_and_cov(x) - end -end - -import Statistics.cov - -# Non-allocating and more than twice as fast as writing a wrapper -# `f(x) = Statistics.cov(Matrix(x))`. -# Also accepts SubStateSpaceSets, so we can use views on neighbor points. -function cov(x̄, x::AbstractStateSpaceSet{D}) where D - N = length(x) - 1 - C = @MMatrix zeros(D, D) - x̄ = mean(x) - Δx = @MVector zeros(D) - @inbounds for xᵢ in x - Δx .= xᵢ - x̄ - C .+= Δx * transpose(Δx) - end - C ./= N - return SMatrix{D, D}(C) -end -# So we don't have to compute the mean twice at every iteration. -cov(x::AbstractStateSpaceSet{D}) where D = cov(mean(x), x) -function mean_and_cov(x::AbstractStateSpaceSet{D}) where D - μ = mean(x) - Σ = cov(μ, x) - return μ, Σ -end - -# TODO: implement -multiplicative_bias(est::Gao2017) = 1.0 diff --git a/src/methods/infomeasures/various/entropies/GenericKernel.jl b/src/methods/infomeasures/various/entropies/GenericKernel.jl deleted file mode 100644 index 22b216520..000000000 --- a/src/methods/infomeasures/various/entropies/GenericKernel.jl +++ /dev/null @@ -1,66 +0,0 @@ -export GenericKernel - -""" - GenericKernel <: DifferentialEntropyEstimator - GenericKernel(bandwidth = Silverman(), kernel::MultivariateKernel = NormalIsotropic()) - -A generic, multivariate plug-in estimator for entropies based on kernel density estimation -(KDE) that can in principle be used to compute any differential entropy. - -Data should be standardized to zero mean and unit variance before applying `GenericKernel`. - -The `bandwidth` may be set manually, or to one of the rule-of-thumbs listed below. - -## Description - -Assume we have samples ``\\{x_1, x_2, \\ldots, x_N \\}`` from a continuous random variable -``X \\in \\mathbb{R}^d`` with support ``\\mathcal{X}`` and density function -``f : \\mathbb{R}^d \\to \\mathbb{R}``. - -`GenericKernel` estimates, for each ``x_i`` in the sample, the point-wise densities -``\\hat{f}(x_i)`` using the given `kernel` and `bandwidth`, then computes a resubstitution -estimate for the entropy. We support the following resubstitution estimates. - -### [Shannon](@ref) differential entropy - -```math -H(X) = \\int_{\\mathcal{X}} f(x) \\log f(x) dx = \\mathbb{E}[-\\log(f(X))] -``` - -is estimated by replacing the expectation with the sample average ([Diks2017](@cite)) - -```math -\\hat{H}(X) = -\\dfrac{1}{N}\\sum_{i = 1}^N \\log \\hat{f}(x). -``` - -## Compatible kernels - -- [`NormalIsotropic`](@ref). -- [`Epanechnikov`](@ref) - -## Bandwidth rule-of-thumbs - -- [`Silverman`](@ref) -- [`DiksFang`](@ref) - -[Diks2017](@cite). - Diks, C., & Fang, H. (2017). Transfer entropy for nonparametric granger causality - detection: an evaluation of different resampling methods. Entropy, 19(7), 372. 
-""" -struct GenericKernel{K, B} <: DifferentialEntropyEstimator - bandwidth::B - kernel::K - function GenericKernel( - bandwidth::B = DiksFang(4.8), - kernel::K = NormalIsotropic()) where {B, K} - new{K, B}(bandwidth, kernel) - end -end -bandwidth(r::Real, x::AbstractStateSpaceSet) = r # convenience for manual settings - -function entropy(e::Renyi, est::GenericKernel, x::AbstractStateSpaceSet) - bw = bandwidth(est.bandwidth, x) - ρs = densities_at_points(est.kernel, x, bw) - e.q ≈ 1 || error("Renyi entropy with q = $(e.q) not implemented for `GenericKernel`") - return sum(log0.(e.base, ρs)) / length(x) -end diff --git a/src/methods/infomeasures/various/entropies/LeonenkoProzantoSavani.jl b/src/methods/infomeasures/various/entropies/LeonenkoProzantoSavani.jl deleted file mode 100644 index 6761bee34..000000000 --- a/src/methods/infomeasures/various/entropies/LeonenkoProzantoSavani.jl +++ /dev/null @@ -1,90 +0,0 @@ -using SpecialFunctions: gamma -using Neighborhood: bulksearch -using Neighborhood: Euclidean, Theiler -import ComplexityMeasures: DifferentialEntropyEstimator -import ComplexityMeasures: entropy - -export LeonenkoProzantoSavani - -""" - LeonenkoProzantoSavani <: DifferentialEntropyEstimator - LeonenkoProzantoSavani(k = 1, w = 0) - -The `LeonenkoProzantoSavani` estimator computes the [`Shannon`](@ref), [`Renyi`](@ref), or -[`Tsallis`](@ref) [`entropy`](@ref) using the `k`-th nearest-neighbor approach -from [LeonenkoProsantoSavani2008](@citet). - -`w` is the Theiler window, which determines if temporal neighbors are excluded -during neighbor searches (defaults to `0`, meaning that only the point itself is excluded -when searching for neighbours). -""" -Base.@kwdef struct LeonenkoProzantoSavani <: DifferentialEntropyEstimator - k::Int = 1 - w::Int = 0 -end - -function entropy(e::Shannon, est::LeonenkoProzantoSavani, x::AbstractStateSpaceSet{D}) where D - h = Î(1.0, est, x) # measured in nats - return _convert_logunit(h, ℯ, e.base) -end - -function entropy(e::Renyi, est::LeonenkoProzantoSavani, x::AbstractStateSpaceSet{D}) where D - if e.q ≈ 1.0 - h = Î(e.q, est, x) # measured in nats - else - h = log(Î(e.q, est, x)) / (1 - e.q) # measured in nats - end - return _convert_logunit(h, ℯ, e.base) -end - -function entropy(e::Tsallis, est::LeonenkoProzantoSavani, x::AbstractStateSpaceSet{D}) where D - if e.q ≈ 1.0 - h = Î(e.q, est, x) # measured in nats - else - h = (Î(e.q, est, x) - 1) / (1 - e.q) # measured in nats - end - return _convert_logunit(h, ℯ, e.base) -end - -# TODO: this gives nan?? -# Use notation from original paper -function Î(q, est::LeonenkoProzantoSavani, x::AbstractStateSpaceSet{D}) where D - (; k, w) = est - N = length(x) - Vₘ = ball_volume(D) - Cₖ = (gamma(k) / gamma(k + 1 - q))^(1 / (1 - q)) - tree = KDTree(x, Euclidean()) - idxs, ds = bulksearch(tree, x, NeighborNumber(k), Theiler(w)) - if q ≈ 1.0 # equations 3.9 & 3.10 in Leonenko et al. (2008) - h = (1 / N) * sum(log.(ξᵢ_shannon(last(dᵢ), Vₘ, N, D, k) for dᵢ in ds)) - else # equations 3.1 & 3.2 in Leonenko et al. 
(2008) - h = (1 / N) * sum(ξᵢ_renyi_tsallis(last(dᵢ), Cₖ, Vₘ, N, D)^(1 - q) for dᵢ in ds) - end - return h -end -ξᵢ_renyi_tsallis(dᵢ, Cₖ, Vₘ, N::Int, D::Int) = (N - 1) * Cₖ * Vₘ * (dᵢ)^D -ξᵢ_shannon(dᵢ, Vₘ, N::Int, D::Int, k) = (N - 1) * exp(-digamma(k)) * Vₘ * (dᵢ)^D - -using Distributions: MvNormal -import Distributions.entropy as dentropy -function entropy(e::Renyi, 𝒩::MvNormal; base = 2) - q = e.q - if q ≈ 1.0 - h = dentropy(𝒩) - else - Σ = 𝒩.Σ - D = length(𝒩.μ) - h = dentropy(𝒩) - (D / 2) * (1 + log(q) / (1 - q)) - end - return _convert_logunit(h, ℯ, base) -end - -# Eq. 15 in Nielsen & Nock (2011); https://arxiv.org/pdf/1105.3259.pdf -function entropy(e::Tsallis, 𝒩::MvNormal; base = 2) - q = e.q - Σ = 𝒩.Σ - D = length(𝒩.μ) - hr = entropy(Renyi(q = q), 𝒩; base) - h = (exp((1 - q) * hr) - 1) / (1 - q) - return _convert_logunit(h, ℯ, base) -end diff --git a/src/methods/infomeasures/various/entropies/LoftsgaardenH.jl b/src/methods/infomeasures/various/entropies/LoftsgaardenH.jl deleted file mode 100644 index b51762f25..000000000 --- a/src/methods/infomeasures/various/entropies/LoftsgaardenH.jl +++ /dev/null @@ -1,25 +0,0 @@ -""" - LoftsgaardenH <: ProbabilitiesEstimator - -The `LoftsGaardenH` Shannon entropy estimator is based on the `k`-th nearest neighbor -density estimation from [LoftsGaarden1965](@citet). - -It estimates probabilities by first estimating the density locally at each sample -point `xᵢ` using the distance from `xᵢ` to its `k`-th nearest neighbor. The density -distribution over the sample points is then normalized to form probabilities. - -## Outcome space - -The outcome space `Ω` for `LoftsGaarden` is the indices of the input data, `1:length(x)`. -The reason to not return the data points themselves is because duplicate data points may -not have same probabilities (due to having different neighbors). -""" -Base.@kwdef struct LoftsGaarden{M} <: ProbabilitiesEstimator - k::Int = 5 - w::Int = 0 - metric::M = Euclidean() -end - -function entropy(e::Renyi, est::LoftsGaarden, x) - ρs = point_densities(est, StateSpaceSet(x)) -end diff --git a/src/methods/infomeasures/various/entropies/Pal.jl b/src/methods/infomeasures/various/entropies/Pal.jl deleted file mode 100644 index 2c9771275..000000000 --- a/src/methods/infomeasures/various/entropies/Pal.jl +++ /dev/null @@ -1,110 +0,0 @@ -#export Pal -using Neighborhood -using StateSpaceSets: dimension, AbstractStateSpaceSet, StateSpaceSet -export Pal - -""" - Pal <: <: DifferentialEntropyEstimator - Pal(; k = 1, w = 0, p = 1.0, n::Int = 10000) - -A [`Shannon`](@ref] and [`Renyi`](@ref) differential entropy estimator (Pàl et al., 2010). - -`w` is the Theiler window, which determines if temporal neighbors are excluded -during neighbor searches (defaults to `0`, meaning that only the point itself is excluded -when searching for neighbours). - -## Description - -Pál et al. (2010)'s estimator is based on generalized nearest neighbor graphs. It is -similar to several other kNN-based estimators (e.g. [`LeonenkoProzantoSavani`](@ref)). 
-Given samples ``\\bf{X}_{1:n} = \\{\\bf{X}_1, \\bf{X}_2, \\ldots, \\bf{X}_n \\}`` -where ``\\bf{X}_1 \\in \\mathbb{R}^d`` from some distribution ``\\mu`` over -``\\mathbb{R}^d`` with density function ``f``, -approximates the [`Renyi`](@ref) differential entropy - -```math -h_q^R(\\bf(X)) = \\dfrac{1}{1-q} \\int_{\\mathbb{R}^d} f^q(\\bf{x}) d\\bf{x} -``` - -using the estimator - -```math -\\hat{H}_q^R(\\bf{X_{1:n}}) = \\dfrac{1}{1-q}\\log -\\left( \\dfrac{L_p(\\bf{X}_{1:n})}{\\gamma n^{1-p/d}} \\right), -``` - -where ``L_p(\\bf{X}_{1:n}`` is the sum of the `p`-th powers of the Euclidean -lengths of the edges of the nearest neighbor graph over ``\\bf{X}_{1:n}`` -(see their paper for details). - -The constant ``\\gamma`` is determined by the limit given in equation 4 in -Pàl et al. (2010), -and is approximated on `n` randomly generated points from the `d`-dimensional -unit cube, as they describe in the end of section 3 of their paper. - - -[^Pál2010]: - Pál, D., Póczos, B., & Szepesvári, C. (2010). Estimation of Rényi entropy and mutual - information based on generalized nearest-neighbor graphs. Advances in Neural - Information Processing Systems, 23. -""" -Base.@kwdef struct Pal{P} <: DifferentialEntropyEstimator - k::Int = 1 - w::Int = 0 - p::P = 2.0 - n::Int = 10000 -end - -function entropy(e::Renyi, est::Pal, x) - (; k, w, p, n) = est - (; q, base) = e - q <= 1 || error("Pal estimator only defined for 0 < q <= 1") - if q == 1 - q = 0.999999999 - end - - X = StateSpaceSet(x) - d = dimension(X) - γ = approximate_γ(est, d) - h = 1 / (1 - q) * log(Lₚ(est, X) / (γ * n^(1 - p/d))) - return _convert_logunit(ℯ, base) -end - -function entropy(e::Shannon, est::Pal, x) - (; k, w, p, n) = est - (; base) = e - q = 1.0 - eps() # approximate Shannon entropy by simply letting q → 1 - X = StateSpaceSet(x) - N = length(x) - d = dimension(X) - γ = approximate_γ(est, d) - L = Lₚ(est, X) - f = L / (γ * N^(1 - p/d)) - h = (1 / (1 - q)) * log(f) - return _convert_logunit(ℯ, base) -end - -function Lₚ(est::Pal, x::AbstractStateSpaceSet{D}) where D - (; k, w, p, n) = est - N = length(x) - tree = KDTree(x, Euclidean()) - idxs, ds = bulksearch(tree, x, NeighborNumber(k), Theiler(w)) - Lₚ = 0.0 - for i = 1:N - Lₚ += sum(ds[i] .^ p) - end - return Lₚ -end - -# TODO: Estimates of `γ` typically don't stabilize until millions of points are -# included. Thus, for practical applications where parameters of the -# analysis are allowed to vary, runtime quickly increases. -# Fitting a function f(d, p, k) would dramatically reduce runtime (how?). -# Alternatively, providing a look-up table for "commonly used" (as we define how we -# see fit) parameters ranges. 
-function approximate_γ(est::Pal, d::Int) - p = est.p - n = est.n - x = StateSpaceSet(rand(n, d)) - Lₚ(est, x) / n^(1 - (p/d)) -end diff --git a/src/methods/infomeasures/various/probabilities.jl b/src/methods/infomeasures/various/probabilities.jl deleted file mode 100644 index aca4c7dd0..000000000 --- a/src/methods/infomeasures/various/probabilities.jl +++ /dev/null @@ -1,3 +0,0 @@ -include("probabilities/LocalLikelihood.jl") -include("probabilities/LoftsGaarden.jl") -include("probabilities/Contingency.jl") diff --git a/src/methods/infomeasures/various/probabilities/Contingency.jl b/src/methods/infomeasures/various/probabilities/Contingency.jl deleted file mode 100644 index a2b925a88..000000000 --- a/src/methods/infomeasures/various/probabilities/Contingency.jl +++ /dev/null @@ -1,22 +0,0 @@ -export Contingency - -""" - Contingency <: ProbabilitiesEstimator - Contingency(est::Union{ProbabilitiesEstimator, Nothing} = nothing) - -`Contingency` is a probabilities estimator that transforms input data to a multidimensional -probability mass function (internally represented as [`ContingencyMatrix`](@ref). - -It works directly on raw discrete/categorical data. Alternatively, if a -[`ProbabilitiesEstimator`](@ref) `est` for which [`marginal_encodings`](@ref) is implemented -is given, then input data are first discretized before creating the contingency matrix. - -!!! note - The `Contingency` estimator differs from other [`ProbabilitiesEstimator`](@ref)s in that - it's not compatible with [`probabilities`](@ref) and other methods. Instead, it is - used to construct [`ContingencyMatrix`](@ref), from which probabilities can be - computed. -""" -Base.@kwdef struct Contingency{E <: Union{Nothing, ProbabilitiesEstimator}} <: ProbabilitiesEstimator - est::E = nothing -end diff --git a/src/methods/infomeasures/various/probabilities/LocalLikelihood.jl b/src/methods/infomeasures/various/probabilities/LocalLikelihood.jl deleted file mode 100644 index 42dead603..000000000 --- a/src/methods/infomeasures/various/probabilities/LocalLikelihood.jl +++ /dev/null @@ -1,141 +0,0 @@ -using Neighborhood: Euclidean, Chebyshev, KDTree, Theiler, NeighborNumber -using Neighborhood: bulksearch -using Distances: evaluate -using DelayEmbeddings.StateSpaceSets: SubStateSpaceSet -using LinearAlgebra: det, norm -using StateSpaceSets: StateSpaceSet -using StaticArrays: MVector, MMatrix, SVector, SMatrix, @SVector - -import ComplexityMeasures: entropy, total_outcomes, outcomes, probabilities, probabilities_and_outcomes - -export LocalLikelihood -""" - LocalLikelihood <: ProbabilitiesEstimator - LocalLikelihood(k = 5, w = 0, metric = Euclidean()) - -The `LocalLikelihood` estimator estimates the density around a given query point -by a Gaussian kernel informed by the local mean and covariance. - -To form probabilities from the pointwise density estimates, the densities are -simply sum-normalized to 1. - -## Outcome space - -The [`outcome_space`](@ref) for `LocalLikelihood` is the set of input points. -""" -Base.@kwdef struct LocalLikelihood{M} <: ProbabilitiesEstimator - k::Int = 5 - w::Int = 0 - metric::M = Euclidean() -end - -function point_densities(est::LocalLikelihood, x::AbstractStateSpaceSet{D}) where D - (; k, w, metric) = est - N = length(x) - # Modified heuristic from Gao et al. (2017): it is sufficient to consider the - # `K = max(floor(Int, log(N), k)` nearest neighbors neighbors of `x[i]` when - # estimating the local density. A global point-search is pointless and expensive. 
- kmax = max(floor(Int, log(N)), k) - - # The bandwidth `bws[i]` for the point `x[i]` is the distance to the `k`-th nearest - # neighbor of `x[i]`. The local density around, in contrast, in formed by the `kmax` - # nearest neighbors. - tree = KDTree(x, Euclidean()) - idxs, ds = bulksearch(tree, x, NeighborNumber(kmax), Theiler(w)) - bws = [d[k] for d in ds] - - S₁ = zeros(MVector{D, Float64}) - S₂ = zeros(MMatrix{D, D, Float64}) - densities = zeros(N) - for i = 1:N - xᵢ = x[i] - bwᵢ = bws[i] - neighborsᵢ = x[idxs[i]] - densities[i] = point_density!(S₁, S₂, est, xᵢ, bwᵢ, neighborsᵢ, N) - end - - return densities -end - -""" - point_density!(S₁, S₂, est::LocalLikelihood, xᵢ, bwᵢ, - neighborsᵢ::AbstractStateSpaceSet{D}) where D - -Estimate the density around point `xᵢ` using a local likehood estimator, which is -a generalization of kernel density estimation. This is done by fitting a local gaussian -distribution around `xᵢ` from its local neighborhood (represented the points `neighborsᵢ`). -The bandwidth `bwᵢ` is given by the distance from `xᵢ` to its `k`-th nearest neighbor. - -`S₁` is a pre-allocated length-`D` vector which holds the means, and `S₂` is a pre-allocated -`D`-by-`D` matrix which holds the covariances. Both `S₁` and `S₂` are zeroed every time -`point_density!` is called. -""" -function point_density!(S₁, S₂, est::LocalLikelihood, xᵢ, bwᵢ, neighborsᵢ::AbstractStateSpaceSet{D}, Ntot::Int) where D - N = length(neighborsᵢ) - S₀ = 0.0; - S₁ .= 0.0 - S₂ .= 0.0 - bwᵢ_sq = bwᵢ^2 - twice_bwᵢ_sq = 2*bwᵢ_sq - for (k, nⱼ) in enumerate(neighborsᵢ) - Δⱼ = (nⱼ - xᵢ) - dᵢ = evaluate(est.metric, nⱼ, xᵢ)^2 - eᵢ = exp(-dᵢ / twice_bwᵢ_sq) - S₀ += eᵢ - S₁ += eᵢ * (Δⱼ / bwᵢ) - S₂ += eᵢ * ((Δⱼ * transpose(Δⱼ)) / bwᵢ_sq) - end - # Weighted sample mean and sample variance - μ = S₁ / S₀ - Σ = S₂ / S₀ - S₁*transpose(S₁) / S₀^2 - - # if Σ is singular, we can't take its inverse either, so just return 0.0 - # density straight away. Heuristic from origina paper. - detΣ = det(Σ) - if detΣ < 1e-4^D - return 0.0 - end - - # The commented-out code follows the paper. This gives nonsense results. - #num = S₀ * exp(-0.5 * transpose(μ) * inv(Σ) * μ) - #den = N * (2π)^(D/2) * (bwᵢ^D) * sqrt(detΣ) - #return #num/den - # the following code is from https://github.com/wgao9/lnn/blob/master/lnn.py, - # by one of the original authors, - # but I have no idea where this formula comes from. It seems to work, though. - offset = transpose(μ) * inv(Σ) * μ - return -log(S₀) + - log(Ntot - 1) + - 0.5*D*log(2π) + - D*log(bwᵢ) + - 0.5*log(detΣ) + 0.5*offset[1, 1] -end - -function probabilities_and_outcomes(est::LocalLikelihood, x) - return Probabilities(point_densities(est, x)), x -end -probabilities(est::LocalLikelihood, x) = Probabilities(point_densities(est, x)) -outcomes(est::LocalLikelihood, x) = x -total_outcomes(x, est::LocalLikelihood) = length(x) - -function entropy(e::Renyi, est::LocalLikelihood, x) - !(e.q ≈ 1.0) || error("Renyi entropy for $(typeof(est)) estimator not defined for q = $(e.q) (i.e. Shannon entropy not defined)") - N = length(x) - ρs = point_densities(est, x) - ĴkLNN = sum(ρs .^ (e.q - 1)) / (bias(e, est, x) * N) - h = ĴkLNN / (e.q - 1) - return _convert_logunit(h, ℯ, e.base) -end - -function pt_in_unit_sphere(dim::Int) - u = @SVector randn(dim) - c = rand()^(1/dim) - m = sqrt(sum(u .^ 2)) - v = u ./ m .* c - return v -end -pts_in_unit_sphere(dim::Int, n::Int) = StateSpaceSet([pt_in_unit_sphere(dim) for i = 1:n]) - - -# TODO: implement. not sure how, though. Gao (2017) is not very clear... 
-bias(e::Renyi, est::LocalLikelihood, x) = 1.0 diff --git a/src/methods/infomeasures/various/probabilities/LoftsGaarden.jl b/src/methods/infomeasures/various/probabilities/LoftsGaarden.jl deleted file mode 100644 index fbf1c12fd..000000000 --- a/src/methods/infomeasures/various/probabilities/LoftsGaarden.jl +++ /dev/null @@ -1,51 +0,0 @@ -export LoftsGaarden -import ComplexityMeasures: outcome_space - -""" - Loftsgaarden <: ProbabilitiesEstimator - -The `Loftsgaarden` probabilities estimator is based on the `k`-th nearest neighbor -density estimatio from Loftsgaarden & Quesenberry (1965). - -It estimates probabilities by first estimating the density locally at each sample -point `xᵢ` using the distance from `xᵢ` to its `k`-th nearest neighbor. The density -distribution over the sample points is then normalized to form probabilities. - -## Outcome space - -The outcome space `Ω` for `LoftsGaarden` is the indices of the input data, `1:length(x)`. -The reason to not return the data points themselves is because duplicate data points may -not have same probabilities (due to having different neighbors). -""" -Base.@kwdef struct LoftsGaarden{M} <: ProbabilitiesEstimator - k::Int = 5 - w::Int = 0 - metric::M = Euclidean() -end - -function probabilities_and_outcomes(est::LoftsGaarden, x::AbstractStateSpaceSet{D}) where D - Probabilities(point_densities(est, x)), 1:length(x) -end - -outcome_space(x::AbstractStateSpaceSet, ::LoftsGaarden) = 1:length(x) - -function point_densities(est::LoftsGaarden, x::AbstractStateSpaceSet{D}) where D - (; k, w, metric) = est - N = length(x) - bᵥ = ComplexityMeasures.ball_volume(D) - - # The bandwidth `bws[i]` for the point `x[i]` is the distance to the `k`-th nearest - # neighbor of `x[i]`. The local density around, in contrast, in formed by the `kmax` - # nearest neighbors. - tree = KDTree(x, metric) - ds = last.(bulksearch(tree, x, NeighborNumber(k), Theiler(w))[2]) - - densities = zeros(N) - for (i, dᵢ) in enumerate(ds) - densities[i] = point_density(est, dᵢ, N, bᵥ) - end - - return densities -end - -point_density(est::LoftsGaarden, dᵢ, N, bᵥ) = est.k / (N*bᵥ*dᵢ) diff --git a/src/methods/infomeasures/various/probabilities/NNKDE.jl b/src/methods/infomeasures/various/probabilities/NNKDE.jl deleted file mode 100644 index ccaf670b4..000000000 --- a/src/methods/infomeasures/various/probabilities/NNKDE.jl +++ /dev/null @@ -1 +0,0 @@ -# Based on LocalLikelihood, but simplified. diff --git a/src/methods/information/core.jl b/src/methods/information/core.jl new file mode 100644 index 000000000..00ffa2618 --- /dev/null +++ b/src/methods/information/core.jl @@ -0,0 +1,156 @@ +import ComplexityMeasures: information +export information + +export MultivariateInformationMeasure +export MultivariateInformationMeasureEstimator +export BivariateInformationMeasure +export BivariateInformationMeasureEstimator + +export MutualInformationEstimator +export ConditionalMutualInformationEstimator + +# The estimator *always* has the measure definition as the first field with type +# parameter `M`. +""" + MultivariateInformationMeasureEstimator + +The supertype for all estimators of multivariate information measures. 
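+
+## Usage
+
+Concrete subtypes are given as the first argument to [`association`](@ref). A minimal
+sketch (assuming the [`JointProbabilities`](@ref) estimator and the [`MIShannon`](@ref)
+definition listed below; any other compatible measure/estimator combination is used the
+same way):
+
+```julia
+using CausalityTools
+x, y = rand(1000), rand(1000)
+# Wrap the `MIShannon` definition in a `JointProbabilities` estimator that discretizes
+# each input variable with a length-3 ordinal pattern encoding.
+est = JointProbabilities(MIShannon(), CodifyVariables(OrdinalPatterns(m = 3)))
+association(est, x, y) # ≈ 0 (up to estimator bias) for independent inputs
+```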
+
+## Generic implementations
+
+- [`JointProbabilities`](@ref)
+- [`EntropyDecomposition`](@ref)
+- [`MIDecomposition`](@ref)
+- [`CMIDecomposition`](@ref)
+
+## Dedicated implementations
+
+[`MutualInformationEstimator`](@ref)s:
+
+- [`KraskovStögbauerGrassberger1`](@ref)
+- [`KraskovStögbauerGrassberger2`](@ref)
+- [`GaoOhViswanath`](@ref)
+- [`GaoKannanOhViswanath`](@ref)
+- [`GaussianMI`](@ref)
+
+[`ConditionalMutualInformationEstimator`](@ref)s:
+
+- [`FPVP`](@ref)
+- [`MesnerShalizi`](@ref)
+- [`Rahimzamani`](@ref)
+- [`PoczosSchneiderCMI`](@ref)
+- [`GaussianCMI`](@ref)
+
+[`TransferEntropyEstimator`](@ref)s:
+
+- [`Zhu1`](@ref)
+- [`Lindner`](@ref)
+"""
+abstract type MultivariateInformationMeasureEstimator{M} <: InformationMeasureEstimator{M} end
+abstract type BivariateInformationMeasureEstimator{M} <: MultivariateInformationMeasureEstimator{M} end
+
+"""
+    MutualInformationEstimator
+
+The supertype for dedicated [`MutualInformation`](@ref) estimators.
+
+## Concrete implementations
+
+- [`KSG1`](@ref)
+- [`KSG2`](@ref)
+- [`GaoOhViswanath`](@ref)
+- [`GaoKannanOhViswanath`](@ref)
+- [`GaussianMI`](@ref)
+"""
+abstract type MutualInformationEstimator{M} <: BivariateInformationMeasureEstimator{M} end
+
+"""
+    ConditionalMutualInformationEstimator
+
+The supertype for dedicated [`ConditionalMutualInformation`](@ref) estimators.
+
+## Concrete implementations
+
+- [`FPVP`](@ref)
+- [`GaussianCMI`](@ref)
+- [`MesnerShalizi`](@ref)
+- [`Rahimzamani`](@ref)
+- [`PoczosSchneiderCMI`](@ref)
+"""
+abstract type ConditionalMutualInformationEstimator{M} <: MultivariateInformationMeasureEstimator{M} end
+
+"""
+    MultivariateInformationMeasure <: AssociationMeasure
+
+The supertype for all multivariate information-based measure definitions.
+
+## Definition
+
+Following [Datseris2024](@citet), we define a multivariate information measure as *any functional
+of a multidimensional probability mass function (PMF) or a multidimensional probability density*.
+
+## Implementations
+
+[`JointEntropy`](@ref) definitions:
+
+- [`JointEntropyShannon`](@ref)
+- [`JointEntropyRenyi`](@ref)
+- [`JointEntropyTsallis`](@ref)
+
+[`ConditionalEntropy`](@ref) definitions:
+
+- [`ConditionalEntropyShannon`](@ref)
+- [`ConditionalEntropyTsallisAbe`](@ref)
+- [`ConditionalEntropyTsallisFuruichi`](@ref)
+
+[`DivergenceOrDistance`](@ref) definitions:
+
+- [`HellingerDistance`](@ref)
+- [`KLDivergence`](@ref)
+- [`RenyiDivergence`](@ref)
+- [`VariationDistance`](@ref)
+
+[`MutualInformation`](@ref) definitions:
+
+- [`MIShannon`](@ref)
+- [`MIRenyiJizba`](@ref)
+- [`MIRenyiMartin`](@ref)
+- [`MITsallisAbe`](@ref)
+- [`MITsallisFuruichi`](@ref)
+
+[`ConditionalMutualInformation`](@ref) definitions:
+
+- [`CMIShannon`](@ref)
+- [`CMITsallisPapapetrou`](@ref)
+- [`CMIRenyiJizba`](@ref)
+- [`CMIRenyiPoczos`](@ref)
+- [`CMIRenyiSarbu`](@ref)
+
+[`TransferEntropy`](@ref) definitions:
+
+- [`TEShannon`](@ref)
+- [`TERenyiJizba`](@ref)
+
+Other definitions:
+
+- [`PartialMutualInformation`](@ref)
+"""
+abstract type MultivariateInformationMeasure <: AssociationMeasure end
+
+"""
+    BivariateInformationMeasure <: MultivariateInformationMeasure
+
+The supertype of all bivariate information measure definitions.
+""" +abstract type BivariateInformationMeasure <: MultivariateInformationMeasure end + +min_inputs_vars(::BivariateInformationMeasure) = 2 +max_inputs_vars(::BivariateInformationMeasure) = 2 + +function verify_number_of_inputs_vars(est::MultivariateInformationMeasureEstimator, n) + return verify_number_of_inputs_vars(est.definition, n) +end + +function size_match(measure::BivariateInformationMeasure, px::Probabilities, py::Probabilities) + size(px) == size(py) || throw(DimensionMismatch("px and py must have the same size")) +end diff --git a/src/methods/information/counts_and_probs/counts.jl b/src/methods/information/counts_and_probs/counts.jl new file mode 100644 index 000000000..53c192553 --- /dev/null +++ b/src/methods/information/counts_and_probs/counts.jl @@ -0,0 +1,205 @@ +using StatsBase: levelsmap +using ComplexityMeasures +using ComplexityMeasures: Counts + +import ComplexityMeasures: counts +import ComplexityMeasures: codify +export counts + +# ########################################################################################## +# Counts API. +# The following code extends the functionality of ComplexityMeasures.jl for multiple +# input data (ComplexityMeasures.jl only deals with single-variable estimation) +# ########################################################################################## +""" + counts(o::UniqueElements, x₁, x₂, ..., xₙ) → Counts{N} + counts(encoding::CodifyPoints, x₁, x₂, ..., xₙ) → Counts{N} + counts(encoding::CodifyVariables, x₁, x₂, ..., xₙ) → Counts{N} + +Construct an `N`-dimensional contingency table from the input iterables +`x₁, x₂, ..., xₙ` which are such that +`length(x₁) == length(x₂) == ⋯ == length(xₙ)`. + +If `x₁, x₂, ..., xₙ` are already discrete, then use [`UniqueElements`](@ref) as +the first argument to directly construct the joint contingency table. + +If `x₁, x₂, ..., xₙ` need to be discretized, provide as the first argument +- [`CodifyPoints`](@ref) (encodes every *point* in each of the input variables `xᵢ`s individually) +- [`CodifyVariables`](@ref) (encodes every `xᵢ` individually using a sliding window encoding). + +## Examples + +```julia +# Discretizing some non-discrete data using a sliding-window encoding for each variable +x, y = rand(100), rand(100) +c = CodifyVariables(OrdinalPatterns(m = 4)) +counts(c, x, y) + +# Discretizing the data by binning each individual data point +binning = RectangularBinning(3) +encoding = RectangularBinEncoding(binning, [x; y]) # give input values to ensure binning covers all data +c = CodifyPoints(encoding) +counts(c, x, y) + +# Counts table for already discrete data +n = 50 # all variables must have the same number of elements +x = rand(["dog", "cat", "mouse"], n) +y = rand(1:3, n) +z = rand([(1, 2), (2, 1)], n) + +counts(UniqueElements(), x, y, z) +``` + +See also: [`CodifyPoints`](@ref), [`CodifyVariables`](@ref), [`UniqueElements`](@ref), [`OutcomeSpace`](@ref), +[`probabilities`](@ref). +""" +function counts(o::UniqueElements, x::Vararg{VectorOrStateSpaceSet, N}) where N # this extends ComplexityMeasures.jl definition + # Get marginal probabilities and outcomes + L = length(x) + cts, lmaps, encoded_outcomes = counts_table(x...) + # lmaps[i]: a `Dict{outcome_type, Int}` containing the conversion between the + # internally encoded outcomes for the `i`-th input, and the actual outcomes + # for the `i`-th input. 
+ actual_outcomes = map(i -> to_outcomes(lmaps[i], encoded_outcomes[i]), tuple(1:L...)) + return Counts(cts, actual_outcomes) +end + +function counts(x::Vararg{VectorOrStateSpaceSet, N}) where N + if N == 1 + return ComplexityMeasures.counts(UniqueElements(), x) + else + return counts(UniqueElements(), x...) + end +end + +function to_outcomes(lmap::Dict, encoded_outcomes::Vector{<:Integer}) + # We want the encoded integers as keys and the actual outcomes as values. + lmap_swapped = Dict(values(lmap) .=> keys(lmap)) + return [lmap_swapped[ωᵢ] for ωᵢ in encoded_outcomes] +end + +function counts_table(x...) + Ls = length.(x); + if !allequal(Ls) + throw(ArgumentError("Input data must have equal lengths. Got lengths $Ls.")) + end + L = first(Ls) + + # Map the input data to integers. This ensures compatibility with *any* input type. + # Then, we can simply create a joint `StateSpaceSet{length(x), Int}` and use its elements + # as `CartesianIndex`es to update counts. + lvl = tolevels.(x) + levels = (first(l) for l in lvl) # TODO: construct SVector directly. + lmaps = [last(l) for l in lvl] + + # Create the table with correct dimensions, assumming the outcome space is + # fully determined by the elements that are present in `x`. + table_dims = length.(unique_elements.(x)); + cts = zeros(Int, table_dims) + + # Each element in `X` isa `SVector{m, Int}`, so can be treated as a cartesian index. + X = StateSpaceSet(levels...) + + # We sort, so that the positions in `cts` will correspond to the indices on + # each of the axes of `cts`. Note: these are not the *actual* outcomes, but the + # internal integer representation of each outcome. We need to use `lmaps` to convert + # back in the higher-level function. + for ix in X + cts[ix...] += 1 + end + + # One set of outcomes per input + outcomes = sort!.(unique!.(columns(X))) + return cts, lmaps, outcomes +end + +function to_cartesian(x) + (CartesianIndex.(xᵢ...) for xᵢ in x) +end + +""" + tolevels!(levels, x) → levels, dict + tolevels(x) → levels, dict + +Apply the bijective map ``f : \\mathcal{Q} \\to \\mathbb{N}^+`` to each `x[i]` and store +the result in `levels[i]`, where `levels` is a pre-allocated integer vector such that +`length(x) == length(levels)`. + +``\\mathcal{Q}`` can be any space, and each ``q \\in \\mathcal{Q}`` is mapped to a unique +integer in the range `1, 2, …, length(unique(x))`. This is useful for integer-encoding +categorical data such as strings, or other complex discrete data structures. + +The single-argument method allocated a `levels` vector internally. + +`dict` gives the inverse mapping. +""" +function tolevels!(levels, x) + @assert length(levels) == length(x) + lmap = _levelsmap(x) + for i in eachindex(x) + levels[i] = lmap[x[i]] + end + return levels, lmap +end + +function tolevels(x) + lmap = _levelsmap(x) + levels = zeros(Int, length(x)) + for i in eachindex(x) + levels[i] = lmap[x[i]] + end + return levels, lmap +end + +# Ugly hack, because levelsmap doesn't work out-of-the-box for statespacesets. +_levelsmap(x) = levelsmap(x) +_levelsmap(x::AbstractStateSpaceSet) = levelsmap(x.data) + +# So that we can mix discrete-valued state space sets with discrete-valued regular +# vectors. 
+unique_elements(x) = unique(x)
+unique_elements(x::AbstractStateSpaceSet) = unique(x.data)
+
+function marginal(c::Counts; dims = 1:ndims(c))
+    alldims = 1:ndims(c)
+    reduce_dims = (setdiff(alldims, dims)...,)
+    marginal = dropdims(sum(c.cts, dims = reduce_dims), dims = reduce_dims)
+    include_idxs = setdiff(alldims, reduce_dims)
+    new_outcomes = c.outcomes[include_idxs]
+    new_dimlabels = c.dimlabels[include_idxs]
+    return Counts(marginal, new_outcomes, new_dimlabels)
+end
+
+# ----------------------------------------------------------------
+# Estimation from data
+# ----------------------------------------------------------------
+
+# Per point/row
+# ----------------------------------------------------------------
+# If multiple encodings are given, the number of encodings must match the number of
+# input variables.
+function counts(encoding::CodifyPoints{N}, x::Vararg{Any, N}) where {N}
+    x̂ = codify(encoding, x...)
+    return counts(UniqueElements(), x̂...)
+end
+
+# If only one encoding is given, apply same encoding to all points
+function counts(encoding::CodifyPoints{1}, x::Vararg{Any, N}) where {N}
+    e = first(encoding.encodings)
+    x̂ = ([encode(e, pt) for pt in xₖ] for xₖ in x)
+    return counts(UniqueElements(), x̂...)
+end
+
+# Per variable/column
+# ----------------------------------------------------------------
+function counts(discretization::CodifyVariables{1}, x::Vararg{ArrayOrStateSpaceSet, N}) where N
+    o = first(discretization.outcome_spaces)
+    # Treat 1D state space sets as vectors, so we can apply the outcome space sequentially.
+    # TODO: show warning or not? I think this can be silent, because I can't really think of a situation
+    # where the outcome space couldn't be applied to the raw values of a 1D dataset.
+    # @warn "`CodifyVariables` is meant for sequential application over vectors. You provided a 1D `StateSpaceSet`. Treating this 1D input dataset as a vector..."
+    x̂ = (codify(o, xₖ isa AbstractStateSpaceSet{1} ? as_vec(xₖ) : xₖ) for xₖ in x)
+    return counts(x̂...)
+end
+
+as_vec(x::AbstractStateSpaceSet{1}) = [first(xᵢ) for xᵢ in vec(x)]
\ No newline at end of file
diff --git a/src/methods/information/counts_and_probs/counts_and_probs.jl b/src/methods/information/counts_and_probs/counts_and_probs.jl
new file mode 100644
index 000000000..08bee174b
--- /dev/null
+++ b/src/methods/information/counts_and_probs/counts_and_probs.jl
@@ -0,0 +1,23 @@
+export Discretization
+
+# The type parameter `N` indicates the number of input datasets to be discretized.
+"""
+    Discretization
+
+The supertype of all discretization schemes.
+
+## Concrete implementations
+
+- [`CodifyVariables`](@ref)
+- [`CodifyPoints`](@ref)
+"""
+abstract type Discretization{N} end
+
+
+# Concrete ways of encoding multivariate data, each defined as a type.
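+#
+# Rough usage sketch (hypothetical data; the docstrings in the included files contain the
+# authoritative examples):
+#
+#   x, y = rand(200), rand(200)
+#   codify(CodifyVariables(OrdinalPatterns(m = 3)), x, y)    # sliding-window encoding, per variable
+#   codify(CodifyPoints(OrdinalPatternEncoding(3)), StateSpaceSet(rand(200, 3)))  # encoding per point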
+include("encoding/codify_points.jl") +include("encoding/codify_variables.jl") + +# Counting and probabilities (contingency tables and probabilities for multivariate data) +include("counts.jl") +include("probabilities.jl") diff --git a/src/methods/information/counts_and_probs/encoding/codify_points.jl b/src/methods/information/counts_and_probs/encoding/codify_points.jl new file mode 100644 index 000000000..6aa27766f --- /dev/null +++ b/src/methods/information/counts_and_probs/encoding/codify_points.jl @@ -0,0 +1,180 @@ +import ComplexityMeasures: codify +import ComplexityMeasures: counts +using ComplexityMeasures: Encoding + +export CodifyPoints +export codify + +""" + CodifyPoints{N} + CodifyPoints(encodings::NTuple{N, Encoding}) + +`CodifyPoints` points is a [`Discretization`](@ref) scheme that encodes input data points +*without* applying any sequential transformation to the input (as opposed to +[`CodifyVariables`](@ref), which may apply some transformation before encoding). + +## Usage + +- Use with [`codify`](@ref)` to encode/discretize input variable on a point-by-point basis. + +## Compatible encodings + +- [`GaussianCDFEncoding`](@ref) +- [`OrdinalPatternEncoding`](@ref) +- [`RelativeMeanEncoding`](@ref) +- [`RelativeFirstDifferenceEncoding`](@ref) +- [`UniqueElementsEncoding`](@ref) +- [`RectangularBinEncoding`](@ref) +- [`CombinationEncoding`](@ref) + +## Description + +Given `x::AbstractStateSpaceSet...`, where the `i`-th dataset is assumed to represent +a single series of measurements, `CodifyPoints` encodes each point `pₖ ∈ x[i]` +using some [`Encoding`](@ref)(s), *without* applying any (sequential) transformation to +the `x[i]` first. This behaviour is different to [`CodifyVariables`](@ref), which +*does* apply a transformation to `x[i]` before encoding. + +If `length(x) == N` (i.e. there are `N` input dataset), then `encodings` must be a tuple +of `N` [`Encoding`](@ref). Alternatively, if `encodings` is a single [`Encoding`](@ref), +then that same encoding is applied to every `x[i]`. + +## Examples + +```julia +using CausalityTools + +# The same encoding on two input datasets +x = StateSpaceSet(rand(100, 3)) +y = StateSpaceSet(rand(100, 3)) +encoding_ord = OrdinalPatternEncoding(3) +cx, cy = codify(CodifyPoints(encoding_ord), x, y) + +# Different encodings on multiple datasets +z = StateSpaceSet(rand(100, 2)) +encoding_bin = RectangularBinEncoding(RectangularBinning(3), z) +d = CodifyPoints(encoding_ord, encoding_ord, encoding_bin) +cx, cy, cz = codify(d, x, y, z) +``` +""" +struct CodifyPoints{N} <: Discretization{N} + encodings::NTuple{N, Encoding} + function CodifyPoints(encodings::NTuple{N, Encoding}) where N + if !(N ≥ 1) + throw(ArgumentError("CodifyPoints requires at least 1 dimensions")) + end + new{N}(encodings) + end +end +Base.getindex(e::CodifyPoints, i) = getindex(e.encodings, i) + +function CodifyPoints(encodings::Vararg{Encoding, N}) where N + return CodifyPoints(tuple(encodings...)) +end + +""" + codify(encoding::CodifyPoints{N}, x::Vararg{<:AbstractStateSpaceSet, N}) + +Codify each timeseries `xᵢ ∈ x` according to the given `encoding`. + +## Examples + +```julia +x = StateSpaceSet(rand(10000, 2)) +y = StateSpaceSet(rand(10000, 3)) +z = StateSpaceSet(rand(10000, 2)) + +# For `x`, we use a relative mean encoding. +ex = RelativeMeanEncoding(0.0, 1.0, n = 3) +# For `y`, we use a combination encoding. +ey = CombinationEncoding( + RelativeMeanEncoding(0.0, 1.0, n = 2), + OrdinalPatternEncoding(3) +) +# For `z`, we use ordinal patterns to encode. 
+ez = OrdinalPatternEncoding(2) + +# Codify two input datasets gives a 2-tuple of Vector{Int} +codify(CodifyPoints(ex, ey), x, y) + +# Codify three input datasets gives a 3-tuple of Vector{Int} +codify(CodifyPoints(ex, ey, ez), x, y, z) +``` +""" +function codify(encoding::CodifyPoints, x) end + +function codify(encoding::CodifyPoints{1}, x::Vararg{Any, 1}) + e = first(encoding.encodings) + x̂ = codify_individual_dataset(e, first(x)) + return x̂::Vector{<:Integer} +end + +# Apply the same encoding to all input datasets. +function codify(encoding::CodifyPoints{1}, x::Vararg{Any, M}) where {M} + verify_input(encoding, x...) + e = first(encoding.encodings) + x̂ = map(k -> codify_individual_dataset(e, x[k]), tuple(1:M...)) + + return x̂::NTuple{M, Vector{<:Integer}} +end + + +function codify(encoding::CodifyPoints{N}, x::Vararg{Any, M}) where {N, M} + verify_input(encoding, x...) + x̂ = map(k -> codify_individual_dataset(encoding[k], x[k]), tuple(1:M...)) + + return x̂::NTuple{M, Vector{<:Integer}} +end + +function verify_input(encoding::CodifyPoints{N}, x...) where N + M = length(x) + if N != M && N != 1 + s = "The given `encoding` is for $N input datasets. $M input datasets were given." + throw(ArgumentError(s)) + end + Ls = length.(x) + if !allequal(Ls) + throw(ArgumentError("All input datasets must have the same length.")) + end +end + +function codify_individual_dataset(encoding::Encoding, x) + if !(typeof(x) <: AbstractStateSpaceSet) + encoding = UniqueElementsEncoding(x) + x̂ = encode.(Ref(encoding), x) + return x̂ + end + + # x̂[i] := the integer code for the state vector `x[i]`. + x̂ = zeros(Int, length(x)) + @inbounds for i in eachindex(x) + x̂[i] = encode(encoding, x[i]) + end + return x̂ +end + + # The decoding step on the second-to-last line is not possible without actually providing + # the encodings. Therefore, we need to override the Generic implementation of + # `counts`. +function counts(encoding::CodifyPoints, x...) + # This converts each dataset `x[i]::StateSpaceSet` into `x̂[i]::Vector{Int}`, + # where `length(x[i]) == length(x̂[i])`. + x̂ = codify(encoding, x...) + # lmaps[i]: a `Dict{outcome_type, Int}` containing the conversion between the + # internally encoded outcomes for the `i`-th input, and the actual outcomes + # for the `i`-th input. + cts, lmaps, encoded_outcomes = counts_table(x̂...) + + # Actual outcomes (these outcomes map correspond to those in `x̂`). + # We can't actually decode any further than this. + L = length(x) + outcomes = map(i -> to_outcomes(lmaps[i], encoded_outcomes[i]), tuple(1:L...)) + + # Marginal labels are the decoded outcomes. + decoded_outcomes = map(i -> decode_outcomes(encoding[i], outcomes[i]), tuple(1:L...)) + return Counts(cts, decoded_outcomes) +end + +function decode_outcomes(encoding::Encoding, outcomes::Vector{<:Integer}) + return ComplexityMeasures.decode.(Ref(encoding), outcomes) +end diff --git a/src/methods/information/counts_and_probs/encoding/codify_variables.jl b/src/methods/information/counts_and_probs/encoding/codify_variables.jl new file mode 100644 index 000000000..adeab3429 --- /dev/null +++ b/src/methods/information/counts_and_probs/encoding/codify_variables.jl @@ -0,0 +1,115 @@ +using ComplexityMeasures +import ComplexityMeasures: codify +import ComplexityMeasures: OutcomeSpace + +using DelayEmbeddings: embed +export CodifyVariables +export codify + +# TODO: implement this Generically for `Encodings` too (will require type-parameterized +# number of elements for the Encodings). 
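+
+# Rough sketch of the column-wise behaviour documented below (hypothetical equal-length
+# vectors `x` and `y`): applying `CodifyVariables` with a single outcome space is
+# equivalent to codifying each variable separately with that outcome space, i.e.
+#
+#   d = CodifyVariables(OrdinalPatterns(m = 2))
+#   codify(d, x, y) == (codify(OrdinalPatterns(m = 2), x), codify(OrdinalPatterns(m = 2), y))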
+ +""" + CodifyVariables <: Discretization + CodifyVariables(outcome_space::OutcomeSpace) + +The `CodifyVariables` discretization scheme quantises input data in a column-wise manner +using the given `outcome_space`. + +## Compatible outcome spaces + +- [`UniqueElements`](@ref) (for when data are pre-discretized) +- [`BubbleSortSwaps`](@ref) +- [`CosineSimilarityBinning`](@ref) +- [`OrdinalPatterns`](@ref) +- [`Dispersion`](@ref) + +# Description + +The main difference between `CodifyVariables` and [`CodifyPoints`] is that the former +uses [`OutcomeSpace`](@ref)s for discretization. This usually means that some +transformation is applied to the data before discretizing. For example, some outcome +constructs a delay embedding from the input (and thus encodes sequential information) +before encoding the data. + +Specifically, given `x::AbstractStateSpaceSet...`, where the `i`-th dataset `x[i]` +is assumed to represent a single series of measurements, `CodifyVariables` encodes + `x[i]` by [`codify`](@ref)-ing into a series of integers +using an appropriate [`OutcomeSpace`](@ref). This is typically done by first +sequentially transforming the data and then running sliding window (the width of +the window is controlled by `outcome_space`) across the data, and then encoding the +values within each window to an integer. + +## Examples + +```julia +using CausalityTools +x, y = rand(100), rand(100) +d = CodifyVariables(OrdinalPatterns(m=2)) +cx, cy = codify(d, x, y) +``` +""" +struct CodifyVariables{N, E} <: Discretization{N} + outcome_spaces::NTuple{N, OutcomeSpace} + function CodifyVariables(outcome_spaces::NTuple{N, OutcomeSpace}) where N + if N > 1 + s = "It is currently only possible to use the same `OutcomeSpace` for all " * + "variables. Got $N different encodings" + throw(ArgumentError(s)) + end + new{N, eltype(outcome_spaces)}(outcome_spaces) + end +end + +function CodifyVariables(o::OutcomeSpace) + return CodifyVariables((o,)) +end + +""" + codify(d::CodifyVariables, x::Vararg{<:AbstractStateSpaceSet, N}) + codify(d::CodifyPoints, x::Vararg{<:AbstractStateSpaceSet, N}) + +Codify each timeseries `xᵢ ∈ x` according to the given encoding/discretization `d`. + +## Compatible discretizations + +- [`CodifyVariables`](@ref) +- [`CodifyPoints`](@ref) + +## Examples + +```julia +using CausalityTools + +# Sliding window encoding +x = [0.1, 0.2, 0.3, 0.2, 0.1, 0.0, 0.5, 0.3, 0.5] +xc1 = codify(CodifyVariables(OrdinalPatterns(m=2)), x) # should give [1, 1, 2, 2, 2, 1, 2, 1] +xc2 = codify(OrdinalPatterns(m=2), x) # equivalent +length(xc1) < length(x) # should be true, because `OrdinalPatterns` delay embeds. + +# Point-by-point encoding +x, y = StateSpaceSet(rand(100, 3)), StateSpaceSet(rand(100, 3)) +cx, cy = codify(CodifyPoints(OrdinalPatternEncoding(3)), x, y) +``` +""" +function codify(encoding::CodifyVariables, x) end + +function codify(encoding::CodifyVariables{1}, x::Vararg{Any, 1}) + e = first(encoding.outcome_spaces) + x̂ = ComplexityMeasures.codify(e, first(x)) + return x̂::Vector{<:Integer} +end + +function codify(encoding::CodifyVariables{1}, x::NTuple{1}) + return (codify(encoding, x...), ) +end + +function codify(encoding::CodifyVariables{1}, x::Vararg{Any, N}) where N + e = first(encoding.outcome_spaces) + x̂ = map(xᵢ -> ComplexityMeasures.codify(e, xᵢ), x) + return x̂::NTuple{N, Vector{<:Integer}} +end + +function codify(encoding::CodifyVariables{1}, x::AbstractStateSpaceSet) + return codify(encoding, columns(x)...) 
+end diff --git a/src/methods/information/counts_and_probs/probabilities.jl b/src/methods/information/counts_and_probs/probabilities.jl new file mode 100644 index 000000000..4fccc671a --- /dev/null +++ b/src/methods/information/counts_and_probs/probabilities.jl @@ -0,0 +1,130 @@ +import ComplexityMeasures: probabilities +export marginal + +# ########################################################################################## +# Probabilities API. +# The following code extends the functionality of ComplexityMeasures.jl for multiple +# input variables (ComplexityMeasures.jl only deals with single-variable estimation) +# ########################################################################################## + +""" + probabilities(o::UniqueElements, x₁, x₂, ..., xₙ) → Counts{N} + probabilities(encoding::CodifyPoints, x₁, x₂, ..., xₙ) → Counts{N} + probabilities(encoding::CodifyVariables, x₁, x₂, ..., xₙ) → Counts{N} + +Construct an `N`-dimensional [`Probabilities`](@ref) array from the input iterables +`x₁, x₂, ..., xₙ` which are such that +`length(x₁) == length(x₂) == ⋯ == length(xₙ)`. + +## Description + +Probabilities are computed by first constructing a joint contingency matrix in the form +of a [`Counts`](@ref) instance. + +If `x₁, x₂, ..., xₙ` are already discrete, then use [`UniqueElements`](@ref) as +the first argument to directly construct the joint contingency table. + +If `x₁, x₂, ..., xₙ` need to be discretized, provide as the first argument +- [`CodifyPoints`](@ref) (encodes every *point* in each of the input variables `xᵢ`s individually) +- [`CodifyVariables`](@ref) (encodes every `xᵢ` individually using a sliding window encoding). + +## Examples + +```julia +# Discretizing some non-discrete data using a sliding-window encoding for each variable +x, y = rand(100), rand(100) +c = CodifyVariables(OrdinalPatterns(m = 4)) +probabilities(c, x, y) + +# Discretizing the data by binning each individual data point +binning = RectangularBinning(3) +encoding = RectangularBinEncoding(binning, [x; y]) # give input values to ensure binning covers all data +c = CodifyPoints(encoding) +probabilities(c, x, y) + +# Joint probabilities for already discretized data +n = 50 # all variables must have the same number of elements +x = rand(["dog", "cat", "mouse"], n) +y = rand(1:3, n) +z = rand([(1, 2), (2, 1)], n) + +probabilities(UniqueElements(), x, y, z) +``` + +See also: [`CodifyPoints`](@ref), [`CodifyVariables`](@ref), [`UniqueElements`](@ref), [`OutcomeSpace`](@ref). +""" +function probabilities(o::OutcomeSpace) end + +function probabilities(o::OutcomeSpace, x::Vararg{VectorOrStateSpaceSet, N}) where N # this extends ComplexityMeasures.jl definition + return Probabilities(counts(o, x...)) +end +function probabilities(est::RelativeAmount, c::Counts{<:Integer, N}) where N + probs = Probabilities(c) + return Probabilities(probs.p, c.outcomes, c.dimlabels) +end + +function probabilities(est::ProbabilitiesEstimator, c::Counts{<:Integer, N}) where N + return Probabilities(probs.p, c.outcomes, c.dimlabels) +end + +# Not providing any discretization defaults to `RelativeAmount` estimation. +function probabilities(x::Vararg{VectorOrStateSpaceSet, N}) where N + cts = counts(UniqueElements(), x...) 
+    probs = probabilities(RelativeAmount(), cts)
+    return Probabilities(probs.p, cts.outcomes, cts.dimlabels)
+end
+
+"""
+    marginal(p::Probabilities; dims = 1:ndims(p))
+    marginal(c::Counts; dims = 1:ndims(c))
+
+Given a set of counts `c` (a contingency table), or a multivariate probability mass
+function `p`, return the marginal counts/probabilities along the given `dims`.
+"""
+function marginal(p::Probabilities; dims = 1:ndims(p))
+    alldims = 1:ndims(p)
+    reduce_dims = (setdiff(alldims, dims)...,)
+    # if all(a == b for (a, b) in zip(reduce_dims, alldims))
+    #     @show "not taking marginal for $dims and $p"
+    #     return p
+    # end
+    marg = dropdims(sum(p.p, dims = reduce_dims), dims = reduce_dims)
+    include_idxs = setdiff(alldims, reduce_dims)
+    N = length(include_idxs)
+    if N > 0
+        new_outcomes = p.outcomes[include_idxs]
+        new_dimlabels = p.dimlabels[include_idxs]
+
+        if marg isa Number
+            marg = [marg]
+        end
+        return Probabilities(marg, new_outcomes, new_dimlabels)
+    end
+    return Probabilities(marg)
+
+end
+
+
+# ----------------------------------------------------------------
+# Estimation from data
+# ----------------------------------------------------------------
+
+# Per point/row
+# ----------------------------------------------------------------
+function probabilities(encoding::CodifyPoints{1}, x::Vararg{Any, N}) where {N}
+    cts = counts(encoding, x...)
+    return Probabilities(cts)
+end
+
+function probabilities(encoding::CodifyPoints{N}, x::Vararg{Any, N}) where {N}
+    cts = counts(encoding, x...)
+    return Probabilities(cts)
+end
+
+# Per variable/column
+# ----------------------------------------------------------------
+function probabilities(discretization::CodifyVariables{1}, x::Vararg{ArrayOrStateSpaceSet, N}) where N
+    cts = counts(discretization, x...)
+    return probabilities(RelativeAmount(), cts)
+end
\ No newline at end of file
diff --git a/src/methods/information/definitions/conditional_entropies/ConditionalEntropyShannon.jl b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyShannon.jl
new file mode 100644
index 000000000..c531ddb7b
--- /dev/null
+++ b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyShannon.jl
@@ -0,0 +1,100 @@
+using ComplexityMeasures: Shannon
+import ComplexityMeasures: log_with_base
+
+export ConditionalEntropyShannon
+
+"""
+    ConditionalEntropyShannon <: ConditionalEntropy
+    ConditionalEntropyShannon(; base = 2)
+
+The [`Shannon`](@ref) conditional entropy measure.
+
+## Usage
+
+- Use with [`association`](@ref) to compute the Shannon conditional entropy between
+  two variables.
+
+## Compatible estimators
+
+- [`JointProbabilities`](@ref)
+
+## Discrete definition
+
+### Sum formulation
+
+The conditional entropy between discrete random variables
+``X`` and ``Y`` with finite ranges ``\\mathcal{X}`` and ``\\mathcal{Y}`` is defined as
+
+```math
+H^{S}(X | Y) = -\\sum_{x \\in \\mathcal{X}, y \\in \\mathcal{Y}} p(x, y) \\log(p(x | y)).
+```
+
+This is the definition used when calling [`entropy_conditional`](@ref) with a
+[`ContingencyMatrix`](@ref).
+
+### Two-entropies formulation
+
+Equivalently, the following difference of entropies holds:
+
+```math
+H^S(X | Y) = H^S(X, Y) - H^S(Y),
+```
+
+where ``H^S(\\cdot)`` and ``H^S(\\cdot | \\cdot)`` are the [`Shannon`](@ref) entropy and
+Shannon joint entropy, respectively. This is the definition used when calling
+[`entropy_conditional`](@ref) with a [`ProbabilitiesEstimator`](@ref).
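+
+As a rough, self-contained sketch (exact output depends on the sampled data; see the worked
+examples under "Estimation" below for the canonical usage):
+
+```julia
+using CausalityTools
+# Two independent categorical variables. `CodifyVariables(UniqueElements())` treats each
+# variable as already discrete, so the joint probabilities are plain relative frequencies.
+x = rand(["a", "b", "c"], 10_000)
+y = rand(["hello", "world"], 10_000)
+est = JointProbabilities(ConditionalEntropyShannon(; base = 2), CodifyVariables(UniqueElements()))
+association(est, x, y) # ≈ log2(3) ≈ 1.58, since knowing `y` tells us nothing about `x`
+```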
+ +## Differential definition + +The differential conditional Shannon entropy is analogously defined as + +```math +H^S(X | Y) = h^S(X, Y) - h^S(Y), +``` + +where ``h^S(\\cdot)`` and ``h^S(\\cdot | \\cdot)`` are the [`Shannon`](@ref) +differential entropy and Shannon joint differential entropy, respectively. This is the +definition used when calling [`entropy_conditional`](@ref) with a +[`DifferentialEntropyEstimator`](@ref). + +## Estimation + +- [Example 1](@ref example_ConditionalEntropyShannon_analytical): Analytical example from Cover & Thomas's book. +- [Example 2](@ref example_ConditionalEntropyShannon_JointProbabilities_CodifyVariables_UniqueElements): + [`JointProbabilities`](@ref) estimator with[`CodifyVariables`](@ref) discretization and + [`UniqueElements`](@ref) outcome space on categorical data. +- [Example 3](@ref example_ConditionalEntropyShannon_JointProbabilities_CodifyPoints_UniqueElementsEncoding): + [`JointProbabilities`](@ref) estimator with [`CodifyPoints`](@ref) discretization and [`UniqueElementsEncoding`](@ref) + encoding of points on numerical data. +""" +Base.@kwdef struct ConditionalEntropyShannon{B} <: ConditionalEntropy + base::B = 2 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:ConditionalEntropyShannon}, inputs...) + probs = probabilities(est.discretization, inputs...) + return association(est.definition, probs) +end + +function association(definition::ConditionalEntropyShannon, pxy::Probabilities{T, 2}) where {T} + base = definition.base + Nx, Ny = size(pxy) + py = marginal(pxy, dims = 2) + + ce = 0.0 + log0 = log_with_base(base) + for j in 1:Ny + pyⱼ = py[j] + for i in 1:Nx + pxyᵢⱼ = pxy[i, j] + if pxyᵢⱼ != 0.0 + ce += pxyᵢⱼ * log0(pxyᵢⱼ / pyⱼ) + end + end + end + return -ce +end + diff --git a/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisAbe.jl b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisAbe.jl new file mode 100644 index 000000000..5721d94f0 --- /dev/null +++ b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisAbe.jl @@ -0,0 +1,72 @@ +using ComplexityMeasures: Tsallis + +export ConditionalEntropyTsallisAbe + +""" + ConditionalEntropyTsallisAbe <: ConditionalEntropy + ConditionalEntropyTsallisAbe(; base = 2, q = 1.5) + +[Abe2001](@citet)'s discrete Tsallis conditional entropy measure. + +## Usage + +- Use with [`association`](@ref) to compute the Tsallis-Abe conditional entropy between + two variables. + +## Compatible estimators + +- [`JointProbabilities`](@ref) + +## Definition + +Abe & Rajagopal's Tsallis conditional entropy between discrete random variables +``X`` and ``Y`` with finite ranges ``\\mathcal{X}`` and ``\\mathcal{Y}`` is defined as + +```math +H_q^{T_A}(X | Y) = \\dfrac{H_q^T(X, Y) - H_q^T(Y)}{1 + (1-q)H_q^T(Y)}, +``` + +where ``H_q^T(\\cdot)`` and ``H_q^T(\\cdot, \\cdot)`` is the [`Tsallis`](@ref) +entropy and the joint Tsallis entropy. + +## Estimation + +- [Example 1](@ref example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyVariables_UniqueElements): + [`JointProbabilities`](@ref) estimator with[`CodifyVariables`](@ref) discretization and + [`UniqueElements`](@ref) outcome space on categorical data. 
+- [Example 2](@ref example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyPoints_UniqueElementsEncoding): + [`JointProbabilities`](@ref) estimator with [`CodifyPoints`](@ref) discretization and [`UniqueElementsEncoding`](@ref) + encoding of points on numerical data. +""" +Base.@kwdef struct ConditionalEntropyTsallisAbe{B, Q} <: ConditionalEntropy + base::B = 2 + q::Q = 1.5 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:ConditionalEntropyTsallisAbe}, inputs...) + probs = probabilities(est.discretization, inputs...) + return association(est.definition, probs) +end + +function association(definition::ConditionalEntropyTsallisAbe, pxy::Probabilities{T, 2}) where {T} + (; base, q) = definition + + if q == 1 # if shannon, normalize + return association(ConditionalEntropyShannon(; base), pxy) + end + + py = marginal(pxy, dims = 2) + # Definition 7 in Abe & Rajagopal (2001) + hjoint = 1 / (1 - q) * (sum(pxy .^ 2) - 1) + + # The marginal Tsallis entropy for the second variable + hy = information(Tsallis(; q, base), py) + + # Equation 13 in Abe & Rajagopal (2001) + ce = (hjoint - hy) / (1 + (1 - q)*hy) + + return ce +end \ No newline at end of file diff --git a/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisFuruichi.jl b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisFuruichi.jl new file mode 100644 index 000000000..ec91235d5 --- /dev/null +++ b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisFuruichi.jl @@ -0,0 +1,89 @@ +export ConditionalEntropyTsallisFuruichi + +""" + ConditionalEntropyTsallisFuruichi <: ConditionalEntropy + ConditionalEntropyTsallisFuruichi(; base = 2, q = 1.5) + +Furuichi (2006)'s discrete Tsallis conditional entropy definition. + +## Usage + +- Use with [`association`](@ref) to compute the Tsallis-Furuichi conditional entropy between + two variables. + +## Compatible estimators + +- [`JointProbabilities`](@ref) + +## Definition + +Furuichi's Tsallis conditional entropy between discrete random variables +``X`` and ``Y`` with finite ranges ``\\mathcal{X}`` and ``\\mathcal{Y}`` is defined as + +```math +H_q^T(X | Y) = -\\sum_{x \\in \\mathcal{X}, y \\in \\mathcal{Y}} +p(x, y)^q \\log_q(p(x | y)), +``` + +``\\ln_q(x) = \\frac{x^{1-q} - 1}{1 - q}`` and ``q \\neq 1``. For ``q = 1``, ``H_q^T(X | Y)`` reduces to the Shannon conditional +entropy: + +```math +H_{q=1}^T(X | Y) = -\\sum_{x \\in \\mathcal{X}, y \\in \\mathcal{Y}} = +p(x, y) \\log(p(x | y)) +``` + +If any of the entries of the marginal distribution for `Y` are zero, or the q-logarithm +is undefined for a particular value, then the measure is undefined and `NaN` is returned. + +## Estimation + +- [Example 1](@ref example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyVariables_UniqueElements): + [`JointProbabilities`](@ref) estimator with[`CodifyVariables`](@ref) discretization and + [`UniqueElements`](@ref) outcome space on categorical data. +- [Example 2](@ref example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyPoints_UniqueElementsEncoding): + [`JointProbabilities`](@ref) estimator with [`CodifyPoints`](@ref) discretization and [`UniqueElementsEncoding`](@ref) + encoding of points on numerical data. 
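A hedged sketch of direct evaluation from a precomputed joint pmf (assuming the `probabilities` and `association` methods defined in this patch; the `q → 1` case falls back to the Shannon conditional entropy, as stated above):

```julia
using CausalityTools

x = rand(1:4, 500)
y = rand(1:3, 500)
pxy = probabilities(UniqueElements(), x, y)   # 4×3 joint pmf

association(ConditionalEntropyTsallisFuruichi(q = 1.5), pxy)

# q = 1 reduces to the Shannon conditional entropy.
association(ConditionalEntropyTsallisFuruichi(q = 1.0), pxy) ≈
    association(ConditionalEntropyShannon(), pxy)
```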
+""" +Base.@kwdef struct ConditionalEntropyTsallisFuruichi{B, Q} <: ConditionalEntropy + base::B = 2 + q::Q = 1.5 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:ConditionalEntropyTsallisFuruichi}, inputs...) + probs = probabilities(est.discretization, inputs...) + return association(est.definition, probs) +end + +function association(definition::ConditionalEntropyTsallisFuruichi, pxy::Probabilities{T, 2}) where {T} + (; base, q) = definition + Nx, Ny = size(pxy) + if q == 1 + return association(ConditionalEntropyShannon(; base), pxy) + end + py = marginal(pxy, dims = 2) + ce = 0.0 + qlog = logq0(q) + for j in 1:Ny + pyⱼ = py[j] + for i in 1:Nx + pxyᵢⱼ = pxy[i, j] + ce += pxyᵢⱼ^q * qlog(pxyᵢⱼ / pyⱼ) + end + end + ce *= -1.0 + return ce +end + + +function logq0(q) + if q == 1.0 + return x -> zero(x) + else + return x -> (x^(1 - q) - 1)/(1 - q) + end +end + diff --git a/src/methods/information/definitions/conditional_entropies/conditional_entropies.jl b/src/methods/information/definitions/conditional_entropies/conditional_entropies.jl new file mode 100644 index 000000000..2ba4c0b9f --- /dev/null +++ b/src/methods/information/definitions/conditional_entropies/conditional_entropies.jl @@ -0,0 +1,21 @@ +export ConditionalEntropy + +""" + ConditionalEntropy <: MultivariateInformationMeasure + +The supertype for all conditional entropy measures. + +## Concrete subtypes + +- [`ConditionalEntropyShannon`](@ref) +- [`ConditionalEntropyTsallisAbe`](@ref) +- [`ConditionalEntropyTsallisFuruichi`](@ref) +""" +abstract type ConditionalEntropy <: MultivariateInformationMeasure end + +min_inputs_vars(::ConditionalEntropy) = 2 +max_inputs_vars(::ConditionalEntropy) = 2 + +include("ConditionalEntropyShannon.jl") +include("ConditionalEntropyTsallisAbe.jl") +include("ConditionalEntropyTsallisFuruichi.jl") \ No newline at end of file diff --git a/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiJizba.jl b/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiJizba.jl new file mode 100644 index 000000000..5d63a3c55 --- /dev/null +++ b/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiJizba.jl @@ -0,0 +1,100 @@ +using ComplexityMeasures: Renyi + +export CMIRenyiJizba + +""" + CMIRenyiJizba <: ConditionalMutualInformation + CMIRenyiJizba(; base = 2, q = 1.5) + +The Rényi conditional mutual information ``I_q^{R_{J}}(X; Y | Z)`` defined in +[Jizba2012](@citet). + +## Usage + +- Use with [`association`](@ref) to compute the raw Rényi-Jizba conditional mutual information + using of of the estimators listed below. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise conditional + independence using the Rényi-Jizba conditional mutual information. + +## Compatible estimators + +- [`JointProbabilities`](@ref) +- [`EntropyDecomposition`](@ref) + +## Definition + +```math +I_q^{R_{J}}(X; Y | Z) = I_q^{R_{J}}(X; Y, Z) - I_q^{R_{J}}(X; Z), +``` + +where ``I_q^{R_{J}}(X; Z)`` is the [`MIRenyiJizba`](@ref) mutual information. + +## Estimation + +- [Example 1](@ref example_CMIRenyiJizba_JointProbabilities_BubbleSortSwaps): + [`JointProbabilities`](@ref) with [`BubbleSortSwaps`](@ref) outcome space. +- [Example 2](@ref example_CMIRenyiJizba_EntropyDecomposition_OrdinalPatterns): + [`EntropyDecomposition`](@ref) with [`OrdinalPatterns`](@ref) outcome space. 
+- [Example 3](@ref example_CMIRenyiJizba_EntropyDecomposition_LeonenkoProzantoSavani): + [`EntropyDecomposition`](@ref) with differential entropy estimator [`LeonenkoProzantoSavani`](@ref). +""" +Base.@kwdef struct CMIRenyiJizba{B, Q} <: ConditionalMutualInformation + base::B = 2 + q::Q = 1.5 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:CMIRenyiJizba}, x, y, z) + pxyz = probabilities(est.discretization, x, y, z) + pxz = marginal(pxyz, dims = [1,3]) + pyz = marginal(pxyz, dims = [2,3]) + pz = marginal(pxyz, dims = 3) + infodef = Renyi(q = est.definition.q, base = est.definition.base) + HXYZ = information(infodef, pxyz) + HXZ = information(infodef, pxz) + HYZ = information(infodef, pyz) + HZ = information(infodef, pz) + return HXZ + HYZ - HXYZ - HZ +end + +function association(est::EntropyDecomposition{<:CMIRenyiJizba, <:DifferentialInfoEstimator{<:Renyi}}, x, y, z) + HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h_differential(est, x, y, z) + cmi = HXZ + HYZ - HXYZ - HZ + return cmi +end + +function association(est::EntropyDecomposition{<:CMIRenyiJizba, <:DiscreteInfoEstimator{<:Renyi}}, x, y, z) + HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h_discrete(est, x, y, z) + cmi = HXZ + HYZ - HXYZ - HZ + return cmi +end + +# ------------------------------------------------ +# Pretty printing for decomposition estimators. +# ------------------------------------------------ +function decomposition_string( + definition::CMIRenyiJizba, + est::EntropyDecomposition{<:CMIRenyiJizba, <:DiscreteInfoEstimator{<:Renyi}} + ) + return "Iᵣⱼ(X, Y | Z) = Hᵣ(X,Z) + Hᵣ(Y,Z) - Hᵣ(X,Y,Z) - Hᵣ(Z)"; +end + +function decomposition_string( + definition::CMIRenyiJizba, + est::EntropyDecomposition{<:CMIRenyiJizba, <:DifferentialInfoEstimator{<:Renyi}} +) + return "Iᵣⱼ(X, Y | Z) = hᵣ(X,Z) + hᵣ(Y,Z) - hᵣ(X,Y,Z) - hᵣ(Z)"; +end + +# --------------------------------- +# Avoid some common errors +# --------------------------------- +function verify_decomposition_entropy_type(definition::CMIRenyiJizba, est::INFO_ESTS) + if !(est.definition isa Renyi) + T = typeof(est.definition).name.name + msg = "Can't decompose CMIRenyiJizba into a combination of $T entropies. Please provide a `Renyi` entropy estimator instead." + throw(ArgumentError(msg)) + end +end diff --git a/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiPoczos.jl b/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiPoczos.jl new file mode 100644 index 000000000..b771a0b27 --- /dev/null +++ b/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiPoczos.jl @@ -0,0 +1,41 @@ +using ComplexityMeasures: Renyi + +export CMIRenyiPoczos + +""" + CMIRenyiPoczos <: ConditionalMutualInformation + CMIRenyiPoczos(; base = 2, q = 1.5) + +The differential Rényi conditional mutual information ``I_q^{R_{P}}(X; Y | Z)`` +defined in [Poczos2012](@citet). + +## Usage + +- Use with [`association`](@ref) to compute the raw Rényi-Poczos conditional mutual information + using of of the estimators listed below. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise conditional + independence using the Rényi-Poczos conditional mutual information. 
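A minimal sketch of the `JointProbabilities` route for the `CMIRenyiJizba` methods defined above. The argument order `JointProbabilities(definition, discretization)` is an assumption inferred from the `est.definition`/`est.discretization` fields used in those methods; this patch does not state the constructor explicitly:

```julia
using CausalityTools

x, y, z = rand(500), rand(500), rand(500)

# Discretize each variable with ordinal patterns, then estimate I_q(X; Y | Z).
disc = CodifyVariables(OrdinalPatterns(m = 3))
est = JointProbabilities(CMIRenyiJizba(q = 1.5, base = 2), disc)  # hypothetical constructor order
association(est, x, y, z)
```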
+ +## Compatible estimators + +- [`PoczosSchneiderCMI`](@ref) + +## Definition + +```math +\\begin{align*} +I_q^{R_{P}}(X; Y | Z) &= \\dfrac{1}{q-1} +\\int \\int \\int \\dfrac{p_Z(z) p_{X, Y | Z}^q}{( p_{X|Z}(x|z) p_{Y|Z}(y|z) )^{q-1}} \\\\ +&= \\mathbb{E}_{X, Y, Z} \\sim p_{X, Y, Z} +\\left[ \\dfrac{p_{X, Z}^{1-q}(X, Z) p_{Y, Z}^{1-q}(Y, Z) }{p_{X, Y, Z}^{1-q}(X, Y, Z) p_Z^{1-q}(Z)} \\right] +\\end{align*} +``` + +## Estimation + +- [Example 1](@ref @id CMIRenyiPoczos_PoczosSchneiderCMI): Dedicated [`PoczosSchneiderCMI`](@ref) estimator. +""" +Base.@kwdef struct CMIRenyiPoczos{B, Q} <: ConditionalMutualInformation + base::B = 2 + q::Q = 1.5 +end diff --git a/src/methods/infomeasures/condmutualinfo/CMIRenyiSarbu.jl b/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiSarbu.jl similarity index 50% rename from src/methods/infomeasures/condmutualinfo/CMIRenyiSarbu.jl rename to src/methods/information/definitions/conditional_mutual_informations/CMIRenyiSarbu.jl index 60d31bf59..ab753d1fd 100644 --- a/src/methods/infomeasures/condmutualinfo/CMIRenyiSarbu.jl +++ b/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiSarbu.jl @@ -1,15 +1,24 @@ +using ComplexityMeasures: Renyi +import ComplexityMeasures: log_with_base + export CMIRenyiSarbu """ CMIRenyiSarbu <: ConditionalMutualInformation - CMIRenyiSarbu(; base = 2, definition = CMIRenyiSarbuSarbu()) + CMIRenyiSarbu(; base = 2, q = 1.5) The Rényi conditional mutual information from [Sarbu2014](@citet). ## Usage -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence. -- Use with [`condmutualinfo`](@ref) to compute the raw conditional mutual information. +- Use with [`association`](@ref) to compute the raw Rényi-Sarbu conditional mutual information + using of of the estimators listed below. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise conditional + independence using the Rényi-Sarbu conditional mutual information. + +## Compatible estimators + +- [`JointProbabilities`](@ref) ## Discrete description @@ -22,43 +31,34 @@ Rényi ``\\alpha``-divergence between the conditional joint probability mass fun I(X, Y; Z)^R_q = \\dfrac{1}{q-1} \\sum_{z \\in Z} p(Z = z) \\log \\left( - \\sum{x \\in X}\\sum{y \\in Y} + \\sum_{x \\in X}\\sum_{y \\in Y} \\dfrac{p(x, y|z)^q}{\\left( p(x|z)\\cdot p(y|z) \\right)^{q-1}} \\right) ``` - -See also: [`condmutualinfo`](@ref). 
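For illustration, a hedged sketch of evaluating the measure from a precomputed three-dimensional pmf, using the `association` method added below:

```julia
using CausalityTools

x, y, z = rand(1:3, 1000), rand(1:3, 1000), rand(1:2, 1000)
pxyz = probabilities(UniqueElements(), x, y, z)   # 3×3×2 joint pmf

association(CMIRenyiSarbu(q = 1.5), pxyz)
```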
""" -struct CMIRenyiSarbu{E <: Renyi} <: ConditionalMutualInformation{E} - e::E - function CMIRenyiSarbu(; base = 2, q = 1.5) - e = Renyi(; base, q) - new{typeof(e)}(e) - end +Base.@kwdef struct CMIRenyiSarbu{B, Q} <: ConditionalMutualInformation + base::B = 2 + q::Q = 1.5 end -min_inputs_vars(::CMIRenyiSarbu) = 3 -max_inputs_vars(::CMIRenyiSarbu) = 3 - -function estimate(measure::CMIRenyiSarbu, est::Contingency{<:ProbabilitiesEstimator}, x, y, z) - return estimate(measure, contingency_matrix(est.est, x, y, z)) +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:CMIRenyiSarbu}, x, y, z) + probs = probabilities(est.discretization, x, y, z) + return association(est.definition, probs) end -function estimate(measure::CMIRenyiSarbu, est::Contingency{<:Nothing}, x, y, z) - return estimate(measure, contingency_matrix(x, y, z)) -end +function association(definition::CMIRenyiSarbu, pxyz::Probabilities{T, 3}) where T + (; base, q) = definition -function estimate( - measure::CMIRenyiSarbu, - pxyz::ContingencyMatrix{T, 3}) where T - e = measure.e - q = e.q dx, dy, dz = size(pxyz) - pxz = probabilities(pxyz, dims = [1, 3]) - pyz = probabilities(pxyz, dims = [2, 3]) - pz = probabilities(pxyz, dims = 3) + pxz = marginal(pxyz, dims = [1, 3]) + pyz = marginal(pxyz, dims = [2, 3]) + pz = marginal(pxyz, dims = 3) + cmi = 0.0 - logb = log_with_base(e.base) + logb = log_with_base(base) for k in 1:dz pzₖ = pz[k] inner = 0.0 @@ -76,5 +76,9 @@ function estimate( end cmi += pzₖ * logb(inner) end - return 1 / (1 - q) * cmi + return 1 / (q - 1) * cmi +end + +function association(est::EntropyDecomposition{<:CMIRenyiSarbu}, args...) + throw(ArgumentError("CMIRenyiSarbu not implemented for $(typeof(est).name.name)")) end diff --git a/src/methods/information/definitions/conditional_mutual_informations/CMIShannon.jl b/src/methods/information/definitions/conditional_mutual_informations/CMIShannon.jl new file mode 100644 index 000000000..7c3a31023 --- /dev/null +++ b/src/methods/information/definitions/conditional_mutual_informations/CMIShannon.jl @@ -0,0 +1,170 @@ +using ComplexityMeasures: Shannon +import ComplexityMeasures: log_with_base + +export CMIShannon + +""" + CMIShannon <: ConditionalMutualInformation + CMIShannon(; base = 2) + +The Shannon conditional mutual information (CMI) ``I^S(X; Y | Z)``. + +## Usage + +- Use with [`association`](@ref) to compute the raw Shannon conditional mutual information + using of of the estimators listed below. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise conditional + independence using the Shannon conditional mutual information. + +## Compatible estimators + +- [`JointProbabilities`](@ref) +- [`EntropyDecomposition`](@ref) +- [`MIDecomposition`](@ref) +- [`FPVP`](@ref) +- [`MesnerShalizi`](@ref) +- [`Rahimzamani`](@ref) +- [`PoczosSchneiderCMI`](@ref) +- [`GaussianCMI`](@ref) + +## Supported definitions + +Consider random variables ``X \\in \\mathbb{R}^{d_X}`` and +``Y \\in \\mathbb{R}^{d_Y}``, given ``Z \\in \\mathbb{R}^{d_Z}``. The Shannon +conditional mutual information is defined as + +```math +\\begin{align*} +I(X; Y | Z) +&= H^S(X, Z) + H^S(Y, z) - H^S(X, Y, Z) - H^S(Z) \\\\ +&= I^S(X; Y, Z) + I^S(X; Y) +\\end{align*}, +``` + +where ``I^S(\\cdot; \\cdot)`` is the Shannon mutual information [`MIShannon`](@ref), +and ``H^S(\\cdot)`` is the [`Shannon`](@ref) entropy. 
+ +Differential Shannon CMI is obtained by replacing the entropies by +differential entropies. + +## Estimation + +- [Example 1](@ref example_CMIShannon_EntropyDecomposition_Kraskov): + [`EntropyDecomposition`](@ref) with [`Kraskov`](@ref) estimator. +- [Example 2](@ref CMIShannon_EntropyDecomposition_ValueBinning): + [`EntropyDecomposition`](@ref) with [`ValueBinning`](@ref) estimator. +- [Example 3](@ref example_CMIShannon_MIDecomposition_KSG1): + [`MIDecomposition`](@ref) with [`KSG1`](@ref) estimator. +""" +Base.@kwdef struct CMIShannon{B} <: ConditionalMutualInformation + base::B = 2 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:CMIShannon}, x, y, z) + probs = probabilities(est.discretization, x, y, z) + return association(est.definition, probs) +end + +function association(definition::CMIShannon, pxyz::Probabilities{T, 3}) where T + dx, dy, dz = size(pxyz) + pxz = marginal(pxyz, dims = [1, 3]) + pyz = marginal(pxyz, dims = [2, 3]) + pz = marginal(pxyz, dims = 3) + cmi = 0.0 + log0 = log_with_base(definition.base) + for k in 1:dz + pzₖ = pz[k] + for j in 1:dy + pyⱼzₖ = pyz[j, k] + pyⱼzₖ > 0 || continue # leads to NaN + for i in 1:dx + pxᵢzₖ = pxz[i, k] + pxᵢzₖ > 0 || continue # leads to NaN + pxᵢyⱼzₖ = pxyz[i, j, k] + inner = (pzₖ * pxᵢyⱼzₖ) / (pxᵢzₖ * pyⱼzₖ) + if inner != 0.0 + cmi += pxᵢyⱼzₖ * log0(inner) + end + end + end + end + return cmi +end + +# ------------------------------------------------ +# Four-entropies decompostion of CMIShannon +# ------------------------------------------------ +function association(est::EntropyDecomposition{<:CMIShannon, <:DifferentialInfoEstimator{<:Shannon}}, x, y, z) + HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h_differential(est, x, y, z) + cmi = HXZ + HYZ - HXYZ - HZ + return cmi +end + +function association(est::EntropyDecomposition{<:CMIShannon, <:DiscreteInfoEstimator{<:Shannon}}, x, y, z) + HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h_discrete(est, x, y, z) + cmi = HXZ + HYZ - HXYZ - HZ + return cmi +end + +# --------------------------------------------------- +# Two-mutual-information decomposition of CMIShannon +# --------------------------------------------------- +function association(est::MIDecomposition{<:ConditionalMutualInformation, <:MutualInformationEstimator{<:MIShannon}}, x, y, z) + MI_X_YZ, MI_X_Z = marginal_mutual_informations(est, x, y, z) + cmi = MI_X_YZ - MI_X_Z + return cmi +end + +# We don't care if the estimated is mixed, discrete or handles both. The MI estimator +# handles that internally. +function marginal_mutual_informations(est::MIDecomposition{<:ConditionalMutualInformation, <:MutualInformationEstimator{<:MIShannon}}, x, y, z) + X = StateSpaceSet(x) + Y = StateSpaceSet(y) + Z = StateSpaceSet(z) + YZ = StateSpaceSet(Y, Z) + + modified_est = estimator_with_overridden_parameters(est.definition, est.est) + MI_X_YZ = association(modified_est, X, YZ) + MI_X_Z = association(modified_est, X, Z) + + return MI_X_YZ, MI_X_Z +end + +# --------------------------------- +# Avoid some common errors +# --------------------------------- +function verify_decomposition_entropy_type(definition::CMIShannon, est::INFO_ESTS) + if !(est.definition isa Shannon) + T = typeof(est.definition).name.name + msg = "Can't decompose CMIShannon into a combination of $T entropies. Please provide a `Shannon` entropy estimator instead." 
+        throw(ArgumentError(msg))
+    end
+end
+
+
+# ------------------------------------------------
+# Pretty printing for decomposition estimators.
+# ------------------------------------------------
+function decomposition_string(
+    definition::CMIShannon,
+    est::EntropyDecomposition{<:CMIShannon, <:DiscreteInfoEstimator{<:Shannon}}
+    )
+    return "Iₛ(X, Y | Z) = Hₛ(X,Z) + Hₛ(Y,Z) - Hₛ(X,Y,Z) - Hₛ(Z)";
+end
+
+function decomposition_string(
+    definition::CMIShannon,
+    est::EntropyDecomposition{<:CMIShannon, <:DifferentialInfoEstimator{<:Shannon}}
+    )
+    return "Iₛ(X, Y | Z) = hₛ(X,Z) + hₛ(Y,Z) - hₛ(X,Y,Z) - hₛ(Z)";
+end
+
+function decomposition_string(
+    definition::CMIShannon,
+    est::MIDecomposition{<:CMIShannon, <:MutualInformationEstimator}
+    )
+    return "Iₛ(X, Y | Z) = Iₛ(X; Y, Z) - Iₛ(X; Z)"
+end
\ No newline at end of file
diff --git a/src/methods/information/definitions/conditional_mutual_informations/CMITsallisPapapetrou.jl b/src/methods/information/definitions/conditional_mutual_informations/CMITsallisPapapetrou.jl
new file mode 100644
index 000000000..59e31266f
--- /dev/null
+++ b/src/methods/information/definitions/conditional_mutual_informations/CMITsallisPapapetrou.jl
@@ -0,0 +1,67 @@
+using ComplexityMeasures: Tsallis
+import ComplexityMeasures: log_with_base
+export CMITsallisPapapetrou
+
+"""
+    CMITsallisPapapetrou <: ConditionalMutualInformation
+    CMITsallisPapapetrou(; base = 2, q = 1.5)
+
+The Tsallis-Papapetrou conditional mutual information [Papapetrou2020](@cite).
+
+## Usage
+
+- Use with [`association`](@ref) to compute the raw Tsallis-Papapetrou conditional mutual information
+  using one of the estimators listed below.
+- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise conditional
+  independence using the Tsallis-Papapetrou conditional mutual information.
+
+## Compatible estimators
+
+- [`JointProbabilities`](@ref)
+
+## Definition
+
+Tsallis-Papapetrou conditional mutual information is defined as
+
+```math
+I_T^q(X, Y \\mid Z) = \\frac{1}{1 - q} \\left( 1 - \\sum_{XYZ} \\frac{p(x, y, z)^q}{p(x \\mid z)^{q-1} p(y \\mid z)^{q-1} p(z)^{q-1}} \\right).
+``` +""" +Base.@kwdef struct CMITsallisPapapetrou{B, Q} <: ConditionalMutualInformation + base::B = 2 + q::Q = 1.5 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:CMITsallisPapapetrou}, x, y, z) + probs = probabilities(est.discretization, x, y, z) + return association(est.definition, probs) +end + +function association(definition::CMITsallisPapapetrou, pxyz::Probabilities{T, 3}) where T + (; base, q) = definition + + dx, dy, dz = size(pxyz) + pxz = marginal(pxyz, dims = [1, 3]) + pyz = marginal(pxyz, dims = [2, 3]) + pz = marginal(pxyz, dims = 3) + + cmi = 0.0 + pq = q-1 + for k in 1:dz + pzₖ = pz[k] + for j in 1:dy + pyⱼzₖ = pyz[j, k] + for i in 1:dx + pxᵢzₖ = pxz[i, k] + pxᵢyⱼzₖ = pxyz[i, j, k] + if pxᵢzₖ != 0.0 && pyⱼzₖ != 0.0 && pzₖ != 0.0 + cmi +=pxᵢyⱼzₖ / (pxᵢzₖ^pq * pyⱼzₖ^pq * pzₖ^pq) + end + end + end + end + return 1 / (1 - q) * (1 - cmi) +end diff --git a/src/methods/information/definitions/conditional_mutual_informations/conditional_mutual_informations.jl b/src/methods/information/definitions/conditional_mutual_informations/conditional_mutual_informations.jl new file mode 100644 index 000000000..7b6e7f5fc --- /dev/null +++ b/src/methods/information/definitions/conditional_mutual_informations/conditional_mutual_informations.jl @@ -0,0 +1,77 @@ +export ConditionalMutualInformation + +""" + CondiitionalMutualInformation + +Abstract type for all mutual information measures. + +## Concrete implementations + +- [`CMIShannon`](@ref) +- [`CMITsallisPapapetrou`](@ref) +- [`CMIRenyiJizba`](@ref) +- [`CMIRenyiSarbu`](@ref) +- [`CMIRenyiPoczos`](@ref) + +See also: [`ConditionalMutualInformationEstimator`](@ref) +""" +abstract type ConditionalMutualInformation <: MultivariateInformationMeasure end + +min_inputs_vars(::ConditionalMutualInformation) = 3 +max_inputs_vars(::ConditionalMutualInformation) = 3 + + +# -------------------------------------------------------------------------- +# Estimation methods (override if it doesn't apply for a particular measure) +# -------------------------------------------------------------------------- +function association(est::CMIDecomposition{<:ConditionalMutualInformation}, x, y, z) + return association(est.est, x, y, z) +end + +# -------------------------------------------------------------------------- +# Utils +# -------------------------------------------------------------------------- +# Generic H4-formulation of CMI +function marginal_entropies_cmi4h_differential(est::EntropyDecomposition{<:ConditionalMutualInformation, <:DifferentialInfoEstimator}, x, y, z) + Z = StateSpaceSet(z) + Y = StateSpaceSet(y) + X = StateSpaceSet(x) + XZ = StateSpaceSet(X, Z) + YZ = StateSpaceSet(Y, Z) + XYZ = StateSpaceSet(X, Y, Z) + + modified_est = estimator_with_overridden_parameters(est.definition, est.est) + HXZ = entropy(modified_est, XZ) + HYZ = entropy(modified_est, YZ) + HXYZ = entropy(modified_est, XYZ) + HZ = entropy(modified_est, Z) + + return HXZ, HYZ, HXYZ, HZ +end + +function marginal_entropies_cmi4h_discrete(est::EntropyDecomposition{<:ConditionalMutualInformation, <:DiscreteInfoEstimator}, x, y, z) + # Encode marginals to integers based on the outcome space. + eX, eY, eZ = codified_marginals(est.discretization, x, y, z) + eXZ = StateSpaceSet(eX, eZ) + eYZ = StateSpaceSet(eY, eZ) + eXYZ = StateSpaceSet(eX, eY, eZ) + + # The outcome space is no longer relevant from this point on. 
We're done discretizing, + # so now we can just count (i.e. use `UniqueElements` as the outcome space). + o = UniqueElements() + + modified_est = estimator_with_overridden_parameters(est.definition, est.est) + HXZ = information(modified_est, est.pest, o, eXZ) + HYZ = information(modified_est, est.pest, o, eYZ) + HXYZ = information(modified_est, est.pest, o, eXYZ) + HZ = information(modified_est, est.pest, o, eZ) + + return HXZ, HYZ, HXYZ, HZ +end + + +include("CMIShannon.jl") +include("CMITsallisPapapetrou.jl") +include("CMIRenyiJizba.jl") +include("CMIRenyiPoczos.jl") +include("CMIRenyiSarbu.jl") diff --git a/src/methods/information/definitions/divergences_and_distances/HellingerDistance.jl b/src/methods/information/definitions/divergences_and_distances/HellingerDistance.jl new file mode 100644 index 000000000..dcc3eb185 --- /dev/null +++ b/src/methods/information/definitions/divergences_and_distances/HellingerDistance.jl @@ -0,0 +1,49 @@ +export HellingerDistance + +""" + HellingerDistance <: DivergenceOrDistance + +The Hellinger distance. + +## Usage + +- Use with [`association`](@ref) to compute the compute the Hellinger distance between two pre-computed + probability distributions, or from raw data using one of the estimators listed below. + +## Compatible estimators + +- [`JointProbabilities`](@ref) + +## Description + +The Hellinger distance between two probability distributions +``P_X = (p_x(\\omega_1), \\ldots, p_x(\\omega_n))`` and +``P_Y = (p_y(\\omega_1), \\ldots, p_y(\\omega_m))``, both defined over the same +[`OutcomeSpace`](@ref) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is +[defined](https://en.wikipedia.org/wiki/Hellinger_distance) as + +```math +D_{H}(P_Y(\\Omega) || P_Y(\\Omega)) = +\\dfrac{1}{\\sqrt{2}} \\sum_{\\omega \\in \\Omega} (\\sqrt{p_x(\\omega)} - \\sqrt{p_y(\\omega)})^2 +``` + +## Estimation + +- [Example 1](@ref example_HellingerDistance_precomputed_probabilities): From precomputed probabilities +- [Example 2](@ref example_HellingerDistance_JointProbabilities_OrdinalPatterns): + [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space +""" +struct HellingerDistance <: DivergenceOrDistance end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:HellingerDistance}, x, y) + # Dispatch to generic method in `divergences_and_distances.jl` with 2D `Probabilities` + probs = probabilities(est.discretization, x, y) + return association(est.definition, probs) +end + +function association(measure::HellingerDistance, px::Probabilities, py::Probabilities) + return 1/sqrt(2) * sum((sqrt(pxᵢ) - sqrt(pyᵢ))^2 for (pxᵢ, pyᵢ) in zip(px, py)) +end diff --git a/src/methods/information/definitions/divergences_and_distances/KLDivergence.jl b/src/methods/information/definitions/divergences_and_distances/KLDivergence.jl new file mode 100644 index 000000000..b3d365713 --- /dev/null +++ b/src/methods/information/definitions/divergences_and_distances/KLDivergence.jl @@ -0,0 +1,69 @@ +export KLDivergence + +""" + KLDivergence <: DivergenceOrDistance + +The Kullback-Leibler (KL) divergence. + +## Usage + +- Use with [`association`](@ref) to compute the compute the KL-divergence between two + pre-computed probability distributions, or from raw data using one of the estimators + listed below. + +## Compatible estimators + +- [`JointDistanceDistribution`](@ref) + +## Estimators + +- [`JointProbabilities`](@ref). 
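As a quick, hedged illustration of the definition given below (assuming the `probabilities` extension from this patch; note that both distributions must be defined over the same outcome space):

```julia
using CausalityTools

x = rand(["heads", "tails"], 10_000)                      # fair coin
y = [rand() < 0.7 ? "heads" : "tails" for _ in 1:10_000]  # biased coin

px = probabilities(UniqueElements(), x)
py = probabilities(UniqueElements(), y)

# ≈ 0.5*log2(0.5/0.7) + 0.5*log2(0.5/0.3) ≈ 0.126 bits (up to sampling noise)
association(KLDivergence(base = 2), px, py)
```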
+ +## Description + +The KL-divergence between two probability distributions +``P_X = (p_x(\\omega_1), \\ldots, p_x(\\omega_n))`` and +``P_Y = (p_y(\\omega_1), \\ldots, p_y(\\omega_m))``, both defined over the same +[`OutcomeSpace`](@ref) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is defined as + +```math +D_{KL}(P_Y(\\Omega) || P_Y(\\Omega)) = +\\sum_{\\omega \\in \\Omega} p_x(\\omega) \\log\\dfrac{p_x(\\omega)}{p_y(\\omega)} +``` + +## Implements + +- [`association`](@ref). Used to compute the KL-divergence between two pre-computed + probability distributions. If used with [`RelativeAmount`](@ref), the KL divergence may + be undefined to due some outcomes having zero counts. Use some other + [`ProbabilitiesEstimator`](@ref) like [`BayesianRegularization`](@ref) to ensure + all estimated probabilities are nonzero. + +!!! note + Distances.jl also defines `KLDivergence`. Quality it if you're loading both + packages, i.e. do `association(CausalityTools.KLDivergence(), x, y)`. + +## Estimation + +- [Example 1](@ref example_KLDivergence_precomputed_probabilities): From precomputed probabilities +- [Example 2](@ref example_KLDivergence_JointProbabilities_OrdinalPatterns): + [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space +""" +struct KLDivergence{B} <: DivergenceOrDistance + base::B +end +KLDivergence(; base = 2) = KLDivergence(base) + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:KLDivergence}, x, y) + # Dispatch to generic method in `divergences_and_distances.jl` with 2D `Probabilities` + probs = probabilities(est.discretization, x, y) + return association(est.definition, probs) +end + +function association(measure::KLDivergence, px::Probabilities, py::Probabilities) + size_match(measure, px, py) + return sum(pxᵢ * log(measure.base, pxᵢ / pyᵢ) for (pxᵢ, pyᵢ) in zip(px, py)) +end diff --git a/src/methods/information/definitions/divergences_and_distances/RenyiDivergence.jl b/src/methods/information/definitions/divergences_and_distances/RenyiDivergence.jl new file mode 100644 index 000000000..6b770df76 --- /dev/null +++ b/src/methods/information/definitions/divergences_and_distances/RenyiDivergence.jl @@ -0,0 +1,83 @@ +export RenyiDivergence + +""" + RenyiDivergence <: DivergenceOrDistance + RenyiDivergence(q; base = 2) + +The Rényi divergence of positive order `q`. + +## Usage + +- Use with [`association`](@ref) to compute the compute the Rényi divergence between two + pre-computed probability distributions, or from raw data using one of the estimators + listed below. + +## Compatible estimators + +- [`JointDistanceDistribution`](@ref) + +## Description + +The Rényi divergence between two probability distributions +``P_X = (p_x(\\omega_1), \\ldots, p_x(\\omega_n))`` and +``P_Y = (p_y(\\omega_1), \\ldots, p_y(\\omega_m))``, both defined over the same +[`OutcomeSpace`](@ref) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is defined as +[vanErven2014](@citet). + +```math +D_{q}(P_Y(\\Omega) || P_Y(\\Omega)) = +\\dfrac{1}{q - 1} \\log \\sum_{\\omega \\in \\Omega}p_x(\\omega)^{q}p_y(\\omega)^{1-\\alpha} +``` + +## Implements + +- [`information`](@ref). Used to compute the Rényi divergence between two pre-computed + probability distributions. If used with [`RelativeAmount`](@ref), the KL divergence may + be undefined to due some outcomes having zero counts. 
Use some other + [`ProbabilitiesEstimator`](@ref) like [`BayesianRegularization`](@ref) to ensure + all estimated probabilities are nonzero. + +!!! note + Distances.jl also defines `RenyiDivergence`. Quality it if you're loading both + packages, i.e. do `association(CausalityTools.RenyiDivergence(), x, y)`. + + +## Estimation + +- [Example 1](@ref example_RenyiDivergence_precomputed_probabilities): From precomputed probabilities +- [Example 2](@ref example_RenyiDivergence_JointProbabilities_OrdinalPatterns): + [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space +""" +struct RenyiDivergence{Q, B} <: DivergenceOrDistance + q::Q + base::B + function RenyiDivergence(q::Q, base::B) where {Q, B} + q > 0 || throw(ArgumentError("`q` must be positive. Got $q")) + new{Q, B}(q, base) + end +end +RenyiDivergence(; q = 0.5, base = 2) = RenyiDivergence(q, base) + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:RenyiDivergence}, x, y) + # Dispatch to generic method in `divergences_and_distances.jl` with 2D `Probabilities` + probs = probabilities(est.discretization, x, y) + return association(est.definition, probs) +end + +function association(definition::RenyiDivergence, px::Probabilities, py::Probabilities) + (; base, q) = definition + + if q == Inf + return maximum(pxᵢ / pyᵢ for (pxᵢ, pyᵢ) in zip(px, py)) + end + s = 0.0 + for (pxᵢ, pyᵢ) in zip(px, py) + if pxᵢ != 0.0 && pyᵢ != 0.0 + s += pxᵢ^q * pyᵢ^(1 - q) + end + end + return 1 / (q - 1) * log(base, s) +end diff --git a/src/methods/information/definitions/divergences_and_distances/VariationDistance.jl b/src/methods/information/definitions/divergences_and_distances/VariationDistance.jl new file mode 100644 index 000000000..9dec8b311 --- /dev/null +++ b/src/methods/information/definitions/divergences_and_distances/VariationDistance.jl @@ -0,0 +1,50 @@ +export VariationDistance + +""" + VariationDistance <: DivergenceOrDistance + +The variation distance. + +## Usage + +- Use with [`association`](@ref) to compute the compute the variation distance between two + pre-computed probability distributions, or from raw data using one of the estimators + listed below. 
+
+## Compatible estimators
+
+- [`JointProbabilities`](@ref)
+
+## Description
+
+The variation distance between two probability distributions
+``P_X = (p_x(\\omega_1), \\ldots, p_x(\\omega_n))`` and
+``P_Y = (p_y(\\omega_1), \\ldots, p_y(\\omega_m))``, both defined over the same
+[`OutcomeSpace`](@ref) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is
+[defined](https://en.wikipedia.org/wiki/Variation_distance) as
+
+```math
+D_{V}(P_X(\\Omega) || P_Y(\\Omega)) =
+\\dfrac{1}{2} \\sum_{\\omega \\in \\Omega} | p_x(\\omega) - p_y(\\omega) |
+```
+
+## Examples
+
+- [Example 1](@ref example_VariationDistance_precomputed_probabilities): From precomputed probabilities
+- [Example 2](@ref example_VariationDistance_JointProbabilities_OrdinalPatterns):
+  [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space
+"""
+struct VariationDistance <: DivergenceOrDistance end
+
+# ----------------------------------------------------------------
+# Estimation methods
+# ----------------------------------------------------------------
+function association(est::JointProbabilities{<:VariationDistance}, x, y)
+    # Dispatch to generic method in `divergences_and_distances.jl` with 2D `Probabilities`
+    probs = probabilities(est.discretization, x, y)
+    return association(est.definition, probs)
+end
+
+function association(measure::VariationDistance, px::Probabilities, py::Probabilities)
+    return 1/2 * sum(abs(pxᵢ - pyᵢ) for (pxᵢ, pyᵢ) in zip(px, py))
+end
diff --git a/src/methods/information/definitions/divergences_and_distances/divergences_and_distances.jl b/src/methods/information/definitions/divergences_and_distances/divergences_and_distances.jl
new file mode 100644
index 000000000..072691154
--- /dev/null
+++ b/src/methods/information/definitions/divergences_and_distances/divergences_and_distances.jl
@@ -0,0 +1,31 @@
+"""
+    DivergenceOrDistance <: BivariateInformationMeasure
+
+The supertype for bivariate information measures aiming to quantify some sort of
+divergence, distance or closeness between two probability distributions.
+
+Some of these measures are proper metrics, while others are not, but they have in
+common that they aim to quantify how "far from each other" two probability distributions
+are.
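A minimal sketch comparing two empirical distributions with the distances defined above (assuming the `probabilities` extension from this patch; both pmfs must share the same outcome space for the element-wise sums to be meaningful):

```julia
using CausalityTools

# Two samples over the same three categories.
x = rand(["a", "b", "c"], 1000)
y = rand(["a", "b", "c"], 1000)

px = probabilities(UniqueElements(), x)
py = probabilities(UniqueElements(), y)

association(HellingerDistance(), px, py)
association(VariationDistance(), px, py)
```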
+ +## Concrete implementations + +- [`HellingerDistance`](@ref) +- [`KLDivergence`](@ref) +- [`RenyiDivergence`](@ref) +- [`VariationDistance`](@ref) +""" +abstract type DivergenceOrDistance <: BivariateInformationMeasure end + +# If a joint probability is given, get the marginals +function association(measure::DivergenceOrDistance, p::Probabilities{T, 2}) where T + px = marginal(p, dims = 1) + py = marginal(p, dims = 2) + return association(measure, px, py) +end + +include("KLDivergence.jl") +include("RenyiDivergence.jl") +include("HellingerDistance.jl") +include("VariationDistance.jl") + diff --git a/src/methods/information/definitions/information_definitions.jl b/src/methods/information/definitions/information_definitions.jl new file mode 100644 index 000000000..bfc84f867 --- /dev/null +++ b/src/methods/information/definitions/information_definitions.jl @@ -0,0 +1,10 @@ + +include("divergences_and_distances/divergences_and_distances.jl") +include("joint_entropies/joint_entropies.jl") +include("conditional_entropies/conditional_entropies.jl") +include("mutual_informations/mutual_informations.jl") +include("conditional_mutual_informations/conditional_mutual_informations.jl") +include("partial_mutual_information/partial_mutual_information.jl") +include("transferentropy/transfer_entropies.jl") + +include("override_parameters.jl") \ No newline at end of file diff --git a/src/methods/information/definitions/joint_entropies/JointEntropyRenyi.jl b/src/methods/information/definitions/joint_entropies/JointEntropyRenyi.jl new file mode 100644 index 000000000..1a6694e4c --- /dev/null +++ b/src/methods/information/definitions/joint_entropies/JointEntropyRenyi.jl @@ -0,0 +1,61 @@ +using ComplexityMeasures: Renyi + +export JointEntropyRenyi + +""" + JointEntropyRenyi <: JointEntropy + JointEntropyRenyi(; base = 2, q = 1.5) + +The Rényi joint entropy measure [Golshani2009](@cite). + +## Usage + +- Use with [`association`](@ref) to compute the Golshani-Rényi joint entropy between + two variables. + +## Compatible estimators + +- [`JointProbabilities`](@ref) + +## Definition + +Given two two discrete random variables ``X`` and ``Y`` with ranges ``\\mathcal{X}`` and +``\\mathcal{X}``, [Golshani2009](@citet) defines the Rényi joint entropy as + +```math +H_q^R(X, Y) = \\dfrac{1}{1-\\alpha} \\log \\sum_{i = 1}^N p_i^q, +``` + +where ``q > 0`` and ``q != 1``. 
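For illustration, a hedged sketch using the `association` method for precomputed joint probabilities defined below:

```julia
using CausalityTools

x, y = rand(1:5, 1000), rand(1:5, 1000)
pxy = probabilities(UniqueElements(), x, y)   # 5×5 joint pmf

association(JointEntropyRenyi(q = 2, base = 2), pxy)
```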
+ +## Estimation + +- [Example 1](@ref example_JointEntropyRenyi_ValueBinning): + [`JointProbabilities`](@ref) with [`ValueBinning`](@ref) outcome space +""" +Base.@kwdef struct JointEntropyRenyi{B, Q} <: JointEntropy + base::B = 2 + q::Q = 1.5 +end + + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:JointEntropyRenyi}, x, y) + probs = probabilities(est.discretization, x, y) + return association(est.definition, probs) +end + +function association(definition::JointEntropyRenyi, pxy::Probabilities{T, 2}) where T + (; base, q) = definition + + h = 0.0 + for p in pxy + if p != 0 + h += p^q + end + end + h = 1 / (1 - q) * log(h) + return _convert_logunit(h, ℯ, base) +end \ No newline at end of file diff --git a/src/methods/information/definitions/joint_entropies/JointEntropyShannon.jl b/src/methods/information/definitions/joint_entropies/JointEntropyShannon.jl new file mode 100644 index 000000000..2ec11be08 --- /dev/null +++ b/src/methods/information/definitions/joint_entropies/JointEntropyShannon.jl @@ -0,0 +1,59 @@ +using ComplexityMeasures: Shannon + +export JointEntropyShannon + +""" + JointEntropyShannon <: JointEntropy + JointEntropyShannon(; base = 2) + +The Shannon joint entropy measure [CoverThomas1999](@cite). + +## Usage + +- Use with [`association`](@ref) to compute the Shannon joint entropy between + two variables. + +## Compatible estimators + +- [`JointProbabilities`](@ref) + +## Definition + +Given two two discrete random variables ``X`` and ``Y`` with ranges ``\\mathcal{X}`` and +``\\mathcal{X}``, [CoverThomas1999](@citet) defines the Shannon joint entropy as + +```math +H^S(X, Y) = -\\sum_{x\\in \\mathcal{X}, y \\in \\mathcal{Y}} p(x, y) \\log p(x, y), +``` + +where we define ``log(p(x, y)) := 0`` if ``p(x, y) = 0``. + +## Estimation + +- [Example 1](@ref example_JointEntropyShannon_Dispersion): + [`JointProbabilities`](@ref) with [`Dispersion`](@ref) outcome space +""" +Base.@kwdef struct JointEntropyShannon{B} <: JointEntropy + base::B = 2 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:JointEntropyShannon}, x, y) + probs = probabilities(est.discretization, x, y) + return association(est.definition, probs) +end + +function association(definition::JointEntropyShannon, pxy::Probabilities{T, 2}) where T + (; base) = definition + + h = 0.0 + for p in pxy + if p != 0 # Define log(0) = 0 + h += p * log(p) + end + end + h = -h + return _convert_logunit(h, ℯ, base) +end \ No newline at end of file diff --git a/src/methods/information/definitions/joint_entropies/JointEntropyTsallis.jl b/src/methods/information/definitions/joint_entropies/JointEntropyTsallis.jl new file mode 100644 index 000000000..f2324c310 --- /dev/null +++ b/src/methods/information/definitions/joint_entropies/JointEntropyTsallis.jl @@ -0,0 +1,61 @@ +using ComplexityMeasures: Tsallis + +export JointEntropyTsallis + +""" + JointEntropyTsallis <: JointEntropy + JointEntropyTsallis(; base = 2, q = 1.5) + +The Tsallis joint entropy definition from [Furuichi2006](@citet). + +## Usage + +- Use with [`association`](@ref) to compute the Furuichi-Tsallis joint entropy between + two variables. 
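A minimal sketch of the Shannon joint entropy implemented above, evaluated on a precomputed joint pmf (assumes the `probabilities` extension from this patch):

```julia
using CausalityTools

x = rand(["yes", "no"], 2000)
y = rand(["low", "mid", "high"], 2000)
pxy = probabilities(UniqueElements(), x, y)

# For (near-)independent uniform marginals this approaches 1 + log2(3) ≈ 2.58 bits.
association(JointEntropyShannon(base = 2), pxy)
```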
+ +## Compatible estimators + +- [`JointProbabilities`](@ref) + +## Definition + +Given two two discrete random variables ``X`` and ``Y`` with ranges ``\\mathcal{X}`` and +``\\mathcal{X}``, [Furuichi2006](@citet) defines the Tsallis joint entropy as + +```math +H_q^T(X, Y) = -\\sum_{x\\in \\mathcal{X}, y \\in \\mathcal{Y}} p(x, y)^q \\log_q p(x, y), +``` + +where ``log_q(x, q) = \\dfrac{x^{1-q} - 1}{1-q}`` is the q-logarithm, and +we define ``log_q(x, q) := 0`` if ``q = 0``. + +## Estimation + +- [Example 1](@ref example_JointEntropyTsallis_OrdinalPatterns): + [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space +""" +Base.@kwdef struct JointEntropyTsallis{B, Q} <: JointEntropy + base::B = 2 + q::Q = 1.5 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:JointEntropyTsallis}, x, y) + probs = probabilities(est.discretization, x, y) + return association(est.definition, probs) +end + +function association(definition::JointEntropyTsallis, pxy::Probabilities{T, 2}) where T + (; base, q) = definition + + h = 0.0 + for p in pxy + if p != 0.0 # Define logq(0) = 0 + h += p^q * logq(p, q) + end + end + h = -h + return _convert_logunit(h, ℯ, base) +end \ No newline at end of file diff --git a/src/methods/information/definitions/joint_entropies/joint_entropies.jl b/src/methods/information/definitions/joint_entropies/joint_entropies.jl new file mode 100644 index 000000000..8ee6b07d5 --- /dev/null +++ b/src/methods/information/definitions/joint_entropies/joint_entropies.jl @@ -0,0 +1,21 @@ +export JointEntropy + +""" + JointEntropy <: BivariateInformationMeasure + +The supertype for all joint entropy measures. + +## Concrete implementations + +- [`JointEntropyShannon`](@ref) +- [`JointEntropyRenyi`](@ref) +- [`JointEntropyTsallis`](@ref) +""" +abstract type JointEntropy <: BivariateInformationMeasure end + +# q-logarithm for Tsallis and Renyi joint entropies +logq(x, q) = (x^(1-q) - 1) / (1 - q) + +include("JointEntropyShannon.jl") +include("JointEntropyRenyi.jl") +include("JointEntropyTsallis.jl") \ No newline at end of file diff --git a/src/methods/information/definitions/mutual_informations/MIRenyiJizba.jl b/src/methods/information/definitions/mutual_informations/MIRenyiJizba.jl new file mode 100644 index 000000000..720c404e0 --- /dev/null +++ b/src/methods/information/definitions/mutual_informations/MIRenyiJizba.jl @@ -0,0 +1,108 @@ +using ComplexityMeasures: Renyi + +export MIRenyiJizba + +""" + MIRenyiJizba <: <: BivariateInformationMeasure + MIRenyiJizba(; q = 1.5, base = 2) + +The Rényi mutual information ``I_q^{R_{J}}(X; Y)`` defined in [Jizba2012](@cite). + +## Usage + +- Use with [`association`](@ref) to compute the raw Rényi-Jizba mutual information from input data + using of of the estimators listed below. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence using + the Rényi-Jizba mutual information. + +## Compatible estimators + +- [`JointProbabilities`](@ref). +- [`EntropyDecomposition`](@ref). + +## Definition + +```math +I_q^{R_{J}}(X; Y) = H_q^{R}(X) + H_q^{R}(Y) - H_q^{R}(X, Y), +``` + +where ``H_q^{R}(\\cdot)`` is the [`Rényi`](@ref) entropy. + + +## Estimation + +- [Example 1](@ref example_MIRenyiJizba_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) outcome space. 
+- [Example 2](@ref example_MIRenyiJizba_JointProbabilities_LeonenkoProzantoSavani): [`EntropyDecomposition`](@ref) with [`LeonenkoProzantoSavani`](@ref). +- [Example 3](@ref example_MIRenyiJizba_EntropyDecomposition_ValueBinning): [`EntropyDecomposition`](@ref) with [`ValueBinning`](@ref). +""" +Base.@kwdef struct MIRenyiJizba{B, Q} <: MutualInformation + base::B = 2 + q::Q = 1.5 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:MIRenyiJizba}, x, y) + probs = probabilities(est.discretization, x, y) + return association(est.definition, probs) +end + +function association(definition::MIRenyiJizba, pxy::Probabilities{T, 2}) where T + (; base, q) = definition + + px = marginal(pxy, dims = 1) + py = marginal(pxy, dims = 2) + + logb = log_with_base(base) + num = 0.0 + den = 0.0 + for i in eachindex(px.p) + for j in eachindex(py.p) + num += px[i]^q * py[j]^q + den += pxy[i, j]^q + end + end + if den != 0 + mi = logb(num / den) + else + mi = 0.0 + end + + return (1 / (1 / q)) * mi +end + +# -------------------------------------------------------------- +# `MIRenyiJizba` through entropy decomposition. +# Eq. 24 in +# Jizba, P., Lavička, H., & Tabachová, Z. (2021). Rényi Transfer Entropy Estimators for +# Financial Time Series. Engineering Proceedings, 5(1), 33. +# -------------------------------------------------------------- +function association(est::EntropyDecomposition{<:MIRenyiJizba, <:DifferentialInfoEstimator{<:Renyi}}, x, y) + HX, HY, HXY = marginal_entropies_mi3h_differential(est, x, y) + mi = HX + HY - HXY + return mi +end + +function association(est::EntropyDecomposition{<:MIRenyiJizba, <:DiscreteInfoEstimator{<:Renyi}}, x, y) + HX, HY, HXY = marginal_entropies_mi3h_discrete(est, x, y) + mi = HX + HY - HXY + return mi +end + +# ------------------------------------------------ +# Pretty printing for decomposition estimators. +# ------------------------------------------------ +function decomposition_string( + definition::MIRenyiJizba, + est::EntropyDecomposition{<:MIRenyiJizba, <:DifferentialInfoEstimator{<:Renyi}} + ) + return "Iᵣⱼ(X, Y) = hᵣ(X) + hᵣ(Y) - hᵣ(X, Y)" +end + +function decomposition_string( + definition::MIRenyiJizba, + est::EntropyDecomposition{<:MIRenyiJizba, <:DiscreteInfoEstimator{<:Renyi}} + ) + return "Iᵣⱼ(X, Y) = Hᵣ(X) + Hᵣ(Y) - Hᵣ(X, Y)" +end diff --git a/src/methods/information/definitions/mutual_informations/MIRenyiSarbu.jl b/src/methods/information/definitions/mutual_informations/MIRenyiSarbu.jl new file mode 100644 index 000000000..6aa2d3217 --- /dev/null +++ b/src/methods/information/definitions/mutual_informations/MIRenyiSarbu.jl @@ -0,0 +1,77 @@ +using ComplexityMeasures: Renyi + +export MIRenyiSarbu + +""" + MIRenyiSarbu <: BivariateInformationMeasure + MIRenyiSarbu(; base = 2, q = 1.5) + +The discrete Rényi mutual information from [Sarbu2014](@citet). + +## Usage + +- Use with [`association`](@ref) to compute the raw Rényi-Sarbu mutual information from input data + using of of the estimators listed below. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence using + the Rényi-Sarbu mutual information. + +## Compatible estimators + +- [`JointProbabilities`](@ref). 
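As a hedged sketch, the Rényi-Jizba mutual information implemented above can be evaluated directly from a precomputed joint pmf:

```julia
using CausalityTools

x = rand(1:3, 1000)
y = rand(1:3, 1000)   # independent of x, so the estimate should be close to zero
pxy = probabilities(UniqueElements(), x, y)

association(MIRenyiJizba(q = 1.5), pxy)
```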
+ +## Description + +Sarbu (2014) defines discrete Rényi mutual information as the +Rényi ``\\alpha``-divergence between the conditional joint probability mass function +``p(x, y)`` and the product of the conditional marginals, ``p(x) \\cdot p(y)``: + +```math +I(X, Y)^R_q = +\\dfrac{1}{q-1} +\\log \\left( + \\sum_{x \\in X, y \\in Y} + \\dfrac{p(x, y)^q}{\\left( p(x)\\cdot p(y) \\right)^{q-1}} +\\right) +``` + +## Estimation + +- [Example 1](@ref example_MIRenyiSarbu_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) for categorical data. +- [Example 2](@ref example_MIRenyiSarbu_JointProbabilities_CosineSimilarityBinning): [`JointProbabilities`](@ref) with [`CosineSimilarityBinning`](@ref) for numerical data. +""" +Base.@kwdef struct MIRenyiSarbu{B, Q} <: MutualInformation + base::B = 2 + q::Q = 1.5 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:MIRenyiSarbu}, x, y) + probs = probabilities(est.discretization, x, y) + return association(est.definition, probs) +end + +function association(definition::MIRenyiSarbu, pxy::Probabilities{T, 2}) where T + (; base, q) = definition + + px = marginal(pxy, dims = 1) + py = marginal(pxy, dims = 2) + + mi = 0.0 + for i in eachindex(px.p) + for j in eachindex(py.p) + pxyᵢⱼ = pxy[i, j] + mi += pxyᵢⱼ^q / ((px[i] * py[j])^(q - 1)) + end + end + if mi == 0 + return 0.0 + else + return _convert_logunit(1 / (q - 1) * log(mi), ℯ, base) + end +end + +function association(est::EntropyDecomposition{<:MIRenyiSarbu}, x, y) + throw(ArgumentError("MIRenyiSarbu not implemented for $(typeof(est).name.name)")) +end diff --git a/src/methods/information/definitions/mutual_informations/MIShannon.jl b/src/methods/information/definitions/mutual_informations/MIShannon.jl new file mode 100644 index 000000000..a29c6ae9d --- /dev/null +++ b/src/methods/information/definitions/mutual_informations/MIShannon.jl @@ -0,0 +1,155 @@ +using ComplexityMeasures: log_with_base + +export MIShannon + +""" + MIShannon <: BivariateInformationMeasure + MIShannon(; base = 2) + +The Shannon mutual information ``I_S(X; Y)``. + +## Usage + +- Use with [`association`](@ref) to compute the raw Shannon mutual information from input data + using of of the estimators listed below. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence using + the Shannon mutual information. + +## Compatible estimators + +- [`JointProbabilities`](@ref) (generic) +- [`EntropyDecomposition`](@ref) (generic) +- [`KSG1`](@ref) +- [`KSG2`](@ref) +- [`GaoOhViswanath`](@ref) +- [`GaoKannanOhViswanath`](@ref) +- [`GaussianMI`](@ref) + +## Discrete definition + +There are many equivalent formulations of discrete Shannon mutual information, meaning that +it can be estimated in several ways, either using [`JointProbabilities`](@ref) (double-sum formulation), +[`EntropyDecomposition`](@ref) (three-entropies decomposition), or some dedicated estimator. + +### Double sum formulation + +Assume we observe samples +``\\bar{\\bf{X}}_{1:N_y} = \\{\\bar{\\bf{X}}_1, \\ldots, \\bar{\\bf{X}}_n \\}`` and +``\\bar{\\bf{Y}}_{1:N_x} = \\{\\bar{\\bf{Y}}_1, \\ldots, \\bar{\\bf{Y}}_n \\}`` from +two discrete random variables ``X`` and ``Y`` with finite supports +``\\mathcal{X} = \\{ x_1, x_2, \\ldots, x_{M_x} \\}`` and +``\\mathcal{Y} = y_1, y_2, \\ldots, x_{M_y}``. 
+The double-sum estimate is obtained by replacing the probabilities in the double sum with
+their empirical (plug-in) estimates:
+
+```math
+\\hat{I}_{DS}(X; Y) =
+ \\sum_{x_i \\in \\mathcal{X}, y_j \\in \\mathcal{Y}} \\hat{p}(x_i, y_j) \\log \\left( \\dfrac{\\hat{p}(x_i, y_j)}{\\hat{p}(x_i)\\hat{p}(y_j)} \\right),
+```
+
+where ``\\hat{p}(x_i) = \\frac{n(x_i)}{N}``, ``\\hat{p}(y_j) = \\frac{n(y_j)}{N}``, and
+``\\hat{p}(x_i, y_j) = \\frac{n(x_i, y_j)}{N}``, with ``n(\\cdot)`` the observed counts and
+``N`` the number of paired observations.
+This is the definition used when calling [`association`](@ref) with a
+[`JointProbabilities`](@ref) estimator or a precomputed [`Probabilities`](@ref) instance.
+
+### Three-entropies formulation
+
+An equivalent formulation of discrete Shannon mutual information is
+
+```math
+I^S(X; Y) = H^S(X) + H^S(Y) - H^S(X, Y),
+```
+
+where ``H^S(\\cdot)`` and ``H^S(\\cdot, \\cdot)`` are the marginal and joint discrete
+Shannon entropies. This is the definition used when calling [`association`](@ref) with a
+discrete [`EntropyDecomposition`](@ref) estimator.
+
+## Differential mutual information
+
+One possible formulation of differential Shannon mutual information is
+
+```math
+I^S(X; Y) = h^S(X) + h^S(Y) - h^S(X, Y),
+```
+
+where ``h^S(\\cdot)`` and ``h^S(\\cdot, \\cdot)`` are the marginal and joint
+differential Shannon entropies. This is the definition used when calling
+[`association`](@ref) with a differential [`EntropyDecomposition`](@ref) estimator.
+
+## Estimation
+
+- [Example 1](@ref example_MIShannon_JointProbabilities_ValueBinning): [`JointProbabilities`](@ref) with [`ValueBinning`](@ref) outcome space.
+- [Example 2](@ref example_MIShannon_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) outcome space on string data.
+- [Example 3](@ref example_MIShannon_GaussianMI): Dedicated [`GaussianMI`](@ref) estimator.
+- [Example 4](@ref example_MIShannon_KSG1): Dedicated [`KSG1`](@ref) estimator.
+- [Example 5](@ref example_MIShannon_KSG2): Dedicated [`KSG2`](@ref) estimator.
+- [Example 6](@ref example_MIShannon_GaoKannanOhViswanath): Dedicated [`GaoKannanOhViswanath`](@ref) estimator.
+- [Example 7](@ref example_MIShannon_EntropyDecomposition_Kraskov): [`EntropyDecomposition`](@ref) with [`Kraskov`](@ref) estimator.
+- [Example 8](@ref example_MIShannon_EntropyDecomposition_BubbleSortSwaps): [`EntropyDecomposition`](@ref) with [`BubbleSortSwaps`](@ref).
+- [Example 9](@ref example_MIShannon_EntropyDecomposition_Jackknife_ValueBinning): [`EntropyDecomposition`](@ref) with [`Jackknife`](@ref) estimator and [`ValueBinning`](@ref) outcome space.
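To make the double-sum route above concrete, a minimal sketch on discrete data (assuming the `probabilities` extension from this patch):

```julia
using CausalityTools

# y partially copies x, so the mutual information is positive.
x = rand(1:4, 2000)
y = [rand() < 0.3 ? xᵢ : rand(1:4) for xᵢ in x]

pxy = probabilities(UniqueElements(), x, y)
association(MIShannon(base = 2), pxy)
```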
+""" +Base.@kwdef struct MIShannon{B} <: MutualInformation + base::B = 2 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:MIShannon}, x, y) + cts = counts(est.discretization, x, y) + probs = probabilities(est.discretization, x, y) + + return association(est.definition, probs) +end + +function association(definition::MIShannon, pxy::Probabilities{T, 2}) where T + (; base) = definition + + px = marginal(pxy, dims = 1) + py = marginal(pxy, dims = 2) + mi = 0.0 + logb = log_with_base(base) + for i in eachindex(px.p) + pxᵢ = px[i] + for j in eachindex(py.p) + pyⱼ = py[j] + pxyᵢⱼ = pxy[i, j] + inner = pxyᵢⱼ / (pxᵢ * pyⱼ) + if inner != 0.0 + mi += pxyᵢⱼ * logb(inner) + end + end + end + return mi +end + +# ------------------------------------------------ +# Mutual information through entropy decomposition +# ------------------------------------------------ +function association(est::EntropyDecomposition{<:MIShannon, <:DifferentialInfoEstimator{<:Shannon}}, x, y) + HX, HY, HXY = marginal_entropies_mi3h_differential(est, x, y) + mi = HX + HY - HXY + return mi +end + +function association(est::EntropyDecomposition{<:MIShannon, <:DiscreteInfoEstimator{<:Shannon}, D, P}, x, y) where {D, P} + HX, HY, HXY = marginal_entropies_mi3h_discrete(est, x, y) + mi = HX + HY - HXY + return mi +end + +# ------------------------------------------------ +# Pretty printing for decomposition estimators. +# ------------------------------------------------ +function decomposition_string( + definition::MIShannon, + est::EntropyDecomposition{<:MIShannon, <:DifferentialInfoEstimator{<:Shannon}} + ) + return "Iₛ(X, Y) = hₛ(X) + hₛ(Y) - hₛ(X, Y)"; +end + +function decomposition_string( + definition::MIShannon, + est::EntropyDecomposition{<:MIShannon, <:DiscreteInfoEstimator{<:Shannon}} + ) + return "Iₛ(X, Y) = Hₛ(X) + Hₛ(Y) - Hₛ(X, Y)"; +end diff --git a/src/methods/information/definitions/mutual_informations/MITsallisFuruichi.jl b/src/methods/information/definitions/mutual_informations/MITsallisFuruichi.jl new file mode 100644 index 000000000..a2e643b36 --- /dev/null +++ b/src/methods/information/definitions/mutual_informations/MITsallisFuruichi.jl @@ -0,0 +1,95 @@ +using ComplexityMeasures: Tsallis + +export MITsallisFuruichi +""" + MITsallisFuruichi <: BivariateInformationMeasure + MITsallisFuruichi(; base = 2, q = 1.5) + +The discrete Tsallis mutual information from Furuichi (2006)[Furuichi2006](@cite), which +in that paper is called the *mutual entropy*. + +## Usage + +- Use with [`association`](@ref) to compute the raw Tsallis-Furuichi mutual information from input data + using of of the estimators listed below. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence using + the Tsallis-Furuichi mutual information. + +## Compatible estimators + +- [`JointProbabilities`](@ref) +- [`EntropyDecomposition`](@ref) + +## Description + +Furuichi's Tsallis mutual entropy between variables ``X \\in \\mathbb{R}^{d_X}`` and +``Y \\in \\mathbb{R}^{d_Y}`` is defined as + +```math +I_q^T(X; Y) = H_q^T(X) - H_q^T(X | Y) = H_q^T(X) + H_q^T(Y) - H_q^T(X, Y), +``` + +where ``H^T(\\cdot)`` and ``H^T(\\cdot, \\cdot)`` are the marginal and joint Tsallis +entropies, and `q` is the [`Tsallis`](@ref)-parameter. 
+ +## Estimation + +- [Example 1](@ref example_MITsallisFuruichi_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) outcome space. +- [Example 2](@ref example_MITsallisFuruichi_EntropyDecomposition_LeonenkoProsantoSavani): [`EntropyDecomposition`](@ref) with [`LeonenkoProsantoSavani`](@ref) estimator. +- [Example 3](@ref example_MITsallisFuruichi_EntropyDecomposition_Dispersion): [`EntropyDecomposition`](@ref) with [`Dispersion`](@ref) +""" +Base.@kwdef struct MITsallisFuruichi{B, Q} <: MutualInformation + base::B = 2 + q::Q = 1.5 +end + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(est::JointProbabilities{<:MITsallisFuruichi}, x, y) + probs = probabilities(est.discretization, x, y) + return association(est.definition, probs) +end + +function association(definition::MITsallisFuruichi, pxy::Probabilities{T, 2}) where T + (; base, q) = definition + + px = marginal(pxy, dims = 1) + py = marginal(pxy, dims = 2) + + mi = 0.0 + for i in eachindex(px.p) + for j in eachindex(py.p) + pxyᵢⱼ = pxy[i, j] + mi += pxyᵢⱼ^q / (px[i]^(q - 1) * py[j]^(q - 1)) + end + end + mi = (1 / (q - 1) * (1 - mi) / (1-q)) + return _convert_logunit(mi, ℯ, base) +end + + + +function association(est::EntropyDecomposition{<:MITsallisFuruichi, <:DifferentialInfoEstimator{<:Tsallis}}, x, y) + HX, HY, HXY = marginal_entropies_mi3h_differential(est, x, y) + mi = HX + HY - HXY + return mi +end + +function association(est::EntropyDecomposition{<:MITsallisFuruichi, <:DiscreteInfoEstimator{<:Tsallis}}, x, y) + HX, HY, HXY = marginal_entropies_mi3h_discrete(est, x, y) + mi = HX + HY - HXY + return mi +end + + +# ------------------------------------------------ +# Pretty printing for decomposition estimators. +# ------------------------------------------------ +function decomposition_string(definition::MITsallisFuruichi, est::DiscreteInfoEstimator{<:Tsallis}) + return "MI_TF(X, Y) = H_T(X) + H_T(Y) - H_T(X, Y)"; +end + +function decomposition_string(definition::MITsallisFuruichi, est::DifferentialInfoEstimator{<:Tsallis}) + return "MI_TF(X, Y) = h_T(X) + h_T(Y) - h_T(X, Y)"; +end \ No newline at end of file diff --git a/src/methods/information/definitions/mutual_informations/MITsallisMartin.jl b/src/methods/information/definitions/mutual_informations/MITsallisMartin.jl new file mode 100644 index 000000000..4a5c95a59 --- /dev/null +++ b/src/methods/information/definitions/mutual_informations/MITsallisMartin.jl @@ -0,0 +1,98 @@ +using ComplexityMeasures: Tsallis + +export MITsallisMartin + +""" + MITsallisMartin <: BivariateInformationMeasure + MITsallisMartin(; base = 2, q = 1.5) + +The discrete Tsallis mutual information from [Martin2004](@citet). + +## Usage + +- Use with [`association`](@ref) to compute the raw Tsallis-Martin mutual information from input data + using of of the estimators listed below. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise dependence using + the Tsallis-Martin mutual information. 
+
+## Compatible estimators
+
+- [`JointProbabilities`](@ref)
+- [`EntropyDecomposition`](@ref)
+
+## Description
+
+Martin et al.'s Tsallis mutual information between variables ``X \\in \\mathbb{R}^{d_X}``
+and ``Y \\in \\mathbb{R}^{d_Y}`` is defined as
+
+```math
+I_{\\text{Martin}}^T(X, Y, q) := H_q^T(X) + H_q^T(Y) - (1 - q) H_q^T(X) H_q^T(Y) - H_q^T(X, Y),
+```
+
+where ``H_q^T(\\cdot)`` and ``H_q^T(\\cdot, \\cdot)`` are the marginal and joint Tsallis
+entropies, and `q` is the [`Tsallis`](@ref)-parameter.
+
+## Estimation
+
+- [Example 1](@ref example_MITsallisMartin_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) outcome space.
+- [Example 2](@ref example_MITsallisMartin_EntropyDecomposition_LeonenkoProsantoSavani): [`EntropyDecomposition`](@ref) with [`LeonenkoProsantoSavani`](@ref) estimator.
+- [Example 3](@ref example_MITsallisMartin_EntropyDecomposition_OrdinalPatterns): [`EntropyDecomposition`](@ref) with [`OrdinalPatterns`](@ref) outcome space.
+"""
+Base.@kwdef struct MITsallisMartin{B, Q} <: MutualInformation
+    base::B = 2
+    q::Q = 1.5
+end
+
+
+# ----------------------------------------------------------------
+# Estimation methods
+# ----------------------------------------------------------------
+function association(est::JointProbabilities{<:MITsallisMartin}, x, y)
+    probs = probabilities(est.discretization, x, y)
+    return association(est.definition, probs)
+end
+
+# This is definition 3 in Martin et al. (2004), but with pᵢ replaced by the joint
+# distribution and qᵢ replaced by the product of the marginal distributions.
+function association(definition::MITsallisMartin, pxy::Probabilities{T, 2}) where T
+    (; base, q) = definition
+    # TODO: return MIShannon if q = 1? otherwise, we don't need `base`.
+    q != 1 || throw(ArgumentError("`MITsallisMartin` for q=$(q) not defined."))
+    px = marginal(pxy, dims = 1)
+    py = marginal(pxy, dims = 2)
+
+    mi = 0.0
+    for (i, pxᵢ) in enumerate(px.p)
+        for (j, pyⱼ) in enumerate(py.p)
+            pxyᵢⱼ = pxy[i, j]
+            mi += pxyᵢⱼ^q / (pxᵢ^(q - 1) * pyⱼ^(q - 1))
+        end
+    end
+    f = 1 / (q - 1)
+    return f * (1 - mi)
+end
+
+function association(est::EntropyDecomposition{<:MITsallisMartin, <:DifferentialInfoEstimator{<:Tsallis}}, x, y)
+    HX, HY, HXY = marginal_entropies_mi3h_differential(est, x, y)
+    q = est.definition.q
+    mi = HX + HY - (1 - q) * HX * HY - HXY
+    return mi
+end
+
+function association(est::EntropyDecomposition{<:MITsallisMartin, <:DiscreteInfoEstimator{<:Tsallis}}, x, y)
+    HX, HY, HXY = marginal_entropies_mi3h_discrete(est, x, y)
+    q = est.definition.q
+    mi = HX + HY - (1 - q) * HX * HY - HXY
+    return mi
+end
+
+# ------------------------------------------------
+# Pretty printing for decomposition estimators.
+# ------------------------------------------------ +function decomposition_string(definition::MITsallisMartin, est::DiscreteInfoEstimator{<:Tsallis}) + return "MI_S(X, Y) = H_T(X) + H_T(Y) - (1 - q)*H_T(X)*H_T(Y) - H_T(X, Y)"; +end + +function decomposition_string(definition::MITsallisMartin, est::DifferentialInfoEstimator{<:Tsallis}) + return "MI_S(X, Y) = h_T(X) + h_T(Y) - (1 - q)*h_T(X)*H_T(Y) - h_T(X, Y)"; +end \ No newline at end of file diff --git a/src/methods/information/definitions/mutual_informations/mutual_informations.jl b/src/methods/information/definitions/mutual_informations/mutual_informations.jl new file mode 100644 index 000000000..9b7344c7f --- /dev/null +++ b/src/methods/information/definitions/mutual_informations/mutual_informations.jl @@ -0,0 +1,55 @@ +export MutualInformation + +""" + MutualInformation + +Abstract type for all mutual information measures. + +## Concrete implementations + +- [`MIShannon`](@ref) +- [`MITsallisMartin`](@ref) +- [`MITsallisFuruichi`](@ref) +- [`MIRenyiJizba`](@ref) +- [`MIRenyiSarbu`](@ref) + +See also: [`MutualInformationEstimator`](@ref) +""" +abstract type MutualInformation <: BivariateInformationMeasure end + +# Generic 3H-formulation of Shannon mutual information (a scaled sum of these entropies are also used by +# some of the other mutual information measures, so we define it generically here). +function marginal_entropies_mi3h_differential(est::EntropyDecomposition{<:MutualInformation, <:DifferentialInfoEstimator}, x, y) + X = StateSpaceSet(x) + Y = StateSpaceSet(y) + XY = StateSpaceSet(X, Y) + modified_est = estimator_with_overridden_parameters(est.definition, est.est) + HX = information(modified_est, X) # estimates entropy in the X marginal + HY = information(modified_est, Y) # estimates entropy in the Y marginal + HXY = information(modified_est, XY) # estimates entropy in the joint XY space + return HX, HY, HXY +end + +function marginal_entropies_mi3h_discrete(est::EntropyDecomposition{<:MutualInformation, <:DiscreteInfoEstimator}, x, y) + # Encode marginals to integers based on the outcome space. + eX, eY = codified_marginals(est.discretization, x, y) + eXY = StateSpaceSet(eX, eY) + + # The outcome space is no longer relevant from this point on. We're done discretizing, + # so now we can just count (i.e. use `UniqueElements` as the outcome space). + o = UniqueElements() + + modified_est = estimator_with_overridden_parameters(est.definition, est.est) + HX = information(modified_est, est.pest, o, eX) # estimates entropy in the X marginal + HY = information(modified_est, est.pest, o, eY) # estimates entropy in the Y marginal + HXY = information(modified_est, est.pest, o, eXY) # estimates entropy in the joint XY space + + return HX, HY, HXY +end + + +include("MIShannon.jl") +include("MITsallisMartin.jl") +include("MITsallisFuruichi.jl") +include("MIRenyiSarbu.jl") +include("MIRenyiJizba.jl") \ No newline at end of file diff --git a/src/methods/information/definitions/override_parameters.jl b/src/methods/information/definitions/override_parameters.jl new file mode 100644 index 000000000..82e8961d3 --- /dev/null +++ b/src/methods/information/definitions/override_parameters.jl @@ -0,0 +1,87 @@ +using Accessors: @set + +export estimator_with_overridden_parameters + +# For internal use only. 
+""" + estimator_with_overridden_parameters(definition, lower_level_estimator) → e::typeof(lower_level_estimator) + +Given some higher-level `definition` of an information measure, which is to be +estimated using some `lower_level_estimator`, return a modified version of +the estimator in which its parameter have been overriden by any overlapping +parameters from the `defintiion`. + +This method is explicitly extended for each possible decomposition. +""" +function estimator_with_overridden_parameters(definition, lower_level_estimator) end + +const TSALLIS_MULTIVARIATE_MEASURES = Union{ + CMITsallisPapapetrou, + MITsallisFuruichi, MITsallisMartin, + ConditionalEntropyTsallisAbe, ConditionalEntropyTsallisFuruichi, + JointEntropyTsallis, +} + +const RENYI_MULTIVARIATE_MEASURES = Union{ + TERenyiJizba, + CMIRenyiPoczos, CMIRenyiSarbu, CMIRenyiJizba, + MIRenyiJizba, MIRenyiSarbu, + JointEntropyRenyi, +} + +const SHANNON_MULTIVARIATE_MEASURES = Union{ + CMIShannon, + MIShannon, + ConditionalEntropyShannon, + JointEntropyShannon, + TEShannon, +} + + +function estimator_with_overridden_parameters( + definition::TSALLIS_MULTIVARIATE_MEASURES, + est::InformationMeasureEstimator{<:Tsallis} + ) + # The low-level definition + lowdef = est.definition + + # Update the low-level definition. Have to do this step-wise. Ugly, but works. + modified_lowdef = @set lowdef.base = definition.base # update `base` field + modified_lowdef = @set modified_lowdef.q = definition.q # update `q` field + + # Set the definition for the low-level estimator to the updated definition. + modified_est = @set est.definition = modified_lowdef + + return modified_est +end + +function estimator_with_overridden_parameters( + definition::RENYI_MULTIVARIATE_MEASURES, + est::InformationMeasureEstimator{<:Renyi} + ) + lowdef = est.definition + modified_lowdef = @set lowdef.base = definition.base # update `base` field + modified_lowdef = @set modified_lowdef.q = definition.q # update `q` field + modified_est = @set est.definition = modified_lowdef + return modified_est +end + +function estimator_with_overridden_parameters( + definition::SHANNON_MULTIVARIATE_MEASURES, + est::InformationMeasureEstimator{<:Shannon} + ) + lowdef = est.definition + modified_lowdef = @set lowdef.base = definition.base # update `base` field + modified_est = @set est.definition = modified_lowdef + return modified_est +end + +function estimator_with_overridden_parameters( + definition::CMIShannon, + est::MutualInformationEstimator{<:MIShannon} + ) + lowdef = est.definition + modified_lowdef = @set lowdef.base = definition.base # update `base` field + modified_est = @set est.definition = modified_lowdef + return modified_est +end \ No newline at end of file diff --git a/src/methods/information/definitions/partial_mutual_information/partial_mutual_information.jl b/src/methods/information/definitions/partial_mutual_information/partial_mutual_information.jl new file mode 100644 index 000000000..2d41cd0cc --- /dev/null +++ b/src/methods/information/definitions/partial_mutual_information/partial_mutual_information.jl @@ -0,0 +1,118 @@ +export PartialMutualInformation + +""" + PartialMutualInformation <: MultivariateInformationMeasure + PartialMutualInformation(; base = 2) + +The partial mutual information (PMI) measure of conditional association [Zhao2016](@cite). 
+ +## Definition + +PMI is defined as for variables ``X``, ``Y`` and ``Z`` as + +```math +PMI(X; Y | Z) = D(p(x, y, z) || p^{*}(x|z) p^{*}(y|z) p(z)), +``` + +where ``p(x, y, z)`` is the joint distribution for ``X``, ``Y`` and ``Z``, and +``D(\\cdot, \\cdot)`` is the extended Kullback-Leibler divergence from +``p(x, y, z)`` to ``p^{*}(x|z) p^{*}(y|z) p(z)``. See [Zhao2016](@citet) for details. + +## Estimation + +The PMI is estimated by first estimating a 3D probability mass function using +[`probabilities`](@ref), then computing ``PMI(X; Y | Z)`` from those probaiblities. + +## Properties + +For the discrete case, the following identities hold in theory (when estimating PMI, they +may not). + +- `PMI(X, Y, Z) >= CMI(X, Y, Z)` (where CMI is the Shannon CMI). Holds in theory, but + when estimating PartialMutualInformation, the identity may not hold. +- `PMI(X, Y, Z) >= 0`. Holds both in theory and when estimating using discrete estimators. +- `X ⫫ Y | Z => PMI(X, Y, Z) = CMI(X, Y, Z) = 0` (in theory, but not necessarily for + estimation). +""" +Base.@kwdef struct PartialMutualInformation <: MultivariateInformationMeasure + base::Real = 2 +end + +min_inputs_vars(::PartialMutualInformation) = 3 +max_inputs_vars(::PartialMutualInformation) = 3 + + +# ---------------------------------------------------------------- +# Estimation methods +# ---------------------------------------------------------------- +function association(definition::PartialMutualInformation, pxyz::Probabilities{T, 3}) where T + dx, dy, dz = size(pxyz) + px = marginal(pxyz, dims = [1]) + py = marginal(pxyz, dims = [2]) + pz = marginal(pxyz, dims = [3]) + pxz = marginal(pxyz, dims = [1, 3]) + pyz = marginal(pxyz, dims = [2, 3]) + + # Precompute p⭐x⎸z and p⭐y⎸z sums + p⭐x⎸z = zeros(dx, dz) + p⭐y⎸z = zeros(dy, dz) + + for i in 1:dx + for k in 1:dz + sum_j = 0.0 + for j in 1:dy + pyⱼ = py[j] + pyⱼzₖ = pyz[j, k] + if pyⱼzₖ > 0 + sum_j += (pxyz[i, j, k] / pyⱼzₖ) * pyⱼ + end + end + p⭐x⎸z[i, k] = sum_j + end + end + + for j in 1:dy + for k in 1:dz + sum_i = 0.0 + for i in 1:dx + pxᵢ = px[i] + pxᵢzₖ = pxz[i, k] + if pxᵢzₖ > 0 + sum_i += (pxyz[i, j, k] / pxᵢzₖ) * pxᵢ + end + end + p⭐y⎸z[j, k] = sum_i + end + end + + # Compute PMI + pmi = 0.0 + logb = log_with_base(definition.base) + for k in 1:dz + pzₖ = pz[k] + for j in 1:dy + p⭐yⱼ⎸zₖ = p⭐y⎸z[j, k] + if p⭐yⱼ⎸zₖ > 0 + for i in 1:dx + p⭐xᵢ⎸zₖ = p⭐x⎸z[i, k] + if (p⭐xᵢ⎸zₖ > 0) + pxᵢyⱼzₖ = pxyz[i, j, k] + if pxᵢyⱼzₖ > 0 + pxᵢyⱼ⎸zₖ = pxᵢyⱼzₖ / pzₖ + logratio = logb(pxᵢyⱼ⎸zₖ / (p⭐xᵢ⎸zₖ * p⭐yⱼ⎸zₖ)) + if logratio > 0 + pmi += pxᵢyⱼzₖ * logratio + end + end + end + end + end + end + end + return pmi +end + +function association(est::JointProbabilities{PartialMutualInformation}, x, y, z) + pxyz = probabilities(est.discretization, x, y, z) + return association(est.definition, pxyz) +end \ No newline at end of file diff --git a/src/methods/information/definitions/transferentropy/TERenyiJizba.jl b/src/methods/information/definitions/transferentropy/TERenyiJizba.jl new file mode 100644 index 000000000..48dd5d51b --- /dev/null +++ b/src/methods/information/definitions/transferentropy/TERenyiJizba.jl @@ -0,0 +1,95 @@ +export TERenyiJizba + +""" + TERenyiJizba() <: TransferEntropy + +The Rényi transfer entropy from [Jizba2012](@citet). + +## Usage + +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise + and conditional dependence. +- Use with [`transferentropy`](@ref) to compute the raw transfer entropy. 
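+
+## Example
+
+A minimal sketch using an entropy decomposition with an ordinal-pattern discretization
+(the estimator choice, parameters, and data below are illustrative assumptions; see the
+Description and Estimation sections below):
+
+```julia
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+x = rand(rng, 2000)
+y = circshift(x, 1) .+ 0.1 .* rand(rng, 2000) # y is (mostly) a lagged copy of x
+disc = CodifyVariables(OrdinalPatterns(m = 3))
+est = EntropyDecomposition(TERenyiJizba(q = 1.5), PlugIn(Renyi()), disc, RelativeAmount())
+association(est, x, y) # TE(x → y); expected to exceed association(est, y, x)
+```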
+
+## Description
+
+The transfer entropy from source ``S`` to target ``T``, potentially
+conditioned on ``C``, is defined as
+
+```math
+\\begin{align*}
+TE(S \\to T) &:= I_q^{R_J}(T^+; S^- | T^-) \\\\
+TE(S \\to T | C) &:= I_q^{R_J}(T^+; S^- | T^-, C^-),
+\\end{align*}
+```
+
+where ``I_q^{R_J}(T^+; S^- | T^-)`` is the Rényi conditional mutual information of
+Jizba et al. (2012) ([`CMIRenyiJizba`](@ref)). The variables ``T^+``, ``T^-``,
+``S^-`` and ``C^-`` are described in the docstring for [`transferentropy`](@ref).
+
+## Compatible estimators
+
+Jizba's Rényi transfer entropy does not have a dedicated estimator. Instead, we rewrite
+it as a Rényi conditional mutual information and estimate it using an
+[`EntropyDecomposition`](@ref) with a suitable discrete/differential Rényi entropy
+estimator from the table below as its input.
+
+| Estimator                      | Sub-estimator                    | Principle                    |
+| :----------------------------- | :------------------------------- | :--------------------------- |
+| [`EntropyDecomposition`](@ref) | [`LeonenkoProzantoSavani`](@ref) | Four-entropies decomposition |
+| [`EntropyDecomposition`](@ref) | [`ValueBinning`](@ref)           | Four-entropies decomposition |
+| [`EntropyDecomposition`](@ref) | [`Dispersion`](@ref)             | Four-entropies decomposition |
+| [`EntropyDecomposition`](@ref) | [`OrdinalPatterns`](@ref)        | Four-entropies decomposition |
+| [`EntropyDecomposition`](@ref) | [`UniqueElements`](@ref)         | Four-entropies decomposition |
+| [`EntropyDecomposition`](@ref) | [`TransferOperator`](@ref)       | Four-entropies decomposition |
+
+Any of these sub-estimators must be given as input to an [`EntropyDecomposition`](@ref)
+estimator.
+
+## Estimation
+
+- [Example 1](@ref example_TERenyiJizba_EntropyDecomposition_TransferOperator): [`EntropyDecomposition`](@ref) with [`TransferOperator`](@ref) outcome space.
+
+"""
+struct TERenyiJizba{B, Q, EMB} <: TransferEntropy
+    base::B
+    q::Q
+    embedding::EMB
+    function TERenyiJizba(; base::B = 2, q::Q = 1.5, embedding::EMB = EmbeddingTE()) where {B, Q, EMB}
+        return new{B, Q, EMB}(base, q, embedding)
+    end
+end
+
+function convert_to_cmi_estimator(est::EntropyDecomposition{<:TERenyiJizba, <:DiscreteInfoEstimator{<:Renyi}})
+    (; definition, est, discretization, pest) = est
+    base = definition.base
+    return EntropyDecomposition(CMIRenyiJizba(; base), est, discretization, pest)
+end
+
+function convert_to_cmi_estimator(est::EntropyDecomposition{<:TERenyiJizba, <:DifferentialInfoEstimator{<:Renyi}})
+    return EntropyDecomposition(CMIRenyiJizba(; est.definition.base), est.est)
+end
+
+# ------------------------------------------------
+# Pretty printing for decomposition estimators.
+# ------------------------------------------------ +# These are some possible decompositions +# TE(s -> t | c) = +# = I(t⁺; s⁻ | t⁻, c⁻) +# = I(t⁺; s⁻, t⁻, c⁻) - I(t⁺; t⁻, c⁻) +# = h(t⁺ | t⁻,c⁻) - h(t⁺ | s⁻,t⁻,c⁻) +# = h(t⁺, t⁻,c⁻) - h(t⁻,c⁻) - h(t⁺,s⁻,t⁻,c⁻) + h(s⁻,t⁻,c⁻)" + +function decomposition_string( + definition::TERenyiJizba, + est::EntropyDecomposition{M, <:DiscreteInfoEstimator} + ) where M + return "TEᵣⱼ(s → t | c) = Hᵣ(t⁺, t⁻,c⁻) - Hᵣ(t⁻,c⁻) - Hᵣ(t⁺,s⁻,t⁻,c⁻) + Hᵣ(s⁻,t⁻,c⁻)" +end + +function decomposition_string( + definition::TERenyiJizba, + est::EntropyDecomposition{M, <:DifferentialInfoEstimator} + ) where M + return "TEᵣⱼ(s → t | c) = hᵣ(t⁺, t⁻,c⁻) - hᵣ(t⁻,c⁻) - hᵣ(t⁺,s⁻,t⁻,c⁻) + hᵣ(s⁻,t⁻,c⁻)" +end \ No newline at end of file diff --git a/src/methods/information/definitions/transferentropy/TEShannon.jl b/src/methods/information/definitions/transferentropy/TEShannon.jl new file mode 100644 index 000000000..37327d934 --- /dev/null +++ b/src/methods/information/definitions/transferentropy/TEShannon.jl @@ -0,0 +1,109 @@ +export TEShannon + +""" + TEShannon <: TransferEntropy + TEShannon(; base = 2; embedding = EmbeddingTE()) <: TransferEntropy + +The Shannon-type transfer entropy measure. + +## Usage + +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise + and conditional dependence. +- Use with [`transferentropy`](@ref) to compute the raw transfer entropy. + +## Description + +The transfer entropy from source ``S`` to target ``T``, potentially +conditioned on ``C`` is defined as + +```math +\\begin{align*} +TE(S \\to T) &:= I^S(T^+; S^- | T^-) \\\\ +TE(S \\to T | C) &:= I^S(T^+; S^- | T^-, C^-) +\\end{align*} +``` + +where ``I(T^+; S^- | T^-)`` is the Shannon conditional mutual information +([`CMIShannon`](@ref)). The variables ``T^+``, ``T^-``, +``S^-`` and ``C^-`` are described in the docstring for [`transferentropy`](@ref). + +## Estimation + +- [Example 1](@ref example_TEShannon_EntropyDecomposition_TransferOperator): + [`EntropyDecomposition`](@ref) with [`TransferOperator`](@ref) outcome space. +- [Example 2](@ref example_TEShannon_SymbolicTransferEntropy): Estimation using the + [`SymbolicTransferEntropy`](@ref) estimator. +""" +struct TEShannon{B, EMB} <: TransferEntropy + base::B + embedding::EMB + function TEShannon(; base::B = 2, embedding::EMB = EmbeddingTE()) where {B, EMB} + return new{B, EMB}(base, embedding) + end + # TODO: add constructor that automatically determines the embedding. +end + +function convert_to_cmi_estimator(est::EntropyDecomposition{<:TEShannon, <:DiscreteInfoEstimator}) + (; definition, est, discretization, pest) = est + base = definition.base + return EntropyDecomposition(CMIShannon(; base), est, discretization, pest) +end + +function convert_to_cmi_estimator(est::EntropyDecomposition{<:TEShannon, <:DifferentialInfoEstimator}) + return EntropyDecomposition(CMIShannon(; est.definition.base), est.est) +end + +function convert_to_cmi_estimator(est::MIDecomposition{<:TEShannon}) + base = est.definition.base + return MIDecomposition(CMIShannon(; base), est.est) +end + +function convert_to_cmi_estimator(est::CMIDecomposition{<:TEShannon}) + base = est.definition.base + return CMIDecomposition(CMIShannon(; base), est.est) +end + +function convert_to_cmi_estimator(est::JointProbabilities{<:TEShannon}) + base = est.definition.base + return JointProbabilities(CMIShannon(; base), est.discretization, est.pest) +end + +# ------------------------------------------------ +# Pretty printing for decomposition estimators. 
+# ------------------------------------------------ +# These are some possible decompositions +# TE(s -> t | c) = +# = I(t⁺; s⁻ | t⁻, c⁻) +# = I(t⁺; s⁻, t⁻, c⁻) - I(t⁺; t⁻, c⁻) +# = h(t⁺ | t⁻,c⁻) - h(t⁺ | s⁻,t⁻,c⁻) +# = h(t⁺, t⁻,c⁻) - h(t⁻,c⁻) - h(t⁺,s⁻,t⁻,c⁻) + h(s⁻,t⁻,c⁻)" + +function decomposition_string( + definition::TEShannon, + est::EntropyDecomposition{M, <:DiscreteInfoEstimator{<:Shannon}} + ) where M + return "TEₛ(s → t | c) = Hₛ(t⁺, t⁻,c⁻) - Hₛ(t⁻,c⁻) - Hₛ(t⁺,s⁻,t⁻,c⁻) + Hₛ(s⁻,t⁻,c⁻)" +end + +function decomposition_string( + definition::TEShannon, + est::EntropyDecomposition{M, <:DifferentialInfoEstimator{<:Shannon}} + ) where M + return "TEₛ(s → t | c) = hₛ(t⁺, t⁻,c⁻) - hₛ(t⁻,c⁻) - hₛ(t⁺,s⁻,t⁻,c⁻) + hₛ(s⁻,t⁻,c⁻)" +end + +function decomposition_string( + definition::TEShannon, + est::MIDecomposition{M, <:MutualInformationEstimator{<:MIShannon}} + ) where M + return "TEₛ(s → t | c) = Iₛ(t⁺; s⁻, t⁻, c⁻) - Iₛ(t⁺; t⁻, c⁻)" +end + + +function decomposition_string( + definition::TEShannon, + est::CMIDecomposition{M, <:ConditionalMutualInformationEstimator{<:CMIShannon}} + ) where M + return "TEₛ(s → t | c) = Iₛ(t⁺; s⁻ | t⁻, c⁻)" +end diff --git a/src/methods/infomeasures/transferentropy/embedding.jl b/src/methods/information/definitions/transferentropy/embedding.jl similarity index 99% rename from src/methods/infomeasures/transferentropy/embedding.jl rename to src/methods/information/definitions/transferentropy/embedding.jl index 0b42f9d76..c1e5559d7 100644 --- a/src/methods/infomeasures/transferentropy/embedding.jl +++ b/src/methods/information/definitions/transferentropy/embedding.jl @@ -175,3 +175,5 @@ function Base.show(io::IO, x::EmbeddingTE) s = "EmbeddingTE(dS=$(x.dS), dT=$(x.dT), dC=$(x.dC), dTf=$(x.dTf), τS=$(x.τS), τT=$(x.τT), τC=$(x.τC), ηTf=$(x.ηTf))" print(io, s) end + +include("optimization/traditional_optimal_embedding.jl") \ No newline at end of file diff --git a/src/methods/information/definitions/transferentropy/estimation_utils.jl b/src/methods/information/definitions/transferentropy/estimation_utils.jl new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/src/methods/information/definitions/transferentropy/estimation_utils.jl @@ -0,0 +1 @@ + diff --git a/src/methods/infomeasures/transferentropy/optimization/bbnue/bbnue.jl b/src/methods/information/definitions/transferentropy/optimization/bbnue/bbnue.jl similarity index 99% rename from src/methods/infomeasures/transferentropy/optimization/bbnue/bbnue.jl rename to src/methods/information/definitions/transferentropy/optimization/bbnue/bbnue.jl index f22971104..6f760b5dd 100644 --- a/src/methods/infomeasures/transferentropy/optimization/bbnue/bbnue.jl +++ b/src/methods/information/definitions/transferentropy/optimization/bbnue/bbnue.jl @@ -106,7 +106,7 @@ z, w = randn(n), rand(n) # some completely unrelated timeseries to condition on. # to keep computation costs low and to ensure the probability distributions # over the bins don't approach the uniform distribution (need enough points # to fill bins). 
-est = ValueHistogram(4) +est = ValueBinning(4) te_xy = bbnue(est, x, y, surr = RandomShuffle(), nsurr = 50) te_yx = bbnue(est, y, x, surr = RandomShuffle(), nsurr = 50) diff --git a/src/methods/infomeasures/transferentropy/optimization/bbnue/bbnue_new.jl b/src/methods/information/definitions/transferentropy/optimization/bbnue/bbnue_new.jl similarity index 100% rename from src/methods/infomeasures/transferentropy/optimization/bbnue/bbnue_new.jl rename to src/methods/information/definitions/transferentropy/optimization/bbnue/bbnue_new.jl diff --git a/src/methods/infomeasures/transferentropy/optimization/bbnue/candidate_variable_sets.jl b/src/methods/information/definitions/transferentropy/optimization/bbnue/candidate_variable_sets.jl similarity index 100% rename from src/methods/infomeasures/transferentropy/optimization/bbnue/candidate_variable_sets.jl rename to src/methods/information/definitions/transferentropy/optimization/bbnue/candidate_variable_sets.jl diff --git a/src/methods/infomeasures/transferentropy/optimization/bbnue/construct_candidates.jl b/src/methods/information/definitions/transferentropy/optimization/bbnue/construct_candidates.jl similarity index 100% rename from src/methods/infomeasures/transferentropy/optimization/bbnue/construct_candidates.jl rename to src/methods/information/definitions/transferentropy/optimization/bbnue/construct_candidates.jl diff --git a/src/methods/infomeasures/transferentropy/optimization/traditional_optimal_embedding.jl b/src/methods/information/definitions/transferentropy/optimization/traditional_optimal_embedding.jl similarity index 97% rename from src/methods/infomeasures/transferentropy/optimization/traditional_optimal_embedding.jl rename to src/methods/information/definitions/transferentropy/optimization/traditional_optimal_embedding.jl index 51a0251c7..cbca5a385 100644 --- a/src/methods/infomeasures/transferentropy/optimization/traditional_optimal_embedding.jl +++ b/src/methods/information/definitions/transferentropy/optimization/traditional_optimal_embedding.jl @@ -37,7 +37,7 @@ Optimise the marginals for a transfer entropy analysis from source time series ` target time series `t`, potentially given a conditional time series `c`. If `exclude_source == true`, then no optimisation is done for the source. This is -useful for [`SurrogateTest`](@ref), because most surrogate methods accept +useful for [`SurrogateAssociationTest`](@ref), because most surrogate methods accept univariate time series, and if we embed the source and it becomes multidimensional, then we can't create surrogates. A future optimization is to do column-wise surrogate generation. diff --git a/src/methods/information/definitions/transferentropy/transfer_entropies.jl b/src/methods/information/definitions/transferentropy/transfer_entropies.jl new file mode 100644 index 000000000..f33e029a5 --- /dev/null +++ b/src/methods/information/definitions/transferentropy/transfer_entropies.jl @@ -0,0 +1,83 @@ +export TransferEntropy + +""" + TransferEntropy <: AssociationMeasure + +The supertype of all transfer entropy measures. 
Concrete subtypes are +- [`TEShannon`](@ref) +- [`TERenyiJizba`](@ref) +""" +abstract type TransferEntropy <: MultivariateInformationMeasure end + +max_inputs_vars(::TransferEntropy) = 3 +is_directed(m::TransferEntropy) = true + + + +include("embedding.jl") +include("utils/utils.jl") +include("utils/OptimiseTraditional.jl") + + +# If the estimator is not a dedicated `TransferEntropyEstimator`, then we +# convert the estimator to a conditional mutual information estimator which we apply +# to appropriately constructed marginals. +function association(est::MultivariateInformationMeasureEstimator{<:TransferEntropy}, x...) + te_definition = est.definition + embedding = te_definition.embedding + # If a conditional input (x[3]) is not provided, then C is just a 0-dimensional + # StateSpaceSet. The horizontal concatenation of C with T then just returns T. + # We therefore don't need separate methods for the conditional and non-conditional + # cases. + S, T, T⁺, C = individual_marginals_te(embedding, x...) + + cmi_est = convert_to_cmi_estimator(est) + if est isa JointProbabilities + # If discrete, we must codify each marginal separately and + # collect the "marginal marginals" into new statespace sets. + tmp_TC = codify(est.discretization, StateSpaceSet(T, C)) + if tmp_TC isa AbstractVector + T̂Ĉ = StateSpaceSet(tmp_TC) + else + T̂Ĉ = StateSpaceSet(tmp_TC...,) + end + Ŝ = codify(est.discretization, S) + T̂⁺ = codify(est.discretization, T⁺) + # We have already encoded the marginals, so when computing CMI, we can + # simply use `UniqueElements`. + disc = CodifyVariables(UniqueElements()) + est_unique = JointProbabilities(cmi_est.definition, disc, est.pest) + return association(est_unique, T̂⁺, Ŝ , T̂Ĉ) + + else + #Estimate by letting TE(s -> t | c) := I(t⁺; s⁻ | t⁻, c⁻). + return association(cmi_est, T⁺, S, StateSpaceSet(T, C)) + end + +end + +function individual_marginals_te(emb::EmbeddingTE, x::VectorOr1DDataset...) + joint, vars, τs, js = te_embed(emb, x...) + S = joint[:, vars.S] + T = joint[:, vars.T] + Tf = joint[:, vars.Tf] + C = joint[:, vars.C] + return S, T, Tf, C +end + +# function h4_marginals(definition::TransferEntropy, x...) +# S, T, T⁺, C = individual_marginals_te(definition.embedding, x...) 
+# joint = StateSpaceSet(S, T, T⁺, C) +# ST = StateSpaceSet(S, T, C) +# TT⁺ = StateSpaceSet(T, T⁺, C) +# T = StateSpaceSet(T, C) +# return joint, ST, TT⁺, T +# end + +# Concrete implementations +include("TEShannon.jl") +include("TERenyiJizba.jl") + + +# Special estimation +include("transferoperator.jl") \ No newline at end of file diff --git a/src/methods/infomeasures/transferentropy/estimators/transferoperator.jl b/src/methods/information/definitions/transferentropy/transferoperator.jl similarity index 53% rename from src/methods/infomeasures/transferentropy/estimators/transferoperator.jl rename to src/methods/information/definitions/transferentropy/transferoperator.jl index aa06b8349..48220bad8 100644 --- a/src/methods/infomeasures/transferentropy/estimators/transferoperator.jl +++ b/src/methods/information/definitions/transferentropy/transferoperator.jl @@ -3,6 +3,8 @@ import ComplexityMeasures: TransferOperator, invariantmeasure, InvariantMeasure, using ComplexityMeasures.GroupSlices export TransferOperator +using ComplexityMeasures: Probabilities + """ marginal_indices(x) @@ -60,41 +62,75 @@ function _marginal_encodings(encoder::RectangularBinEncoding, x::VectorOrStateSp return encodings end -function transferentropy( - measure::TransferEntropy, - est::TransferOperator{<:RectangularBinning}, x...) - e = measure.e - joint_pts, vars, τs, js = te_embed(measure.embedding, x...) - iv = invariantmeasure(joint_pts, est.binning) +# Only works for `RelativeAmount`, because probabilities are obtained from the +# transfer operator. +function h4_marginal_probs( + est::EntropyDecomposition{ + <:TransferEntropy, + <:DiscreteInfoEstimator, + <:CodifyVariables{1, <:TransferOperator}, + <:RelativeAmount + }, + x... + ) + # We never reach this point unless the outcome space is the same for all marginals, + # so we can safely pick the first outcome space. + d::TransferOperator = first(est.discretization.outcome_spaces) + + if !d.binning.precise + throw(ArgumentError("Please supply a binning with `precise == true`, otherwise points may end up outside the binning.")) + end + joint_pts, vars, τs, js = te_embed(est.definition.embedding, x...) + iv = invariantmeasure(joint_pts, d.binning) # TODO: this needs to be done more cleverly in ComplexityMeasures.jl, so we don't # need to do the conversion twice. We should explicitly store the bin indices for all # marginals, not a single encoding integer for each bin. Otherwise, we can't # properly subset marginals here and relate them to the approximated invariant measure. 
- # The bins visited by the orbit are - visited_bins_coordinates = StateSpaceSet(decode.(Ref(iv.to.encoder), iv.to.bins)) - unique_visited_bins = _marginal_encodings(iv.to.encoder, visited_bins_coordinates)[1] + encoding = iv.to.encoding + visited_bins_coordinates = StateSpaceSet(decode.(Ref(encoding), iv.to.bins)) + unique_visited_bins = _marginal_encodings(iv.to.encoding, visited_bins_coordinates)[1] # # The subset of visited bins with nonzero measure inds_non0measure = findall(iv.ρ .> 0) positive_measure_bins = unique_visited_bins[inds_non0measure] # Estimate marginal probability distributions from joint measure - cols_ST = [vars.S; vars.T; vars.C] - cols_TTf = [vars.Tf; vars.T; vars.C] - cols_T = [vars.T; vars.C] - p_T = marginal_probs_from_μ(cols_T, positive_measure_bins, iv, inds_non0measure) - p_ST = marginal_probs_from_μ(cols_ST, positive_measure_bins, iv, inds_non0measure) - p_TTf = marginal_probs_from_μ(cols_TTf, positive_measure_bins, iv, inds_non0measure) - p_joint = iv.ρ[inds_non0measure] - - te = entropy(e, Probabilities(p_ST)) + - entropy(e, Probabilities(p_TTf)) - - entropy(e, Probabilities(p_T)) - - entropy(e, Probabilities(p_joint)) + cols_STC = [vars.S; vars.T; vars.C] + cols_T⁺TC = [vars.Tf; vars.T; vars.C] + cols_TC = [vars.T; vars.C] + pTC = marginal_probs_from_μ(cols_TC, positive_measure_bins, iv, inds_non0measure) + pSTC = marginal_probs_from_μ(cols_STC, positive_measure_bins, iv, inds_non0measure) + pT⁺TC = marginal_probs_from_μ(cols_T⁺TC, positive_measure_bins, iv, inds_non0measure) + pST⁺TC = iv.ρ[inds_non0measure] + + return Probabilities(pTC), + Probabilities(pSTC), + Probabilities(pT⁺TC), + Probabilities(pST⁺TC) end -transferentropy(est::TransferOperator{<:RectangularBinning}, s, t; kwargs...) = - transferentropy(Shannon(; base), est, s, t; kwargs...) -transferentropy(est::TransferOperator{<:RectangularBinning}, s, t, c; kwargs...) = - transferentropy(Shannon(; base), est, s, t, c; kwargs...) +function association( + est::EntropyDecomposition{ + <:TransferEntropy, + <:DiscreteInfoEstimator, + <:CodifyVariables{1, <:TransferOperator}, + <:RelativeAmount + }, + x...) + # If a conditional input (x[3]) is not provided, then C is just a 0-dimensional + # StateSpaceSet. The horizontal concatenation of C with T then just returns T. + # We therefore don't need separate methods for the conditional and non-conditional + # cases. + pTC, pSTC, pT⁺TC, pST⁺TC = h4_marginal_probs(est, x...) + cmi_est = convert_to_cmi_estimator(est) + h_est = estimator_with_overridden_parameters(cmi_est.definition, cmi_est.est) + + # Estimate by letting TE(s -> t | c) := I(t⁺; s⁻ | t⁻, c⁻). + hSTC = information(h_est, pSTC) + hT⁺TC = information(h_est, pT⁺TC) + hTC = information(h_est, pTC) + hST⁺TC = information(h_est, pST⁺TC) + te = hT⁺TC - hTC - hST⁺TC + hSTC + return te +end \ No newline at end of file diff --git a/src/methods/information/definitions/transferentropy/utils/OptimiseTraditional.jl b/src/methods/information/definitions/transferentropy/utils/OptimiseTraditional.jl new file mode 100644 index 000000000..cbca5a385 --- /dev/null +++ b/src/methods/information/definitions/transferentropy/utils/OptimiseTraditional.jl @@ -0,0 +1,97 @@ +using DelayEmbeddings + +export OptimiseTraditional +export optimize_marginals_te + +""" + OptimiseTraditional(; η = 1, maxlag = 50, maxdim = 10, + method = delay_ifnn, dmethod = "mi_min") + +Optimize embedding parameters using traditional delay embedding optimization methods +with a maximum lag of `maxlag` and a maximum dimension of `maxdim`. 
`method` can +be either `delay_ifnn`, `delay_fnn` or `delay_f1nn`. +""" +Base.@kwdef struct OptimiseTraditional{L} + η::Int = 1 + maxlag::L = 0.05 + maxdim::Int = 5 + method::Function = delay_ifnn + dmethod::AbstractString = "mi_min" +end + +""" + optimize_marginals_te([scheme = OptimiseTraditional()], s, t, [c]) → EmbeddingTE + +Optimize marginal embeddings for transfer entropy computation from source time series `s` +to target time series `t`, conditioned on `c` if `c` is given, using the provided +optimization `scheme`. +""" +function optimize_marginals_te end +getlags(opt::OptimiseTraditional{<:Float64}, x) = 1:floor(Int, length(x)*opt.maxlag) +getlags(opt::OptimiseTraditional{<:Int}, args...) = 1:opt.maxlag + +""" + optimize_marginals_te(opt::OptimiseTraditional, s, t, [c]; exclude_source = false) → EmbeddingTE + +Optimise the marginals for a transfer entropy analysis from source time series `s` to +target time series `t`, potentially given a conditional time series `c`. + +If `exclude_source == true`, then no optimisation is done for the source. This is +useful for [`SurrogateAssociationTest`](@ref), because most surrogate methods accept +univariate time series, and if we embed the source and it becomes multidimensional, +then we can't create surrogates. A future optimization is to do column-wise surrogate +generation. +""" +function optimize_marginals_te(opt::OptimiseTraditional, s, t; exclude_source = false) + τs = getlags(opt, t) + dims = 1:opt.maxdim + f = opt.method + + if exclude_source + τT = estimate_delay(t, opt.dmethod, τs) + statT = f(t, τT, dims) + dT = dims[argmin(statT)] + return EmbeddingTE(; dT = dT, τT = -τT, ηTf = opt.η, dTf = 1) + else + τT = estimate_delay(t, opt.dmethod, τs) + τS = estimate_delay(s, opt.dmethod, τs) + statT = f(t, τT, dims) + statS = f(s, τS, dims) + dT = dims[argmin(statT)] + dS = dims[argmin(statS)] + return EmbeddingTE(; dT = dT, τT = -τT, dS = dS, τS = -τS, ηTf = opt.η, dTf = 1) + end +end + +function optimize_marginals_te(opt::OptimiseTraditional, s, t, c; exclude_source = false) + τs = getlags(opt, t) + dims = 1:opt.maxdim + if exclude_source + τT = estimate_delay(t, opt.dmethod, τs) + τC = estimate_delay(c, opt.dmethod, τs) + f = opt.method + statC = f(c, τC, dims) + statT = f(t, τT, dims) + dC = dims[argmin(statC)] + dT = dims[argmin(statT)] + return EmbeddingTE(; dT = dT, τT = -τT, dC = dC, τC = -τC, ηTf = opt.η, dTf = 1) # always predict a one-dimensional target vector. + else + τT = estimate_delay(t, opt.dmethod, τs) + τS = estimate_delay(s, opt.dmethod, τs) + τC = estimate_delay(c, opt.dmethod, τs) + + f = opt.method + statC = f(c, τC, dims) + statT = f(t, τT, dims) + statS = f(s, τS, dims) + + dC = dims[argmin(statC)] + dT = dims[argmin(statT)] + dS = dims[argmin(statS)] + + return EmbeddingTE(; dT = dT, τT = -τT, dS = dS, τS = -τS, dC = dC, τC = -τC, + ηTf = opt.η, dTf = 1) # always predict a one-dimensional target vector. + end +end + +EmbeddingTE(opt::OptimiseTraditional, x...) = optimize_marginals_te(opt, x...) 
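+
+# Usage sketch (illustrative; the data and parameter values below are assumptions, not
+# recommendations): optimise the marginal embeddings for a source/target pair, then reuse
+# the resulting `EmbeddingTE` in a transfer entropy definition.
+#
+#   x, y = rand(2000), rand(2000)
+#   emb = EmbeddingTE(OptimiseTraditional(maxdim = 3), x, y) # same as optimize_marginals_te
+#   te_def = TEShannon(embedding = emb)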
diff --git a/src/methods/infomeasures/transferentropy/utils.jl b/src/methods/information/definitions/transferentropy/utils/utils.jl similarity index 94% rename from src/methods/infomeasures/transferentropy/utils.jl rename to src/methods/information/definitions/transferentropy/utils/utils.jl index 743e360d1..d1207c8cb 100644 --- a/src/methods/infomeasures/transferentropy/utils.jl +++ b/src/methods/information/definitions/transferentropy/utils/utils.jl @@ -30,12 +30,11 @@ function rc(x::Union{AbstractStateSpaceSet, AbstractVector{T}}, # Multiple time series input if (x isa AbstractVector{T} where T <: AbstractVector{N} where N <: Number) || (x isa AbstractStateSpaceSet) if x isa AbstractStateSpaceSet - N = length(x) + N = dimension(x) elseif x isa AbstractVector N = size(x, 1) end - if dim isa Int dim % N == 0 || throw(ArgumentError("If using multiple (`N` different) time series in a marginal, each time series is lagged `dim/N` times. Hence, `dim` must be a multiple of `N`.")) @@ -132,8 +131,8 @@ function get_delay_reconstruction_params(source, target, cond, p::EmbeddingTE) end """ - te_embed(source::AbstractVector{T}, target::AbstractVector{T}, p::EmbeddingTE) → (points, vars, τs) - te_embed(source::AbstractVector{T}, target::AbstractVector{T}, cond::AbstractVector{T}, p::EmbeddingTE) → (points, vars, τs) + te_embed(source::VectorOr1DDataset, target::VectorOr1DDataset, p::EmbeddingTE) → (points, vars, τs) + te_embed(source::VectorOr1DDataset, target::VectorOr1DDataset, cond::VectorOr1DDataset, p::EmbeddingTE) → (points, vars, τs) Generalised delay reconstruction of `source` and `target` (and `cond` if provided) for transfer entropy computation using embedding parameters provided by the [`EmbeddingTE`](@ref) @@ -143,9 +142,8 @@ Returns a tuple of the embedded `points`, `vars` (a [`TEVars`](@ref) instance th variables of the embedding belong to which marginals of the reconstruction; indices are: source = 1, target = 2, cond = 3), and a tuple `τs`, which stores the lags for each variable of the reconstruction. """ -function te_embed(p::EmbeddingTE, source::AbstractVector{T}, target::AbstractVector{T}) where T +function te_embed(p::EmbeddingTE, source::VectorOr1DDataset{T}, target::VectorOr1DDataset{T}) where T - #@show p.τS #if (p.τS isa Int && p.τS > 0) || (length(p.τS) > 1 && any(p.τS[p.τS .> 0])) # @warn("Backwards lag τS should be negative. You might be getting nonsensical results!") #end @@ -181,9 +179,8 @@ function te_embed(p::EmbeddingTE, source::AbstractVector{T}, target::AbstractVec return pts, vars, τs, js end -function te_embed(p::EmbeddingTE, source::AbstractVector{T}, target::AbstractVector{T}, cond::AbstractVector{T}) where T +function te_embed(p::EmbeddingTE, source::VectorOr1DDataset{T}, target::VectorOr1DDataset{T}, cond::VectorOr1DDataset{T}) where T - #@show p.τS #if (p.τS isa Int && p.τS > 0) || (length(p.τS) > 1 && any(p.τS[p.τS .> 0])) # @warn("Backwards lag τS should be negative. 
You might be getting nonsensical results!") #end diff --git a/src/methods/information/estimators/JointProbabilities.jl b/src/methods/information/estimators/JointProbabilities.jl new file mode 100644 index 000000000..5c9e39546 --- /dev/null +++ b/src/methods/information/estimators/JointProbabilities.jl @@ -0,0 +1,59 @@ +using ComplexityMeasures: OutcomeSpace +export JointProbabilities + +""" + JointProbabilities <: InformationMeasureEstimator + JointProbabilities( + definition::MultivariateInformationMeasure, + discretization::Discretization + ) + +`JointProbabilities` is a generic estimator for multivariate discrete information measures. + +## Usage + +- Use with [`association`](@ref) to compute an information measure from input data. + +## Description + +It first encodes the input data according to the given `discretization`, then constructs +`probs`, a multidimensional [`Probabilities`](@ref) instance. Finally, `probs` are +forwarded to a [`PlugIn`](@ref) estimator, which computes the measure according to +`definition`. + +# Compatible encoding schemes + +- [`CodifyVariables`](@ref) (encode each *variable*/column of the input data independently by + applying an encoding in a sliding window over each input variable). +- [`CodifyPoints`](@ref) (encode each *point*/column of the input data) + +Works for any [`OutcomeSpace`](@ref) that implements [`codify`](@ref). + +!!! note "Joint probabilities vs decomposition methods" + + Using [`JointProbabilities`](@ref) to compute an information measure, + e.g. conditional mutual estimation, + is typically slower than other dedicated estimation procedures like [`EntropyDecomposition`](@ref). + The reason is that measures such as [`CMIShannon`](@ref) can be formulated as a + sum of four entropies, which can be estimated individually and summed afterwards. + This decomposition is fast because because we avoid *explicitly* estimating the entire joint pmf, + which demands many extra calculation steps, However, the decomposition is biased, + because it fails to fully take into consideration the joint relationships between the variables. + Pick your estimator according to your needs. + +See also: [`Counts`](@ref), [`Probabilities`](@ref), [`ProbabilitiesEstimator`](@ref), +[`OutcomeSpace`](@ref), [`DiscreteInfoEstimator`](@ref). +""" +struct JointProbabilities{M <: MultivariateInformationMeasure, O, P} <: MultivariateInformationMeasureEstimator{M} + definition::M # API from complexity measures: definition must be the first field of the infoestimator. + discretization::O + pest::P # Not exposed to user for now. + + function JointProbabilities(def::M, disc::D, pest = RelativeAmount()) where {M, D} + new{M, D, typeof(pest)}(def, disc, pest) + end +end + +function association(est::JointProbabilities{MultivariateInformationMeasure}, x...) + throw(ArgumentError("`JointProbabilities` not implemented for `$(typeof(est.definition).name.name)`")) +end \ No newline at end of file diff --git a/src/methods/information/estimators/codify_marginals.jl b/src/methods/information/estimators/codify_marginals.jl new file mode 100644 index 000000000..4b02603aa --- /dev/null +++ b/src/methods/information/estimators/codify_marginals.jl @@ -0,0 +1,125 @@ +using ComplexityMeasures +export codified_marginals + +""" + codified_marginals(o::OutcomeSpace, x::VectorOrStateSpaceSet...) + +Encode/discretize each input vector (e.g. timeseries) `xᵢ ∈ x` according to a procedure +determined by `o`. + +For some outcome spaces, the encoding is sequential (i.e. time ordering matters). 
+Any `xᵢ ∈ X` that are multidimensional ([`StateSpaceSet`](@ref)s) will be encoded +column-wise, i.e. each column of `xᵢ` is treated as a timeseries and is encoded separately. + +This is useful for discretizing input data when computing some +[`MultivariateInformationMeasure`](@ref). This method is used internally by +both the [`JointProbabilities`](@ref) and [`EntropyDecomposition`](@ref) estimators +to handle discretization. + +## Supported estimators + +- [`ValueBinning`](@ref). Bin visitation frequencies are counted in the joint space `XY`, + then marginal visitations are obtained from the joint bin visits. + This behaviour is the same for both [`FixedRectangularBinning`](@ref) and + [`RectangularBinning`](@ref) (which adapts the grid to the data). + When using [`FixedRectangularBinning`](@ref), the range along the first dimension + is used as a template for all other dimensions. +- [`OrdinalPatterns`](@ref). Each timeseries is separately [`codify`](@ref)-ed by + embedding the timeseries, then sequentially encoding the ordinal patterns of + the embedding vectors. +- [`Dispersion`](@ref). Each timeseries is separately [`codify`](@ref)-ed by + embedding the timeseries, then sequentially encoding the embedding vectors + according to their dispersion pattern (which for each embedding vector is computed + relative to all other embedding vectors). +- [`CosineSimilarityBinning`](@ref). Each timeseries is separately [`codify`](@ref)-ed + by embedding the timeseries, the encoding the embedding points in a + in a sequential manner according to the cosine similarity of the embedding vectors. +- [`UniqueElements`](@ref). Each timeseries is [`codify`](@ref)-ed according to + its unique values (i.e. each unique element gets assigned a specific integer). + +More implementations are possible. +""" +function codified_marginals end + +function codified_marginals(d::CodifyVariables, x::VectorOrStateSpaceSet...) + T = eltype(d.outcome_spaces) # assume identical outcome spaces. + if !allequal(typeof.(d.outcome_spaces)) + throw(ArgumentError("Outcome space for each marginal must be identical. Got outcome spaces of type $T")) + end + o = first(d.outcome_spaces) # we can do this because we assume all out come spaces are the same + return codified_marginals(o, x...) +end + +function codified_marginals(o::OutcomeSpace, x::VectorOrStateSpaceSet...) + return codify_marginal.(Ref(o), x) +end + +# Generic dispatch to ComplexityMeasures.jl. We override if something special +# needs to happen. For example, for ValueBinning we override such that +# we bin in the joint space to reduce bias. +function codify_marginal(o::OutcomeSpace, x::VectorOrStateSpaceSet) + return codify(o, x) +end +# Apply per column. +function codify_marginal(o::OutcomeSpace, x::AbstractStateSpaceSet) + return StateSpaceSet(codify_marginal.(Ref(o), columns(x))...) +end + +# ------------------------------------------------------------------------ +# Outcome space specific implementations +# ------------------------------------------------------------------------ + +# TODO: maybe construct a convenience wrapper where the user can avoid constructing the +# joint space, for performance benefits (but increased bias). 
+function codify_marginal( + o::ValueBinning{<:FixedRectangularBinning{D}}, + x::AbstractVector) where D + range = first(o.binning.ranges) + ϵmin = minimum(range) + ϵmax = maximum(range) + N = length(range) + encoder = RectangularBinEncoding(FixedRectangularBinning(ϵmin, ϵmax, N, 1)) + return encode.(Ref(encoder), x) +end + +# Special treatment for RectangularBinning. We create the joint embedding, then +# extract marginals from that. This could probably be faster, +# but it *works*. I'd rather things be a bit slower than having marginals +# that are not derived from the same joint distribution, which would hugely increase +# bias, because we're not guaranteed cancellation between entropy terms +# in higher-level methods. +function codified_marginals(o::ValueBinning{<:RectangularBinning}, x::VectorOrStateSpaceSet...) + # TODO: The following line can be faster by explicitly writing out loops that create the + # joint embedding vectors. + X = StateSpaceSet(StateSpaceSet.(x)...) + encoder = RectangularBinEncoding(o.binning, X) + + bins = [vec(encode_as_tuple(encoder, pt))' for pt in X] + joint_bins = reduce(vcat, bins) + idxs = size.(x, 2) #each input can have different dimensions + s = 1 + encodings = Vector{Vector}(undef, 0) + for (i, cidx) in enumerate(idxs) + variable_subset = s:(s + cidx - 1) + s += cidx + y = @views joint_bins[:, variable_subset] + for j in size(y, 2) + push!(encodings, y[:, j]) + end + end + + return encodings +end + +# A version of `cartesian_bin_index` that directly returns the joint bin encoding +# instead of converting it to a cartesian index. +function encode_as_tuple(e::RectangularBinEncoding, point::SVector{D, T}) where {D, T} + ranges = e.ranges + if e.precise + # Don't know how to make this faster unfurtunately... + bin = map(searchsortedlast, ranges, point) + else + bin = floor.(Int, (point .- e.mini) ./ e.widths) .+ 1 + end + return bin +end \ No newline at end of file diff --git a/src/methods/information/estimators/conditional_mutual_info_estimators/FPVP.jl b/src/methods/information/estimators/conditional_mutual_info_estimators/FPVP.jl new file mode 100644 index 000000000..eebde04c9 --- /dev/null +++ b/src/methods/information/estimators/conditional_mutual_info_estimators/FPVP.jl @@ -0,0 +1,89 @@ + +using Neighborhood: bulkisearch, inrangecount +using Neighborhood: Theiler, NeighborNumber, KDTree, Chebyshev +using SpecialFunctions: digamma + +export FPVP + +""" + FPVP <: ConditionalMutualInformationEstimator + FPVP(definition = CMIShannon(); k = 1, w = 0) + +The Frenzel-Pompe-Vejmelka-Paluš (or `FPVP` for short) +[`ConditionalMutualInformationEstimator`](@ref) is used to estimate the +conditional mutual information using a `k`-th nearest neighbor approach that is +analogous to that of the [`KraskovStögbauerGrassberger1`](@ref) mutual information +estimator from [Frenzel2007](@citet) and [Vejmelka2008](@citet). + +`k` is the number of nearest neighbors. `w` is the Theiler window, which controls the +number of temporal neighbors that are excluded during neighbor searches. + +## Usage + +- Use with [`association`](@ref) to compute [`ConditionalMutualInformation`](@ref) measure + from input data. 
+ +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000) +y = rand(rng, 10000) .+ x +z = rand(rng, 10000) .+ y +association(FPVP(; k = 10), x, z, y) # should be near 0 (and can be negative) +``` + +## Compatible definitions + +- [`CMIShannon`](@ref) +""" +struct FPVP{M <: ConditionalMutualInformation, MJ, MM} <: ConditionalMutualInformationEstimator{M} + definition::M + k::Int + w::Int + metric_joint::MJ + metric_marginals::MM +end + +function FPVP(definition = CMIShannon(); k = 1, w = 0) + # Metrics shouldn't be modified by the user. + metric_joint = Chebyshev() + metric_marginals = Chebyshev() + return FPVP(definition, k, w, metric_joint, metric_marginals) +end + +function association(est::FPVP{<:CMIShannon}, x, y, z) + (; definition, k, w, metric_joint, metric_marginals) = est + + # Ensures that vector-valued inputs are converted to StateSpaceSets, so that + # building the marginal/joint spaces and neighbor searches are fast. + X = StateSpaceSet(x) + Y = StateSpaceSet(y) + Z = StateSpaceSet(z) + @assert length(X) == length(Y) == length(Z) + N = length(X) + joint = StateSpaceSet(X, Y, Z) + XZ = StateSpaceSet(X, Z) + YZ = StateSpaceSet(Y, Z) + + tree_joint = KDTree(joint, metric_joint) + ds_joint = last.(bulksearch(tree_joint, joint, NeighborNumber(k), Theiler(w))[2]) + tree_xz = KDTree(XZ, metric_marginals) + tree_yz = KDTree(YZ, metric_marginals) + tree_z = KDTree(Z, metric_marginals) + + condmi = 0.0 + for (i, dᵢ) in enumerate(ds_joint) + # Usually, we subtract 1 because inrangecount includes the point itself, + # but we'll have to add it again inside the digamma, so just skip it. + condmi += digamma(k) + condmi -= digamma(inrangecount(tree_xz, XZ[i], dᵢ)) + condmi -= digamma(inrangecount(tree_yz, YZ[i], dᵢ)) + condmi += digamma(inrangecount(tree_z, Z[i], dᵢ)) + end + # The "unit" is nats. + condmi /= N + + return _convert_logunit(condmi, ℯ, definition.base) +end diff --git a/src/methods/information/estimators/conditional_mutual_info_estimators/GaussianCMI.jl b/src/methods/information/estimators/conditional_mutual_info_estimators/GaussianCMI.jl new file mode 100644 index 000000000..8198094f0 --- /dev/null +++ b/src/methods/information/estimators/conditional_mutual_info_estimators/GaussianCMI.jl @@ -0,0 +1,56 @@ +export GaussianCMI +using StateSpaceSets: StateSpaceSet + +""" + GaussianCMI <: MutualInformationEstimator + GaussianCMI(definition = CMIShannon(); normalize::Bool = false) + +`GaussianCMI` is a parametric [`ConditionalMutualInformationEstimator`](@ref) +[Vejmelka2008](@cite). + +## Usage + +- Use with [`association`](@ref) to compute [`CMIShannon`](@ref) from input data. 
+ +## Description + +`GaussianCMI` estimates Shannon CMI through a sum of two mutual information terms, +each of which is estimated using [`GaussianMI`](@ref) (the `normalize` keyword is the same as +for [`GaussianMI`](@ref)): + +```math +\\hat{I}_{Gaussian}(X; Y | Z) = \\hat{I}_{Gaussian}(X; Y, Z) - \\hat{I}_{Gaussian}(X; Z) +``` + +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000) +y = rand(rng, 10000) .+ x +z = rand(rng, 10000) .+ y +association(GaussianCMI(CMIShannon(base = 2)), x, z, y) +``` + +## Compatible definitions + +- [`CMIShannon`](@ref) +""" +struct GaussianCMI{M <: ConditionalMutualInformation} <: ConditionalMutualInformationEstimator{M} + definition::M + normalize::Bool +end +function GaussianCMI(definition = CMIShannon(); normalize = false) + return GaussianCMI(definition, normalize) +end + +function association(est::GaussianCMI{<:CMIShannon}, x, y, z) + YZ = StateSpaceSet(y, z) + + mi_est_modified = estimator_with_overridden_parameters(est.definition, GaussianMI()) + MI_x_yz = association(mi_est_modified, x, YZ) + MI_x_z = association(mi_est_modified, x, z) + + return MI_x_yz - MI_x_z +end diff --git a/src/methods/infomeasures/condmutualinfo/estimators/MesnerShalizi.jl b/src/methods/information/estimators/conditional_mutual_info_estimators/MesnerShalizi.jl similarity index 52% rename from src/methods/infomeasures/condmutualinfo/estimators/MesnerShalizi.jl rename to src/methods/information/estimators/conditional_mutual_info_estimators/MesnerShalizi.jl index b8f242225..ad1f01123 100644 --- a/src/methods/infomeasures/condmutualinfo/estimators/MesnerShalizi.jl +++ b/src/methods/information/estimators/conditional_mutual_info_estimators/MesnerShalizi.jl @@ -2,16 +2,43 @@ export MesnerShalizi export MesnerShalisi """ MesnerShalizi <: ConditionalMutualInformationEstimator - MesnerShalizi(k = 1, w = 0) + MesnerShalizi(definition = CMIShannon(); k = 1, w = 0) -The `MesnerShalizi` estimator is an estimator for conditional mutual information for data -that can be mixtures of discrete and continuous data [MesnerShalizi2020](@cite). +The `MesnerShalizi` [`ConditionalMutualInformationEstimator`](@ref) is designed for +data that can be mixtures of discrete and continuous data [Mesner2020](@cite). +`k` is the number of nearest neighbors. `w` is the Theiler window, which controls the +number of temporal neighbors that are excluded during neighbor searches. + +## Usage + +- Use with [`association`](@ref) to compute [`CMIShannon`](@ref) from input data. + +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000) +y = rand(rng, 10000) .+ x +z = rand(rng, 10000) .+ y +association(MesnerShalizi(; k = 10), x, z, y) # should be near 0 (and can be negative) +``` + +## Compatible definitions + +- [`CMIShannon`](@ref) """ -Base.@kwdef struct MesnerShalizi{M} <: ConditionalMutualInformationEstimator - k::Int = 1 - w::Int = 0 - metric::M = Chebyshev() +struct MesnerShalizi{M <: ConditionalMutualInformation, ME} <: ConditionalMutualInformationEstimator{M} + definition::M + k::Int + w::Int + metric::ME +end +function MesnerShalizi(definition = CMIShannon(); k = 1, w = 0) + # Metric shouldn't be modified by the user. + metric = Chebyshev() + return MesnerShalizi(definition, k, w, metric) end function MesnerShalisi(args...; kwargs...) @@ -19,9 +46,9 @@ function MesnerShalisi(args...; kwargs...) return MesnerShalizi(args...; kwargs...)
end -function estimate(measure::CMIShannon, est::MesnerShalizi, x, y, z) - e = measure.e - (; k, w, metric) = est +function association(est::MesnerShalizi{<:CMIShannon}, x, y, z) + + (; definition, k, w, metric) = est X = StateSpaceSet(x) Y = StateSpaceSet(y) Z = StateSpaceSet(z) @@ -52,5 +79,5 @@ function estimate(measure::CMIShannon, est::MesnerShalizi, x, y, z) end # The "unit" is nats. condmi /= N - return _convert_logunit(condmi, ℯ, e.base) + return _convert_logunit(condmi, ℯ, definition.base) end diff --git a/src/methods/infomeasures/condmutualinfo/estimators/PoczosSchneiderCMI.jl b/src/methods/information/estimators/conditional_mutual_info_estimators/PoczosSchneiderCMI.jl similarity index 52% rename from src/methods/infomeasures/condmutualinfo/estimators/PoczosSchneiderCMI.jl rename to src/methods/information/estimators/conditional_mutual_info_estimators/PoczosSchneiderCMI.jl index f97d25ddd..ebf124b55 100644 --- a/src/methods/infomeasures/condmutualinfo/estimators/PoczosSchneiderCMI.jl +++ b/src/methods/information/estimators/conditional_mutual_info_estimators/PoczosSchneiderCMI.jl @@ -4,32 +4,56 @@ using SpecialFunctions: gamma export PoczosSchneiderCMI """ PoczosSchneiderCMI <: ConditionalMutualInformationEstimator - PoczosSchneiderCMI(k = 1, w = 0) + PoczosSchneiderCMI(definition = CMIRenyiPoczos(); k = 1, w = 0) -The `PoczosSchneiderCMI` estimator computes various (differential) conditional -mutual informations, using a `k`-th nearest neighbor approach (Póczos & Schneider, -2012)[^Póczos2012]. +The `PoczosSchneiderCMI` [`ConditionalMutualInformationEstimator`](@ref) +computes conditional mutual informations using a `k`-th nearest neighbor approach +[Poczos2012](@cite). -[^Póczos2012]: - Póczos, B., & Schneider, J. (2012, March). Nonparametric estimation of conditional - information and divergences. In Artificial Intelligence and Statistics (pp. 914-923). - PMLR. +`k` is the number of nearest neighbors. `w` is the Theiler window, which controls the +number of temporal neighbors that are excluded during neighbor searches. + +## Usage + +- Use with [`association`](@ref) to compute [`CMIRenyiPoczos`](@ref) from input data. + +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000) +y = rand(rng, 10000) .+ x +z = rand(rng, 10000) .+ y +association(PoczosSchneiderCMI(CMIRenyiPoczos(), k = 10), x, z, y) # should be near 0 (and can be negative) +``` + +## Compatible definitions + +- [`CMIRenyiPoczos`](@ref) """ -Base.@kwdef struct PoczosSchneiderCMI{M} <: ConditionalMutualInformationEstimator - k::Int = 1 - w::Int = 0 - metric::M = Euclidean() # Needs to be euclidean for ball volume formula to be valid. +struct PoczosSchneiderCMI{M <: ConditionalMutualInformation, ME} <: ConditionalMutualInformationEstimator{M} + definition::M + k::Int + w::Int + metric::ME # Needs to be euclidean for ball volume formula to be valid. end -function estimate(measure::CMIRenyiPoczos, est::PoczosSchneiderCMI, x, y, z) - e = measure.e - # The "unit" is nats. - c = log(Q3(e, est, x, y, z)) / (e.q-1) - return _convert_logunit(c, ℯ, e.base) +function PoczosSchneiderCMI(definition = CMIRenyiPoczos(); k = 1, w = 0) + metric = Euclidean() + return PoczosSchneiderCMI(definition, k, w, metric) end -function Q3(e::EntropyDefinition, est::PoczosSchneiderCMI, x, y, z) - q = e.q +function association(est::PoczosSchneiderCMI{<:CMIRenyiPoczos}, x, y, z) + (; base, q) = est.definition + # The "unit" of `c` is nats. 
+ c = log(Q3(est, x, y, z)) / (q - 1) + return _convert_logunit(c, ℯ, base) +end + +function Q3(est::PoczosSchneiderCMI{<:CMIRenyiPoczos}, x, y, z) + (; base, q) = est.definition + (; k, w, metric) = est @assert length(x) == length(y) == length(z) N = length(x) @@ -54,8 +78,6 @@ function Q3(e::EntropyDefinition, est::PoczosSchneiderCMI, x, y, z) ds_z = last.(dists_Z) .^ (dimension(Z) * (1 - q)) # Not sure about the index sets here. - # fyz = (N - 1)^(1 - q) - #fxz = (N - 1)^(1 - q) bv_yz = ball_volume(dimension(YZ)) ^ (1 - q) bv_xz = ball_volume(dimension(XZ)) ^ (1 - q) bv_xyz = ball_volume(dimension(XYZ)) ^(1 - q) @@ -71,9 +93,4 @@ function Q3(e::EntropyDefinition, est::PoczosSchneiderCMI, x, y, z) end condmi /= N return condmi -end - - -function condmutualinfo(est::PoczosSchneiderCMI, x, y, z) - return condmutualinfo(CMIRenyiPoczos(), est, x, y, z) -end +end \ No newline at end of file diff --git a/src/methods/information/estimators/conditional_mutual_info_estimators/Rahimzamani.jl b/src/methods/information/estimators/conditional_mutual_info_estimators/Rahimzamani.jl new file mode 100644 index 000000000..2f2833064 --- /dev/null +++ b/src/methods/information/estimators/conditional_mutual_info_estimators/Rahimzamani.jl @@ -0,0 +1,86 @@ +export Rahimzamani + +""" + Rahimzamani <: ConditionalMutualInformationEstimator + Rahimzamani(k = 1, w = 0) + +The `Rahimzamani` [`ConditionalMutualInformationEstimator`](@ref) is designed +for data that can be mixtures of discrete and continuous data [Rahimzamani2018](@cite). + +## Usage + +- Use with [`association`](@ref) to compute a [`CMIShannon`](@ref) from input data. + +## Description + +This estimator is very similar to the [`GaoKannanOhViswanath`](@ref) mutual information +estimator, but has been expanded to the conditional mutual information case. + +`k` is the number of nearest neighbors. `w` is the Theiler window, which controls the +number of temporal neighbors that are excluded during neighbor searches. + +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000) +y = rand(rng, 10000) .+ x +z = rand(rng, 10000) .+ y +association(Rahimzamani(; k = 10), x, z, y) # should be near 0 (and can be negative) +``` + +## Compatible definitions + +- [`CMIShannon`](@ref) +""" +struct Rahimzamani{M <: ConditionalMutualInformation, ME} <: ConditionalMutualInformationEstimator{M} + definition::M + k::Int + w::Int + metric::ME +end + +function Rahimzamani(definition = CMIShannon(); k = 1, w = 0) + # Metric shouldn't be modified by the user. + metric = Chebyshev() + return Rahimzamani(definition, k, w, metric) +end + +function association(est::Rahimzamani{<:CMIShannon}, x, y, z) + (; definition, k, w, metric) = est + + X = StateSpaceSet(x) + Y = StateSpaceSet(y) + Z = StateSpaceSet(z) + joint = StateSpaceSet(X, Y, Z) + XZ = StateSpaceSet(x, z) + YZ = StateSpaceSet(y, z) + Z = StateSpaceSet(z) + + N = length(joint) + M = 3 + tree_joint = KDTree(joint, metric) + ds_joint = last.(bulksearch(tree_joint, joint, NeighborNumber(k), Theiler(w))[2]) + tree_xz = KDTree(XZ, metric) + tree_yz = KDTree(YZ, metric) + tree_z = KDTree(Z, metric) + + condmi = 0.0 + for i = 1:N + # The notation for ρ_{i, xy} in the paper in unclear. They claim in the paper that + # the estimator reduces to the KSG1 estimator when k̂ == k. Therefore, + # I assume ρ_{i, xy} is the distance in the *joint* space. + # ... but isn't this just the FPVP estimator? + dmax = ds_joint[i] + k̂ = dmax == 0 ? 
inrangecount(tree_joint, joint[i], 0.0) - 1 : k + condmi += digamma(k̂) + condmi -= log(inrangecount(tree_xz, XZ[i], dmax)) + condmi -= log(inrangecount(tree_yz, YZ[i], dmax)) + condmi += log(inrangecount(tree_z, Z[i], dmax)) + end + # The "unit" is nats + condmi /= N + + return _convert_logunit(condmi, ℯ, definition.base) +end diff --git a/src/methods/information/estimators/conditional_mutual_info_estimators/conditional_mutual_info_estimators.jl b/src/methods/information/estimators/conditional_mutual_info_estimators/conditional_mutual_info_estimators.jl new file mode 100644 index 000000000..dffe09379 --- /dev/null +++ b/src/methods/information/estimators/conditional_mutual_info_estimators/conditional_mutual_info_estimators.jl @@ -0,0 +1,5 @@ +include("FPVP.jl") +include("Rahimzamani.jl") +include("PoczosSchneiderCMI.jl") +include("MesnerShalizi.jl") +include("GaussianCMI.jl") diff --git a/src/methods/information/estimators/decomposition/CMIDecomposition.jl b/src/methods/information/estimators/decomposition/CMIDecomposition.jl new file mode 100644 index 000000000..49ea85c44 --- /dev/null +++ b/src/methods/information/estimators/decomposition/CMIDecomposition.jl @@ -0,0 +1,46 @@ +export CMIDecomposition + +""" + CMIDecomposition(definition::MultivariateInformationMeasure, + est::ConditionalMutualInformationEstimator) + +Estimate the multivariate information measure specified by `definition` by decomposing +it into a combination of conditional mutual information terms. Each of these +terms is then estimated using `est`, which can be any +[`ConditionalMutualInformationEstimator`](@ref). Finally, these estimates are combined +according to the relevant decomposition formula. + +## Usage + +- Use with [`association`](@ref) to compute a [`MultivariateInformationMeasure`](@ref) + from input data. + +## Description + +Similar to [`EntropyDecomposition`](@ref), but `definition` is expressed as +conditional mutual information terms instead of entropy terms. + +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 100000) +y = rand(rng, 100000) .+ x +z = rand(rng, 100000) .+ y + +# Estimate transfer entropy by representing it as a CMI and using the `FPVP` estimator. +est = CMIDecomposition(TEShannon(base = 2), FPVP(k = 3)) +association(est, x, z, y) # should be near 0 (and can be negative) +``` + +See also: [`ConditionalMutualInformationEstimator`](@ref), [`MultivariateInformationMeasure`](@ref). +""" +struct CMIDecomposition{M <: MultivariateInformationMeasure, E} <: DecompositionEstimator{M} + definition::M # extend API from complexity measures: definition must be the first field of the info estimator. + est::E # The CMI estimator + measure which `definition` is decomposed into.
+end diff --git a/src/methods/information/estimators/decomposition/EntropyDecomposition.jl b/src/methods/information/estimators/decomposition/EntropyDecomposition.jl new file mode 100644 index 000000000..bedf04000 --- /dev/null +++ b/src/methods/information/estimators/decomposition/EntropyDecomposition.jl @@ -0,0 +1,225 @@ + +export EntropyDecomposition + +""" + EntropyDecomposition(definition::MultivariateInformationMeasure, + est::DifferentialInfoEstimator) + EntropyDecomposition(definition::MultivariateInformationMeasure, + est::DiscreteInfoEstimator, + discretization::CodifyVariables{<:OutcomeSpace}, + pest::ProbabilitiesEstimator = RelativeAmount()) + +Estimate the multivariate information measure specified by `definition` by rewriting +its formula into some combination of entropy terms. + +If calling the second method (discrete variant), then discretization is always done +per variable/column and each column is encoded into integers using [`codify`](@ref). + +## Usage + +- Use with [`association`](@ref) to compute a [`MultivariateInformationMeasure`](@ref) + from input data. +- Use with [`independence`](@ref) to test for independence between variables. + +## Description + +The entropy terms are estimated using `est`, and then combined to form the final +estimate of `definition`. No bias correction is applied. +If `est` is a [`DifferentialInfoEstimator`](@ref), then `discretization` and `pest` +are ignored. If `est` is a [`DiscreteInfoEstimator`](@ref), then `discretization` and a +probabilities estimator `pest` must also be provided (defaulting to `RelativeAmount`, +which uses naive plug-in probabilities). + +## Compatible differential information estimators + +If using the first signature, any compatible [`DifferentialInfoEstimator`](@ref) can be +used. + +## Compatible outcome spaces for discrete estimation + +If using the second signature, the following outcome spaces can be used for discretization. +Note that not all outcome spaces will work with all measures. + +| Outcome space | Principle | Note | +| :-------------------------------- | :------------------------------------ | :------------------------------- | +| [`UniqueElements`](@ref) | Count of unique elements | | +| [`ValueBinning`](@ref) | Binning (histogram) | | +| [`OrdinalPatterns`](@ref) | Ordinal patterns | | +| [`Dispersion`](@ref) | Dispersion patterns | | +| [`CosineSimilarityBinning`](@ref) | Cosine similarities histogram | | +| [`TransferOperator`](@ref) | Transfer operator on rectangular bins | `binning.precise` must be `true` | + +## Bias + +Estimating the `definition` by decomposition into a combination of entropy terms, +which are estimated independently, will in general be more biased than when using a +dedicated estimator. One reason is that this decomposition may miss out on crucial +information in the joint space. To remedy this, dedicated information measure +estimators typically derive the marginal estimates by first considering the joint +space, and then apply some clever trick to eliminate the bias that is introduced +through a naive decomposition. Unless specified below, no bias correction is +applied for `EntropyDecomposition`. + + +## Handling of overlapping parameters + +If there are overlapping parameters between the measure to be estimated and the +lower-level decomposed measures, then the top-level measure parameter takes precedence.
+For example, if we want to estimate `CMIShannon(base = 2)` through a decomposition +of entropies using the `Kraskov(Shannon(base = ℯ))` Shannon entropy estimator, then +`base = 2` is used. + +!!! info + Not all measures have the property that they can be decomposed into more fundamental + information theoretic quantities. For example, [`MITsallisMartin`](@ref) *can* be + decomposed into a combination of marginal entropies, while [`MIRenyiSarbu`](@ref) + cannot. An error will be thrown if decomposition is not possible. + +## Discrete entropy decomposition + +The second signature is for discrete estimation using [`DiscreteInfoEstimator`](@ref)s, +for example [`PlugIn`](@ref). The given `discretization` scheme (typically an +[`OutcomeSpace`](@ref)) controls how the joint/marginals are discretized, and the +probabilities estimator `pest` controls how probabilities are estimated from counts. + +!!! note "Bias" + Like for [`DifferentialDecomposition`](@ref), using a dedicated estimator + for the measure in question will be more reliable than using a decomposition + estimate. Here's how different `discretization`s are applied: + + - [`ValueBinning`](@ref). Bin visitation frequencies are counted in the joint space + `XY`, then marginal visitations are obtained from the joint bin visits. + This behaviour is the same for both [`FixedRectangularBinning`](@ref) and + [`RectangularBinning`](@ref) (which adapts the grid to the data). + When using [`FixedRectangularBinning`](@ref), the range along the first dimension + is used as a template for all other dimensions. This is a bit slower than naively + binning each marginal, but lessens bias. + - [`OrdinalPatterns`](@ref). Each timeseries is separately [`codify`](@ref)-ed + according to its ordinal pattern (no bias correction). + - [`Dispersion`](@ref). Each timeseries is separately [`codify`](@ref)-ed according + to its dispersion pattern (no bias correction). + +## Examples + +Both Shannon-type mutual information and conditional mutual information can be +written as a sum of marginal entropy terms. First a discrete example for mutual +information: + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) + +x = StateSpaceSet(rand(rng, 1000000, 2)) +y = StateSpaceSet(rand(rng, 1000000, 2)) +# Compute Shannon mutual information by discretizing each marginal column-wise +# (per variable) using length-`3` ordinal patterns. +est = EntropyDecomposition(MIShannon(), PlugIn(Shannon()), OrdinalPatterns(m=3)) +association(est, x, y) # should be close to 0 +``` + +Here, we estimate Shannon-type conditional mutual information using the `ZhuSingh` +entropy estimator. + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 100000) +y = rand(rng, 100000) .+ x +z = rand(rng, 100000) .+ y + +est = EntropyDecomposition(CMIShannon(), ZhuSingh(k = 3)) +association(est, x, z, y) # should be near 0 (and can be negative) +``` + +See also: [`MutualInformationEstimator`](@ref), [`MultivariateInformationMeasure`](@ref). +""" +struct EntropyDecomposition{ + M <: MultivariateInformationMeasure, + E <: InformationMeasureEstimator, + D <: Union{Discretization, Nothing}, + P <: Union{ProbabilitiesEstimator, Nothing} + } <: DecompositionEstimator{M} + definition::M # extend API from complexity measures: definition must be the first field of the info estimator. + est::E # The estimator + measure which `definition` is decomposed into. + discretization::D # `Nothing` if `est` is a `DifferentialInfoEstimator`. 
+ pest::P # `Nothing` if `est` is a `DifferentialInfoEstimator`. + + + function EntropyDecomposition( + definition::MultivariateInformationMeasure, + est::DifferentialInfoEstimator) + M = typeof(definition) + E = typeof(est) + verify_decomposition_entropy_type(definition, est) + return new{M, E, Nothing, Nothing}(definition, est, nothing, nothing) + end + + function EntropyDecomposition( + definition::MultivariateInformationMeasure, + est::DiscreteInfoEstimator, + discretization::D, + pest::ProbabilitiesEstimator = RelativeAmount(), + ) where {D} + M = typeof(definition) + E = typeof(est) + P = typeof(pest) + verify_decomposition_entropy_type(definition, est) + + return new{M, E, D, P}(definition, est, discretization, pest) + end +end + +# For internal use. +""" + verify_decomposition_entropy_type( + definition::MultivariateInformationMeasure, + est::Union{DiscreteInfoEstimator, DifferentialInfoEstimator} + ) + +Check that we can actually decompose the `definition` into `est.definition`. The +default is to do nothing. Certain definitions may override (e.g. `CMIRenyiJizba` does so). +""" +function verify_decomposition_entropy_type( + definition::MultivariateInformationMeasure, + est::Union{DiscreteInfoEstimator, DifferentialInfoEstimator}) +end + + +# ---------------------------------------------------------------------------------------- +# Custom pretty printing for discrete entropy estimators, since it has more field. +# ---------------------------------------------------------------------------------------- +function summary_strings(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) where M + return [ + "Measure to be decomposed", + "Estimator for decomposed components", + "Discretization", + "Probabilities estimator" + ] +end + +function summary_types(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) where M + return [ + typeof(est.definition), + typeof(est.est), + typeof(est.discretization), + typeof(est.pest) + ] +end + +function measure_colors(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) where M + return [ + :light_red, + :light_green, + :light_blue, + :light_yellow, + ] +end + +function info_colors(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) where M + return [ + :red, + :green, + :blue, + :yellow, + ] +end diff --git a/src/methods/information/estimators/decomposition/MIDecomposition.jl b/src/methods/information/estimators/decomposition/MIDecomposition.jl new file mode 100644 index 000000000..8f7604741 --- /dev/null +++ b/src/methods/information/estimators/decomposition/MIDecomposition.jl @@ -0,0 +1,41 @@ + +export MIDecomposition + +""" + MIDecomposition(definition::MultivariateInformationMeasure, + est::MutualInformationEstimator) + +Estimate the [`MultivariateInformationMeasure`](@ref) specified by `definition` by +by decomposing, the measure, if possible, into a combination of mutual information terms. +These terms are individually estimated using the given +[`MutualInformationEstimator`](@ref) `est`, and finally combined to form the final +value of the measure. + +## Usage + +- Use with [`association`](@ref) to compute a [`MultivariateInformationMeasure`](@ref) + from input data. + +## Examples + +One common application is computing Shannon-type conditional mutual information. +It can be decomposed as a sum of mutual information terms, which we can each +estimate with any dedicated [`MutualInformationEstimator`](@ref) estimator. 
+ +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 100000) +y = rand(rng, 100000) .+ x +z = rand(rng, 100000) .+ y + +est = MIDecomposition(CMIShannon(), KSG1(MIShannon(base = 2), k = 3)) +association(est, x, z, y) # should be near 0 (and can be negative) +``` + +See also: [`EntropyDecomposition`](@ref). +""" +struct MIDecomposition{M <: MultivariateInformationMeasure, E} <: DecompositionEstimator{M} + definition::M # extend API from complexity measures: definition must be the first field of the info estimator. + est::E # The MI estimator + measure which `definition` is decomposed into. +end diff --git a/src/methods/information/estimators/decomposition/decomposition_estimators.jl b/src/methods/information/estimators/decomposition/decomposition_estimators.jl new file mode 100644 index 000000000..d6b42ddd6 --- /dev/null +++ b/src/methods/information/estimators/decomposition/decomposition_estimators.jl @@ -0,0 +1,71 @@ +# Exists just to make dispatch easier for pretty printing. +abstract type DecompositionEstimator{M} <: MultivariateInformationMeasureEstimator{M} end + +include("EntropyDecomposition.jl") +include("MIDecomposition.jl") +include("CMIDecomposition.jl") + +function decomposition_string(::SOURCE_DEF, ::DecompositionEstimator{<:TARGET_DEF}) where {SOURCE_DEF, TARGET_DEF} + #"Decomposition formula for $M not defined for $E" + "Not specified. Are you sure $SOURCE_DEF is the measure you want to decompose " * + "into $TARGET_DEF?" +end +# ---------------------------------------------------------------------------------------- +# Pretty printing +# ---------------------------------------------------------------------------------------- +# A common method for displaying. For pretty printing to to, each `DecompositionEstimator` +# must implement relevant methods for `summary_types`, `summary_strings`, +# `measure_colors` and `info_colors`. +# +# If custom printing isn't defined for a particular measure-definition combination, +# then default type printing is used. 
+function Base.show(io::IO, est::DecompositionEstimator) + types = summary_types(est) + strs = summary_strings(est) + measurecolors = measure_colors(est) + infocolors = info_colors(est) + n = maximum(length.(strs)) + + spaces_needed = [n - length(s) for s in strs] + spaced_strs = [strs[i] * repeat(" ", spaces_needed[i]) for i in eachindex(strs)] + ctx = IOContext(io, :color => true) + printstyled(ctx, "$(typeof(est).name.name) estimator\n\n", color=:bold) + d = decomposition_string(est.definition, est) + printstyled(ctx, " Formula: $(d)\n\n", color=:light_grey) + indent = " " + for i in eachindex(strs) + printstyled(ctx, "$(indent)$(spaced_strs[i]): ", color=infocolors[i]) + printstyled(ctx, string(types[i]), color=measurecolors[i]) + if i < length(strs) + print(io, "\n") + end + end +end + +function summary_strings(est::DecompositionEstimator) + return [ + "Measure to be decomposed", + "Estimator for components", + ] +end + +function summary_types(est::DecompositionEstimator) + return [ + typeof(est.definition), + typeof(est.est), + ] +end + +function measure_colors(est::DecompositionEstimator) + return [ + :light_red, + :light_green, + ] +end + +function info_colors(est::DecompositionEstimator) + return [ + :red, + :green, + ] +end diff --git a/src/methods/information/estimators/information_estimators.jl b/src/methods/information/estimators/information_estimators.jl new file mode 100644 index 000000000..bda17aae1 --- /dev/null +++ b/src/methods/information/estimators/information_estimators.jl @@ -0,0 +1,4 @@ +include("codify_marginals.jl") + +include("JointProbabilities.jl") +include("decomposition/decomposition_estimators.jl") \ No newline at end of file diff --git a/src/methods/infomeasures/mutualinfo/estimators/GaoKannanOhViswanath.jl b/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl similarity index 83% rename from src/methods/infomeasures/mutualinfo/estimators/GaoKannanOhViswanath.jl rename to src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl index 9ec8ea329..7d87923bb 100644 --- a/src/methods/infomeasures/mutualinfo/estimators/GaoKannanOhViswanath.jl +++ b/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl @@ -7,15 +7,16 @@ export GaoKannanOhViswanath GaoKannanOhViswanath(; k = 1, w = 0) The `GaoKannanOhViswanath` (Shannon) estimator is designed for estimating -mutual information between variables that may be either discrete, continuous or +Shannon mutual information between variables that may be either discrete, continuous or a mixture of both [GaoKannanOhViswanath2017](@cite). -!!! note "Explicitly convert your discrete data to floats" - Even though the `GaoKannanOhViswanath` estimator is designed to handle discrete data, - our implementation demands that all input data are `StateSpaceSet`s whose data points - are floats. If you have discrete data, such as strings or symbols, encode them using - integers and convert those integers to floats before passing them to - [`mutualinfo`](@ref). +## Compatible definitions + +- [`MIShannon`](@ref) + +## Usage + +- Use with [`association`](@ref) to compute Shannon mutual information from input data. ## Description @@ -28,7 +29,6 @@ due to tied points, which may bias other estimators. Citing their paper, the estimator *"strongly outperforms natural baselines of discretizing the mixed random variables (by quantization) or making it continuous by adding a small Gaussian noise."* - !!! 
warn "Implementation note" In [GaoKannanOhViswanath2017](@citet), they claim (roughly speaking) that the estimator reduces to the [`KraskovStögbauerGrassberger1`](@ref) estimator for continuous-valued data. @@ -47,21 +47,38 @@ variables (by quantization) or making it continuous by adding a small Gaussian n there will be slight differences between the methods. See the source code for more details. +!!! note "Explicitly convert your discrete data to floats" + Even though the `GaoKannanOhViswanath` estimator is designed to handle discrete data, + our implementation demands that all input data are `StateSpaceSet`s whose data points + are floats. If you have discrete data, such as strings or symbols, encode them using + integers and convert those integers to floats before passing them to + [`mutualinfo`](@ref). + +## Examples -See also: [`mutualinfo`](@ref). +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000); y = rand(rng, 10000) +association(GaoKannanOhViswanath(; k = 10), x, y) # should be near 0 (and can be negative) +``` """ -Base.@kwdef struct GaoKannanOhViswanath <: MutualInformationEstimator - k::Int = 1 - w::Int = 0 +struct GaoKannanOhViswanath{M <: MutualInformation} <: MutualInformationEstimator{M} + definition::M + k::Int + w::Int +end + +function GaoKannanOhViswanath(definition = MIShannon(); k = 1, w = 0) + return GaoKannanOhViswanath(definition, k, w) end # TODO: We here extend the estimator to multiple variables (i.e. the multi-information), # which was not treated in Gao et al., (2017). # Note: input StateSpaceSets must have the same type. Remind the user ot convert in the # docstring. -function estimate(measure::MIShannon, est::GaoKannanOhViswanath, x, y) - e = measure.e - (; k, w) = est +function association(est::GaoKannanOhViswanath{<:MIShannon}, x, y) + (; definition, k, w) = est X = StateSpaceSet(x) Y = StateSpaceSet(y) joint = StateSpaceSet(X, Y) @@ -104,5 +121,5 @@ function estimate(measure::MIShannon, est::GaoKannanOhViswanath, x, y) end # The "unit" is nats. mi /= N - return _convert_logunit(mi, ℯ, e.base) + return _convert_logunit(mi, ℯ, definition.base) end diff --git a/src/methods/infomeasures/mutualinfo/estimators/GaoOhViswanath.jl b/src/methods/information/estimators/mutual_info_estimators/GaoOhViswanath.jl similarity index 71% rename from src/methods/infomeasures/mutualinfo/estimators/GaoOhViswanath.jl rename to src/methods/information/estimators/mutual_info_estimators/GaoOhViswanath.jl index 6f4a65fa8..3e601f9c6 100644 --- a/src/methods/infomeasures/mutualinfo/estimators/GaoOhViswanath.jl +++ b/src/methods/information/estimators/mutual_info_estimators/GaoOhViswanath.jl @@ -4,8 +4,20 @@ export GaoOhViswanath """ GaoOhViswanath <: MutualInformationEstimator -The `GaoOhViswanath` mutual information estimator, also called the bias-improved-KSG -estimator, or BI-KSG, by [Gao2018](@cite), is given by +The `GaoOhViswanath` is a mutual information estimator based on nearest neighbors, +and is also called the bias-improved-KSG estimator, or BI-KSG, by [Gao2018](@cite). + +## Compatible definitions + +- [`MIShannon`](@ref) + +## Usage + +- Use with [`association`](@ref) to compute Shannon mutual information from input data. + +## Description + +The estimator is given by ```math \\begin{align*} @@ -24,17 +36,34 @@ estimator, or BI-KSG, by [Gao2018](@cite), is given by where ``c_{d, 2} = \\dfrac{\\pi^{\\frac{d}{2}}}{\\Gamma{(\\dfrac{d}{2} + 1)}}`` is the volume of a ``d``-dimensional unit ``\\mathcal{l}_2``-ball. 
+ +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000); y = rand(rng, 10000) +association(GaoOhViswanath(; k = 10), x, y) # should be near 0 (and can be negative) +``` """ -Base.@kwdef struct GaoOhViswanath{MJ, MM} <: MutualInformationEstimator - k::Int = 1 - w::Int = 0 - metric_joint::MJ = Euclidean() - metric_marginals::MM = Euclidean() +struct GaoOhViswanath{M <: MutualInformation, MJ, MM} <: MutualInformationEstimator{M} + definition::M + k::Int + w::Int + metric_joint::MJ + metric_marginals::MM +end + +function GaoOhViswanath(definition = MIShannon(); + k = 1, w = 0, + metric_joint = Euclidean(), + metric_marginals = Euclidean() + ) + return GaoOhViswanath(definition, k, w, metric_joint, metric_marginals) end -function estimate(measure::MIShannon, est::GaoOhViswanath, x::VectorOrStateSpaceSet...) - verify_number_of_inputs_vars(measure, length(x)) - e = measure.e +function association(est::GaoOhViswanath{<:MIShannon}, x::VectorOrStateSpaceSet...) + verify_number_of_inputs_vars(est.definition, length(x)) @assert length(x) >= 2 || error("Need at leats two input StateSpaceSets to compute mutual information between them.") @@ -64,7 +93,7 @@ function estimate(measure::MIShannon, est::GaoOhViswanath, x::VectorOrStateSpace (M - 1) * log(N) + log(bvₘs / ball_volume(dimension(joint))) - (1 / N) * sum(sum(log.(nₖ)) for nₖ in marginal_nₖs) - return _convert_logunit(mi, ℯ, e.base) + return _convert_logunit(mi, ℯ, est.definition.base) end function marginal_inrangecount!(est::GaoOhViswanath, ns, xₘ, ds) diff --git a/src/methods/infomeasures/mutualinfo/estimators/GaussianMI.jl b/src/methods/information/estimators/mutual_info_estimators/GaussianMI.jl similarity index 74% rename from src/methods/infomeasures/mutualinfo/estimators/GaussianMI.jl rename to src/methods/information/estimators/mutual_info_estimators/GaussianMI.jl index d6383a267..ca531afd7 100644 --- a/src/methods/infomeasures/mutualinfo/estimators/GaussianMI.jl +++ b/src/methods/information/estimators/mutual_info_estimators/GaussianMI.jl @@ -1,7 +1,7 @@ export GaussianMI using StateSpaceSets: StateSpaceSet using StateSpaceSets: dimension, standardize -using LinearAlgebra: eigvals +using LinearAlgebra: eigvals, det """ GaussianMI <: MutualInformationEstimator @@ -9,6 +9,14 @@ using LinearAlgebra: eigvals `GaussianMI` is a parametric estimator for Shannon mutual information. +## Compatible definitions + +- [`MIShannon`](@ref) + +## Usage + +- Use with [`association`](@ref) to compute Shannon mutual information from input data. + ## Description Given ``d_x``-dimensional and ``d_y``-dimensional input data `X` and `Y`, @@ -49,12 +57,26 @@ If `normalize == true`, then the mutual information is estimated as ``` where ``\\sigma_i`` are the eigenvalues for ``\\Sigma``. 
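As a quick sanity check of the non-normalized formula, it can be evaluated directly from the correlation matrix of the joint data. The sketch below is not part of the package; it mirrors only the `normalize == false` branch, and the data and coefficient are arbitrary:

```julia
using LinearAlgebra: det
using Statistics: cor
using Random; rng = MersenneTwister(1234)
x = rand(rng, 2000)
y = 0.5 .* x .+ rand(rng, 2000)
Σ = cor(hcat(x, y))                      # correlation matrix of the joint data
Σx, Σy = Σ[1:1, 1:1], Σ[2:2, 2:2]        # marginal blocks (here 1×1, so det = 1)
0.5 * log((det(Σx) * det(Σy)) / det(Σ))  # Gaussian MI in nats; > 0 for correlated x, y
```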
+ +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000); y = rand(rng, 10000) +association(GaussianMI(), x, y) # should be near 0 (and can be negative) +``` """ -Base.@kwdef struct GaussianMI <: MutualInformationEstimator - normalize::Bool = false +struct GaussianMI{M <: MutualInformation} <: MutualInformationEstimator{M} + definition::M + normalize::Bool +end + +function GaussianMI(definition = MIShannon(); normalize = true) + return GaussianMI(definition, normalize) end -function estimate(measure::MIShannon, est::GaussianMI, x, y) +function association(est::GaussianMI{<:MIShannon}, x, y) X = StateSpaceSet(x) Y = StateSpaceSet(y) DX = dimension(X) @@ -62,15 +84,15 @@ function estimate(measure::MIShannon, est::GaussianMI, x, y) XY = StateSpaceSet(X, Y) if est.normalize - Σ = fastcor(standardize(XY)) + Σ = cor(standardize(XY)) σ = eigvals(Σ) mi = -0.5 * sum(log(σᵢ) for σᵢ in σ) else - Σ = fastcor(XY) + Σ = cor(XY) Σx = Σ[1:DX, 1:DX] Σy = Σ[DX+1:end, DX+1:end] mi = 0.5 * log((det(Σx) * det(Σy)) / det(Σ)) end - return convert_logunit(mi, ℯ, measure.e.base) + return convert_logunit(mi, ℯ, est.definition.base) end diff --git a/src/methods/infomeasures/mutualinfo/estimators/KSG1.jl b/src/methods/information/estimators/mutual_info_estimators/KSG1.jl similarity index 66% rename from src/methods/infomeasures/mutualinfo/estimators/KSG1.jl rename to src/methods/information/estimators/mutual_info_estimators/KSG1.jl index 8397624c4..b97c2793d 100644 --- a/src/methods/infomeasures/mutualinfo/estimators/KSG1.jl +++ b/src/methods/information/estimators/mutual_info_estimators/KSG1.jl @@ -6,6 +6,13 @@ using DelayEmbeddings: dimension using Statistics: mean export KraskovStögbauerGrassberger1, KSG1 +# TODO: finish description +# ## Description + +# Let the joint StateSpaceSet ``X := \\{\\bf{X}_1, \\bf{X_2}, \\ldots, \\bf{X}_m \\}`` be defined by the +# concatenation of the marginal StateSpaceSets ``\\{ \\bf{X}_k \\}_{k=1}^m``, where each ``\\bf{X}_k`` +# is potentially multivariate. Let ``\\bf{x}_1, \\bf{x}_2, \\ldots, \\bf{x}_N`` be the points +# in the joint space ``X``. """ KSG1 <: MutualInformationEstimator KraskovStögbauerGrassberger1 <: MutualInformationEstimator @@ -14,6 +21,14 @@ export KraskovStögbauerGrassberger1, KSG1 The `KraskovStögbauerGrassberger1` mutual information estimator (you can use `KSG1` for short) is the ``I^{(1)}`` `k`-th nearest neighbor estimator from [Kraskov2004](@citet). +## Compatible definitions + +- [`MIShannon`](@ref) + +## Usage + +- Use with [`association`](@ref) to compute Shannon mutual information from input data. + ## Keyword arguments - **`k::Int`**: The number of nearest neighbors to consider. Only information about the @@ -25,34 +40,35 @@ short) is the ``I^{(1)}`` `k`-th nearest neighbor estimator from [Kraskov2004](@ during neighbor searches in the joint space. Defaults to `0`, meaning that only the point itself is excluded. -## Description +## Example -Let the joint StateSpaceSet ``X := \\{\\bf{X}_1, \\bf{X_2}, \\ldots, \\bf{X}_m \\}`` be defined by the -concatenation of the marginal StateSpaceSets ``\\{ \\bf{X}_k \\}_{k=1}^m``, where each ``\\bf{X}_k`` -is potentially multivariate. Let ``\\bf{x}_1, \\bf{x}_2, \\ldots, \\bf{x}_N`` be the points -in the joint space ``X``. 
+```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000); y = rand(rng, 10000) +association(KSG1(; k = 10), x, y) # should be near 0 (and can be negative) +``` """ -struct KraskovStögbauerGrassberger1{MJ, MM} <: MutualInformationEstimator +struct KraskovStögbauerGrassberger1{M <: MutualInformation, MJ, MM} <: MutualInformationEstimator{M} + definition::M # the definition of the measure k::Int w::Int metric_joint::MJ # always Chebyshev, otherwise estimator is not valid! metric_marginals::MM - - function KraskovStögbauerGrassberger1(; - k::Int = 1, - w::Int = 0, - metric_marginals::MM = Chebyshev()) where MM - metric_joint = Chebyshev() - new{typeof(metric_joint), MM}(k, w, metric_joint, metric_marginals) - end end +function KraskovStögbauerGrassberger1(definition = MIShannon(); + k::Int = 1, + w::Int = 0, + metric_marginals = Chebyshev()) + metric_joint = Chebyshev() + KraskovStögbauerGrassberger1(definition, k, w, metric_joint, metric_marginals) +end +const KSG1 = KraskovStögbauerGrassberger1 -function estimate(measure::MIShannon, est::KraskovStögbauerGrassberger1, x::VectorOrStateSpaceSet...) - verify_number_of_inputs_vars(measure, length(x)) - - e = measure.e +function association(est::KSG1{<:MIShannon}, x::VectorOrStateSpaceSet...) + verify_number_of_inputs_vars(est.definition, length(x)) - (; k, w, metric_joint, metric_marginals) = est + (; definition, k, w, metric_joint, metric_marginals) = est joint = StateSpaceSet(x...) marginals = map(xᵢ -> StateSpaceSet(xᵢ), x) M = length(x) @@ -74,9 +90,8 @@ function estimate(measure::MIShannon, est::KraskovStögbauerGrassberger1, x::Vec mi = digamma(k) + (M - 1) * digamma(N) - mean(sum(digamma.(nₖ)) for nₖ in marginal_nₖs) - return convert_logunit(mi, ℯ, e.base) + return convert_logunit(mi, ℯ, definition.base) end -const KSG1 = KraskovStögbauerGrassberger1 function marginal_inrangecount!(est::KraskovStögbauerGrassberger1, ns, xₘ, ds) @assert length(ns) == length(xₘ) diff --git a/src/methods/infomeasures/mutualinfo/estimators/KSG2.jl b/src/methods/information/estimators/mutual_info_estimators/KSG2.jl similarity index 80% rename from src/methods/infomeasures/mutualinfo/estimators/KSG2.jl rename to src/methods/information/estimators/mutual_info_estimators/KSG2.jl index 8d50a7091..189ae05e7 100644 --- a/src/methods/infomeasures/mutualinfo/estimators/KSG2.jl +++ b/src/methods/information/estimators/mutual_info_estimators/KSG2.jl @@ -3,6 +3,7 @@ using Neighborhood: bulksearch using SpecialFunctions: digamma using DelayEmbeddings: StateSpaceSet, AbstractStateSpaceSet using Statistics: mean + export KraskovStögbauerGrassberger2, KSG2 """ @@ -13,6 +14,14 @@ export KraskovStögbauerGrassberger2, KSG2 The `KraskovStögbauerGrassberger2` mutual information estimator (you can use `KSG2` for short) is the ``I^{(2)}`` `k`-th nearest neighbor estimator from [Kraskov2004](@cite). +## Compatible definitions + +- [`MIShannon`](@ref) + +## Usage + +- Use with [`association`](@ref) to compute Shannon mutual information from input data. + ## Keyword arguments - **`k::Int`**: The number of nearest neighbors to consider. Only information about the @@ -42,7 +51,7 @@ are a distance less than ``\\epsilon_i`` away from ``\\bf{x}_i^m``. That is, we distance from a query point ``\\bf{x}_i \\in X`` (in the *joint* space) to count neighbors of ``x_i^m \\in \\bf{X}_m`` (in the marginal space). 
-Mutual information between the variables ``\\bf{X}_1, \\bf{X_2}, \\ldots, \\bf{X}_m`` is +Shannon mutual information between the variables ``\\bf{X}_1, \\bf{X_2}, \\ldots, \\bf{X}_m`` is then estimated as ```math @@ -52,32 +61,41 @@ then estimated as (m - 1)\\psi{(N)} - \\dfrac{1}{N} \\sum_{i = 1}^N \\sum_{j = 1}^m \\psi{(\\theta_i^j + 1)} ``` + +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000); y = rand(rng, 10000) +association(KSG2(; k = 10), x, y) # should be near 0 (and can be negative) +``` """ -struct KraskovStögbauerGrassberger2{MJ, MM} <: MutualInformationEstimator +struct KraskovStögbauerGrassberger2{M <: MutualInformation, MJ, MM} <: MutualInformationEstimator{M} + definition::M # the definition of the measure k::Int w::Int metric_joint::MJ # always Chebyshev, otherwise estimator is not valid! metric_marginals::MM # can be any metric - - function KraskovStögbauerGrassberger2(; - k::Int = 1, - w::Int = 0, - metric_marginals::MM = Chebyshev() - ) where MM - metric_joint = Chebyshev() - new{typeof(metric_joint), MM}(k, w, metric_joint, metric_marginals) - end end const KSG2 = KraskovStögbauerGrassberger2 -function estimate(measure::MIShannon, est::KraskovStögbauerGrassberger2, x::VectorOrStateSpaceSet...) - verify_number_of_inputs_vars(measure, length(x)) +function KraskovStögbauerGrassberger2(definition = MIShannon(); + k::Int = 1, + w::Int = 0, + metric_marginals = Chebyshev() + ) + metric_joint = Chebyshev() + KraskovStögbauerGrassberger2(definition, k, w, metric_joint, metric_marginals) +end + +function association(est::KSG2{<:MIShannon}, x::VectorOrStateSpaceSet...) + verify_number_of_inputs_vars(est.definition, length(x)) - e = measure.e @assert length(x) >= 2 || error("Need at leats two input StateSpaceSets to compute mutual information between them.") - (; k, w, metric_joint, metric_marginals) = est + (; definition, k, w, metric_joint, metric_marginals) = est joint = StateSpaceSet(x...) marginals = map(xᵢ -> StateSpaceSet(xᵢ), x) M = length(x) @@ -96,7 +114,6 @@ function estimate(measure::MIShannon, est::KraskovStögbauerGrassberger2, x::Vec marginal_inrangecount!(est, ns[m], xₘ, idxs, ds, m) end ϵ_maxes = [maximum(x) for x in StateSpaceSet(ϵs...)] - #@show all(ϵ_maxes .== ds) marginal_nₖs = StateSpaceSet(ns...) mi = digamma(k) - @@ -109,7 +126,7 @@ function estimate(measure::MIShannon, est::KraskovStögbauerGrassberger2, x::Vec (M-1) - #(M - 1) / k - mean(sum(digamma.(nₖ)) for nₖ in marginal_nₖs) + (M - 1) * digamma(N) - return convert_logunit(mi, ℯ, e.base) + return convert_logunit(mi, ℯ, definition.base) end function marginal_inrangecount!(est::KraskovStögbauerGrassberger2, ns::Vector{Int}, @@ -121,7 +138,6 @@ function marginal_inrangecount!(est::KraskovStögbauerGrassberger2, ns::Vector{I xᵢᵐ = xₘ[i] # Add small noise to facilitate ≤ while still using inrangecount ϵᵢᵐ = evaluate(est.metric_marginals, xᵢᵐ, xₘ[knn_idxs[i]]) + 1e1*eps() - #@show m, i, ϵᵢᵐ, ds[i] # Subtract 1 because `inrangecount` includes the point itself. 
ns[i] = inrangecount(tree, xᵢᵐ, ϵᵢᵐ) - 1 end diff --git a/src/methods/infomeasures/mutualinfo/estimators/estimators.jl b/src/methods/information/estimators/mutual_info_estimators/mutual_info_estimators.jl similarity index 62% rename from src/methods/infomeasures/mutualinfo/estimators/estimators.jl rename to src/methods/information/estimators/mutual_info_estimators/mutual_info_estimators.jl index 03b23fc91..7eabab496 100644 --- a/src/methods/infomeasures/mutualinfo/estimators/estimators.jl +++ b/src/methods/information/estimators/mutual_info_estimators/mutual_info_estimators.jl @@ -1,9 +1,6 @@ include("utils.jl") - include("KSG1.jl") include("KSG2.jl") +include("GaussianMI.jl") include("GaoOhViswanath.jl") include("GaoKannanOhViswanath.jl") -include("GaussianMI.jl") -include("wip/copula/copula_nonparametric.jl") -include("wip/copula/copula_parametric.jl") diff --git a/src/methods/information/estimators/mutual_info_estimators/utils.jl b/src/methods/information/estimators/mutual_info_estimators/utils.jl new file mode 100644 index 000000000..9339b5c82 --- /dev/null +++ b/src/methods/information/estimators/mutual_info_estimators/utils.jl @@ -0,0 +1,22 @@ +using Distances: evaluate + +# TODO: not used? delete? +# # In the Kraskov1 estimator, ϵs are the distances in the Z = (X, Y) joint space +# # In the Kraskov2 estimator, ϵs are the distances in the X and Y marginal spaces +# function count_within_radius!(p, x, metric, ϵs, N) +# @inbounds for i in 1:N +# ϵ = ϵs[i] / 2 +# xᵢ = x[i] +# p[i] = count(evaluate(metric, xᵢ, x[j]) < ϵ for j in 1:N) +# end + +# return p +# end + +function eval_dists_to_knns!(ds, pts, knn_idxs, metric) + @inbounds for i in eachindex(pts) + ds[i] = evaluate(metric, pts[i], pts[knn_idxs[i]]) + end + + return ds +end diff --git a/src/methods/infomeasures/transferentropy/convenience/Hilbert.jl b/src/methods/information/estimators/transfer_entropy_estimators/Hilbert.jl similarity index 83% rename from src/methods/infomeasures/transferentropy/convenience/Hilbert.jl rename to src/methods/information/estimators/transfer_entropy_estimators/Hilbert.jl index 07775fbe9..c8863597a 100644 --- a/src/methods/infomeasures/transferentropy/convenience/Hilbert.jl +++ b/src/methods/information/estimators/transfer_entropy_estimators/Hilbert.jl @@ -17,6 +17,7 @@ struct Amplitude <: InstantaneousSignalProperty end Indicates that the instantaneous phases of a signal should be used. """ struct Phase <: InstantaneousSignalProperty end +# TODO: update to new syntax """ Hilbert(est; source::InstantaneousSignalProperty = Phase(), @@ -29,7 +30,7 @@ obtained by first applying the Hilbert transform to each signal, then extracting phases/amplitudes of the resulting complex numbers [Palus2014](@cite). Original time series are thus transformed to instantaneous phase/amplitude time series. Transfer entropy is then estimated using the provided `est` on those phases/amplitudes (use e.g. -[`VisitationFrequency`](@ref), or [`SymbolicPermutation`](@ref)). +[`VisitationFrequency`](@ref), or [`OrdinalPatterns`](@ref)). !!! info Details on estimation of the transfer entropy (conditional mutual information) @@ -39,21 +40,21 @@ entropy is then estimated using the provided `est` on those phases/amplitudes (u See also: [`Phase`](@ref), [`Amplitude`](@ref). """ -struct Hilbert{E} <: TransferEntropyEstimator +struct Hilbert{M} <: TransferEntropyEstimator{M} + est::M # the estimator of the transfer entropy to be applied *after* transformation of the time series. 
source::InstantaneousSignalProperty target::InstantaneousSignalProperty cond::InstantaneousSignalProperty - est::E - function Hilbert(est::E; + function Hilbert(est::M; source::InstantaneousSignalProperty = Phase(), target::InstantaneousSignalProperty = Phase(), - cond::InstantaneousSignalProperty = Phase()) where E - new{E}(source, target, cond, est) + cond::InstantaneousSignalProperty = Phase()) where M + new{M}(est, source, target, cond) end end -function estimate(measure::TransferEntropy, est::Hilbert, source, target) +function association(est::Hilbert, source, target) hil_s = DSP.hilbert(source) hil_t = DSP.hilbert(target) @@ -73,11 +74,10 @@ function estimate(measure::TransferEntropy, est::Hilbert, source, target) error("est.target must be either Phase or Amplitude instance") end - # Now, estimate transfer entropy on the phases/amplitudes with the given estimator. - transferentropy(measure, est.est, s, t) + association(est.est, s, t) end -function estimate(measure::TransferEntropy, est::Hilbert, source, target, cond) +function association(est::Hilbert, source, target, cond) hil_s = DSP.hilbert(source) hil_t = DSP.hilbert(target) hil_c = DSP.hilbert(cond) @@ -105,6 +105,6 @@ function estimate(measure::TransferEntropy, est::Hilbert, source, target, cond) else error("est.cond must be either Phase or Amplitude instance") end - - transferentropy(measure, est.est, s, t, c) + + association(est.est, s, t, c) end diff --git a/src/methods/infomeasures/transferentropy/estimators/Lindner.jl b/src/methods/information/estimators/transfer_entropy_estimators/Lindner.jl similarity index 71% rename from src/methods/infomeasures/transferentropy/estimators/Lindner.jl rename to src/methods/information/estimators/transfer_entropy_estimators/Lindner.jl index e37522869..ad5d9259f 100644 --- a/src/methods/infomeasures/transferentropy/estimators/Lindner.jl +++ b/src/methods/information/estimators/transfer_entropy_estimators/Lindner.jl @@ -6,11 +6,17 @@ export Lindner """ Lindner <: TransferEntropyEstimator - Lindner(k = 1, w = 0, base = 2) + Lindner(definition = Shannon(); k = 1, w = 0, base = 2) The `Lindner` transfer entropy estimator [Lindner2011](@cite), which is also used in the Trentool MATLAB toolbox, and is based on nearest neighbor searches. +## Usage + +- Use with [`association`](@ref) to compute [`TEShannon`](@ref) from input data. + +## Keyword parameters + `w` is the Theiler window, which determines if temporal neighbors are excluded during neighbor searches (defaults to `0`, meaning that only the point itself is excluded when searching for neighbours). @@ -23,10 +29,10 @@ For a given points in the joint embedding space `jᵢ`, this estimator first com distance `dᵢ` from `jᵢ` to its `k`-th nearest neighbor. Then, for each point `mₖ[i]` in the `k`-th marginal space, it counts the number of points within radius `dᵢ`. -The transfer entropy is then computed as +The Shannon transfer entropy is then computed as ```math -TE(X \\to Y) = +TE_S(X \\to Y) = \\psi(k) + \\dfrac{1}{N} \\sum_{i}^n \\left[ \\sum_{k=1}^3 \\left( \\psi(m_k[i] + 1) \\right) @@ -36,47 +42,62 @@ TE(X \\to Y) = where the index `k` references the three marginal subspaces `T`, `TTf` and `ST` for which neighbor searches are performed. Here this estimator has been modified to allow for conditioning too (a simple modification to [Lindner2011](@citet)'s equation 5 and 6). 
+ +## Example + +```julia +using CausalityTools +using Random; rng = MersenneTwister(1234) +x = rand(rng, 10000) +y = rand(rng, 10000) .+ x +z = rand(rng, 10000) .+ y +est = Lindner(TEShannon(), k = 10) +association(est, x, z, y) # should be near 0 (and can be negative) +``` + +## Compatible definitions + +- [`TEShannon`](@ref) """ -Base.@kwdef struct Lindner{B} <: TransferEntropyEstimator - k::Int = 2 # number of neighbors in joint space. - w::Int = 0 - base::B = 2 +struct Lindner{E} <: TransferEntropyEstimator{E} + definition::E + k::Int # number of neighbors in joint space. + w::Int - function Lindner(k::Int, w::Int, base::B) where B + function Lindner(definition::E = TEShannon(); k::Int = 2, w::Int = 0) where {E} k >= 2 || throw(DomainError("The number of neighbors k must be >= 2.")) - new{B}(k, w, base) + new{E}(definition, k, w) end end -function estimate(measure::TEShannon, est::Lindner, x::AbstractVector...) - verify_number_of_inputs_vars(measure, length(x)) - S, T, T⁺, C = individual_marginals_te(measure.embedding, x...) - return estimate(measure, est, S, T, T⁺, C) +function association(est::Lindner{<:TEShannon}, x::VectorOr1DDataset...) + verify_number_of_inputs_vars(est.definition, length(x)) + S, T, T⁺, C = individual_marginals_te(est.definition.embedding, x...) + return estimate_from_marginals(est, S, T, T⁺, C) end -# This method is separate from the one above because when using `SurrogateTest`, +# This method is separate from the one above because when using `SurrogateAssociationTest`, # `S` is repeatedly shuffled, while the other marginals are not, so we avoid # allocating a bunch of new StateSpaceSets for every shuffle. -function estimate(measure::TEShannon, est::Lindner, +function estimate_from_marginals(est::Lindner{<:TEShannon}, S::AbstractStateSpaceSet, T::AbstractStateSpaceSet, T⁺::AbstractStateSpaceSet, C::AbstractStateSpaceSet) - (; k, w, base) = est # This layer ensures that the number of `StateSpaceSet`s that must be # constructed is minimal when doing e.g. surrogate testing (then, # `S` is the only marginal changing). TT⁺C = StateSpaceSet(T, T⁺, C) TC = StateSpaceSet(T, C) - return estimate_with_premade_embeddings(measure, est, S, TT⁺C, TC) + return estimate_with_premade_embeddings(est, S, TT⁺C, TC) end -function estimate_with_premade_embeddings(measure::TEShannon, est::Lindner, +function estimate_with_premade_embeddings(est::Lindner{<:TEShannon}, S::AbstractStateSpaceSet, TT⁺C::AbstractStateSpaceSet, TC::AbstractStateSpaceSet) - (; k, w, base) = est + (; definition, k, w) = est joint = StateSpaceSet(S, TT⁺C) STC = StateSpaceSet(S, TC) @@ -109,5 +130,5 @@ function estimate_with_premade_embeddings(measure::TEShannon, est::Lindner, # Convert to target base *after* digamma computations, because the digamma function # is a function of the natural log. 
- return _convert_logunit(te, ℯ, measure.e.base) + return _convert_logunit(te, ℯ, definition.base) end diff --git a/src/methods/information/estimators/transfer_entropy_estimators/SymbolicTransferEntropy.jl b/src/methods/information/estimators/transfer_entropy_estimators/SymbolicTransferEntropy.jl new file mode 100644 index 000000000..a0405b09a --- /dev/null +++ b/src/methods/information/estimators/transfer_entropy_estimators/SymbolicTransferEntropy.jl @@ -0,0 +1,63 @@ +export SymbolicTransferEntropy + +# TODO: update to new syntax +""" + SymbolicTransferEntropy <: TransferEntropyEstimator + SymbolicTransferEntropy(definition = TEShannon(); m = 3, τ = 1, + lt = ComplexityMeasures.isless_rand) + +A convenience estimator for symbolic transfer entropy [Staniek2008](@cite). + +## Compatible measures + +- [`TEShannon`](@ref) + +## Description + +[Symbolic transfer entropy](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.100.158101) +consists of two simple steps. First, the input time series are encoded using [`codify`](@ref) with +the [`CodifyVariables`](@ref) discretization and the [`OrdinalPatterns`](@ref) outcome space. This +transforms the input time series into integer time series. Transfer entropy is then +estimated as usual on the encoded time series, with the embedding dictated by `definition`, +using the [`JointProbabilities`](@ref) estimator. + +## Examples + +- [Example 1](@ref example_TEShannon_SymbolicTransferEntropy) +""" +struct SymbolicTransferEntropy{M} <: TransferEntropyEstimator{M} + definition::M + m::Int + τ::Int + lt::Function +end + +function SymbolicTransferEntropy(definition::M = TEShannon(); + m = 3, τ = 1, lt = ComplexityMeasures.isless_rand) where M + return SymbolicTransferEntropy{M}(definition, m, τ, lt) +end + +function association(est::SymbolicTransferEntropy{<:TEShannon}, x::AbstractVector...) + (; m, τ, lt) = est + discretization = CodifyVariables(OrdinalPatterns(; m, τ, lt)) + + x̂ = (codify(discretization, xᵢ) for xᵢ in x) + + te_definition = est.definition + embedding = te_definition.embedding + # If a conditional input (x[3]) is not provided, then C is just a 0-dimensional + # StateSpaceSet. The horizontal concatenation of C with T then just returns T. + # We therefore don't need separate methods for the conditional and non-conditional + # cases. + S, T, T⁺, C = individual_marginals_te(embedding, x̂...) + + # We have already encoded the marginals, so when computing CMI, we can + # simply use `UniqueElements`. + cmi_def = CMIShannon(; base = est.definition.base) + disc = CodifyVariables(UniqueElements()) + + est_unique = JointProbabilities(cmi_def, disc) + return association(est_unique, T⁺, S, StateSpaceSet(T, C)) +end diff --git a/src/methods/infomeasures/transferentropy/estimators/Zhu1.jl b/src/methods/information/estimators/transfer_entropy_estimators/Zhu1.jl similarity index 66% rename from src/methods/infomeasures/transferentropy/estimators/Zhu1.jl rename to src/methods/information/estimators/transfer_entropy_estimators/Zhu1.jl index a6b87cffa..3304074c8 100644 --- a/src/methods/infomeasures/transferentropy/estimators/Zhu1.jl +++ b/src/methods/information/estimators/transfer_entropy_estimators/Zhu1.jl @@ -11,10 +11,12 @@ export Zhu1 Zhu1 <: TransferEntropyEstimator Zhu1(k = 1, w = 0, base = MathConstants.e) -The `Zhu1` transfer entropy estimator [Zhu2015](@cite).
+The `Zhu1` transfer entropy estimator [Zhu2015](@cite) for normalized input data
+(as described in [Zhu2015](@citet)), applicable to both pairwise and conditional transfer entropy.
 
-Assumes that the input data have been normalized as described in [Zhu2015](@citet).
-The estimator can be used both for pairwise and conditional transfer entropy.
+## Usage
+
+- Use with [`association`](@ref) to compute [`TEShannon`](@ref) from input data.
 
 ## Description
 
@@ -26,27 +28,75 @@ This estimator is an extension to the entropy estimator in [Singh2003](@citet).
 `w` is the Theiler window, which determines if temporal neighbors are excluded
 during neighbor searches (defaults to `0`, meaning that only the point itself is
 excluded when searching for neighbours).
+
+For a given point `jᵢ` in the joint embedding space, this estimator first computes the
+distance `dᵢ` from `jᵢ` to its `k`-th nearest neighbor. Then, in each of the three marginal
+subspaces, it counts the number of points `mⱼ[i]` within radius `dᵢ`.
+
+The Shannon transfer entropy is then computed as
+
+```math
+TE_S(X \\to Y) =
+\\psi(k) + \\dfrac{1}{N} \\sum_{i=1}^{N}
+\\left[
+    \\sum_{j=1}^3 \\left( \\psi(m_j[i] + 1) \\right)
+\\right],
+```
+
+where the index `j` references the three marginal subspaces `T`, `TT⁺` and `ST` for which
+neighbor searches are performed. Here this estimator has been modified to allow for
+conditioning too (a simple modification to [Lindner2011](@citet)'s equations 5 and 6).
+
+## Example
+
+```julia
+using CausalityTools
+using Random; rng = MersenneTwister(1234)
+x = rand(rng, 10000)
+y = rand(rng, 10000) .+ x
+z = rand(rng, 10000) .+ y
+est = Zhu1(TEShannon(), k = 10)
+association(est, x, z, y) # should be near 0 (and can be negative)
+```
+
+## Compatible definitions
+
+- [`TEShannon`](@ref)
 """
-Base.@kwdef struct Zhu1 <: TransferEntropyEstimator
-    k::Int = 2
-    w::Int = 0
+struct Zhu1{M} <: TransferEntropyEstimator{M}
+    definition::M
+    k::Int
+    w::Int
+end
 
-    function Zhu1(k::Int, w::Int)
-        k >= 2 || throw(DomainError("The number of neighbors k must be >= 2."))
-        new(k, w)
-    end
+function Zhu1(definition::M = TEShannon(); k::Int = 2, w::Int = 0) where M
+    k >= 2 || throw(DomainError("The number of neighbors k must be >= 2."))
+    return Zhu1(definition, k, w)
 end
 
-function estimate(measure::TEShannon, est::Zhu1, x::AbstractVector...)
+function association(est::Zhu1{<:TEShannon}, x::VectorOr1DDataset...)
     # The Zhu1 estimator needs to keep track of the dimension of the individual
     # terms that goes into the implicit CMI computation. We could have just used
     # `h4_marginals` here, but then we wouldn't get the dimensions out of the box.
-    S, T, T⁺, C = individual_marginals_te(measure.embedding, x...)
-    return estimate(measure, est, S, T, T⁺, C)
+    embedding = est.definition.embedding
+    S, T, T⁺, C = individual_marginals_te(embedding, x...)
+ return estimate_from_marginals(est, S, T, T⁺, C) end -function estimate(measure::TEShannon, est::Zhu1, S::AbstractStateSpaceSet, T::AbstractStateSpaceSet, T⁺::AbstractStateSpaceSet, C::AbstractStateSpaceSet) - (; k, w) = est +function estimate_from_marginals(est::Zhu1, + S::AbstractStateSpaceSet, + T::AbstractStateSpaceSet, + T⁺::AbstractStateSpaceSet, + C::AbstractStateSpaceSet + ) + (; definition, k, w) = est joint = StateSpaceSet(S, T, T⁺, C) ST = StateSpaceSet(S, T, C) @@ -89,7 +139,7 @@ function estimate(measure::TEShannon, est::Zhu1, S::AbstractStateSpaceSet, T::Ab te = mean_volumes(vJ, vST, vTT⁺, vT, N) + mean_digamma(kST, kTT⁺, kT, k, N, DS, DT, DT⁺) # Convert to target unit *after* computations, which all use natural logs. - return _convert_logunit(te, ℯ, measure.e.base) + return _convert_logunit(te, ℯ, definition.base) end function volumes(x::AbstractStateSpaceSet, nn_idxs, N::Int) diff --git a/src/methods/information/estimators/transfer_entropy_estimators/transfer_entropy_estimators.jl b/src/methods/information/estimators/transfer_entropy_estimators/transfer_entropy_estimators.jl new file mode 100644 index 000000000..1304d94ff --- /dev/null +++ b/src/methods/information/estimators/transfer_entropy_estimators/transfer_entropy_estimators.jl @@ -0,0 +1,14 @@ +export TransferEntropyEstimator + +""" +The supertype of all dedicated transfer entropy estimators. +""" +abstract type TransferEntropyEstimator{M} <: MultivariateInformationMeasureEstimator{M} end + +# Concrete implementations +include("Zhu1.jl") +include("Lindner.jl") + +# convenience +include("Hilbert.jl") +include("SymbolicTransferEntropy.jl") \ No newline at end of file diff --git a/src/methods/information/information.jl b/src/methods/information/information.jl new file mode 100644 index 000000000..12624c201 --- /dev/null +++ b/src/methods/information/information.jl @@ -0,0 +1,11 @@ +include("counts_and_probs/counts_and_probs.jl") +include("core.jl") + +# These files extend the single-variable information API in ComplexityMeasures.jl. +include("estimators/information_estimators.jl") +include("definitions/information_definitions.jl") + +# Specific estimators must be included after definitions. +include("estimators/mutual_info_estimators/mutual_info_estimators.jl") +include("estimators/conditional_mutual_info_estimators/conditional_mutual_info_estimators.jl") +include("estimators/transfer_entropy_estimators/transfer_entropy_estimators.jl") diff --git a/src/methods/recurrence/MCR.jl b/src/methods/recurrence/MCR.jl index d423aec17..c421b3ac4 100644 --- a/src/methods/recurrence/MCR.jl +++ b/src/methods/recurrence/MCR.jl @@ -11,6 +11,14 @@ export mcr An association measure based on mean conditional probabilities of recurrence (MCR) introduced by [Romano2007](@citet). +## Usage + +- Use with [`association`](@ref) to compute the raw MCR for pairwise or conditional association. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise or + conditional association. + +## Description + `r` is mandatory keyword which specifies the recurrence threshold when constructing recurrence matrices. It can be instance of any subtype of `AbstractRecurrenceType` from @@ -19,14 +27,6 @@ To use any `r` that is not a real number, you have to do `using RecurrenceAnalys The `metric` is any valid metric from [Distances.jl](https://github.com/JuliaStats/Distances.jl). -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise - association. 
-- Use with [`mcr`](@ref) to compute the raw MCR for pairwise association. - -## Description - For input variables `X` and `Y`, the conditional probability of recurrence is defined as @@ -47,6 +47,11 @@ defined analogously. `X` and `Y` can be either both univariate timeseries, or both multivariate [`StateSpaceSet`](@ref)s. + + +## Estimation + +- [Example 1](@ref example_MCR). Pairwise versus conditional MCR. """ Base.@kwdef struct MCR{R, M} <: AssociationMeasure r::R @@ -55,7 +60,7 @@ end max_inputs_vars(::MCR) = 3 -function estimate(measure::MCR, x, y) +function association(measure::MCR, x, y) (; r, metric) = measure N = length(x) @assert length(x) == length(y) @@ -69,8 +74,7 @@ function estimate(measure::MCR, x, y) return rp / N end -# The -function estimate(measure::MCR, x, y, z) +function association(measure::MCR, x, y, z) (; r, metric) = measure N = length(x) @assert length(x) == length(y) @@ -92,15 +96,3 @@ function estimate(measure::MCR, x, y, z) ΔMCR = -(rp_x_y - rp_x_yz) return ΔMCR end - -# For compatibility with causal graph and independence testing API -estimate(r::MCR, est::Nothing, x, y) = estimate(r, x, y) - -""" - mcr(m::MCR, x, y) - -Compute the association between `x` and `y` based on conditional probabilities of -recurrence using the given [`MCR`](@ref) `measure`, where `x` and `y` can be either -univariate timeseries or multivariate [`StateSpaceSet`](@ref)s. -""" -mcr(args...) = estimate(args...) diff --git a/src/methods/recurrence/RMCD.jl b/src/methods/recurrence/RMCD.jl index 8942fb0e9..043fec31c 100644 --- a/src/methods/recurrence/RMCD.jl +++ b/src/methods/recurrence/RMCD.jl @@ -10,6 +10,15 @@ The recurrence measure of conditional dependence, or RMCD [Ramos2017](@cite), is a recurrence-based measure that mimics the conditional mutual information, but uses recurrence probabilities. +## Usage + +- Use with [`association`](@ref)/[`rmcd`](@ref) to compute the raw RMCD for pairwise + or conditional association. +- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise + or conditional association. + +## Description + `r` is a mandatory keyword which specifies the recurrence threshold when constructing recurrence matrices. It can be instance of any subtype of `AbstractRecurrenceType` from @@ -21,12 +30,6 @@ from [Distances.jl](https://github.com/JuliaStats/Distances.jl). Both the pairwise and conditional RMCD is non-negative, but due to round-off error, negative values may occur. If that happens, an RMCD value of `0.0` is returned. -## Usage - -- Use with [`independence`](@ref) to perform a formal hypothesis test for pairwise - or conditional association. -- Use with [`rmcd`](@ref) to compute the raw RMCD for pairwise or conditional association. - ## Description The RMCD measure is defined by @@ -50,7 +53,6 @@ case the following mutual information-like quantitity is computed (not discussed in [Ramos2017](@citet). ```math - I_{RMCD}(X; Y) = \\dfrac{1}{N} \\sum_{i} \\left[ \\dfrac{1}{N} \\sum_{j} R_{ij}^{X, Y} @@ -59,6 +61,10 @@ I_{RMCD}(X; Y) = \\dfrac{1}{N} \\right) \\right] ``` + +## Estimation + +- [Example 1](@ref example_RMCD). Pairwise versus conditional RMCD. """ Base.@kwdef struct RMCD{R, M, B} <: AssociationMeasure r::R @@ -68,28 +74,7 @@ end max_inputs_vars(::RMCD{R, M, D}) where {R, M, D} = 3 -""" - rmcd(measure::RMCD, x, y) - rmcd(measure::RMCD, x, y, [z, ...]) - -Estimate the recurrence-based `measure` of dependence between -`x` and `y`, conditional on `z` if given. 
- -Parameters for recurrence matrix estimation are given as a [`RMCD`](@ref) instance. -Inputs `x`, `y`, `z` can be either univariate timeseries or multivariate -[`StateSpaceSet`](@ref)s. -""" -rmcd(measure::RMCD, args...) = estimate(measure, args...) - -# For compatibility with independence testing framework. -function estimate(measure::RMCD, est::Nothing, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet, z::VectorOrStateSpaceSet) - return estimate(measure, x, y, z) -end -function estimate(measure::RMCD, est::Nothing, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) - return estimate(measure, x, y) -end - -function estimate(measure::RMCD, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet, z::VectorOrStateSpaceSet) +function association(measure::RMCD, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet, z::VectorOrStateSpaceSet) (; r, metric, base) = measure @assert length(x) == length(y) == length(z) N = length(x) @@ -125,7 +110,7 @@ function estimate(measure::RMCD, x::VectorOrStateSpaceSet, y::VectorOrStateSpace end # Similar, but analogous to mutual information -function estimate(measure::RMCD, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) +function association(measure::RMCD, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet) (; r, metric, base) = measure @assert length(x) == length(y) N = length(x) diff --git a/src/todo.md b/src/todo.md index 0f238db15..9ea244ff7 100644 --- a/src/todo.md +++ b/src/todo.md @@ -1,6 +1,6 @@ # TODO -- When using `ValueHistogram`, provide a `FixedRectangularBinning` with the same +- When using `ValueBinning`, provide a `FixedRectangularBinning` with the same dimensions as the joint StateSpaceSet. Or, provide a `RectangularBinning`, and first encode the joint data, then take the marginals of that. - When using `TransferOperator`, what to do? diff --git a/src/utils/cov.jl b/src/utils/cov.jl deleted file mode 100644 index 69be3805b..000000000 --- a/src/utils/cov.jl +++ /dev/null @@ -1,55 +0,0 @@ -import Statistics.cov -using Statistics: mean -using StateSpaceSets: AbstractStateSpaceSet -using StaticArrays: @MMatrix, @MVector, SMatrix, SVector - -export fastcov - -# Non-allocating and more than twice as fast as writing a wrapper -# `f(x) = Statistics.cov(Matrix(x))`. -# Also accepts SubStateSpaceSets, so we can use views on neighbor points. -# These functions return StaticArrays. -fastcov(x::AbstractStateSpaceSet) = fastcov(x.data) -fastmean_and_cov(x::AbstractStateSpaceSet) = fastmean_and_cov(x.data) - -function fastcov(x̄, x::Vector{SVector{D, T}}) where {D, T} - T <: AbstractFloat || error("Need `eltype(x[i]) <: AbstractFloat` ∀ i ∈ 1:length(x). Got `eltype(x[i])=$(eltype(first(x)))`") - N = length(x) - 1 - C = @MMatrix zeros(D, D) - x̄ = mean(x) - Δx = @MVector zeros(D) - @inbounds for xᵢ in x - Δx .= xᵢ - x̄ - C .+= Δx * transpose(Δx) - end - C ./= N - return SMatrix{D, D}(C) -end -# So we don't have to compute the mean twice at every iteration. -function fastcov(x::Vector{SVector{D, T}}) where {D, T} - T <: AbstractFloat || error("Need `eltype(x[i]) <: AbstractFloat` ∀ i ∈ 1:length(x). Got `eltype(x[i])=$(eltype(first(x)))`") - - μ = mean(x) - fastcov(μ, x) -end -function fastmean_and_cov(x::Vector{SVector{D, T}}) where {D, T} - μ = mean(x) - Σ = fastcov(μ, x) - return μ, Σ -end - -# `fastcor(x)` is twice as fast as `cor(Matrix(x)` and non-allocating. 
-export fastcor -fastcor(x::AbstractStateSpaceSet) = fastcor(x.data) -function fastcor(x::Vector{SVector{D, T}}) where {D, T} - N = length(x) - μ, Σ = fastmean_and_cov(x) - σ = std(x) - C = @MMatrix zeros(D, D) - for j in 1:D - for i in 1:D - C[i, j] = Σ[i, j] / (σ[i] * σ[j]) - end - end - return SMatrix{D, D}(C) -end diff --git a/src/utils/logs.jl b/src/utils/logs.jl new file mode 100644 index 000000000..7172d6270 --- /dev/null +++ b/src/utils/logs.jl @@ -0,0 +1,11 @@ + +# Just use ComplexityMeasures.convert_logunit when it is released. +""" + _convert_logunit(h_a::Real, , to) → h_b + +Convert a number `h_a` computed with logarithms to base `a` to an entropy `h_b` computed +with logarithms to base `b`. This can be used to convert the "unit" of e.g. an entropy. +""" +function _convert_logunit(h::Real, base_from, base_to) + h / log(base_from, base_to) +end diff --git a/src/utils/multidimensional_surrogates.jl b/src/utils/multidimensional_surrogates.jl index 26c0fc5e7..b32a9e1ee 100644 --- a/src/utils/multidimensional_surrogates.jl +++ b/src/utils/multidimensional_surrogates.jl @@ -3,16 +3,15 @@ using TimeseriesSurrogates: RandomShuffle, SurrogateGenerator function surrogenerator(x::AbstractStateSpaceSet, rf::RandomShuffle, rng = Random.default_rng()) n = length(x) - idxs = collect(1:n) init = ( permutation = collect(1:n), ) - return SurrogateGenerator(rf, x, similar(x), init, rng) + return SurrogateGenerator(rf, x, similar(x.data), init, rng) end -function (sg::SurrogateGenerator{<:RandomShuffle, <:AbstractStateSpaceSet})() +function (sg::SurrogateGenerator{<:RandomShuffle, T})() where T<:AbstractStateSpaceSet x, s, rng = sg.x, sg.s, sg.rng n = length(x) permutation = getfield.(Ref(sg.init), (:permutation)) @@ -20,5 +19,5 @@ function (sg::SurrogateGenerator{<:RandomShuffle, <:AbstractStateSpaceSet})() for i in 1:n s[i] = x[permutation[i]] end - return s + return T(s) end diff --git a/src/utils/transformations.jl b/src/utils/transformations.jl index 7c4dcf5e2..0f251d907 100644 --- a/src/utils/transformations.jl +++ b/src/utils/transformations.jl @@ -1,54 +1,5 @@ -""" - rank_transformation(x::AbstractVector) - rank_transformation(x::AbstractStateSpaceSet) → ranks::NTuple{D, Vector} - -Rank-transform each variable/column of the length-`n` `D`-dimensional StateSpaceSet `x` and return the -rank-transformed variables as a `D`-tuple of length-`n` vectors. - -Returns the unscaled `ranks`. Divide by `n` to get an *approximation* to the -empirical cumulative distribution function (ECDF) `x`. -## Description - -Modulo division by `n`, `rank_transformation` does *roughly* the same as naively computing the ECDF as -```julia -[count(xᵢ .<= x) for xᵢ in x] / length(x) -``` - -but an order of magnitude faster and with roughly three orders of magnitude less -allocations. The increased efficiency of this function relative to naively computing the -ECDF is -because it uses sorting of the input data to determine ranks, -arbitrarily breaking ties according to the sorting algorithm. Rank ties can therefore -never occur, and equal values are assigned different but close ranks. To preserve -ties, which you might want to do for example when dealing with -categorical or integer-valued data, use (the much slower) [`empcdf`](@ref). 
-""" -function rank_transformation(x::AbstractStateSpaceSet) - s = zeros(Int, length(x)) # re-use for each marginal - [rank_transformation!(s, xⱼ) for xⱼ in columns(x)] -end - -function rank_transformation(x::AbstractVector{T}) where T - N = length(x) - s = zeros(Int, N) - return rank_transformation!(s, x) -end - -function rank_transformation!( - s::AbstractVector{Int}, - x::AbstractVector{T}) where T <: Real - N = length(x) - r = zeros(N) - # Break ties arbitrarily by sorting. This means that ties are broken according to the - # sorting algorithm used, and equal values are assigned different ranks. - sortperm!(s, x) - for j in 1:N - r[s[j]] = j - end - return r -end """ empirical_cdf(x::AbstractVector{<:Real}) → x̄::Vector diff --git a/src/utils/utils.jl b/src/utils/utils.jl index f11458c4f..9c3cb72ae 100644 --- a/src/utils/utils.jl +++ b/src/utils/utils.jl @@ -1,14 +1,6 @@ -include("cov.jl") +include("logs.jl") include("kde.jl") include("cca.jl") include("multidimensional_surrogates.jl") include("extensions.jl") include("transformations.jl") - -function logq0(q) - if q == 1.0 - return x -> zero(x) - else - return x -> (x^(1 - q) - 1)/(1 - q) - end -end diff --git a/test/Project.toml b/test/Project.toml index 7209da636..a80794b84 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,5 @@ [deps] CausalInference = "8e462317-f959-576b-b3c1-403f26cec956" -ComplexityMeasures = "ed8fcbec-b94c-44b6-89df-898894ad9591" DelayEmbeddings = "5732040d-69e3-5649-938a-b6b4f237613f" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -15,3 +14,4 @@ StateSpaceSets = "40b095a5-5852-4c12-98c7-d43bf788e795" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70" diff --git a/test/causal_graphs/oce.jl b/test/causal_graphs/oce.jl index 8c2e0134d..a3a124abd 100644 --- a/test/causal_graphs/oce.jl +++ b/test/causal_graphs/oce.jl @@ -3,12 +3,16 @@ using CausalityTools: OCESelectedParents using Test using StableRNGs using Graphs.SimpleGraphs: SimpleEdge +using DynamicalSystemsBase rng = StableRNG(123) sys = system(Logistic4Chain(; rng)) X = columns(first(trajectory(sys, 50, Ttr = 10000))) -utest = SurrogateTest(MIShannon(), KSG1(k = 5, w = 1); rng, nshuffles = 30) -ctest = LocalPermutationTest(CMIShannon(), MesnerShalizi(k = 5, w = 1); rng, nshuffles = 30) +uest = KSG1(MIShannon(), k = 5, w = 1) +utest = SurrogateAssociationTest(uest; rng, nshuffles = 19) + +cest = MesnerShalizi(CMIShannon(), k = 5, w = 1) +ctest = LocalPermutationTest(cest; rng, nshuffles = 19) alg = OCE(; utest, ctest, τmax = 2) parents = infer_graph(alg, X; verbose = true) @test parents isa Vector{<:OCESelectedParents} @@ -21,9 +25,12 @@ parents = infer_graph(alg, d; verbose = true) rng = StableRNG(123) sys = system(Logistic2Bidir(; rng)) -X = columns(first(trajectory(sys, 200, Ttr = 10000))) -utest = SurrogateTest(MIShannon(), KSG1(k = 5, w = 1); rng, nshuffles = 100) -ctest = LocalPermutationTest(CMIShannon(), MesnerShalizi(k = 5, w = 1); rng, nshuffles = 100) +X = columns(first(trajectory(sys, 100, Ttr = 10000))) + +uest = KSG1(MIShannon(); k = 5, w = 1) +cest = MesnerShalizi(CMIShannon(); k = 5, w = 1) +utest = SurrogateAssociationTest(uest; rng, nshuffles = 19) +ctest = LocalPermutationTest(cest; rng, nshuffles = 19) parents = infer_graph(OCE(; utest, ctest, τmax = 1), X; verbose = true) @test parents isa 
Vector{<:OCESelectedParents} g = SimpleDiGraph(parents) diff --git a/test/causal_graphs/pc.jl b/test/causal_graphs/pc.jl index e6fd3b4f5..7a90c11a5 100644 --- a/test/causal_graphs/pc.jl +++ b/test/causal_graphs/pc.jl @@ -1,10 +1,12 @@ using Test using Graphs: SimpleDiGraph using StableRNGs -using CausalInference: pgalg +using CausalInference: pcalg, gausscitest using Combinatorics rng = StableRNG(123) +@test_throws ArgumentError PC(CorrTest(), CorrTest(), α = -0.5) + # ------------------------------------------------------------------------------- # "Analytical" tests # ------------------------------------------------------------------------------- @@ -18,7 +20,7 @@ rng = StableRNG(123) α = 0.01 alg = PC(CorrTest(), CorrTest(); α) -n = 10000 +n = 1000 # Case 1 x = randn(rng, n) @@ -70,14 +72,14 @@ nshuffles = 3 utests = [ CorrTest(), - SurrogateTest(PearsonCorrelation(); nshuffles, rng),# nonparametric version of CorrTest - SurrogateTest(MIShannon(), KSG2(); nshuffles, rng), - SurrogateTest(DistanceCorrelation(); nshuffles, rng), + SurrogateAssociationTest(PearsonCorrelation(); nshuffles, rng),# nonparametric version of CorrTest + SurrogateAssociationTest(KSG2(MIShannon()); nshuffles, rng), + SurrogateAssociationTest(DistanceCorrelation(); nshuffles, rng), ]; ctests = [ CorrTest(), - SurrogateTest(PartialCorrelation(); nshuffles, rng), # nonparametric version of CorrTest - LocalPermutationTest(CMIShannon(), KSG2(); nshuffles, rng), + SurrogateAssociationTest(PartialCorrelation(); nshuffles, rng), # nonparametric version of CorrTest + LocalPermutationTest(MIDecomposition(CMIShannon(), KSG2()); nshuffles, rng), LocalPermutationTest(DistanceCorrelation(); nshuffles, rng), ] @@ -95,10 +97,13 @@ end alg = PC(CorrTest(), CorrTest(), maxdepth = 1) @test infer_graph(alg, X) isa SimpleDiGraph -x, y, z = rand(rng, 50), rand(rng, 50), rand(rng, 50) -X = [x, y, z] -tt = SurrogateTest(TEShannon(), KSG2()) -ct = CorrTest() -@test_throws ArgumentError infer_graph(PC(ct, tt), X) -@test_throws ArgumentError infer_graph(PC(tt, ct), X) -@test_throws ArgumentError infer_graph(PC(tt, tt), X) +# In the future this should error when is_directed is implemented, +# because it shouldn't be possible to use `PC` with directed measures. 
+# ---------------------------------------------------------------- +# x, y, z = rand(rng, 50), rand(rng, 50), rand(rng, 50) +# X = [x, y, z] +# tt = SurrogateAssociationTest(MIDecomposition(TEShannon(), KSG2())) +# ct = CorrTest() +# @test_throws ArgumentError infer_graph(PC(ct, tt), X) +# @test_throws ArgumentError infer_graph(PC(tt, ct), X) +# @test_throws ArgumentError infer_graph(PC(tt, tt), X) diff --git a/test/contingency_matrices.jl b/test/contingency_matrices.jl index 748ef7371..a868a3c6f 100644 --- a/test/contingency_matrices.jl +++ b/test/contingency_matrices.jl @@ -45,12 +45,12 @@ w = rand(1000) # These are the estimators that have implementations of `marginal_encodings` probests = [ - SymbolicPermutation(m = 3), + OrdinalPatterns{3}(), Dispersion(), - ValueHistogram(3), - Contingency(SymbolicPermutation(m = 3)), + ValueBinning(3), + Contingency(OrdinalPatterns{3}()), Contingency(Dispersion()), - Contingency(ValueHistogram(3)), + Contingency(ValueBinning(3)), ] @testset "Contingency table: with $(probests[i]) discretization" for i in eachindex(probests) diff --git a/test/core.jl b/test/core.jl deleted file mode 100644 index 9b7c97d13..000000000 --- a/test/core.jl +++ /dev/null @@ -1,8 +0,0 @@ -using Test, CausalityTools -using Random -rng = MersenneTwister(1234) -x, y, z = rand(rng, 50), rand(rng, 50), rand(rng, 50) - -# Testing for number of input arguments. -@test_throws ArgumentError estimate(MIShannon(), KSG1(), x) -@test_throws ArgumentError estimate(MIShannon(), KSG1(), x, y, z) diff --git a/test/deprecations.jl b/test/deprecations.jl new file mode 100644 index 000000000..e69de29bb diff --git a/test/independence/LocalPermutationTest/api.jl b/test/independence/LocalPermutationTest/api.jl index 0b5e8d2fb..def8597ae 100644 --- a/test/independence/LocalPermutationTest/api.jl +++ b/test/independence/LocalPermutationTest/api.jl @@ -5,7 +5,7 @@ using StableRNGs rng = StableRNG(123) x, y, z = rand(rng, 30), rand(rng, 30), rand(rng, 30) -independence_test = LocalPermutationTest(CMIShannon(), FPVP()) +independence_test = LocalPermutationTest(FPVP(CMIShannon())) # We should get back a convenience wrapper containing the result. 
res = independence(independence_test, x, z, y) @test res isa LocalPermutationTestResult @@ -19,8 +19,8 @@ res = independence(independence_test, x, z, y) @test_throws ArgumentError independence(independence_test, x, y) # Sampling with/without replacement -test_cmi_replace = LocalPermutationTest(CMIShannon(), FPVP(), replace = true) -test_cmi_nonreplace = LocalPermutationTest(CMIShannon(), FPVP(), replace = false) +test_cmi_replace = LocalPermutationTest(FPVP(CMIShannon()), replace = true) +test_cmi_nonreplace = LocalPermutationTest(FPVP(CMIShannon()), replace = false) @test independence(test_cmi_replace, x, y, z) isa LocalPermutationTestResult @test independence(test_cmi_nonreplace, x, y, z) isa LocalPermutationTestResult @@ -28,5 +28,5 @@ test_cmi_nonreplace = LocalPermutationTest(CMIShannon(), FPVP(), replace = false @test_throws ArgumentError LocalPermutationTest(TEShannon()) # estimator needed # The number of local neighbors can't exceed the number of input datapoints -test_kperm_toolarge = LocalPermutationTest(CMIShannon(), FPVP(); kperm = 200, rng) +test_kperm_toolarge = LocalPermutationTest(FPVP(CMIShannon()); kperm = 200, rng) @test_throws ArgumentError independence(test_kperm_toolarge, x, y, z) diff --git a/test/independence/LocalPermutationTest/conditional_mutual_information.jl b/test/independence/LocalPermutationTest/conditional_mutual_information.jl index 118d67a77..864054d89 100644 --- a/test/independence/LocalPermutationTest/conditional_mutual_information.jl +++ b/test/independence/LocalPermutationTest/conditional_mutual_information.jl @@ -9,10 +9,13 @@ X = StateSpaceSet(x) Y = StateSpaceSet(y) Z = StateSpaceSet(z) -nshuffles = 5 -lptest_sp = LocalPermutationTest(CMIShannon(), SymbolicPermutation(); nshuffles, rng) -lptest_vh = LocalPermutationTest(CMIShannon(), ValueHistogram(4); nshuffles, rng) -lptest_dp = LocalPermutationTest(CMIShannon(), Dispersion(); nshuffles, rng) +nshuffles = 2 +est_ord = JointProbabilities(CMIShannon(), CodifyVariables(OrdinalPatterns())) +est_vh = JointProbabilities(CMIShannon(), CodifyVariables(ValueHistogram(3))) +est_dp = JointProbabilities(CMIShannon(), CodifyVariables( Dispersion(m = 2))) +lptest_sp = LocalPermutationTest(est_ord; nshuffles, rng) +lptest_vh = LocalPermutationTest(est_vh; nshuffles, rng) +lptest_dp = LocalPermutationTest(est_dp; nshuffles, rng) @test independence(lptest_sp, x, y, z) isa LocalPermutationTestResult @test independence(lptest_vh, x, y, z) isa LocalPermutationTestResult @test independence(lptest_dp, x, y, z) isa LocalPermutationTestResult diff --git a/test/independence/LocalPermutationTest/distance_correlation.jl b/test/independence/LocalPermutationTest/distance_correlation.jl index df0c74725..89c28c8bf 100644 --- a/test/independence/LocalPermutationTest/distance_correlation.jl +++ b/test/independence/LocalPermutationTest/distance_correlation.jl @@ -5,5 +5,5 @@ using StableRNGs rng = StableRNG(123) x, y, z = rand(rng, 30), rand(rng, 30), rand(rng, 30) -independence_test = LocalPermutationTest(DistanceCorrelation()) +independence_test = LocalPermutationTest(DistanceCorrelation(), nshuffles = 2) @test independence(independence_test, x, y, z) isa LocalPermutationTestResult \ No newline at end of file diff --git a/test/independence/LocalPermutationTest/local_permutation_test.jl b/test/independence/LocalPermutationTest/local_permutation_test.jl index b91c34e9c..2fc1c8308 100644 --- a/test/independence/LocalPermutationTest/local_permutation_test.jl +++ b/test/independence/LocalPermutationTest/local_permutation_test.jl @@ 
-4,6 +4,6 @@ include("api.jl") # of `LocalPermutationTest`. include("conditional_mutual_information.jl") include("part_mutual_information.jl") -include("transferentropy.jl") +#include("transferentropy.jl") include("partial_correlation.jl") include("distance_correlation.jl") \ No newline at end of file diff --git a/test/independence/LocalPermutationTest/part_mutual_information.jl b/test/independence/LocalPermutationTest/part_mutual_information.jl index 9f3bb6c82..156cc1621 100644 --- a/test/independence/LocalPermutationTest/part_mutual_information.jl +++ b/test/independence/LocalPermutationTest/part_mutual_information.jl @@ -9,10 +9,14 @@ X = StateSpaceSet(x) Y = StateSpaceSet(y) Z = StateSpaceSet(z) -nshuffles = 5 -lptest_sp = LocalPermutationTest(PMI(), SymbolicPermutation(); nshuffles, rng) -lptest_vh = LocalPermutationTest(PMI(), ValueHistogram(4); nshuffles, rng) -lptest_dp = LocalPermutationTest(PMI(), Dispersion(); nshuffles, rng) +nshuffles = 2 +est_ord = JointProbabilities(PMI(), CodifyVariables(OrdinalPatterns())) +est_vh = JointProbabilities(PMI(), CodifyVariables(ValueHistogram(3))) +est_dp = JointProbabilities(PMI(), CodifyVariables( Dispersion(m = 2))) + +lptest_sp = LocalPermutationTest(est_ord; nshuffles, rng) +lptest_vh = LocalPermutationTest(est_vh; nshuffles, rng) +lptest_dp = LocalPermutationTest(est_dp; nshuffles, rng) @test independence(lptest_sp, x, y, z) isa LocalPermutationTestResult @test independence(lptest_vh, x, y, z) isa LocalPermutationTestResult @test independence(lptest_dp, x, y, z) isa LocalPermutationTestResult @@ -20,6 +24,7 @@ lptest_dp = LocalPermutationTest(PMI(), Dispersion(); nshuffles, rng) @test independence(lptest_vh, X, Y, Z) isa LocalPermutationTestResult @test independence(lptest_dp, X, Y, Z) isa LocalPermutationTestResult +# α = 0.05 n = 10000 @@ -50,9 +55,9 @@ z = z + rand(rng, n) * 1e-3 # We should not be able to reject the null hypothesis `x ⫫ z | y`, because # x → y → z, so when conditioning on the intermediate variable, # the first and last variable in the chain should be independent. 
-test_sp = LocalPermutationTest(PMI(), SymbolicPermutation(); nshuffles = 200, rng) -test_dp = LocalPermutationTest(PMI(), Dispersion(); nshuffles = 200, rng) -test_vh = LocalPermutationTest(PMI(), ValueHistogram(2); nshuffles = 200, rng) -@test pvalue(independence(test_sp, x, y, z)) > α +test_ord = LocalPermutationTest(est_ord; nshuffles = 19, rng) +test_dp = LocalPermutationTest(est_dp; nshuffles = 19, rng) +test_vh = LocalPermutationTest(est_vh; nshuffles = 19, rng) +@test pvalue(independence(test_ord, x, y, z)) > α @test pvalue(independence(test_dp, x, y, z)) > α @test pvalue(independence(test_vh, x, y, z)) > α diff --git a/test/independence/LocalPermutationTest/partial_correlation.jl b/test/independence/LocalPermutationTest/partial_correlation.jl index 60bb3a20e..8e842d3e9 100644 --- a/test/independence/LocalPermutationTest/partial_correlation.jl +++ b/test/independence/LocalPermutationTest/partial_correlation.jl @@ -5,5 +5,5 @@ using StableRNGs rng = StableRNG(123) x, y, z = rand(rng, 30), rand(rng, 30), rand(rng, 30) -independence_test = LocalPermutationTest(PartialCorrelation()) +independence_test = LocalPermutationTest(PartialCorrelation(), nshuffles = 2) @test independence(independence_test, x, y, z) isa LocalPermutationTestResult \ No newline at end of file diff --git a/test/independence/LocalPermutationTest/transferentropy.jl b/test/independence/LocalPermutationTest/transferentropy.jl index db45484c4..7e96e483d 100644 --- a/test/independence/LocalPermutationTest/transferentropy.jl +++ b/test/independence/LocalPermutationTest/transferentropy.jl @@ -29,11 +29,11 @@ measure = TEShannon(; embedding) # For the dedicated estimators, we actually test the outcome on longer timeseries. # This is because the transfer entropy based local permutation test implemented # here doesn't appear in the literature. It is new, so we need to verify that it works. -dedicated_estimators = [Lindner(k=10), Zhu1(k=10)] +dedicated_estimators = [Lindner(measure, k=10), Zhu1(measure, k=10)] @testset "LocalPermutationTest with TEShannon + dedicated TE estimator $estimator" for estimator in dedicated_estimators - x, y, z = ar3(500, rng) + x, y, z = ar3(200, rng) - independence_test = LocalPermutationTest(measure, estimator; nshuffles = 100, rng = rng) + independence_test = LocalPermutationTest(estimator; nshuffles = 19, rng = rng) # x and z should be independent given y # (so we shouldn't be able to reject the null, i.e. pvalue >= α) @test independence(independence_test, x, z, y).pvalue >= α @@ -44,7 +44,7 @@ dedicated_estimators = [Lindner(k=10), Zhu1(k=10)] # A test with noise (all variables should be conditionally independent) # (so we shouldn't be able to reject the null, i.e. 
pvalue >= α) - x, y, z = randn(rng, 500), randn(rng, 500), randn(rng, 500) + x, y, z = randn(rng, 100), randn(rng, 100), randn(rng, 100) @test independence(independence_test, x, z, y).pvalue >= α @test independence(independence_test, x, y, z).pvalue >= α @@ -53,11 +53,14 @@ dedicated_estimators = [Lindner(k=10), Zhu1(k=10)] @test_throws ArgumentError independence(independence_test, x, y) end -nondedicated_estimators = [FPVP(), GaussianMI(), Kraskov(), ValueHistogram(2)] +nondedicated_estimators = [FPVP(), GaussianCMI(), + EntropyDecomposition(TEShannon(), Kraskov()), + EntropyDecomposition(TEShannon(), PlugIn(Shannon()), CodifyVariables(ValueHistogram(2))) +] @testset "LocalPermutationTest with TEShannon + non-dedicated estimator $estimator" for estimator in nondedicated_estimators x, y, z = ar3(50, rng) - independence_test = LocalPermutationTest(measure, estimator; nshuffles = 100, rng = rng) + independence_test = LocalPermutationTest(estimator; nshuffles = 19, rng = rng) @test independence(independence_test, x, z, y) isa LocalPermutationTestResult end diff --git a/test/independence/PATest.jl b/test/independence/PATest.jl index c5a56767a..9841720a7 100644 --- a/test/independence/PATest.jl +++ b/test/independence/PATest.jl @@ -1,5 +1,6 @@ using Test using CausalityTools +using DynamicalSystemsBase using Random rng = MersenneTwister(1234) diff --git a/test/independence/SurrogateAssociationTest/ConditionalMutualInformation.jl b/test/independence/SurrogateAssociationTest/ConditionalMutualInformation.jl new file mode 100644 index 000000000..2865b539b --- /dev/null +++ b/test/independence/SurrogateAssociationTest/ConditionalMutualInformation.jl @@ -0,0 +1,96 @@ +using Test +using Random +rng = MersenneTwister(1234) +n = 100 + +# Pre-discretized data +likeit = rand(rng, ["yes", "no"], n) +food = rand(rng, ["veggies", "meat", "fish"], n) +service = rand(rng, ["netflix", "hbo"], n) +nshuffles = 3 + +@test_throws ArgumentError SurrogateAssociationTest(CMIShannon()) + +# Estimators +d = CodifyVariables(UniqueElements()) # discretization +est_cmi_shannon = JointProbabilities(CMIShannon(), d) +est_cmi_renyisarbu = JointProbabilities(CMIRenyiSarbu(), d) +est_cmi_renyijizba = JointProbabilities(CMIRenyiJizba(), d) +est_cmi_tsallispapa = JointProbabilities(CMITsallisPapapetrou(), d) + +# Independence tests +test_cmi_shannon = SurrogateAssociationTest(est_cmi_shannon; nshuffles, rng) +test_cmi_renyisarbu = SurrogateAssociationTest(est_cmi_renyisarbu; nshuffles, rng) +test_cmi_renyijizba = SurrogateAssociationTest(est_cmi_renyijizba; nshuffles, rng) +test_cmi_tsallispapa = SurrogateAssociationTest(est_cmi_tsallispapa; nshuffles, rng) + +@test independence(test_cmi_shannon, food, likeit, service) isa SurrogateAssociationTestResult +@test independence(test_cmi_renyisarbu, food, likeit, service) isa SurrogateAssociationTestResult +@test independence(test_cmi_renyijizba, food, likeit, service) isa SurrogateAssociationTestResult +@test independence(test_cmi_tsallispapa, food, likeit, service) isa SurrogateAssociationTestResult + +# Analytical tests, in the limit of many samples +# ---------------------------------------------- +n = 1000 +# Pre-discretized data +likeit = rand(rng, ["yes", "no"], n) +food = rand(rng, ["veggies", "meat", "fish"], n) +service = rand(rng, ["netflix", "hbo"], n) + +α = 0.01 # pick some arbitrary significance level + +# We should not be able to reject the null hypothesis `food ⫫ likeit | service`, because +# the variables are all independent. 
+nshuffles = 19 +d = CodifyVariables(UniqueElements()) # outcome space +est = JointProbabilities(CMIShannon(), d) +test = SurrogateAssociationTest(est; nshuffles, rng) +test_cmi = independence(test, food, likeit, service) +@test pvalue(test_cmi) > α + + +# Independence tests +test_cmi_shannon = SurrogateAssociationTest(est_cmi_shannon; nshuffles, rng) +test_cmi_renyisarbu = SurrogateAssociationTest(est_cmi_renyisarbu; nshuffles, rng) +test_cmi_renyijizba = SurrogateAssociationTest(est_cmi_renyijizba; nshuffles, rng) +test_cmi_tsallispapa = SurrogateAssociationTest(est_cmi_tsallispapa; nshuffles, rng) + +@test independence(test_cmi_shannon, food, likeit, service) |> pvalue > α +@test independence(test_cmi_renyisarbu, food, likeit, service) |> pvalue > α +@test independence(test_cmi_renyijizba, food, likeit, service) |> pvalue > α +@test independence(test_cmi_tsallispapa, food, likeit, service) |> pvalue > α + + +# Simulate a survey where the place a person grew up controls how many times they +# fell while going skiing. The control happens through an intermediate variable +# `preferred_equipment`, which indicates what type of physical activity the +# person has engaged with. For this example, we should be able to reject +# places ⫫ experience, but not reject places ⫫ experience | preferred_equipment +places = rand(rng, ["city", "countryside", "under a rock"], n); +preferred_equipment = map(places) do place + if cmp(place, "city") == 1 + return rand(rng, ["skateboard", "bmx bike"]) + elseif cmp(place, "countryside") == 1 + return rand(rng, ["sled", "snowcarpet"]) + else + return rand(rng, ["private jet", "car"]) + end +end; +experience = map(preferred_equipment) do equipment + if equipment ∈ ["skateboard", "bmx bike"] + return "didn't fall" + elseif equipment ∈ ["sled", "snowcarpet"] + return "fell 3 times or less" + else + return "fell uncontably many times" + end +end; + +# We should not be able to reject the null hypothesis `places ⫫ experience | preferred_equipment`, because +# places → preferred_equipment → experience, so when conditioning on the intermediate variable, +# the first and last variable in the chain should be independent. +@test independence(test_cmi_shannon, places, experience, preferred_equipment) |> pvalue > α +@test independence(test_cmi_renyisarbu, places, experience, preferred_equipment) |> pvalue > α +@test independence(test_cmi_renyijizba, places, experience, preferred_equipment) |> pvalue > α +@test independence(test_cmi_tsallispapa, places, experience, preferred_equipment) |> pvalue > α + diff --git a/test/independence/SurrogateTest/HMeasure.jl b/test/independence/SurrogateAssociationTest/HMeasure.jl similarity index 72% rename from test/independence/SurrogateTest/HMeasure.jl rename to test/independence/SurrogateAssociationTest/HMeasure.jl index 6679b573c..3ea9ab224 100644 --- a/test/independence/SurrogateTest/HMeasure.jl +++ b/test/independence/SurrogateAssociationTest/HMeasure.jl @@ -1,10 +1,11 @@ -# Analytical tests (in the limit of a lot of samples) +# Analytical tests (in the limit of a lot of "many" samples) # ------------------------------------------------------------ using Random rng = MersenneTwister(1234) -x, y = rand(rng, 500), rand(rng, 500) +n = 100 +x, y = rand(rng, n), rand(rng, n) z = x .+ y -test = SurrogateTest(HMeasure(); rng) +test = SurrogateAssociationTest(HMeasure(); rng) α = 0.04 # Some arbitrary significance level. 
# We shouldn't be able to reject the null when the variables are independent diff --git a/test/independence/SurrogateTest/LMeasure.jl b/test/independence/SurrogateAssociationTest/LMeasure.jl similarity index 82% rename from test/independence/SurrogateTest/LMeasure.jl rename to test/independence/SurrogateAssociationTest/LMeasure.jl index b2ebfa68e..da6caacf4 100644 --- a/test/independence/SurrogateTest/LMeasure.jl +++ b/test/independence/SurrogateAssociationTest/LMeasure.jl @@ -2,9 +2,10 @@ # ------------------------------------------------------------ using Random rng = MersenneTwister(1234) -x, y = rand(rng, 500), rand(rng, 500) +n = 100 +x, y = rand(rng, n), rand(rng, n) z = x .+ y -test = SurrogateTest(LMeasure(); rng) +test = SurrogateAssociationTest(LMeasure(); rng) α = 0.05 # Some arbitrary significance level. # We shouldn't be able to reject the null when the variables are independent diff --git a/test/independence/SurrogateTest/MMeasure.jl b/test/independence/SurrogateAssociationTest/MMeasure.jl similarity index 83% rename from test/independence/SurrogateTest/MMeasure.jl rename to test/independence/SurrogateAssociationTest/MMeasure.jl index e44ec4b08..136d37729 100644 --- a/test/independence/SurrogateTest/MMeasure.jl +++ b/test/independence/SurrogateAssociationTest/MMeasure.jl @@ -2,9 +2,10 @@ # ------------------------------------------------------------ using Random rng = MersenneTwister(1234) -x, y = rand(rng, 300), rand(rng, 300) +n = 100 +x, y = rand(rng, n), rand(rng, n) z = x .+ y -test = SurrogateTest(MMeasure(); rng) +test = SurrogateAssociationTest(MMeasure(); rng) α = 0.04 # Some arbitrary significance level. # We shouldn't be able to reject the null when the variables are independent diff --git a/test/independence/SurrogateTest/MutualInformation.jl b/test/independence/SurrogateAssociationTest/MutualInformation.jl similarity index 57% rename from test/independence/SurrogateTest/MutualInformation.jl rename to test/independence/SurrogateAssociationTest/MutualInformation.jl index 925ada049..8107cc7ed 100644 --- a/test/independence/SurrogateTest/MutualInformation.jl +++ b/test/independence/SurrogateAssociationTest/MutualInformation.jl @@ -6,20 +6,23 @@ n = 100 likeit = rand(rng, ["yes", "no"], n) food = rand(rng, ["veggies", "meat", "fish"], n) service = rand(rng, ["netflix", "hbo"], n) -est = Contingency() +d = CodifyVariables(UniqueElements()) nshuffles = 3 +test_mi_s = SurrogateAssociationTest(JointProbabilities(MIShannon(), d); nshuffles, rng) +test_mi_rj = SurrogateAssociationTest(JointProbabilities(MIRenyiJizba(), d); nshuffles, rng) +test_mi_rs = SurrogateAssociationTest(JointProbabilities(MIRenyiSarbu(), d); nshuffles, rng) +test_mi_tf = SurrogateAssociationTest(JointProbabilities(MITsallisFuruichi(), d); nshuffles, rng) +test_mi_tm = SurrogateAssociationTest(JointProbabilities(MITsallisMartin(), d); nshuffles, rng) -@test_throws ArgumentError SurrogateTest(MIShannon()) - -@test independence(SurrogateTest(MIShannon(), est; nshuffles, rng), food, likeit) isa SurrogateTestResult -@test independence(SurrogateTest(MIRenyiJizba(), est; nshuffles, rng), food, likeit) isa SurrogateTestResult -@test independence(SurrogateTest(MIRenyiSarbu(), est; nshuffles, rng), food, likeit) isa SurrogateTestResult -@test independence(SurrogateTest(MITsallisFuruichi(), est; nshuffles, rng), food, likeit) isa SurrogateTestResult -@test independence(SurrogateTest(MITsallisMartin(), est; nshuffles, rng), food, likeit) isa SurrogateTestResult +@test independence(test_mi_s, food, likeit) isa 
SurrogateAssociationTestResult +@test independence(test_mi_rj, food, likeit) isa SurrogateAssociationTestResult +@test independence(test_mi_rs, food, likeit) isa SurrogateAssociationTestResult +@test independence(test_mi_tf, food, likeit) isa SurrogateAssociationTestResult +@test independence(test_mi_tm, food, likeit) isa SurrogateAssociationTestResult # Analytical tests, in the limit. # ------------------------------- -n = 100000 +n = 1000 α = 0.02 # pick some arbitrary significance level # Simulate a survey where the place a person grew up controls how many times they @@ -48,5 +51,7 @@ experience = map(preferred_equipment) do equipment end; # We should be able to reject the null hypothesis of `places ⫫ experience`. -test_mi = independence(SurrogateTest(MIShannon(), est; nshuffles, rng), places, experience) +d = CodifyVariables(UniqueElements()) +est = JointProbabilities(MIShannon(), d) +test_mi = independence(SurrogateAssociationTest(est), places, experience) @test pvalue(test_mi) < α diff --git a/test/independence/SurrogateTest/SMeasure.jl b/test/independence/SurrogateAssociationTest/SMeasure.jl similarity index 82% rename from test/independence/SurrogateTest/SMeasure.jl rename to test/independence/SurrogateAssociationTest/SMeasure.jl index aa1420866..ff10a02d0 100644 --- a/test/independence/SurrogateTest/SMeasure.jl +++ b/test/independence/SurrogateAssociationTest/SMeasure.jl @@ -2,9 +2,10 @@ # ------------------------------------------------------------ using Random rng = MersenneTwister(1234) -x, y = rand(rng, 300), rand(rng, 300) +n = 100 +x, y = rand(rng, 100), rand(rng, 100) z = x .+ y -test = SurrogateTest(SMeasure(); rng) +test = SurrogateAssociationTest(SMeasure(); rng) α = 0.04 # Some arbitrary significance level. # We shouldn't be able to reject the null when the variables are independent diff --git a/test/independence/SurrogateTest/SurrogateTest.jl b/test/independence/SurrogateAssociationTest/SurrogateAssociationTest.jl similarity index 87% rename from test/independence/SurrogateTest/SurrogateTest.jl rename to test/independence/SurrogateAssociationTest/SurrogateAssociationTest.jl index 74f593e16..ec0b62007 100644 --- a/test/independence/SurrogateTest/SurrogateTest.jl +++ b/test/independence/SurrogateAssociationTest/SurrogateAssociationTest.jl @@ -2,7 +2,8 @@ # API # ------------------------------------------------------------------------ # Error for wrong number of input datasets. -test = SurrogateTest(MIShannon(), KSG1()) +est = JointProbabilities(MIShannon(), OrdinalPatterns(m=3)) +test = SurrogateAssociationTest(est) x, y, z = rand(30), rand(30), rand(30) @test_throws ArgumentError independence(test, x) @test_throws ArgumentError independence(test, x, y, z) diff --git a/test/independence/SurrogateTest/TransferEntropyConditional.jl b/test/independence/SurrogateAssociationTest/TransferEntropyConditional.jl similarity index 60% rename from test/independence/SurrogateTest/TransferEntropyConditional.jl rename to test/independence/SurrogateAssociationTest/TransferEntropyConditional.jl index 9b8edff8d..2865def90 100644 --- a/test/independence/SurrogateTest/TransferEntropyConditional.jl +++ b/test/independence/SurrogateAssociationTest/TransferEntropyConditional.jl @@ -4,19 +4,22 @@ using Random rng = Random.MersenneTwister(1234) sys = system(Logistic4Chain(; xi = [0.1, 0.2, 0.3, 0.4], rng)); -n = 500 +n = 300 x, y, z, w = columns(first(trajectory(sys, n, Ttr = 10000))); α = 0.04 # Arbitrary significance level 1 - α = 0.96 # The ground truth is X → Y → Z. 
-test = SurrogateTest(TEShannon(), FPVP(); rng) - +est = CMIDecomposition(TEShannon(), FPVP()) +test = SurrogateAssociationTest(est; rng, nshuffles = 19) @test pvalue(independence(test, x, z)) < α # This has been manually tested to occur with c₁₂ = 0.8 # We should be able to reject the null when testing transferentropy(x → y | z) @test pvalue(independence(test, x, z, y)) > α -@test independence(SurrogateTest(TEShannon(), Zhu1()), x, y, z) isa SurrogateTestResult -@test independence(SurrogateTest(TEShannon(), Lindner()), x, y, z) isa SurrogateTestResult +test = SurrogateAssociationTest(Zhu1(TEShannon()); nshuffles = 3) # not testing values, so few shuffles is ok +@test independence(test, x, y, z) isa SurrogateAssociationTestResult + +test = SurrogateAssociationTest(Lindner(TEShannon()); nshuffles = 3)# not testing values, so few shuffles is ok +@test independence(test, x, y, z) isa SurrogateAssociationTestResult diff --git a/test/independence/SurrogateAssociationTest/TransferEntropyPairwise.jl b/test/independence/SurrogateAssociationTest/TransferEntropyPairwise.jl new file mode 100644 index 000000000..45327fc2a --- /dev/null +++ b/test/independence/SurrogateAssociationTest/TransferEntropyPairwise.jl @@ -0,0 +1,56 @@ +using Random +using TimeseriesSurrogates + +rng = Xoshiro(1234) + +sys = system(Logistic2Unidir(; c_xy = 0.5)) +x, y = columns(first(trajectory(sys, 300, Ttr = 10000))) + +# Creation +est = MIDecomposition(TEShannon(), KSG1()) +@test SurrogateAssociationTest(est) isa SurrogateAssociationTest +est = CMIDecomposition(TEShannon(), FPVP()) +# ArgumentError thrown if an estimator isn't provided. +@test_throws ArgumentError SurrogateAssociationTest(TEShannon()) + + +α = 0.0287 # Arbitrary significance level 1 - α = 0.9713 +test = SurrogateAssociationTest(est; rng, nshuffles = 50) + +# The ground truth is X → Y, so we should be able to reject the null +# when testing transferentropy(x → y) +@test pvalue(independence(test, x, y)) < α + +# The ground truth is X → Y, so we shouldn't be able to reject the null +# when testing transferentropy(y → x) +@test pvalue(independence(test, y, x)) > α + +# MIDecomposition estimator +est = MIDecomposition(TEShannon(), KSG1()) +test = SurrogateAssociationTest(est) +@test independence(test, x, y) isa SurrogateAssociationTestResult + +# Dedicated estimators +x, y = columns(first(trajectory(sys, 200, Ttr = 1000))) +est = Lindner() +test = SurrogateAssociationTest(est, nshuffles = 2) +@test independence(test, x, y) isa SurrogateAssociationTestResult + +est = Zhu1() +test = SurrogateAssociationTest(est; nshuffles = 2) +@test independence(test, x, y) isa SurrogateAssociationTestResult + +# `EntropyDecomposition` +est = EntropyDecomposition(TEShannon(), Kraskov()) +test = SurrogateAssociationTest(est; nshuffles = 2); +@test independence(test, x, y) isa SurrogateAssociationTestResult + +# Can't use single-variable surrogate methods when a dimension is higher than 1 +est = EntropyDecomposition(TEShannon(embedding = EmbeddingTE(dS = 2)), Kraskov()) +test = SurrogateAssociationTest(est; nshuffles = 2, surrogate = AAFT()); +@test_throws ArgumentError independence(test, x, y) + +# Optimising parameters using traditional methods +est = EntropyDecomposition(TEShannon(embedding = OptimiseTraditional()), Kraskov()) +test = SurrogateAssociationTest(est; nshuffles = 2); +@test independence(test, x, y) isa SurrogateAssociationTestResult diff --git a/test/independence/SurrogateAssociationTest/crossmappings.jl b/test/independence/SurrogateAssociationTest/crossmappings.jl 
new file mode 100644 index 000000000..825b4b917 --- /dev/null +++ b/test/independence/SurrogateAssociationTest/crossmappings.jl @@ -0,0 +1,35 @@ +using Test +using Random +rng = MersenneTwister(1234) +n = 250 +x, y = rand(rng, n), rand(rng, n) +z = x.+ y + +d = 2 +τ = -1 + +# Regular variant. +est_ccm = RandomVectors(CCM(; d, τ); libsizes = 100, replace = true, rng) +test_ccm = SurrogateAssociationTest(est_ccm; rng, nshuffles = 19) + +est_pai = RandomVectors(PAI(; d, τ); libsizes = 100, replace = true, rng) +test_pai = SurrogateAssociationTest(est_pai; rng, nshuffles = 19) + +# Invalid syntax. +@test_throws ArgumentError SurrogateAssociationTest(CCM(), RandomVectors(libsizes = 100:100:300)) + +α = 0.03 # arbitrarily set confidence level to 1 - α +@test pvalue(independence(test_ccm, x, y)) > α +@test pvalue(independence(test_ccm, x, z)) < α +@test pvalue(independence(test_ccm, x, z)) < α +@test pvalue(independence(test_pai, x, y)) > α + +# Ensemble variant. +eccm = Ensemble(RandomVectors(CCM(; d, τ); libsizes = 50, replace = true, rng)) +epai = Ensemble(RandomVectors(PAI(; d, τ); libsizes = 50, replace = true, rng)) +test_ccm = SurrogateAssociationTest(eccm; rng, nshuffles = 19) +test_pai = SurrogateAssociationTest(epai; rng, nshuffles = 19) +@test pvalue(independence(test_ccm, x, y)) > α +@test pvalue(independence(test_ccm, x, z)) < α +@test pvalue(independence(test_ccm, x, z)) < α +@test pvalue(independence(test_pai, x, y)) > α diff --git a/test/independence/SurrogateTest/pmi.jl b/test/independence/SurrogateAssociationTest/pmi.jl similarity index 57% rename from test/independence/SurrogateTest/pmi.jl rename to test/independence/SurrogateAssociationTest/pmi.jl index 66c9a86ed..ed3a34642 100644 --- a/test/independence/SurrogateTest/pmi.jl +++ b/test/independence/SurrogateAssociationTest/pmi.jl @@ -1,24 +1,29 @@ + +using Test using Random rng = MersenneTwister(1234) -n = 200 +n = 100 # Pre-discretized data likeit = rand(rng, ["yes", "no"], n) food = rand(rng, ["veggies", "meat", "fish"], n) service = rand(rng, ["netflix", "hbo"], n) -est = Contingency() nshuffles = 3 -@test_throws ArgumentError SurrogateTest(PMI()) +@test_throws ArgumentError SurrogateAssociationTest(PMI()) + +# estimator +d = CodifyVariables(UniqueElements()) +est_pmi = JointProbabilities(PMI(), d) -@test independence(SurrogateTest(PMI(), est; nshuffles, rng), food, likeit, service) isa SurrogateTestResult -@test independence(SurrogateTest(CMIRenyiSarbu(), est; nshuffles, rng), food, likeit, service) isa SurrogateTestResult -@test independence(SurrogateTest(CMIRenyiJizba(), est; nshuffles, rng), food, likeit, service) isa SurrogateTestResult +# Tests +test = SurrogateAssociationTest(est_pmi; nshuffles, rng) +@test independence(test, food, likeit, service) isa SurrogateAssociationTestResult # Analytical tests, in the limit. # ------------------------------- -n = 10000 +n = 3000 # Pre-discretized data likeit = rand(rng, ["yes", "no"], n); food = rand(rng, ["veggies", "meat", "fish"], n); @@ -28,7 +33,7 @@ service = rand(rng, ["netflix", "hbo"], n); # We should not be able to reject the null hypothesis `food ⫫ likeit | service`, because # the variables are all independent. 
-test_cmi = independence(SurrogateTest(PMI(), est; nshuffles = 200, rng), food, likeit, service) +test_cmi = independence(test, food, likeit, service) @test pvalue(test_cmi) > α # Simulate a survey where the place a person grew up controls how many times they @@ -60,20 +65,29 @@ end; # We should not be able to reject the null hypothesis `places ⫫ experience | preferred_equipment`, because # places → preferred_equipment → experience, so when conditioning on the intermediate variable, # the first and last variable in the chain should be independent. -test = SurrogateTest(PMI(), est; nshuffles = 200, rng) -test_cmi = independence(test, places, experience, preferred_equipment) +test_pmi = independence(test, places, experience, preferred_equipment) @test pvalue(test_cmi) > α -nshuffles = 5 -surrtest_sp = SurrogateTest(PMI(), SymbolicPermutation(); nshuffles, rng) -surrtest_vh = SurrogateTest(PMI(), ValueHistogram(4); nshuffles, rng) -surrtest_dp = SurrogateTest(PMI(), Dispersion(); nshuffles, rng) +# Numeric tests +x, y, z = rand(rng, n), rand(rng, n), rand(rng, n) +X, Y, Z = StateSpaceSet(x), StateSpaceSet(y), StateSpaceSet(z) +nshuffles = 19 +d_ord = CodifyVariables(OrdinalPatterns()) +d_disp = CodifyVariables(Dispersion()) +d_bin = CodifyVariables(ValueBinning(4)) +est_ord = JointProbabilities(PMI(), d_ord) +est_disp = JointProbabilities(PMI(), d_disp) +est_bin = JointProbabilities(PMI(), d_bin) + +surrtest_ord = SurrogateAssociationTest(est_ord; nshuffles, rng) +surrtest_disp = SurrogateAssociationTest(est_disp; nshuffles, rng) +surrtest_bin = SurrogateAssociationTest(est_bin; nshuffles, rng) -@test independence(surrtest_sp, x, y, z) isa SurrogateTestResult -@test independence(surrtest_vh, x, y, z) isa SurrogateTestResult -@test independence(surrtest_dp, x, y, z) isa SurrogateTestResult +@test independence(surrtest_ord, x, y, z) isa SurrogateAssociationTestResult +@test independence(surrtest_disp, x, y, z) isa SurrogateAssociationTestResult +@test independence(surrtest_bin, x, y, z) isa SurrogateAssociationTestResult -@test independence(surrtest_sp, X, Y, Z) isa SurrogateTestResult -@test independence(surrtest_vh, X, Y, Z) isa SurrogateTestResult -@test independence(surrtest_dp, X, Y, Z) isa SurrogateTestResult +@test independence(surrtest_ord, X, Y, Z) isa SurrogateAssociationTestResult +@test independence(surrtest_disp, X, Y, Z) isa SurrogateAssociationTestResult +@test independence(surrtest_bin, X, Y, Z) isa SurrogateAssociationTestResult diff --git a/test/independence/SurrogateTest/ConditionalMutualInformation.jl b/test/independence/SurrogateTest/ConditionalMutualInformation.jl deleted file mode 100644 index b9983ce25..000000000 --- a/test/independence/SurrogateTest/ConditionalMutualInformation.jl +++ /dev/null @@ -1,65 +0,0 @@ -using Random -rng = MersenneTwister(1234) -n = 200 - -# Pre-discretized data -likeit = rand(rng, ["yes", "no"], n) -food = rand(rng, ["veggies", "meat", "fish"], n) -service = rand(rng, ["netflix", "hbo"], n) -est = Contingency() -nshuffles = 3 - -@test_throws ArgumentError SurrogateTest(CMIShannon()) - -@test independence(SurrogateTest(CMIShannon(), est; nshuffles, rng), food, likeit, service) isa SurrogateTestResult -@test independence(SurrogateTest(CMIRenyiSarbu(), est; nshuffles, rng), food, likeit, service) isa SurrogateTestResult -@test independence(SurrogateTest(CMIRenyiJizba(), est; nshuffles, rng), food, likeit, service) isa SurrogateTestResult - - -# Analytical tests, in the limit. 
-# ------------------------------- -n = 10000 -# Pre-discretized data -likeit = rand(rng, ["yes", "no"], n) -food = rand(rng, ["veggies", "meat", "fish"], n) -service = rand(rng, ["netflix", "hbo"], n) - -α = 0.01 # pick some arbitrary significance level - -# We should not be able to reject the null hypothesis `food ⫫ likeit | service`, because -# the variables are all independent. -test_cmi = independence(SurrogateTest(CMIShannon(), est; nshuffles = 200, rng), food, likeit, service) -@test pvalue(test_cmi) > α - -# Simulate a survey where the place a person grew up controls how many times they -# fell while going skiing. The control happens through an intermediate variable -# `preferred_equipment`, which indicates what type of physical activity the -# person has engaged with. For this example, we should be able to reject -# places ⫫ experience, but not reject places ⫫ experience | preferred_equipment - -places = rand(rng, ["city", "countryside", "under a rock"], n); -preferred_equipment = map(places) do place - if cmp(place, "city") == 1 - return rand(rng, ["skateboard", "bmx bike"]) - elseif cmp(place, "countryside") == 1 - return rand(rng, ["sled", "snowcarpet"]) - else - return rand(rng, ["private jet", "car"]) - end -end; -experience = map(preferred_equipment) do equipment - if equipment ∈ ["skateboard", "bmx bike"] - return "didn't fall" - elseif equipment ∈ ["sled", "snowcarpet"] - return "fell 3 times or less" - else - return "fell uncontably many times" - end -end; - -# We should not be able to reject the null hypothesis `places ⫫ experience | preferred_equipment`, because -# places → preferred_equipment → experience, so when conditioning on the intermediate variable, -# the first and last variable in the chain should be independent. -test = SurrogateTest(CMIShannon(), est; nshuffles = 200, rng) -test_cmi = independence(test, places, experience, preferred_equipment) -@test pvalue(test_cmi) > α diff --git a/test/independence/SurrogateTest/TransferEntropyPairwise.jl b/test/independence/SurrogateTest/TransferEntropyPairwise.jl deleted file mode 100644 index 66674bc3d..000000000 --- a/test/independence/SurrogateTest/TransferEntropyPairwise.jl +++ /dev/null @@ -1,23 +0,0 @@ -using Random -rng = Random.MersenneTwister(1234) -sys = system(Logistic2Unidir(; c_xy = 0.5, rng)) -x, y = columns(first(trajectory(sys, 1000, Ttr = 10000))) - -# ArgumentError thrown if an estimator isn't provided. 
-@test_throws ArgumentError SurrogateTest(TEShannon()) -@test SurrogateTest(TEShannon(), FPVP()) isa SurrogateTest - -α = 0.0287 # Arbitrary significance level 1 - α = 0.9713 -test = SurrogateTest(TEShannon(), FPVP(); rng) - -# The ground truth is X → Y, so we should be able to reject the null -# when testing transferentropy(x → y) -@test pvalue(independence(test, x, y)) < α - -# The ground truth is X → Y, so we shouldn't be able to reject the null -# when testing transferentropy(y → x) -@test pvalue(independence(test, y, x)) > α - -x, y = columns(first(trajectory(sys, 100, Ttr = 1000))) -@test independence(SurrogateTest(TEShannon(), Lindner()), x, y) isa SurrogateTestResult -@test independence(SurrogateTest(TEShannon(), Zhu1()), x, y) isa SurrogateTestResult diff --git a/test/independence/SurrogateTest/crossmappings.jl b/test/independence/SurrogateTest/crossmappings.jl deleted file mode 100644 index ec3c9b088..000000000 --- a/test/independence/SurrogateTest/crossmappings.jl +++ /dev/null @@ -1,29 +0,0 @@ -using Random -rng = MersenneTwister(1234) -x, y = rand(rng, 500), rand(rng, 500) -z = x.+ y - -d = 2 -τ = -1 - -# Regular variant. -test_ccm = SurrogateTest(CCM(; d, τ), RandomVectors(libsizes = 300; replace = true, rng)) -test_pai = SurrogateTest(PAI(; d, τ), RandomVectors(libsizes = 300; replace = true, rng)) -@test_throws ArgumentError SurrogateTest(Ensemble(CCM(), RandomVectors(libsizes = 100:100:300))) -@test_throws ArgumentError SurrogateTest(CCM(), RandomVectors(libsizes = 100:100:300)) - -α = 0.03 # arbitrarily set confidence level to 1 - α -@test pvalue(independence(test_ccm, x, y)) > α -@test pvalue(independence(test_ccm, x, z)) < α -@test pvalue(independence(test_ccm, x, z)) < α -@test pvalue(independence(test_pai, x, y)) > α - -# Ensemble variant. -eccm = Ensemble(CCM(; d, τ), RandomVectors(libsizes = 100; replace = true, rng)) -epai = Ensemble(PAI(; d, τ), RandomVectors(libsizes = 100; replace = true, rng)) -test_ccm = SurrogateTest(eccm) -test_pai = SurrogateTest(epai) -@test pvalue(independence(test_ccm, x, y)) > α -@test pvalue(independence(test_ccm, x, z)) < α -@test pvalue(independence(test_ccm, x, z)) < α -@test pvalue(independence(test_pai, x, y)) > α diff --git a/test/independence/independence.jl b/test/independence/independence.jl index b74d30248..2bafe60a7 100644 --- a/test/independence/independence.jl +++ b/test/independence/independence.jl @@ -1,5 +1,5 @@ include("LocalPermutationTest/local_permutation_test.jl") -include("SurrogateTest/SurrogateTest.jl") +include("SurrogateAssociationTest/SurrogateAssociationTest.jl") include("JointDistanceDistributionTest.jl") -include("PATest.jl") +#include("PATest.jl") include("CorrTest.jl") diff --git a/test/integrations/test_uncertaindata_integration.jl b/test/integrations/test_uncertaindata_integration.jl deleted file mode 100644 index 2db11240e..000000000 --- a/test/integrations/test_uncertaindata_integration.jl +++ /dev/null @@ -1,20 +0,0 @@ -# The uncertainty handling framework in this file will be added -# as part of a 1.X release. Can be ignored for now. 
- -using CausalityTools, UncertainData - -n = 20 - -# Mean values for two time series x and y, and standard deviations for those values -vals_x, stds_x = rand(n), rand(n) * 0.1 -vals_y, stds_y = rand(n), rand(n) * 0.1 -vals_z, stds_z = rand(n), rand(n) * 0.1 - -# Represent values as normal distributions -uvals_x = [UncertainValue(Normal, vals_x[i], stds_x[i]) for i = 1:n] -uvals_y = [UncertainValue(Normal, vals_y[i], stds_y[i]) for i = 1:n] -uvals_z = [UncertainValue(Normal, vals_z[i], stds_z[i]) for i = 1:n] - -X = UncertainValueStateSpaceSet(uvals_x) -Y = UncertainValueStateSpaceSet(uvals_y) -Z = UncertainValueStateSpaceSet(uvals_z) diff --git a/test/methods/closeness/hmeasure.jl b/test/methods/closeness/hmeasure.jl index 23b617ce0..426c75233 100644 --- a/test/methods/closeness/hmeasure.jl +++ b/test/methods/closeness/hmeasure.jl @@ -1,14 +1,18 @@ -x, y = rand(100), rand(100) -X, Y = StateSpaceSet(rand(100, 3)), StateSpaceSet(rand(100, 2)) -Z, W = StateSpaceSet(rand(110, 2)), StateSpaceSet(rand(90, 4)) +x, y = rand(200), rand(200) +X, Y = StateSpaceSet(rand(200, 3)), StateSpaceSet(rand(200, 2)) +Z, W = StateSpaceSet(rand(210, 2)), StateSpaceSet(rand(190, 4)) dx, τx = 2, 1 dy, τy = 2, 1 -# V2.X -@test h_measure(HMeasure(), x, y) isa Float64 -@test h_measure(HMeasure(dx = dx, τx = τx), x, Y) isa Float64 -@test h_measure(HMeasure(dy = dy, τy = τy), X, y) isa Float64 -@test h_measure(HMeasure(), X, Y) isa Float64 +@test HMeasure() isa ClosenessMeasure +@test association(HMeasure(), x, y) isa Float64 +@test association(HMeasure(dx = dx, τx = τx), x, Y) isa Float64 +@test association(HMeasure(dy = dy, τy = τy), X, y) isa Float64 +@test association(HMeasure(), X, Y) isa Float64 # test that multivariate StateSpaceSets are being length-matched -@test h_measure(HMeasure(), X, Z) isa Float64 -@test h_measure(HMeasure(), W, X) isa Float64 +@test association(HMeasure(), X, Z) isa Float64 +@test association(HMeasure(), W, X) isa Float64 + +# Deprecations +@test_logs (:warn, "Convenience function `h_measure` is deprecated. Use `association(HMeasure(; kwargs...), source, target) instead.") h_measure(HMeasure(), x, y) +@test_logs (:warn, "Convenience function `h_measure` is deprecated. Use `h_measure(HMeasure(; kwargs...), source, target)` instead.") h_measure(x, y) \ No newline at end of file diff --git a/test/methods/closeness/joint_distance_distribution.jl b/test/methods/closeness/joint_distance_distribution.jl index 5063cc66f..62089e7d3 100644 --- a/test/methods/closeness/joint_distance_distribution.jl +++ b/test/methods/closeness/joint_distance_distribution.jl @@ -6,3 +6,11 @@ x, y = rand(rng, 1000), rand(rng, 1000) @test jdd(x, y) isa Vector @test jdd(OneSampleTTest, x, y) isa OneSampleTTest + + +# v2.X and upwards +@test association(JointDistanceDistribution(), x, y) isa Vector + +@test_logs (:warn, "Convenience function `jdd` is deprecated. Use `association(JointDistanceDistribution(; kwargs...), x, y)` instead.") jdd(x, y) +@test_logs (:warn, "Convenience function `jdd` is deprecated. Use `association(JointDistanceDistribution(; kwargs...), x, y)` instead.") jdd(JointDistanceDistribution(), x, y) +@test_logs (:warn, "jdd(::OneSampleTTest, x, y; kwargs...) is deprecated. 
Instead, do `measure = JointDistanceDistribution(); independence(JointDistanceDistributionTest(measure), x, y)`.") jdd(OneSampleTTest, x, y) \ No newline at end of file diff --git a/test/methods/closeness/lmeasure.jl b/test/methods/closeness/lmeasure.jl new file mode 100644 index 000000000..3e50f6d2e --- /dev/null +++ b/test/methods/closeness/lmeasure.jl @@ -0,0 +1,18 @@ +x, y = rand(200), rand(200) +X, Y = StateSpaceSet(rand(200, 3)), StateSpaceSet(rand(200, 2)) +Z, W = StateSpaceSet(rand(210, 2)), StateSpaceSet(rand(190, 4)) +dx, τx = 2, 1 +dy, τy = 2, 1 + +@test LMeasure() isa ClosenessMeasure +@test association(LMeasure(), x, y) isa Float64 +@test association(LMeasure(dx = dx, τx = τx), x, Y) isa Float64 +@test association(LMeasure(dy = dy, τy = τy), X, y) isa Float64 +@test association(LMeasure(), X, Y) isa Float64 +# test that multivariate StateSpaceSets are being length-matched +@test association(LMeasure(), X, Z) isa Float64 +@test association(LMeasure(), W, X) isa Float64 + +# Deprecations +@test_logs (:warn, "Convenience function `l_measure` is deprecated. Use `association(LMeasure(; kwargs...), source, target) instead.") l_measure(LMeasure(), x, y) +@test_logs (:warn, "Convenience function `l_measure` is deprecated. Use `l_measure(LMeasure(; kwargs...), source, target)` instead.") l_measure(x, y) \ No newline at end of file diff --git a/test/methods/closeness/mmeasure.jl b/test/methods/closeness/mmeasure.jl index 40e8b3294..72f0b040c 100644 --- a/test/methods/closeness/mmeasure.jl +++ b/test/methods/closeness/mmeasure.jl @@ -1,14 +1,18 @@ -x, y = rand(100), rand(100) -X, Y = StateSpaceSet(rand(100, 3)), StateSpaceSet(rand(100, 2)) -Z, W = StateSpaceSet(rand(110, 2)), StateSpaceSet(rand(90, 4)) +x, y = rand(200), rand(200) +X, Y = StateSpaceSet(rand(200, 3)), StateSpaceSet(rand(200, 2)) +Z, W = StateSpaceSet(rand(210, 2)), StateSpaceSet(rand(190, 4)) dx, τx = 2, 1 dy, τy = 2, 1 -# V2.X -@test m_measure(MMeasure(), x, y) isa Float64 -@test m_measure(MMeasure(dx = dx, τx = τx), x, Y) isa Float64 -@test m_measure(MMeasure(dy = dy, τy = τy), X, y) isa Float64 -@test m_measure(MMeasure(), X, Y) isa Float64 +@test MMeasure() isa ClosenessMeasure +@test association(MMeasure(), x, y) isa Float64 +@test association(MMeasure(dx = dx, τx = τx), x, Y) isa Float64 +@test association(MMeasure(dy = dy, τy = τy), X, y) isa Float64 +@test association(MMeasure(), X, Y) isa Float64 # test that multivariate StateSpaceSets are being length-matched -@test m_measure(MMeasure(), X, Z) isa Float64 -@test m_measure(MMeasure(), W, X) isa Float64 +@test association(MMeasure(), X, Z) isa Float64 +@test association(MMeasure(), W, X) isa Float64 + +# Deprecations +@test_logs (:warn, "Convenience function `m_measure` is deprecated. Use `association(MMeasure(; kwargs...), source, target) instead.") m_measure(MMeasure(), x, y) +@test_logs (:warn, "Convenience function `m_measure` is deprecated. 
Use `m_measure(MMeasure(; kwargs...), source, target)` instead.") m_measure(x, y) \ No newline at end of file diff --git a/test/methods/closeness/smeasure.jl b/test/methods/closeness/smeasure.jl index ee3a696e1..54dbfa8d2 100644 --- a/test/methods/closeness/smeasure.jl +++ b/test/methods/closeness/smeasure.jl @@ -1,23 +1,18 @@ -x, y = rand(100), rand(100) -X, Y = StateSpaceSet(rand(100, 3)), StateSpaceSet(rand(100, 2)) -Z, W = StateSpaceSet(rand(110, 2)), StateSpaceSet(rand(90, 4)) +x, y = rand(200), rand(200) +X, Y = StateSpaceSet(rand(200, 3)), StateSpaceSet(rand(200, 2)) +Z, W = StateSpaceSet(rand(210, 2)), StateSpaceSet(rand(190, 4)) dx, τx = 2, 1 dy, τy = 2, 1 -# Compat -@test s_measure(x, y) isa Float64 -@test s_measure(x, Y, dx = dx, τx = τx) isa Float64 -@test s_measure(X, y, dy = dy, τy = τy) isa Float64 -@test s_measure(X, Y) isa Float64 +@test SMeasure() isa ClosenessMeasure +@test association(SMeasure(), x, y) isa Float64 +@test association(SMeasure(dx = dx, τx = τx), x, Y) isa Float64 +@test association(SMeasure(dy = dy, τy = τy), X, y) isa Float64 +@test association(SMeasure(), X, Y) isa Float64 # test that multivariate StateSpaceSets are being length-matched -@test s_measure(X, Z) isa Float64 -@test s_measure(W, X) isa Float64 +@test association(SMeasure(), X, Z) isa Float64 +@test association(SMeasure(), W, X) isa Float64 -# V2.X -@test s_measure(SMeasure(), x, y) isa Float64 -@test s_measure(SMeasure(dx = dx, τx = τx), x, Y) isa Float64 -@test s_measure(SMeasure(dy = dy, τy = τy), X, y) isa Float64 -@test s_measure(SMeasure(), X, Y) isa Float64 -# test that multivariate StateSpaceSets are being length-matched -@test s_measure(SMeasure(), X, Z) isa Float64 -@test s_measure(SMeasure(), W, X) isa Float64 +# Deprecations +@test_logs (:warn, "Convenience function `s_measure` is deprecated. Use `association(SMeasure(; kwargs...), x, y)` instead.") s_measure(SMeasure(), x, y) +@test_logs (:warn, "Convenience function `s_measure` is deprecated. Use `association(SMeasure(; kwargs...), x, y)` instead.") s_measure(x, y) \ No newline at end of file diff --git a/test/methods/correlation/distance_correlation.jl b/test/methods/correlation/distance_correlation.jl index 3726645b2..409830538 100644 --- a/test/methods/correlation/distance_correlation.jl +++ b/test/methods/correlation/distance_correlation.jl @@ -7,12 +7,12 @@ a = StateSpaceSet(repeat([1], 100)) @test CausalityTools.distance_variance(a) == 0.0 v = rand(1000, 3); w = 0.5 .* v .+ 1.2; -@test distance_correlation(v, w) ≈ 1.0 +@test association(DistanceCorrelation(), v, w) ≈ 1.0 # Comparison with `energy` R package, which is by the authors of the original paper x = -1.0:0.1:1.0 |> collect y = map(xᵢ -> xᵢ^3 - 2xᵢ^2 - 3, x) z = map(yᵢ -> yᵢ^2 - 2yᵢ, y) -dcov = distance_correlation(x, y) +dcov = association(DistanceCorrelation(), x, y) @test round(dcov, digits = 3) == 0.673 # ---------------------------- @@ -25,5 +25,12 @@ M = reshape([0.0, 0.2, 0.3, 0.2, 0.0, 0.6, 0.3, 0.6, 0.3], 3, 3) 0.15 0.0 -0.15; -0.15 -0.15 0.0] -@test round(distance_correlation(x, z, y), digits = 5) ≈ round(0.1556139, digits = 5) +@test round(association(DistanceCorrelation(), x, z, y), digits = 5) ≈ round(0.1556139, digits = 5) @test round(CausalityTools.distance_covariance(x, z, y), digits = 5) ≈ round(0.02379782, digits = 5) + +# Deprecations +@test_logs (:warn, "Convenience function `distance_correlation` is deprecated. 
Use `association(DistanceCorrelation(), x, y)` instead.") distance_correlation(x, y) +@test_logs (:warn, "Convenience function `distance_correlation` is deprecated. Use `association(DistanceCorrelation(), x, y, z)` instead.") distance_correlation(x, y, z) + +@test CausalityTools.min_inputs_vars(DistanceCorrelation()) == 2 +@test CausalityTools.max_inputs_vars(DistanceCorrelation()) == 3 \ No newline at end of file diff --git a/test/methods/correlation/partial_correlation.jl b/test/methods/correlation/partial_correlation.jl index 26ec70121..93f84c97d 100644 --- a/test/methods/correlation/partial_correlation.jl +++ b/test/methods/correlation/partial_correlation.jl @@ -1,7 +1,12 @@ using Test -using StatsBase +using StatsBase: partialcor x = rand(100) y = rand(100) z = rand(100, 2) -@test partial_correlation(x, y, z) ≈ StatsBase.partialcor(x, y, z) +@test association(PartialCorrelation(), x, y, z) ≈ partialcor(x, y, z) + +@test_logs (:warn, "Convenience function `partial_correlation` is deprecated. Use `association(PartialCorrelation(), x, y, z)` instead.") partial_correlation(x, y, z) + +@test CausalityTools.min_inputs_vars(PartialCorrelation()) == 3 +@test CausalityTools.max_inputs_vars(PartialCorrelation()) == Inf \ No newline at end of file diff --git a/test/methods/correlation/pearson_correlation.jl b/test/methods/correlation/pearson_correlation.jl index 8c1d32b88..436e8333e 100644 --- a/test/methods/correlation/pearson_correlation.jl +++ b/test/methods/correlation/pearson_correlation.jl @@ -1,6 +1,13 @@ using Test using Statistics +using StateSpaceSets + x = rand(100) y = rand(100) +X, Y = StateSpaceSet(x), StateSpaceSet(y) + +@test association(PearsonCorrelation(), x, y) ≈ Statistics.cor(x, y) +@test association(PearsonCorrelation(), X, Y) ≈ Statistics.cor(x, y) -@test pearson_correlation(x, y) ≈ Statistics.cor(x, y) +# Deprecations +@test_logs (:warn, "Convenience function `pearson_correlation` is deprecated. 
Use `association(PearsonCorrelation(; kwargs...), source, target)` instead.") pearson_correlation(x, y) diff --git a/test/methods/cross_mappings/ccm_like.jl b/test/methods/cross_mappings/ccm_like.jl index c3533c5b0..ee984e578 100644 --- a/test/methods/cross_mappings/ccm_like.jl +++ b/test/methods/cross_mappings/ccm_like.jl @@ -1,87 +1,122 @@ using Test using CausalityTools -using StateSpaceSets: StateSpaceSet -n = 1000 -x, y, z, w = rand(n), rand(n), StateSpaceSet(rand(n, 3)), StateSpaceSet(rand(n + 1, 3)) - -τ = -1 - -# Deprecated -@test crossmap(x, y, 3, τ) isa Float64 -@test crossmap(x, y, 3, τ , :random) isa Vector{Float64} -@test crossmap(x, y, 3, τ, :segment) isa Vector{Float64} - +using StateSpaceSets +using Random +rng = Xoshiro(1234) # V2.x @testset "ConvergentCrossMapping" begin + n = 600 + x, y, z, w = rand(rng, n), rand(rng, n), StateSpaceSet(rand(rng, n, 3)), StateSpaceSet(rand(rng, n + 1, 3)) + τ = -1 + def = CCM(; τ) @test ConvergentCrossMapping() isa ConvergentCrossMapping @test ConvergentCrossMapping() isa CrossmapMeasure @test CCM() isa ConvergentCrossMapping - @test crossmap(CCM(; τ), ExpandingSegment(libsizes = 100), x, y) isa Real - @test crossmap(CCM(; τ), RandomSegment(libsizes = 100), x, y) isa Real - @test crossmap(CCM(; τ), RandomVectors(libsizes = 100, replace = false), x, y) isa Real - @test crossmap(CCM(; τ), RandomVectors(libsizes = 100, replace = true), x, y) isa Real - @test crossmap(CCM(; τ), ExpandingSegment(libsizes = 100:100:500), x, y) isa Vector{<:Real} - @test crossmap(CCM(; τ), RandomSegment(libsizes = 100:100:500), x, y) isa Vector{<:Real} - @test crossmap(CCM(; τ), RandomVectors(libsizes = 100:100:500, replace = false), x, y) isa Vector{<:Real} - @test crossmap(CCM(; τ), RandomVectors(libsizes = 100:100:500, replace = true), x, y) isa Vector{<:Real} - @test_throws ArgumentError crossmap(CCM(; τ), RandomSegment(libsizes = 100), x, w) isa Real - @test_throws ArgumentError crossmap(CCM(; τ), RandomVectors(libsizes = 100), x, w) isa Real + @test crossmap(ExpandingSegment(def, libsizes = 100), x, y) isa Real + @test crossmap(RandomSegment(def, libsizes = 100), x, y) isa Real + @test crossmap(RandomVectors(def, libsizes = 100, replace = false), x, y) isa Real + @test crossmap(RandomVectors(def, libsizes = 100, replace = true), x, y) isa Real + @test crossmap(ExpandingSegment(def, libsizes = 100:100:500), x, y) isa Vector{<:Real} + @test crossmap(RandomSegment(def, libsizes = 100:100:500), x, y) isa Vector{<:Real} + @test crossmap(RandomVectors(def, libsizes = 100:100:500, replace = false), x, y) isa Vector{<:Real} + @test crossmap(RandomVectors(def, libsizes = 100:100:500, replace = true), x, y) isa Vector{<:Real} + @test_throws ArgumentError crossmap(RandomSegment(def, libsizes = 100), x, w) isa Real + @test_throws ArgumentError crossmap(RandomVectors(def, libsizes = 100), x, w) isa Real # Ensemble analysis libsizes = 50 - e = Ensemble(CCM(), RandomVectors(; libsizes), nreps = 7) + e = Ensemble(RandomVectors(def; libsizes), nreps = 7) @test crossmap(e, x, y) isa Vector{<:Real} @test crossmap(e, x, y) |> length == 7 libsizes = 20:10:40 - e = Ensemble(CCM(), RandomVectors(; libsizes), nreps = 7) + e = Ensemble(RandomVectors(def; libsizes), nreps = 7) @test crossmap(e, x, y) isa Vector{Vector{T}} where T @test crossmap(e, x, y) |> length == length(libsizes) @test all(length.(crossmap(e, x, y)) .== 7) + + @testset "Embed using CCM" begin + x, y = rand(rng, 100), rand(rng, 100) + # Embedding + d, colidx_target, colidxs_source = 
CausalityTools.embed(ConvergentCrossMapping(), x, y) + @test d isa AbstractStateSpaceSet + @test colidx_target isa Int + @test colidxs_source isa AbstractVector{Int} + + # Segment length + @test CausalityTools.max_segmentlength(def, rand(10)) == 10 - 2 + 1 + def = ConvergentCrossMapping(d = 2) + + # Num of neighbors + def = ConvergentCrossMapping(d = 2) + @test CausalityTools.n_neighbors_simplex(def) == 3 + + # If using forward embedding, warn. + msg = """τ > 0. You're using future values of source to predict the target. Turn \ + off this warning by setting `embed_warn = false` in the \ + `PairwiseAsymmetricInference` constructor.""" + @test_warn msg CausalityTools.embed(ConvergentCrossMapping(τ = 1), x, y) + end end @testset "PairwiseAsymmetricInference" begin + n = 600 @test PairwiseAsymmetricInference() isa PairwiseAsymmetricInference @test PairwiseAsymmetricInference() isa CrossmapMeasure @test PAI() isa PairwiseAsymmetricInference + τ = -1 + def = PAI(; τ) + x, y, z, w = rand(rng, n), rand(rng, n), StateSpaceSet(rand(rng, n, 3)), StateSpaceSet(rand(rng, n + 1, 3)) + @test crossmap(ExpandingSegment(def; libsizes = 100), x, y) isa Real + @test crossmap(RandomSegment(def; libsizes = 100), x, y) isa Real + @test crossmap(RandomVectors(def; libsizes = 100, replace = false), x, y) isa Real + @test crossmap(RandomVectors(def; libsizes = 100, replace = true), x, y) isa Real + @test crossmap(ExpandingSegment(def; libsizes = 100:100:500), x, y) isa Vector{<:Real} + @test crossmap(RandomSegment(def; libsizes = 100:100:500), x, y) isa Vector{<:Real} + @test crossmap(RandomVectors(def; libsizes = 100:100:500, replace = false), x, y) isa Vector{<:Real} + @test crossmap(RandomVectors(def; libsizes = 100:100:500, replace = true), x, y) isa Vector{<:Real} + @test_throws ArgumentError crossmap(RandomSegment(def; libsizes = 100), x, w) isa Real + @test_throws ArgumentError crossmap(RandomVectors(def; libsizes = 100), x, w) isa Real + + @testset "Embed using CCM" begin + x, y = rand(rng, 100), rand(rng, 100) + # Embedding + d, colidx_target, colidxs_source = CausalityTools.embed(def, x, y) + @test d isa AbstractStateSpaceSet + @test colidx_target isa Int + @test colidxs_source isa AbstractVector{Int} + + # Segment length + @test CausalityTools.max_segmentlength(def, rand(10)) == 10 - 2 + 1 + def = ConvergentCrossMapping(d = 2) + # Num of neighbors + def = ConvergentCrossMapping(d = 2) + @test CausalityTools.n_neighbors_simplex(def) == 3 - @test crossmap(PAI(; τ), ExpandingSegment(libsizes = 100), x, y) isa Real - @test crossmap(PAI(; τ), RandomSegment(libsizes = 100), x, y) isa Real - @test crossmap(PAI(; τ), RandomVectors(libsizes = 100, replace = false), x, y) isa Real - @test crossmap(PAI(; τ), RandomVectors(libsizes = 100, replace = true), x, y) isa Real - @test crossmap(PAI(; τ), ExpandingSegment(libsizes = 100:100:500), x, y) isa Vector{<:Real} - @test crossmap(PAI(; τ), RandomSegment(libsizes = 100:100:500), x, y) isa Vector{<:Real} - @test crossmap(PAI(; τ), RandomVectors(libsizes = 100:100:500, replace = false), x, y) isa Vector{<:Real} - @test crossmap(PAI(; τ), RandomVectors(libsizes = 100:100:500, replace = true), x, y) isa Vector{<:Real} - @test_throws ArgumentError crossmap(PAI(; τ), RandomSegment(libsizes = 100), x, w) isa Real - @test_throws ArgumentError crossmap(PAI(; τ), RandomVectors(libsizes = 100), x, w) isa Real + # If using forward embedding, warn. + msg = """τ > 0. You're using future values of source to predict the target. 
Turn \ + off this warning by setting `embed_warn = false` in the \ + `PairwiseAsymmetricInference` constructor.""" + @test_warn msg CausalityTools.embed(ConvergentCrossMapping(τ = 1), x, y) + end end @testset "Estimator specifics" begin x, y = rand(50), rand(50) - @test ExpandingSegment(libsizes = 100) isa CrossmapEstimator - @test RandomSegment(libsizes = 100) isa CrossmapEstimator - @test RandomVectors(libsizes = 100) isa CrossmapEstimator - @test Ensemble(CCM(), RandomVectors(libsizes = 100)) isa Ensemble - @test crossmap(PAI(; τ), RandomVectors(libsizes = 10), x, y) |> length == 1 - @test crossmap(PAI(; τ), RandomVectors(libsizes = 10:10:20), x, y) |> length == 2 - @test crossmap(PAI(; τ), RandomVectors(libsizes = [10, 20, 30]), x, y) |> length == 3 - @test crossmap(PAI(; τ), RandomSegment(libsizes = 10), x, y) |> length == 1 - @test crossmap(PAI(; τ), RandomSegment(libsizes = 10:10:20), x, y) |> length == 2 - @test crossmap(PAI(; τ), RandomSegment(libsizes = [10, 20, 30]), x, y) |> length == 3 - @test crossmap(PAI(; τ), ExpandingSegment(libsizes = 10), x, y) |> length == 1 - @test crossmap(PAI(; τ), ExpandingSegment(libsizes = 10:10:20), x, y) |> length == 2 - @test crossmap(PAI(; τ), ExpandingSegment(libsizes = [10, 20, 30]), x, y) |> length == 3 -end - -# TODO: remove for v2.0 -@testset "Compat" begin - x, y = rand(100), rand(100) - @test crossmap(x, y, 2, τ) isa Real - @test pai(x, y, 2, τ) isa Real - @test length(crossmap(x, y, 2, τ, :segment, nreps = 100)) == 100 - @test length(pai(x, y, 2, τ, :segment, nreps = 100)) == 100 - @test length(crossmap(x, y, 2, τ, :random, nreps = 100)) == 100 - @test length(pai(x, y, 2, τ, :random, nreps = 100)) == 100 + τ = -1 + def = CCM(; τ) + @test ExpandingSegment(def; libsizes = 100) isa CrossmapEstimator + @test RandomSegment(def; libsizes = 100) isa CrossmapEstimator + @test RandomVectors(def; libsizes = 100) isa CrossmapEstimator + @test Ensemble(RandomVectors(def; libsizes = 100)) isa Ensemble + @test crossmap(RandomVectors(def; libsizes = 10), x, y) |> length == 1 + @test crossmap(RandomVectors(def; libsizes = 10:10:20), x, y) |> length == 2 + @test crossmap(RandomVectors(def; libsizes = [10, 20, 30]), x, y) |> length == 3 + @test crossmap(RandomSegment(def; libsizes = 10), x, y) |> length == 1 + @test crossmap(RandomSegment(def; libsizes = 10:10:20), x, y) |> length == 2 + @test crossmap(RandomSegment(def; libsizes = [10, 20, 30]), x, y) |> length == 3 + @test crossmap(ExpandingSegment(def; libsizes = 10), x, y) |> length == 1 + @test crossmap(ExpandingSegment(def; libsizes = 10:10:20), x, y) |> length == 2 + @test crossmap(ExpandingSegment(def; libsizes = [10, 20, 30]), x, y) |> length == 3 end diff --git a/test/methods/deprecations.jl b/test/methods/deprecations.jl new file mode 100644 index 000000000..01c0c671a --- /dev/null +++ b/test/methods/deprecations.jl @@ -0,0 +1,11 @@ +@test_throws ArgumentError ExpandingSegment(; libsizes = 10:10:50) +@test_throws ArgumentError RandomVectors(; libsizes = 10:10:50) +@test_throws ArgumentError RandomSegment(; libsizes = 10:10:50) +est = RandomSegment(CCM(); libsizes = 10:10:50) +x, y = rand(100), rand(100) +@test_throws ArgumentError crossmap(CCM(), est, x, y) + +x, y = rand(100), rand(100) +τ = -2 +@test_throws ArgumentError crossmap(x, y, 2, τ) isa Real +@test_throws ArgumentError pai(x, y, 2, τ) isa Real diff --git a/test/methods/infomeasures/conditional_mutualinfo/CMIShannon.jl b/test/methods/infomeasures/conditional_mutualinfo/CMIShannon.jl deleted file mode 100644 index 
945088519..000000000 --- a/test/methods/infomeasures/conditional_mutualinfo/CMIShannon.jl +++ /dev/null @@ -1,119 +0,0 @@ -using CausalityTools -using StateSpaceSets: StateSpaceSet - -probests = [ - ValueHistogram(RectangularBinning(3)) - #ValueHistogram(FixedRectangularBinning(0, 1, 3)) - NaiveKernel(0.2) # probably shouldn't be used. -] - -probests_for_timeseries = [ - SymbolicPermutation(m = 3), - Dispersion(c = 3, m = 2) -] - -k = 5 -diff_entropy_estimators = [ - Kraskov(; k), - KozachenkoLeonenko(), - ZhuSingh(; k), - Zhu(; k), - LeonenkoProzantoSavani(; k), - Lord(; k = k*5), -] - -diff_mi_estimators = [ - GaussianMI(), - KSG1(; k), - KSG2(; k), - GaoKannanOhViswanath(; k), - GaoOhViswanath(; k), -] - - -x = StateSpaceSet(rand(1000, 2)) -y = StateSpaceSet(rand(1000, 1)) -z = StateSpaceSet(rand(1000, 1)) - -@test FPVP() isa FPVP -@test MesnerShalisi() isa MesnerShalizi -@test MesnerShalizi() isa MesnerShalizi -@test PoczosSchneiderCMI() isa PoczosSchneiderCMI -@test Rahimzamani() isa Rahimzamani -@test GaussianCMI() isa GaussianCMI -@test condmutualinfo(FPVP(), x, y, z) isa Real -@test condmutualinfo(MesnerShalizi(), x, y, z) isa Real -@test condmutualinfo(PoczosSchneiderCMI(), x, y, z) isa Real -@test condmutualinfo(Rahimzamani(), x, y, z) isa Real -@test condmutualinfo(GaussianCMI(), x, y, z) isa Real - -@test_throws ArgumentError condmutualinfo(CMIShannon(), FPVP(), x, y) -@test_throws ArgumentError condmutualinfo(CMIShannon(), FPVP(), x) - -@testset "CMIShannon" begin - @test m = CMIShannon(base = 2) isa CMIShannon - - x = StateSpaceSet(rand(10000, 2)) - y = StateSpaceSet(rand(10000, 1)) - z = StateSpaceSet(rand(10000, 1)) - w = StateSpaceSet(rand(10000, 1)) - - @testset "Defaults" begin - s, t, c = rand(100), rand(100), rand(100) - est_bin = ValueHistogram(RectangularBinning(3)) - est_ksg = KSG1() - - # binning estimator yields non-negative values - @test condmutualinfo(CMIShannon(base = 2), est_bin, s, t, c) >= 0.0 - @test condmutualinfo(CMIShannon(base = 2), est_ksg, s, t, c) isa Real # not guaranteed to be >= 0 - end - - @testset "Definition: CMIDefinitionShannonH4" begin - @test CMIShannon() isa CMIShannon - # ---------------------------------------------------------------- - # Dedicated estimators. - # ---------------------------------------------------------------- - # Just test that each estimator is reasonably close to zero for data from a uniform - # distribution. This number varies wildly between estimators, so we're satisfied - # to test just that they don't blow up. - @testset "$(typeof(diff_mi_estimators[i]).name.name)" for i in eachindex(diff_mi_estimators) - est = diff_mi_estimators[i] - mi = condmutualinfo(CMIShannon(base = 2), est, x, y, z) - @test mi isa Real - @test -0.5 < mi < 0.1 - end - - # ---------------------------------------------------------------- - # Probability-based estimators. - # - # We can't guarantee that the result is any particular value, because these are just - # plug-in estimators. Just check that pluggin in works. 
- # ---------------------------------------------------------------- - - # Estimators that accept StateSpaceSet inputs - @testset "$(typeof(probests[i]).name.name)" for i in eachindex(probests) - est = probests[i] - @test condmutualinfo(CMIShannon(base = 2), est, x, y, z) isa Real # default - end - - # Estimators that only accept timeseries input - a, b, c = rand(10000), rand(10000), rand(10000) - - @testset "$(typeof(probests_for_timeseries[i]).name)" for i in eachindex(probests_for_timeseries) - est = probests_for_timeseries[i] - cmi = CMIShannon(base = 2) - @test condmutualinfo(cmi, est, a, b, c) >= 0 - @test condmutualinfo(cmi, est, x, y, z) >= 0 - end - - # ---------------------------------------------------------------- - # Entropy-based estimators. - # ---------------------------------------------------------------- - @testset "$(typeof(diff_entropy_estimators[i]).name.name)" for i in eachindex(diff_entropy_estimators) - est = diff_entropy_estimators[i] - mi = condmutualinfo(CMIShannon(base = 2), est, x, y, z) - @test mi isa Real - @test -0.5 < mi < 0.1 - end - end -end diff --git a/test/methods/infomeasures/conditional_mutualinfo/cmi.jl b/test/methods/infomeasures/conditional_mutualinfo/cmi.jl deleted file mode 100644 index 2092d6444..000000000 --- a/test/methods/infomeasures/conditional_mutualinfo/cmi.jl +++ /dev/null @@ -1,2 +0,0 @@ -include("CMIShannon.jl") -include("contingency.jl") diff --git a/test/methods/infomeasures/conditional_mutualinfo/contingency.jl b/test/methods/infomeasures/conditional_mutualinfo/contingency.jl deleted file mode 100644 index 2c42f284d..000000000 --- a/test/methods/infomeasures/conditional_mutualinfo/contingency.jl +++ /dev/null @@ -1,19 +0,0 @@ -# Pre-discretized -x = rand(["a", "b", "c"], 200) -y = rand(["hello", "yoyo", "heyhey"], 200) -z = rand(["a", "b"], 200) -@test condmutualinfo(CMIShannon(), Contingency(), x, y, z) >= 0.0 -@test condmutualinfo(CMIShannon(), Contingency(), x, y, z) isa Real -@test condmutualinfo(CMIRenyiJizba(), Contingency(), x, y, z) isa Real -@test condmutualinfo(CMIRenyiSarbu(), Contingency(), x, y, z) isa Real - -# With discretization using a probabilities estimator -a, b, c = rand(100), rand(100), rand(100) -est = SymbolicPermutation(m = 2) -s1 = condmutualinfo(CMIShannon(), Contingency(est), a, b, c) -s2 = condmutualinfo(CMIShannon(), est, a, b, c) -@test s1 >= 0.0 -@test s1 ≈ s2 -@test condmutualinfo(CMIShannon(), Contingency(est), a, b, c) isa Real -@test condmutualinfo(CMIRenyiJizba(), Contingency(est), a, b, c) isa Real -@test condmutualinfo(CMIRenyiSarbu(), Contingency(est), a, b, c) isa Real diff --git a/test/methods/infomeasures/entropy_conditional/contingency_matrix.jl b/test/methods/infomeasures/entropy_conditional/contingency_matrix.jl deleted file mode 100644 index 5e63320e6..000000000 --- a/test/methods/infomeasures/entropy_conditional/contingency_matrix.jl +++ /dev/null @@ -1,26 +0,0 @@ -# Analytical test from Cover & Thomas textbook. 
-freqs_yx = [1//8 1//16 1//32 1//32; - 1//16 1//8 1//32 1//32; - 1//16 1//16 1//16 1//16; - 1//4 0//1 0//1 0//1]; - -freqs_xy = transpose(freqs_yx); -probs_xy = freqs_xy ./ sum(freqs_xy) -c_xy = ContingencyMatrix(probs_xy, freqs_xy) -ce_x_given_y = entropy_conditional(CEShannon(), c_xy) |> Rational -@test ce_x_given_y == 11//8 - -probs_yx = freqs_yx ./ sum(freqs_yx); -c_yx = ContingencyMatrix(probs_yx, freqs_yx); -ce_y_given_x = entropy_conditional(CEShannon(), c_yx) |> Rational -@test ce_y_given_x == 13//8 - -# We don't have analytical tests for the other conditional entropies, so just test -# that they successfully compute something. -x = rand(["a", "b", "c"], 100) -y = rand(["hello", "yoyo"], 100) -c = contingency_matrix(x, y) - -@test entropy_conditional(CEShannon(), c) >= 0.0 -@test entropy_conditional(CETsallisAbe(), c) isa Real -@test entropy_conditional(CETsallisFuruichi(), c) isa Real diff --git a/test/methods/infomeasures/entropy_conditional/continuous.jl b/test/methods/infomeasures/entropy_conditional/continuous.jl deleted file mode 100644 index 32bf5f677..000000000 --- a/test/methods/infomeasures/entropy_conditional/continuous.jl +++ /dev/null @@ -1,19 +0,0 @@ -k = 5 -ests = [ - Kraskov(; k), - KozachenkoLeonenko(), - ZhuSingh(; k), - Zhu(; k), - Lord(; k = k*5), -] - -# We don't have analytical tests for these estimators, so just test that -# they compute something. -x = rand(100) -y = rand(100) -@testset "$(typeof(ests[i]).name)" for i in eachindex(ests) - est = ests[i] - @test entropy_conditional(CEShannon(), est, x, y) isa Real - @test_throws ArgumentError entropy_conditional(CETsallisAbe(), est, x, y) - @test_throws ArgumentError entropy_conditional(CETsallisFuruichi(), est, x, y) -end diff --git a/test/methods/infomeasures/entropy_conditional/discrete.jl b/test/methods/infomeasures/entropy_conditional/discrete.jl deleted file mode 100644 index ff4ce2235..000000000 --- a/test/methods/infomeasures/entropy_conditional/discrete.jl +++ /dev/null @@ -1,17 +0,0 @@ -ests = [ - ValueHistogram(RectangularBinning(3)), - ValueHistogram(FixedRectangularBinning(0:0.25:1.0)), - SymbolicPermutation(m = 3), - Dispersion(m = 2, c = 3) -] - -# We don't have analytical tests for these estimators, so just test that -# they compute something. 
-x = rand(100) -y = rand(100) -@testset "$(typeof(ests[i]).name)" for i in eachindex(ests) - est = ests[i] - @test entropy_conditional(CEShannon(), est, x, y) >= 0 - @test entropy_conditional(CETsallisAbe(), est, x, y) isa Real - @test_throws ArgumentError entropy_conditional(CETsallisFuruichi(), est, x, y) -end diff --git a/test/methods/infomeasures/entropy_conditional/entropy_conditional.jl b/test/methods/infomeasures/entropy_conditional/entropy_conditional.jl deleted file mode 100644 index 2589d270c..000000000 --- a/test/methods/infomeasures/entropy_conditional/entropy_conditional.jl +++ /dev/null @@ -1,7 +0,0 @@ -@test CEShannon() isa CEShannon -@test CETsallisAbe() isa CETsallisAbe -@test CETsallisFuruichi() isa CETsallisFuruichi - -include("contingency_matrix.jl") -include("discrete.jl") -include("continuous.jl") diff --git a/test/methods/infomeasures/infomeasures.jl b/test/methods/infomeasures/infomeasures.jl deleted file mode 100644 index 3090ba05d..000000000 --- a/test/methods/infomeasures/infomeasures.jl +++ /dev/null @@ -1,7 +0,0 @@ -#testfile("divergence/divergence.jl") -testfile("entropy_conditional/entropy_conditional.jl") -testfile("mutualinfo/mutualinfo.jl") -testfile("conditional_mutualinfo/cmi.jl") -testfile("transferentropy.jl") -testfile("pmi.jl") -#testfile("predictive_asymmetry/predictive_asymmetry.jl") diff --git a/test/methods/infomeasures/mutualinfo/contingency_matrix.jl b/test/methods/infomeasures/mutualinfo/contingency_matrix.jl deleted file mode 100644 index f06219dd7..000000000 --- a/test/methods/infomeasures/mutualinfo/contingency_matrix.jl +++ /dev/null @@ -1,17 +0,0 @@ -# Pre-discretized -x = rand(["a", "b", "c"], 200) -y = rand(["hello", "yoyo", "heyhey"], 200) -@test mutualinfo(MIShannon(), Contingency(), x, y) >= 0.0 - -@test mutualinfo(MITsallisFuruichi(), Contingency(), x, y) isa Real -@test mutualinfo(MITsallisMartin(), Contingency(), x, y) isa Real -@test mutualinfo(MIRenyiJizba(), Contingency(), x, y) isa Real -@test mutualinfo(MIRenyiSarbu(), Contingency(), x, y) isa Real - -# With discretization using a probabilities estimator -z, w = rand(100), rand(100) -est = SymbolicPermutation(m = 3) -@test mutualinfo(MIShannon(), Contingency(est), z, w) >= 0.0 -@test mutualinfo(MITsallisFuruichi(), Contingency(est), z, w) isa Real -@test mutualinfo(MITsallisMartin(), Contingency(est), z, w) isa Real -@test mutualinfo(MIRenyiJizba(), Contingency(est), z, w) isa Real diff --git a/test/methods/infomeasures/mutualinfo/continuous.jl b/test/methods/infomeasures/mutualinfo/continuous.jl deleted file mode 100644 index 00a2e0296..000000000 --- a/test/methods/infomeasures/mutualinfo/continuous.jl +++ /dev/null @@ -1,21 +0,0 @@ -k = 5 -ests = [ - Kraskov(; k), - KozachenkoLeonenko(), - ZhuSingh(; k), - Zhu(; k), - Lord(; k = k*5), -] - -# We don't have analytical tests for these estimators, so just test that -# they compute something. 
-x = rand(100) -y = rand(100) -@testset "$(typeof(ests[i]).name)" for i in eachindex(ests) - est = ests[i] - @test mutualinfo(MIShannon(), est, x, y) isa Real - @test_throws ArgumentError mutualinfo(MITsallisFuruichi(), est, x, y) isa Real - @test_throws ArgumentError mutualinfo(MITsallisMartin(), est, x, y) isa Real - @test_throws ArgumentError mutualinfo(MIRenyiJizba(), est, x, y) isa Real - @test_throws ArgumentError mutualinfo(MIRenyiSarbu(), est, x, y) isa Real -end diff --git a/test/methods/infomeasures/mutualinfo/discrete.jl b/test/methods/infomeasures/mutualinfo/discrete.jl deleted file mode 100644 index 20897ecda..000000000 --- a/test/methods/infomeasures/mutualinfo/discrete.jl +++ /dev/null @@ -1,19 +0,0 @@ -ests = [ - ValueHistogram(RectangularBinning(3)), - ValueHistogram(FixedRectangularBinning(0:0.25:1.0)), - SymbolicPermutation(m = 3), - Dispersion(m = 2, c = 3) -] - -# We don't have analytical tests for these estimators, so just test that -# they compute something. -x = rand(100) -y = rand(100) -@testset "$(typeof(ests[i]).name)" for i in eachindex(ests) - est = ests[i] - @test mutualinfo(MIShannon(), est, x, y) >= 0 - @test mutualinfo(MITsallisFuruichi(), est, x, y) isa Real - @test mutualinfo(MITsallisMartin(), est, x, y) isa Real - @test mutualinfo(MIRenyiJizba(), est, x, y) isa Real - @test_throws ArgumentError mutualinfo(MIRenyiSarbu(), est, x, y) isa Real -end diff --git a/test/methods/infomeasures/mutualinfo/estimators.jl b/test/methods/infomeasures/mutualinfo/estimators.jl deleted file mode 100644 index 642a924e2..000000000 --- a/test/methods/infomeasures/mutualinfo/estimators.jl +++ /dev/null @@ -1,147 +0,0 @@ -using Random -import Random: seed! -rng = MersenneTwister(1234) -using Distributions: MvNormal -using LinearAlgebra: det - -k = 5 -ests_mi = [ - GaussianMI(normalize=true), - GaussianMI(normalize=false), - KSG1(; k), - KSG2(; k), - GaoKannanOhViswanath(; k), - GaoOhViswanath(; k), -] - -ests_diffent = [ - Kraskov(; k), - KozachenkoLeonenko(;), - Zhu(; k), - ZhuSingh(; k), - Gao(; k), - Goria(; k), - Lord(; k) -] - - -x = StateSpaceSet(rand(rng, 1000, 1)) -y = StateSpaceSet(rand(rng, 1000, 1)) -@testset "MIShannon" begin - @test MIShannon(base = 2) isa MIShannon - - # ---------------------------------------------------------------- - # Dedicated estimators. - # ---------------------------------------------------------------- - # Just test that each estimator is reasonably close to zero for data from a uniform - # distribution. This number varies wildly between estimators, so we're satisfied - # to test just that they don't blow up. 
- @testset "$(typeof(ests_mi[i]).name.name)" for i in eachindex(ests_mi) - est = ests_mi[i] - measure = MIShannon(base = 2) - mi = mutualinfo(measure, est, x, y) - @test mi isa Real - @test -0.5 < mi < 0.1 - - - - N = 5000 - c = 0.9 - Σ = [1 c; c 1] - N2 = MvNormal([0, 0], Σ) - mitrue = -0.5*log(det(Σ)) # in nats - D2 = StateSpaceSet([rand(rng, N2) for i = 1:N]) - X = D2[:, 1] |> StateSpaceSet - Y = D2[:, 2] |> StateSpaceSet - - mitrue_nats = -0.5*log(det(Σ)) - mitrue_bits = CausalityTools._convert_logunit(mitrue_nats, ℯ, 2) - estimated_nats = mutualinfo(MIShannon(; base = ℯ), est, X, Y) - estimated_bits = mutualinfo(MIShannon(; base = 2), est, X, Y) - estimated_bits_kr = mutualinfo(MIShannon(; base = 2), Kraskov(), X, Y) - end - - # ---------------------------------------------------------------- - # `DifferentialEntropyEstimator`s` - # ---------------------------------------------------------------- - @testset "$(typeof(ests_diffent[i]).name.name)" for i in eachindex(ests_diffent) - est = ests_diffent[i] - measure = MIShannon(base = 2) - mi = mutualinfo(measure, est, x, y) - @test mi isa Real - @test -0.5 < mi < 0.1 - - - - N = 5000 - c = 0.9 - Σ = [1 c; c 1] - N2 = MvNormal([0, 0], Σ) - mitrue = -0.5*log(det(Σ)) # in nats - D2 = StateSpaceSet([rand(rng, N2) for i = 1:N]) - X = D2[:, 1] |> StateSpaceSet - Y = D2[:, 2] |> StateSpaceSet - - mitrue_nats = -0.5*log(det(Σ)) - mitrue_bits = CausalityTools._convert_logunit(mitrue_nats, ℯ, 2) - estimated_nats = mutualinfo(MIShannon(; base = ℯ), est, X, Y) - estimated_bits = mutualinfo(MIShannon(; base = 2), est, X, Y) - estimated_bits_kr = mutualinfo(MIShannon(; base = 2), Kraskov(), X, Y) - end - -end - -@testset "GaussianMI" begin - # The other estimator tests only compute whether the estimators run "at all". - # For some special cases of the Gaussian we can also compare with a closed form solution. - - @testset "Normalized equals unnormalized" begin - x′ = StateSpaceSet(2. .* x.data .+ [SVector(1.)]) - y′ = StateSpaceSet(3. .* y.data .- [SVector(1.)]) - @test ( mutualinfo(GaussianMI(normalize=false), x , y) - ≈ mutualinfo(GaussianMI(normalize=true) , x′, y′)) - end - - @testset "Compare with analytic eq" begin - # Test based on https://en.wikipedia.org/wiki/Mutual_information#Linear_correlation. - # We choose parameters arbitrarily: - @testset "normalize=false" begin - σ_1 = 1.0 - σ_2 = 1.0 - ρ = 0.5 - - μ = [0.; 0.] - Σ = [σ_1^2 ρ*σ_1*σ_2; - ρ*σ_1*σ_2 σ_2^2] - - seed!(rng, 1) - xys = rand(rng, MvNormal(μ, Σ), 1_000_000) - # Notice that MIShannon.base is `2` by default, but math expects `ℯ`. - @test estimate( - MIShannon(; base=ℯ), - GaussianMI(normalize=false), - StateSpaceSet(xys[1:1, :]'), - StateSpaceSet(xys[2:2, :]') - ) ≈ -1/2 * log(1 - ρ^2) atol=1e-3 - end - @testset "normalize=true" begin - σ_1 = 0.5 - σ_2 = 1.5 - ρ = 0.5 - - μ = [1.5; 2.5] - Σ = [σ_1^2 ρ*σ_1*σ_2; - ρ*σ_1*σ_2 σ_2^2] - - seed!(rng, 1) - xys = rand(rng, MvNormal(μ, Σ), 1_000_000) - # Notice that MIShannon.base is `2` by default, but math expects `ℯ`. 
- @test estimate( - MIShannon(; base=ℯ), - GaussianMI(normalize=true), - StateSpaceSet(xys[1:1, :]'), - StateSpaceSet(xys[2:2, :]') - ) ≈ -1/2 * log(1 - ρ^2) atol=1e-3 - end - end -end diff --git a/test/methods/infomeasures/mutualinfo/mutualinfo.jl b/test/methods/infomeasures/mutualinfo/mutualinfo.jl deleted file mode 100644 index c0dac20e2..000000000 --- a/test/methods/infomeasures/mutualinfo/mutualinfo.jl +++ /dev/null @@ -1,4 +0,0 @@ -include("contingency_matrix.jl") -include("discrete.jl") -include("continuous.jl") -include("estimators.jl") diff --git a/test/methods/infomeasures/pmi.jl b/test/methods/infomeasures/pmi.jl deleted file mode 100644 index 5eb76d897..000000000 --- a/test/methods/infomeasures/pmi.jl +++ /dev/null @@ -1,58 +0,0 @@ -using StableRNGs -rng = StableRNG(123) - -ed = Dispersion(m = 2, c = 2) -vh = ValueHistogram(2) -sp = SymbolicPermutation(m=2) -estd = Contingency(ed) -esth = Contingency(vh) -ests = Contingency(sp) - -sys = system(Logistic4Chain(xi = rand(rng, 4); rng)) -x, y, z, w = columns(first(trajectory(sys, 50, Ttr = 10000))) -ZW = StateSpaceSet(z, w) -@test pmi(estd, x, y, z) >= 0 -@test pmi(esth, x, y, z) >= 0 -@test pmi(ests, x, y, z) >= 0 -@test pmi(estd, x, y, z) >= 0 -@test pmi(ed, x, y, z) >= 0 -@test pmi(vh, x, y, z) >= 0 -@test pmi(sp, x, y, z) >= 0 -@test pmi(estd, x, y, ZW) >= 0 -@test pmi(esth, x, y, ZW) >= 0 -@test pmi(ests, x, y, ZW) >= 0 -@test pmi(estd, x, y, ZW) >= 0 -@test pmi(ed, x, y, ZW) >= 0 -@test pmi(vh, x, y, ZW) >= 0 -@test pmi(sp, x, y, ZW) >= 0 - - -sys = system(Logistic4Chain(xi = rand(rng, 4); rng)) -x, y, z, w = columns(first(trajectory(sys, 1000, Ttr = 10000))) -@test estimate(PMI(), estd, x, w, z) >= 0 -# Test that multivariate marginals work too. -@test estimate(PMI(), esth, x, w, Dataset(z, y)) >= 0 - -@test pmi(PMI(base = 3), esth, x, w, z) >= 0 -@test pmi(SymbolicPermutation(m = 3), x, w, z) >= 0 - -x = rand(rng, 1:3, 20000) -y = rand(rng, 1:3, 20000) -z = rand(rng, 1:3, 20000) -@test round(pmi(CountOccurrences(), x, y, z), digits = 3) == 0 - -# Independence tests -x = rand(rng, 50) -y = rand(rng, 50) -z = rand(rng, 50) -X = StateSpaceSet(x) -Y = StateSpaceSet(y) -Z = StateSpaceSet(z) - -nshuffles = 5 -lptest = LocalPermutationTest(PMI(), SymbolicPermutation(); nshuffles, rng) -sutest = SurrogateTest(PMI(), SymbolicPermutation(); nshuffles, rng) -@test independence(lptest, x, y, z) isa LocalPermutationTestResult -@test independence(lptest, X, Y, Z) isa LocalPermutationTestResult -@test independence(sutest, x, y, z) isa SurrogateTestResult -@test independence(sutest, X, Y, Z) isa SurrogateTestResult diff --git a/test/methods/infomeasures/transferentropy.jl b/test/methods/infomeasures/transferentropy.jl deleted file mode 100644 index 144e0f8a2..000000000 --- a/test/methods/infomeasures/transferentropy.jl +++ /dev/null @@ -1,24 +0,0 @@ -x, y, z = rand(1000), rand(1000), rand(1000) - -# Transfer entropy is asymmetric. 
-pest = SymbolicPermutation(m = 2) -@test transferentropy(TEShannon(), pest, x, y) != transferentropy(TEShannon(), pest, y, x) - -est = Lindner( k = 5) -@test transferentropy(est, x, y) isa Real - -est = Zhu1(k = 5) -@test_throws DomainError Zhu1(k = 1) -@test transferentropy(est, x, y) isa Real - -est = ZhuSingh(k = 5) -@test transferentropy(est, x, y) isa Real - -@testset "Convenience" begin - est = SymbolicTransferEntropy() - @test transferentropy(est, x, y, z) >= 0.0 - @test transferentropy(TERenyiJizba(), est, x, y, z) isa Real - - est = Hilbert(ValueHistogram(4), target = Amplitude(), source = Phase()) - @test transferentropy(est, x, y, z) >= 0.0 -end diff --git a/test/methods/information/conditional_entropies/ce_shannon.jl b/test/methods/information/conditional_entropies/ce_shannon.jl new file mode 100644 index 000000000..3f9790140 --- /dev/null +++ b/test/methods/information/conditional_entropies/ce_shannon.jl @@ -0,0 +1,31 @@ +using Test +using CausalityTools +using Random +rng = Xoshiro(1234) + +# --------------- +# Internals +# --------------- +def = ConditionalEntropyShannon() +@test CausalityTools.min_inputs_vars(def) == 2 +@test CausalityTools.max_inputs_vars(def) == 2 + +p_nonzeros = Probabilities([0.5 0.5; 0.1 0.1 ]) +p_zeros = Probabilities([0.5 0.0; 0.1 0.1]) + +@test association(ConditionalEntropyShannon(), p_nonzeros) isa Real +@test association(ConditionalEntropyShannon(), p_nonzeros) ≥ 0 +@test association(ConditionalEntropyShannon(), p_zeros) isa Real +@test association(ConditionalEntropyShannon(), p_zeros) ≥ 0 + + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `ConditionalEntropyShannon`. +# --------------------------------------------------------------------------------------- +# `JointProbabilities` with ` CodifyPoints` +x, y, z = rand(rng, 1:5, 100), rand(rng, 1:5, 100), rand(rng, 1:3, 100) +X = StateSpaceSet(x, z) +Y = StateSpaceSet(y, z) +disc = CodifyPoints(UniqueElementsEncoding(X), UniqueElementsEncoding(Y)); +est = JointProbabilities(ConditionalEntropyShannon(), disc); +association(est, X, Y) \ No newline at end of file diff --git a/test/methods/information/conditional_entropies/ce_tsallis_abe.jl b/test/methods/information/conditional_entropies/ce_tsallis_abe.jl new file mode 100644 index 000000000..7cc7e929e --- /dev/null +++ b/test/methods/information/conditional_entropies/ce_tsallis_abe.jl @@ -0,0 +1,32 @@ +using Test +using CausalityTools +using Random + + +# --------------- +# Internals +# --------------- +def = ConditionalEntropyTsallisAbe() +@test CausalityTools.min_inputs_vars(def) == 2 +@test CausalityTools.max_inputs_vars(def) == 2 + + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `ConditionalEntropyTsallisAbe`. 
+# --------------------------------------------------------------------------------------- +# `JointProbabilities` with ` CodifyPoints` +x, y, z = rand(rng, 1:5, 100), rand(rng, 1:5, 100), rand(rng, 1:3, 100) +X = StateSpaceSet(x, z) +Y = StateSpaceSet(y, z) +disc = CodifyPoints(UniqueElementsEncoding(X), UniqueElementsEncoding(Y)); +est = JointProbabilities(ConditionalEntropyTsallisAbe(q = 0.5), disc); +@test association(est, X, Y) isa Real + +est = JointProbabilities(ConditionalEntropyTsallisAbe(q = 1.5), disc); +@test association(est, X, Y) isa Real + + +est_t = JointProbabilities(ConditionalEntropyTsallisAbe(q = 1.0), disc); +est_s = JointProbabilities(ConditionalEntropyShannon(), disc); + +@test association(est_t, X, Y) == association(est_s, X, Y) \ No newline at end of file diff --git a/test/methods/information/conditional_entropies/ce_tsallis_furuichi.jl b/test/methods/information/conditional_entropies/ce_tsallis_furuichi.jl new file mode 100644 index 000000000..55f3b1766 --- /dev/null +++ b/test/methods/information/conditional_entropies/ce_tsallis_furuichi.jl @@ -0,0 +1,30 @@ +using Test +using CausalityTools + +# --------------- +# Internals +# --------------- +def = ConditionalEntropyTsallisFuruichi() +@test CausalityTools.min_inputs_vars(def) == 2 +@test CausalityTools.max_inputs_vars(def) == 2 + + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `ConditionalEntropyTsallisFuruichi`. +# --------------------------------------------------------------------------------------- +# `JointProbabilities` with ` CodifyPoints` +x, y, z = rand(rng, 1:5, 100), rand(rng, 1:5, 100), rand(rng, 1:3, 100) +X = StateSpaceSet(x, z) +Y = StateSpaceSet(y, z) +disc = CodifyPoints(UniqueElementsEncoding(X), UniqueElementsEncoding(Y)); +est = JointProbabilities(ConditionalEntropyTsallisFuruichi(q = 0.5), disc); +@test association(est, X, Y) isa Real + +est = JointProbabilities(ConditionalEntropyTsallisFuruichi(q = 1.5), disc); +@test association(est, X, Y) isa Real + + +est_t = JointProbabilities(ConditionalEntropyTsallisFuruichi(q = 1.0), disc); +est_s = JointProbabilities(ConditionalEntropyShannon(), disc); + +@test association(est_t, X, Y) == association(est_s, X, Y) \ No newline at end of file diff --git a/test/methods/information/conditional_entropies/conditional_entropies.jl b/test/methods/information/conditional_entropies/conditional_entropies.jl new file mode 100644 index 000000000..5d17a9569 --- /dev/null +++ b/test/methods/information/conditional_entropies/conditional_entropies.jl @@ -0,0 +1,3 @@ +include("ce_shannon.jl") +include("ce_tsallis_abe.jl") +include("ce_tsallis_furuichi.jl") \ No newline at end of file diff --git a/test/methods/information/conditional_mutual_informations/api.jl b/test/methods/information/conditional_mutual_informations/api.jl new file mode 100644 index 000000000..662f61b48 --- /dev/null +++ b/test/methods/information/conditional_mutual_informations/api.jl @@ -0,0 +1,23 @@ + +# ---------------------------------------------------------------- +# Joint probabilities estimation +# ---------------------------------------------------------------- +definitions = [CMIShannon(), CMIRenyiSarbu(), CMITsallisPapapetrou()] + +@testset "JointProbabilities with $(typeof(def).name.name)" for def in definitions + x, y, z = rand(rng, 100), rand(rng, 100), rand(rng, 100) + X, Y, Z = StateSpaceSet(rand(rng, 100, 2)), + StateSpaceSet(rand(rng, 100, 2)), + StateSpaceSet(rand(rng, 100, 2)) + + est = 
JointProbabilities(def, ValueBinning(2), RelativeAmount()) + # The estimation of probabilities is decoupled from the estimation of the mutual info. + # We could in principle use any probabilities estimator here, but we default to `RelativeAmount`. + @test association(est, x, Y, z) isa Real; + @test association(est, X, y, z) isa Real; + @test association(est, x, y, z) isa Real; + @test association(est, X, Y, Z) isa Real; +end + +# Not defined for joint probabilities estimator +defs = [CMIRenyiJizba()] \ No newline at end of file diff --git a/test/methods/information/conditional_mutual_informations/cmi_renyi_jizba.jl b/test/methods/information/conditional_mutual_informations/cmi_renyi_jizba.jl new file mode 100644 index 000000000..5355fcdc6 --- /dev/null +++ b/test/methods/information/conditional_mutual_informations/cmi_renyi_jizba.jl @@ -0,0 +1,45 @@ +using CausalityTools +using Test + +# --------------- +# Input checks +# --------------- +def = CMIRenyiJizba() +@test_throws ArgumentError EntropyDecomposition(def, LeonenkoProzantoSavani(Shannon())) +@test_throws ArgumentError EntropyDecomposition(def, PlugIn(Shannon()), CodifyVariables(OrdinalPatterns(m=2)), RelativeAmount()) + +# --------------- +# Pretty printing +# --------------- +out_hdiff = repr(EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi()))) +out_hdisc = repr(EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2)))) +@test occursin("Iᵣⱼ(X, Y | Z) = Hᵣ(X,Z) + Hᵣ(Y,Z) - Hᵣ(X,Y,Z) - Hᵣ(Z)", out_hdisc) +@test occursin("Iᵣⱼ(X, Y | Z) = hᵣ(X,Z) + hᵣ(Y,Z) - hᵣ(X,Y,Z) - hᵣ(Z)", out_hdiff) + + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `CMIRenyiJizba`. +# --------------------------------------------------------------------------------------- +x = randn(rng, 50) +y = randn(rng, 50) +z = randn(rng, 50) + +def = CMIRenyiJizba() +est_diff = EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi(), k=3)) +@test association(est_diff, x, z, y) isa Real + +d = CodifyVariables(ValueBinning(2)) +est_joint = JointProbabilities(def, d) +@test association(est_joint, x, y, z) isa Real + +est_disc = EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2))); +@test association(est_disc, x, z, y) isa Real + + +# --------------- +# Pretty printing +# --------------- +out_hdiff = repr(EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi()))) +out_hdisc = repr(EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2)))) +@test occursin("Iᵣⱼ(X, Y | Z) = Hᵣ(X,Z) + Hᵣ(Y,Z) - Hᵣ(X,Y,Z) - Hᵣ(Z)", out_hdisc) +@test occursin("Iᵣⱼ(X, Y | Z) = hᵣ(X,Z) + hᵣ(Y,Z) - hᵣ(X,Y,Z) - hᵣ(Z)", out_hdiff) \ No newline at end of file diff --git a/test/methods/information/conditional_mutual_informations/cmi_renyi_poczos.jl b/test/methods/information/conditional_mutual_informations/cmi_renyi_poczos.jl new file mode 100644 index 000000000..833d945c8 --- /dev/null +++ b/test/methods/information/conditional_mutual_informations/cmi_renyi_poczos.jl @@ -0,0 +1,27 @@ + +using Test +using CausalityTools +using Random +rng = MersenneTwister(1234) + +x = randn(rng, 50) +y = randn(rng, 50) +z = randn(rng, 50) + + +# --------------- +# Internals +# --------------- +def = CMIRenyiPoczos() +@test CausalityTools.min_inputs_vars(def) == 3 +@test CausalityTools.max_inputs_vars(def) == 3 + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `CMIRenyiPoczos`. 
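+# `PoczosSchneiderCMI` is the dedicated estimator for `CMIRenyiPoczos`; `k` below is
+# assumed to be the number of nearest neighbors it uses. Both scalar time series and
+# multivariate `StateSpaceSet` inputs are exercised.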
+# --------------------------------------------------------------------------------------- +def = CMIRenyiPoczos() +@test association(PoczosSchneiderCMI(def, k = 2), x, y, z) isa Real + +data = [rand(rng, 50, 2) for i = 1:3] +x, y, z = StateSpaceSet.(data) +@test association(PoczosSchneiderCMI(def, k = 2), x, y, z) isa Real diff --git a/test/methods/information/conditional_mutual_informations/cmi_renyi_sarbu.jl b/test/methods/information/conditional_mutual_informations/cmi_renyi_sarbu.jl new file mode 100644 index 000000000..55c1cf940 --- /dev/null +++ b/test/methods/information/conditional_mutual_informations/cmi_renyi_sarbu.jl @@ -0,0 +1,31 @@ +using Test +using CausalityTools + +def = CMIRenyiSarbu() +@test CausalityTools.min_inputs_vars(def) == 3 +@test CausalityTools.max_inputs_vars(def) == 3 + +# --------------- +# Internals +# --------------- +def = CMIRenyiSarbu() +@test CausalityTools.min_inputs_vars(def) == 3 +@test CausalityTools.max_inputs_vars(def) == 3 + +# Double-sum estimation. +x = rand(["a", "b", "c"], 50) +y = rand(["hello", "yoyo", "heyhey"], 50) +z = rand([1, 2, 5], 50) + +# The estimation of probabilities is decoupled from the estimation of the mutual info. +# We could in principle use any probabilities estimator here, but we default to `RelativeAmount`. +p = probabilities(x, y, z) + +@test association(def, p) >= 0.0 + +est_diff = EntropyDecomposition(def, Kraskov(k=3)) +@test_throws ArgumentError association(est_diff, x, z, y) + +d = CodifyVariables(OrdinalPatterns(m=3)) +est = JointProbabilities(def, d) +@test association(est, x, y, z) isa Real \ No newline at end of file diff --git a/test/methods/information/conditional_mutual_informations/cmi_shannon.jl b/test/methods/information/conditional_mutual_informations/cmi_shannon.jl new file mode 100644 index 000000000..531b151cf --- /dev/null +++ b/test/methods/information/conditional_mutual_informations/cmi_shannon.jl @@ -0,0 +1,80 @@ + +using Test +using CausalityTools +using Random +rng = MersenneTwister(1234) + +# --------------- +# Internals +# --------------- +def = CMIShannon() +@test CausalityTools.min_inputs_vars(def) == 3 +@test CausalityTools.max_inputs_vars(def) == 3 + +# --------------- +# Input checks +# --------------- +def = CMIShannon() +@test_throws ArgumentError EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi())) +@test_throws ArgumentError EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(OrdinalPatterns(m=2)), RelativeAmount()) + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `CMIShannon`. 
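+# Overview of the routes exercised below: (i) the joint-pmf route via `JointProbabilities`, (ii) the entropy/MI decomposition estimators, and (iii) the dedicated kNN-based and Gaussian estimators.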
+# --------------------------------------------------------------------------------------- + +# :::::::::::::::::::::::: +# PMF +# :::::::::::::::::::::::: +x = rand(["a", "b", "c"], 50) +y = rand(["hello", "yoyo", "heyhey"], 50) +z = rand([1, 2, 5], 50) +est = JointProbabilities(def, UniqueElements()) +@test association(est, x, y, z) ≥ 0 + +# :::::::::::::::::::::::: +# Decomposition estimators +# :::::::::::::::::::::::: +x = randn(rng, 50) +y = randn(rng, 50) +z = randn(rng, 50) +est_diff = EntropyDecomposition(def, Kraskov(k=3)) +@test association(est_diff, x, z, y) isa Real + +est_disc = EntropyDecomposition(def, PlugIn(Shannon()), CodifyVariables(ValueBinning(2))); +@test association(est_disc, x, z, y) isa Real + +est_mi = MIDecomposition(def, KSG1()) +@test association(est_mi, x, z, y) isa Real + + +# :::::::::::::::::::::::: +# Dedicated estimators +# :::::::::::::::::::::::: +# On vector-valued inputs +def = CMIShannon() +x, y, z = rand(rng, 100), rand(rng, 100), rand(rng, 100); +@test association(MesnerShalizi(def, k = 2), x, y, z) isa Real +@test association(FPVP(def, k = 2), x, y, z) isa Real +@test association(Rahimzamani(def, k = 2), x, y, z) isa Real +@test association(GaussianCMI(def), x, y, z) isa Real + +# On `StateSpaceSet`s +data = [rand(rng, 50, 2) for i = 1:3] +x, y, z = StateSpaceSet.(data) +def = CMIShannon() +@test association(MesnerShalizi(def, k = 2), x, y, z) isa Real +@test association(FPVP(def, k = 2), x, y, z) isa Real +@test association(Rahimzamani(def, k = 2), x, y, z) isa Real +@test association(GaussianCMI(def), x, y, z) isa Real + + +# --------------- +# Pretty printing +# --------------- +out_mi = repr(MIDecomposition(def, KSG1())) +out_hdiff = repr(EntropyDecomposition(def, Kraskov())) +out_hdisc = repr(EntropyDecomposition(def, PlugIn(Shannon()), CodifyVariables(ValueBinning(2)))) + +@test occursin("Iₛ(X, Y | Z) = Iₛ(X; Y, Z) + Iₛ(X; Z)", out_mi) +@test occursin("Iₛ(X, Y | Z) = Hₛ(X,Z) + Hₛ(Y,Z) - Hₛ(X,Y,Z) - Hₛ(Z)", out_hdisc) +@test occursin("Iₛ(X, Y | Z) = hₛ(X,Z) + hₛ(Y,Z) - hₛ(X,Y,Z) - hₛ(Z)", out_hdiff) \ No newline at end of file diff --git a/test/methods/information/conditional_mutual_informations/cmi_tsallis_papetrou.jl b/test/methods/information/conditional_mutual_informations/cmi_tsallis_papetrou.jl new file mode 100644 index 000000000..32d96e3d1 --- /dev/null +++ b/test/methods/information/conditional_mutual_informations/cmi_tsallis_papetrou.jl @@ -0,0 +1,25 @@ +using Test +using CausalityTools +# --------------- +# Internals +# --------------- +def = CMITsallisPapapetrou() +@test CausalityTools.min_inputs_vars(def) == 3 +@test CausalityTools.max_inputs_vars(def) == 3 + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `CMIRenyiJizba`. 
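+# Note: despite the heading above, the definition tested in this file is `CMITsallisPapapetrou`.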
+# --------------------------------------------------------------------------------------- + +x = rand(["a", "b", "c"], 50) +y = rand(["hello", "yoyo", "heyhey"], 50) +z = rand([1, 2, 5], 50) + +# From raw probabilities +p = probabilities(x, y, z) +@test association(CMITsallisPapapetrou(), p) >= 0.0 + +# `JointProbabilities` estimator +d = CodifyVariables(UniqueElements()) +est_joint = JointProbabilities(def, d) +@test association(est_joint, x, y, z) isa Real diff --git a/test/methods/information/conditional_mutual_informations/conditional_mutual_informations.jl b/test/methods/information/conditional_mutual_informations/conditional_mutual_informations.jl new file mode 100644 index 000000000..36006b90c --- /dev/null +++ b/test/methods/information/conditional_mutual_informations/conditional_mutual_informations.jl @@ -0,0 +1,5 @@ +include("cmi_shannon.jl") +include("cmi_tsallis_papetrou.jl") +include("cmi_renyi_sarbu.jl") +include("cmi_renyi_jizba.jl") +include("cmi_renyi_poczos.jl") \ No newline at end of file diff --git a/test/methods/information/core/core.jl b/test/methods/information/core/core.jl new file mode 100644 index 000000000..d2aeab1ee --- /dev/null +++ b/test/methods/information/core/core.jl @@ -0,0 +1,3 @@ +include("encoding.jl") +include("counts.jl") +include("probabilities.jl") diff --git a/test/methods/information/core/counts.jl b/test/methods/information/core/counts.jl new file mode 100644 index 000000000..545c7daa1 --- /dev/null +++ b/test/methods/information/core/counts.jl @@ -0,0 +1,40 @@ +using Test +using CausalityTools +using Random +rng = Xoshiro(1234) + + +# Analytic test +x = [1, 2, 3, 1, 2, 3, 1, 2, 3] +y = [1, 2, 1, 2, 1, 2, 1, 2, 1] + +@test counts(x, y) == counts(UniqueElements(), x, y) + +# With `OutcomeSpaces` directly +# ---------------------------- + +x = StateSpaceSet(rand(rng, 50, 3)) +y = StateSpaceSet(rand(rng, 50, 3)) +z = StateSpaceSet(rand(rng, 50, 2)) +w = rand(rng, ['a', 'b'], 50) +o2 = OrdinalPatternEncoding(2) +o3 = OrdinalPatternEncoding(3) +ow = UniqueElementsEncoding(w) + +# Using a single encoding should apply the encoding to all input datasets. +@test counts(CodifyPoints(o3), x) isa Counts{<:Integer, 1} +@test counts(CodifyPoints(o3), x, x) isa Counts{<:Integer, 2} + +# Using multiple encodings, the number of input encodings must match the number of +# input datasets. +@test counts(CodifyPoints(o3, ow), x, w) isa Counts{<:Integer, 2} +@test counts(CodifyPoints(o3, o3), x, x) isa Counts{<:Integer, 2} +@test counts(CodifyPoints(o2, o3), z, x) isa Counts{<:Integer, 2} +@test counts(CodifyPoints(o2, o3, o3), z, x, y) isa Counts{<:Integer, 3} + +# Length-2 encoding won't work on state vectors of length 3 +@test_throws ArgumentError counts(CodifyPoints(o2), x) + +# When multiple encodings are provided, then the length of the encoding must match +# the length of the points. Here, we accidentally mixed the order of the encodings. 
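+# (Clarifying note: the encodings must be listed in the same order as the input datasets, so e.g. `counts(CodifyPoints(o2, o3, o3), z, x, y)` above pairs the length-2 encoding with `z` and works as intended.)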
+@test_throws ArgumentError counts(CodifyPoints(o3, o2, o3), z, x, y) diff --git a/test/methods/information/core/encoding.jl b/test/methods/information/core/encoding.jl new file mode 100644 index 000000000..91ece0060 --- /dev/null +++ b/test/methods/information/core/encoding.jl @@ -0,0 +1,46 @@ +using Test +using CausalityTools +using Random +rng = Xoshiro(1234) + +x = StateSpaceSet(rand(rng, 50, 3)) +y = StateSpaceSet(rand(rng, 50, 3)) +z = StateSpaceSet(rand(rng, 50, 2)) +w = rand(rng, ['a', 'b'], 50) +o2 = OrdinalPatternEncoding(2) +o3 = OrdinalPatternEncoding(3) +ow = UniqueElementsEncoding(w) + +# Using a single encoding should apply the encoding to all input datasets. +@test codify(CodifyPoints(o3), x) isa Vector{<:Integer} +@test codify(CodifyPoints(o3), x, x) isa NTuple{2, Vector{<:Integer}} + +# Using multiple encodings, the number of input encodings must match the number of +# input datasets. +@test codify(CodifyPoints(o3, ow), x, w) isa NTuple{2, Vector{<:Integer}} +@test codify(CodifyPoints(o3, o3), x, x) isa NTuple{2, Vector{<:Integer}} +@test codify(CodifyPoints(o2, o3), z, x) isa NTuple{2, Vector{<:Integer}} +@test codify(CodifyPoints(o2, o3, o3), z, x, y) isa NTuple{3, Vector{<:Integer}} + +# Length-2 encoding won't work on state vectors of length 3 +@test_throws ArgumentError codify(CodifyPoints(o2), x) + +# When multiple encodings are provided, then the length of the encoding must match +# the length of the points. Here, we accidentally mixed the order of the encodings. +@test_throws ArgumentError codify(CodifyPoints(o3, o2, o3), z, x, y) + +#---------------------------------------------------------------- +# Per variable/column encoding +#---------------------------------------------------------------- + +# Single variables +x = rand(rng, 100) +o = ValueBinning(3) +@test codify(CodifyVariables(o), x) isa Vector{<:Integer} +@test codify(CodifyVariables(o), (x, )) isa NTuple{1, Vector{<:Integer}} + +# Point-by-point encoding +x, y = StateSpaceSet(rand(100, 3)), StateSpaceSet(rand(100, 3)) +cx, cy = codify(CodifyPoints(OrdinalPatternEncoding(3)), x, y) +@test cx isa Vector{Int} +@test cy isa Vector{Int} diff --git a/test/methods/information/core/probabilities.jl b/test/methods/information/core/probabilities.jl new file mode 100644 index 000000000..0a9df94ca --- /dev/null +++ b/test/methods/information/core/probabilities.jl @@ -0,0 +1,30 @@ +using Test +using CausalityTools +using Random +rng = Xoshiro(1234) + +x = StateSpaceSet(rand(rng, 50, 3)) +y = StateSpaceSet(rand(rng, 50, 3)) +z = StateSpaceSet(rand(rng, 50, 2)) +w = rand(rng, ['a', 'b'], 50) +o2 = OrdinalPatternEncoding{2}() +o3 = OrdinalPatternEncoding{3}() +ow = UniqueElementsEncoding(w) + +# Using a single encoding should apply the encoding to all input datasets. +@test probabilities(CodifyPoints(o3), x) isa Probabilities{T, 1} where T +@test probabilities(CodifyPoints(o3), x, x) isa Probabilities{T, 2} where T + +# Using multiple encodings, the number of input encodings must match the number of +# input datasets. 
+@test probabilities(CodifyPoints(o3, ow), x, w) isa Probabilities{T, 2} where T +@test probabilities(CodifyPoints(o3, o3), x, x) isa Probabilities{T, 2} where T +@test probabilities(CodifyPoints(o2, o3), z, x) isa Probabilities{T, 2} where T +@test probabilities(CodifyPoints(o2, o3, o3), z, x, y) isa Probabilities{T, 3} where T + +# Length-2 encoding won't work on state vectors of length 3 +@test_throws ArgumentError probabilities(CodifyPoints(o2), x) + +# When multiple encodings are provided, then the length of the encoding must match +# the length of the points. Here, we accidentally mixed the order of the encodings. +@test_throws ArgumentError probabilities(CodifyPoints(o3, o2, o3), z, x, y) diff --git a/test/methods/information/distances_and_divergences/distances_and_divergences.jl b/test/methods/information/distances_and_divergences/distances_and_divergences.jl new file mode 100644 index 000000000..95daf319d --- /dev/null +++ b/test/methods/information/distances_and_divergences/distances_and_divergences.jl @@ -0,0 +1,3 @@ +include("hellinger_distance.jl") +include("kl_divergence.jl") +include("renyi_divergence.jl") \ No newline at end of file diff --git a/test/methods/information/distances_and_divergences/hellinger_distance.jl b/test/methods/information/distances_and_divergences/hellinger_distance.jl new file mode 100644 index 000000000..7b41f54be --- /dev/null +++ b/test/methods/information/distances_and_divergences/hellinger_distance.jl @@ -0,0 +1,11 @@ +using Test +using CausalityTools + +# There should be zero information gain from `x` over `y` for independent random variables. +using Random +rng = Xoshiro(1234) +n = 100000 +x, y = rand(rng, n), rand(rng, n) +disc = CodifyVariables(OrdinalPatterns(m = 3)) +hel = association(JointProbabilities(HellingerDistance(), disc), x, y) +@test abs(hel) ≤ 0.001 diff --git a/test/methods/information/distances_and_divergences/kl_divergence.jl b/test/methods/information/distances_and_divergences/kl_divergence.jl new file mode 100644 index 000000000..34578746f --- /dev/null +++ b/test/methods/information/distances_and_divergences/kl_divergence.jl @@ -0,0 +1,11 @@ +using Test +using CausalityTools + +# There should be zero information gain from `x` over `y` for independent random variables. +using Random +rng = Xoshiro(1234) +n = 100000 +x, y = rand(rng, n), rand(rng, n) +d = CodifyVariables(OrdinalPatterns(m = 3)) +div_kl = association(JointProbabilities(KLDivergence(), d), x, y) +@test abs(div_kl) ≤ 0.001 diff --git a/test/methods/information/distances_and_divergences/renyi_divergence.jl b/test/methods/information/distances_and_divergences/renyi_divergence.jl new file mode 100644 index 000000000..ffe6ff84b --- /dev/null +++ b/test/methods/information/distances_and_divergences/renyi_divergence.jl @@ -0,0 +1,14 @@ +using Test +using CausalityTools + +# There should be zero information gain from `x` over `y` for independent random variables. 
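+# (Reasoning sketch: for independent uniform noise, the ordinal-pattern distributions estimated from `x` and from `y` approximate the same distribution, so the divergence between them should shrink towards zero for large `n`.)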
+using Random +rng = Xoshiro(1234) +n = 100000 +x, y = rand(rng, n), rand(rng, n) +d = CodifyVariables(OrdinalPatterns(m = 3)) + +@test association(JointProbabilities(RenyiDivergence(q = 0.5), d), x, y) ≤ 0.001 +@test association(JointProbabilities(RenyiDivergence(q = 2), d), x, y) ≤ 0.001 +@test association(JointProbabilities(RenyiDivergence(q = 4), d), x, y) ≤ 0.001 +@test association(JointProbabilities(RenyiDivergence(q = Inf), d), x, y) ≥ 0.0 diff --git a/test/methods/information/estimators/estimators.jl b/test/methods/information/estimators/estimators.jl new file mode 100644 index 000000000..1b9f04627 --- /dev/null +++ b/test/methods/information/estimators/estimators.jl @@ -0,0 +1 @@ +include("joint.jl") diff --git a/test/methods/information/estimators/joint.jl b/test/methods/information/estimators/joint.jl new file mode 100644 index 000000000..90c5ca1d0 --- /dev/null +++ b/test/methods/information/estimators/joint.jl @@ -0,0 +1,70 @@ +using CausalityTools +using Test +using Random +rng = Xoshiro(1234) + +# ------------------------------------------------------------ +# API test +# ------------------------------------------------------------ +x = randn(rng, 100) +y = x .+ randn(rng, 100) +z = y .+ randn(rng, 100) + +os = [ + OrdinalPatterns(m=2), + Dispersion(), + ValueBinning(2), +] + +bivariate_symmetric_measures = [ + MIShannon(), + MITsallisMartin(), + MITsallisFuruichi(), + MIRenyiSarbu(), + MIRenyiJizba(), + JointEntropyRenyi(), + JointEntropyTsallis(), + JointEntropyShannon(), +] + +@testset "JointProbabilities estimator with $(typeof(m).name.name)" for m in bivariate_symmetric_measures + @testset "CodifyVariables with $(typeof(o).name.name)" for o in os + est = JointProbabilities(m, CodifyVariables(o)) + est_xy = association(est, x, y) + est_yx = association(est, y, x) + + @test est_xy isa Real + @test est_yx isa Real + @test est_xy ≈ est_yx # symmetry + end +end; + +bivariate_asymmetric_measures = [ + ConditionalEntropyShannon(), + ConditionalEntropyTsallisAbe(), + ConditionalEntropyTsallisFuruichi(), + HellingerDistance(), + KLDivergence(), + RenyiDivergence(), + VariationDistance(), +] + +@testset "JointProbabilities estimator with $(typeof(m).name.name)" for m in bivariate_asymmetric_measures + @testset "CodifyVariables with $(typeof(o).name.name)" for o in os + est = JointProbabilities(m, CodifyVariables(o)) + @test association(est, x, y) isa Real + end +end; + +trivariate_asymmetric_measures = [ + CMIShannon(), + CMITsallisPapapetrou(), + CMIRenyiSarbu(), +] + +@testset "JointProbabilities estimator with $(typeof(m).name.name)" for m in trivariate_asymmetric_measures + @testset "CodifyVariables with $(typeof(o).name.name)" for o in os + est = JointProbabilities(m, CodifyVariables(o)) + @test association(est, x, y, z) isa Real + end +end; \ No newline at end of file diff --git a/test/methods/information/information.jl b/test/methods/information/information.jl new file mode 100644 index 000000000..94670347e --- /dev/null +++ b/test/methods/information/information.jl @@ -0,0 +1,12 @@ +include("core/core.jl") + +include("distances_and_divergences/distances_and_divergences.jl") +include("joint_entropies/joint_entropies.jl") +include("conditional_entropies/conditional_entropies.jl") +include("mutual_informations/mutual_informations.jl") +include("conditional_mutual_informations/conditional_mutual_informations.jl") +include("transfer_entropies/transfer_entropies.jl") + +# Estimators of the information measures. 
+include("estimators/estimators.jl") +include("internal_api.jl") \ No newline at end of file diff --git a/test/methods/information/internal_api.jl b/test/methods/information/internal_api.jl new file mode 100644 index 000000000..f46792d59 --- /dev/null +++ b/test/methods/information/internal_api.jl @@ -0,0 +1,32 @@ +using Test +using CausalityTools + +# ---------------------------------------------------------------- +# This file tests internal functions. +# ---------------------------------------------------------------- +def_renyi = CMIRenyiSarbu(; q = 5, base = 5) +def_tsallis = CMITsallisPapapetrou(; q = 5, base = 5) +def_shannon = CMIShannon(; base = 5) +est_renyi = PlugIn(Renyi(; q = 0.5, base = 2)) +est_tsallis = PlugIn(Tsallis(; q = 0.5, base = 2)) +est_shannon = PlugIn(Shannon(; base = 2)) + +new_est_renyi = CausalityTools.estimator_with_overridden_parameters(def_renyi, est_renyi) +new_est_tsallis = CausalityTools.estimator_with_overridden_parameters(def_tsallis, est_tsallis) +new_est_shannon = CausalityTools.estimator_with_overridden_parameters(def_shannon, est_shannon) +@test new_est_renyi == PlugIn(Renyi(; q = 5, base = 5)) +@test new_est_tsallis == PlugIn(Tsallis(; q = 5, base = 5)) +@test new_est_shannon == PlugIn(Shannon(; base = 5)) + + +p1 = Probabilities([0.1, 0.2, 0.3]) +p2 = Probabilities([0.1, 0.2, 0.3, 0.4]) +@test_throws DimensionMismatch CausalityTools.size_match(KLDivergence(), p1, p2) + +struct BogusMeasure2{M} <: MultivariateInformationMeasure + def::M +end + +est = JointProbabilities(BogusMeasure2(2), CodifyVariables(OrdinalPatterns())) +x, y = rand(100), rand(100) +@test_throws ArgumentError association(est, x, y) \ No newline at end of file diff --git a/test/methods/information/joint_entropies/joint_entropies.jl b/test/methods/information/joint_entropies/joint_entropies.jl new file mode 100644 index 000000000..19742d16a --- /dev/null +++ b/test/methods/information/joint_entropies/joint_entropies.jl @@ -0,0 +1,3 @@ +include("joint_entropy_renyi.jl") +include("joint_entropy_shannon.jl") +include("joint_entropy_tsallis.jl") \ No newline at end of file diff --git a/test/methods/information/joint_entropies/joint_entropy_renyi.jl b/test/methods/information/joint_entropies/joint_entropy_renyi.jl new file mode 100644 index 000000000..a755f6630 --- /dev/null +++ b/test/methods/information/joint_entropies/joint_entropy_renyi.jl @@ -0,0 +1,12 @@ +using Test +using CausalityTools + +x = rand(["a", "b", "c"], 200) +y = rand(["hello", "yoyo", "heyhey"], 200) + +# The estimation of probabilities is decoupled from the estimation of the mutual info. +# We could in principle use any probabilities estimator here, but we default to `RelativeAmount`. +p = probabilities(x, y) +@test association(JointEntropyRenyi(q = 1.5), p) ≥ 0 + +# TODO: estimation using discretizations.. \ No newline at end of file diff --git a/test/methods/information/joint_entropies/joint_entropy_shannon.jl b/test/methods/information/joint_entropies/joint_entropy_shannon.jl new file mode 100644 index 000000000..db7bc14d8 --- /dev/null +++ b/test/methods/information/joint_entropies/joint_entropy_shannon.jl @@ -0,0 +1,12 @@ +using Test +using CausalityTools + +x = rand(["a", "b", "c"], 200) +y = rand(["hello", "yoyo", "heyhey"], 200) + +# The estimation of probabilities is decoupled from the estimation of the mutual info. +# We could in principle use any probabilities estimator here, but we default to `RelativeAmount`. 
+p = probabilities(x, y) +@test association(JointEntropyShannon(), p) ≥ 0 + +# TODO: estimation using discretizations.. \ No newline at end of file diff --git a/test/methods/information/joint_entropies/joint_entropy_tsallis.jl b/test/methods/information/joint_entropies/joint_entropy_tsallis.jl new file mode 100644 index 000000000..b48f4965e --- /dev/null +++ b/test/methods/information/joint_entropies/joint_entropy_tsallis.jl @@ -0,0 +1,10 @@ +using Test +using CausalityTools + +x = rand(["a", "b", "c"], 200) +y = rand(["hello", "yoyo", "heyhey"], 200) + +# The estimation of probabilities is decoupled from the estimation of the mutual info. +# We could in principle use any probabilities estimator here, but we default to `RelativeAmount`. +p = probabilities(x, y) +@test association(JointEntropyTsallis(q = 1.5), p) ≥ 0 \ No newline at end of file diff --git a/test/methods/information/mutual_informations/api.jl b/test/methods/information/mutual_informations/api.jl new file mode 100644 index 000000000..8d12e6362 --- /dev/null +++ b/test/methods/information/mutual_informations/api.jl @@ -0,0 +1,63 @@ +using Test +using CausalityTools +using Random +rng = MersenneTwister(1234) + +# ---------------------------------------------------------------- +# Joint probabilities estimation +# ---------------------------------------------------------------- +# all MI measures can be computed from the joint pmf +definitions = [MIShannon(), MIRenyiJizba(), MIRenyiSarbu(), MITsallisFuruichi(), MITsallisMartin()] + +@testset "JointProbabilities with $(typeof(def).name.name)" for def in definitions + x, y = rand(rng, 100), rand(rng, 100) + X, Y = StateSpaceSet(rand(rng, 100, 2)), StateSpaceSet(rand(rng, 100, 2)) + + d = CodifyVariables(ValueBinning(2)) + est = JointProbabilities(def, d, RelativeAmount()) + # The estimation of probabilities is decoupled from the estimation of the mutual info. + # We could in principle use any probabilities estimator here, but we default to `RelativeAmount`. + @test association(est, x, Y) isa Real; + @test association(est, X, y) isa Real; + @test association(est, x, y) isa Real; + @test association(est, X, Y) isa Real; +end + +# ---------------------------------------------------------------- +# Decomposition estimation +# ---------------------------------------------------------------- + +# The following measures can be estimated using an entropy decomposition +defs = [MIShannon(), MITsallisMartin(), MITsallisFuruichi()] +hests = [PlugIn(Shannon()), PlugIn(Tsallis(q = 1.5)), PlugIn(Tsallis(q = 1.5))] +@testset "EntropyDecomposition with $(typeof(def).name.name)" for (def, hest) in zip(defs, hests) + x, y = rand(rng, 100), rand(rng, 100) + X, Y = StateSpaceSet(rand(rng, 100, 2)), StateSpaceSet(rand(rng, 100, 2)) + est = EntropyDecomposition(def, hest, CodifyVariables(OrdinalPatterns(m=2)), RelativeAmount()) + # The estimation of probabilities is decoupled from the estimation of the mutual info. + # We could in principle use any probabilities estimator here, but we default to `RelativeAmount`. 
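+ # (Hedged sketch, not in the original tests: any of the probabilities estimators used elsewhere in this suite could be slotted in instead, e.g. `EntropyDecomposition(def, hest, CodifyVariables(OrdinalPatterns(m=2)), AddConstant())`.)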
+ @test association(est, x, Y) isa Real + @test association(est, X, y) isa Real + @test association(est, x, y) isa Real + @test association(est, X, Y) isa Real +end + +# The following measures cannot be decomposed into entropies and should throw errors +definitions = [MIRenyiSarbu()] +@testset "EntropyDecomposition with $(typeof(def).name.name)" for def in definitions + x, y = rand(rng, 100), rand(rng, 100) + X, Y = StateSpaceSet(rand(rng, 100, 2)), StateSpaceSet(rand(rng, 100, 2)) + + est_diff = EntropyDecomposition(def, Kraskov(k=3)) + est_disc = EntropyDecomposition(def, PlugIn(Shannon()), CodifyVariables(OrdinalPatterns(m=2)), RelativeAmount()) + + @test_throws ArgumentError association(est_diff, x, Y) + @test_throws ArgumentError association(est_diff, X, y) + @test_throws ArgumentError association(est_diff, x, y) + @test_throws ArgumentError association(est_diff, X, Y) + + @test_throws ArgumentError association(est_disc, x, Y) + @test_throws ArgumentError association(est_disc, X, y) + @test_throws ArgumentError association(est_disc, x, y) + @test_throws ArgumentError association(est_disc, X, Y) +end diff --git a/test/methods/information/mutual_informations/mi_renyi_jizba.jl b/test/methods/information/mutual_informations/mi_renyi_jizba.jl new file mode 100644 index 000000000..c082d6f92 --- /dev/null +++ b/test/methods/information/mutual_informations/mi_renyi_jizba.jl @@ -0,0 +1,37 @@ +using Test +using CausalityTools +using Random +rng = MersenneTwister(1234) + +def = MIRenyiJizba(q = 0.5) + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `MIRenyiJizba`. +# --------------------------------------------------------------------------------------- +# :::::::::::::::::::::::: +# PMF +# :::::::::::::::::::::::: +x = rand(rng, ["a", "b", "c"], 200); +y = rand(rng, ["hello", "yoyo", "heyhey"], 200); +est = JointProbabilities(def, UniqueElements()) +@test association(est, x, y) ≥ 0 + +# :::::::::::::::::::::::: +# Decomposition estimators +# :::::::::::::::::::::::: +x = randn(rng, 50); +y = randn(rng, 50); +est_diff = EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi(), k=3)) +@test association(est_diff, x, y) isa Real + +est_disc = EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2))); +@test association(est_disc, x, y) isa Real + +# --------------- +# Pretty printing +# --------------- +def = MIRenyiJizba() +out_hdiff = repr(EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi()))) +out_hdisc = repr(EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2)))) +@test occursin("Iᵣⱼ(X, Y) = Hᵣ(X) + Hᵣ(Y) - Hᵣ(X, Y)", out_hdisc) +@test occursin("Iᵣⱼ(X, Y) = hᵣ(X) + hᵣ(Y) - hᵣ(X, Y)", out_hdiff) \ No newline at end of file diff --git a/test/methods/information/mutual_informations/mi_renyi_sarbu.jl b/test/methods/information/mutual_informations/mi_renyi_sarbu.jl new file mode 100644 index 000000000..02ed58408 --- /dev/null +++ b/test/methods/information/mutual_informations/mi_renyi_sarbu.jl @@ -0,0 +1,16 @@ +using Test +using CausalityTools + +# Double-sum estimation. +x = rand(["a", "b", "c"], 200) +y = rand(["hello", "yoyo", "heyhey"], 200) + + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `MIRenyiSarbu`. 
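+# `MIRenyiSarbu` has no entropy-decomposition form (see `api.jl` in this folder, where `EntropyDecomposition` is expected to throw), so only estimation from the joint pmf is tested here.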
+# --------------------------------------------------------------------------------------- +est = JointProbabilities(MIRenyiSarbu(), UniqueElements()) +@test association(est, x, y) ≥ 0.0 # we don't have any better analytical numbers here. + +p = probabilities(x, y) +@test association(MIRenyiSarbu(), p) isa Real # we don't have any better analytical numbers here. diff --git a/test/methods/information/mutual_informations/mi_shannon.jl b/test/methods/information/mutual_informations/mi_shannon.jl new file mode 100644 index 000000000..cc0263baa --- /dev/null +++ b/test/methods/information/mutual_informations/mi_shannon.jl @@ -0,0 +1,63 @@ +using Test +using CausalityTools +using Random +rng = MersenneTwister(1234) +def = MIShannon() + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `CMIShannon`. +# --------------------------------------------------------------------------------------- + +# :::::::::::::::::::::::: +# PMF +# :::::::::::::::::::::::: +x = rand(rng, ["a", "b", "c"], 200) +y = rand(rng, ["hello", "yoyo", "heyhey"], 200) +est = JointProbabilities(def, UniqueElements()) +@test association(est, x, y) ≥ 0 + +# :::::::::::::::::::::::: +# Decomposition estimators +# :::::::::::::::::::::::: +x = randn(rng, 50) +y = randn(rng, 50) +est_diff = EntropyDecomposition(def, Kraskov(k=3)) +@test association(est_diff, x, y) isa Real + +est_disc = EntropyDecomposition(def, PlugIn(Shannon()), CodifyVariables(ValueBinning(2))); +@test association(est_disc, x, y) isa Real + +# :::::::::::::::::::::::: +# Dedicated estimators +# :::::::::::::::::::::::: +# On vector-valued inputs +def = MIShannon() +x, y = rand(rng, 100), rand(rng, 100) +X, Y = StateSpaceSet(x), StateSpaceSet(y) +@test association(KSG1(def, k = 2), x, y) isa Real +@test association(KSG2(def, k = 2), x, y) isa Real +@test association(GaoOhViswanath(def, k = 2), x, y) isa Real +@test association(GaoKannanOhViswanath(def, k = 2), x, y) isa Real +@test association(GaussianMI(def), x, y) isa Real +@test association(GaussianMI(def), X, Y) isa Real + +# On `StateSpaceSet`s +data = [rand(rng, 50, 2) for i = 1:2] +x, y = StateSpaceSet.(data) +def = MIShannon() +@test association(KSG1(def, k = 2), x, y) isa Real +@test association(KSG2(def, k = 2), x, y) isa Real +@test association(GaoOhViswanath(def, k = 2), x, y) isa Real +@test association(GaoKannanOhViswanath(def, k = 2), x, y) isa Real +@test association(GaussianMI(def), x, y) isa Real + + +# --------------- +# Pretty printing +# --------------- +def = MIShannon() +out_hdiff = repr(EntropyDecomposition(def, Kraskov())) +out_hdisc = repr(EntropyDecomposition(def, PlugIn(Shannon()), CodifyVariables(ValueBinning(2)))) + +@test occursin("Iₛ(X, Y) = Hₛ(X) + Hₛ(Y) - Hₛ(X, Y)", out_hdisc) +@test occursin("Iₛ(X, Y) = hₛ(X) + hₛ(Y) - hₛ(X, Y)", out_hdiff) \ No newline at end of file diff --git a/test/methods/information/mutual_informations/mi_tsallis_furuichi.jl b/test/methods/information/mutual_informations/mi_tsallis_furuichi.jl new file mode 100644 index 000000000..80b6309ae --- /dev/null +++ b/test/methods/information/mutual_informations/mi_tsallis_furuichi.jl @@ -0,0 +1,39 @@ +using Test +using CausalityTools +using Random +rng = Xoshiro(1234) + +x = rand(rng, ["a", "b", "c"], 200) +y = rand(rng, ["hello", "yoyo", "heyhey"], 200) + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `MITsallisFuruichi`. 
+# --------------------------------------------------------------------------------------- +def = MITsallisFuruichi() +# Directly from probabilities +p = probabilities(x, y) +@test association(def, p) ≥ 0.0 + +# `JointProbabilities` estimator +est = JointProbabilities(def, UniqueElements()) +@test association(est, x, y) ≥ 0.0 # we don't have any better analytical numbers here. + +# Discrete entropy decomposition (on numerical data) +x, y = rand(rng, 100), rand(rng, 100) +est_disc = EntropyDecomposition(def, PlugIn(Tsallis()), CodifyVariables(OrdinalPatterns()), AddConstant()) +@test association(est_disc, x, y) isa Real + +# Differential entropy decomposition (on numerical data) +x, y = rand(rng, 100), rand(rng, 100) +est_diff = EntropyDecomposition(def, LeonenkoProzantoSavani(Tsallis(q= 2))) +@test association(est_diff, x, y) isa Real + +# --------------- +# Pretty printing +# --------------- +def = MIShannon() +out_hdiff = repr(EntropyDecomposition(def, Kraskov())) +out_hdisc = repr(EntropyDecomposition(def, PlugIn(Shannon()), CodifyVariables(ValueBinning(2)))) + +@test occursin("Iₛ(X, Y) = Hₛ(X) + Hₛ(Y) - Hₛ(X, Y)", out_hdisc) +@test occursin("Iₛ(X, Y) = hₛ(X) + hₛ(Y) - hₛ(X, Y)", out_hdiff) \ No newline at end of file diff --git a/test/methods/information/mutual_informations/mi_tsallis_martin.jl b/test/methods/information/mutual_informations/mi_tsallis_martin.jl new file mode 100644 index 000000000..5f0c5f238 --- /dev/null +++ b/test/methods/information/mutual_informations/mi_tsallis_martin.jl @@ -0,0 +1,31 @@ +using Test +using CausalityTools +using Random +rng = Xoshiro(1234) + +x = rand(rng, ["a", "b", "c"], 200) +y = rand(rng, ["hello", "yoyo", "heyhey"], 200) + + +# --------------------------------------------------------------------------------------- +# Test all possible ways of estimating `MITsallisMartin`. +# --------------------------------------------------------------------------------------- +def = MITsallisMartin() + +# Directly from probabilities +p = probabilities(x, y) +@test association(def, p) isa Real # we don't have any better analytical numbers here. 
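+# `MITsallisMartin` is not defined for q = 1, so estimating it with q = 1 should throw.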
+@test_throws ArgumentError association(MITsallisMartin(q = 1), p) + +# `JointProbabilities` estimator +est = JointProbabilities(MITsallisMartin(), UniqueElements()) +@test association(est, x, y) isa Real + +# Discrete entropy decomposition +est_disc = EntropyDecomposition(def, PlugIn(Tsallis()), CodifyVariables(UniqueElements()), AddConstant()) +@test association(est_disc, x, y) isa Real + +# Differential entropy decomposition (on numerical data) +x, y = rand(rng, 100), rand(rng, 100) +est_diff = EntropyDecomposition(def, LeonenkoProzantoSavani(Tsallis(q= 2))) +@test association(est_diff, x, y) isa Real diff --git a/test/methods/information/mutual_informations/mutual_informations.jl b/test/methods/information/mutual_informations/mutual_informations.jl new file mode 100644 index 000000000..5fbd36ba9 --- /dev/null +++ b/test/methods/information/mutual_informations/mutual_informations.jl @@ -0,0 +1,6 @@ +include("mi_renyi_jizba.jl") +include("mi_renyi_sarbu.jl") +include("mi_shannon.jl") +include("mi_tsallis_furuichi.jl") +include("mi_tsallis_martin.jl") +include("api.jl") \ No newline at end of file diff --git a/test/methods/information/transfer_entropies/te_renyi_jizba.jl b/test/methods/information/transfer_entropies/te_renyi_jizba.jl new file mode 100644 index 000000000..b098f71eb --- /dev/null +++ b/test/methods/information/transfer_entropies/te_renyi_jizba.jl @@ -0,0 +1,38 @@ +using Test +using CausalityTools +using Random +rng = MersenneTwister(1234) + +# Double-sum estimation. +x = randn(rng, 100) +y = randn(rng, 100) +z = randn(rng, 100) + +def = TERenyiJizba(base = 3, q = 0.5) + +# Here we test all the possible "generic" ways of estimating `TERenyiJizba`. +est_diff = EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi(); k=3)) +@test association(est_diff, x, z) isa Real +@test association(est_diff, x, z, y) isa Real + +est_disc = EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2))); +@test association(est_disc, x, z) isa Real +@test association(est_disc, x, z, y) isa Real + +# Test `TransferOperator` decomposition explicitly, because it has a special implementation +precise = true # precise bin edge +discretization = CodifyVariables(TransferOperator(RectangularBinning(2, precise))) # +est_disc = EntropyDecomposition(TERenyiJizba(), PlugIn(Renyi()), discretization); +@test association(est_disc, x, z) isa Real +@test association(est_disc, x, z, y) isa Real + + + +# --------------- +# Pretty printing +# --------------- +out_hdiff = repr(EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi()))) +out_hdisc = repr(EntropyDecomposition(def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2)))) + +@test occursin("TEᵣⱼ(s → t | c) = hᵣ(t⁺, t⁻,c⁻) - hᵣ(t⁻,c⁻) - hᵣ(t⁺,s⁻,t⁻,c⁻) + hᵣ(s⁻,t⁻,c⁻)", out_hdiff) +@test occursin("TEᵣⱼ(s → t | c) = Hᵣ(t⁺, t⁻,c⁻) - Hᵣ(t⁻,c⁻) - Hᵣ(t⁺,s⁻,t⁻,c⁻) + Hᵣ(s⁻,t⁻,c⁻)", out_hdisc) \ No newline at end of file diff --git a/test/methods/information/transfer_entropies/te_shannon.jl b/test/methods/information/transfer_entropies/te_shannon.jl new file mode 100644 index 000000000..81ee36997 --- /dev/null +++ b/test/methods/information/transfer_entropies/te_shannon.jl @@ -0,0 +1,88 @@ +using Test +using CausalityTools +using DynamicalSystemsBase +using Random +rng = Xoshiro(1234) + +# Double-sum estimation. +x = randn(rng, 50) +y = randn(rng, 50) +z = randn(rng, 50) + +# Here we test all the possible "generic" ways of estimating `TEShannon`. +# Remaining tests are in the dedicated estimator test files, e.g. `Zhu1.jl`. 
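+# (The dedicated `Zhu1` and `Lindner` estimators also get a quick smoke test directly below.)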
+def = TEShannon() +est_diff = EntropyDecomposition(def, Kraskov(k=3)) +@test association(est_diff, x, z) isa Real +@test association(est_diff, x, z, y) isa Real + +est_disc = EntropyDecomposition(def, PlugIn(Shannon()), CodifyVariables(ValueBinning(2))); +@test association(est_disc, x, z) isa Real +@test association(est_disc, x, z, y) isa Real + +est_mi = MIDecomposition(def, KSG1()) +@test association(est_mi, x, z) isa Real +@test association(est_mi, x, z, y) isa Real + +est_cmi = CMIDecomposition(def, FPVP()) +@test association(est_cmi, x, z) isa Real +@test association(est_cmi, x, z, y) isa Real + +est_zhu = Zhu1(def, k = 3) +@test association(est_zhu, x, z) isa Real +@test association(est_zhu, x, z, y) isa Real + +est_lindner = Lindner(def, k = 3) +@test association(est_lindner, x, z) isa Real +@test association(est_lindner, x, z, y) isa Real + + +# Test `TransferOperator` decomposition explicitly, because it has a special implementation +precise = true # precise bin edge +discretization = CodifyVariables(TransferOperator(RectangularBinning(2, precise))) # +est_disc = EntropyDecomposition(TEShannon(), PlugIn(Shannon()), discretization); +@test association(est_disc, x, z) isa Real +@test association(est_disc, x, z, y) isa Real + +# `JointProbabilities` +x, y, z = rand(rng, 30), rand(rng, 30), rand(rng, 30) +est = JointProbabilities(TEShannon(), CodifyVariables(OrdinalPatterns())); +@test association(est, x, y) >= 0.0 +@test association(est, x, y, z) >= 0.0 + +# `Hilbert` +est_te = JointProbabilities(TEShannon(), CodifyVariables(OrdinalPatterns())); +est = Hilbert(est) +@test association(Hilbert(est, source = Phase(), target = Amplitude()), x, y) >= 0.0 +@test association(Hilbert(est, source = Amplitude(), target = Phase()), x, y) >= 0.0 +@test association(Hilbert(est, source = Amplitude(), target = Amplitude()), x, y) >= 0.0 +@test association(Hilbert(est, source = Phase(), target = Phase()), x, y) >= 0.0 + +@test association(Hilbert(est, source = Phase(), target = Amplitude(), cond = Phase() ), x, y, z) >= 0.0 +@test association(Hilbert(est, source = Amplitude(), target = Phase(), cond = Phase() ), x, y, z) >= 0.0 +@test association(Hilbert(est, source = Amplitude(), target = Amplitude(), cond = Phase() ), x, y, z) >= 0.0 +@test association(Hilbert(est, source = Phase(), target = Phase(), cond = Phase() ), x, y, z) >= 0.0 +@test association(Hilbert(est, source = Phase(), target = Amplitude(), cond = Amplitude() ), x, y, z) >= 0.0 +@test association(Hilbert(est, source = Amplitude(), target = Phase(), cond = Amplitude() ), x, y, z) >= 0.0 +@test association(Hilbert(est, source = Amplitude(), target = Amplitude(), cond = Amplitude() ), x, y, z) >= 0.0 +@test association(Hilbert(est, source = Phase(), target = Phase(), cond = Amplitude() ), x, y, z) >= 0.0 + +# `SymbolicTransferEntropy` +sys = system(Logistic4Chain(; rng)) +x, y, z, w = columns(first(trajectory(sys, 300, Ttr = 10000))) +est = SymbolicTransferEntropy(m = 5) +@test association(est, x, y) ≥ 0.0 +@test association(est, x, z) > association(est, x, z, y) + +# --------------- +# Pretty printing +# --------------- +out_cmi = repr(CMIDecomposition(def, FPVP())) +out_mi = repr(MIDecomposition(def, KSG1())) +out_hdiff = repr(EntropyDecomposition(def, Kraskov())) +out_hdisc = repr(EntropyDecomposition(def, PlugIn(Shannon()), CodifyVariables(ValueBinning(2)))) + +@test occursin("TEₛ(s → t | c) = Iₛ(t⁺; s⁻ | t⁻, c⁻)", out_cmi) +@test occursin("TEₛ(s → t | c) = Iₛ(t⁺; s⁻, t⁻, c⁻) - Iₛ(t⁺; t⁻, c⁻)", out_mi) +@test occursin("TEₛ(s → t | 
c) = hₛ(t⁺, t⁻,c⁻) - hₛ(t⁻,c⁻) - hₛ(t⁺,s⁻,t⁻,c⁻) + hₛ(s⁻,t⁻,c⁻)", out_hdiff) +@test occursin("TEₛ(s → t | c) = Hₛ(t⁺, t⁻,c⁻) - Hₛ(t⁻,c⁻) - Hₛ(t⁺,s⁻,t⁻,c⁻) + Hₛ(s⁻,t⁻,c⁻)", out_hdisc) \ No newline at end of file diff --git a/test/methods/information/transfer_entropies/transfer_entropies.jl b/test/methods/information/transfer_entropies/transfer_entropies.jl new file mode 100644 index 000000000..c95ba13fe --- /dev/null +++ b/test/methods/information/transfer_entropies/transfer_entropies.jl @@ -0,0 +1,2 @@ +include("te_shannon.jl") +include("te_renyi_jizba.jl") \ No newline at end of file diff --git a/test/methods/information/transfer_entropies/transferoperator.jl b/test/methods/information/transfer_entropies/transferoperator.jl new file mode 100644 index 000000000..d6d3b56c3 --- /dev/null +++ b/test/methods/information/transfer_entropies/transferoperator.jl @@ -0,0 +1,99 @@ +# import ComplexityMeasures: TransferOperator, invariantmeasure, InvariantMeasure, Probabilities +# using ComplexityMeasures.GroupSlices +# export TransferOperator + +# """ +# marginal_indices(x) + +# Returns a column vector `v` with the same number of elements as there are unique +# elements in `x`. `v[i]` is the indices of elements in `x` matching `v[i]`. + +# For example, if the third unique element in `x`, and the element `u₃ = unique(x)[3]` +# appears four times in `x`, then `v[3]` is a vector of four integers indicating the +# position of the elements matching `u₃`. +# """ +# function marginal_indices(visited_bins, selected_axes) +# marginal_pts = [x[selected_axes] for x in visited_bins] +# groupinds(groupslices(marginal_pts)) +# end + +# """ +# marginal_probs_from_μ(seleced_axes, visited_bins, iv::InvariantMeasure, inds_non0measure) + +# Estimate marginal probabilities from a pre-computed invariant measure, given a set +# of visited bins, an invariant measure and the indices of the positive-measure bins. +# The indices in `selected_axes` determines which marginals are selected. +# """ +# function marginal_probs_from_μ(seleced_axes, visited_bins, iv::InvariantMeasure, inds_non0measure) + +# marginal_inds::Vector{Vector{Int}} = +# marginal_indices(visited_bins, seleced_axes) + +# # When the invariant measure over the joint space is already known, we don't +# # need to estimate histograms. We simply sum over the nonzero entries of the +# # (already estimated) invariant distribution `iv` in the marginal space +# # (whose indices are given by `seleced_axes`). +# μpos = iv.ρ[inds_non0measure] +# marginal = zeros(Float64, length(marginal_inds)) +# @inbounds for i in eachindex(marginal_inds) +# marginal[i] = sum(μpos[marginal_inds[i]]) +# end +# return marginal +# end + + +# function _marginal_encodings(encoder::RectangularBinEncoding, x::VectorOrStateSpaceSet...) +# X = StateSpaceSet(StateSpaceSet.(x)...) +# bins = [vec(encode_as_tuple(encoder, pt))' for pt in X] +# joint_bins = reduce(vcat, bins) +# idxs = size.(x, 2) #each input can have different dimensions +# s = 1 +# encodings = Vector{StateSpaceSet}(undef, length(idxs)) +# for (i, cidx) in enumerate(idxs) +# variable_subset = s:(s + cidx - 1) +# s += cidx +# y = @views joint_bins[:, variable_subset] +# encodings[i] = StateSpaceSet(y) +# end + +# return encodings +# end + +# function transferentropy( +# measure::TransferEntropy, +# est::TransferOperator{<:RectangularBinning}, x...) +# e = measure.e +# joint_pts, vars, τs, js = te_embed(measure.embedding, x...) 
+# iv = invariantmeasure(joint_pts, est.binning) + +# # TODO: this needs to be done more cleverly in ComplexityMeasures.jl, so we don't +# # need to do the conversion twice. We should explicitly store the bin indices for all +# # marginals, not a single encoding integer for each bin. Otherwise, we can't +# # properly subset marginals here and relate them to the approximated invariant measure. +# # The bins visited by the orbit are +# visited_bins_coordinates = StateSpaceSet(decode.(Ref(iv.to.encoder), iv.to.bins)) +# unique_visited_bins = _marginal_encodings(iv.to.encoder, visited_bins_coordinates)[1] + +# # # The subset of visited bins with nonzero measure +# inds_non0measure = findall(iv.ρ .> 0) +# positive_measure_bins = unique_visited_bins[inds_non0measure] + +# # Estimate marginal probability distributions from joint measure +# cols_ST = [vars.S; vars.T; vars.C] +# cols_TTf = [vars.Tf; vars.T; vars.C] +# cols_T = [vars.T; vars.C] +# p_T = marginal_probs_from_μ(cols_T, positive_measure_bins, iv, inds_non0measure) +# p_ST = marginal_probs_from_μ(cols_ST, positive_measure_bins, iv, inds_non0measure) +# p_TTf = marginal_probs_from_μ(cols_TTf, positive_measure_bins, iv, inds_non0measure) +# p_joint = iv.ρ[inds_non0measure] + +# te = entropy(e, Probabilities(p_ST)) + +# entropy(e, Probabilities(p_TTf)) - +# entropy(e, Probabilities(p_T)) - +# entropy(e, Probabilities(p_joint)) +# end + +# transferentropy(est::TransferOperator{<:RectangularBinning}, s, t; kwargs...) = +# transferentropy(Shannon(; base), est, s, t; kwargs...) +# transferentropy(est::TransferOperator{<:RectangularBinning}, s, t, c; kwargs...) = +# transferentropy(Shannon(; base), est, s, t, c; kwargs...) \ No newline at end of file diff --git a/test/methods/methods.jl b/test/methods/methods.jl index b8f3a6781..19d8b6346 100644 --- a/test/methods/methods.jl +++ b/test/methods/methods.jl @@ -1,5 +1,7 @@ testfile("closeness/closeness.jl") testfile("cross_mappings/ccm_like.jl") -testfile("infomeasures/infomeasures.jl") testfile("correlation/correlation.jl") +testfile("information/information.jl") testfile("recurrence/recurrence.jl") + +testfile("deprecations.jl") \ No newline at end of file diff --git a/test/methods/pai.jl b/test/methods/pai.jl index df34bcadf..68f89d428 100644 --- a/test/methods/pai.jl +++ b/test/methods/pai.jl @@ -2,6 +2,6 @@ using Random rng = MersenneTwister(123456) x, y = rand(rng, 1000), rand(rng, 1000) -@test pai(x, y, 3, 1) isa Float64 -@test pai(x, y, 3, 1, :random) isa Vector{Float64} -@test pai(x, y, 3, 1, :segment) isa Vector{Float64} +@test association(x, y, 3, 1) isa Float64 +@test association(x, y, 3, 1, :random) isa Vector{Float64} +@test association(x, y, 3, 1, :segment) isa Vector{Float64} diff --git a/test/methods/recurrence/MCR.jl b/test/methods/recurrence/MCR.jl index 4644fb778..a743fdeb2 100644 --- a/test/methods/recurrence/MCR.jl +++ b/test/methods/recurrence/MCR.jl @@ -1,21 +1,24 @@ -using StableRNGs +using Test using StateSpaceSets using Distances: Chebyshev - -rng = StableRNG(1234) -x = rand(rng, 200) -y = rand(rng, 200) -X = rand(rng, 200, 2) |> StateSpaceSet -Y = rand(rng, 200, 3) |> StateSpaceSet +using Random +rng = Xoshiro(1234) +n = 80 +x = rand(rng, n) +y = rand(rng, n) +z = rand(rng, n) +X = rand(rng, n, 2) |> StateSpaceSet +Y = rand(rng, n, 3) |> StateSpaceSet @test_throws UndefKeywordError MCR() -@test mcr(MCR(; r = 0.5), x, y) isa Real -@test mcr(MCR(; r = 0.5), x, Y) isa Real -@test mcr(MCR(; r = 0.5), X, Y) isa Real -@test mcr(MCR(; r = 0.5, metric = Chebyshev()), x, y) isa 
Real -@test mcr(MCR(; r = 0.5, metric = Chebyshev()), X, Y) isa Real +@test association(MCR(; r = 0.5), x, y, z) isa Real +@test association(MCR(; r = 0.5), x, y) isa Real +@test association(MCR(; r = 0.5), x, Y) isa Real +@test association(MCR(; r = 0.5), X, Y) isa Real +@test association(MCR(; r = 0.5, metric = Chebyshev()), x, y) isa Real +@test association(MCR(; r = 0.5, metric = Chebyshev()), X, Y) isa Real -test = SurrogateTest(MCR(r = 0.2); rng) +test = SurrogateAssociationTest(MCR(r = 0.2); rng) α = 0.05 # We should not be able to reject null for independent variables @test pvalue(independence(test, x, y)) >= α @@ -30,7 +33,7 @@ z = x .+ y # Romano et al. claim that if A drives B, then ΔM(A | B) = M(A | B) - M(B | A) > 0 m = MCR(; r = 0.5) -Δxz = mcr(m, x, z) - mcr(m, z, x) -Δyz = mcr(m, y, z) - mcr(m, y, x) +Δxz = association(m, x, z) - association(m, z, x) +Δyz = association(m, y, z) - association(m, y, x) @test Δxz > 0 @test Δyz > 0 diff --git a/test/methods/recurrence/RMCD.jl b/test/methods/recurrence/RMCD.jl index 1395c64f7..6cd4200a2 100644 --- a/test/methods/recurrence/RMCD.jl +++ b/test/methods/recurrence/RMCD.jl @@ -1,9 +1,10 @@ using StableRNGs using StateSpaceSets using Distances: Chebyshev +using DynamicalSystemsBase rng = StableRNG(1234) -n = 100 +n = 80 x = rand(rng, n) y = rand(rng, n) z = rand(rng, n) @@ -13,22 +14,22 @@ Z = rand(rng, n, 2) |> StateSpaceSet @test_throws UndefKeywordError RMCD() -@test rmcd(RMCD(; r = 0.5), x, y) >= 0 -@test rmcd(RMCD(; r = 0.5), x, Y) >= 0 -@test rmcd(RMCD(; r = 0.5), X, Y) >= 0 -@test rmcd(RMCD(; r = 0.5, metric = Chebyshev()), x, y) >= 0 -@test rmcd(RMCD(; r = 0.5, metric = Chebyshev()), X, Y) >= 0 +@test association(RMCD(; r = 0.5), x, y) >= 0 +@test association(RMCD(; r = 0.5), x, Y) >= 0 +@test association(RMCD(; r = 0.5), X, Y) >= 0 +@test association(RMCD(; r = 0.5, metric = Chebyshev()), x, y) >= 0 +@test association(RMCD(; r = 0.5, metric = Chebyshev()), X, Y) >= 0 -@test rmcd(RMCD(; r = 0.5), x, y, z) >= 0 -@test rmcd(RMCD(; r = 0.5), x, Y, z) >= 0 -@test rmcd(RMCD(; r = 0.5), X, Y, Z) >= 0 -@test rmcd(RMCD(; r = 0.5, metric = Chebyshev()), x, y, z) >= 0 -@test rmcd(RMCD(; r = 0.1, metric = Chebyshev()), X, Y, z) >= 0 -@test rmcd(RMCD(; r = 0.5), x, y, x) == 0 -@test rmcd(RMCD(; r = 0.5), x, y, y) == 0 +@test association(RMCD(; r = 0.5), x, y, z) >= 0 +@test association(RMCD(; r = 0.5), x, Y, z) >= 0 +@test association(RMCD(; r = 0.5), X, Y, Z) >= 0 +@test association(RMCD(; r = 0.5, metric = Chebyshev()), x, y, z) >= 0 +@test association(RMCD(; r = 0.1, metric = Chebyshev()), X, Y, z) >= 0 +@test association(RMCD(; r = 0.5), x, y, x) == 0 +@test association(RMCD(; r = 0.5), x, y, y) == 0 # We should not be able to reject null for independent variables -test = SurrogateTest(RMCD(r = 0.5); rng, nshuffles = 50) +test = SurrogateAssociationTest(RMCD(r = 0.5); rng, nshuffles = 50) @test pvalue(independence(test, x, y)) >= α @test pvalue(independence(test, X, Y)) >= α @@ -36,8 +37,8 @@ test = SurrogateTest(RMCD(r = 0.5); rng, nshuffles = 50) # Test on a dynamical system. sys = system(Logistic4Chain(; xi = rand(rng, 4), rng)) -x, y, z, w = columns(first(trajectory(sys, 200, Ttr = 10000))); -test = LocalPermutationTest(RMCD(r = 0.5); rng) +x, y, z, w = columns(first(trajectory(sys, n, Ttr = 10000))); +test = LocalPermutationTest(RMCD(r = 0.5); rng, nshuffles = 19) # X and Z are independent given Y, so we shouldn't be able to reject the null (p > α). 
pval = pvalue(independence(test, x, z, y)) diff --git a/test/runtests.jl b/test/runtests.jl index 116f79b80..cb150894b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,12 +4,9 @@ defaultname(file) = uppercasefirst(replace(splitext(basename(file))[1], '_' => ' testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include(file); end @testset "CausalityTools.jl" begin - testfile("core.jl") + include("test_systems.jl") + testfile("deprecations.jl") testfile("methods/methods.jl") - testfile("utils.jl") testfile("independence/independence.jl") testfile("causal_graphs/oce.jl") - testfile("systems/systems.jl") end - -#include("integrations/test_uncertaindata_integration.jl") diff --git a/test/systems/continuous.jl b/test/systems/continuous.jl deleted file mode 100644 index d0ae8c102..000000000 --- a/test/systems/continuous.jl +++ /dev/null @@ -1,16 +0,0 @@ -using StateSpaceSets: StateSpaceSet -using DynamicalSystemsBase: ContinuousDynamicalSystem -@test ContinuousDefinition <: SystemDefinition - -@test trajectory(system(ChuaCircuitsBidir6()), 10, Δt = 0.05)[1] isa StateSpaceSet{6} -@test trajectory(system(ChuaScrollSine3()), 10, Δt = 0.05)[1] isa StateSpaceSet{3} -@test trajectory(system(HindmarshRose3()), 10, Δt = 0.05)[1] isa StateSpaceSet{3} -@test trajectory(system(LorenzBidir6()), 10, Δt = 0.05)[1] isa StateSpaceSet{6} -@test trajectory(system(LorenzForced9()), 10, Δt = 0.05)[1] isa StateSpaceSet{9} -@test trajectory(system(LorenzTransitive9()), 10, Δt = 0.05)[1] isa StateSpaceSet{9} -@test trajectory(system(MediatedLink9()), 10, Δt = 0.05)[1] isa StateSpaceSet{9} -@test trajectory(system(Repressilator6()), 10, Δt = 0.05)[1] isa StateSpaceSet{6} -@test trajectory(system(RosslerBidir6()), 10, Δt = 0.05)[1] isa StateSpaceSet{6} -@test trajectory(system(RosslerForced9()), 10, Δt = 0.05)[1] isa StateSpaceSet{9} -@test trajectory(system(RosslerLorenzUnidir6()), 10, Δt = 0.05)[1] isa StateSpaceSet{6} -@test trajectory(system(Thomas3()), 10, Δt = 0.05)[1] isa StateSpaceSet{3} diff --git a/test/systems/discrete.jl b/test/systems/discrete.jl deleted file mode 100644 index c4d7a0847..000000000 --- a/test/systems/discrete.jl +++ /dev/null @@ -1,36 +0,0 @@ -using StateSpaceSets: StateSpaceSet -using DynamicalSystemsBase: DiscreteDynamicalSystem -@test DiscreteDefinition <: SystemDefinition - -@test Anishchenko() isa DiscreteDefinition -@test AR1Unidir() isa DiscreteDefinition -@test AR1Bidir() isa DiscreteDefinition -@test Henon2() isa DiscreteDefinition -@test Henon3() isa LaggedDiscreteDefinition -@test Ikeda2() isa DiscreteDefinition -@test ChaoticMaps3() isa DiscreteDefinition -@test ChaoticNoisyLinear2() isa LaggedDiscreteDefinition -@test Logistic2Unidir() isa DiscreteDefinition -@test Logistic2Bidir() isa DiscreteDefinition -@test Logistic3CommonDriver() isa DiscreteDefinition -@test Logistic4Chain() isa DiscreteDefinition -@test Nonlinear3() isa DiscreteDefinition - -n = 50 -@test trajectory(system(Anishchenko()), n)[1] isa StateSpaceSet -@test trajectory(system(AR1Unidir()), n)[1] isa StateSpaceSet -@test trajectory(system(AR1Bidir()), n)[1] isa StateSpaceSet -@test trajectory(system(Henon2()), n)[1] isa StateSpaceSet -@test trajectory(system(Henon3()), n)[1] isa StateSpaceSet -@test trajectory(system(Ikeda2()), n)[1] isa StateSpaceSet -@test trajectory(system(ChaoticMaps3()), n)[1] isa StateSpaceSet -@test trajectory(system(ChaoticNoisyLinear2()), n)[1] isa StateSpaceSet -@test trajectory(system(Logistic2Unidir()), n)[1] isa StateSpaceSet -@test 
trajectory(system(Logistic2Bidir()), n)[1] isa StateSpaceSet -@test trajectory(system(Logistic3CommonDriver()), n)[1] isa StateSpaceSet -@test trajectory(system(Logistic4Chain()), n)[1] isa StateSpaceSet -@test trajectory(system(Nonlinear3()), n)[1] isa StateSpaceSet -@test trajectory(system(Peguin2()), n)[1] isa StateSpaceSet -@test trajectory(system(UlamLattice()), n)[1] isa StateSpaceSet -@test trajectory(system(Var1()), n)[1] isa StateSpaceSet -@test trajectory(system(Verdes3()), n)[1] isa StateSpaceSet diff --git a/test/systems/systems.jl b/test/systems/systems.jl deleted file mode 100644 index 395c7b76e..000000000 --- a/test/systems/systems.jl +++ /dev/null @@ -1,2 +0,0 @@ -testfile("continuous.jl") -testfile("discrete.jl") diff --git a/test/test_systems.jl b/test/test_systems.jl new file mode 100644 index 000000000..4d121cecb --- /dev/null +++ b/test/test_systems.jl @@ -0,0 +1,87 @@ +using DynamicalSystemsBase +using Random +rng = Random.MersenneTwister(1234) + +Base.@kwdef struct Logistic2Unidir{V, C, R1, R2, Σy, R} + xi::V = [0.5, 0.5] + c_xy::C = 0.1 + r₁::R1 = 3.78 + r₂::R2 = 3.66 + σ_xy::Σy = 0.05 + rng::R = Random.default_rng() +end + +function eom_logistic2uni(u, p::Logistic2Unidir, t) + (; xi, c_xy, r₁, r₂, σ_xy, rng) = p + x, y = u + f_xy = (y + (c_xy*(x + σ_xy * rand(rng))/2) ) / (1 + (c_xy/2)*(1+σ_xy)) + + dx = r₁ * x * (1 - x) + dy = r₂ * (f_xy) * (1 - f_xy) + return SVector{2}(dx, dy) +end + + +function system(definition::Logistic2Unidir) + return DiscreteDynamicalSystem(eom_logistic2uni, definition.xi, definition) +end + +Base.@kwdef struct Logistic4Chain{V, RX, RY, RZ, RW, C1, C2, C3, Σ1, Σ2, Σ3, RNG} + xi::V = [0.1, 0.2, 0.3, 0.4] + rx::RX = 3.9 + ry::RY = 3.6 + rz::RZ = 3.6 + rw::RW = 3.8 + c_xy::C1 = 0.4 + c_yz::C2 = 0.4 + c_zw::C3 = 0.35 + σ_xy::Σ1 = 0.05 + σ_yz::Σ2 = 0.05 + σ_zw::Σ3 = 0.05 + rng::RNG = Random.default_rng() +end + +function system(definition::Logistic4Chain) + return DiscreteDynamicalSystem(eom_logistic4_chain, definition.xi, definition) +end + +function eom_logistic4_chain(u, p::Logistic4Chain, t) + (; xi, rx, ry, rz, rw, c_xy, c_yz, c_zw, σ_xy, σ_yz, σ_zw, rng) = p + x, y, z, w = u + f_xy = (y + c_xy*(x + σ_xy * rand(rng)) ) / (1 + c_xy*(1+σ_xy)) + f_yz = (z + c_yz*(y + σ_yz * rand(rng)) ) / (1 + c_yz*(1+σ_yz)) + f_zw = (w + c_zw*(z + σ_zw * rand(rng)) ) / (1 + c_zw*(1+σ_zw)) + dx = rx * x * (1 - x) + dy = ry * (f_xy) * (1 - f_xy) + dz = rz * (f_yz) * (1 - f_yz) + dw = rw * (f_zw) * (1 - f_zw) + return SVector{4}(dx, dy, dz, dw) +end + +Base.@kwdef struct Logistic2Bidir{V, C1, C2, R1, R2, Σx, Σy, R} + xi::V = [0.5, 0.5] + c_xy::C1 = 0.1 + c_yx::C2 = 0.1 + r₁::R1 = 3.78 + r₂::R2 = 3.66 + σ_xy::Σx = 0.05 + σ_yx::Σy = 0.05 + rng::R = Random.default_rng() +end + +function system(definition::Logistic2Bidir) + return DiscreteDynamicalSystem(eom_logistic2bidir, definition.xi, definition) +end + +# Note: Until the `eom_logistic2_bidir` function is deprecated, this function must +# be called something different; otherwise the DiscreteDynamicalSystem constructor +# doesn't work. 
+function eom_logistic2bidir(u, p::Logistic2Bidir, t) + (; xi, c_xy, c_yx, r₁, r₂, σ_xy, σ_yx, rng) = p + x, y = u + f_xy = (y + c_xy*(x + σ_xy * rand(rng)) ) / (1 + c_xy*(1+σ_xy)) + f_yx = (x + c_yx*(y + σ_yx * rand(rng)) ) / (1 + c_yx*(1+σ_yx)) + dx = r₁ * (f_yx) * (1 - f_yx) + dy = r₂ * (f_xy) * (1 - f_xy) + return SVector{2}(dx, dy) +end \ No newline at end of file diff --git a/test/utils.jl b/test/utils.jl deleted file mode 100644 index e09f0c4ab..000000000 --- a/test/utils.jl +++ /dev/null @@ -1,6 +0,0 @@ -using Test -using Statistics - -D = StateSpaceSet(rand(100, 3)) -@test all(fastcov(D) .≈ cov(Matrix(D))) -@test all(fastcor(D) .≈ cor(Matrix(D)))