diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3b5f5914..452f4a44 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ jobs: access_token: ${{ github.token }} # Do tests - - uses: actions/checkout@v2 + - uses: actions/checkout@v5 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} diff --git a/Project.toml b/Project.toml index c2a410e1..9c913dc4 100644 --- a/Project.toml +++ b/Project.toml @@ -2,7 +2,7 @@ name = "Associations" uuid = "614afb3a-e278-4863-8805-9959372b9ec2" authors = ["Kristian Agasøster Haaga ", "Tor Einar Møller ", "George Datseris "] repo = "https://github.com/kahaaga/Associations.jl.git" -version = "4.4.2" +version = "4.5.0" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" @@ -31,8 +31,8 @@ TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70" [compat] Accessors = "^0.1.28" Combinatorics = "1" -ComplexityMeasures = "~3.7" -DSP = "^0.7" +ComplexityMeasures = "3.8" +DSP = "^0.7, 0.8" DelayEmbeddings = "2.9" Distances = "^0.10" Distributions = "^0.25" @@ -49,4 +49,4 @@ StaticArrays = "^1" Statistics = "1" StatsBase = "^0.34" TimeseriesSurrogates = "2.7" -julia = "^1.10.6" +julia = "^1.10.10" diff --git a/docs/Project.toml b/docs/Project.toml index c3872cdf..ce89e3e0 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -25,5 +25,6 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70" [compat] +ComplexityMeasures = "^3.8" DynamicalSystemsBase = "3" -julia = "^1.10.6" +julia = "^1.10.10" diff --git a/docs/build_docs_with_style.jl b/docs/build_docs_with_style.jl index 33827955..6a43a883 100644 --- a/docs/build_docs_with_style.jl +++ b/docs/build_docs_with_style.jl @@ -1,40 +1,40 @@ CI = get(ENV, "CI", nothing) == "true" || get(ENV, "GITHUB_TOKEN", nothing) !== nothing - import Pkg - Pkg.pkg"add Documenter@1" +import Pkg +Pkg.pkg"add Documenter@1" - # Load documenter - using Documenter - using DocumenterTools: Themes - ENV["JULIA_DEBUG"] = "Documenter" +# Load documenter +using Documenter +using DocumenterTools: Themes +ENV["JULIA_DEBUG"] = "Documenter" - # For easier debugging when downloading from a specific branch. - github_user = "JuliaDynamics" - branch = "master" - download_path = "https://raw.githubusercontent.com/$github_user/doctheme/$branch" +# For easier debugging when downloading from a specific branch. 
+github_user = "JuliaDynamics" +branch = "master" +download_path = "https://raw.githubusercontent.com/$github_user/doctheme/$branch" - import Downloads - for file in ("juliadynamics-lightdefs.scss", "juliadynamics-darkdefs.scss", "juliadynamics-style.scss") - Downloads.download("$download_path/$file", joinpath(@__DIR__, file)) - end +import Downloads +for file in ("juliadynamics-lightdefs.scss", "juliadynamics-darkdefs.scss", "juliadynamics-style.scss") + Downloads.download("$download_path/$file", joinpath(@__DIR__, file)) +end - # create the themes - for w in ("light", "dark") - header = read(joinpath(@__DIR__, "juliadynamics-style.scss"), String) - theme = read(joinpath(@__DIR__, "juliadynamics-$(w)defs.scss"), String) - write(joinpath(@__DIR__, "juliadynamics-$(w).scss"), header*"\n"*theme) - end +# create the themes +for w in ("light", "dark") + header = read(joinpath(@__DIR__, "juliadynamics-style.scss"), String) + theme = read(joinpath(@__DIR__, "juliadynamics-$(w)defs.scss"), String) + write(joinpath(@__DIR__, "juliadynamics-$(w).scss"), header * "\n" * theme) +end - # compile the themes - Themes.compile(joinpath(@__DIR__, "juliadynamics-light.scss"), joinpath(@__DIR__, "src/assets/themes/documenter-light.css")) - Themes.compile(joinpath(@__DIR__, "juliadynamics-dark.scss"), joinpath(@__DIR__, "src/assets/themes/documenter-dark.css")) +# compile the themes +Themes.compile(joinpath(@__DIR__, "juliadynamics-light.scss"), joinpath(@__DIR__, "src/assets/themes/documenter-light.css")) +Themes.compile(joinpath(@__DIR__, "juliadynamics-dark.scss"), joinpath(@__DIR__, "src/assets/themes/documenter-dark.css")) - # Download and apply CairoMakie plotting style - using CairoMakie - Downloads.download("$download_path/style.jl", joinpath(@__DIR__, "style.jl")) - include("style.jl") +# Download and apply CairoMakie plotting style +using CairoMakie +Downloads.download("$download_path/style.jl", joinpath(@__DIR__, "style.jl")) +include("style.jl") - using DocumenterInterLinks +using DocumenterInterLinks links = InterLinks( "ComplexityMeasures" => ( @@ -49,46 +49,48 @@ links = InterLinks( ), ); - function build_docs_with_style(pages, modules...; bib = nothing, authors = "George Datseris", draft = false, kwargs...) - settings = ( - modules = [modules...], - format = Documenter.HTML( - prettyurls = CI, - assets = [ - asset("https://fonts.googleapis.com/css?family=Montserrat|Source+Code+Pro&display=swap", class=:css), - ], - collapselevel = 3, - size_threshold = 1250 * 2^10, - size_threshold_warn = 750 * 2^10, - example_size_threshold = 100 * 2^10, - size_threshold_ignore = [ - "examples/examples_associations.md", +function build_docs_with_style(pages, modules...; bib=nothing, + authors="Kristian Haaga, George Datseris and others", draft=false, kwargs...) + settings = ( + modules=[modules...], + format=Documenter.HTML( + prettyurls=CI, + assets=[ + asset("https://fonts.googleapis.com/css?family=Montserrat|Source+Code+Pro&display=swap", class=:css), + ], + collapselevel=3, + size_threshold=1250 * 2^10, + size_threshold_warn=750 * 2^10, + example_size_threshold=100 * 2^10, + size_threshold_ignore=[ + "examples/examples_associations.md", "examples/examples_independence.md", "examples/examples_infer_graphs.md", - ] - ), - sitename = "$(modules[1]).jl", - authors, - pages, - draft, - doctest = false, - warnonly = false, - checkdocs = :exported, - kwargs... 
- ) + ] + ), + sitename="$(modules[1]).jl", + authors, + pages, + draft, + doctest=false, + checkdocs=:exported, + # The following Documenter fails will NOT ERROR the docbuild! + warnonly=[:doctest, :missing_docs], + kwargs... + ) - if isnothing(bib) - makedocs(; settings...) - else - makedocs(; plugins=[bib, links], settings...) - end + if isnothing(bib) + makedocs(; plugins=[links], settings...) + else + makedocs(; plugins=[links, bib], settings...) + end - if CI - deploydocs( - repo = "github.com/JuliaDynamics/$(modules[1]).jl.git", - target = "build", - push_preview = true - ) - end + if CI + deploydocs( + repo="github.com/JuliaDynamics/$(modules[1]).jl.git", + target="build", + push_preview=true + ) + end - end \ No newline at end of file +end \ No newline at end of file diff --git a/docs/make.jl b/docs/make.jl index 37f471c1..6db62037 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -25,7 +25,7 @@ pages = [ "api/cross_map_api.md", ], "Examples" => [ - "Associations" => "examples/examples_associations.md", + "Associations" => "examples/examples_associations.md", "Independence testing" => "examples/examples_independence.md", "Causal graph inference" => "examples/examples_infer_graphs.md", ], @@ -43,9 +43,9 @@ bibliography = CitationBibliography( style=:authoryear ) -build_docs_with_style(pages, Associations, ComplexityMeasures, StateSpaceSets; - expandfirst = ["index.md"], - bib = bibliography, - pages = pages, - authors = "Kristian Agasøster Haaga, David Diego, Tor Einar Møller, George Datseris", +build_docs_with_style(pages, Associations; + expandfirst=["index.md"], + bib=bibliography, + pages=pages, + authors="Kristian Agasøster Haaga, David Diego, Tor Einar Møller, George Datseris", ) diff --git a/docs/src/api/counts_and_probabilities_api.md b/docs/src/api/counts_and_probabilities_api.md index 794e0cdf..b624ba23 100644 --- a/docs/src/api/counts_and_probabilities_api.md +++ b/docs/src/api/counts_and_probabilities_api.md @@ -1,20 +1,18 @@ ```@meta -CollapsedDocStrings = true +CollapsedDocStrings = false ``` # [Multivariate counts and probabilities API](@id counts_and_probabilities_api) For counting and probabilities, Associations.jl extends the single-variable machinery -in ComplexityMeasures.jl to multiple variables. +in ComplexityMeasures.jl to multiple variables. See the following types: -```@docs -Associations.Counts -Associations.counts(::OutcomeSpace) -``` +- [`Counts`](@extref ComplexityMeasures.Counts) +- [`Probabilities`](@extref ComplexityMeasures.Probabilities) ```@docs -Associations.Probabilities +Associations.counts(::OutcomeSpace) Associations.probabilities(::OutcomeSpace) ``` @@ -24,10 +22,10 @@ The utility function [`marginal`](@ref) is also useful. marginal ``` -## [Example: estimating [`Counts`](@ref) and [`Probabilities`](@ref)](@id tutorial_probabilities) +## [Example: estimating [`Counts`](@extref ComplexityMeasures.Counts) and [`Probabilities`](@extref ComplexityMeasures.Probabilities)](@id tutorial_probabilities) Estimating multivariate counts (contingency matrices) and PMFs is simple. If the data are pre-discretized, then -we can use [`UniqueElements`](@ref) to simply count the number of occurrences. +we can use [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) to simply count the number of occurrences. 
```@example counts_probs_tutorial using Associations @@ -47,7 +45,7 @@ probabilities(discretization, x, y, z) ``` For numerical data, we can estimate both counts and probabilities using [`CodifyVariables`](@ref) -with any count-based [`OutcomeSpace`](@ref). +with any count-based [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace). ```@example counts_probs_tutorial using Associations @@ -56,7 +54,7 @@ discretization = CodifyVariables(BubbleSortSwaps(m = 4)) probabilities(discretization, x, y) ``` -For more fine-grained control, we can use [`CodifyPoints`](@ref) with one or several [`Encoding`](@ref)s. +For more fine-grained control, we can use [`CodifyPoints`](@ref) with one or several [`Encoding`](@extref ComplexityMeasures.Encoding)s. ```@example counts_probs_tutorial using Associations diff --git a/docs/src/api/cross_map_api.md b/docs/src/api/cross_map_api.md index 469280bb..878e26c8 100644 --- a/docs/src/api/cross_map_api.md +++ b/docs/src/api/cross_map_api.md @@ -1,5 +1,5 @@ ```@meta -CollapsedDocStrings = true +CollapsedDocStrings = false ``` # Cross-map API diff --git a/docs/src/api/discretization_counts_probs_api.md b/docs/src/api/discretization_counts_probs_api.md index d74770b5..89bacd45 100644 --- a/docs/src/api/discretization_counts_probs_api.md +++ b/docs/src/api/discretization_counts_probs_api.md @@ -1,5 +1,5 @@ ```@meta -CollapsedDocStrings = true +CollapsedDocStrings = false ``` # Discretization API @@ -9,7 +9,7 @@ CollapsedDocStrings = true A fundamental operation when computing multivariate information measures from data is *discretization*. When discretizing, what happens is that we "encode" input data into an intermediate representation indexed by the positive integers. This intermediate representation is called an "encoding". This is useful in several ways: -- Once a dataset has been encoded into integers, we can estimate [`Counts`](@ref) or [`Probabilities`](@ref) ([tutorial](@ref tutorial_probabilities)). +- Once a dataset has been encoded into integers, we can estimate [`Counts`](@extref ComplexityMeasures.Counts) or [`Probabilities`](@extref ComplexityMeasures.Probabilities) ([tutorial](@ref tutorial_probabilities)). - Once probabilities have been estimated, one can use these to estimate [`MultivariateInformationMeasure`](@ref) ([tutorial](@ref tutorial_infomeasures)). The following functions and types are used by Associations.jl to perform discretization of input data. @@ -24,12 +24,12 @@ codify In summary, the two main ways of discretizing data in Associations are as follows. - The [`CodifyPoints`](@ref) discretization scheme encodes input data on a point-by-point - basis by applying some [`Encoding`](@ref) to each point. + basis by applying some [`Encoding`](@extref ComplexityMeasures.Encoding) to each point. - The [`CodifyVariables`](@ref) discretization scheme encodes input data on a column-by-column - basis by applying a sliding window to each column, and encoding the data within the sliding window according to some [`OutcomeSpace`](@ref) (*Internally, this uses [`codify`](@ref)*). + basis by applying a sliding window to each column, and encoding the data within the sliding window according to some [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace) (*Internally, this uses [`codify`](@ref)*). !!! 
note - [`Encoding`](@ref), [`OutcomeSpace`](@ref) and [`codify`](@ref) are all from + [`Encoding`](@extref ComplexityMeasures.Encoding), [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace) and [`codify`](@ref) are all from [ComplexityMeasures.jl](https://github.com/JuliaDynamics/ComplexityMeasures.jl). In this package, they are used to discretize multiple input variables instead of just one input variable. @@ -38,32 +38,30 @@ In summary, the two main ways of discretizing data in Associations are as follow ### Encoding per point/row In some cases, it may be desireable to encode data on a row-wise basis. This -typically happens when working with pre-embedded time series or [`StateSpaceSet`](@ref)s +typically happens when working with pre-embedded time series or [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet)s (respecting the fact that time ordering is already taken care of by the embedding procedure). -If we want to apply something like [`OrdinalPatternEncoding`](@ref) to such data, then +If we want to apply something like [`OrdinalPatternEncoding`](@extref ComplexityMeasures.OrdinalPatternEncoding) to such data, then we must encode each *point* individually, such that vectors like `[1.2, 2.4, 4.5]` or `["howdy", "partner"]` gets mapped to an integer. The [`CodifyPoints`](@ref) discretization intstruction ensures input data are encoded on a point-by-point basis. -A point-by-point discretization using [`CodifyPoints`](@ref) is formally done by applying some [`Encoding`](@ref) to each input data point. You can pick between the following encodings, or combine -them in arbitrary ways using [`CombinationEncoding`](@ref). +A point-by-point discretization using [`CodifyPoints`](@ref) is formally done by applying some [`Encoding`](@extref ComplexityMeasures.Encoding) to each input data point. You can pick between the following encodings, or combine +them in arbitrary ways using [`CombinationEncoding`](@extref ComplexityMeasures.CombinationEncoding). -```@docs -Encoding -GaussianCDFEncoding -OrdinalPatternEncoding -RelativeMeanEncoding -RelativeFirstDifferenceEncoding -UniqueElementsEncoding -RectangularBinEncoding -CombinationEncoding -``` +- [`Encoding`](@extref ComplexityMeasures.Encoding) +- [`GaussianCDFEncoding`](@extref ComplexityMeasures.GaussianCDFEncoding) +- [`OrdinalPatternEncoding`](@extref ComplexityMeasures.OrdinalPatternEncoding) +- [`RelativeMeanEncoding`](@extref ComplexityMeasures.RelativeMeanEncoding) +- [`RelativeFirstDifferenceEncoding`](@extref ComplexityMeasures.RelativeFirstDifferenceEncoding) +- [`UniqueElementsEncoding`](@extref ComplexityMeasures.UniqueElementsEncoding) +- [`RectangularBinEncoding`](@extref ComplexityMeasures.RectangularBinEncoding) +- [`CombinationEncoding`](@extref ComplexityMeasures.CombinationEncoding) #### [Examples: encoding *rows* (one *point* at a time)](@id tutorial_codify_points) -We'll here use the [`OrdinalPatternEncoding`](@ref) with differing parameter `m` to encode -multiple [`StateSpaceSet`](@ref) of differing dimensions. +We'll here use the [`OrdinalPatternEncoding`](@extref ComplexityMeasures.OrdinalPatternEncoding) with differing parameter `m` to encode +multiple [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet) of differing dimensions. ```@example example_encode_points using Associations @@ -105,27 +103,26 @@ This typically happens when the input are either a single or multiple timeseries To encode columns, we move a sliding window across each input variable/column and encode points within that window. 
Formally, such a sliding-window discretization is done by using the [`CodifyVariables`](@ref) discretization scheme, which takes -as input some [`OutcomeSpace`](@ref) that dictates how each window is encoded, and +as input some [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace) that dictates how each window is encoded, and also dictates the width of the encoding windows. For column/variable-wise encoding, you can pick between the following outcome spaces. +- [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace) +- [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) +- [`CosineSimilarityBinning`](@extref ComplexityMeasures.CosineSimilarityBinning) +- [`Dispersion`](@extref ComplexityMeasures.Dispersion) +- [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) +- [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) +- [`BubbleSortSwaps`](@extref ComplexityMeasures.BubbleSortSwaps) +- [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) +- [`RectangularBinning`](@extref ComplexityMeasures.RectangularBinning) +- [`FixedRectangularBinning`](@extref ComplexityMeasures.FixedRectangularBinning) -```@docs -OutcomeSpace -UniqueElements -CosineSimilarityBinning -Dispersion -OrdinalPatterns -BubbleSortSwaps -ValueBinning -RectangularBinning -FixedRectangularBinning -``` #### Example: encoding *columns* (one variable at a time) -Some [`OutcomeSpace`](@ref)s dictate a sliding window which has the width of one element -when used with [`CodifyVariables`](@ref). [`ValueBinning`](@ref) is such an outcome space. +Some [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace)s dictate a sliding window which has the width of one element +when used with [`CodifyVariables`](@ref). [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) is such an outcome space. ```@example example_encode_vars using Associations @@ -136,17 +133,17 @@ o = ValueBinning(3) cx = codify(CodifyVariables(o), x) ``` -We can verify that [`ValueBinning`](@ref) preserves the cardinality of the input dataset. +We can verify that [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) preserves the cardinality of the input dataset. ```@example example_encode_vars length(x) == length(cx) ``` -Other outcome spaces such as [`Dispersion`](@ref) or [`OrdinalPatterns`](@ref) do not +Other outcome spaces such as [`Dispersion`](@extref ComplexityMeasures.Dispersion) or [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) do not preserve the cardinality of the input dataset when used with [`CodifyVariables`](@ref). This is because when they are applied in a sliding window, they compress sliding windows consisting of potentially multiple points into single integers. This means that some points at the -end of each input variable are lost. For example, with [`OrdinalPatterns`](@ref), the number +end of each input variable are lost. For example, with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns), the number of encoded points decrease with the embedding parameter `m`. ```@example example_encode_vars @@ -158,7 +155,7 @@ o = OrdinalPatterns(m = 3) cx = codify(CodifyVariables(o), x) ``` -We can simultaneously encode multiple variable/columns of a [`StateSpaceSet`](@ref) using +We can simultaneously encode multiple variable/columns of a [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet) using the same outcome space, as long as the operation will result in the *same* number of encoded data points for each column. 
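A minimal sketch of the column-wise scheme described above (the data and variable names here are made up, not taken from the documentation): encoding two variables with the same outcome space yields the same number of encoded points for each column, which is the requirement stated above for encoding several variables simultaneously.

```julia
using Associations

# Two hypothetical input variables, encoded column-wise with the same outcome space.
x, y = rand(1000), rand(1000)
o = OrdinalPatterns(m = 3)

cx = codify(CodifyVariables(o), x)
cy = codify(CodifyVariables(o), y)

# Both variables lose the same number of trailing points, so the encoded
# columns have equal length and can be used together downstream.
length(cx) == length(cy) # true
```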
diff --git a/docs/src/api/information_multivariate_api.md b/docs/src/api/information_multivariate_api.md index 1d79b318..71fea2a3 100644 --- a/docs/src/api/information_multivariate_api.md +++ b/docs/src/api/information_multivariate_api.md @@ -1,5 +1,5 @@ ```@meta -CollapsedDocStrings = true +CollapsedDocStrings = false ``` # [Multivariate information API](@id information_api) @@ -110,7 +110,7 @@ find those that we implement [here](@ref divergences_and_distances). As an example, let's quantify the [`KLDivergence`](@ref) between two probability mass functions estimated by symbolizing two input vectors `x` and `y` using -[`OrdinalPatterns`](@ref). Since the discrete [`KLDivergence`](@ref) can be +[`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns). Since the discrete [`KLDivergence`](@ref) can be expressed as a function of a joint pmf, we can use the [`JointProbabilities`](@ref) estimator. @@ -157,7 +157,7 @@ association(est, x, y) [Joint entropies](@ref joint_entropies), on the other hand, are *symmetric*. Joint entropies are functionals of a joint pmf, so we can still use the -[`JointProbabilities`](@ref) estimator. This time, we use a [`Dispersion`](@ref) +[`JointProbabilities`](@ref) estimator. This time, we use a [`Dispersion`](@extref ComplexityMeasures.Dispersion) based discretization. ```@example INFO_TUTORIAL @@ -196,7 +196,7 @@ association(est, x, y) ≈ association(est, y, x) # should be true One can also estimate mutual information using the [`EntropyDecomposition`](@ref) estimator, or (like above) using the [`JointProbabilities`](@ref) estimator. -Let's construct a differential entropy based estimator based on the [`Kraskov`](@ref) +Let's construct a differential entropy based estimator based on the [`Kraskov`](@extref ComplexityMeasures.Kraskov) estimator. ```@example INFO_TUTORIAL @@ -207,8 +207,8 @@ est_diff = EntropyDecomposition(MIShannon(base = 2), Kraskov(Shannon(), k=10)) association(est_diff, x, y) ``` -We can also construct a discrete entropy based estimator based on e.g. [`PlugIn`](@ref) -estimator of [`Shannon`](@ref) entropy. +We can also construct a discrete entropy based estimator based on e.g. [`PlugIn`](@extref ComplexityMeasures.PlugIn) +estimator of [`Shannon`](@extref ComplexityMeasures.Shannon) entropy. ```@example INFO_TUTORIAL # We know that `x` and `y` were generated from a uniform distribution above, @@ -224,7 +224,7 @@ association(est_disc, x, y) For numerical data, we can estimate both counts and probabilities using [`CodifyVariables`](@ref) -with any count-based [`OutcomeSpace`](@ref). Here, we'll estimate [`MIShannon`](@ref) using +with any count-based [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace). Here, we'll estimate [`MIShannon`](@ref) using one type of encoding for the first variable, and another type of encoding for the second variable. ```@example counts_probs_tutorial @@ -244,7 +244,7 @@ est = JointProbabilities(MIShannon(), discretization) association(est, x, y) ``` -For more fine-grained control than [`CodifyVariables`](@ref) can offer, we can use [`CodifyPoints`](@ref) with one or several [`Encoding`](@ref)s. Here's how we can estimate [`MIShannon`](@ref) one multivariate input +For more fine-grained control than [`CodifyVariables`](@ref) can offer, we can use [`CodifyPoints`](@ref) with one or several [`Encoding`](@extref ComplexityMeasures.Encoding)s. Here's how we can estimate [`MIShannon`](@ref) one multivariate input data by discretizing each input variable in arbitrary ways. 
```@example counts_probs_tutorial diff --git a/docs/src/api/information_single_variable_api.md b/docs/src/api/information_single_variable_api.md index 35ce51be..88e6918c 100644 --- a/docs/src/api/information_single_variable_api.md +++ b/docs/src/api/information_single_variable_api.md @@ -1,5 +1,5 @@ ```@meta -CollapsedDocStrings = true +CollapsedDocStrings = false ``` # Single-variable information API @@ -14,40 +14,36 @@ information ## Single-variable information measures -```@docs -Shannon -Renyi -Tsallis -Kaniadakis -``` +- [`Shannon`](@extref ComplexityMeasures.Shannon) +- [`Renyi`](@extref ComplexityMeasures.Renyi) +- [`Tsallis`](@extref ComplexityMeasures.Tsallis) +- [`Kaniadakis`](@extref ComplexityMeasures.Kaniadakis) ## Discrete information estimators -```@docs -DiscreteInfoEstimator -PlugIn -MillerMadow -Schuermann -GeneralizedSchuermann -Jackknife -HorvitzThompson -ChaoShen -``` + +- [`DiscreteInfoEstimator`](@extref ComplexityMeasures.DiscreteInfoEstimator) +- [`PlugIn`](@extref ComplexityMeasures.PlugIn) +- [`MillerMadow`](@extref ComplexityMeasures.MillerMadow) +- [`Schuermann`](@extref ComplexityMeasures.Schuermann) +- [`GeneralizedSchuermann`](@extref ComplexityMeasures.GeneralizedSchuermann) +- [`Jackknife`](@extref ComplexityMeasures.Jackknife) +- [`HorvitzThompson`](@extref ComplexityMeasures.HorvitzThompson) +- [`ChaoShen`](@extref ComplexityMeasures.ChaoShen) + ## Differential information estimators -```@docs -DifferentialInfoEstimator -Kraskov -KozachenkoLeonenko -Zhu -ZhuSingh -Gao -Goria -Lord -LeonenkoProzantoSavani -Vasicek -AlizadehArghami -Ebrahimi -Correa -``` \ No newline at end of file +- [`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator) +- [`Kraskov`](@extref ComplexityMeasures.Kraskov) +- [`KozachenkoLeonenko`](@extref ComplexityMeasures.KozachenkoLeonenko) +- [`Zhu`](@extref ComplexityMeasures.Zhu) +- [`ZhuSingh`](@extref ComplexityMeasures.ZhuSingh) +- [`Gao`](@extref ComplexityMeasures.Gao) +- [`Goria`](@extref ComplexityMeasures.Goria) +- [`Lord`](@extref ComplexityMeasures.Lord) +- [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani) +- [`Vasicek`](@extref ComplexityMeasures.Vasicek) +- [`AlizadehArghami`](@extref ComplexityMeasures.AlizadehArghami) +- [`Ebrahimi`](@extref ComplexityMeasures.Ebrahimi) +- [`Correa`](@extref ComplexityMeasures.Correa) diff --git a/docs/src/associations.md b/docs/src/associations.md index 8143384f..ee6f816c 100644 --- a/docs/src/associations.md +++ b/docs/src/associations.md @@ -1,5 +1,5 @@ ```@meta -CollapsedDocStrings = true +CollapsedDocStrings = false ``` # [Associations](@id association_measures) diff --git a/docs/src/causal_graphs.md b/docs/src/causal_graphs.md index dfbfeb4c..6a9cbf47 100644 --- a/docs/src/causal_graphs.md +++ b/docs/src/causal_graphs.md @@ -1,5 +1,5 @@ ```@meta -CollapsedDocStrings = true +CollapsedDocStrings = false ``` # [Inferring causal graphs](@id causal_graphs) diff --git a/docs/src/examples/examples_associations.md b/docs/src/examples/examples_associations.md index 887ecc91..7f3e36cf 100644 --- a/docs/src/examples/examples_associations.md +++ b/docs/src/examples/examples_associations.md @@ -12,7 +12,7 @@ p2 = Probabilities([0.3, 0.3, 0.2, 0.2]) association(HellingerDistance(), p1, p2) ``` -### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@ref)](@id example_HellingerDistance_JointProbabilities_OrdinalPatterns) +### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns)](@id 
example_HellingerDistance_JointProbabilities_OrdinalPatterns) We expect the Hellinger distance between two uncorrelated variables to be close to zero. @@ -37,7 +37,7 @@ p2 = Probabilities([0.3, 0.3, 0.2, 0.2]) association(KLDivergence(), p1, p2) ``` -### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@ref)](@id example_KLDivergence_JointProbabilities_OrdinalPatterns) +### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns)](@id example_KLDivergence_JointProbabilities_OrdinalPatterns) We expect the [`KLDivergence`](@ref) between two uncorrelated variables to be close to zero. @@ -63,7 +63,7 @@ p2 = Probabilities([0.3, 0.3, 0.2, 0.2]) association(RenyiDivergence(), p1, p2) ``` -### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@ref)](@id example_RenyiDivergence_JointProbabilities_OrdinalPatterns) +### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns)](@id example_RenyiDivergence_JointProbabilities_OrdinalPatterns) We expect the [`RenyiDivergence`](@ref) between two uncorrelated variables to be close to zero. @@ -89,7 +89,7 @@ p2 = Probabilities([0.3, 0.3, 0.2, 0.2]) association(VariationDistance(), p1, p2) ``` -### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@ref)](@id example_VariationDistance_JointProbabilities_OrdinalPatterns) +### [[`JointProbabilities`](@ref) + [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns)](@id example_VariationDistance_JointProbabilities_OrdinalPatterns) We expect the [`VariationDistance`](@ref) between two uncorrelated variables to be close to zero. @@ -104,7 +104,7 @@ div_hd = association(est, x, y) # pretty close to zero ## [`JointEntropyShannon`](@ref) -### [[`JointProbabilities`](@ref) with [`Dispersion`](@ref)](@id example_JointEntropyShannon_Dispersion) +### [[`JointProbabilities`](@ref) with [`Dispersion`](@extref ComplexityMeasures.Dispersion)](@id example_JointEntropyShannon_Dispersion) ```@example example_JointEntropyShannon using Associations @@ -118,7 +118,7 @@ association(est, x, y) ## [`JointEntropyTsallis`](@ref) -### [[`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref)](@id example_JointEntropyTsallis_OrdinalPatterns) +### [[`JointProbabilities`](@ref) with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns)](@id example_JointEntropyTsallis_OrdinalPatterns) ```@example example_JointEntropyTsallis using Associations @@ -133,7 +133,7 @@ association(est, x, y) ## [`JointEntropyRenyi`](@ref) -### [[`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref)](@id example_JointEntropyRenyi_ValueBinning) +### [[`JointProbabilities`](@ref) with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns)](@id example_JointEntropyRenyi_ValueBinning) ```@example example_JointEntropyRenyi using Associations @@ -190,7 +190,7 @@ pyx = Probabilities(transpose(freqs_yx)) ce_y_given_x = association(ConditionalEntropyShannon(), pyx) |> Rational ``` -### [[`JointProbabilities`](@ref) + [`CodifyVariables`](@ref) + [`UniqueElements`](@ref)](@id example_ConditionalEntropyShannon_JointProbabilities_CodifyVariables_UniqueElements) +### [[`JointProbabilities`](@ref) + [`CodifyVariables`](@ref) + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements)](@id example_ConditionalEntropyShannon_JointProbabilities_CodifyVariables_UniqueElements) We can of course also estimate conditional entropy from data. To do so, we'll use the [`JointProbabilities`](@ref) estimator, which constructs a multivariate PMF for us. 
@@ -198,7 +198,7 @@ Thus, we don't explicitly need a set of counts, like in the example above, becau are estimated under the hood for us. Let's first demonstrate on some categorical data. For that, we must use -[`UniqueElements`](@ref) as the discretization (i.e. just count unique elements). +[`UniqueElements`](@extref ComplexityMeasures.UniqueElements) as the discretization (i.e. just count unique elements). ```@example example_ConditionalEntropyShannon_JointProbabilities_CodifyVariables_UniqueElements using Associations @@ -212,7 +212,7 @@ est = JointProbabilities(ConditionalEntropyShannon(), disc) association(est, rating, movie) ``` -### [[`JointProbabilities`](@ref) + [`CodifyPoints`](@ref) + [`UniqueElementsEncoding`](@ref)](@id example_ConditionalEntropyShannon_JointProbabilities_CodifyPoints_UniqueElementsEncoding) +### [[`JointProbabilities`](@ref) + [`CodifyPoints`](@ref) + [`UniqueElementsEncoding`](@extref ComplexityMeasures.UniqueElementsEncoding)](@id example_ConditionalEntropyShannon_JointProbabilities_CodifyPoints_UniqueElementsEncoding) ```@example example_ConditionalEntropyShannon_JointProbabilities_CodifyPoints_UniqueElementsEncoding using Associations @@ -227,7 +227,7 @@ association(est, X, Y) ## [`ConditionalEntropyTsallisAbe`](@ref) -### [[`JointProbabilities`](@ref) + [`CodifyVariables`](@ref) + [`UniqueElements`](@ref)](@id example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyVariables_UniqueElements) +### [[`JointProbabilities`](@ref) + [`CodifyVariables`](@ref) + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements)](@id example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyVariables_UniqueElements) We'll here repeat the analysis we did for [`ConditionalEntropyShannon`](@ref) above. @@ -243,7 +243,7 @@ est = JointProbabilities(ConditionalEntropyTsallisAbe(q =1.5), disc) association(est, rating, movie) ``` -### [[`JointProbabilities`](@ref) + [`CodifyPoints`](@ref) + [`UniqueElementsEncoding`](@ref)](@id example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyPoints_UniqueElementsEncoding) +### [[`JointProbabilities`](@ref) + [`CodifyPoints`](@ref) + [`UniqueElementsEncoding`](@extref ComplexityMeasures.UniqueElementsEncoding)](@id example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyPoints_UniqueElementsEncoding) ```@example example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyPoints_UniqueElementsEncoding using Associations @@ -259,7 +259,7 @@ association(est, X, Y) ## [`ConditionalEntropyTsallisFuruichi`](@ref) -### [[`JointProbabilities`](@ref) + [`CodifyVariables`](@ref) + [`UniqueElements`](@ref)](@id example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyVariables_UniqueElements) +### [[`JointProbabilities`](@ref) + [`CodifyVariables`](@ref) + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements)](@id example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyVariables_UniqueElements) We'll here repeat the analysis we did for [`ConditionalEntropyShannon`](@ref) and [`ConditionalEntropyTsallisAbe`](@ref) above. 
@@ -275,7 +275,7 @@ est = JointProbabilities(ConditionalEntropyTsallisFuruichi(q =0.5), disc) association(est, rating, movie) ``` -### [[`JointProbabilities`](@ref) + [`CodifyPoints`](@ref) + [`UniqueElementsEncoding`](@ref)](@id example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyPoints_UniqueElementsEncoding) +### [[`JointProbabilities`](@ref) + [`CodifyPoints`](@ref) + [`UniqueElementsEncoding`](@extref ComplexityMeasures.UniqueElementsEncoding)](@id example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyPoints_UniqueElementsEncoding) ```@example example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyPoints_UniqueElementsEncoding using Associations @@ -291,7 +291,7 @@ association(est, X, Y) ## [`MIShannon`](@ref) -### [[`JointProbabilities`](@ref) + [`ValueBinning`](@ref)](@id example_MIShannon_JointProbabilities_ValueBinning) +### [[`JointProbabilities`](@ref) + [`ValueBinning`](@extref ComplexityMeasures.ValueBinning)](@id example_MIShannon_JointProbabilities_ValueBinning) ```@example mi_demonstration using Associations @@ -304,7 +304,7 @@ association(est, x, y) ``` -### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MIShannon_JointProbabilities_UniqueElements) +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements)](@id example_MIShannon_JointProbabilities_UniqueElements) The [`JointProbabilities`](@ref) estimator can also be used with categorical data. For example, let's compare the Shannon mutual information between the preferences @@ -343,7 +343,7 @@ y = rand(1000) .+ x association(GaussianMI(MIShannon()), x, y) # defaults to `MIShannon()` ``` -### [Dedicated [`KraskovStögbauerGrassberger1`](@ref) estimator](@id example_MIShannon_KSG1) +### [Dedicated [`KraskovStögbauerGrassberger2`](@ref) estimator](@id example_MIShannon_KSG1) ```@example mi_demonstration using Associations @@ -367,9 +367,9 @@ x, y = rand(1000), rand(1000) association(GaoKannanOhViswanath(MIShannon(); k = 10), x, y) ``` -### [[`EntropyDecomposition`](@ref) + [`Kraskov`](@ref)](@id example_MIShannon_EntropyDecomposition_Kraskov) +### [[`EntropyDecomposition`](@ref) + [`Kraskov`](@extref ComplexityMeasures.Kraskov)](@id example_MIShannon_EntropyDecomposition_Kraskov) -We can compute [`MIShannon`](@ref) by naively applying a [`DifferentialInfoEstimator`](@ref). +We can compute [`MIShannon`](@ref) by naively applying a [`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator). Note that this doesn't apply any bias correction. ```@example mi_demonstration @@ -379,9 +379,9 @@ association(EntropyDecomposition(MIShannon(), Kraskov(k = 3)), x, y) ``` -### [[`EntropyDecomposition`](@ref) + [`BubbleSortSwaps`](@ref)](@id example_MIShannon_EntropyDecomposition_BubbleSortSwaps) +### [[`EntropyDecomposition`](@ref) + [`BubbleSortSwaps`](@extref ComplexityMeasures.BubbleSortSwaps)](@id example_MIShannon_EntropyDecomposition_BubbleSortSwaps) -We can also compute [`MIShannon`](@ref) by naively applying a [`DiscreteInfoEstimator`](@ref). +We can also compute [`MIShannon`](@ref) by naively applying a [`DiscreteInfoEstimator`](@extref ComplexityMeasures.DiscreteInfoEstimator). Note that this doesn't apply any bias correction. 
```@example mi_demonstration @@ -393,10 +393,10 @@ association(EntropyDecomposition(MIShannon(), hest, disc), x, y) ``` -### [[`EntropyDecomposition`](@ref) + [`Jackknife`](@ref) + [`ValueBinning`](@ref)](@id example_MIShannon_EntropyDecomposition_Jackknife_ValueBinning) +### [[`EntropyDecomposition`](@ref) + [`Jackknife`](@extref ComplexityMeasures.Jackknife) + [`ValueBinning`](@extref ComplexityMeasures.ValueBinning)](@id example_MIShannon_EntropyDecomposition_Jackknife_ValueBinning) Shannon mutual information can be written as a sum of marginal entropy terms. -Here, we use [`CodifyVariables`](@ref) with [`ValueBinning`](@ref) bin the data +Here, we use [`CodifyVariables`](@ref) with [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) bin the data and compute discrete Shannon mutual information. ```@example mi_demonstration @@ -421,7 +421,7 @@ Here, we'll reproduce Figure 4 from [Kraskov2004](@citet)'s seminal paper on the between marginals of a bivariate Gaussian for a fixed time series length of 1000, varying the number of neighbors. *Note: in the original paper, they show multiple curves corresponding to different time series length. We only show two single curves: -one for the [`KraskovStögbauerGrassberger1`](@ref) estimator and one for the [`KraskovStögbauerGrassberger2`](@ref) estimator*. +one for the [`KraskovStögbauerGrassberger2`](@ref) estimator and one for the [`KraskovStögbauerGrassberger2`](@ref) estimator*. ```@example ex_mutualinfo using Associations @@ -469,8 +469,8 @@ Most estimators suffer from significant bias when applied to discrete, finite da But instead of adding noise to your data, you can also consider using an estimator that is specifically designed to deal with continuous-discrete mixture data. One example is the [`GaoKannanOhViswanath`](@ref) estimator. Below, we compare its -performance to [`KraskovStögbauerGrassberger1`](@ref) on uniformly distributed discrete multivariate data. -The true mutual information is zero. While the "naive" [`KraskovStögbauerGrassberger1`](@ref) estimator +performance to [`KraskovStögbauerGrassberger2`](@ref) on uniformly distributed discrete multivariate data. +The true mutual information is zero. While the "naive" [`KraskovStögbauerGrassberger2`](@ref) estimator diverges from the true value for these data, the [`GaoKannanOhViswanath`](@ref) converges to the true value. @@ -548,7 +548,7 @@ axislegend(position = :rb) fig ``` -### Estimation using [`DifferentialInfoEstimator`](@ref)s: a comparison +### Estimation using [`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator)s: a comparison Let's compare the performance of a subset of the implemented mutual information estimators. We'll use example data from Lord et al., where the analytical mutual information is known. @@ -775,15 +775,15 @@ fig = plot_results(family3, ifamily3; estimators = estimators, base = base) ``` -We see that the [`Lord`](@ref) estimator, which estimates local volume elements using a singular-value decomposition (SVD) of local neighborhoods, outperforms the other estimators by a large margin. +We see that the [`Lord`](@extref ComplexityMeasures.Lord) estimator, which estimates local volume elements using a singular-value decomposition (SVD) of local neighborhoods, outperforms the other estimators by a large margin. 
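As a rough sanity check on the estimators discussed above, here is a sketch (the sample size, correlation strength, and data are illustrative assumptions, not from the documentation) comparing a differential estimate of [`MIShannon`](@ref) against the analytical mutual information of a bivariate Gaussian, which is `-0.5 * log2(1 - ρ^2)` bits.

```julia
using Associations

# Bivariate Gaussian with correlation ρ; the analytical Shannon MI is known.
n = 10_000
ρ = 0.8
x = randn(n)
y = ρ .* x .+ sqrt(1 - ρ^2) .* randn(n)
mi_true = -0.5 * log2(1 - ρ^2)

# k-NN based differential estimate (no bias correction is applied here).
est = EntropyDecomposition(MIShannon(base = 2), Kraskov(Shannon(), k = 10))
mi_est = association(est, x, y)

(mi_true, mi_est) # the estimate should land in the vicinity of the analytical value
```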
## [`MIRenyiJizba`](@ref) -### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MIRenyiJizba_JointProbabilities_UniqueElements) +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements)](@id example_MIRenyiJizba_JointProbabilities_UniqueElements) [`MIRenyiJizba`](@ref) can be estimated for categorical data using [`JointProbabilities`](@ref) estimator -with the [`UniqueElements`](@ref) outcome space. +with the [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) outcome space. ```@example example_mirenyijizba using Associations @@ -794,11 +794,11 @@ est = JointProbabilities(MIRenyiJizba(), UniqueElements()) association(est, x, y) ``` -### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_MIRenyiJizba_JointProbabilities_LeonenkoProzantoSavani) +### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani)](@id example_MIRenyiJizba_JointProbabilities_LeonenkoProzantoSavani) [`MIRenyiJizba`](@ref) can also estimated for numerical data using [`EntropyDecomposition`](@ref) -in combination with any [`DifferentialInfoEstimator`](@ref) capable of estimating differential -[`Renyi`](@ref) entropy. +in combination with any [`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator) capable of estimating differential +[`Renyi`](@extref ComplexityMeasures.Renyi) entropy. ```@example example_MIRenyiJizba using Associations @@ -809,11 +809,11 @@ est_diff = EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi(), k=3)) association(est_diff, x, y) ``` -### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_MIRenyiJizba_EntropyDecomposition_ValueBinning) +### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani)](@id example_MIRenyiJizba_EntropyDecomposition_ValueBinning) [`MIRenyiJizba`](@ref) can also estimated for numerical data using [`EntropyDecomposition`](@ref) -in combination with any [`DiscreteInfoEstimator`](@ref) capable of estimating differential -[`Renyi`](@ref) entropy over some [`OutcomeSpace`](@ref), e.g. [`ValueBinning`](@ref). +in combination with any [`DiscreteInfoEstimator`](@extref ComplexityMeasures.DiscreteInfoEstimator) capable of estimating differential +[`Renyi`](@extref ComplexityMeasures.Renyi) entropy over some [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace), e.g. [`ValueBinning`](@extref ComplexityMeasures.ValueBinning). ```@example example_MIRenyiJizba @@ -832,7 +832,7 @@ association(est_disc, x, y) [`MIRenyiSarbu`](@ref) can be estimated using the [`JointProbabilities`](@ref) estimator in combination with any [`CodifyVariables`](@ref) or [`CodifyPoints`](@ref) discretization scheme. 
-### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MIRenyiSarbu_JointProbabilities_UniqueElements) +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements)](@id example_MIRenyiSarbu_JointProbabilities_UniqueElements) ```@example example_MIRenyiSarbu using Associations @@ -844,7 +844,7 @@ est = JointProbabilities(MIRenyiSarbu(), CodifyVariables(UniqueElements())) association(est, x, y) ``` -### [[`JointProbabilities`](@ref) + [`CosineSimilarityBinning`](@ref)](@id example_MIRenyiSarbu_JointProbabilities_CosineSimilarityBinning) +### [[`JointProbabilities`](@ref) + [`CosineSimilarityBinning`](@extref ComplexityMeasures.CosineSimilarityBinning)](@id example_MIRenyiSarbu_JointProbabilities_CosineSimilarityBinning) ```@example example_MIRenyiSarbu using Associations @@ -858,7 +858,7 @@ association(est, x, y) ## [`MITsallisFuruichi`](@ref) -### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MITsallisFuruichi_JointProbabilities_UniqueElements) +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements)](@id example_MITsallisFuruichi_JointProbabilities_UniqueElements) [`MITsallisFuruichi`](@ref) can be estimated using the [`JointProbabilities`](@ref) estimator in combination with any [`CodifyVariables`](@ref) or [`CodifyPoints`](@ref) discretization scheme. @@ -873,7 +873,7 @@ est = JointProbabilities(MITsallisFuruichi(q = 0.3), UniqueElements()) association(est, x, y) ``` -### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_MITsallisFuruichi_EntropyDecomposition_LeonenkoProzantoSavani) +### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani)](@id example_MITsallisFuruichi_EntropyDecomposition_LeonenkoProzantoSavani) ```@example example_MITsallisFuruichi using Associations @@ -886,7 +886,7 @@ association(est_diff, x, y) ``` -### [[`EntropyDecomposition`](@ref) + [`Dispersion`](@ref)](@id example_MITsallisFuruichi_EntropyDecomposition_Dispersion) +### [[`EntropyDecomposition`](@ref) + [`Dispersion`](@extref ComplexityMeasures.Dispersion)](@id example_MITsallisFuruichi_EntropyDecomposition_Dispersion) ```@example example_MITsallisFuruichi using Associations @@ -902,7 +902,7 @@ association(est_disc, x, y) ## [`MITsallisMartin`](@ref) -### [[`JointProbabilities`](@ref) + [`UniqueElements`](@ref)](@id example_MITsallisMartin_JointProbabilities_UniqueElements) +### [[`JointProbabilities`](@ref) + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements)](@id example_MITsallisMartin_JointProbabilities_UniqueElements) ```@example example_MITsallisMartin using Associations @@ -914,11 +914,14 @@ est = JointProbabilities(MITsallisMartin(q = 1.5), UniqueElements()) association(est, x, y) ``` -### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_MITsallisMartin_EntropyDecomposition_LeonenkoProzantoSavani) +### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani)](@id example_MITsallisMartin_EntropyDecomposition_LeonenkoProzantoSavani) [`MITsallisMartin`](@ref) can be estimated using a decomposition into entropy -terms using [`EntropyDecomposition`](@ref) with any compatible estimator -that can estimate differential [`Tsallis`](@ref) entropy. +terms using [`EntropyDecomposition`](@ref). 
This is done by using estimators from +[ComplexityMeasures.jl](https://juliadynamics.github.io/DynamicalSystemsDocs.jl/complexitymeasures/stable/). We can use any compatible +[`InformationMeasureEstimator`](@extref ComplexityMeasures.InformationMeasureEstimator) +that can estimate differential [`Tsallis`](@extref ComplexityMeasures.Tsallis) entropy from +[ComplexityMeasures.jl](https://juliadynamics.github.io/DynamicalSystemsDocs.jl/complexitymeasures/stable/). ```@example example_MITsallisMartin @@ -931,7 +934,7 @@ est_diff = EntropyDecomposition(MITsallisMartin(), LeonenkoProzantoSavani(Tsalli association(est_diff, x, y) ``` -### [[`EntropyDecomposition`](@ref) + [`OrdinalPatterns`](@ref)](@id example_MITsallisMartin_EntropyDecomposition_OrdinalPatterns) +### [[`EntropyDecomposition`](@ref) + [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns)](@id example_MITsallisMartin_EntropyDecomposition_OrdinalPatterns) ```@example @@ -1072,7 +1075,7 @@ taking the difference of mutual information terms. ## [`CMIShannon`](@ref) -### [[`MIDecomposition`](@ref) + [`KraskovStögbauerGrassberger1`](@ref)](@id example_CMIShannon_MIDecomposition_KSG1) +### [[`MIDecomposition`](@ref) + [`KraskovStögbauerGrassberger2`](@ref)](@id example_CMIShannon_MIDecomposition_KSG1) ```@example mi_demonstration using Associations @@ -1095,7 +1098,7 @@ association(est, x, z, y) ## [`ShortExpansionConditionalMutualInformation`](@ref) -### [[`JointProbabilities`](@ref) with [`CodifyVariables`](@ref) and [`ValueBinning`](@ref)](@id example_ShortExpansionConditionalMutualInformation_JointProbabilities_CodifyVariables_ValueBinning) +### [[`JointProbabilities`](@ref) with [`CodifyVariables`](@ref) and [`ValueBinning`](@extref ComplexityMeasures.ValueBinning)](@id example_ShortExpansionConditionalMutualInformation_JointProbabilities_CodifyVariables_ValueBinning) ```@example using Associations @@ -1111,9 +1114,9 @@ est = JointProbabilities(SECMI(base = 2), CodifyVariables(ValueBinning(3))) association(est, x, z, y) ``` -### [[`EntropyDecomposition`](@ref) + [`Kraskov`](@ref)](@id example_CMIShannon_EntropyDecomposition_Kraskov) +### [[`EntropyDecomposition`](@ref) + [`Kraskov`](@extref ComplexityMeasures.Kraskov)](@id example_CMIShannon_EntropyDecomposition_Kraskov) -Any [`DifferentialInfoEstimator`](@ref) can also be used to compute conditional +Any [`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator) can also be used to compute conditional mutual information using a sum of entropies. For that, we usethe [`EntropyDecomposition`](@ref) estimator. No bias correction is applied for [`EntropyDecomposition`](@ref) either. @@ -1131,12 +1134,12 @@ est = EntropyDecomposition(CMIShannon(), Kraskov(k = 5)) association(est, x, z, y) ``` -Any [`DiscreteInfoEstimator`](@ref) that computes entropy can also be used to compute +Any [`DiscreteInfoEstimator`](@extref ComplexityMeasures.DiscreteInfoEstimator) that computes entropy can also be used to compute conditional mutual information using a sum of entropies. For that, we also use [`EntropyDecomposition`](@ref). In the discrete case, we also have to specify a -discretization (an [`OutcomeSpace`](@ref)). +discretization (an [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace)). 
-### [[`EntropyDecomposition`](@ref) + [`ValueBinning`](@ref)](@id example_CMIShannon_EntropyDecomposition_ValueBinning) +### [[`EntropyDecomposition`](@ref) + [`ValueBinning`](@extref ComplexityMeasures.ValueBinning)](@id example_CMIShannon_EntropyDecomposition_ValueBinning) ```@example using Associations @@ -1155,7 +1158,7 @@ association(est, x, y, z) ## [`CMIRenyiJizba`](@ref) -### [[`JointProbabilities`](@ref) + [`BubbleSortSwaps`](@ref)](@id example_CMIRenyiJizba_JointProbabilities_BubbleSortSwaps) +### [[`JointProbabilities`](@ref) + [`BubbleSortSwaps`](@extref ComplexityMeasures.BubbleSortSwaps)](@id example_CMIRenyiJizba_JointProbabilities_BubbleSortSwaps) ```@example example_CMIRenyiJizba using Associations @@ -1169,7 +1172,7 @@ association(est, x, z, y) ``` -### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@ref)](@id example_CMIRenyiJizba_EntropyDecomposition_LeonenkoProzantoSavani) +### [[`EntropyDecomposition`](@ref) + [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani)](@id example_CMIRenyiJizba_EntropyDecomposition_LeonenkoProzantoSavani) ```@example example_CMIRenyiJizba using Associations @@ -1183,7 +1186,7 @@ association(est, x, y, z) ``` -### [[`EntropyDecomposition`](@ref) + [`OrdinalPatterns`](@ref)](@id example_CMIRenyiJizba_EntropyDecomposition_OrdinalPatterns) +### [[`EntropyDecomposition`](@ref) + [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns)](@id example_CMIRenyiJizba_EntropyDecomposition_OrdinalPatterns) ```@example example_CMIRenyiJizba using Associations @@ -1198,7 +1201,7 @@ association(est, x, y, z) ## [`TEShannon`](@ref) -### [[`EntropyDecomposition`](@ref) + [`TransferOperator`](@ref)](@id example_TEShannon_EntropyDecomposition_TransferOperator) +### [[`EntropyDecomposition`](@ref) + [`TransferOperator`](@extref ComplexityMeasures.TransferOperator)](@id example_TEShannon_EntropyDecomposition_TransferOperator) For transfer entropy examples, we'll construct some time series for which there is time-delayed forcing between variables. @@ -1280,7 +1283,7 @@ association(est, x, z, y) # should be near 0 (and can be negative) ### [[`SymbolicTransferEntropy`](@ref) estimator](@id example_TEShannon_SymbolicTransferEntropy) The [`SymbolicTransferEntropy`](@ref) estimator is just a convenience wrapper which utilizes -[`CodifyVariables`](@ref)with the [`OrdinalPatterns`](@ref) outcome space to +[`CodifyVariables`](@ref)with the [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) outcome space to discretize the input time series before computing transfer entropy. We'll use coupled time series from the `logistic4` system above, where `x → y → z → w`. @@ -1306,9 +1309,9 @@ We will test - The [`Lindner`](@ref) and [`Zhu1`](@ref) dedicated transfer entropy estimators, which try to eliminate bias. -- The [`KraskovStögbauerGrassberger1`](@ref) estimator, which computes TE naively as a sum of mutual information +- The [`KraskovStögbauerGrassberger2`](@ref) estimator, which computes TE naively as a sum of mutual information terms (without guaranteed cancellation of biases for the total sum). -- The [`Kraskov`](@ref) estimator, which computes TE naively as a sum of entropy +- The [`Kraskov`](@extref ComplexityMeasures.Kraskov) estimator, which computes TE naively as a sum of entropy terms (without guaranteed cancellation of biases for the total sum). 
```@example @@ -1383,7 +1386,7 @@ fig Let's try to reproduce the results from Schreiber's original paper [Schreiber2000](@cite) where he introduced the transfer entropy. We'll here use the [`JointProbabilities`](@ref) estimator, discretizing per column of the input data using the [`CodifyVariables`](@ref) discretization -scheme with the [`ValueBinning`](@ref) outcome space. +scheme with the [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) outcome space. ```@example example_te_schreiber using Associations @@ -1484,7 +1487,7 @@ The plot above shows the original transfer entropies (solid lines) and the 95th ## [`TERenyiJizba`](@ref) -### [[`EntropyDecomposition`](@ref) + [`TransferOperator`](@ref)](@id example_TERenyiJizba_EntropyDecomposition_TransferOperator) +### [[`EntropyDecomposition`](@ref) + [`TransferOperator`](@extref ComplexityMeasures.TransferOperator)](@id example_TERenyiJizba_EntropyDecomposition_TransferOperator) We can perform the same type of analysis as above using [`TERenyiJizba`](@ref) instead of [`TEShannon`](@ref). diff --git a/docs/src/examples/examples_independence.md b/docs/src/examples/examples_independence.md index c29986f9..faff15ac 100644 --- a/docs/src/examples/examples_independence.md +++ b/docs/src/examples/examples_independence.md @@ -387,7 +387,7 @@ end ### [[`CMIShannon`](@ref)](@id example_LocalPermutationTest_CMIShannon) -To estimate CMI, we'll use the [`Kraskov`](@ref) differential +To estimate CMI, we'll use the [`Kraskov`](@extref ComplexityMeasures.Kraskov) differential entropy estimator, which naively computes CMI as a sum of entropy terms without guaranteed bias cancellation. @@ -504,7 +504,7 @@ independence(test, x, z, y) ## [[`JointProbabilities`](@ref) estimation on categorical data](@id example_SECMITEST_JointProbabilities_CodifyVariables_UniqueElements) -Note that this also works for categorical variables. Just use [`UniqueElements`](@ref) to +Note that this also works for categorical variables. Just use [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) to discretize! ```@example example_SECMITest_categorical diff --git a/docs/src/independence.md b/docs/src/independence.md index d9340540..ec536ff6 100644 --- a/docs/src/independence.md +++ b/docs/src/independence.md @@ -1,5 +1,5 @@ ```@meta -CollapsedDocStrings = true +CollapsedDocStrings = false ``` # [Independence testing](@id independence_testing) diff --git a/docs/src/index.md b/docs/src/index.md index 32714e77..b787c757 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -26,12 +26,6 @@ Important changes are: The quickest way to get going with the package is to check out the examples in the left-hand menu. -!!! info - To make it easier to navigate the extensive documentation, all documentation strings are - collapsed by default. Click the arrow icon in - the top toolbar to expand/collapse the docstrings in a page. - - ## Documentation content - [Association measures](@ref association_measures) lists all implemented association measures and their estimators. @@ -48,17 +42,13 @@ Input data for Associations.jl are given as: - Univariate *timeseries*, which are given as standard Julia `Vector`s. - Multivariate timeseries, *StateSpaceSets*, or *state space sets*, which are given as - [`StateSpaceSet`](@ref)s. Many methods convert *timeseries* inputs to [`StateSpaceSet`](@ref) + [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet)s. Many methods convert *timeseries* inputs to [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet) for faster internal computations. 
- Categorical data can be used with [`JointProbabilities`](@ref) to compute various information theoretic measures and is represented using any iterable whose elements can be any arbitrarily complex data type (as long as it's hashable), for example `Vector{String}`, `{Vector{Int}}`, or `Vector{Tuple{Int, String}}`. -```@docs -StateSpaceSets.StateSpaceSet -``` - ## Maintainers and contributors The Associations.jl software is maintained by diff --git a/docs/style.jl b/docs/style.jl index 168f16ee..691fffa7 100644 --- a/docs/style.jl +++ b/docs/style.jl @@ -18,11 +18,11 @@ Base.iterate(c::CyclicContainer, i = 1) = iterate(c.c, i) COLORSCHEME = [ "#7143E0", - "#0A9A84", "#191E44", + "#0A9A84", "#AF9327", - "#701B80", - "#2E6137", + "#791457", + "#6C768C", ] COLORS = CyclicContainer(COLORSCHEME) diff --git a/src/core.jl b/src/core.jl index 85f03896..6cca5ccd 100644 --- a/src/core.jl +++ b/src/core.jl @@ -5,10 +5,10 @@ export AssociationMeasure export AssociationMeasureEstimator export association -const VectorOr1DDataset{T} = Union{AbstractVector{T}, AbstractStateSpaceSet{1, T}} where T -const VectorOrStateSpaceSet{D, T} = Union{AbstractVector{T}, AbstractStateSpaceSet{D, T}} where {D, T} -const ArrayOrStateSpaceSet{D, T, N} = Union{AbstractArray{T, N}, AbstractStateSpaceSet{D, T}} where {D, T, N} -const INFO_ESTS = Union{DifferentialInfoEstimator, DiscreteInfoEstimator} +const VectorOr1DDataset{T} = Union{AbstractVector{T},AbstractStateSpaceSet{1,T}} where T +const VectorOrStateSpaceSet{D,T} = Union{AbstractVector{T},AbstractStateSpaceSet{D,T}} where {D,T} +const ArrayOrStateSpaceSet{D,T,N} = Union{AbstractArray{T,N},AbstractStateSpaceSet{D,T}} where {D,T,N} +const INFO_ESTS = Union{DifferentialInfoEstimator,DiscreteInfoEstimator} """ AssociationMeasure @@ -105,7 +105,7 @@ Concrete subtypes are given as input to [`association`](@ref). | [`ConvergentCrossMapping`](@ref) | [`RandomVectors`](@ref), [`RandomSegment`](@ref) | | [`MCR`](@ref) | Not required | | [`RMCD`](@ref) | Not required | -| [`MIShannon`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref), [`KraskovStögbauerGrassberger1`](@ref), [`KraskovStögbauerGrassberger2`](@ref), [`GaoOhViswanath`](@ref), [`GaoKannanOhViswanath`](@ref), [`GaussianMI`](@ref) | +| [`MIShannon`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref), [`KraskovStögbauerGrassberger2`](@ref), [`KraskovStögbauerGrassberger2`](@ref), [`GaoOhViswanath`](@ref), [`GaoKannanOhViswanath`](@ref), [`GaussianMI`](@ref) | | [`MIRenyiJizba`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref) | | [`MIRenyiSarbu`](@ref) | [`JointProbabilities`](@ref) | | [`MITsallisFuruichi`](@ref) | [`JointProbabilities`](@ref), [`EntropyDecomposition`](@ref) | diff --git a/src/independence_tests/secmi/secmi_test.jl b/src/independence_tests/secmi/secmi_test.jl index 2e4af9c2..5f8527a0 100644 --- a/src/independence_tests/secmi/secmi_test.jl +++ b/src/independence_tests/secmi/secmi_test.jl @@ -7,26 +7,26 @@ export SECMITestResult A test for conditional independence based on the [`ShortExpansionConditionalMutualInformation`](@ref) measure [Kubkowski2021](@cite). -The first argument `est` must be a [`InformationMeasureEstimator`](@ref) that provides the +The first argument `est` must be a [`ComplexityMeasures.InformationMeasureEstimator`](@extref) that provides the [`ShortExpansionConditionalMutualInformation`](@ref) instance. See examples below. 
## Examples - [Example 1](@ref example_SECMITEST_JointProbabilities_CodifyVariables_ValueBinning): Independence test for small sample sizes using [`CodifyVariables`](@ref) with - [`ValueBinning`](@ref) discretization. + [`ComplexityMeasures.ValueBinning`](@extref) discretization. - [Example 2](@ref example_SECMITEST_JointProbabilities_CodifyVariables_UniqueElements): Independence test for small sample - sizes with categorical data (using [`CodifyVariables`](@ref) with [`UniqueElements`](@ref) discretization). + sizes with categorical data (using [`CodifyVariables`](@ref) with [`ComplexityMeasures.UniqueElements`](@extref) discretization). """ -struct SECMITest{E, S, I, RNG} <: IndependenceTest{E} +struct SECMITest{E,S,I,RNG} <: IndependenceTest{E} # really, this can only be an estimator, but we name it `est_or_measure` for consistency # with the remaining tests, so we don't have to write custom code. - est_or_measure::E + est_or_measure::E surrogate::S nshuffles::I rng::RNG end -function SECMITest(est; nshuffles = 19, surrogate = RandomShuffle(), rng = Random.default_rng()) +function SECMITest(est; nshuffles=19, surrogate=RandomShuffle(), rng=Random.default_rng()) return SECMITest(est, surrogate, nshuffles, rng) end @@ -50,7 +50,7 @@ with [`independence`](@ref), as described in [Kubkowski2021](@cite). - `D𝒩`: The ``D_{N(\\hat{\\mu}, \\hat{\\sigma})}`` statistic. - `D𝒳²`: The ``D_{\\chi^2}`` statistic. """ -struct SECMITestResult{S0, SK, P, MU, S, E, DN, DCHI} <: IndependenceTestResult +struct SECMITestResult{S0,SK,P,MU,S,E,DN,DCHI} <: IndependenceTestResult n_vars::Int # 3 vars = conditional (always 3) secmi₀::S0 # the value of the measure, non-permuted secmiₖ::SK # the values of the measure, permuted `nshuffles` times @@ -74,7 +74,7 @@ function Base.show(io::IO, test::SECMITestResult) $(pvalue_text_summary(test)) """ - ) + ) end function independence(test::SECMITest, x, y, z) @@ -85,9 +85,9 @@ function independence(test::SECMITest, x, y, z) for k = 1:nshuffles secmiₖ[k] = association(est_or_measure, sx(), y, z) end - μ̂ = 1/nshuffles * sum(secmiₖ) - σ̂ = 1/(nshuffles - 1) * sum((sₖ - μ̂)^2 for sₖ in secmiₖ) - emp_cdf = ecdf(secmiₖ) + μ̂ = 1 / nshuffles * sum(secmiₖ) + σ̂ = 1 / (nshuffles - 1) * sum((sₖ - μ̂)^2 for sₖ in secmiₖ) + emp_cdf = ecdf(secmiₖ) F𝒩 = Normal(μ̂, σ̂) if μ̂ ≤ 0.0 @@ -99,7 +99,7 @@ function independence(test::SECMITest, x, y, z) # so we put μ̂ <= 0.0 in a separate criterion first to avoid errors. F𝒳² = Chisq(μ̂) D𝒩, D𝒳² = sup_values(emp_cdf, F𝒩, F𝒳², secmiₖ) - if D𝒩 < D𝒳² + if D𝒩 < D𝒳² p = 1 - cdf(F𝒩, secmi₀) else p = 1 - cdf(F𝒳², secmi₀) @@ -108,7 +108,7 @@ function independence(test::SECMITest, x, y, z) end - + return SECMITestResult(3, secmi₀, secmiₖ, p, μ̂, σ̂, emp_cdf, D𝒩, D𝒳²) end diff --git a/src/methods/closeness/SMeasure.jl b/src/methods/closeness/SMeasure.jl index a3fd9f39..fc16a249 100644 --- a/src/methods/closeness/SMeasure.jl +++ b/src/methods/closeness/SMeasure.jl @@ -66,13 +66,13 @@ S^{(k)}(x|y) = \\dfrac{1}{N} \\sum_{i=1}^{N} \\dfrac{R_i^{(k)}(x)}{R_i^{(k)}(x|y The algorithm is slightly modified from [Arnhold1999](@cite) to allow univariate timeseries as input. -- If `x` and `y` are [`StateSpaceSet`](@ref)s then use `x` and `y` as is and ignore the parameters +- If `x` and `y` are [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet)s then use `x` and `y` as is and ignore the parameters `dx`/`τx` and `dy`/`τy`. 
- If `x` and `y` are scalar time series, then create `dx` and `dy` dimensional embeddings, respectively, of both `x` and `y`, resulting in `N` different `m`-dimensional embedding points ``X = \\{x_1, x_2, \\ldots, x_N \\}`` and ``Y = \\{y_1, y_2, \\ldots, y_N \\}``. `τx` and `τy` control the embedding lags for `x` and `y`. -- If `x` is a scalar-valued vector and `y` is a [`StateSpaceSet`](@ref), or vice versa, +- If `x` is a scalar-valued vector and `y` is a [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet), or vice versa, then create an embedding of the scalar timeseries using parameters `dx`/`τx` or `dy`/`τy`. In all three cases, input StateSpaceSets are length-matched by eliminating points at the end of @@ -80,7 +80,7 @@ the longest StateSpaceSet (after the embedding step, if relevant) before analysi See also: [`ClosenessMeasure`](@ref). """ -Base.@kwdef struct SMeasure{M, TM} <: ClosenessMeasure +Base.@kwdef struct SMeasure{M,TM} <: ClosenessMeasure K::Int = 2 metric::M = SqEuclidean() tree_metric::TM = Euclidean() @@ -96,13 +96,14 @@ function association(measure::SMeasure, x::AbstractStateSpaceSet, y::AbstractSta (; K, metric, tree_metric, τx, τy, dx, dy, w) = measure # Match length of StateSpaceSets by excluding end points. - lx = length(x); ly = length(y) + lx = length(x) + ly = length(y) lx > ly ? X = x[1:ly, :] : X = x ly > lx ? Y = y[1:lx, :] : Y = y N = length(X) T = eltype(1.0) - # Pre-allocate vectors to hold indices and distances during loops + # Pre-allocate vectors to hold indices and distances during loops dists_x = zeros(T, K) dists_x_cond_y = zeros(T, K) diff --git a/src/methods/correlation/partial_correlation.jl b/src/methods/correlation/partial_correlation.jl index 98eddd8c..800bf210 100644 --- a/src/methods/correlation/partial_correlation.jl +++ b/src/methods/correlation/partial_correlation.jl @@ -16,7 +16,7 @@ variables removed. There are several ways of estimating the partial correlation. We follow the [matrix inversion method](https://en.wikipedia.org/wiki/Partial_correlation), because -for [`StateSpaceSet`](@ref)s, we can very efficiently compute the required +for [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet)s, we can very efficiently compute the required joint covariance matrix ``\\Sigma`` for the random variables. Formally, let ``X_1, X_2, \\ldots, X_n`` be a set of ``n`` real-valued random variables. @@ -44,7 +44,7 @@ max_inputs_vars(::PartialCorrelation) = Inf # Compatibility with `independence` function association(::PartialCorrelation, x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet, - conds::ArrayOrStateSpaceSet...) + conds::ArrayOrStateSpaceSet...) X, Y, Z = construct_partialcor_datasets(x, y, conds...) D = StateSpaceSet(X, Y, Z) cov_matrix = cov(D) @@ -53,7 +53,7 @@ function association(::PartialCorrelation, x::VectorOrStateSpaceSet, y::VectorOr end function construct_partialcor_datasets(x::VectorOrStateSpaceSet, y::VectorOrStateSpaceSet, - conds::ArrayOrStateSpaceSet...) + conds::ArrayOrStateSpaceSet...) dimension(x) == 1 || throw(ArgumentError("Input `x` must be 1-dimensional")) dimension(y) == 1 || throw(ArgumentError("Input `y` must be 1-dimensional")) X, Y = StateSpaceSet(x), StateSpaceSet(y) @@ -79,5 +79,5 @@ Given a precision matrix `P`, compute the partial correlation of variables `i` a conditional on all other variables. 
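Explicitly, writing ``P = \\Sigma^{-1}`` for the precision matrix of the full variable set, the returned quantity is the standard identity

```math
\\rho_{ij \\mid \\text{rest}} = -\\dfrac{P_{ij}}{\\sqrt{P_{ii} \\, P_{jj}}},
```

which is exactly what the one-line method below computes.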
""" function partial_correlation_from_precision(P::AbstractMatrix, i::Int, j::Int) - return -P[i, j] / sqrt(P[i, i]*P[j, j]) + return -P[i, j] / sqrt(P[i, i] * P[j, j]) end diff --git a/src/methods/crossmappings/crossmappings.jl b/src/methods/crossmappings/crossmappings.jl index 5dcc44fc..1d3d68c1 100644 --- a/src/methods/crossmappings/crossmappings.jl +++ b/src/methods/crossmappings/crossmappings.jl @@ -59,11 +59,11 @@ for ensemble analysis (see [`Ensemble`](@ref)). For spatial analyses (not yet implemented), indices could be more complex and involve multi-indices. """ -abstract type CrossmapEstimator{M, LIBSIZES, RNG} end +abstract type CrossmapEstimator{M,LIBSIZES,RNG} end segment_length_error() = "Segment lengths can be inferred only if both a cross-map " * - "measure and an input time series is provided. " * - "Do e.g. `ExpandingSegment(CCM(), x)`, where `x` is some time series." + "measure and an input time series is provided. " * + "Do e.g. `ExpandingSegment(CCM(), x)`, where `x` is some time series." """ max_segmentlength(x::AbstractVector, measure::CrossmapMeasure) @@ -122,7 +122,7 @@ according to the algorithm specified by the given cross-map `measure` (e.g. - **First method**: Jointly embeds the target `t` and source `s` time series (according to `measure`) to obtain time-index aligned target timeseries `t̄` and source embedding - `S̄` (which is now a [`StateSpaceSet`](@ref)). + `S̄` (which is now a [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet)). Then calls `predict(measure, t̄, S̄)` (the first method), and returns both the predictions `t̂ₛ`, observations `t̄` and their correspondence `ρ` according to `measure`. - **Second method**: Returns a vector of predictions `t̂ₛ` (`t̂ₛ` := "predictions of `t̄` based @@ -170,7 +170,7 @@ function predict(measure::CrossmapMeasure, t::AbstractVector, S̄::AbstractState if !(first(dᵢ) > 0.0) for i = 1:nnd # One order of magnitude higher than smallest possible float - dᵢ[i] += eps()*10 + dᵢ[i] += eps() * 10 end end u .= exp.(-dᵢ ./ dᵢ[1]) diff --git a/src/methods/information/core.jl b/src/methods/information/core.jl index 3aa65975..8859feda 100644 --- a/src/methods/information/core.jl +++ b/src/methods/information/core.jl @@ -1,4 +1,4 @@ -import ComplexityMeasures: information +import ComplexityMeasures: information export information export MultivariateInformationMeasure @@ -27,7 +27,7 @@ The supertype for all estimators of multivariate information measures. [`MutualInformationEstimator`](@ref)s: -- [`KraskovStögbauerGrassberger1`](@ref) +- [`KraskovStögbauerGrassberger2`](@ref) - [`KraskovStögbauerGrassberger2`](@ref) - [`GaoOhViswanath`](@ref) - [`GaoKannanOhViswanath`](@ref) @@ -56,7 +56,7 @@ The supertype for dedicated [`MutualInformation`](@ref) estimators. ## Concrete implementations -- [`KraskovStögbauerGrassberger1`](@ref) +- [`KraskovStögbauerGrassberger2`](@ref) - [`KraskovStögbauerGrassberger2`](@ref) - [`GaoOhViswanath`](@ref) - [`GaoKannanOhViswanath`](@ref) diff --git a/src/methods/information/counts_and_probs/counts.jl b/src/methods/information/counts_and_probs/counts.jl index abd4ecc9..36263c03 100644 --- a/src/methods/information/counts_and_probs/counts.jl +++ b/src/methods/information/counts_and_probs/counts.jl @@ -20,13 +20,13 @@ Construct an `N`-dimensional contingency table from the input iterables `x₁, x₂, ..., xₙ` which are such that `length(x₁) == length(x₂) == ⋯ == length(xₙ)`. 
-If `x₁, x₂, ..., xₙ` are already discrete, then use [`UniqueElements`](@ref) as +If `x₁, x₂, ..., xₙ` are already discrete, then use [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) as the first argument to directly construct the joint contingency table. If `x₁, x₂, ..., xₙ` need to be discretized, provide as the first argument - [`CodifyPoints`](@ref) (encodes every *point* in each of the input variables `xᵢ`s individually) - [`CodifyVariables`](@ref) (encodes every `xᵢ` individually using a sliding window encoding). NB: If - using different [`OutcomeSpace`](@ref)s for the different `xᵢ`, then [`total_outcomes`](@ref) must + using different [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace)s for the different `xᵢ`, then [`total_outcomes`](@extref ComplexityMeasures.total_outcomes) must be the same for every outcome space. ## Examples @@ -52,10 +52,10 @@ z = rand([(1, 2), (2, 1)], n) counts(UniqueElements(), x, y, z) ``` -See also: [`CodifyPoints`](@ref), [`CodifyVariables`](@ref), [`UniqueElements`](@ref), [`OutcomeSpace`](@ref), +See also: [`CodifyPoints`](@ref), [`CodifyVariables`](@ref), [`UniqueElements`](@extref ComplexityMeasures.UniqueElements), [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace), [`probabilities`](@ref). """ -function counts(o::UniqueElements, x::Vararg{VectorOrStateSpaceSet, N}) where N # this extends ComplexityMeasures.jl definition +function counts(o::UniqueElements, x::Vararg{VectorOrStateSpaceSet,N}) where N # this extends ComplexityMeasures.jl definition # Get marginal probabilities and outcomes L = length(x) cts, lmaps, encoded_outcomes = counts_table(x...) @@ -66,7 +66,7 @@ function counts(o::UniqueElements, x::Vararg{VectorOrStateSpaceSet, N}) where N return Counts(cts, actual_outcomes) end -function counts(x::Vararg{VectorOrStateSpaceSet, N}) where N +function counts(x::Vararg{VectorOrStateSpaceSet,N}) where N if N == 1 return ComplexityMeasures.counts(UniqueElements(), x...) else @@ -81,7 +81,7 @@ function to_outcomes(lmap::Dict, encoded_outcomes::Vector{<:Integer}) end function counts_table(x...) - Ls = length.(x); + Ls = length.(x) if !allequal(Ls) throw(ArgumentError("Input data must have equal lengths. Got lengths $Ls.")) end @@ -96,7 +96,7 @@ function counts_table(x...) # Create the table with correct dimensions, assumming the outcome space is # fully determined by the elements that are present in `x`. - table_dims = length.(unique_elements.(x)); + table_dims = length.(unique_elements.(x)) cts = zeros(Int, table_dims) # Each element in `X` isa `SVector{m, Int}`, so can be treated as a cartesian index. @@ -162,10 +162,10 @@ _levelsmap(x::AbstractStateSpaceSet) = levelsmap(x.data) unique_elements(x) = unique(x) unique_elements(x::AbstractStateSpaceSet) = unique(x.data) -function marginal(c::Counts; dims = 1:ndims(c)) +function marginal(c::Counts; dims=1:ndims(c)) alldims = 1:ndims(c) reduce_dims = (setdiff(alldims, dims)...,) - marginal = dropdims(sum(c.cts, dims = reduce_dims), dims = reduce_dims) + marginal = dropdims(sum(c.cts, dims=reduce_dims), dims=reduce_dims) include_idxs = setdiff(alldims, reduce_dims) new_outcomes = c.outcomes[include_idxs] new_dimlabels = c.dimlabels[include_idxs] @@ -180,13 +180,13 @@ end # ---------------------------------------------------------------- # If multiple encodings are given, the number of encodings must match the number of # input variables. 
-function counts(encoding::CodifyPoints{N}, x::Vararg{Any, N}) where {N} +function counts(encoding::CodifyPoints{N}, x::Vararg{Any,N}) where {N} x̂ = codify(encoding, x...) return counts(UniqueElements(), x̂...) end # If only one encoding is given, apply same encoding to all points -function counts(encoding::CodifyPoints{1}, x::Vararg{Any, N}) where {Any, N} +function counts(encoding::CodifyPoints{1}, x::Vararg{Any,N}) where {Any,N} e = first(encoding.encodings) x̂ = ([encode(e, pt) for pt in xₖ] for xₖ in x) return counts(UniqueElements(), x̂...) @@ -194,7 +194,7 @@ end # Per variable/column # ---------------------------------------------------------------- -function counts(discretization::CodifyVariables{1}, x::Vararg{ArrayOrStateSpaceSet, N}) where N +function counts(discretization::CodifyVariables{1}, x::Vararg{ArrayOrStateSpaceSet,N}) where N o = first(discretization.outcome_spaces) # Treat 1D state space sets as vectors, so we can apply the outcome space sequentially. # TODO: show warning or not? I think this can be silent, because I can't really think of a situation @@ -204,7 +204,7 @@ function counts(discretization::CodifyVariables{1}, x::Vararg{ArrayOrStateSpaceS return counts(x̂...) end -function counts(d::CodifyVariables{1, UniqueElements}, x::Vararg{ArrayOrStateSpaceSet, N}) where N +function counts(d::CodifyVariables{1,UniqueElements}, x::Vararg{ArrayOrStateSpaceSet,N}) where N o = first(d.outcome_spaces) return counts(o, x...) end @@ -218,7 +218,7 @@ as_vec(x::AbstractStateSpaceSet{1}) = [first(xᵢ) for xᵢ in vec(x)] # guaranteed API-wise that embedding vectors are constructed in the same way # (although *in practice* all `OutcomeSpace` that use embeddings do so # per v3.6 of ComplexityMeasures.jl). -function counts(discretization::CodifyVariables{N}, x::Vararg{Any, N}) where N +function counts(discretization::CodifyVariables{N}, x::Vararg{Any,N}) where N encoded_pts = codify(discretization, x...) return counts(encoded_pts...) 
end diff --git a/src/methods/information/counts_and_probs/encoding/codify_points.jl b/src/methods/information/counts_and_probs/encoding/codify_points.jl index 0a53ce8e..a4fc2d04 100644 --- a/src/methods/information/counts_and_probs/encoding/codify_points.jl +++ b/src/methods/information/counts_and_probs/encoding/codify_points.jl @@ -19,24 +19,24 @@ export codify ## Compatible encodings -- [`GaussianCDFEncoding`](@ref) -- [`OrdinalPatternEncoding`](@ref) -- [`RelativeMeanEncoding`](@ref) -- [`RelativeFirstDifferenceEncoding`](@ref) -- [`UniqueElementsEncoding`](@ref) -- [`RectangularBinEncoding`](@ref) -- [`CombinationEncoding`](@ref) +- [`GaussianCDFEncoding`](@extref ComplexityMeasures.GaussianCDFEncoding) +- [`OrdinalPatternEncoding`](@extref ComplexityMeasures.OrdinalPatternEncoding) +- [`RelativeMeanEncoding`](@extref ComplexityMeasures.RelativeMeanEncoding) +- [`RelativeFirstDifferenceEncoding`](@extref ComplexityMeasures.RelativeFirstDifferenceEncoding) +- [`UniqueElementsEncoding`](@extref ComplexityMeasures.UniqueElementsEncoding) +- [`RectangularBinEncoding`](@extref ComplexityMeasures.RectangularBinEncoding) +- [`CombinationEncoding`](@extref ComplexityMeasures.CombinationEncoding) ## Description Given `x::AbstractStateSpaceSet...`, where the `i`-th dataset is assumed to represent a single series of measurements, `CodifyPoints` encodes each point `pₖ ∈ x[i]` -using some [`Encoding`](@ref)(s), *without* applying any (sequential) transformation to +using some [`Encoding`](@extref ComplexityMeasures.Encoding)(s), *without* applying any (sequential) transformation to the `x[i]` first. This behaviour is different to [`CodifyVariables`](@ref), which *does* apply a transformation to `x[i]` before encoding. If `length(x) == N` (i.e. there are `N` input dataset), then `encodings` must be a tuple -of `N` [`Encoding`](@ref). Alternatively, if `encodings` is a single [`Encoding`](@ref), +of `N` [`Encoding`](@extref ComplexityMeasures.Encoding). Alternatively, if `encodings` is a single [`Encoding`](@extref ComplexityMeasures.Encoding), then that same encoding is applied to every `x[i]`. ## Examples @@ -58,8 +58,8 @@ cx, cy, cz = codify(d, x, y, z) ``` """ struct CodifyPoints{N} <: Discretization{N} - encodings::NTuple{N, Encoding} - function CodifyPoints(encodings::NTuple{N, Encoding}) where N + encodings::NTuple{N,Encoding} + function CodifyPoints(encodings::NTuple{N,Encoding}) where N if !(N ≥ 1) throw(ArgumentError("CodifyPoints requires at least 1 dimensions")) end @@ -68,7 +68,7 @@ struct CodifyPoints{N} <: Discretization{N} end Base.getindex(e::CodifyPoints, i) = getindex(e.encodings, i) -function CodifyPoints(encodings::Vararg{Encoding, N}) where N +function CodifyPoints(encodings::Vararg{Encoding,N}) where N return CodifyPoints(tuple(encodings...)) end @@ -103,27 +103,27 @@ codify(CodifyPoints(ex, ey, ez), x, y, z) """ function codify(encoding::CodifyPoints, x) end -function codify(encoding::CodifyPoints{1}, x::Vararg{Any, 1}) +function codify(encoding::CodifyPoints{1}, x::Vararg{Any,1}) e = first(encoding.encodings) x̂ = codify_individual_dataset(e, first(x)) return x̂::Vector{<:Integer} end # Apply the same encoding to all input datasets. -function codify(encoding::CodifyPoints{1}, x::Vararg{Any, M}) where {M} +function codify(encoding::CodifyPoints{1}, x::Vararg{Any,M}) where {M} verify_input(encoding, x...) 
e = first(encoding.encodings) x̂ = map(k -> codify_individual_dataset(e, x[k]), tuple(1:M...)) - return x̂::NTuple{M, Vector{<:Integer}} + return x̂::NTuple{M,Vector{<:Integer}} end -function codify(encoding::CodifyPoints{N}, x::Vararg{Any, M}) where {N, M} +function codify(encoding::CodifyPoints{N}, x::Vararg{Any,M}) where {N,M} verify_input(encoding, x...) x̂ = map(k -> codify_individual_dataset(encoding[k], x[k]), tuple(1:M...)) - return x̂::NTuple{M, Vector{<:Integer}} + return x̂::NTuple{M,Vector{<:Integer}} end function verify_input(encoding::CodifyPoints{N}, x...) where N @@ -153,9 +153,9 @@ function codify_individual_dataset(encoding::Encoding, x) return x̂ end - # The decoding step on the second-to-last line is not possible without actually providing - # the encodings. Therefore, we need to override the Generic implementation of - # `counts`. +# The decoding step on the second-to-last line is not possible without actually providing +# the encodings. Therefore, we need to override the Generic implementation of +# `counts`. function counts(encoding::CodifyPoints, x...) # This converts each dataset `x[i]::StateSpaceSet` into `x̂[i]::Vector{Int}`, # where `length(x[i]) == length(x̂[i])`. diff --git a/src/methods/information/counts_and_probs/encoding/codify_variables.jl b/src/methods/information/counts_and_probs/encoding/codify_variables.jl index 5eaf4701..0281d437 100644 --- a/src/methods/information/counts_and_probs/encoding/codify_variables.jl +++ b/src/methods/information/counts_and_probs/encoding/codify_variables.jl @@ -18,16 +18,16 @@ using the given `outcome_space`. ## Compatible outcome spaces -- [`UniqueElements`](@ref) (for when data are pre-discretized) -- [`BubbleSortSwaps`](@ref) -- [`CosineSimilarityBinning`](@ref) -- [`OrdinalPatterns`](@ref) -- [`Dispersion`](@ref) +- [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) (for when data are pre-discretized) +- [`BubbleSortSwaps`](@extref ComplexityMeasures.BubbleSortSwaps) +- [`CosineSimilarityBinning`](@extref ComplexityMeasures.CosineSimilarityBinning) +- [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) +- [`Dispersion`](@extref ComplexityMeasures.Dispersion) # Description The main difference between `CodifyVariables` and [`CodifyPoints`] is that the former -uses [`OutcomeSpace`](@ref)s for discretization. This usually means that some +uses [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace)s for discretization. This usually means that some transformation is applied to the data before discretizing. For example, some outcome constructs a delay embedding from the input (and thus encodes sequential information) before encoding the data. @@ -35,7 +35,7 @@ before encoding the data. Specifically, given `x::AbstractStateSpaceSet...`, where the `i`-th dataset `x[i]` is assumed to represent a single series of measurements, `CodifyVariables` encodes `x[i]` by [`codify`](@ref)-ing into a series of integers -using an appropriate [`OutcomeSpace`](@ref). This is typically done by first +using an appropriate [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace). This is typically done by first sequentially transforming the data and then running sliding window (the width of the window is controlled by `outcome_space`) across the data, and then encoding the values within each window to an integer. 
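To make the per-variable workflow concrete, here is a small sketch (not part of this changeset; the data and the ordinal-pattern order `m = 3` are arbitrary choices):

```julia
using Associations
x, y = rand(1000), rand(1000)
# One outcome space, applied column-wise to every input variable.
disc = CodifyVariables(OrdinalPatterns(m = 3))
cx, cy = codify(disc, x, y) # one integer symbol sequence per variable
counts(disc, x, y)          # joint contingency table over the observed symbols
```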
@@ -49,9 +49,9 @@ d = CodifyVariables(OrdinalPatterns(m=2)) cx, cy = codify(d, x, y) ``` """ -struct CodifyVariables{N, E} <: Discretization{N} - outcome_spaces::NTuple{N, OutcomeSpace} - function CodifyVariables(outcome_spaces::NTuple{N, OutcomeSpace}) where N +struct CodifyVariables{N,E} <: Discretization{N} + outcome_spaces::NTuple{N,OutcomeSpace} + function CodifyVariables(outcome_spaces::NTuple{N,OutcomeSpace}) where N if N > 1 n_outs = [total_outcomes(o) for o in outcome_spaces] if !allequal(n_outs) @@ -60,7 +60,7 @@ struct CodifyVariables{N, E} <: Discretization{N} throw(ArgumentError(s)) end end - new{N, eltype(outcome_spaces)}(outcome_spaces) + new{N,eltype(outcome_spaces)}(outcome_spaces) end end function CodifyVariables(os...) @@ -100,20 +100,20 @@ cx, cy = codify(CodifyPoints(OrdinalPatternEncoding(3)), x, y) """ function codify(encoding::CodifyVariables, x) end -function codify(encoding::CodifyVariables{1}, x::Vararg{Any, 1}) +function codify(encoding::CodifyVariables{1}, x::Vararg{Any,1}) e = first(encoding.outcome_spaces) x̂ = ComplexityMeasures.codify(e, first(x)) return x̂::Vector{<:Integer} end function codify(encoding::CodifyVariables{1}, x::NTuple{1}) - return (codify(encoding, x...), ) + return (codify(encoding, x...),) end -function codify(encoding::CodifyVariables{1}, x::Vararg{Any, N}) where N +function codify(encoding::CodifyVariables{1}, x::Vararg{Any,N}) where N e = first(encoding.outcome_spaces) x̂ = map(xᵢ -> ComplexityMeasures.codify(e, xᵢ), x) - return x̂::NTuple{N, Vector{<:Integer}} + return x̂::NTuple{N,Vector{<:Integer}} end function codify(encoding::CodifyVariables{1}, x::AbstractStateSpaceSet) @@ -126,7 +126,7 @@ end # guaranteed API-wise that embedding vectors are constructed in the same way # (although *in practice* all `OutcomeSpace` that use embeddings do so # per v3.6 of ComplexityMeasures.jl). -function codify(encoding::CodifyVariables{N}, x::Vararg{Any, N}) where N +function codify(encoding::CodifyVariables{N}, x::Vararg{Any,N}) where N os = encoding.outcome_spaces return [codify(CodifyVariables(os[i]), x[i]) for i in 1:N] end diff --git a/src/methods/information/counts_and_probs/probabilities.jl b/src/methods/information/counts_and_probs/probabilities.jl index 986b4940..2c192bde 100644 --- a/src/methods/information/counts_and_probs/probabilities.jl +++ b/src/methods/information/counts_and_probs/probabilities.jl @@ -12,16 +12,16 @@ export marginal probabilities(encoding::CodifyPoints, x₁, x₂, ..., xₙ) → Counts{N} probabilities(encoding::CodifyVariables, x₁, x₂, ..., xₙ) → Counts{N} -Construct an `N`-dimensional [`Probabilities`](@ref) array from the input iterables +Construct an `N`-dimensional [`Probabilities`](@extref ComplexityMeasures.Probabilities) array from the input iterables `x₁, x₂, ..., xₙ` which are such that `length(x₁) == length(x₂) == ⋯ == length(xₙ)`. ## Description Probabilities are computed by first constructing a joint contingency matrix in the form -of a [`Counts`](@ref) instance. +of a [`Counts`](@extref ComplexityMeasures.Counts) instance. -If `x₁, x₂, ..., xₙ` are already discrete, then use [`UniqueElements`](@ref) as +If `x₁, x₂, ..., xₙ` are already discrete, then use [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) as the first argument to directly construct the joint contingency table. 
If `x₁, x₂, ..., xₙ` need to be discretized, provide as the first argument @@ -51,24 +51,24 @@ z = rand([(1, 2), (2, 1)], n) probabilities(UniqueElements(), x, y, z) ``` -See also: [`CodifyPoints`](@ref), [`CodifyVariables`](@ref), [`UniqueElements`](@ref), [`OutcomeSpace`](@ref). +See also: [`CodifyPoints`](@ref), [`CodifyVariables`](@ref), [`UniqueElements`](@extref ComplexityMeasures.UniqueElements), [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace). """ function probabilities(o::OutcomeSpace) end -function probabilities(o::OutcomeSpace, x::Vararg{VectorOrStateSpaceSet, N}) where N # this extends ComplexityMeasures.jl definition +function probabilities(o::OutcomeSpace, x::Vararg{VectorOrStateSpaceSet,N}) where N # this extends ComplexityMeasures.jl definition return Probabilities(counts(o, x...)) end -function probabilities(est::RelativeAmount, c::Counts{<:Integer, N}) where N +function probabilities(est::RelativeAmount, c::Counts{<:Integer,N}) where N probs = Probabilities(c) return Probabilities(probs.p, c.outcomes, c.dimlabels) end -function probabilities(est::ProbabilitiesEstimator, c::Counts{<:Integer, N}) where N +function probabilities(est::ProbabilitiesEstimator, c::Counts{<:Integer,N}) where N return Probabilities(probs.p, c.outcomes, c.dimlabels) end # Not providing any discretization defaults to `RelativeAmount` estimation. -function probabilities(x::Vararg{VectorOrStateSpaceSet, N}) where N +function probabilities(x::Vararg{VectorOrStateSpaceSet,N}) where N cts = counts(UniqueElements(), x...) probs = probabilities(RelativeAmount(), cts) return Probabilities(probs.p, cts.outcomes, cts.dimlabels) @@ -81,14 +81,14 @@ end Given a set of counts `c` (a contingency table), or a multivariate probability mass function `p`, return the marginal counts/probabilities along the given `dims`. """ -function marginal(p::Probabilities; dims = 1:ndims(p)) +function marginal(p::Probabilities; dims=1:ndims(p)) alldims = 1:ndims(p) reduce_dims = (setdiff(alldims, dims)...,) # if all(a == b for (a, b) in zip(reduce_dims, alldims)) # @show "not taking marginal for $dims and $p" # return p # end - marg = dropdims(sum(p.p, dims = reduce_dims), dims = reduce_dims) + marg = dropdims(sum(p.p, dims=reduce_dims), dims=reduce_dims) include_idxs = setdiff(alldims, reduce_dims) N = length(include_idxs) if N > 0 @@ -101,7 +101,7 @@ function marginal(p::Probabilities; dims = 1:ndims(p)) return Probabilities(marg, new_outcomes, new_dimlabels) end return Probabilities(marg) - + end # ---------------------------------------------------------------- @@ -110,19 +110,19 @@ end # Per point/row # ---------------------------------------------------------------- -function probabilities(encoding::CodifyPoints{1}, x::Vararg{Any, N}) where {N} +function probabilities(encoding::CodifyPoints{1}, x::Vararg{Any,N}) where {N} cts = counts(encoding, x...) return Probabilities(cts) end -function probabilities(encoding::CodifyPoints{N}, x::Vararg{Any, N}) where {N} +function probabilities(encoding::CodifyPoints{N}, x::Vararg{Any,N}) where {N} cts = counts(encoding, x...) return Probabilities(cts) end # Per variable/column # ---------------------------------------------------------------- -function probabilities(discretization::CodifyVariables, x::Vararg{ArrayOrStateSpaceSet, N}) where N +function probabilities(discretization::CodifyVariables, x::Vararg{ArrayOrStateSpaceSet,N}) where N cts = counts(discretization, x...) 
return probabilities(RelativeAmount(), cts) end \ No newline at end of file diff --git a/src/methods/information/definitions/conditional_entropies/ConditionalEntropyShannon.jl b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyShannon.jl index aebecc38..ef1ee635 100644 --- a/src/methods/information/definitions/conditional_entropies/ConditionalEntropyShannon.jl +++ b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyShannon.jl @@ -7,7 +7,7 @@ export ConditionalEntropyShannon ConditionalEntropyShannon <: ConditionalEntropy ConditionalEntropyShannon(; base = 2) -The [`Shannon`](@ref) conditional entropy measure. +The [`Shannon`](@extref ComplexityMeasures.Shannon) conditional entropy measure. ## Usage @@ -40,9 +40,9 @@ Equivalently, the following differenConditionalEntropy of entropies hold H^S(X | Y) = H^S(X, Y) - H^S(Y), ``` -where ``H^S(\\cdot)`` and ``H^S(\\cdot | \\cdot)`` are the [`Shannon`](@ref) entropy and +where ``H^S(\\cdot)`` and ``H^S(\\cdot | \\cdot)`` are the [`Shannon`](@extref ComplexityMeasures.Shannon) entropy and Shannon joint entropy, respectively. This is the definition used when calling -[`association`](@ref) with a [`ProbabilitiesEstimator`](@ref). +[`association`](@ref) with a [`ProbabilitiesEstimator`](@extref ComplexityMeasures.ProbabilitiesEstimator). ## Differential definition @@ -52,19 +52,19 @@ The differential conditional Shannon entropy is analogously defined as H^S(X | Y) = h^S(X, Y) - h^S(Y), ``` -where ``h^S(\\cdot)`` and ``h^S(\\cdot | \\cdot)`` are the [`Shannon`](@ref) +where ``h^S(\\cdot)`` and ``h^S(\\cdot | \\cdot)`` are the [`Shannon`](@extref ComplexityMeasures.Shannon) differential entropy and Shannon joint differential entropy, respectively. This is the definition used when calling [`association`](@ref) with a -[`DifferentialInfoEstimator`](@ref). +[`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator). ## Estimation - [Example 1](@ref example_ConditionalEntropyShannon_analytical): Analytical example from Cover & Thomas's book. - [Example 2](@ref example_ConditionalEntropyShannon_JointProbabilities_CodifyVariables_UniqueElements): [`JointProbabilities`](@ref) estimator with[`CodifyVariables`](@ref) discretization and - [`UniqueElements`](@ref) outcome space on categorical data. + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) outcome space on categorical data. - [Example 3](@ref example_ConditionalEntropyShannon_JointProbabilities_CodifyPoints_UniqueElementsEncoding): - [`JointProbabilities`](@ref) estimator with [`CodifyPoints`](@ref) discretization and [`UniqueElementsEncoding`](@ref) + [`JointProbabilities`](@ref) estimator with [`CodifyPoints`](@ref) discretization and [`UniqueElementsEncoding`](@extref ComplexityMeasures.UniqueElementsEncoding) encoding of points on numerical data. 
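For instance, a sketch in the spirit of Example 2 (the categorical toy variables below are made up for illustration):

```julia
using Associations
weather = rand(["rain", "sun"], 1000)
mood = rand(["bad", "ok", "good"], 1000)
disc = CodifyVariables(UniqueElements())
est = JointProbabilities(ConditionalEntropyShannon(; base = 2), disc)
association(est, mood, weather) # estimate of H(mood | weather), in bits
```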
""" Base.@kwdef struct ConditionalEntropyShannon{B} <: ConditionalEntropy @@ -79,10 +79,10 @@ function association(est::JointProbabilities{<:ConditionalEntropyShannon}, input return association(est.definition, probs) end -function association(definition::ConditionalEntropyShannon, pxy::Probabilities{T, 2}) where {T} +function association(definition::ConditionalEntropyShannon, pxy::Probabilities{T,2}) where {T} base = definition.base Nx, Ny = size(pxy) - py = marginal(pxy, dims = 2) + py = marginal(pxy, dims=2) ce = 0.0 log0 = log_with_base(base) diff --git a/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisAbe.jl b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisAbe.jl index 5721d94f..20b378c2 100644 --- a/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisAbe.jl +++ b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisAbe.jl @@ -26,19 +26,19 @@ Abe & Rajagopal's Tsallis conditional entropy between discrete random variables H_q^{T_A}(X | Y) = \\dfrac{H_q^T(X, Y) - H_q^T(Y)}{1 + (1-q)H_q^T(Y)}, ``` -where ``H_q^T(\\cdot)`` and ``H_q^T(\\cdot, \\cdot)`` is the [`Tsallis`](@ref) +where ``H_q^T(\\cdot)`` and ``H_q^T(\\cdot, \\cdot)`` is the [`Tsallis`](@extref ComplexityMeasures.Tsallis) entropy and the joint Tsallis entropy. ## Estimation - [Example 1](@ref example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyVariables_UniqueElements): [`JointProbabilities`](@ref) estimator with[`CodifyVariables`](@ref) discretization and - [`UniqueElements`](@ref) outcome space on categorical data. + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) outcome space on categorical data. - [Example 2](@ref example_ConditionalEntropyTsallisAbe_JointProbabilities_CodifyPoints_UniqueElementsEncoding): - [`JointProbabilities`](@ref) estimator with [`CodifyPoints`](@ref) discretization and [`UniqueElementsEncoding`](@ref) + [`JointProbabilities`](@ref) estimator with [`CodifyPoints`](@ref) discretization and [`UniqueElementsEncoding`](@extref ComplexityMeasures.UniqueElementsEncoding) encoding of points on numerical data. 
""" -Base.@kwdef struct ConditionalEntropyTsallisAbe{B, Q} <: ConditionalEntropy +Base.@kwdef struct ConditionalEntropyTsallisAbe{B,Q} <: ConditionalEntropy base::B = 2 q::Q = 1.5 end @@ -51,14 +51,14 @@ function association(est::JointProbabilities{<:ConditionalEntropyTsallisAbe}, in return association(est.definition, probs) end -function association(definition::ConditionalEntropyTsallisAbe, pxy::Probabilities{T, 2}) where {T} +function association(definition::ConditionalEntropyTsallisAbe, pxy::Probabilities{T,2}) where {T} (; base, q) = definition if q == 1 # if shannon, normalize return association(ConditionalEntropyShannon(; base), pxy) end - py = marginal(pxy, dims = 2) + py = marginal(pxy, dims=2) # Definition 7 in Abe & Rajagopal (2001) hjoint = 1 / (1 - q) * (sum(pxy .^ 2) - 1) @@ -66,7 +66,7 @@ function association(definition::ConditionalEntropyTsallisAbe, pxy::Probabilitie hy = information(Tsallis(; q, base), py) # Equation 13 in Abe & Rajagopal (2001) - ce = (hjoint - hy) / (1 + (1 - q)*hy) + ce = (hjoint - hy) / (1 + (1 - q) * hy) return ce end \ No newline at end of file diff --git a/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisFuruichi.jl b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisFuruichi.jl index ec91235d..a941a0dc 100644 --- a/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisFuruichi.jl +++ b/src/methods/information/definitions/conditional_entropies/ConditionalEntropyTsallisFuruichi.jl @@ -40,12 +40,12 @@ is undefined for a particular value, then the measure is undefined and `NaN` is - [Example 1](@ref example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyVariables_UniqueElements): [`JointProbabilities`](@ref) estimator with[`CodifyVariables`](@ref) discretization and - [`UniqueElements`](@ref) outcome space on categorical data. + [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) outcome space on categorical data. - [Example 2](@ref example_ConditionalEntropyTsallisFuruichi_JointProbabilities_CodifyPoints_UniqueElementsEncoding): - [`JointProbabilities`](@ref) estimator with [`CodifyPoints`](@ref) discretization and [`UniqueElementsEncoding`](@ref) + [`JointProbabilities`](@ref) estimator with [`CodifyPoints`](@ref) discretization and [`UniqueElementsEncoding`](@extref ComplexityMeasures.UniqueElementsEncoding) encoding of points on numerical data. 
""" -Base.@kwdef struct ConditionalEntropyTsallisFuruichi{B, Q} <: ConditionalEntropy +Base.@kwdef struct ConditionalEntropyTsallisFuruichi{B,Q} <: ConditionalEntropy base::B = 2 q::Q = 1.5 end @@ -58,13 +58,13 @@ function association(est::JointProbabilities{<:ConditionalEntropyTsallisFuruichi return association(est.definition, probs) end -function association(definition::ConditionalEntropyTsallisFuruichi, pxy::Probabilities{T, 2}) where {T} +function association(definition::ConditionalEntropyTsallisFuruichi, pxy::Probabilities{T,2}) where {T} (; base, q) = definition Nx, Ny = size(pxy) if q == 1 return association(ConditionalEntropyShannon(; base), pxy) end - py = marginal(pxy, dims = 2) + py = marginal(pxy, dims=2) ce = 0.0 qlog = logq0(q) for j in 1:Ny @@ -83,7 +83,7 @@ function logq0(q) if q == 1.0 return x -> zero(x) else - return x -> (x^(1 - q) - 1)/(1 - q) + return x -> (x^(1 - q) - 1) / (1 - q) end end diff --git a/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiJizba.jl b/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiJizba.jl index 5d63a3c5..5c3ab043 100644 --- a/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiJizba.jl +++ b/src/methods/information/definitions/conditional_mutual_informations/CMIRenyiJizba.jl @@ -32,13 +32,13 @@ where ``I_q^{R_{J}}(X; Z)`` is the [`MIRenyiJizba`](@ref) mutual information. ## Estimation - [Example 1](@ref example_CMIRenyiJizba_JointProbabilities_BubbleSortSwaps): - [`JointProbabilities`](@ref) with [`BubbleSortSwaps`](@ref) outcome space. + [`JointProbabilities`](@ref) with [`BubbleSortSwaps`](@extref ComplexityMeasures.BubbleSortSwaps) outcome space. - [Example 2](@ref example_CMIRenyiJizba_EntropyDecomposition_OrdinalPatterns): - [`EntropyDecomposition`](@ref) with [`OrdinalPatterns`](@ref) outcome space. + [`EntropyDecomposition`](@ref) with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) outcome space. - [Example 3](@ref example_CMIRenyiJizba_EntropyDecomposition_LeonenkoProzantoSavani): - [`EntropyDecomposition`](@ref) with differential entropy estimator [`LeonenkoProzantoSavani`](@ref). + [`EntropyDecomposition`](@ref) with differential entropy estimator [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani). 
""" -Base.@kwdef struct CMIRenyiJizba{B, Q} <: ConditionalMutualInformation +Base.@kwdef struct CMIRenyiJizba{B,Q} <: ConditionalMutualInformation base::B = 2 q::Q = 1.5 end @@ -48,10 +48,10 @@ end # ---------------------------------------------------------------- function association(est::JointProbabilities{<:CMIRenyiJizba}, x, y, z) pxyz = probabilities(est.discretization, x, y, z) - pxz = marginal(pxyz, dims = [1,3]) - pyz = marginal(pxyz, dims = [2,3]) - pz = marginal(pxyz, dims = 3) - infodef = Renyi(q = est.definition.q, base = est.definition.base) + pxz = marginal(pxyz, dims=[1, 3]) + pyz = marginal(pxyz, dims=[2, 3]) + pz = marginal(pxyz, dims=3) + infodef = Renyi(q=est.definition.q, base=est.definition.base) HXYZ = information(infodef, pxyz) HXZ = information(infodef, pxz) HYZ = information(infodef, pyz) @@ -59,13 +59,13 @@ function association(est::JointProbabilities{<:CMIRenyiJizba}, x, y, z) return HXZ + HYZ - HXYZ - HZ end -function association(est::EntropyDecomposition{<:CMIRenyiJizba, <:DifferentialInfoEstimator{<:Renyi}}, x, y, z) +function association(est::EntropyDecomposition{<:CMIRenyiJizba,<:DifferentialInfoEstimator{<:Renyi}}, x, y, z) HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h_differential(est, x, y, z) cmi = HXZ + HYZ - HXYZ - HZ return cmi end -function association(est::EntropyDecomposition{<:CMIRenyiJizba, <:DiscreteInfoEstimator{<:Renyi}}, x, y, z) +function association(est::EntropyDecomposition{<:CMIRenyiJizba,<:DiscreteInfoEstimator{<:Renyi}}, x, y, z) HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h_discrete(est, x, y, z) cmi = HXZ + HYZ - HXYZ - HZ return cmi @@ -75,17 +75,17 @@ end # Pretty printing for decomposition estimators. # ------------------------------------------------ function decomposition_string( - definition::CMIRenyiJizba, - est::EntropyDecomposition{<:CMIRenyiJizba, <:DiscreteInfoEstimator{<:Renyi}} - ) - return "Iᵣⱼ(X, Y | Z) = Hᵣ(X,Z) + Hᵣ(Y,Z) - Hᵣ(X,Y,Z) - Hᵣ(Z)"; + definition::CMIRenyiJizba, + est::EntropyDecomposition{<:CMIRenyiJizba,<:DiscreteInfoEstimator{<:Renyi}} +) + return "Iᵣⱼ(X, Y | Z) = Hᵣ(X,Z) + Hᵣ(Y,Z) - Hᵣ(X,Y,Z) - Hᵣ(Z)" end function decomposition_string( - definition::CMIRenyiJizba, - est::EntropyDecomposition{<:CMIRenyiJizba, <:DifferentialInfoEstimator{<:Renyi}} -) - return "Iᵣⱼ(X, Y | Z) = hᵣ(X,Z) + hᵣ(Y,Z) - hᵣ(X,Y,Z) - hᵣ(Z)"; + definition::CMIRenyiJizba, + est::EntropyDecomposition{<:CMIRenyiJizba,<:DifferentialInfoEstimator{<:Renyi}} +) + return "Iᵣⱼ(X, Y | Z) = hᵣ(X,Z) + hᵣ(Y,Z) - hᵣ(X,Y,Z) - hᵣ(Z)" end # --------------------------------- diff --git a/src/methods/information/definitions/conditional_mutual_informations/CMIShannon.jl b/src/methods/information/definitions/conditional_mutual_informations/CMIShannon.jl index b70c7b91..86270172 100644 --- a/src/methods/information/definitions/conditional_mutual_informations/CMIShannon.jl +++ b/src/methods/information/definitions/conditional_mutual_informations/CMIShannon.jl @@ -42,7 +42,7 @@ I(X; Y | Z) ``` where ``I^S(\\cdot; \\cdot)`` is the Shannon mutual information [`MIShannon`](@ref), -and ``H^S(\\cdot)`` is the [`Shannon`](@ref) entropy. +and ``H^S(\\cdot)`` is the [`Shannon`](@extref ComplexityMeasures.Shannon) entropy. Differential Shannon CMI is obtained by replacing the entropies by differential entropies. @@ -50,11 +50,11 @@ differential entropies. ## Estimation - [Example 1](@ref example_CMIShannon_EntropyDecomposition_Kraskov): - [`EntropyDecomposition`](@ref) with [`Kraskov`](@ref) estimator. 
+ [`EntropyDecomposition`](@ref) with [`Kraskov`](@extref ComplexityMeasures.Kraskov) estimator. - [Example 2](@ref example_CMIShannon_EntropyDecomposition_ValueBinning): - [`EntropyDecomposition`](@ref) with [`ValueBinning`](@ref) estimator. + [`EntropyDecomposition`](@ref) with [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) estimator. - [Example 3](@ref example_CMIShannon_MIDecomposition_KSG1): - [`MIDecomposition`](@ref) with [`KraskovStögbauerGrassberger1`](@ref) estimator. + [`MIDecomposition`](@ref) with [`KraskovStögbauerGrassberger2`](@ref) estimator. """ Base.@kwdef struct CMIShannon{B} <: ConditionalMutualInformation base::B = 2 @@ -68,11 +68,11 @@ function association(est::JointProbabilities{<:CMIShannon}, x, y, z) return association(est.definition, probs) end -function association(definition::CMIShannon, pxyz::Probabilities{T, 3}) where T +function association(definition::CMIShannon, pxyz::Probabilities{T,3}) where T dx, dy, dz = size(pxyz) - pxz = marginal(pxyz, dims = [1, 3]) - pyz = marginal(pxyz, dims = [2, 3]) - pz = marginal(pxyz, dims = 3) + pxz = marginal(pxyz, dims=[1, 3]) + pyz = marginal(pxyz, dims=[2, 3]) + pz = marginal(pxyz, dims=3) cmi = 0.0 log0 = log_with_base(definition.base) for k in 1:dz @@ -97,13 +97,13 @@ end # ------------------------------------------------ # Four-entropies decompostion of CMIShannon # ------------------------------------------------ -function association(est::EntropyDecomposition{<:CMIShannon, <:DifferentialInfoEstimator{<:Shannon}}, x, y, z) +function association(est::EntropyDecomposition{<:CMIShannon,<:DifferentialInfoEstimator{<:Shannon}}, x, y, z) HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h_differential(est, x, y, z) cmi = HXZ + HYZ - HXYZ - HZ return cmi end -function association(est::EntropyDecomposition{<:CMIShannon, <:DiscreteInfoEstimator{<:Shannon}}, x, y, z) +function association(est::EntropyDecomposition{<:CMIShannon,<:DiscreteInfoEstimator{<:Shannon}}, x, y, z) HXZ, HYZ, HXYZ, HZ = marginal_entropies_cmi4h_discrete(est, x, y, z) cmi = HXZ + HYZ - HXYZ - HZ return cmi @@ -112,7 +112,7 @@ end # --------------------------------------------------- # Two-mutual-information decomposition of CMIShannon # --------------------------------------------------- -function association(est::MIDecomposition{<:ConditionalMutualInformation, <:MutualInformationEstimator{<:MIShannon}}, x, y, z) +function association(est::MIDecomposition{<:ConditionalMutualInformation,<:MutualInformationEstimator{<:MIShannon}}, x, y, z) MI_X_YZ, MI_X_Z = marginal_mutual_informations(est, x, y, z) cmi = MI_X_YZ - MI_X_Z return cmi @@ -120,7 +120,7 @@ end # We don't care if the estimated is mixed, discrete or handles both. The MI estimator # handles that internally. -function marginal_mutual_informations(est::MIDecomposition{<:ConditionalMutualInformation, <:MutualInformationEstimator{<:MIShannon}}, x, y, z) +function marginal_mutual_informations(est::MIDecomposition{<:ConditionalMutualInformation,<:MutualInformationEstimator{<:MIShannon}}, x, y, z) X = StateSpaceSet(x) Y = StateSpaceSet(y) Z = StateSpaceSet(z) @@ -149,22 +149,22 @@ end # Pretty printing for decomposition estimators. 
# ------------------------------------------------ function decomposition_string( - definition::CMIShannon, - est::EntropyDecomposition{<:CMIShannon, <:DiscreteInfoEstimator{<:Shannon}} - ) - return "Iₛ(X, Y | Z) = Hₛ(X,Z) + Hₛ(Y,Z) - Hₛ(X,Y,Z) - Hₛ(Z)"; + definition::CMIShannon, + est::EntropyDecomposition{<:CMIShannon,<:DiscreteInfoEstimator{<:Shannon}} +) + return "Iₛ(X, Y | Z) = Hₛ(X,Z) + Hₛ(Y,Z) - Hₛ(X,Y,Z) - Hₛ(Z)" end function decomposition_string( - definition::CMIShannon, - est::EntropyDecomposition{<:CMIShannon, <:DifferentialInfoEstimator{<:Shannon}} - ) - return "Iₛ(X, Y | Z) = hₛ(X,Z) + hₛ(Y,Z) - hₛ(X,Y,Z) - hₛ(Z)"; + definition::CMIShannon, + est::EntropyDecomposition{<:CMIShannon,<:DifferentialInfoEstimator{<:Shannon}} +) + return "Iₛ(X, Y | Z) = hₛ(X,Z) + hₛ(Y,Z) - hₛ(X,Y,Z) - hₛ(Z)" end function decomposition_string( - definition::CMIShannon, - est::MIDecomposition{<:CMIShannon, <:MutualInformationEstimator} - ) + definition::CMIShannon, + est::MIDecomposition{<:CMIShannon,<:MutualInformationEstimator} +) return "Iₛ(X, Y | Z) = Iₛ(X; Y, Z) + Iₛ(X; Z)" end \ No newline at end of file diff --git a/src/methods/information/definitions/divergences_and_distances/HellingerDistance.jl b/src/methods/information/definitions/divergences_and_distances/HellingerDistance.jl index dcc3eb18..f0071578 100644 --- a/src/methods/information/definitions/divergences_and_distances/HellingerDistance.jl +++ b/src/methods/information/definitions/divergences_and_distances/HellingerDistance.jl @@ -19,7 +19,7 @@ The Hellinger distance. The Hellinger distance between two probability distributions ``P_X = (p_x(\\omega_1), \\ldots, p_x(\\omega_n))`` and ``P_Y = (p_y(\\omega_1), \\ldots, p_y(\\omega_m))``, both defined over the same -[`OutcomeSpace`](@ref) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is +[`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is [defined](https://en.wikipedia.org/wiki/Hellinger_distance) as ```math @@ -31,7 +31,7 @@ D_{H}(P_Y(\\Omega) || P_Y(\\Omega)) = - [Example 1](@ref example_HellingerDistance_precomputed_probabilities): From precomputed probabilities - [Example 2](@ref example_HellingerDistance_JointProbabilities_OrdinalPatterns): - [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space + [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) outcome space """ struct HellingerDistance <: DivergenceOrDistance end @@ -45,5 +45,5 @@ function association(est::JointProbabilities{<:HellingerDistance}, x, y) end function association(measure::HellingerDistance, px::Probabilities, py::Probabilities) - return 1/sqrt(2) * sum((sqrt(pxᵢ) - sqrt(pyᵢ))^2 for (pxᵢ, pyᵢ) in zip(px, py)) + return 1 / sqrt(2) * sum((sqrt(pxᵢ) - sqrt(pyᵢ))^2 for (pxᵢ, pyᵢ) in zip(px, py)) end diff --git a/src/methods/information/definitions/divergences_and_distances/KLDivergence.jl b/src/methods/information/definitions/divergences_and_distances/KLDivergence.jl index 1ad7eb6a..b534ff9b 100644 --- a/src/methods/information/definitions/divergences_and_distances/KLDivergence.jl +++ b/src/methods/information/definitions/divergences_and_distances/KLDivergence.jl @@ -24,7 +24,7 @@ The Kullback-Leibler (KL) divergence. 
The KL-divergence between two probability distributions ``P_X = (p_x(\\omega_1), \\ldots, p_x(\\omega_n))`` and ``P_Y = (p_y(\\omega_1), \\ldots, p_y(\\omega_m))``, both defined over the same -[`OutcomeSpace`](@ref) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is defined as +[`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is defined as ```math D_{KL}(P_Y(\\Omega) || P_Y(\\Omega)) = @@ -34,9 +34,9 @@ D_{KL}(P_Y(\\Omega) || P_Y(\\Omega)) = ## Implements - [`association`](@ref). Used to compute the KL-divergence between two pre-computed - probability distributions. If used with [`RelativeAmount`](@ref), the KL divergence may + probability distributions. If used with [`RelativeAmount`](@extref ComplexityMeasures.RelativeAmount), the KL divergence may be undefined to due some outcomes having zero counts. Use some other - [`ProbabilitiesEstimator`](@ref) like [`BayesianRegularization`](@ref) to ensure + [`ProbabilitiesEstimator`](@extref ComplexityMeasures.ProbabilitiesEstimator) like [`BayesianRegularization`](@extref ComplexityMeasures.BayesianRegularization) to ensure all estimated probabilities are nonzero. !!! note @@ -47,12 +47,12 @@ D_{KL}(P_Y(\\Omega) || P_Y(\\Omega)) = - [Example 1](@ref example_KLDivergence_precomputed_probabilities): From precomputed probabilities - [Example 2](@ref example_KLDivergence_JointProbabilities_OrdinalPatterns): - [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space + [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) outcome space """ struct KLDivergence{B} <: DivergenceOrDistance base::B end -KLDivergence(; base = 2) = KLDivergence(base) +KLDivergence(; base=2) = KLDivergence(base) # ---------------------------------------------------------------- # Estimation methods diff --git a/src/methods/information/definitions/divergences_and_distances/RenyiDivergence.jl b/src/methods/information/definitions/divergences_and_distances/RenyiDivergence.jl index edaed3cb..99c3e696 100644 --- a/src/methods/information/definitions/divergences_and_distances/RenyiDivergence.jl +++ b/src/methods/information/definitions/divergences_and_distances/RenyiDivergence.jl @@ -21,7 +21,7 @@ The Rényi divergence of positive order `q`. The Rényi divergence between two probability distributions ``P_X = (p_x(\\omega_1), \\ldots, p_x(\\omega_n))`` and ``P_Y = (p_y(\\omega_1), \\ldots, p_y(\\omega_m))``, both defined over the same -[`OutcomeSpace`](@ref) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is defined as +[`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is defined as [vanErven2014](@citet). ```math @@ -32,9 +32,9 @@ D_{q}(P_Y(\\Omega) || P_Y(\\Omega)) = ## Implements - [`information`](@ref). Used to compute the Rényi divergence between two pre-computed - probability distributions. If used with [`RelativeAmount`](@ref), the KL divergence may + probability distributions. If used with [`RelativeAmount`](@extref ComplexityMeasures.RelativeAmount), the KL divergence may be undefined to due some outcomes having zero counts. Use some other - [`ProbabilitiesEstimator`](@ref) like [`BayesianRegularization`](@ref) to ensure + [`ProbabilitiesEstimator`](@extref ComplexityMeasures.ProbabilitiesEstimator) like [`BayesianRegularization`](@extref ComplexityMeasures.BayesianRegularization) to ensure all estimated probabilities are nonzero. !!! 
note @@ -46,17 +46,17 @@ D_{q}(P_Y(\\Omega) || P_Y(\\Omega)) = - [Example 1](@ref example_RenyiDivergence_precomputed_probabilities): From precomputed probabilities - [Example 2](@ref example_RenyiDivergence_JointProbabilities_OrdinalPatterns): - [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space + [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) outcome space """ -struct RenyiDivergence{Q, B} <: DivergenceOrDistance +struct RenyiDivergence{Q,B} <: DivergenceOrDistance q::Q base::B - function RenyiDivergence(q::Q, base::B) where {Q, B} + function RenyiDivergence(q::Q, base::B) where {Q,B} q > 0 || throw(ArgumentError("`q` must be positive. Got $q")) - new{Q, B}(q, base) + new{Q,B}(q, base) end end -RenyiDivergence(; q = 0.5, base = 2) = RenyiDivergence(q, base) +RenyiDivergence(; q=0.5, base=2) = RenyiDivergence(q, base) # ---------------------------------------------------------------- # Estimation methods diff --git a/src/methods/information/definitions/divergences_and_distances/VariationDistance.jl b/src/methods/information/definitions/divergences_and_distances/VariationDistance.jl index 9dec8b31..d5b45aa2 100644 --- a/src/methods/information/definitions/divergences_and_distances/VariationDistance.jl +++ b/src/methods/information/definitions/divergences_and_distances/VariationDistance.jl @@ -20,7 +20,7 @@ The variation distance. The variation distance between two probability distributions ``P_X = (p_x(\\omega_1), \\ldots, p_x(\\omega_n))`` and ``P_Y = (p_y(\\omega_1), \\ldots, p_y(\\omega_m))``, both defined over the same -[`OutcomeSpace`](@ref) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is +[`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace) ``\\Omega = \\{\\omega_1, \\ldots, \\omega_n \\}``, is [defined](https://en.wikipedia.org/wiki/Variation_distance) as ```math @@ -32,7 +32,7 @@ D_{V}(P_Y(\\Omega) || P_Y(\\Omega)) = - [Example 1](@ref example_VariationDistance_precomputed_probabilities): From precomputed probabilities - [Example 2](@ref example_VariationDistance_JointProbabilities_OrdinalPatterns): - [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space + [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) outcome space """ struct VariationDistance <: DivergenceOrDistance end @@ -46,5 +46,5 @@ function association(est::JointProbabilities{<:VariationDistance}, x, y) end function association(measure::VariationDistance, px::Probabilities, py::Probabilities) - return 1/2 * sum(abs(pxᵢ - pyᵢ) for (pxᵢ, pyᵢ) in zip(px, py)) + return 1 / 2 * sum(abs(pxᵢ - pyᵢ) for (pxᵢ, pyᵢ) in zip(px, py)) end diff --git a/src/methods/information/definitions/joint_entropies/JointEntropyRenyi.jl b/src/methods/information/definitions/joint_entropies/JointEntropyRenyi.jl index 1a6694e4..9e3945b3 100644 --- a/src/methods/information/definitions/joint_entropies/JointEntropyRenyi.jl +++ b/src/methods/information/definitions/joint_entropies/JointEntropyRenyi.jl @@ -31,9 +31,9 @@ where ``q > 0`` and ``q != 1``. 
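Besides the estimator route, the measure can be applied to a precomputed two-dimensional `Probabilities` array (the `ValueBinning(4)` discretization below is an arbitrary assumption):

```julia
using Associations
x, y = rand(1000), rand(1000)
pxy = probabilities(CodifyVariables(ValueBinning(4)), x, y) # 2D joint probabilities
association(JointEntropyRenyi(; base = 2, q = 2), pxy)
```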
## Estimation - [Example 1](@ref example_JointEntropyRenyi_ValueBinning): - [`JointProbabilities`](@ref) with [`ValueBinning`](@ref) outcome space + [`JointProbabilities`](@ref) with [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) outcome space """ -Base.@kwdef struct JointEntropyRenyi{B, Q} <: JointEntropy +Base.@kwdef struct JointEntropyRenyi{B,Q} <: JointEntropy base::B = 2 q::Q = 1.5 end @@ -47,15 +47,15 @@ function association(est::JointProbabilities{<:JointEntropyRenyi}, x, y) return association(est.definition, probs) end -function association(definition::JointEntropyRenyi, pxy::Probabilities{T, 2}) where T +function association(definition::JointEntropyRenyi, pxy::Probabilities{T,2}) where T (; base, q) = definition - + h = 0.0 for p in pxy if p != 0 h += p^q end end - h = 1 / (1 - q) * log(h) + h = 1 / (1 - q) * log(h) return _convert_logunit(h, ℯ, base) end \ No newline at end of file diff --git a/src/methods/information/definitions/joint_entropies/JointEntropyShannon.jl b/src/methods/information/definitions/joint_entropies/JointEntropyShannon.jl index 2ec11be0..94bb8161 100644 --- a/src/methods/information/definitions/joint_entropies/JointEntropyShannon.jl +++ b/src/methods/information/definitions/joint_entropies/JointEntropyShannon.jl @@ -31,7 +31,7 @@ where we define ``log(p(x, y)) := 0`` if ``p(x, y) = 0``. ## Estimation - [Example 1](@ref example_JointEntropyShannon_Dispersion): - [`JointProbabilities`](@ref) with [`Dispersion`](@ref) outcome space + [`JointProbabilities`](@ref) with [`Dispersion`](@extref ComplexityMeasures.Dispersion) outcome space """ Base.@kwdef struct JointEntropyShannon{B} <: JointEntropy base::B = 2 @@ -45,9 +45,9 @@ function association(est::JointProbabilities{<:JointEntropyShannon}, x, y) return association(est.definition, probs) end -function association(definition::JointEntropyShannon, pxy::Probabilities{T, 2}) where T +function association(definition::JointEntropyShannon, pxy::Probabilities{T,2}) where T (; base) = definition - + h = 0.0 for p in pxy if p != 0 # Define log(0) = 0 diff --git a/src/methods/information/definitions/joint_entropies/JointEntropyTsallis.jl b/src/methods/information/definitions/joint_entropies/JointEntropyTsallis.jl index f2324c31..c1f70341 100644 --- a/src/methods/information/definitions/joint_entropies/JointEntropyTsallis.jl +++ b/src/methods/information/definitions/joint_entropies/JointEntropyTsallis.jl @@ -32,9 +32,9 @@ we define ``log_q(x, q) := 0`` if ``q = 0``. 
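A corresponding usage sketch (the ordinal-pattern order is an illustrative assumption, chosen to match the outcome space of the example linked below):

```julia
using Associations
x, y = rand(1000), rand(1000)
est = JointProbabilities(JointEntropyTsallis(; base = 2, q = 1.5),
    CodifyVariables(OrdinalPatterns(m = 3)))
association(est, x, y) # joint Tsallis entropy of the symbolized pair
```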
## Estimation - [Example 1](@ref example_JointEntropyTsallis_OrdinalPatterns): - [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@ref) outcome space + [`JointProbabilities`](@ref) with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) outcome space """ -Base.@kwdef struct JointEntropyTsallis{B, Q} <: JointEntropy +Base.@kwdef struct JointEntropyTsallis{B,Q} <: JointEntropy base::B = 2 q::Q = 1.5 end @@ -47,9 +47,9 @@ function association(est::JointProbabilities{<:JointEntropyTsallis}, x, y) return association(est.definition, probs) end -function association(definition::JointEntropyTsallis, pxy::Probabilities{T, 2}) where T +function association(definition::JointEntropyTsallis, pxy::Probabilities{T,2}) where T (; base, q) = definition - + h = 0.0 for p in pxy if p != 0.0 # Define logq(0) = 0 diff --git a/src/methods/information/definitions/mutual_informations/MIRenyiJizba.jl b/src/methods/information/definitions/mutual_informations/MIRenyiJizba.jl index 720c404e..5898be2c 100644 --- a/src/methods/information/definitions/mutual_informations/MIRenyiJizba.jl +++ b/src/methods/information/definitions/mutual_informations/MIRenyiJizba.jl @@ -26,16 +26,16 @@ The Rényi mutual information ``I_q^{R_{J}}(X; Y)`` defined in [Jizba2012](@cite I_q^{R_{J}}(X; Y) = H_q^{R}(X) + H_q^{R}(Y) - H_q^{R}(X, Y), ``` -where ``H_q^{R}(\\cdot)`` is the [`Rényi`](@ref) entropy. +where ``H_q^{R}(\\cdot)`` is the [`Renyi`](@extref ComplexityMeasures.Renyi) entropy. ## Estimation -- [Example 1](@ref example_MIRenyiJizba_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) outcome space. -- [Example 2](@ref example_MIRenyiJizba_JointProbabilities_LeonenkoProzantoSavani): [`EntropyDecomposition`](@ref) with [`LeonenkoProzantoSavani`](@ref). -- [Example 3](@ref example_MIRenyiJizba_EntropyDecomposition_ValueBinning): [`EntropyDecomposition`](@ref) with [`ValueBinning`](@ref). +- [Example 1](@ref example_MIRenyiJizba_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) outcome space. +- [Example 2](@ref example_MIRenyiJizba_JointProbabilities_LeonenkoProzantoSavani): [`EntropyDecomposition`](@ref) with [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani). +- [Example 3](@ref example_MIRenyiJizba_EntropyDecomposition_ValueBinning): [`EntropyDecomposition`](@ref) with [`ValueBinning`](@extref ComplexityMeasures.ValueBinning). """ -Base.@kwdef struct MIRenyiJizba{B, Q} <: MutualInformation +Base.@kwdef struct MIRenyiJizba{B,Q} <: MutualInformation base::B = 2 q::Q = 1.5 end @@ -48,12 +48,12 @@ function association(est::JointProbabilities{<:MIRenyiJizba}, x, y) return association(est.definition, probs) end -function association(definition::MIRenyiJizba, pxy::Probabilities{T, 2}) where T +function association(definition::MIRenyiJizba, pxy::Probabilities{T,2}) where T (; base, q) = definition - px = marginal(pxy, dims = 1) - py = marginal(pxy, dims = 2) - + px = marginal(pxy, dims=1) + py = marginal(pxy, dims=2) + logb = log_with_base(base) num = 0.0 den = 0.0 @@ -78,15 +78,15 @@ end # Jizba, P., Lavička, H., & Tabachová, Z. (2021). Rényi Transfer Entropy Estimators for # Financial Time Series. Engineering Proceedings, 5(1), 33. 
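For `MIRenyiJizba`, whose docstring and `JointProbabilities` method are edited in this hunk, a minimal sketch along the lines of the docstring's Example 1 (categorical toy data, made up here):

```julia
using Associations, ComplexityMeasures

x = rand(["a", "b", "c"], 1000)
y = rand(["a", "b", "c"], 1000)

# Count joint/marginal frequencies of the unique categories, then plug in.
est = JointProbabilities(MIRenyiJizba(q = 1.5), CodifyVariables(UniqueElements()))
association(est, x, y)   # ≈ 0 for independent categorical inputs
```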
# -------------------------------------------------------------- -function association(est::EntropyDecomposition{<:MIRenyiJizba, <:DifferentialInfoEstimator{<:Renyi}}, x, y) +function association(est::EntropyDecomposition{<:MIRenyiJizba,<:DifferentialInfoEstimator{<:Renyi}}, x, y) HX, HY, HXY = marginal_entropies_mi3h_differential(est, x, y) - mi = HX + HY - HXY + mi = HX + HY - HXY return mi end -function association(est::EntropyDecomposition{<:MIRenyiJizba, <:DiscreteInfoEstimator{<:Renyi}}, x, y) +function association(est::EntropyDecomposition{<:MIRenyiJizba,<:DiscreteInfoEstimator{<:Renyi}}, x, y) HX, HY, HXY = marginal_entropies_mi3h_discrete(est, x, y) - mi = HX + HY - HXY + mi = HX + HY - HXY return mi end @@ -94,15 +94,15 @@ end # Pretty printing for decomposition estimators. # ------------------------------------------------ function decomposition_string( - definition::MIRenyiJizba, - est::EntropyDecomposition{<:MIRenyiJizba, <:DifferentialInfoEstimator{<:Renyi}} - ) + definition::MIRenyiJizba, + est::EntropyDecomposition{<:MIRenyiJizba,<:DifferentialInfoEstimator{<:Renyi}} +) return "Iᵣⱼ(X, Y) = hᵣ(X) + hᵣ(Y) - hᵣ(X, Y)" end function decomposition_string( - definition::MIRenyiJizba, - est::EntropyDecomposition{<:MIRenyiJizba, <:DiscreteInfoEstimator{<:Renyi}} - ) + definition::MIRenyiJizba, + est::EntropyDecomposition{<:MIRenyiJizba,<:DiscreteInfoEstimator{<:Renyi}} +) return "Iᵣⱼ(X, Y) = Hᵣ(X) + Hᵣ(Y) - Hᵣ(X, Y)" end diff --git a/src/methods/information/definitions/mutual_informations/MIRenyiSarbu.jl b/src/methods/information/definitions/mutual_informations/MIRenyiSarbu.jl index 6aa2d321..90fbd80a 100644 --- a/src/methods/information/definitions/mutual_informations/MIRenyiSarbu.jl +++ b/src/methods/information/definitions/mutual_informations/MIRenyiSarbu.jl @@ -1,6 +1,6 @@ using ComplexityMeasures: Renyi -export MIRenyiSarbu +export MIRenyiSarbu """ MIRenyiSarbu <: BivariateInformationMeasure @@ -36,10 +36,10 @@ I(X, Y)^R_q = ## Estimation -- [Example 1](@ref example_MIRenyiSarbu_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) for categorical data. -- [Example 2](@ref example_MIRenyiSarbu_JointProbabilities_CosineSimilarityBinning): [`JointProbabilities`](@ref) with [`CosineSimilarityBinning`](@ref) for numerical data. +- [Example 1](@ref example_MIRenyiSarbu_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) for categorical data. +- [Example 2](@ref example_MIRenyiSarbu_JointProbabilities_CosineSimilarityBinning): [`JointProbabilities`](@ref) with [`CosineSimilarityBinning`](@extref ComplexityMeasures.CosineSimilarityBinning) for numerical data. 
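The decomposition methods above assemble Iᵣⱼ(X, Y) = Hᵣ(X) + Hᵣ(Y) - Hᵣ(X, Y) from three separately estimated Rényi entropies. A sketch of the discrete route from user code, assuming a value-binning discretization and plug-in Rényi entropy (parameter choices are arbitrary):

```julia
using Associations, ComplexityMeasures

x, y = rand(2000), rand(2000)

# Each marginal/joint term is a plug-in (discrete) Rényi entropy computed over
# a per-variable value-binning discretization; the terms are then combined.
def = MIRenyiJizba(q = 1.5)
disc = CodifyVariables(ValueBinning(RectangularBinning(3)))
association(EntropyDecomposition(def, PlugIn(Renyi(q = 1.5)), disc), x, y)
```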
""" -Base.@kwdef struct MIRenyiSarbu{B, Q} <: MutualInformation +Base.@kwdef struct MIRenyiSarbu{B,Q} <: MutualInformation base::B = 2 q::Q = 1.5 end @@ -52,11 +52,11 @@ function association(est::JointProbabilities{<:MIRenyiSarbu}, x, y) return association(est.definition, probs) end -function association(definition::MIRenyiSarbu, pxy::Probabilities{T, 2}) where T +function association(definition::MIRenyiSarbu, pxy::Probabilities{T,2}) where T (; base, q) = definition - px = marginal(pxy, dims = 1) - py = marginal(pxy, dims = 2) + px = marginal(pxy, dims=1) + py = marginal(pxy, dims=2) mi = 0.0 for i in eachindex(px.p) diff --git a/src/methods/information/definitions/mutual_informations/MIShannon.jl b/src/methods/information/definitions/mutual_informations/MIShannon.jl index 06a76254..592541e1 100644 --- a/src/methods/information/definitions/mutual_informations/MIShannon.jl +++ b/src/methods/information/definitions/mutual_informations/MIShannon.jl @@ -19,7 +19,7 @@ The Shannon mutual information ``I_S(X; Y)``. - [`JointProbabilities`](@ref) (generic) - [`EntropyDecomposition`](@ref) (generic) -- [`KraskovStögbauerGrassberger1`](@ref) +- [`KraskovStögbauerGrassberger2`](@ref) - [`KraskovStögbauerGrassberger2`](@ref) - [`GaoOhViswanath`](@ref) - [`GaoKannanOhViswanath`](@ref) @@ -73,19 +73,19 @@ I^S(X; Y) = h^S(X) + h_q^S(Y) - h^S(X, Y), where ``h^S(\\cdot)`` and ``h^S(\\cdot, \\cdot)`` are the marginal and joint differential Shannon entropies. This definition is used by [`association`](@ref) when -called with [`EntropyDecomposition`](@ref) estimator and a [`DifferentialInfoEstimator`](@ref). +called with [`EntropyDecomposition`](@ref) estimator and a [`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator). ## Estimation -- [Example 1](@ref example_MIShannon_JointProbabilities_ValueBinning): [`JointProbabilities`](@ref) with [`ValueBinning`](@ref) outcome space. -- [Example 2](@ref example_MIShannon_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) outcome space on string data. +- [Example 1](@ref example_MIShannon_JointProbabilities_ValueBinning): [`JointProbabilities`](@ref) with [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) outcome space. +- [Example 2](@ref example_MIShannon_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) outcome space on string data. - [Example 3](@ref example_MIShannon_GaussianMI): Dedicated [`GaussianMI`](@ref) estimator. -- [Example 4](@ref example_MIShannon_KSG1): Dedicated [`KraskovStögbauerGrassberger1`](@ref) estimator. +- [Example 4](@ref example_MIShannon_KSG1): Dedicated [`KraskovStögbauerGrassberger2`](@ref) estimator. - [Example 5](@ref example_MIShannon_KSG2): Dedicated [`KraskovStögbauerGrassberger2`](@ref) estimator. - [Example 6](@ref example_MIShannon_GaoKannanOhViswanath): Dedicated [`GaoKannanOhViswanath`](@ref) estimator. -- [Example 7](@ref example_MIShannon_EntropyDecomposition_Kraskov): [`EntropyDecomposition`](@ref) with [`Kraskov`](@ref) estimator. -- [Example 8](@ref example_MIShannon_EntropyDecomposition_BubbleSortSwaps): [`EntropyDecomposition`](@ref) with [`BubbleSortSwaps`](@ref). -- [Example 9](@ref example_MIShannon_EntropyDecomposition_Jackknife_ValueBinning): [`EntropyDecomposition`](@ref) with [`Jackknife`](@ref) estimator and [`ValueBinning`](@ref) outcome space. 
+- [Example 7](@ref example_MIShannon_EntropyDecomposition_Kraskov): [`EntropyDecomposition`](@ref) with [`Kraskov`](@extref ComplexityMeasures.Kraskov) estimator. +- [Example 8](@ref example_MIShannon_EntropyDecomposition_BubbleSortSwaps): [`EntropyDecomposition`](@ref) with [`BubbleSortSwaps`](@extref ComplexityMeasures.BubbleSortSwaps). +- [Example 9](@ref example_MIShannon_EntropyDecomposition_Jackknife_ValueBinning): [`EntropyDecomposition`](@ref) with [`Jackknife`](@extref ComplexityMeasures.Jackknife) estimator and [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) outcome space. - [Example 10](@ref example_MIShannon_reproducing_Kraskov): Reproducing Kraskov et al. (2004). """ Base.@kwdef struct MIShannon{B} <: MutualInformation @@ -102,11 +102,11 @@ function association(est::JointProbabilities{<:MIShannon}, x, y) return association(est.definition, probs) end -function association(definition::MIShannon, pxy::Probabilities{T, 2}) where T +function association(definition::MIShannon, pxy::Probabilities{T,2}) where T (; base) = definition - - px = marginal(pxy, dims = 1) - py = marginal(pxy, dims = 2) + + px = marginal(pxy, dims=1) + py = marginal(pxy, dims=2) mi = 0.0 logb = log_with_base(base) for i in eachindex(px.p) @@ -126,15 +126,15 @@ end # ------------------------------------------------ # Mutual information through entropy decomposition # ------------------------------------------------ -function association(est::EntropyDecomposition{<:MIShannon, <:DifferentialInfoEstimator{<:Shannon}}, x, y) +function association(est::EntropyDecomposition{<:MIShannon,<:DifferentialInfoEstimator{<:Shannon}}, x, y) HX, HY, HXY = marginal_entropies_mi3h_differential(est, x, y) - mi = HX + HY - HXY + mi = HX + HY - HXY return mi end -function association(est::EntropyDecomposition{<:MIShannon, <:DiscreteInfoEstimator{<:Shannon}, D, P}, x, y) where {D, P} +function association(est::EntropyDecomposition{<:MIShannon,<:DiscreteInfoEstimator{<:Shannon},D,P}, x, y) where {D,P} HX, HY, HXY = marginal_entropies_mi3h_discrete(est, x, y) - mi = HX + HY - HXY + mi = HX + HY - HXY return mi end @@ -142,15 +142,15 @@ end # Pretty printing for decomposition estimators. # ------------------------------------------------ function decomposition_string( - definition::MIShannon, - est::EntropyDecomposition{<:MIShannon, <:DifferentialInfoEstimator{<:Shannon}} - ) - return "Iₛ(X, Y) = hₛ(X) + hₛ(Y) - hₛ(X, Y)"; + definition::MIShannon, + est::EntropyDecomposition{<:MIShannon,<:DifferentialInfoEstimator{<:Shannon}} +) + return "Iₛ(X, Y) = hₛ(X) + hₛ(Y) - hₛ(X, Y)" end function decomposition_string( - definition::MIShannon, - est::EntropyDecomposition{<:MIShannon, <:DiscreteInfoEstimator{<:Shannon}} - ) - return "Iₛ(X, Y) = Hₛ(X) + Hₛ(Y) - Hₛ(X, Y)"; + definition::MIShannon, + est::EntropyDecomposition{<:MIShannon,<:DiscreteInfoEstimator{<:Shannon}} +) + return "Iₛ(X, Y) = Hₛ(X) + Hₛ(Y) - Hₛ(X, Y)" end diff --git a/src/methods/information/definitions/mutual_informations/MITsallisFuruichi.jl b/src/methods/information/definitions/mutual_informations/MITsallisFuruichi.jl index 9f2a5f89..1f2b2d37 100644 --- a/src/methods/information/definitions/mutual_informations/MITsallisFuruichi.jl +++ b/src/methods/information/definitions/mutual_informations/MITsallisFuruichi.jl @@ -30,15 +30,15 @@ I_q^T(X; Y) = H_q^T(X) - H_q^T(X | Y) = H_q^T(X) + H_q^T(Y) - H_q^T(X, Y), ``` where ``H^T(\\cdot)`` and ``H^T(\\cdot, \\cdot)`` are the marginal and joint Tsallis -entropies, and `q` is the [`Tsallis`](@ref)-parameter. 
+entropies, and `q` is the [`Tsallis`](@extref ComplexityMeasures.Tsallis)-parameter. ## Estimation -- [Example 1](@ref example_MITsallisFuruichi_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) outcome space. -- [Example 2](@ref example_MITsallisFuruichi_EntropyDecomposition_LeonenkoProzantoSavani): [`EntropyDecomposition`](@ref) with [`LeonenkoProzantoSavani`](@ref) estimator. -- [Example 3](@ref example_MITsallisFuruichi_EntropyDecomposition_Dispersion): [`EntropyDecomposition`](@ref) with [`Dispersion`](@ref) +- [Example 1](@ref example_MITsallisFuruichi_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) outcome space. +- [Example 2](@ref example_MITsallisFuruichi_EntropyDecomposition_LeonenkoProzantoSavani): [`EntropyDecomposition`](@ref) with [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani) estimator. +- [Example 3](@ref example_MITsallisFuruichi_EntropyDecomposition_Dispersion): [`EntropyDecomposition`](@ref) with [`Dispersion`](@extref ComplexityMeasures.Dispersion) """ -Base.@kwdef struct MITsallisFuruichi{B, Q} <: MutualInformation +Base.@kwdef struct MITsallisFuruichi{B,Q} <: MutualInformation base::B = 2 q::Q = 1.5 end @@ -51,11 +51,11 @@ function association(est::JointProbabilities{<:MITsallisFuruichi}, x, y) return association(est.definition, probs) end -function association(definition::MITsallisFuruichi, pxy::Probabilities{T, 2}) where T +function association(definition::MITsallisFuruichi, pxy::Probabilities{T,2}) where T (; base, q) = definition - px = marginal(pxy, dims = 1) - py = marginal(pxy, dims = 2) + px = marginal(pxy, dims=1) + py = marginal(pxy, dims=2) mi = 0.0 for i in eachindex(px.p) @@ -64,19 +64,19 @@ function association(definition::MITsallisFuruichi, pxy::Probabilities{T, 2}) wh mi += pxyᵢⱼ^q / (px[i]^(q - 1) * py[j]^(q - 1)) end end - mi = (1 / (q - 1) * (1 - mi) / (1-q)) + mi = (1 / (q - 1) * (1 - mi) / (1 - q)) return _convert_logunit(mi, ℯ, base) end -function association(est::EntropyDecomposition{<:MITsallisFuruichi, <:DifferentialInfoEstimator{<:Tsallis}}, x, y) +function association(est::EntropyDecomposition{<:MITsallisFuruichi,<:DifferentialInfoEstimator{<:Tsallis}}, x, y) HX, HY, HXY = marginal_entropies_mi3h_differential(est, x, y) mi = HX + HY - HXY return mi end -function association(est::EntropyDecomposition{<:MITsallisFuruichi, <:DiscreteInfoEstimator{<:Tsallis}}, x, y) +function association(est::EntropyDecomposition{<:MITsallisFuruichi,<:DiscreteInfoEstimator{<:Tsallis}}, x, y) HX, HY, HXY = marginal_entropies_mi3h_discrete(est, x, y) mi = HX + HY - HXY return mi @@ -87,9 +87,9 @@ end # Pretty printing for decomposition estimators. 
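For the Tsallis-Furuichi mutual information handled here, a short sketch of the two estimation routes exercised by the methods above. The data are toy data, and `Dispersion` is chosen for illustration only (it is one of the outcome spaces listed as codify-able later in this diff).

```julia
using Associations, ComplexityMeasures

x, y = rand(2000), rand(2000)
def = MITsallisFuruichi(q = 1.5)
disc = CodifyVariables(Dispersion())

# Route 1: joint probabilities over the discretized variables.
association(JointProbabilities(def, disc), x, y)

# Route 2: decomposition into three plug-in Tsallis entropy terms.
association(EntropyDecomposition(def, PlugIn(Tsallis(q = 1.5)), disc), x, y)
```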
# ------------------------------------------------ function decomposition_string(definition::MITsallisFuruichi, est::DiscreteInfoEstimator{<:Tsallis}) - return "MI_TF(X, Y) = H_T(X) + H_T(Y) - H_T(X, Y)"; + return "MI_TF(X, Y) = H_T(X) + H_T(Y) - H_T(X, Y)" end function decomposition_string(definition::MITsallisFuruichi, est::DifferentialInfoEstimator{<:Tsallis}) - return "MI_TF(X, Y) = h_T(X) + h_T(Y) - h_T(X, Y)"; + return "MI_TF(X, Y) = h_T(X) + h_T(Y) - h_T(X, Y)" end \ No newline at end of file diff --git a/src/methods/information/definitions/mutual_informations/MITsallisMartin.jl b/src/methods/information/definitions/mutual_informations/MITsallisMartin.jl index b82d039d..6937d0ee 100644 --- a/src/methods/information/definitions/mutual_informations/MITsallisMartin.jl +++ b/src/methods/information/definitions/mutual_informations/MITsallisMartin.jl @@ -30,15 +30,15 @@ I_{\\text{Martin}}^T(X, Y, q) := H_q^T(X) + H_q^T(Y) - (1 - q) H_q^T(X) H_q^T(Y) ``` where ``H^S(\\cdot)`` and ``H^S(\\cdot, \\cdot)`` are the marginal and joint Shannon -entropies, and `q` is the [`Tsallis`](@ref)-parameter. +entropies, and `q` is the [`Tsallis`](@extref ComplexityMeasures.Tsallis)-parameter. ## Estimation -- [Example 1](@ref example_MITsallisMartin_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@ref) outcome space. -- [Example 2](@ref example_MITsallisMartin_EntropyDecomposition_LeonenkoProzantoSavani): [`EntropyDecomposition`](@ref) with [`LeonenkoProzantoSavani`](@ref) estimator. -- [Example 3](@ref example_MITsallisMartin_EntropyDecomposition_OrdinalPatterns): [`EntropyDecomposition`](@ref) with [`OrdinalPatterns`](@ref) outcome space. +- [Example 1](@ref example_MITsallisMartin_JointProbabilities_UniqueElements): [`JointProbabilities`](@ref) with [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) outcome space. +- [Example 2](@ref example_MITsallisMartin_EntropyDecomposition_LeonenkoProzantoSavani): [`EntropyDecomposition`](@ref) with [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani) estimator. +- [Example 3](@ref example_MITsallisMartin_EntropyDecomposition_OrdinalPatterns): [`EntropyDecomposition`](@ref) with [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) outcome space. """ -Base.@kwdef struct MITsallisMartin{B, Q} <: MutualInformation +Base.@kwdef struct MITsallisMartin{B,Q} <: MutualInformation base::B = 2 q::Q = 1.5 end @@ -54,12 +54,12 @@ end # This is definition 3 in Martin et al. (2004), but with pᵢ replaced by the joint # distribution and qᵢ replaced by the product of the marginal distributions. -function association(definition::MITsallisMartin, pxy::Probabilities{T, 2}) where T +function association(definition::MITsallisMartin, pxy::Probabilities{T,2}) where T (; base, q) = definition # TODO: return MIShannon if q = 1? otherwise, we don't need `base`. 
q != 1 || throw(ArgumentError("`MITsallisMartin` for q=$(q) not defined.")) - px = marginal(pxy, dims = 1) - py = marginal(pxy, dims = 2) + px = marginal(pxy, dims=1) + py = marginal(pxy, dims=2) mi = 0.0 for (i, pxᵢ) in enumerate(px.p) @@ -72,14 +72,14 @@ function association(definition::MITsallisMartin, pxy::Probabilities{T, 2}) wher return f * (1 - mi) end -function association(est::EntropyDecomposition{<:MITsallisMartin, <:DifferentialInfoEstimator{<:Tsallis}}, x, y) +function association(est::EntropyDecomposition{<:MITsallisMartin,<:DifferentialInfoEstimator{<:Tsallis}}, x, y) HX, HY, HXY = marginal_entropies_mi3h_differential(est, x, y) q = est.definition.q mi = HX + HY - (1 - q) * HX * HY - HXY return mi end -function association(est::EntropyDecomposition{<:MITsallisMartin, <:DiscreteInfoEstimator{<:Tsallis}}, x, y) +function association(est::EntropyDecomposition{<:MITsallisMartin,<:DiscreteInfoEstimator{<:Tsallis}}, x, y) HX, HY, HXY = marginal_entropies_mi3h_discrete(est, x, y) q = est.definition.q mi = HX + HY - (1 - q) * HX * HY - HXY @@ -90,9 +90,9 @@ end # Pretty printing for decomposition estimators. # ------------------------------------------------ function decomposition_string(definition::MITsallisMartin, est::DiscreteInfoEstimator{<:Tsallis}) - return "MI_S(X, Y) = H_T(X) + H_T(Y) - (1 - q)*H_T(X)*H_T(Y) - H_T(X, Y)"; + return "MI_S(X, Y) = H_T(X) + H_T(Y) - (1 - q)*H_T(X)*H_T(Y) - H_T(X, Y)" end function decomposition_string(definition::MITsallisMartin, est::DifferentialInfoEstimator{<:Tsallis}) - return "MI_S(X, Y) = h_T(X) + h_T(Y) - (1 - q)*h_T(X)*H_T(Y) - h_T(X, Y)"; + return "MI_S(X, Y) = h_T(X) + h_T(Y) - (1 - q)*h_T(X)*H_T(Y) - h_T(X, Y)" end \ No newline at end of file diff --git a/src/methods/information/definitions/short_expansion_conditional_mutual_information/short_expansion_conditional_mutual_information.jl b/src/methods/information/definitions/short_expansion_conditional_mutual_information/short_expansion_conditional_mutual_information.jl index b8f6ad85..41cf946f 100644 --- a/src/methods/information/definitions/short_expansion_conditional_mutual_information/short_expansion_conditional_mutual_information.jl +++ b/src/methods/information/definitions/short_expansion_conditional_mutual_information/short_expansion_conditional_mutual_information.jl @@ -35,7 +35,7 @@ This quantity is estimated from data using one of the estimators below from the - [Example 1](@ref example_ShortExpansionConditionalMutualInformation_JointProbabilities_CodifyVariables_ValueBinning): Estimating [`ShortExpansionConditionalMutualInformation`](@ref) using the [`JointProbabilities`](@ref) estimator using a - [`CodifyVariables`](@ref) with [`ValueBinning`](@ref) discretization. + [`CodifyVariables`](@ref) with [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) discretization. 
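A sketch of the estimation route named in the bullet above for `ShortExpansionConditionalMutualInformation`: discretize each variable with value binning, form the joint probabilities, and pass the conditioning variable last. The toy data and bin count are made up.

```julia
using Associations, ComplexityMeasures
using Random: Xoshiro

rng = Xoshiro(1234)
n = 1000
x = rand(rng, n)
z = x .+ rand(rng, n)      # Z depends on X
y = z .+ rand(rng, n)      # Y depends on Z, and only indirectly on X

disc = CodifyVariables(ValueBinning(RectangularBinning(3)))
est = JointProbabilities(ShortExpansionConditionalMutualInformation(base = 2), disc)
association(est, x, y, z)  # SECMI(X; Y | Z)
```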
""" Base.@kwdef struct ShortExpansionConditionalMutualInformation{B} <: MultivariateInformationMeasure base::B = 2 @@ -51,22 +51,22 @@ max_inputs_vars(::ShortExpansionConditionalMutualInformation) = Inf # Assumes 1st dimension of `probs` corresponds to X, 2nd dimension of `probs` # corresponds to Y, and dimensions `3:ndims(probs)` correspond to marginals Zₖ, -function association(definition::SECMI, probs::Probabilities{T, N}) where {T, N} +function association(definition::SECMI, probs::Probabilities{T,N}) where {T,N} @assert N >= 3 (; base) = definition - def_mi = MIShannon(; base = base) - def_cmi = CMIShannon(; base = base) + def_mi = MIShannon(; base=base) + def_cmi = CMIShannon(; base=base) m = ndims(probs) - 2 - pXY = marginal(probs, dims = 1:2) + pXY = marginal(probs, dims=1:2) mi_XY = association(def_mi, pXY) cmis = 0.0 for k = 1:m # association(definition::CMIShannon, pxyz::Probabilities{T, 3}) # is the call signature, so we simply replace the last variable # with the marginal Zₖ for each Î(X, Y | Zₖ) in the sum - cmis += association(def_cmi, marginal(probs, dims = (1, 2, 2 + k))) + cmis += association(def_cmi, marginal(probs, dims=(1, 2, 2 + k))) end return (1 - m) * mi_XY + cmis end diff --git a/src/methods/information/definitions/transferentropy/TERenyiJizba.jl b/src/methods/information/definitions/transferentropy/TERenyiJizba.jl index dec0fb26..c2af8624 100644 --- a/src/methods/information/definitions/transferentropy/TERenyiJizba.jl +++ b/src/methods/information/definitions/transferentropy/TERenyiJizba.jl @@ -38,36 +38,36 @@ estimator from the list below as its input. | Estimator | Sub-estimator | Principle | | :----------------------------- | :------------------------------- | :--------------------------- | -| [`EntropyDecomposition`](@ref) | [`LeonenkoProzantoSavani`](@ref) | Four-entropies decomposition | -| [`EntropyDecomposition`](@ref) | [`ValueBinning`](@ref) | Four-entropies decomposition | -| [`EntropyDecomposition`](@ref) | [`Dispersion`](@ref) | Four-entropies decomposition | -| [`EntropyDecomposition`](@ref) | [`OrdinalPatterns`](@ref) | Four-entropies decomposition | -| [`EntropyDecomposition`](@ref) | [`UniqueElements`](@ref) | Four-entropies decomposition | -| [`EntropyDecomposition`](@ref) | [`TransferOperator`](@ref) | Four-entropies decomposition | +| [`EntropyDecomposition`](@ref) | [`LeonenkoProzantoSavani`](@extref ComplexityMeasures.LeonenkoProzantoSavani) | Four-entropies decomposition | +| [`EntropyDecomposition`](@ref) | [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) | Four-entropies decomposition | +| [`EntropyDecomposition`](@ref) | [`Dispersion`](@extref ComplexityMeasures.Dispersion) | Four-entropies decomposition | +| [`EntropyDecomposition`](@ref) | [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) | Four-entropies decomposition | +| [`EntropyDecomposition`](@ref) | [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) | Four-entropies decomposition | +| [`EntropyDecomposition`](@ref) | [`TransferOperator`](@extref ComplexityMeasures.TransferOperator) | Four-entropies decomposition | Any of these estimators must be given as input to a [`CMIDecomposition](@ref) estimator. ## Estimation -- [Example 1](@ref example_TERenyiJizba_EntropyDecomposition_TransferOperator): [`EntropyDecomposition`](@ref) with [`TransferOperator`](@ref) outcome space. 
+- [Example 1](@ref example_TERenyiJizba_EntropyDecomposition_TransferOperator): [`EntropyDecomposition`](@ref) with [`TransferOperator`](@extref ComplexityMeasures.TransferOperator) outcome space. """ -struct TERenyiJizba{B, Q, EMB} <: TransferEntropy +struct TERenyiJizba{B,Q,EMB} <: TransferEntropy base::B q::Q embedding::EMB - function TERenyiJizba(; base::B = 2, q::Q = 1.5, embedding::EMB = EmbeddingTE()) where {B, Q, EMB} - return new{B, Q, EMB}(base, q, embedding) + function TERenyiJizba(; base::B=2, q::Q=1.5, embedding::EMB=EmbeddingTE()) where {B,Q,EMB} + return new{B,Q,EMB}(base, q, embedding) end end -function convert_to_cmi_estimator(est::EntropyDecomposition{<:TERenyiJizba, <:DiscreteInfoEstimator{<:Renyi}}) +function convert_to_cmi_estimator(est::EntropyDecomposition{<:TERenyiJizba,<:DiscreteInfoEstimator{<:Renyi}}) (; definition, est, discretization, pest) = est base = definition.base return EntropyDecomposition(CMIRenyiJizba(; base), est, discretization, pest) end -function convert_to_cmi_estimator(est::EntropyDecomposition{<:TERenyiJizba, <:DifferentialInfoEstimator{<:Renyi}}) +function convert_to_cmi_estimator(est::EntropyDecomposition{<:TERenyiJizba,<:DifferentialInfoEstimator{<:Renyi}}) return EntropyDecomposition(CMIRenyiJizba(; est.definition.base), est.est) end @@ -82,15 +82,15 @@ end # = h(t⁺, t⁻,c⁻) - h(t⁻,c⁻) - h(t⁺,s⁻,t⁻,c⁻) + h(s⁻,t⁻,c⁻)" function decomposition_string( - definition::TERenyiJizba, - est::EntropyDecomposition{M, <:DiscreteInfoEstimator} - ) where M + definition::TERenyiJizba, + est::EntropyDecomposition{M,<:DiscreteInfoEstimator} +) where M return "TEᵣⱼ(s → t | c) = Hᵣ(t⁺, t⁻,c⁻) - Hᵣ(t⁻,c⁻) - Hᵣ(t⁺,s⁻,t⁻,c⁻) + Hᵣ(s⁻,t⁻,c⁻)" end function decomposition_string( - definition::TERenyiJizba, - est::EntropyDecomposition{M, <:DifferentialInfoEstimator} - ) where M + definition::TERenyiJizba, + est::EntropyDecomposition{M,<:DifferentialInfoEstimator} +) where M return "TEᵣⱼ(s → t | c) = hᵣ(t⁺, t⁻,c⁻) - hᵣ(t⁻,c⁻) - hᵣ(t⁺,s⁻,t⁻,c⁻) + hᵣ(s⁻,t⁻,c⁻)" end \ No newline at end of file diff --git a/src/methods/information/definitions/transferentropy/TEShannon.jl b/src/methods/information/definitions/transferentropy/TEShannon.jl index da82cddf..07205c18 100644 --- a/src/methods/information/definitions/transferentropy/TEShannon.jl +++ b/src/methods/information/definitions/transferentropy/TEShannon.jl @@ -32,26 +32,26 @@ are constructed using present/past values and future values, respectively. ## Estimation - [Example 1](@ref example_TEShannon_EntropyDecomposition_TransferOperator): - [`EntropyDecomposition`](@ref) with [`TransferOperator`](@ref) outcome space. + [`EntropyDecomposition`](@ref) with [`TransferOperator`](@extref ComplexityMeasures.TransferOperator) outcome space. - [Example 2](@ref example_TEShannon_SymbolicTransferEntropy): Estimation using the [`SymbolicTransferEntropy`](@ref) estimator. """ -struct TEShannon{B, EMB} <: TransferEntropy +struct TEShannon{B,EMB} <: TransferEntropy base::B embedding::EMB - function TEShannon(; base::B = 2, embedding::EMB = EmbeddingTE()) where {B, EMB} - return new{B, EMB}(base, embedding) + function TEShannon(; base::B=2, embedding::EMB=EmbeddingTE()) where {B,EMB} + return new{B,EMB}(base, embedding) end # TODO: add constructor that automatically determines the embedding. 
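Both transfer-entropy definitions in these hunks are estimated by first being converted to a conditional-mutual-information estimator (the `convert_to_cmi_estimator` methods above); from user code the conversion is invisible. A hedged sketch, assuming the discrete `EntropyDecomposition` route with ordinal patterns works for `TEShannon` as it does for the other measures (the coupled toy series below is made up):

```julia
using Associations, ComplexityMeasures

# Toy unidirectional coupling: y is driven by lagged x plus noise.
n = 3000
x = rand(n)
y = [0.5 * x[max(i - 1, 1)] + 0.1 * rand() for i in 1:n]

disc = CodifyVariables(OrdinalPatterns(m = 3))
est = EntropyDecomposition(TEShannon(base = 2), PlugIn(Shannon()), disc)
association(est, x, y)   # TE(x → y); swap arguments for the reverse direction
```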
end -function convert_to_cmi_estimator(est::EntropyDecomposition{<:TEShannon, <:DiscreteInfoEstimator}) +function convert_to_cmi_estimator(est::EntropyDecomposition{<:TEShannon,<:DiscreteInfoEstimator}) (; definition, est, discretization, pest) = est base = definition.base return EntropyDecomposition(CMIShannon(; base), est, discretization, pest) end -function convert_to_cmi_estimator(est::EntropyDecomposition{<:TEShannon, <:DifferentialInfoEstimator}) +function convert_to_cmi_estimator(est::EntropyDecomposition{<:TEShannon,<:DifferentialInfoEstimator}) return EntropyDecomposition(CMIShannon(; est.definition.base), est.est) end @@ -81,30 +81,30 @@ end # = h(t⁺, t⁻,c⁻) - h(t⁻,c⁻) - h(t⁺,s⁻,t⁻,c⁻) + h(s⁻,t⁻,c⁻)" function decomposition_string( - definition::TEShannon, - est::EntropyDecomposition{M, <:DiscreteInfoEstimator{<:Shannon}} - ) where M + definition::TEShannon, + est::EntropyDecomposition{M,<:DiscreteInfoEstimator{<:Shannon}} +) where M return "TEₛ(s → t | c) = Hₛ(t⁺, t⁻,c⁻) - Hₛ(t⁻,c⁻) - Hₛ(t⁺,s⁻,t⁻,c⁻) + Hₛ(s⁻,t⁻,c⁻)" end function decomposition_string( - definition::TEShannon, - est::EntropyDecomposition{M, <:DifferentialInfoEstimator{<:Shannon}} - ) where M + definition::TEShannon, + est::EntropyDecomposition{M,<:DifferentialInfoEstimator{<:Shannon}} +) where M return "TEₛ(s → t | c) = hₛ(t⁺, t⁻,c⁻) - hₛ(t⁻,c⁻) - hₛ(t⁺,s⁻,t⁻,c⁻) + hₛ(s⁻,t⁻,c⁻)" end function decomposition_string( - definition::TEShannon, - est::MIDecomposition{M, <:MutualInformationEstimator{<:MIShannon}} - ) where M + definition::TEShannon, + est::MIDecomposition{M,<:MutualInformationEstimator{<:MIShannon}} +) where M return "TEₛ(s → t | c) = Iₛ(t⁺; s⁻, t⁻, c⁻) - Iₛ(t⁺; t⁻, c⁻)" end function decomposition_string( - definition::TEShannon, - est::CMIDecomposition{M, <:ConditionalMutualInformationEstimator{<:CMIShannon}} - ) where M + definition::TEShannon, + est::CMIDecomposition{M,<:ConditionalMutualInformationEstimator{<:CMIShannon}} +) where M return "TEₛ(s → t | c) = Iₛ(t⁺; s⁻ | t⁻, c⁻)" end diff --git a/src/methods/information/definitions/transferentropy/transferoperator.jl b/src/methods/information/definitions/transferentropy/transferoperator.jl index e5d30307..38772fd5 100644 --- a/src/methods/information/definitions/transferentropy/transferoperator.jl +++ b/src/methods/information/definitions/transferentropy/transferoperator.jl @@ -1,5 +1,5 @@ import ComplexityMeasures: TransferOperator, invariantmeasure, InvariantMeasure, Probabilities -using ComplexityMeasures.GroupSlices +using ..GroupSlices export TransferOperator using ComplexityMeasures: Probabilities @@ -55,13 +55,13 @@ end # transfer operator. function h4_marginal_probs( est::EntropyDecomposition{ - <:TransferEntropy, - <:DiscreteInfoEstimator, - <:CodifyVariables{1, <:TransferOperator}, + <:TransferEntropy, + <:DiscreteInfoEstimator, + <:CodifyVariables{1,<:TransferOperator}, <:RelativeAmount }, - x... - ) + x... +) # We never reach this point unless the outcome space is the same for all marginals, # so we can safely pick the first outcome space. 
d::TransferOperator = first(est.discretization.outcome_spaces) @@ -90,25 +90,25 @@ function h4_marginal_probs( cols_STC = [vars.S; vars.T; vars.C] cols_T⁺TC = [vars.Tf; vars.T; vars.C] cols_TC = [vars.T; vars.C] - pTC = marginal_probs_from_μ(cols_TC, positive_measure_bins, iv, inds_non0measure) + pTC = marginal_probs_from_μ(cols_TC, positive_measure_bins, iv, inds_non0measure) pSTC = marginal_probs_from_μ(cols_STC, positive_measure_bins, iv, inds_non0measure) pT⁺TC = marginal_probs_from_μ(cols_T⁺TC, positive_measure_bins, iv, inds_non0measure) pST⁺TC = iv.ρ[inds_non0measure] - return Probabilities(pTC), - Probabilities(pSTC), - Probabilities(pT⁺TC), - Probabilities(pST⁺TC) + return Probabilities(pTC), + Probabilities(pSTC), + Probabilities(pT⁺TC), + Probabilities(pST⁺TC) end function association( - est::EntropyDecomposition{ - <:TransferEntropy, - <:DiscreteInfoEstimator, - <:CodifyVariables{1, <:TransferOperator}, - <:RelativeAmount - }, - x...) + est::EntropyDecomposition{ + <:TransferEntropy, + <:DiscreteInfoEstimator, + <:CodifyVariables{1,<:TransferOperator}, + <:RelativeAmount + }, + x...) # If a conditional input (x[3]) is not provided, then C is just a 0-dimensional # StateSpaceSet. The horizontal concatenation of C with T then just returns T. # We therefore don't need separate methods for the conditional and non-conditional @@ -118,10 +118,10 @@ function association( h_est = estimator_with_overridden_parameters(cmi_est.definition, cmi_est.est) # Estimate by letting TE(s -> t | c) := I(t⁺; s⁻ | t⁻, c⁻). - hSTC = information(h_est, pSTC) + hSTC = information(h_est, pSTC) hT⁺TC = information(h_est, pT⁺TC) hTC = information(h_est, pTC) hST⁺TC = information(h_est, pST⁺TC) te = hT⁺TC - hTC - hST⁺TC + hSTC - return te + return te end \ No newline at end of file diff --git a/src/methods/information/estimators/JointProbabilities.jl b/src/methods/information/estimators/JointProbabilities.jl index 9b972ba5..64d2c4d7 100644 --- a/src/methods/information/estimators/JointProbabilities.jl +++ b/src/methods/information/estimators/JointProbabilities.jl @@ -17,8 +17,8 @@ export JointProbabilities ## Description It first encodes the input data according to the given `discretization`, then constructs -`probs`, a multidimensional [`Probabilities`](@ref) instance. Finally, `probs` are -forwarded to a [`PlugIn`](@ref) estimator, which computes the measure according to +`probs`, a multidimensional [`Probabilities`](@extref ComplexityMeasures.Probabilities) instance. Finally, `probs` are +forwarded to a [`PlugIn`](@extref ComplexityMeasures.PlugIn) estimator, which computes the measure according to `definition`. # Compatible encoding schemes @@ -27,7 +27,7 @@ forwarded to a [`PlugIn`](@ref) estimator, which computes the measure according applying an encoding in a sliding window over each input variable). - [`CodifyPoints`](@ref) (encode each *point*/column of the input data) -Works for any [`OutcomeSpace`](@ref) that implements [`codify`](@ref). +Works for any [`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace) that implements [`codify`](@ref). !!! note "Joint probabilities vs decomposition methods" @@ -41,15 +41,15 @@ Works for any [`OutcomeSpace`](@ref) that implements [`codify`](@ref). because it fails to fully take into consideration the joint relationships between the variables. Pick your estimator according to your needs. -See also: [`Counts`](@ref), [`Probabilities`](@ref), [`ProbabilitiesEstimator`](@ref), -[`OutcomeSpace`](@ref), [`DiscreteInfoEstimator`](@ref). 
+See also: [`Counts`](@extref ComplexityMeasures.Counts), [`Probabilities`](@extref ComplexityMeasures.Probabilities), [`ProbabilitiesEstimator`](@extref ComplexityMeasures.ProbabilitiesEstimator), +[`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace), [`DiscreteInfoEstimator`](@extref ComplexityMeasures.DiscreteInfoEstimator). """ -struct JointProbabilities{M <: MultivariateInformationMeasure, O, P} <: MultivariateInformationMeasureEstimator{M} +struct JointProbabilities{M<:MultivariateInformationMeasure,O,P} <: MultivariateInformationMeasureEstimator{M} definition::M # API from complexity measures: definition must be the first field of the infoestimator. discretization::O pest::P # Not exposed to user for now. - function JointProbabilities(def::M, disc::D, pest = RelativeAmount()) where {M, D} - new{M, D, typeof(pest)}(def, disc, pest) + function JointProbabilities(def::M, disc::D, pest=RelativeAmount()) where {M,D} + new{M,D,typeof(pest)}(def, disc, pest) end end diff --git a/src/methods/information/estimators/codify_marginals.jl b/src/methods/information/estimators/codify_marginals.jl index 4b02603a..fc837695 100644 --- a/src/methods/information/estimators/codify_marginals.jl +++ b/src/methods/information/estimators/codify_marginals.jl @@ -1,5 +1,5 @@ using ComplexityMeasures -export codified_marginals +export codified_marginals """ codified_marginals(o::OutcomeSpace, x::VectorOrStateSpaceSet...) @@ -8,7 +8,7 @@ Encode/discretize each input vector (e.g. timeseries) `xᵢ ∈ x` according to determined by `o`. For some outcome spaces, the encoding is sequential (i.e. time ordering matters). -Any `xᵢ ∈ X` that are multidimensional ([`StateSpaceSet`](@ref)s) will be encoded +Any `xᵢ ∈ X` that are multidimensional ([`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet)s) will be encoded column-wise, i.e. each column of `xᵢ` is treated as a timeseries and is encoded separately. This is useful for discretizing input data when computing some @@ -18,23 +18,23 @@ to handle discretization. ## Supported estimators -- [`ValueBinning`](@ref). Bin visitation frequencies are counted in the joint space `XY`, +- [`ValueBinning`](@extref ComplexityMeasures.ValueBinning). Bin visitation frequencies are counted in the joint space `XY`, then marginal visitations are obtained from the joint bin visits. - This behaviour is the same for both [`FixedRectangularBinning`](@ref) and - [`RectangularBinning`](@ref) (which adapts the grid to the data). - When using [`FixedRectangularBinning`](@ref), the range along the first dimension + This behaviour is the same for both [`FixedRectangularBinning`](@extref ComplexityMeasures.FixedRectangularBinning) and + [`RectangularBinning`](@extref ComplexityMeasures.RectangularBinning) (which adapts the grid to the data). + When using [`FixedRectangularBinning`](@extref ComplexityMeasures.FixedRectangularBinning), the range along the first dimension is used as a template for all other dimensions. -- [`OrdinalPatterns`](@ref). Each timeseries is separately [`codify`](@ref)-ed by +- [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns). Each timeseries is separately [`codify`](@ref)-ed by embedding the timeseries, then sequentially encoding the ordinal patterns of the embedding vectors. -- [`Dispersion`](@ref). Each timeseries is separately [`codify`](@ref)-ed by +- [`Dispersion`](@extref ComplexityMeasures.Dispersion). 
Each timeseries is separately [`codify`](@ref)-ed by embedding the timeseries, then sequentially encoding the embedding vectors according to their dispersion pattern (which for each embedding vector is computed relative to all other embedding vectors). -- [`CosineSimilarityBinning`](@ref). Each timeseries is separately [`codify`](@ref)-ed +- [`CosineSimilarityBinning`](@extref ComplexityMeasures.CosineSimilarityBinning). Each timeseries is separately [`codify`](@ref)-ed by embedding the timeseries, the encoding the embedding points in a in a sequential manner according to the cosine similarity of the embedding vectors. -- [`UniqueElements`](@ref). Each timeseries is [`codify`](@ref)-ed according to +- [`UniqueElements`](@extref ComplexityMeasures.UniqueElements). Each timeseries is [`codify`](@ref)-ed according to its unique values (i.e. each unique element gets assigned a specific integer). More implementations are possible. @@ -72,8 +72,8 @@ end # TODO: maybe construct a convenience wrapper where the user can avoid constructing the # joint space, for performance benefits (but increased bias). function codify_marginal( - o::ValueBinning{<:FixedRectangularBinning{D}}, - x::AbstractVector) where D + o::ValueBinning{<:FixedRectangularBinning{D}}, + x::AbstractVector) where D range = first(o.binning.ranges) ϵmin = minimum(range) ϵmax = maximum(range) @@ -100,7 +100,7 @@ function codified_marginals(o::ValueBinning{<:RectangularBinning}, x::VectorOrSt s = 1 encodings = Vector{Vector}(undef, 0) for (i, cidx) in enumerate(idxs) - variable_subset = s:(s + cidx - 1) + variable_subset = s:(s+cidx-1) s += cidx y = @views joint_bins[:, variable_subset] for j in size(y, 2) @@ -113,7 +113,7 @@ end # A version of `cartesian_bin_index` that directly returns the joint bin encoding # instead of converting it to a cartesian index. -function encode_as_tuple(e::RectangularBinEncoding, point::SVector{D, T}) where {D, T} +function encode_as_tuple(e::RectangularBinEncoding, point::SVector{D,T}) where {D,T} ranges = e.ranges if e.precise # Don't know how to make this faster unfurtunately... diff --git a/src/methods/information/estimators/conditional_mutual_info_estimators/FPVP.jl b/src/methods/information/estimators/conditional_mutual_info_estimators/FPVP.jl index a90b87d6..da9cde4e 100644 --- a/src/methods/information/estimators/conditional_mutual_info_estimators/FPVP.jl +++ b/src/methods/information/estimators/conditional_mutual_info_estimators/FPVP.jl @@ -12,7 +12,7 @@ export FPVP The Frenzel-Pompe-Vejmelka-Paluš (or `FPVP` for short) [`ConditionalMutualInformationEstimator`](@ref) is used to estimate the conditional mutual information using a `k`-th nearest neighbor approach that is -analogous to that of the [`KraskovStögbauerGrassberger1`](@ref) mutual information +analogous to that of the [`KraskovStögbauerGrassberger2`](@ref) mutual information estimator from [Frenzel2007](@citet) and [Vejmelka2008](@citet). `k` is the number of nearest neighbors. `w` is the Theiler window, which controls the @@ -33,7 +33,7 @@ number of temporal neighbors that are excluded during neighbor searches. 
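For the FPVP estimator whose docstring is edited in this hunk, a minimal call sketch. The constructor keywords match the `FPVP(definition = CMIShannon(); k = 1, w = 0)` signature further down in the diff; the data and the choice of `k` are illustrative.

```julia
using Associations
using Random: Xoshiro

rng = Xoshiro(42)
n = 2000
z = rand(rng, n)
x = z .+ rand(rng, n)
y = z .+ rand(rng, n)      # X and Y are related only through Z

# k-nearest-neighbor CMI estimate; should be near 0 (and can be negative)
# because conditioning on Z removes the X-Y dependence.
association(FPVP(CMIShannon(); k = 5, w = 0), x, y, z)
```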
- [Example 1](@ref example_CMIShannon_FPVP): Estimating [`CMIShannon`](@ref) """ -struct FPVP{M <: ConditionalMutualInformation, MJ, MM} <: ConditionalMutualInformationEstimator{M} +struct FPVP{M<:ConditionalMutualInformation,MJ,MM} <: ConditionalMutualInformationEstimator{M} definition::M k::Int w::Int @@ -41,7 +41,7 @@ struct FPVP{M <: ConditionalMutualInformation, MJ, MM} <: ConditionalMutualInfor metric_marginals::MM end -function FPVP(definition = CMIShannon(); k = 1, w = 0) +function FPVP(definition=CMIShannon(); k=1, w=0) # Metrics shouldn't be modified by the user. metric_joint = Chebyshev() metric_marginals = Chebyshev() diff --git a/src/methods/information/estimators/conditional_mutual_info_estimators/Rahimzamani.jl b/src/methods/information/estimators/conditional_mutual_info_estimators/Rahimzamani.jl index a86268aa..c6a44973 100644 --- a/src/methods/information/estimators/conditional_mutual_info_estimators/Rahimzamani.jl +++ b/src/methods/information/estimators/conditional_mutual_info_estimators/Rahimzamani.jl @@ -36,14 +36,14 @@ association(Rahimzamani(; k = 10), x, z, y) # should be near 0 (and can be negat ``` """ -struct Rahimzamani{M <: ConditionalMutualInformation, ME} <: ConditionalMutualInformationEstimator{M} +struct Rahimzamani{M<:ConditionalMutualInformation,ME} <: ConditionalMutualInformationEstimator{M} definition::M k::Int w::Int metric::ME end -function Rahimzamani(definition = CMIShannon(); k = 1, w = 0) +function Rahimzamani(definition=CMIShannon(); k=1, w=0) # Metric shouldn't be modified by the user. metric = Chebyshev() return Rahimzamani(definition, k, w, metric) @@ -75,7 +75,7 @@ function association(est::Rahimzamani{<:CMIShannon}, x, y, z) # I assume ρ_{i, xy} is the distance in the *joint* space. # ... but isn't this just the FPVP estimator? dmax = ds_joint[i] - k̂ = dmax == 0 ? inrangecount(tree_joint, joint[i], 0.0) - 1 : k + k̂ = dmax == 0 ? inrangecount(tree_joint, joint[i], 0.0) - 1 : k condmi += digamma(k̂) condmi -= log(inrangecount(tree_xz, XZ[i], dmax)) condmi -= log(inrangecount(tree_yz, YZ[i], dmax)) diff --git a/src/methods/information/estimators/decomposition/EntropyDecomposition.jl b/src/methods/information/estimators/decomposition/EntropyDecomposition.jl index 5b6a5055..2799daec 100644 --- a/src/methods/information/estimators/decomposition/EntropyDecomposition.jl +++ b/src/methods/information/estimators/decomposition/EntropyDecomposition.jl @@ -12,8 +12,26 @@ export EntropyDecomposition Estimate the multivariate information measure specified by `definition` by rewriting its formula into some combination of entropy terms. -If calling the second method (discrete variant), then discretization is always done -per variable/column and each column is encoded into integers using [`codify`](@ref). +## Estimation + +`EntropyDecomposition` allows using any +[`InformationMeasureEstimators`s](@extref ComplexityMeasures.InformationMeasureEstimator) from +[ComplexityMeasures.jl](https://juliadynamics.github.io/DynamicalSystemsDocs.jl/complexitymeasures/stable/) +to estimate multivariate information measures. + +Computations are done by computing individual entropy terms using `est`, then combining them according to +`definition` to form the final estimate. + +## Bias + +Estimating the `definition` by decomposition into a combination of entropy terms, +which are estimated independently, will in general be more biased than when using a +dedicated estimator. One reason is that this decomposition may miss out on crucial +information in the joint space. 
To remedy this, dedicated information measure +estimators typically derive the marginal estimates by first considering the joint +space, and then does some clever trick to eliminate the bias that is introduced +through a naive decomposition. Unless specified below, no bias correction is +applied for `EntropyDecomposition`. ## Usage @@ -23,42 +41,37 @@ per variable/column and each column is encoded into integers using [`codify`](@r ## Description -The entropy terms are estimated using `est`, and then combined to form the final -estimate of `definition`. No bias correction is applied. -If `est` is a [`DifferentialInfoEstimator`](@ref), then `discretization` and `pest` -are ignored. If `est` is a [`DiscreteInfoEstimator`](@ref), then `discretization` and a -probabilities estimator `pest` must also be provided (default to `RelativeAmount`, -which uses naive plug-in probabilities). - -## Compatible differential information estimators +If `est` is a [`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator), then `discretization` and `pest` +are ignored. +## Differential estimation -If using the first signature, any compatible [`DifferentialInfoEstimator`](@ref) can be +If using the first signature, any compatible [`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator) can be used. +[`MITsallisMartin`](@ref) can be estimated using a decomposition into entropy +terms using [`EntropyDecomposition`](@ref). This is done by using estimators from +[ComplexityMeasures.jl](https://juliadynamics.github.io/DynamicalSystemsDocs.jl/complexitymeasures/stable/). We can use any compatible +[`InformationMeasureEstimator`](@extref ComplexityMeasures.InformationMeasureEstimator) +that can estimate differential [`Tsallis`](@extref ComplexityMeasures.Tsallis) entropy from +[ComplexityMeasures.jl](https://juliadynamics.github.io/DynamicalSystemsDocs.jl/complexitymeasures/stable/). -## Compatible outcome spaces for discrete estimation +## Discrete estimation -If using the second signature, the outcome spaces can be used for discretisation. -Note that not all outcome spaces will work with all measures. +If `est` is a [`DiscreteInfoEstimator`](@extref ComplexityMeasures.DiscreteInfoEstimator), then +`discretization` and a probabilities estimator `pest` must also be provided (default to `RelativeAmount`, +which uses naive plug-in probabilities). This will always discretize the input data +per variable/column, and the encode the discretized column into integers using [`codify`](@ref). -| Estimator | Principle | -| :-------------------------------- | :---------------------------- | -| [`UniqueElements`](@ref) | Count of unique elements | -| [`ValueBinning`](@ref) | Binning (histogram) | -| [`OrdinalPatterns`](@ref) | Ordinal patterns | -| [`Dispersion`](@ref) | Dispersion patterns | -| [`BubbleSortSwaps`](@ref) | Sorting complexity | -| [`CosineSimilarityBinning`](@ref) | Cosine similarities histogram | - -## Bias +The following [`OutcomeSpace`s](@extref ComplexityMeasures.OutcomeSpace) can be used for discretisation. +Note that not all outcome spaces will work with all measures. -Estimating the `definition` by decomposition into a combination of entropy terms, -which are estimated independently, will in general be more biased than when using a -dedicated estimator. One reason is that this decomposition may miss out on crucial -information in the joint space. 
To remedy this, dedicated information measure -estimators typically derive the marginal estimates by first considering the joint -space, and then does some clever trick to eliminate the bias that is introduced -through a naive decomposition. Unless specified below, no bias correction is -applied for `EntropyDecomposition`. +| Estimator | Principle | +| :------------------------------------------------------------------------------ | :---------------------------- | +| [`UniqueElements`](@extref ComplexityMeasures.UniqueElements) | Count of unique elements | +| [`ValueBinning`](@extref ComplexityMeasures.ValueBinning) | Binning (histogram) | +| [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) | Ordinal patterns | +| [`Dispersion`](@extref ComplexityMeasures.Dispersion) | Dispersion patterns | +| [`BubbleSortSwaps`](@extref ComplexityMeasures.BubbleSortSwaps) | Sorting complexity | +| [`CosineSimilarityBinning`](@extref ComplexityMeasures.CosineSimilarityBinning) | Cosine similarities histogram | ## Handling of overlapping parameters @@ -77,48 +90,48 @@ of entropies using the `Kraskov(Shannon(base = ℯ))` Shannon entropy estimator, ## Discrete entropy decomposition -The second signature is for discrete estimation using [`DiscreteInfoEstimator`](@ref)s, -for example [`PlugIn`](@ref). The given `discretization` scheme (typically an -[`OutcomeSpace`](@ref)) controls how the joint/marginals are discretized, and the +The second signature is for discrete estimation using [`DiscreteInfoEstimator`](@extref ComplexityMeasures.DiscreteInfoEstimator)s, +for example [`PlugIn`](@extref ComplexityMeasures.PlugIn). The given `discretization` scheme (typically an +[`OutcomeSpace`](@extref ComplexityMeasures.OutcomeSpace)) controls how the joint/marginals are discretized, and the probabilities estimator `pest` controls how probabilities are estimated from counts. !!! note "Bias" - Like for [`DifferentialInfoEstimator`](@ref), using a dedicated estimator + Like for [`DifferentialInfoEstimator`](@extref ComplexityMeasures.DifferentialInfoEstimator), using a dedicated estimator for the measure in question will be more reliable than using a decomposition estimate. Here's how different `discretization`s are applied: - - [`ValueBinning`](@ref). Bin visitation frequencies are counted in the joint space + - [`ValueBinning`](@extref ComplexityMeasures.ValueBinning). Bin visitation frequencies are counted in the joint space `XY`, then marginal visitations are obtained from the joint bin visits. - This behaviour is the same for both [`FixedRectangularBinning`](@ref) and - [`RectangularBinning`](@ref) (which adapts the grid to the data). - When using [`FixedRectangularBinning`](@ref), the range along the first dimension + This behaviour is the same for both [`FixedRectangularBinning`](@extref ComplexityMeasures.FixedRectangularBinning) and + [`RectangularBinning`](@extref ComplexityMeasures.RectangularBinning) (which adapts the grid to the data). + When using [`FixedRectangularBinning`](@extref ComplexityMeasures.FixedRectangularBinning), the range along the first dimension is used as a template for all other dimensions. This is a bit slower than naively binning each marginal, but lessens bias. - - [`OrdinalPatterns`](@ref). Each timeseries is separately [`codify`](@ref)-ed + - [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns). Each timeseries is separately [`codify`](@ref)-ed according to its ordinal pattern (no bias correction). - - [`Dispersion`](@ref). 
Each timeseries is separately [`codify`](@ref)-ed according + - [`Dispersion`](@extref ComplexityMeasures.Dispersion). Each timeseries is separately [`codify`](@ref)-ed according to its dispersion pattern (no bias correction). ## Examples - [Example 1](@ref example_MIShannon_EntropyDecomposition_Jackknife_ValueBinning): - [`MIShannon`](@ref) estimation using decomposition into discrete [`Shannon`](@ref) - entropy estimated using [`CodifyVariables`](@ref) with [`ValueBinning`](@ref). + [`MIShannon`](@ref) estimation using decomposition into discrete [`Shannon`](@extref ComplexityMeasures.Shannon) + entropy estimated using [`CodifyVariables`](@ref) with [`ValueBinning`](@extref ComplexityMeasures.ValueBinning). - [Example 2](@ref example_MIShannon_EntropyDecomposition_BubbleSortSwaps): - [`MIShannon`](@ref) estimation using decomposition into discrete [`Shannon`](@ref) - entropy estimated using [`CodifyVariables`](@ref) with [`BubbleSortSwaps`](@ref). + [`MIShannon`](@ref) estimation using decomposition into discrete [`Shannon`](@extref ComplexityMeasures.Shannon) + entropy estimated using [`CodifyVariables`](@ref) with [`BubbleSortSwaps`](@extref ComplexityMeasures.BubbleSortSwaps). - [Example 3](@ref example_MIShannon_EntropyDecomposition_Kraskov): - [`MIShannon`](@ref) estimation using decomposition into differental [`Shannon`](@ref) - entropy estimated using the [`Kraskov`](@ref) estimator. + [`MIShannon`](@ref) estimation using decomposition into differental [`Shannon`](@extref ComplexityMeasures.Shannon) + entropy estimated using the [`Kraskov`](@extref ComplexityMeasures.Kraskov) estimator. See also: [`MutualInformationEstimator`](@ref), [`MultivariateInformationMeasure`](@ref). """ struct EntropyDecomposition{ - M <: MultivariateInformationMeasure, - E <: InformationMeasureEstimator, - D <: Union{Discretization, Nothing}, - P <: Union{ProbabilitiesEstimator, Nothing} - } <: DecompositionEstimator{M} + M<:MultivariateInformationMeasure, + E<:InformationMeasureEstimator, + D<:Union{Discretization,Nothing}, + P<:Union{ProbabilitiesEstimator,Nothing} +} <: DecompositionEstimator{M} definition::M # extend API from complexity measures: definition must be the first field of the info estimator. est::E # The estimator + measure which `definition` is decomposed into. discretization::D # `Nothing` if `est` is a `DifferentialInfoEstimator`. @@ -126,26 +139,26 @@ struct EntropyDecomposition{ function EntropyDecomposition( - definition::MultivariateInformationMeasure, + definition::MultivariateInformationMeasure, est::DifferentialInfoEstimator) M = typeof(definition) E = typeof(est) verify_decomposition_entropy_type(definition, est) - return new{M, E, Nothing, Nothing}(definition, est, nothing, nothing) + return new{M,E,Nothing,Nothing}(definition, est, nothing, nothing) end function EntropyDecomposition( - definition::MultivariateInformationMeasure, - est::DiscreteInfoEstimator, - discretization::D, - pest::ProbabilitiesEstimator = RelativeAmount(), - ) where {D} + definition::MultivariateInformationMeasure, + est::DiscreteInfoEstimator, + discretization::D, + pest::ProbabilitiesEstimator=RelativeAmount(), + ) where {D} M = typeof(definition) E = typeof(est) P = typeof(pest) verify_decomposition_entropy_type(definition, est) - return new{M, E, D, P}(definition, est, discretization, pest) + return new{M,E,D,P}(definition, est, discretization, pest) end end @@ -160,15 +173,15 @@ Check that we can actually decompose the `definition` into `est.definition`. The default is to do nothing. 
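To close out the `EntropyDecomposition` changes, a rough sketch of the differential signature in action: `MIShannon` decomposed into three `Kraskov` Shannon-entropy estimates, with no discretization or probabilities estimator required. The choice of `k` is arbitrary.

```julia
using Associations, ComplexityMeasures

x, y = rand(2000), rand(2000)

# Differential decomposition: Iₛ(X, Y) = hₛ(X) + hₛ(Y) - hₛ(X, Y),
# each term estimated with the Kraskov nearest-neighbor entropy estimator.
est = EntropyDecomposition(MIShannon(base = ℯ), Kraskov(Shannon(); k = 10))
association(est, x, y)   # ≈ 0 for independent inputs (can be slightly negative)
```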
Certain definitions may override (e.g. `CMIRenyiJizba` does so). """ function verify_decomposition_entropy_type( - definition::MultivariateInformationMeasure, - est::Union{DiscreteInfoEstimator, DifferentialInfoEstimator}) + definition::MultivariateInformationMeasure, + est::Union{DiscreteInfoEstimator,DifferentialInfoEstimator}) end # ---------------------------------------------------------------------------------------- # Custom pretty printing for discrete entropy estimators, since it has more field. # ---------------------------------------------------------------------------------------- -function summary_strings(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) where M +function summary_strings(est::EntropyDecomposition{<:M,<:DiscreteInfoEstimator}) where M return [ "Measure to be decomposed", "Estimator for decomposed components", @@ -177,7 +190,7 @@ function summary_strings(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator} ] end -function summary_types(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) where M +function summary_types(est::EntropyDecomposition{<:M,<:DiscreteInfoEstimator}) where M return [ typeof(est.definition), typeof(est.est), @@ -186,7 +199,7 @@ function summary_types(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) ] end -function measure_colors(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) where M +function measure_colors(est::EntropyDecomposition{<:M,<:DiscreteInfoEstimator}) where M return [ :light_red, :light_green, @@ -195,7 +208,7 @@ function measure_colors(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) ] end -function info_colors(est::EntropyDecomposition{<:M, <:DiscreteInfoEstimator}) where M +function info_colors(est::EntropyDecomposition{<:M,<:DiscreteInfoEstimator}) where M return [ :red, :green, diff --git a/src/methods/information/estimators/decomposition/MIDecomposition.jl b/src/methods/information/estimators/decomposition/MIDecomposition.jl index 294cdfe4..b512ba52 100644 --- a/src/methods/information/estimators/decomposition/MIDecomposition.jl +++ b/src/methods/information/estimators/decomposition/MIDecomposition.jl @@ -21,11 +21,11 @@ value of the measure. - [Example 1](@ref example_CMIShannon_MIDecomposition): Estimating [`CMIShannon`](@ref) using a decomposition into [`MIShannon`](@ref) terms using - the [`KraskovStögbauerGrassberger1`](@ref) mutual information estimator. + the [`KraskovStögbauerGrassberger2`](@ref) mutual information estimator. See also: [`MultivariateInformationMeasureEstimator`](@ref). """ -struct MIDecomposition{M <: MultivariateInformationMeasure, E} <: DecompositionEstimator{M} +struct MIDecomposition{M<:MultivariateInformationMeasure,E} <: DecompositionEstimator{M} definition::M # extend API from complexity measures: definition must be the first field of the info estimator. est::E # The MI estimator + measure which `definition` is decomposed into. end diff --git a/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl b/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl index fe56ca9f..f27349d7 100644 --- a/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl +++ b/src/methods/information/estimators/mutual_info_estimators/GaoKannanOhViswanath.jl @@ -40,8 +40,8 @@ variables (by quantization) or making it continuous by adding a small Gaussian n !!! 
warn "Implementation note" In [GaoKannanOhViswanath2017](@citet), they claim (roughly speaking) that the estimator - reduces to the [`KraskovStögbauerGrassberger1`](@ref) estimator for continuous-valued data. - However, [`KraskovStögbauerGrassberger1`](@ref) uses the digamma function, while `GaoKannanOhViswanath` + reduces to the [`KraskovStögbauerGrassberger2`](@ref) estimator for continuous-valued data. + However, [`KraskovStögbauerGrassberger2`](@ref) uses the digamma function, while `GaoKannanOhViswanath` uses the logarithm instead, so the estimators are not exactly equivalent for continuous data. @@ -51,7 +51,7 @@ variables (by quantization) or making it continuous by adding a small Gaussian n `k`-th nearest distances among the two marginal spaces, which are in general not the same as the `k`-th neighbor distance in the joint space (unless both marginals are univariate). Therefore, our implementation here differs slightly from algorithm 1 in - `GaoKannanOhViswanath`. We have modified it in a way that mimics [`KraskovStögbauerGrassberger1`](@ref) for + `GaoKannanOhViswanath`. We have modified it in a way that mimics [`KraskovStögbauerGrassberger2`](@ref) for continous data. Note that because of using the `log` function instead of `digamma`, there will be slight differences between the methods. See the source code for more details. @@ -72,13 +72,13 @@ x = rand(rng, 10000); y = rand(rng, 10000) association(GaoKannanOhViswanath(; k = 10), x, y) # should be near 0 (and can be negative) ``` """ -struct GaoKannanOhViswanath{M <: MutualInformation} <: MutualInformationEstimator{M} +struct GaoKannanOhViswanath{M<:MutualInformation} <: MutualInformationEstimator{M} definition::M k::Int - w::Int + w::Int end -function GaoKannanOhViswanath(definition = MIShannon(); k = 1, w = 0) +function GaoKannanOhViswanath(definition=MIShannon(); k=1, w=0) return GaoKannanOhViswanath(definition, k, w) end # TODO: We here extend the estimator to multiple variables (i.e. the multi-information), diff --git a/src/methods/information/estimators/mutual_info_estimators/GaussianMI.jl b/src/methods/information/estimators/mutual_info_estimators/GaussianMI.jl index d450187b..205e0742 100644 --- a/src/methods/information/estimators/mutual_info_estimators/GaussianMI.jl +++ b/src/methods/information/estimators/mutual_info_estimators/GaussianMI.jl @@ -21,7 +21,7 @@ using LinearAlgebra: eigvals, det ## Description Given ``d_x``-dimensional and ``d_y``-dimensional input data `X` and `Y`, -`GaussianMI` first constructs the ``d_x + d_y``-dimensional joint [`StateSpaceSet`](@ref) `XY`. +`GaussianMI` first constructs the ``d_x + d_y``-dimensional joint [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet) `XY`. If `normalize == true`, then we follow the approach in Vejmelka & Palus (2008)[Vejmelka2008](@cite) and transform each column in `XY` to have zero mean and unit standard deviation. If `normalize == false`, then the algorithm proceeds without @@ -31,7 +31,7 @@ Next, the `C_{XY}`, the correlation matrix for the (normalized) joint data `XY` computed. The mutual information estimate `GaussianMI` assumes the input variables are distributed according to normal distributions with zero means and unit standard deviations. 
Therefore, given ``d_x``-dimensional and ``d_y``-dimensional input data `X` and `Y`, -`GaussianMI` first constructs the joint [`StateSpaceSet`](@ref) `XY`, then transforms each +`GaussianMI` first constructs the joint [`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet) `XY`, then transforms each column in `XY` to have zero mean and unit standard deviation, and finally computes the `\\Sigma`, the correlation matrix for `XY`. @@ -68,12 +68,12 @@ x = rand(rng, 10000); y = rand(rng, 10000) association(GaussianMI(), x, y) # should be near 0 (and can be negative) ``` """ -struct GaussianMI{M <: MutualInformation} <: MutualInformationEstimator{M} +struct GaussianMI{M<:MutualInformation} <: MutualInformationEstimator{M} definition::M normalize::Bool end -function GaussianMI(definition = MIShannon(); normalize = true) +function GaussianMI(definition=MIShannon(); normalize=true) return GaussianMI(definition, normalize) end diff --git a/src/methods/information/estimators/transfer_entropy_estimators/Hilbert.jl b/src/methods/information/estimators/transfer_entropy_estimators/Hilbert.jl index 99424a34..aaadddc2 100644 --- a/src/methods/information/estimators/transfer_entropy_estimators/Hilbert.jl +++ b/src/methods/information/estimators/transfer_entropy_estimators/Hilbert.jl @@ -30,7 +30,7 @@ obtained by first applying the Hilbert transform to each signal, then extracting phases/amplitudes of the resulting complex numbers [Palus2014](@cite). Original time series are thus transformed to instantaneous phase/amplitude time series. Transfer entropy is then estimated using the provided `est` on those phases/amplitudes (use e.g. -[`ValueBinning`](@ref), or [`OrdinalPatterns`](@ref)). +[`ValueBinning`](@extref ComplexityMeasures.ValueBinning), or [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns)). !!! info Details on estimation of the transfer entropy (conditional mutual information) @@ -47,9 +47,9 @@ struct Hilbert{M} <: TransferEntropyEstimator{M} cond::InstantaneousSignalProperty function Hilbert(est::M; - source::InstantaneousSignalProperty = Phase(), - target::InstantaneousSignalProperty = Phase(), - cond::InstantaneousSignalProperty = Phase()) where M + source::InstantaneousSignalProperty=Phase(), + target::InstantaneousSignalProperty=Phase(), + cond::InstantaneousSignalProperty=Phase()) where M new{M}(est, source, target, cond) end end @@ -105,6 +105,6 @@ function association(est::Hilbert, source, target, cond) else throw(ArgumentError("est.cond must be either Phase or Amplitude instance")) end - + association(est.est, s, t, c) end diff --git a/src/methods/information/estimators/transfer_entropy_estimators/SymbolicTransferEntropy.jl b/src/methods/information/estimators/transfer_entropy_estimators/SymbolicTransferEntropy.jl index 9437ed18..f9253d93 100644 --- a/src/methods/information/estimators/transfer_entropy_estimators/SymbolicTransferEntropy.jl +++ b/src/methods/information/estimators/transfer_entropy_estimators/SymbolicTransferEntropy.jl @@ -16,7 +16,7 @@ A convenience estimator for symbolic transfer entropy [Staniek2008](@cite). [Symbolic transfer entropy](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.100.158101) consists of two simple steps. First, the input time series are encoded using [`codify`](@ref) with -the [`CodifyVariables`](@ref) discretization and the [`OrdinalPatterns`](@ref) outcome space. This +the [`CodifyVariables`](@ref) discretization and the [`OrdinalPatterns`](@extref ComplexityMeasures.OrdinalPatterns) outcome space. 
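A quick sketch of this encoding step (illustrative; the series length and pattern parameters are assumptions, not taken from the diff):

```julia
# Encode a real-valued time series into integer ordinal-pattern symbols,
# as described above. One symbol is produced per length-m embedding window.
using Associations

x = rand(500)
disc = CodifyVariables(OrdinalPatterns(m = 3, τ = 1))
x̂ = codify(disc, x)  # integer-valued symbol time series
```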
This transforms the input time series into integer time series. Transfer entropy entropy is then estimated from the encoded time series by applying @@ -34,8 +34,8 @@ struct SymbolicTransferEntropy{M} <: TransferEntropyEstimator{M} lt::Function end -function SymbolicTransferEntropy(definition::M = TEShannon(); - m = 3, τ = 1, lt = ComplexityMeasures.isless_rand) where M +function SymbolicTransferEntropy(definition::M=TEShannon(); + m=3, τ=1, lt=ComplexityMeasures.isless_rand) where M return SymbolicTransferEntropy{M}(definition, m, τ, lt) end @@ -43,7 +43,7 @@ function association(est::SymbolicTransferEntropy{<:TEShannon}, x::AbstractVecto (; m, τ, lt) = est discretization = CodifyVariables(OrdinalPatterns(; m, τ, lt)) - x̂ = (codify(discretization, xᵢ) for xᵢ in x) + x̂ = (codify(discretization, xᵢ) for xᵢ in x) te_definition = est.definition embedding = te_definition.embedding @@ -55,9 +55,9 @@ function association(est::SymbolicTransferEntropy{<:TEShannon}, x::AbstractVecto # We have already encoded the marginals, so when computing CMI, we can # simply use `UniqueElements`. - cmi_def = CMIShannon(; base = est.definition.base) + cmi_def = CMIShannon(; base=est.definition.base) disc = CodifyVariables(UniqueElements()) - + est_unique = JointProbabilities(cmi_def, disc) - return association(est_unique, T⁺, S, StateSpaceSet(T, C)) + return association(est_unique, T⁺, S, StateSpaceSet(T, C)) end diff --git a/src/methods/recurrence/MCR.jl b/src/methods/recurrence/MCR.jl index 190e36a7..9001c6f4 100644 --- a/src/methods/recurrence/MCR.jl +++ b/src/methods/recurrence/MCR.jl @@ -47,14 +47,14 @@ defined analogously. ## Input data `X` and `Y` can be either both univariate timeseries, or both multivariate -[`StateSpaceSet`](@ref)s. +[`StateSpaceSet`](@extref StateSpaceSets.StateSpaceSet)s. ## Estimation - [Example 1](@ref example_MCR). Pairwise versus conditional MCR. """ -Base.@kwdef struct MCR{R, M} <: AssociationMeasure +Base.@kwdef struct MCR{R,M} <: AssociationMeasure r::R metric::M = Euclidean() end diff --git a/src/utils/groupslices.jl b/src/utils/groupslices.jl new file mode 100644 index 00000000..bd1f1379 --- /dev/null +++ b/src/utils/groupslices.jl @@ -0,0 +1,237 @@ +# This module is a direct copy of https://github.com/mcabbott/GroupSlices.jl, +# originally written by Andy Greenwell at https://github.com/AndyGreenwell/GroupSlices.jl, +# and is MIT licensed. +# +# Maintained here for future stability, because the registered GroupSlices package is only +# v0.0.3. +# +# This file was previously in ComplexityMeasures.jl v3.7 and lower. After ComplexityMeasures v3.8, +# the file was moved here as a quick fix for +# https://github.com/JuliaDynamics/Associations.jl/issues/394. +# +# License from (https://github.com/mcabbott/GroupSlices.jl/blob/master/LICENSE): +# +# The MIT License (MIT) + +# Copyright (c) 2016 + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +module GroupSlices + +import Base.hash +import Base.Cartesian, Base.Cartesian.@nloops, Base.Cartesian.@nref + +export groupslices, groupinds, firstinds, lastinds + +struct Prehashed + hash::UInt +end +hash(x::Prehashed) = x.hash + +""" + groupslices(V::AbstractVector) +Returns a vector of integers the same length as `V`, +which in place of each entry `x` has the index of the first entry of `V` which is equal to `x`. +This is true: +``` +all(x == V[i] for (x,i) in zip(V, groupslices(V))) +``` +""" +groupslices(A::AbstractArray{T,N}; dims::Int=1) where {T,N} = groupslices(A, dims) + +""" + groupslices(A; dims) = groupslices(A, dims) +Returns a vector of integers where each integer element of the returned vector +is a group number corresponding to the unique slices along dimension `dims` as +returned from `unique(A; dims=d)`, where `A` can be a multidimensional array. +The default is `dims = 1`. +Example usage: +If `C = unique(A; dims=dims)`, `ic = groupslices(A, dims)`, and +`ndims(A) == ndims(C) == 3`, then: +``` +if dims == 1 + all(A .== C[ic,:,:]) +elseif dims == 2 + all(A .== C[:,ic,:]) +elseif dims == 3 + all(A .== C[:,:,ic]) +end +``` +""" +@generated function groupslices(A::AbstractArray{T,N}, dim::Int) where {T,N} + quote + if !(1 <= dim <= $N) + ArgumentError("Input argument dim must be 1 <= dim <= $N, but is currently $dim") + end + hashes = zeros(UInt, size(A, dim)) + + # Compute hash for each row + k = 0 + @nloops $N i A d -> ( + if d == dim + k = i_d + end + ) begin + @inbounds hashes[k] = hash(hashes[k], hash((@nref $N A i))) + end + + # Collect index of first row for each hash + uniquerow = Array{Int}(undef, size(A, dim)) + firstrow = Dict{Prehashed,Int}() + for k = 1:size(A, dim) + uniquerow[k] = get!(firstrow, Prehashed(hashes[k]), k) + end + uniquerows = collect(values(firstrow)) + + # Check for collisions + collided = falses(size(A, dim)) + @inbounds begin + @nloops $N i A d -> ( + if d == dim + k = i_d + j_d = uniquerow[k] + else + j_d = i_d + end + ) begin + if (@nref $N A j) != (@nref $N A i) + collided[k] = true + end + end + end + + if any(collided) + nowcollided = BitArray(undef, size(A, dim)) + while any(collided) + # Collect index of first row for each collided hash + empty!(firstrow) + for j = 1:size(A, dim) + collided[j] || continue + uniquerow[j] = get!(firstrow, Prehashed(hashes[j]), j) + end + for v in values(firstrow) + push!(uniquerows, v) + end + + # Check for collisions + fill!(nowcollided, false) + @nloops $N i A d -> begin + if d == dim + k = i_d + j_d = uniquerow[k] + (!collided[k] || j_d == k) && continue + else + j_d = i_d + end + end begin + if (@nref $N A j) != (@nref $N A i) + nowcollided[k] = true + end + end + (collided, nowcollided) = (nowcollided, collided) + end + end + ie = unique(uniquerow) + ic_dict = Dict{Int,Int}() + for k = 1:length(ie) + ic_dict[ie[k]] = k + end + + ic = similar(uniquerow) + for k = 1:length(ic) + ic[k] = ie[ic_dict[uniquerow[k]]] + end + return ic + end +end + +""" + groupinds(ic) +Returns a vector of vectors of integers wherein the vector 
of group slice +index integers as returned from `groupslices(A, dim)` is converted into a +grouped vector of vectors. Each vector entry in the returned vector of +vectors contains all of the positional indices of slices in the original +input array `A` that correspond to the unique slices along dimension `dim` +that are present in the array `C` as returned from `unique(A, dim)`. +""" +function groupinds(ic::Vector{Int}) + d = Dict{Int,Int}() + ia = unique(ic) + n = length(ia) + for i = 1:n + d[ia[i]] = i + end + + ib = Array{Vector{Int}}(undef, n) + for k = 1:n + ib[k] = Int[] + end + + for h = 1:length(ic) + push!(ib[d[ic[h]]], h) + end + return ib +end + +""" + firstinds(ic::Vector{Int}) + firstinds(ib::Vector{Vector{Int}}) +Returns a vector of integers containing the first index position of each unique +value in the input integer vector `ic`, or the first index position of each +entry in the input vector of integer vectors `ib`. +When operating on the output returned from `unique(A, dim)`, the returned +vector of integers correspond to the positions of the first of each unique slice +present in the original input multidimensional array `A` along dimension `dim`. +The implementation of `firstinds` accepting a vector of integers operates on the +output returned from `groupslices(A, dim)`. +The implementation of `firstinds` accepting a vector of vector of integers +operates on the output returned from `groupinds(ic::Vector{Int})`. +""" +function firstinds(ic::Vector{Int}) + id = unique(ic) + n = length(id) + ia = Array{Int}(undef, n) + for i = 1:n + ia[i] = something(findfirst(isequal(id[i]), ic), 0) # findfirst(ic, id[i]) + end + return ia +end + +function firstinds(ib::Vector{Vector{Int}}) + ia = map(first, ib) +end + +""" + lastinds(ic::Vector{Int}) +Returns a vector of integers containing the last index position of each unique +value in the input integer vector `ic`. +When operating on the output returned from `groupinds(unique(A, dim))`, the +returned vector of integers correspond to the positions of the last of each +unique slice present in the original input multidimensional array `A` along +dimension `dim`. +The implementation of `firstinds` accepting a vector of vector of integers +operates on the output returned from `groupinds(ic::Vector{Int})`. 
+""" +function lastinds(ib::Vector{Vector{Int}}) + ia = map(last, ib) +end + + +end # module \ No newline at end of file diff --git a/src/utils/utils.jl b/src/utils/utils.jl index 29216b97..5fb5e45a 100644 --- a/src/utils/utils.jl +++ b/src/utils/utils.jl @@ -1,3 +1,4 @@ include("logs.jl") include("multidimensional_surrogates.jl") -include("statespaceset_concat.jl") \ No newline at end of file +include("statespaceset_concat.jl") +include("groupslices.jl") \ No newline at end of file diff --git a/test/Project.toml b/test/Project.toml index 952c219d..ca2d3da4 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,7 @@ [deps] CausalInference = "8e462317-f959-576b-b3c1-403f26cec956" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" +ComplexityMeasures = "ab4b797d-85ee-42ba-b621-05d793b346a2" DelayEmbeddings = "5732040d-69e3-5649-938a-b6b4f237613f" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -16,3 +17,8 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TimeseriesSurrogates = "c804724b-8c18-5caa-8579-6025a0767c70" + +[compat] +ComplexityMeasures = "^3.8" +DynamicalSystemsBase = "3" +julia = "^1.10.10" diff --git a/test/independence/JointDistanceDistributionTest.jl b/test/independence/JointDistanceDistributionTest.jl index 33d49c52..c7bd7e84 100644 --- a/test/independence/JointDistanceDistributionTest.jl +++ b/test/independence/JointDistanceDistributionTest.jl @@ -1,14 +1,14 @@ using Random: MersenneTwister -rng = MersenneTwister(12346) +rng = StableRNG(12346) x, y = randn(rng, 1000), randn(rng, 1000) -m = JointDistanceDistribution(D = 3, B = 5) -test = JointDistanceDistributionTest(m) +m = JointDistanceDistribution(D=3, B=5) +test = JointDistanceDistributionTest(m) @test test isa JointDistanceDistributionTest @test independence(test, x, y) isa Associations.JDDTestResult # Don't reject null at significance level (1 - α) when there is no coupling. α = 0.05 -@test pvalue(independence(test, x, y)) > α +@test pvalue(independence(test, x, y)) > α # Reject null at significance level (1 - α) when there is coupling z = y .+ x diff --git a/test/independence/SurrogateAssociationTest/LMeasure.jl b/test/independence/SurrogateAssociationTest/LMeasure.jl index da6caacf..4d08ac64 100644 --- a/test/independence/SurrogateAssociationTest/LMeasure.jl +++ b/test/independence/SurrogateAssociationTest/LMeasure.jl @@ -1,7 +1,7 @@ # Analytical tests (in the limit of a lot of samples) # ------------------------------------------------------------ using Random -rng = MersenneTwister(1234) +rng = StableRNG(1234) n = 100 x, y = rand(rng, n), rand(rng, n) z = x .+ y diff --git a/test/independence/SurrogateAssociationTest/azadkia_chatterjee_correlation.jl b/test/independence/SurrogateAssociationTest/azadkia_chatterjee_correlation.jl index 81b5f14b..863aa994 100644 --- a/test/independence/SurrogateAssociationTest/azadkia_chatterjee_correlation.jl +++ b/test/independence/SurrogateAssociationTest/azadkia_chatterjee_correlation.jl @@ -1,25 +1,25 @@ using Test using Random -rng = Xoshiro(1234) +rng = StableRNG(1234) # We can use surrogate tests and p-values to further verify the correctness of the # algorithm. 
-test = SurrogateAssociationTest(AzadkiaChatterjeeCoefficient(), nshuffles = 19, rng = rng) +test = SurrogateAssociationTest(AzadkiaChatterjeeCoefficient(), nshuffles=19, rng=rng) n = 200 # We expect that we *cannot* reject the null hypothesis for independent variables x = rand(rng, n) y = rand(rng, n) α = 0.05 -@test pvalue(independence(test, x, y)) > α +@test pvalue(independence(test, x, y)) > α # We expect that we *can* reject the null hypothesis for for dependent variables. x = rand(rng, n) y = rand(rng, n) .* x -@test pvalue(independence(test, x, y )) < α +@test pvalue(independence(test, x, y)) < α # We expect that we *cannot* reject the null hypothesis for two extremal variables # connected by an intermediate variable when conditioning on the intermediate variable. x = rand(rng, n) y = rand(rng, n) .+ x z = rand(rng, n) .* y -@test pvalue(independence(test, x, z, y)) > α +@test pvalue(independence(test, x, z, y)) > α diff --git a/test/independence/SurrogateAssociationTest/chatterjee_correlation.jl b/test/independence/SurrogateAssociationTest/chatterjee_correlation.jl index 1fac5481..7cbe6940 100644 --- a/test/independence/SurrogateAssociationTest/chatterjee_correlation.jl +++ b/test/independence/SurrogateAssociationTest/chatterjee_correlation.jl @@ -1,18 +1,19 @@ using Test using Random -rng = Xoshiro(1234) +using StableRNGs +rng = StableRNG(1234) # We can use surrogate tests and p-values to further verify the correctness of the # algorithm. -test = SurrogateAssociationTest(ChatterjeeCorrelation(), nshuffles = 19, rng = rng) +test = SurrogateAssociationTest(ChatterjeeCorrelation(), nshuffles=19, rng=rng) # We expect that we *cannot* reject the null hypothesis for independent variables x = rand(rng, 1:10, 200) y = rand(rng, 1:10, 200) α = 0.05 -@test pvalue(independence(test, x, y)) > α +@test pvalue(independence(test, x, y)) > α # We expect that we *can* reject the null hypothesis for for dependent variables. w = rand(rng, 1:10, 200) z = rand(rng, 1:10, 200) .* sin.(w) .+ cos.(w) -@test pvalue(independence(test, z, w)) < α +@test pvalue(independence(test, z, w)) < α diff --git a/test/methods/information/transfer_entropies/te_renyi_jizba.jl b/test/methods/information/transfer_entropies/te_renyi_jizba.jl index d5be3564..8b44bb38 100644 --- a/test/methods/information/transfer_entropies/te_renyi_jizba.jl +++ b/test/methods/information/transfer_entropies/te_renyi_jizba.jl @@ -5,8 +5,8 @@ using StableRNGs rng = StableRNG(123) sys = system(Logistic4Chain(; rng)) -x, y, z, w = columns(first(trajectory(sys, 30, Ttr = 10000))) -def = TERenyiJizba(base = 3, q = 0.5) +x, y, z, w = columns(first(trajectory(sys, 30, Ttr=10000))) +def = TERenyiJizba(base=3, q=0.5) # Here we test all the possible "generic" ways of estimating `TERenyiJizba`. est_diff = EntropyDecomposition(def, LeonenkoProzantoSavani(Renyi(); k=3)) @@ -25,10 +25,11 @@ est_disc = EntropyDecomposition(TERenyiJizba(), PlugIn(Renyi()), discretization) @test association(est_disc, x, z, y) isa Real # Check that in the limit of a lot of points, we roughly get the same answer for transfer -# operator and regular value binning. -x, y, z, w = columns(first(trajectory(sys, 1000, Ttr = 10000))) +# operator and regular value binning. 
+sys = system(Logistic4Chain(; rng)) +x, y, z, w = columns(first(trajectory(sys, 10000, Ttr=10000))) -te_def = TERenyiJizba(base = 3, q = 0.5) +te_def = TERenyiJizba(base=3, q=0.5) def_renyi = Renyi() disc_vf = CodifyVariables(ValueBinning(2)) @@ -38,12 +39,13 @@ est_disc_vf = EntropyDecomposition(te_def, PlugIn(def_renyi), disc_vf); est_disc_to = EntropyDecomposition(te_def, PlugIn(def_renyi), disc_to); te_vf = association(est_disc_vf, x, z) te_to = association(est_disc_to, x, z) -@test in_agreement(te_vf, te_to; agreement_threshold = 0.005) +# Check that the values agree to within 1% of each other +@test in_agreement(te_vf, te_to; agreement_threshold=0.01) # --------------- # Pretty printing # --------------- -te_def = TERenyiJizba(base = 3, q = 0.5) +te_def = TERenyiJizba(base=3, q=0.5) out_hdiff = repr(EntropyDecomposition(te_def, LeonenkoProzantoSavani(Renyi()))) out_hdisc = repr(EntropyDecomposition(te_def, PlugIn(Renyi()), CodifyVariables(ValueBinning(2)))) diff --git a/test/test_utils.jl b/test/test_utils.jl index 0e5a811e..a17f45a8 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -1,10 +1,12 @@ # Check if the difference is within a certain threshold percentage. Used to check # agreement between `ValueBinning` and `TransferOperator` estimation. -function in_agreement(val1, val2; agreement_threshold = 0.02) - largest_magnitude = max(abs(val1), abs(val2)) +# `agreement_threshold` is the maximal allowed relative discrepancy between the +# two values, expressed as a fraction (e.g. 0.05 means 5 %). +function in_agreement(val1, val2; agreement_threshold=0.05) + largest_magnitude = max(abs(val1), abs(val2)) if largest_magnitude == 0 return val1 == val2 else return abs(val1 - val2) / largest_magnitude <= agreement_threshold - end + end end \ No newline at end of file
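For reference, a short worked sketch (illustrative, not part of the diff) of the relative-difference criterion that `in_agreement` implements:

```julia
# Two values agree if |a - b| / max(|a|, |b|) <= agreement_threshold.
a, b = 0.100, 0.104
rel_diff = abs(a - b) / max(abs(a), abs(b))  # ≈ 0.038, i.e. about 3.8 %
rel_diff <= 0.05  # true: within the default 5 % tolerance
rel_diff <= 0.01  # false: outside the stricter 1 % tolerance used above
```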