From 7ccca837e3f0de823b0af98a858212db4e4d17c9 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Tue, 6 Sep 2022 23:21:11 +0200 Subject: [PATCH 01/32] Remove @show --- src/symbolic/spatial_permutation.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/symbolic/spatial_permutation.jl b/src/symbolic/spatial_permutation.jl index aa4a69fb1..d41eef566 100644 --- a/src/symbolic/spatial_permutation.jl +++ b/src/symbolic/spatial_permutation.jl @@ -105,7 +105,6 @@ function Entropies.probabilities!(s, x, est::SpatialSymbolicPermutation) pixels = pixels_in_stencil(pixel, est) s[i] = Entropies.encode_motif(view(x, pixels), m) end - @show s return probabilities(s) end @@ -114,4 +113,4 @@ function Base.show(io::IO, est::SpatialSymbolicPermutation{D}) where {D} print(io, "Spatial permutation estimator for $D-dimensional data. Stencil:") print(io, "\n") show(io, MIME"text/plain"(), est.stencil) -end \ No newline at end of file +end From 9eea571cbb6cd69533b265f4e314a69c87c1275d Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 00:21:08 +0200 Subject: [PATCH 02/32] Refactor nearest neighbors methods --- src/nearest_neighbors/KozachenkoLeonenko.jl | 31 ++++++++++---------- src/nearest_neighbors/Kraskov.jl | 32 +++++++++------------ src/nearest_neighbors/nearest_neighbors.jl | 12 ++++---- test/nn_tests.jl | 21 ++++++++++++++ test/runtests.jl | 21 ++------------ 5 files changed, 57 insertions(+), 60 deletions(-) create mode 100644 test/nn_tests.jl diff --git a/src/nearest_neighbors/KozachenkoLeonenko.jl b/src/nearest_neighbors/KozachenkoLeonenko.jl index 0d52ed7de..8d5d47701 100644 --- a/src/nearest_neighbors/KozachenkoLeonenko.jl +++ b/src/nearest_neighbors/KozachenkoLeonenko.jl @@ -1,30 +1,29 @@ -export KozachenkoLeonenko, genentropy +export entropy_kozachenkoleonenko """ - KozachenkoLeonenko(; k::Int = 1, w::Int = 0) <: EntropyEstimator + entropy_kozachenkoleonenko(x::AbstractDataset{D, T}; k::Int = 1, w::Int = 0, + base::Real = MathConstants.e) where {D, T} -Entropy estimator based on nearest neighbors. This implementation is based on Kozachenko -& Leonenko (1987)[^KozachenkoLeonenko1987], +Estimate Shannon entropy to the given `base` using `k`-th nearest neighbor +searches, using the method from Kozachenko & Leonenko (1987)[^KozachenkoLeonenko1987], as described in Charzyńska and Gambin (2016)[^Charzyńska2016]. -`w` is the Theiler window (defaults to `0`, meaning that only the point itself is excluded +`w` is the Theiler window, which determines if temporal neighbors are excluded +during neighbor searches (defaults to `0`, meaning that only the point itself is excluded when searching for neighbours). -!!! info - This estimator is only available for entropy estimation. Probabilities - cannot be obtained directly. +See also: [`entropy_kraskov`](@ref). -[^Charzyńska2016]: Charzyńska, A., & Gambin, A. (2016). Improvement of the k-NN entropy estimator with applications in systems biology. Entropy, 18(1), 13. -[^KozachenkoLeonenko1987]: Kozachenko, L. F., & Leonenko, N. N. (1987). Sample estimate of the entropy of a random vector. Problemy Peredachi Informatsii, 23(2), 9-16. +[^Charzyńska2016]: Charzyńska, A., & Gambin, A. (2016). Improvement of the k-NN entropy + estimator with applications in systems biology. Entropy, 18(1), 13. +[^KozachenkoLeonenko1987]: Kozachenko, L. F., & Leonenko, N. N. (1987). Sample estimate of + the entropy of a random vector. Problemy Peredachi Informatsii, 23(2), 9-16. 
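+
+A minimal usage sketch (the data below are purely illustrative; only the call
+signature documented above is assumed):
+
+```julia
+using Entropies
+x = Dataset(rand(10_000, 3)) # any `AbstractDataset` works
+h = entropy_kozachenkoleonenko(x; k = 1, w = 0)
+```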
""" -Base.@kwdef struct KozachenkoLeonenko <: NearestNeighborEntropyEstimator - k::Int = 1 - w::Int = 0 -end +function entropy_kozachenkoleonenko(x::AbstractDataset{D, T}; k::Int = 2, w::Int = 0, + base::Real = MathConstants.e) where {D, T} -function genentropy(x::AbstractDataset{D, T}, est::KozachenkoLeonenko; base::Real = MathConstants.e) where {D, T} N = length(x) - ρs = maximum_neighbor_distances(x, est) + ρs = maximum_neighbor_distances(x, w, k) h = D/N*sum(log.(base, ρs)) + log(base, ball_volume(D)) + MathConstants.eulergamma + log(base, N - 1) return h diff --git a/src/nearest_neighbors/Kraskov.jl b/src/nearest_neighbors/Kraskov.jl index 627430ee6..207bbef52 100644 --- a/src/nearest_neighbors/Kraskov.jl +++ b/src/nearest_neighbors/Kraskov.jl @@ -1,27 +1,23 @@ -export Kraskov, genentropy +export entropy_kraskov """ -## k-th nearest neighbour(kNN) based - - Kraskov(; k::Int = 1, w::Int = 0) <: EntropyEstimator + entropy_kraskov(x::AbstractDataset{D, T}; k::Int = 1, w::Int = 0, + base::Real = MathConstants.e) where {D, T} -Entropy estimator based on `k`-th nearest neighbor searches[^Kraskov2004]. -`w` is the [Theiler window](@ref). +Estimate Shannon entropy to the given `base` using `k`-th nearest neighbor +searches (Kraskov, 2004)[^Kraskov2004]. -!!! info - This estimator is only available for entropy estimation. - Probabilities cannot be obtained directly. +`w` is the Theiler window, which determines if temporal neighbors are excluded +during neighbor searches (defaults to `0`, meaning that only the point itself is excluded +when searching for neighbours). + +See also: [`entropy_kozachenkoleonenko`](@ref). [^Kraskov2004]: Kraskov, A., Stögbauer, H., & Grassberger, P. (2004). Estimating mutual information. Physical review E, 69(6), 066138. """ -Base.@kwdef struct Kraskov <: NearestNeighborEntropyEstimator - k::Int = 1 - w::Int = 0 -end - -function genentropy(x::AbstractDataset{D, T}, est::Kraskov; base::Real = MathConstants.e) where {D, T} +function entropy_kraskov(x::AbstractDataset{D, T}; k::Int = 1, w::Int = 0, base::Real = MathConstants.e) where {D, T} N = length(x) - ρs = maximum_neighbor_distances(x, est) - h = -digamma(est.k) + digamma(N) + log(base, ball_volume(D)) + D/N*sum(log.(base, ρs)) + ρs = maximum_neighbor_distances(x, w, k) + h = -digamma(k) + digamma(N) + log(base, ball_volume(D)) + D/N*sum(log.(base, ρs)) return h -end \ No newline at end of file +end diff --git a/src/nearest_neighbors/nearest_neighbors.jl b/src/nearest_neighbors/nearest_neighbors.jl index c0ab6577d..cbe1791f4 100644 --- a/src/nearest_neighbors/nearest_neighbors.jl +++ b/src/nearest_neighbors/nearest_neighbors.jl @@ -1,13 +1,11 @@ import SpecialFunctions: digamma, gamma using Neighborhood: Theiler, KDTree, bulksearch -abstract type NearestNeighborEntropyEstimator <: EntropyEstimator end - -function maximum_neighbor_distances(pts, est::EntropyEstimator) - theiler = Theiler(est.w) - est.w >= 0 || error("w, the number of neighbors to exclude, must be >= 0") +function maximum_neighbor_distances(pts, w::Int, k::Int) + theiler = Theiler(w) + w >= 0 || error("w, the number of neighbors to exclude, must be >= 0") tree = KDTree(pts) # Euclidean metric forced - idxs, dists = bulksearch(tree, pts.data, NeighborNumber(est.k), theiler) + idxs, dists = bulksearch(tree, pts.data, NeighborNumber(k), theiler) # Distance to k-th nearest neighbor for each of the points return [d[end] for d in dists] end @@ -16,4 +14,4 @@ end ball_volume(d::Int) = π^(d/2)/gamma((d/2)+1) include("KozachenkoLeonenko.jl") 
-include("Kraskov.jl") \ No newline at end of file +include("Kraskov.jl") diff --git a/test/nn_tests.jl b/test/nn_tests.jl new file mode 100644 index 000000000..55007817e --- /dev/null +++ b/test/nn_tests.jl @@ -0,0 +1,21 @@ +using Test + +@testset "NN - Kraskov" begin + m = 4 + τ = 1 + τs = tuple([τ*i for i = 0:m-1]...) + x = rand(250) + D = genembed(x, τs) + + @test entropy_kraskov(D, k = 3, w = 1) isa Real +end + +@testset "NN - KozachenkoLeonenko" begin + m = 4 + τ = 1 + τs = tuple([τ*i for i = 0:m-1]...) + x = rand(250) + D = genembed(x, τs) + + @test entropy_kozachenkoleonenko(D, k = 3, w = 1) isa Real +end diff --git a/test/runtests.jl b/test/runtests.jl index 6110349fb..6ec794030 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -60,10 +60,6 @@ end @test VisitationFrequency(RectangularBinning(3)) isa VisitationFrequency @test TransferOperator(RectangularBinning(3)) isa TransferOperator - @test Kraskov(k = 2, w = 1) isa Kraskov - @test Kraskov() isa Kraskov - @test KozachenkoLeonenko() isa KozachenkoLeonenko - @test KozachenkoLeonenko(w = 5) isa KozachenkoLeonenko @test NaiveKernel(0.1) isa NaiveKernel @testset "Counting based" begin @@ -302,20 +298,6 @@ end end end - @testset "Nearest neighbor based" begin - m = 4 - τ = 1 - τs = tuple([τ*i for i = 0:m-1]...) - x = rand(250) - D = genembed(x, τs) - - est_nn = KozachenkoLeonenko(w = 5) - est_knn = Kraskov(k = 2, w = 1) - - @test genentropy(D, est_nn) isa Real - @test genentropy(D, est_knn) isa Real - end - @testset "TransferOperator" begin D = Dataset(rand(1000, 3)) @@ -370,4 +352,5 @@ end end end -include("spatial_permutation_tests.jl") \ No newline at end of file +include("spatial_permutation_tests.jl") +include("nn_tests.jl") From 327dff7c9521946f4b5d03a0032ed9be0cf9dca6 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 00:21:25 +0200 Subject: [PATCH 03/32] Typos --- src/timescales/wavelet_overlap.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/timescales/wavelet_overlap.jl b/src/timescales/wavelet_overlap.jl index 647596a59..87c18aa3b 100644 --- a/src/timescales/wavelet_overlap.jl +++ b/src/timescales/wavelet_overlap.jl @@ -10,9 +10,9 @@ wavelet scales. This implementation is based on Rosso et al. (2001)[^Rosso2001]. The probability `p[i]` is the relative energy for the `i`-th wavelet scale. -To obtain a better understand of what these probabilities mean, we prepared +To obtain a better understanding of what these probabilities mean, we prepared a notebook you can [view online]( -https://github.com/kahaaga/waveletentropy_example/blob/main/wavelet_entropy_example.ipynb) +https://github.com/kahaaga/waveletentropy_example/blob/main/wavelet_entropy_example.ipynb). As such, this estimator only works for timeseries input. By default the wavelet `Wavelets.WT.Daubechies{12}()` From 6b435b20334d4abf25c65f07623cca166d24d51b Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 00:21:32 +0200 Subject: [PATCH 04/32] Remove duplicate function --- src/symbolic/utils.jl | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/symbolic/utils.jl b/src/symbolic/utils.jl index fa5804d0d..1405d850c 100644 --- a/src/symbolic/utils.jl +++ b/src/symbolic/utils.jl @@ -1,15 +1,3 @@ - -function isless_rand(a, b) - if a == b - rand(Bool) - elseif a < b - true - else - false - end -end - - """ Compute probabilities of symbols `Π`, given weights `wts`. 
""" function symprobs(Π::AbstractVector, wts::AbstractVector; normalize = true) length(Π) == length(wts) || error("Need length(Π) == length(wts)") From 89e5b6268b0b25e7cf3d92c81bbb523648149c80 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 00:36:13 +0200 Subject: [PATCH 05/32] Reorganize docs --- docs/make.jl | 8 +- docs/src/DispersionEntropy.md | 5 - docs/src/complexity_measures.md | 7 ++ docs/src/index.md | 25 +---- docs/src/other_methods.md | 58 ++++++++++ docs/src/{estimators.md => probabilities.md} | 112 +++++-------------- docs/src/renyi_entropy.md | 5 + docs/src/tsallis_entropy.md | 5 + 8 files changed, 110 insertions(+), 115 deletions(-) delete mode 100644 docs/src/DispersionEntropy.md create mode 100644 docs/src/complexity_measures.md create mode 100644 docs/src/other_methods.md rename docs/src/{estimators.md => probabilities.md} (56%) create mode 100644 docs/src/renyi_entropy.md create mode 100644 docs/src/tsallis_entropy.md diff --git a/docs/make.jl b/docs/make.jl index 157cc58d7..688d19aac 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -35,10 +35,10 @@ ENV["JULIA_DEBUG"] = "Documenter" PAGES = [ "Entropies.jl" => "index.md", - "Estimators" => "estimators.md", - "Remaining" => [ - "DispersionEntropy.md", - ], + "Probability estimation" => "probabilities.md", + "Entropy (Rényi)" => "renyi_entropy.md", + "Entropy (Tsallis)" => "tsallis_entropy.md", + "Complexity measures" => "complexity_measures.md", "Utility methods" => "utils.md", ] diff --git a/docs/src/DispersionEntropy.md b/docs/src/DispersionEntropy.md deleted file mode 100644 index f899e273c..000000000 --- a/docs/src/DispersionEntropy.md +++ /dev/null @@ -1,5 +0,0 @@ -# Dispersion entropy - -```@docs -dispersion_entropy -``` diff --git a/docs/src/complexity_measures.md b/docs/src/complexity_measures.md new file mode 100644 index 000000000..eedcb6af7 --- /dev/null +++ b/docs/src/complexity_measures.md @@ -0,0 +1,7 @@ +# Complexity measures + +## Dispersion entropy + +```@docs +dispersion_entropy +``` diff --git a/docs/src/index.md b/docs/src/index.md index aaeb961a9..4e7ad5c07 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -13,30 +13,9 @@ based on which method for probability/entropy estimation is applied. The main **API** of this package is contained in three functions: * [`probabilities`](@ref) which computes probability distributions of given datasets -* [`genentropy`](@ref) which uses the output of [`probabilities`](@ref), or a set of +* [`entropy_renyi`](@ref) which uses the output of [`probabilities`](@ref), or a set of pre-computed [`Probabilities`](@ref), to calculate entropies. -* [`tsallisentropy`](@ref) which uses the output of [`probabilities`](@ref), or a set of +* [`entropy_tsallis`](@ref) which uses the output of [`probabilities`](@ref), or a set of pre-computed [`Probabilities`](@ref), to calculate Tsallis entropies. These functions dispatch estimators listed [here](@ref estimators). - -## Probabilities - -```@docs -Probabilities -probabilities -probabilities! -ProbabilitiesEstimator -``` - -## Rényi (generalized) entropy - -```@docs -Entropies.genentropy -``` - -## Tsallis entropy - -```@docs -Entropies.tsallisentropy -``` diff --git a/docs/src/other_methods.md b/docs/src/other_methods.md new file mode 100644 index 000000000..3a11d9aef --- /dev/null +++ b/docs/src/other_methods.md @@ -0,0 +1,58 @@ +# Methods + +## Shannon entropies + +Some methods in the literature compute Shannon entropy in ways that don't explicitly result in probability distributions. 
Hence, they can't be used with [`probabilities`](@ref), and appear instead as stand-alone functions. + +### Nearest neighbors + +```@docs +entropy_kraskov +entropy_kozachenkoleonenko +``` + +#### Example + +This example reproduces Figure in Charzyńska & Gambin (2016)[^Charzyńska2016]. Both +estimators nicely converge to the true entropy with increasing time series length. +For a uniform 1D distribution ``U(0, 1)``, the true entropy is `0`. + +```@example MAIN +using DynamicalSystems, CairoMakie, Statistics +using Distributions: Uniform, Normal + +Ns = [100:100:500; 1000:1000:10000] +Ekl = Vector{Vector{Float64}}(undef, 0) +Ekr = Vector{Vector{Float64}}(undef, 0) + +nreps = 50 +for N in Ns + kl = Float64[] + kr = Float64[] + for i = 1:nreps + pts = Dataset([rand(Uniform(0, 1), 1) for i = 1:N]); + + push!(kl, entropy_kozachenkoleonenko(pts, w = 0, k = 1)) + # with k = 1, Kraskov is virtually identical to + # Kozachenko-Leonenko, so pick a higher number of neighbors + push!(kr, entropy_kraskov(pts, w = 0, k = 3)) + end + push!(Ekl, kl) + push!(Ekr, kr) +end + +fig = Figure() +ax = Axis(fig[1,1]; ylabel = "entropy (nats)", title = "Kozachenko-Leonenko") +lines!(ax, Ns, mean.(Ekl); color = Cycled(1)) +band!(ax, Ns, mean.(Ekl) .+ std.(Ekl), mean.(Ekl) .- std.(Ekl); +color = (Main.COLORS[1], 0.5)) + +ay = Axis(fig[2,1]; xlabel = "time step", ylabel = "entropy (nats)", title = "Kraskov") +lines!(ay, Ns, mean.(Ekr); color = Cycled(2)) +band!(ay, Ns, mean.(Ekr) .+ std.(Ekr), mean.(Ekr) .- std.(Ekr); +color = (Main.COLORS[2], 0.5)) + +fig +``` + +[^Charzyńska2016]: Charzyńska, A., & Gambin, A. (2016). Improvement of the k-NN entropy estimator with applications in systems biology. Entropy, 18(1), 13. diff --git a/docs/src/estimators.md b/docs/src/probabilities.md similarity index 56% rename from docs/src/estimators.md rename to docs/src/probabilities.md index f3f5113ae..27791333f 100644 --- a/docs/src/estimators.md +++ b/docs/src/probabilities.md @@ -1,18 +1,27 @@ -# [Estimators of probabilities and entropies](@id estimators) +# Probabilities -## Count occurrences (counting) +```@docs +Probabilities +probabilities +probabilities! 
+ProbabilitiesEstimator +``` + +## [Estimators](@id estimators) + +### Count occurrences (counting) ```@docs CountOccurrences ``` -## Permutation (symbolic) +### Permutation (symbolic) ```@docs SymbolicPermutation ``` -### Example +#### Example This example reproduces an example from Bandt and Pompe (2002), where the permutation entropy is compared with the largest Lyapunov exponents from time series of the chaotic @@ -39,9 +48,9 @@ for r in rs push!(lyaps, lyapunov(ds, N_lyap)) x = trajectory(ds, N_ent) # time series - hperm = Entropies.genentropy(x, SymbolicPermutation(m = m, τ = τ), base = base) - hwtperm = Entropies.genentropy(x, SymbolicWeightedPermutation(m = m, τ = τ), base = base) - hampperm = Entropies.genentropy(x, SymbolicAmplitudeAwarePermutation(m = m, τ = τ), base = base) + hperm = Entropies.entropy_renyi(x, SymbolicPermutation(m = m, τ = τ), base = base) + hwtperm = Entropies.entropy_renyi(x, SymbolicWeightedPermutation(m = m, τ = τ), base = base) + hampperm = Entropies.entropy_renyi(x, SymbolicAmplitudeAwarePermutation(m = m, τ = τ), base = base) push!(hs_perm, hperm); push!(hs_wtperm, hwtperm); push!(hs_ampperm, hampperm) end @@ -63,25 +72,25 @@ end fig ``` -## Visitation frequency (binning) +### Visitation frequency (binning) ```@docs VisitationFrequency ``` -### Specifying binning/boxes +#### Specifying binning/boxes ```@docs RectangularBinning ``` -## Transfer operator (binning) +### Transfer operator (binning) ```@docs TransferOperator ``` -### Utility methods/types +#### Utility methods/types ```@docs InvariantMeasure @@ -89,13 +98,13 @@ invariantmeasure transfermatrix ``` -## Kernel density +### Kernel density ```@docs NaiveKernel ``` -### Example +#### Example Here, we draw some random points from a 2D normal distribution. Then, we use kernel density estimation to associate a probability to each point `p`, measured by how many @@ -116,13 +125,13 @@ ax.zticklabelsvisible = false fig ``` -## Time-scale (wavelet) +### Wavelet ```@docs -TimeScaleMODWT +WaveletOverlap ``` -### Example +#### Example The scale-resolved wavelet entropy should be lower for very regular signals (most of the energy is contained at one scale) and higher for very irregular signals (energy spread @@ -137,10 +146,10 @@ x = sin.(t); y = sin.(t .+ cos.(t/0.5)); z = sin.(rand(1:15, N) ./ rand(1:10, N)) -est = TimeScaleMODWT() -h_x = genentropy(x, est) -h_y = genentropy(y, est) -h_z = genentropy(z, est) +est = WaveletOverlap() +h_x = entropy_renyi(x, est) +h_y = entropy_renyi(y, est) +h_z = entropy_renyi(z, est) fig = Figure() ax = Axis(fig[1,1]; ylabel = "x") @@ -153,66 +162,3 @@ for a in (ax, ay, az); axislegend(a); end for a in (ax, ay); hidexdecorations!(a; grid=false); end fig ``` - -## Nearest neighbor estimators - -### Kraskov - -```@docs -Kraskov -``` - -### Kozachenko-Leonenko - -```@docs -KozachenkoLeonenko -``` - -#### Example - -This example reproduces Figure in Charzyńska & Gambin (2016)[^Charzyńska2016]. Both -estimators nicely converge to the true entropy with increasing time series length. -For a uniform 1D distribution ``U(0, 1)``, the true entropy is `0`. 
- -```@example MAIN -using DynamicalSystems, CairoMakie, Statistics -using Distributions: Uniform, Normal - -Ns = [100:100:500; 1000:1000:10000] -Ekl = Vector{Vector{Float64}}(undef, 0) -Ekr = Vector{Vector{Float64}}(undef, 0) - -est_nn = KozachenkoLeonenko(w = 0) -# with k = 1, Kraskov is virtually identical to KozachenkoLeonenko, so pick a higher -# number of neighbors -est_knn = Kraskov(w = 0, k = 3) - -nreps = 50 -for N in Ns - kl = Float64[] - kr = Float64[] - for i = 1:nreps - pts = Dataset([rand(Uniform(0, 1), 1) for i = 1:N]); - push!(kl, genentropy(pts, est_nn)) - # with k = 1 almost identical - push!(kr, genentropy(pts, est_knn)) - end - push!(Ekl, kl) - push!(Ekr, kr) -end - -fig = Figure() -ax = Axis(fig[1,1]; ylabel = "entropy (nats)", title = "KozachenkoLeonenko") -lines!(ax, Ns, mean.(Ekl); color = Cycled(1)) -band!(ax, Ns, mean.(Ekl) .+ std.(Ekl), mean.(Ekl) .- std.(Ekl); -color = (Main.COLORS[1], 0.5)) - -ay = Axis(fig[2,1]; xlabel = "time step", ylabel = "entropy (nats)", title = "Kraskov") -lines!(ay, Ns, mean.(Ekr); color = Cycled(2)) -band!(ay, Ns, mean.(Ekr) .+ std.(Ekr), mean.(Ekr) .- std.(Ekr); -color = (Main.COLORS[2], 0.5)) - -fig -``` - -[^Charzyńska2016]: Charzyńska, A., & Gambin, A. (2016). Improvement of the k-NN entropy estimator with applications in systems biology. Entropy, 18(1), 13. diff --git a/docs/src/renyi_entropy.md b/docs/src/renyi_entropy.md new file mode 100644 index 000000000..dfedc5156 --- /dev/null +++ b/docs/src/renyi_entropy.md @@ -0,0 +1,5 @@ +# Rényi (generalized) entropy + +```@docs +Entropies.entropy_renyi +``` diff --git a/docs/src/tsallis_entropy.md b/docs/src/tsallis_entropy.md new file mode 100644 index 000000000..a458cdefc --- /dev/null +++ b/docs/src/tsallis_entropy.md @@ -0,0 +1,5 @@ +# Tsallis (generalized) entropy + +```@docs +Entropies.entropy_tsallis +``` From 8281f5018bc6fb0587dc9f38521754b34c3dcf59 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 00:37:05 +0200 Subject: [PATCH 06/32] API renaming --- src/core.jl | 34 ++++++++------------ src/dispersion/dispersion_entropy.jl | 2 +- src/symbolic/SymbolicPermutation.jl | 12 +++---- src/symbolic/spatial_permutation.jl | 6 ++-- src/symbolic/symbolic.jl | 4 +-- src/tsallis/tsallis.jl | 8 ++--- test/runtests.jl | 48 ++++++++++++++-------------- test/spatial_permutation_tests.jl | 6 ++-- test/timescales.jl | 4 +-- 9 files changed, 58 insertions(+), 66 deletions(-) diff --git a/src/core.jl b/src/core.jl index ddf66da7b..4addcfa6f 100644 --- a/src/core.jl +++ b/src/core.jl @@ -1,9 +1,8 @@ using DelayEmbeddings using DelayEmbeddings: AbstractDataset, Dataset, dimension export ProbabilitiesEstimator, Probabilities -export EntropyEstimator export probabilities, probabilities! -export genentropy, genentropy! +export entropy_renyi, entropy_renyi! export Dataset, dimension const Array_or_Dataset = Union{<:AbstractArray, <:AbstractDataset} @@ -35,13 +34,6 @@ Base.IteratorSize(d::Probabilities) = Base.HasLength() @inline Base.:*(d::Probabilities, x::Number) = d.p * x @inline Base.sum(d::Probabilities{T}) where T = one(T) -""" -An abstract type for entropy estimators that don't explicitly estimate probabilities, -but return the value of the entropy directly. -""" -abstract type EntropyEstimator end -const EntEst = EntropyEstimator # shorthand - """ An abstract type for probabilities estimators. """ @@ -91,13 +83,13 @@ Only works for certain estimators. See for example [`SymbolicPermutation`](@ref) function probabilities! 
end """ - genentropy(p::Probabilities; q = 1.0, base = MathConstants.e) + entropy_renyi(p::Probabilities; q = 1.0, base = MathConstants.e) Compute the generalized order-`q` entropy of some probabilities returned by the [`probabilities`](@ref) function. Alternatively, compute entropy from pre-computed `Probabilities`. - genentropy(x::Array_or_Dataset, est; q = 1.0, base) + entropy_renyi(x::Array_or_Dataset, est; q = 1.0, base) A convenience syntax, which calls first `probabilities(x, est)` and then calculates the entropy of the result (and thus `est` can be a @@ -120,9 +112,9 @@ also known as Hartley entropy), or the correlation entropy [^Rényi1960]: A. Rényi, *Proceedings of the fourth Berkeley Symposium on Mathematics, Statistics and Probability*, pp 547 (1960) [^Shannon1948]: C. E. Shannon, Bell Systems Technical Journal **27**, pp 379 (1948) """ -function genentropy end +function entropy_renyi end -function genentropy(prob::Probabilities; q = 1.0, α = nothing, base = MathConstants.e) +function entropy_renyi(prob::Probabilities; q = 1.0, α = nothing, base = MathConstants.e) if α ≠ nothing @warn "Keyword `α` is deprecated in favor of `q`." q = α @@ -149,31 +141,31 @@ function genentropy(prob::Probabilities; q = 1.0, α = nothing, base = MathConst end end -genentropy(::AbstractArray{<:Real}) = - error("For single-argument input, do `genentropy(Probabilities(x))` instead.") +entropy_renyi(::AbstractArray{<:Real}) = + error("For single-argument input, do `entropy_renyi(Probabilities(x))` instead.") -function genentropy(x::Array_or_Dataset, est; q = 1.0, α = nothing, base = MathConstants.e) +function entropy_renyi(x::Array_or_Dataset, est; q = 1.0, α = nothing, base = MathConstants.e) if α ≠ nothing @warn "Keyword `α` is deprecated in favor of `q`." q = α end p = probabilities(x, est) - genentropy(p; q = q, base = base) + entropy_renyi(p; q = q, base = base) end """ - genentropy!(p, x, est::ProbabilitiesEstimator; q = 1.0, base = MathConstants.e) + entropy_renyi!(p, x, est::ProbabilitiesEstimator; q = 1.0, base = MathConstants.e) -Similarly with `probabilities!` this is an in-place version of `genentropy` that allows +Similarly with `probabilities!` this is an in-place version of `entropy_renyi` that allows pre-allocation of temporarily used containers. Only works for certain estimators. See for example [`SymbolicPermutation`](@ref). """ -function genentropy!(p, x, est; q = 1.0, α = nothing, base = MathConstants.e) +function entropy_renyi!(p, x, est; q = 1.0, α = nothing, base = MathConstants.e) if α ≠ nothing @warn "Keyword `α` is deprecated in favor of `q`." q = α end probabilities!(p, x, est) - genentropy(p; q = q, base = base) + entropy_renyi(p; q = q, base = base) end diff --git a/src/dispersion/dispersion_entropy.jl b/src/dispersion/dispersion_entropy.jl index 758b9ccc9..45eaa2a40 100644 --- a/src/dispersion/dispersion_entropy.jl +++ b/src/dispersion/dispersion_entropy.jl @@ -57,5 +57,5 @@ function dispersion_entropy(x::AbstractVector, N = length(x) hist = dispersion_histogram(dispersion_patterns, N, m, τ) - genentropy(hist, q = q, base = base) + entropy_renyi(hist, q = q, base = base) end diff --git a/src/symbolic/SymbolicPermutation.jl b/src/symbolic/SymbolicPermutation.jl index 619413fbb..1ce16ca80 100644 --- a/src/symbolic/SymbolicPermutation.jl +++ b/src/symbolic/SymbolicPermutation.jl @@ -51,7 +51,7 @@ To estimate probabilities or entropies from univariate time series, use the foll - `probabilities(x::AbstractVector, est::SymbolicProbabilityEstimator)`. 
Constructs state vectors from `x` using embedding lag `τ` and embedding dimension `m`, symbolizes state vectors, and computes probabilities as (weighted) relative frequency of symbols.
-- `genentropy(x::AbstractVector, est::SymbolicProbabilityEstimator; α=1, base = 2)` computes
+- `entropy_renyi(x::AbstractVector, est::SymbolicProbabilityEstimator; α=1, base = 2)` computes
  probabilities by calling `probabilities(x::AbstractVector, est)`, then computes the
  order-`α` generalized entropy to the given base.

@@ -84,7 +84,7 @@ existing state vectors ``\\{\\mathbf{x}_1, \\mathbf{x}_2, \\ldots, \\mathbf{x_L}
- `probabilities(x::AbstractDataset, est::SymbolicProbabilityEstimator)`. Compute ordinal patterns of the state vectors of `x` directly (without doing any embedding), symbolize those patterns, and compute probabilities as (weighted) relative frequencies of symbols.
-- `genentropy(x::AbstractDataset, est::SymbolicProbabilityEstimator)`. Computes probabilities from
+- `entropy_renyi(x::AbstractDataset, est::SymbolicProbabilityEstimator)`. Computes probabilities from
  symbol frequencies using `probabilities(x::AbstractDataset, est::SymbolicProbabilityEstimator)`,
  then computes the order-`α` generalized (permutation) entropy to the given base.

@@ -254,7 +254,7 @@ function probabilities(x::AbstractVector{T}, est::SymbolicPermutation) where {T<
     probabilities!(s, x_emb, est)
 end

-function genentropy!(
+function entropy_renyi!(
     s::AbstractVector{Int}, x::AbstractDataset{m, T}, est::SymbolicPermutation;
     q = 1.0, α = nothing, base::Real = MathConstants.e
 ) where {m, T}
@@ -266,10 +266,10 @@
     length(s) == length(x) || error("Pre-allocated symbol vector `s` needs the same number of elements as `x`. Got length(s)=$(length(s)) and length(x)=$(L).")

     ps = probabilities!(s, x, est)
-    genentropy(ps, α = α, base = base)
+    entropy_renyi(ps, α = α, base = base)
 end

-function genentropy!(
+function entropy_renyi!(
     s::AbstractVector{Int}, x::AbstractVector{T}, est::SymbolicPermutation;
     q::Real = 1.0, α = nothing, base::Real = MathConstants.e
 ) where {T<:Real}
@@ -282,5 +282,5 @@
     length(s) == N || error("Pre-allocated symbol vector `s` needs to have length `length(x) - (m-1)*τ` to match the number of state vectors after `x` has been embedded. Got length(s)=$(length(s)) and length(x)=$(L).")

     ps = probabilities!(s, x, est)
-    genentropy(ps, α = α, base = base)
+    entropy_renyi(ps, α = α, base = base)
 end
diff --git a/src/symbolic/spatial_permutation.jl b/src/symbolic/spatial_permutation.jl
index d41eef566..bca936047 100644
--- a/src/symbolic/spatial_permutation.jl
+++ b/src/symbolic/spatial_permutation.jl
@@ -25,11 +25,11 @@ within the stencil dictates the order that pixels are compared with. The pixel
 without any offset is always first in the order.

 After having defined `est`, one calculates the permutation entropy of ordinal patterns
-by calling [`genentropy`](@ref) with `est`, and with the array data.
+by calling [`entropy_renyi`](@ref) with `est`, and with the array data.
To apply this to timeseries of spatial data, simply loop over the call, e.g.:
```julia
-entropy = genentropy(x, est)
-entropy_vs_time = genentropy.(data, est) # broadcasting with `.`
+entropy = entropy_renyi(x, est)
+entropy_vs_time = entropy_renyi.(data, est) # broadcasting with `.`
```

The argument `periodic` decides whether the stencil should wrap around at the end of the
diff --git a/src/symbolic/symbolic.jl b/src/symbolic/symbolic.jl
index 3f6682cd9..ae761da20 100644
--- a/src/symbolic/symbolic.jl
+++ b/src/symbolic/symbolic.jl
@@ -13,11 +13,11 @@ include("spatial_permutation.jl")
 """
     permentropy(x; τ = 1, m = 3, base = MathConstants.e)

-Shorthand for `genentropy(x, SymbolicPermutation(m = m, τ = τ); base)` which
+Shorthand for `entropy_renyi(x, SymbolicPermutation(m = m, τ = τ); base)` which
 calculates the permutation entropy of order `m` with delay time `τ` used for
 embedding.
 """
 function permentropy(x; τ = 1, m = 3, base = MathConstants.e)
-    return genentropy(x, SymbolicPermutation(m = m, τ = τ); base)
+    return entropy_renyi(x, SymbolicPermutation(m = m, τ = τ); base)
 end
 export permentropy
diff --git a/src/tsallis/tsallis.jl b/src/tsallis/tsallis.jl
index afb03d5b1..7c1d3e701 100644
--- a/src/tsallis/tsallis.jl
+++ b/src/tsallis/tsallis.jl
@@ -1,7 +1,7 @@
-export tsallisentropy
+export entropy_tsallis

 """
-    tsallisentropy(x::Probabilities; k = 1, q = 0)
+    entropy_tsallis(x::Probabilities; k = 1, q = 0)

Compute the Tsallis entropy of `x` (Tsallis, 1988)[^Tsallis1988].

```math
S_q(\\bf p) = \\dfrac{k}{q - 1}\\left(1 - \\sum_{p_i \\in \\bf p} p_i^q\\right)
```

[^Tsallis1988]: Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics. Journal of statistical physics, 52(1), 479-487.
 """
-function tsallisentropy(prob::Probabilities; k = 1, q = 0)
+function entropy_tsallis(prob::Probabilities; k = 1, q = 0)
     haszero = any(iszero, prob)
     p = if haszero
         i0 = findall(iszero, prob.p)
-        # As for genentropy, we copy because if someone initialized Probabilities with 0s,
+        # As for entropy_renyi, we copy because if someone initialized Probabilities with 0s,
         # they'd probably want to index the zeros as well.
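+        # (dropping the zero entries also avoids `0^q`, which diverges for `q < 0`)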
deleteat!(copy(prob.p), i0) else diff --git a/test/runtests.jl b/test/runtests.jl index 6ec794030..fdd4009a2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,8 +22,8 @@ end @test Entropies._non0hist(D) |> sum ≈ 1.0 @test Entropies._non0hist(D2)|> sum ≈ 1.0 x = rand(100) - @test genentropy(x, 100) ≠ NaN - @test genentropy(x, 0.1) ≠ NaN + @test entropy_renyi(x, 100) ≠ NaN + @test entropy_renyi(x, 0.1) ≠ NaN end @testset "Shorthand" begin @@ -40,9 +40,9 @@ end x = rand(1000) xn = x ./ sum(x) xp = Probabilities(xn) - @test genentropy(xp, q = 2) isa Real - @test genentropy(xp, q = 1) isa Real - @test_throws MethodError genentropy(xn, q = 2) isa Real + @test entropy_renyi(xp, q = 2) isa Real + @test entropy_renyi(xp, q = 1) isa Real + @test_throws MethodError entropy_renyi(xn, q = 2) isa Real end @testset "Probability/entropy estimators" begin @@ -65,7 +65,7 @@ end @testset "Counting based" begin D = Dataset(rand(1:3, 1000, 3)) ts = [(rand(1:4), rand(1:4), rand(1:4)) for i = 1:3000] - @test Entropies.genentropy(D, CountOccurrences(), q = 2, base = 2) isa Real + @test Entropies.entropy_renyi(D, CountOccurrences(), q = 2, base = 2) isa Real end @testset "NaiveKernel" begin @@ -81,8 +81,8 @@ end p_direct = probabilities(pts, est_direct) @test all(p_tree .== p_direct) == true - @test Entropies.genentropy(pts, est_direct, base = 2) isa Real - @test Entropies.genentropy(pts, est_tree, base = 2) isa Real + @test Entropies.entropy_renyi(pts, est_direct, base = 2) isa Real + @test Entropies.entropy_renyi(pts, est_tree, base = 2) isa Real end @testset "Symbolization" begin @@ -156,17 +156,17 @@ end @test sum(p2) ≈ 1.0 # Entropies - @test genentropy!(s, x, est, q = 1) ≈ 0 # Regular order-1 entropy - @test genentropy!(s, y, est, q = 1) >= 0 # Regular order-1 entropy - @test genentropy!(s, x, est, q = 2) ≈ 0 # Higher-order entropy - @test genentropy!(s, y, est, q = 2) >= 0 # Higher-order entropy + @test entropy_renyi!(s, x, est, q = 1) ≈ 0 # Regular order-1 entropy + @test entropy_renyi!(s, y, est, q = 1) >= 0 # Regular order-1 entropy + @test entropy_renyi!(s, x, est, q = 2) ≈ 0 # Higher-order entropy + @test entropy_renyi!(s, y, est, q = 2) >= 0 # Higher-order entropy # For a time series sz = zeros(Int, N - (est.m-1)*est.τ) @test probabilities!(sz, z, est) isa Probabilities @test probabilities(z, est) isa Probabilities - @test genentropy!(sz, z, est) isa Real - @test genentropy(z, est) isa Real + @test entropy_renyi!(sz, z, est) isa Real + @test entropy_renyi(z, est) isa Real end @testset "Not pre-allocated" begin @@ -182,8 +182,8 @@ end @test sum(p2) ≈ 1.0 # Entropy - @test genentropy(x, est, q = 1) ≈ 0 # Regular order-1 entropy - @test genentropy(y, est, q = 2) >= 0 # Higher-order entropy + @test entropy_renyi(x, est, q = 1) ≈ 0 # Regular order-1 entropy + @test entropy_renyi(y, est, q = 2) >= 0 # Higher-order entropy end end @@ -205,8 +205,8 @@ end @test all(p1.p .≈ p2.p) # Entropy - e1 = genentropy(D, est) - e2 = genentropy(x, est) + e1 = entropy_renyi(D, est) + e2 = entropy_renyi(x, est) @test e1 ≈ e2 end @@ -226,8 +226,8 @@ end @test all(p1.p .≈ p2.p) # Entropy - e1 = genentropy(D, est) - e2 = genentropy(x, est) + e1 = entropy_renyi(D, est) + e2 = entropy_renyi(x, est) @test e1 ≈ e2 end @@ -291,9 +291,9 @@ end @testset "Binning test $i" for i in 1:length(binnings) est = VisitationFrequency(binnings[i]) @test probabilities(D, est) isa Probabilities - @test genentropy(D, est, q=1, base = 3) isa Real # Regular order-1 entropy - @test genentropy(D, est, q=3, base = 2) isa Real # Higher-order 
entropy - @test genentropy(D, est, q=3, base = 1) isa Real # Higher-order entropy + @test entropy_renyi(D, est, q=1, base = 3) isa Real # Regular order-1 entropy + @test entropy_renyi(D, est, q=3, base = 2) isa Real # Higher-order entropy + @test entropy_renyi(D, est, q=3, base = 1) isa Real # Higher-order entropy end end @@ -348,7 +348,7 @@ end @testset "Tsallis" begin p = Probabilities(repeat([1/5], 5)) - @assert round(tsallisentropy(p, q = -1/2, k = 1), digits = 2) ≈ 6.79 + @assert round(entropy_tsallis(p, q = -1/2, k = 1), digits = 2) ≈ 6.79 end end diff --git a/test/spatial_permutation_tests.jl b/test/spatial_permutation_tests.jl index ee49f08ea..b790f7a38 100644 --- a/test/spatial_permutation_tests.jl +++ b/test/spatial_permutation_tests.jl @@ -14,14 +14,14 @@ using Entropies, Test @test length(ps) == 3 @test sort(ps) == [0.25, 0.25, 0.5] - h = genentropy(x, est, base = 2) + h = entropy_renyi(x, est, base = 2) @test h == 1.5 # In fact, doesn't matter how we order the stencil, # the symbols will always be equal in top-left and bottom-right stencil = CartesianIndex.([(1,0), (1,1), (0,1)]) est = SpatialSymbolicPermutation(stencil, x, false) - @test genentropy(x, est, base = 2) == 1.5 + @test entropy_renyi(x, est, base = 2) == 1.5 # But for sanity, let's ensure we get a different number # for a different stencil @@ -46,4 +46,4 @@ using Entropies, Test w = shuffle!(Random.MersenneTwister(42), collect(z)) ps = probabilities(w, est) @test length(ps) > 1 -end \ No newline at end of file +end diff --git a/test/timescales.jl b/test/timescales.jl index 67aec5612..fb0660ed5 100644 --- a/test/timescales.jl +++ b/test/timescales.jl @@ -12,6 +12,6 @@ using Entropies, Test ps = probabilities(x, est) @test length(ps) == 8 @test ps isa Probabilities - @test genentropy(x, WaveletOverlap(), q = 1, base = 2) isa Real + @test entropy_renyi(x, WaveletOverlap(), q = 1, base = 2) isa Real end -end \ No newline at end of file +end From 37eee1e55f267530bf2872f212d03ff4ee8cee55 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 01:08:49 +0200 Subject: [PATCH 07/32] Switch order of docs --- src/core.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/core.jl b/src/core.jl index 4addcfa6f..9156518f6 100644 --- a/src/core.jl +++ b/src/core.jl @@ -41,14 +41,10 @@ abstract type ProbabilitiesEstimator end const ProbEst = ProbabilitiesEstimator # shorthand """ - probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator) → p::Probabilities - -Calculate probabilities representing `x` based on the provided -estimator and return them as a [`Probabilities`](@ref) container (`Vector`-like). -The probabilities are typically unordered and may or may not contain 0s, see the -documentation of the individual estimators for more. + probabilities(x::Array_or_Dataset) → p::Probabilities -The configuration options are always given as arguments to the chosen estimator. +Directly count probabilities from the elements of `x` without any discretization, +binning, or other processing (mostly useful when `x` contains categorical or integer data). probabilities(x::Array_or_Dataset, ε::AbstractFloat) → p::Probabilities @@ -67,9 +63,14 @@ To obtain the bin information along with `p`, use [`binhist`](@ref). Same as the above method, but now each dimension of the data is binned into `n::Int` equal sized bins instead of bins of length `ε::AbstractFloat`. 
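+
+A minimal sketch of the two binning calls documented above (assuming `x` is a
+`Dataset`; the numbers are illustrative):
+
+```julia
+probabilities(x, 0.1) # boxes with side length 0.1 along every dimension
+probabilities(x, 10)  # 10 equal-width bins along every dimension
+```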
- probabilities(x::Array_or_Dataset) → p::Probabilities -Directly count probabilities from the elements of `x` without any discretization, -binning, or other processing (mostly useful when `x` contains categorical or integer data). + probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator) → p::Probabilities + +Calculate probabilities representing `x` based on the provided +estimator and return them as a [`Probabilities`](@ref) container (`Vector`-like). +The probabilities are typically unordered and may or may not contain 0s, see the +documentation of the individual estimators for more. + +The configuration options are always given as arguments to the chosen estimator. """ function probabilities end From 0d767048d242b04960594c862feac4b4963fbd9d Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 13:46:39 +0200 Subject: [PATCH 08/32] Refactoring --- docs/make.jl | 8 +- docs/src/complexity_measures.md | 8 +- docs/src/generalized_entropies.md | 15 ++ docs/src/index.md | 29 +-- docs/src/other_methods.md | 58 ------ docs/src/probabilities.md | 109 ++---------- docs/src/renyi_entropy.md | 5 - docs/src/shannon_entropies.md | 165 ++++++++++++++++++ docs/src/tsallis_entropy.md | 5 - docs/style.jl | 40 +++++ src/Entropies.jl | 2 +- .../rectangular/TransferOperator.jl | 162 ++++++++--------- src/core.jl | 1 + src/counting_based/CountOccurrences.jl | 4 +- src/dispersion/dispersion_entropy.jl | 6 +- src/kerneldensity/kerneldensity.jl | 27 ++- src/multiscale/multiscale.jl | 31 ++++ src/symbolic/SymbolicAmplitudeAware.jl | 2 +- src/symbolic/SymbolicPermutation.jl | 2 +- src/symbolic/SymbolicWeightedPermutation.jl | 2 +- src/symbolic/symbolic.jl | 62 +++++-- src/timescales/timescales.jl | 22 ++- 22 files changed, 477 insertions(+), 288 deletions(-) create mode 100644 docs/src/generalized_entropies.md delete mode 100644 docs/src/other_methods.md delete mode 100644 docs/src/renyi_entropy.md create mode 100644 docs/src/shannon_entropies.md delete mode 100644 docs/src/tsallis_entropy.md create mode 100644 docs/style.jl create mode 100644 src/multiscale/multiscale.jl diff --git a/docs/make.jl b/docs/make.jl index 688d19aac..b3801e406 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -35,13 +35,15 @@ ENV["JULIA_DEBUG"] = "Documenter" PAGES = [ "Entropies.jl" => "index.md", - "Probability estimation" => "probabilities.md", - "Entropy (Rényi)" => "renyi_entropy.md", - "Entropy (Tsallis)" => "tsallis_entropy.md", + "Probabilities" => "probabilities.md", + "Generalized entropy" => "generalized_entropies.md", + "Shannon entropy" => "shannon_entropies.md", "Complexity measures" => "complexity_measures.md", "Utility methods" => "utils.md", ] +include("style.jl") + makedocs( modules = [Entropies], format = Documenter.HTML( diff --git a/docs/src/complexity_measures.md b/docs/src/complexity_measures.md index eedcb6af7..8cfe618d5 100644 --- a/docs/src/complexity_measures.md +++ b/docs/src/complexity_measures.md @@ -1,7 +1,7 @@ # Complexity measures -## Dispersion entropy +## Sample entropy -```@docs -dispersion_entropy -``` +## Approximate entropy + +## Disequilibrium diff --git a/docs/src/generalized_entropies.md b/docs/src/generalized_entropies.md new file mode 100644 index 000000000..efe841f45 --- /dev/null +++ b/docs/src/generalized_entropies.md @@ -0,0 +1,15 @@ +# [Generalized entropies](@id generalized_entropies) + +Computing entropy boils down to one thing: first estimating a probability distribution, then applying one of the generalized entropy formulas ([`entropy_renyi`](@ref) or 
[`entropy_tsallis`](@ref)) to these distributions. Thus, any of the implemented [probabilities estimators](@ref estimators) can be used to compute generalized entropies. + +## Rényi (generalized) entropy + +```@docs +Entropies.entropy_renyi +``` + +## Tsallis (generalized) entropy + +```@docs +Entropies.entropy_tsallis +``` diff --git a/docs/src/index.md b/docs/src/index.md index 4e7ad5c07..614b932bc 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,20 +2,29 @@ This package provides estimators for probabilities, entropies, and complexity measures for timeseries, nonlinear dynamics and complex systems. It is used in the [CausalityTools.jl](https://github.com/JuliaDynamics/CausalityTools.jl) and [DynamicalSystems.jl](https://github.com/JuliaDynamics/DynamicalSystems.jl) packages. -## Input data - This package assumes that your data is represented by the `Dataset`-type from [`DelayEmbeddings.jl`](https://github.com/JuliaDynamics/DelayEmbeddings.jl), where each observation is a D-dimensional data point. See the [`DynamicalSystems.jl` documentation](https://juliadynamics.github.io/DynamicalSystems.jl/dev/) for more info. Univariate timeseries given as `AbstractVector{<:Real}` also work with some estimators, but are treated differently based on which method for probability/entropy estimation is applied. -## API +## API & terminology + +In the literature, the term "entropy" is used (and abused) in multiple contexts. We use the following distinctions. + +- [Generalized Rényi and Tsallis entropies](@ref generalized_entropies) are theoretically well-founded concepts that are functions of *probability distributions*. + +- [Shannon entropy](@ref shannon_entropies) is a special case of Rényi and Tsallis entropies. We provide convenience functions for most common Shannon entropy estimators. + +- [*Probability estimation*](@ref estimators) is a separate but required step to compute entropies. We provide a range of probability estimators. These estimators can be used in isolation for estimating probability distributions, or for computing generalized Rényi and Tsallis entropies. + +- "Entropy-like" complexity measures, which strictly speaking don't compute entropies, and may or may not explicitly compute probability distributions, appear in the [Complexity measures](@ref) section. + +The main **API** of this package is thus contained in three functions: -The main **API** of this package is contained in three functions: +- [`probabilities`](@ref), which computes probability distributions of given datasets. +- [`entropy_renyi`](@ref), which uses the output of [`probabilities`](@ref), or a set of pre-computed [`Probabilities`](@ref), to calculate entropies. +- [`entropy_tsallis`](@ref), which uses the output of [`probabilities`](@ref), or a set of pre-computed [`Probabilities`](@ref), to calculate Tsallis entropies. +- Convenience functions for commonly used methods appear throughout the documentation. -* [`probabilities`](@ref) which computes probability distributions of given datasets -* [`entropy_renyi`](@ref) which uses the output of [`probabilities`](@ref), or a set of - pre-computed [`Probabilities`](@ref), to calculate entropies. -* [`entropy_tsallis`](@ref) which uses the output of [`probabilities`](@ref), or a set of - pre-computed [`Probabilities`](@ref), to calculate Tsallis entropies. +These functions dispatch on the probability estimators listed [here](@ref estimators). -These functions dispatch estimators listed [here](@ref estimators). 
+*Note: there are fewer probability estimators than there are Shannon entropy estimators, because some Shannon entropy are indirect, in the sense that they don't explicitly compute probability distributions* diff --git a/docs/src/other_methods.md b/docs/src/other_methods.md deleted file mode 100644 index 3a11d9aef..000000000 --- a/docs/src/other_methods.md +++ /dev/null @@ -1,58 +0,0 @@ -# Methods - -## Shannon entropies - -Some methods in the literature compute Shannon entropy in ways that don't explicitly result in probability distributions. Hence, they can't be used with [`probabilities`](@ref), and appear instead as stand-alone functions. - -### Nearest neighbors - -```@docs -entropy_kraskov -entropy_kozachenkoleonenko -``` - -#### Example - -This example reproduces Figure in Charzyńska & Gambin (2016)[^Charzyńska2016]. Both -estimators nicely converge to the true entropy with increasing time series length. -For a uniform 1D distribution ``U(0, 1)``, the true entropy is `0`. - -```@example MAIN -using DynamicalSystems, CairoMakie, Statistics -using Distributions: Uniform, Normal - -Ns = [100:100:500; 1000:1000:10000] -Ekl = Vector{Vector{Float64}}(undef, 0) -Ekr = Vector{Vector{Float64}}(undef, 0) - -nreps = 50 -for N in Ns - kl = Float64[] - kr = Float64[] - for i = 1:nreps - pts = Dataset([rand(Uniform(0, 1), 1) for i = 1:N]); - - push!(kl, entropy_kozachenkoleonenko(pts, w = 0, k = 1)) - # with k = 1, Kraskov is virtually identical to - # Kozachenko-Leonenko, so pick a higher number of neighbors - push!(kr, entropy_kraskov(pts, w = 0, k = 3)) - end - push!(Ekl, kl) - push!(Ekr, kr) -end - -fig = Figure() -ax = Axis(fig[1,1]; ylabel = "entropy (nats)", title = "Kozachenko-Leonenko") -lines!(ax, Ns, mean.(Ekl); color = Cycled(1)) -band!(ax, Ns, mean.(Ekl) .+ std.(Ekl), mean.(Ekl) .- std.(Ekl); -color = (Main.COLORS[1], 0.5)) - -ay = Axis(fig[2,1]; xlabel = "time step", ylabel = "entropy (nats)", title = "Kraskov") -lines!(ay, Ns, mean.(Ekr); color = Cycled(2)) -band!(ay, Ns, mean.(Ekr) .+ std.(Ekr), mean.(Ekr) .- std.(Ekr); -color = (Main.COLORS[2], 0.5)) - -fig -``` - -[^Charzyńska2016]: Charzyńska, A., & Gambin, A. (2016). Improvement of the k-NN entropy estimator with applications in systems biology. Entropy, 18(1), 13. diff --git a/docs/src/probabilities.md b/docs/src/probabilities.md index 27791333f..3ddceaf4a 100644 --- a/docs/src/probabilities.md +++ b/docs/src/probabilities.md @@ -1,4 +1,8 @@ -# Probabilities +# [Probabilities](@id estimators) + +For categorical or integer-valued data, probabilities can be estimated by directly counting relative frequencies of data elements. For such data, use `probabilities(x::Array_or_Dataset) → p::Probabilities`. + +More advanced estimators computing probabilities by first either discretizing, symbolizing or transforming the data in a way that quantifies some useful properties about the underlying data (e.g. visitation frequencies, wavelet energies, or permutation patterns), from which probability distributions can be estimated. Use `probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator)` in combination with any of the estimators listed below. ```@docs Probabilities @@ -7,90 +11,37 @@ probabilities! 
ProbabilitiesEstimator ``` -## [Estimators](@id estimators) - -### Count occurrences (counting) +## Count occurrences (counting) ```@docs CountOccurrences ``` -### Permutation (symbolic) +## Permutation (symbolic) ```@docs SymbolicPermutation ``` -#### Example - -This example reproduces an example from Bandt and Pompe (2002), where the permutation -entropy is compared with the largest Lyapunov exponents from time series of the chaotic -logistic map. Entropy estimates using [`SymbolicWeightedPermutation`](@ref) -and [`SymbolicAmplitudeAwarePermutation`](@ref) are added here for comparison. - -```@example MAIN -using DynamicalSystems, CairoMakie - -ds = Systems.logistic() -rs = 3.4:0.001:4 -N_lyap, N_ent = 100000, 10000 -m, τ = 6, 1 # Symbol size/dimension and embedding lag - -# Generate one time series for each value of the logistic parameter r -lyaps = Float64[] -hs_perm = Float64[] -hs_wtperm = Float64[] -hs_ampperm = Float64[] - -base = Base.MathConstants.e -for r in rs - ds.p[1] = r - push!(lyaps, lyapunov(ds, N_lyap)) - - x = trajectory(ds, N_ent) # time series - hperm = Entropies.entropy_renyi(x, SymbolicPermutation(m = m, τ = τ), base = base) - hwtperm = Entropies.entropy_renyi(x, SymbolicWeightedPermutation(m = m, τ = τ), base = base) - hampperm = Entropies.entropy_renyi(x, SymbolicAmplitudeAwarePermutation(m = m, τ = τ), base = base) - - push!(hs_perm, hperm); push!(hs_wtperm, hwtperm); push!(hs_ampperm, hampperm) -end - -fig = Figure() -a1 = Axis(fig[1,1]; ylabel = L"\lambda") -lines!(a1, rs, lyaps); ylims!(a1, (-2, log(2))) -a2 = Axis(fig[2,1]; ylabel = L"h_6 (SP)") -lines!(a2, rs, hs_perm; color = Cycled(2)) -a3 = Axis(fig[3,1]; ylabel = L"h_6 (WT)") -lines!(a3, rs, hs_wtperm; color = Cycled(3)) -a4 = Axis(fig[4,1]; ylabel = L"h_6 (SAAP)") -lines!(a4, rs, hs_ampperm; color = Cycled(4)) -a4.xlabel = L"r" - -for a in (a1,a2,a3) - hidexdecorations!(a, grid = false) -end -fig -``` - -### Visitation frequency (binning) +## Visitation frequency (binning) ```@docs VisitationFrequency ``` -#### Specifying binning/boxes +### Specifying binning/boxes ```@docs RectangularBinning ``` -### Transfer operator (binning) +## Transfer operator (binning) ```@docs TransferOperator ``` -#### Utility methods/types +### Utility methods/types ```@docs InvariantMeasure @@ -98,13 +49,13 @@ invariantmeasure transfermatrix ``` -### Kernel density +## Kernel density ```@docs NaiveKernel ``` -#### Example +### Example Here, we draw some random points from a 2D normal distribution. Then, we use kernel density estimation to associate a probability to each point `p`, measured by how many @@ -125,40 +76,8 @@ ax.zticklabelsvisible = false fig ``` -### Wavelet +## Wavelet ```@docs WaveletOverlap ``` - -#### Example - -The scale-resolved wavelet entropy should be lower for very regular signals (most of the -energy is contained at one scale) and higher for very irregular signals (energy spread -more out across scales). 
- -```@example MAIN -using DynamicalSystems, CairoMakie -N, a = 1000, 10 -t = LinRange(0, 2*a*π, N) - -x = sin.(t); -y = sin.(t .+ cos.(t/0.5)); -z = sin.(rand(1:15, N) ./ rand(1:10, N)) - -est = WaveletOverlap() -h_x = entropy_renyi(x, est) -h_y = entropy_renyi(y, est) -h_z = entropy_renyi(z, est) - -fig = Figure() -ax = Axis(fig[1,1]; ylabel = "x") -lines!(ax, t, x; color = Cycled(1), label = "h=$(h=round(h_x, sigdigits = 5))"); -ay = Axis(fig[2,1]; ylabel = "y") -lines!(ay, t, y; color = Cycled(2), label = "h=$(h=round(h_y, sigdigits = 5))"); -az = Axis(fig[3,1]; ylabel = "z", xlabel = "time") -lines!(az, t, z; color = Cycled(3), label = "h=$(h=round(h_z, sigdigits = 5))"); -for a in (ax, ay, az); axislegend(a); end -for a in (ax, ay); hidexdecorations!(a; grid=false); end -fig -``` diff --git a/docs/src/renyi_entropy.md b/docs/src/renyi_entropy.md deleted file mode 100644 index dfedc5156..000000000 --- a/docs/src/renyi_entropy.md +++ /dev/null @@ -1,5 +0,0 @@ -# Rényi (generalized) entropy - -```@docs -Entropies.entropy_renyi -``` diff --git a/docs/src/shannon_entropies.md b/docs/src/shannon_entropies.md new file mode 100644 index 000000000..81e1b4651 --- /dev/null +++ b/docs/src/shannon_entropies.md @@ -0,0 +1,165 @@ +# [Shannon entropies](@id shannon_entropies) + +Many methods in the literature compute (Shannon) entropy in ways that don't explicitly result in probability distributions, so they can't be used in combination with [`probabilities`](@ref), [`entropy_renyi`](@ref) or [`entropy_tsallis`](@ref). Instead, they appear here as stand-alone functions. + + +## Nearest neighbors entropy + +```@docs +entropy_kraskov +entropy_kozachenkoleonenko +``` + +### Example + +This example reproduces Figure in Charzyńska & Gambin (2016)[^Charzyńska2016]. Both +estimators nicely converge to the true entropy with increasing time series length. +For a uniform 1D distribution ``U(0, 1)``, the true entropy is `0`. + +```@example MAIN +using DynamicalSystems, CairoMakie, Statistics +using Distributions: Uniform, Normal + +Ns = [100:100:500; 1000:1000:10000] +Ekl = Vector{Vector{Float64}}(undef, 0) +Ekr = Vector{Vector{Float64}}(undef, 0) + +nreps = 50 +for N in Ns + kl = Float64[] + kr = Float64[] + for i = 1:nreps + pts = Dataset([rand(Uniform(0, 1), 1) for i = 1:N]); + + push!(kl, entropy_kozachenkoleonenko(pts, w = 0, k = 1)) + # with k = 1, Kraskov is virtually identical to + # Kozachenko-Leonenko, so pick a higher number of neighbors + push!(kr, entropy_kraskov(pts, w = 0, k = 3)) + end + push!(Ekl, kl) + push!(Ekr, kr) +end + +fig = Figure() +ax = Axis(fig[1,1]; ylabel = "entropy (nats)", title = "Kozachenko-Leonenko") +lines!(ax, Ns, mean.(Ekl); color = Cycled(1)) +band!(ax, Ns, mean.(Ekl) .+ std.(Ekl), mean.(Ekl) .- std.(Ekl); +color = (Main.COLORS[1], 0.5)) + +ay = Axis(fig[2,1]; xlabel = "time step", ylabel = "entropy (nats)", title = "Kraskov") +lines!(ay, Ns, mean.(Ekr); color = Cycled(2)) +band!(ay, Ns, mean.(Ekr) .+ std.(Ekr), mean.(Ekr) .- std.(Ekr); +color = (Main.COLORS[2], 0.5)) + +fig +``` + +[^Charzyńska2016]: Charzyńska, A., & Gambin, A. (2016). Improvement of the k-NN entropy estimator with applications in systems biology. Entropy, 18(1), 13. + +## Permutation entropy + +```@docs +entropy_perm +entropy_weightedperm +entropy_ampperm +``` + +### Example + +This example reproduces an example from Bandt and Pompe (2002), where the permutation +entropy is compared with the largest Lyapunov exponents from time series of the chaotic +logistic map. 
Entropy estimates using [`SymbolicWeightedPermutation`](@ref) +and [`SymbolicAmplitudeAwarePermutation`](@ref) are added here for comparison. + +```@example MAIN +using DynamicalSystems, CairoMakie + +ds = Systems.logistic() +rs = 3.4:0.001:4 +N_lyap, N_ent = 100000, 10000 +m, τ = 6, 1 # Symbol size/dimension and embedding lag + +# Generate one time series for each value of the logistic parameter r +lyaps = Float64[] +hs_perm = Float64[] +hs_wtperm = Float64[] +hs_ampperm = Float64[] + +base = Base.MathConstants.e +for r in rs + ds.p[1] = r + push!(lyaps, lyapunov(ds, N_lyap)) + + x = trajectory(ds, N_ent) # time series + hperm = Entropies.entropy_renyi(x, SymbolicPermutation(m = m, τ = τ), base = base) + hwtperm = Entropies.entropy_renyi(x, SymbolicWeightedPermutation(m = m, τ = τ), base = base) + hampperm = Entropies.entropy_renyi(x, SymbolicAmplitudeAwarePermutation(m = m, τ = τ), base = base) + + push!(hs_perm, hperm); push!(hs_wtperm, hwtperm); push!(hs_ampperm, hampperm) +end + +fig = Figure() +a1 = Axis(fig[1,1]; ylabel = L"\lambda") +lines!(a1, rs, lyaps); ylims!(a1, (-2, log(2))) +a2 = Axis(fig[2,1]; ylabel = L"h_6 (SP)") +lines!(a2, rs, hs_perm; color = Cycled(2)) +a3 = Axis(fig[3,1]; ylabel = L"h_6 (WT)") +lines!(a3, rs, hs_wtperm; color = Cycled(3)) +a4 = Axis(fig[4,1]; ylabel = L"h_6 (SAAP)") +lines!(a4, rs, hs_ampperm; color = Cycled(4)) +a4.xlabel = L"r" + +for a in (a1,a2,a3) + hidexdecorations!(a, grid = false) +end +fig +``` + +## Dispersion entropy + +```@docs +entropy_dispersion +``` + +## Kernel entropy + +```@docs +entropy_kernel +``` + +## Wavelet entropy + +```@docs +entropy_wavelet +``` + +### Example + +The scale-resolved wavelet entropy should be lower for very regular signals (most of the +energy is contained at one scale) and higher for very irregular signals (energy spread +more out across scales). 
+
+```@example MAIN
+using DynamicalSystems, CairoMakie
+N, a = 1000, 10
+t = LinRange(0, 2*a*π, N)
+
+x = sin.(t);
+y = sin.(t .+ cos.(t/0.5));
+z = sin.(rand(1:15, N) ./ rand(1:10, N))
+
+h_x = entropy_wavelet(x)
+h_y = entropy_wavelet(y)
+h_z = entropy_wavelet(z)
+
+fig = Figure()
+ax = Axis(fig[1,1]; ylabel = "x")
+lines!(ax, t, x; color = Cycled(1), label = "h=$(round(h_x, sigdigits = 5))");
+ay = Axis(fig[2,1]; ylabel = "y")
+lines!(ay, t, y; color = Cycled(2), label = "h=$(round(h_y, sigdigits = 5))");
+az = Axis(fig[3,1]; ylabel = "z", xlabel = "time")
+lines!(az, t, z; color = Cycled(3), label = "h=$(round(h_z, sigdigits = 5))");
+for a in (ax, ay, az); axislegend(a); end
+for a in (ax, ay); hidexdecorations!(a; grid=false); end
+fig
+```
diff --git a/docs/src/tsallis_entropy.md b/docs/src/tsallis_entropy.md
deleted file mode 100644
index a458cdefc..000000000
--- a/docs/src/tsallis_entropy.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Tsallis (generalized) entropy
-
-```@docs
-Entropies.entropy_tsallis
-```
diff --git a/docs/style.jl b/docs/style.jl
new file mode 100644
index 000000000..8d361fffe
--- /dev/null
+++ b/docs/style.jl
@@ -0,0 +1,40 @@
+using CairoMakie
+# %% Color theme definitions
+struct CyclicContainer <: AbstractVector{String}
+    c::Vector{String}
+    n::Int
+end
+CyclicContainer(c) = CyclicContainer(c, 0)
+
+Base.length(c::CyclicContainer) = length(c.c)
+Base.size(c::CyclicContainer) = size(c.c)
+Base.iterate(c::CyclicContainer, state=1) = Base.iterate(c.c, state)
+Base.getindex(c::CyclicContainer, i) = c.c[(i-1)%length(c.c) + 1]
+Base.getindex(c::CyclicContainer, i::AbstractArray) = c.c[i]
+function Base.getindex(c::CyclicContainer)
+    c.n += 1
+    c[c.n]
+end
+Base.iterate(c::CyclicContainer, i = 1) = iterate(c.c, i)
+
+COLORSCHEME = [
+    "#6D44D0",
+    "#2CB3BF",
+    "#1B1B1B",
+    "#DA5210",
+    "#03502A",
+    "#866373",
+]
+
+COLORS = CyclicContainer(COLORSCHEME)
+LINESTYLES = CyclicContainer(["-", ":", "--", "-."])
+
+# %% Makie styling
+# other styling elements for Makie
+set_theme!(;
+    palette = (color = COLORSCHEME,),
+    fontsize = 22,
+    figure_padding = 4,
+    resolution = (1000, 500),
+    linewidth = 3.0,
+)
diff --git a/src/Entropies.jl b/src/Entropies.jl
index 754d0554d..7eb5811eb 100644
--- a/src/Entropies.jl
+++ b/src/Entropies.jl
@@ -8,7 +8,7 @@ module Entropies
     include("kerneldensity/kerneldensity.jl")
     include("timescales/timescales.jl")
     include("nearest_neighbors/nearest_neighbors.jl")
-    include("dispersion/dispersion_entropy.jl")
+    include("dispersion/entropy_dispersion.jl")
     include("tsallis/tsallis.jl")
     include("deprecations.jl")
 end
diff --git a/src/binning_based/rectangular/TransferOperator.jl b/src/binning_based/rectangular/TransferOperator.jl
index d80e4b18c..8c9a026d9 100644
--- a/src/binning_based/rectangular/TransferOperator.jl
+++ b/src/binning_based/rectangular/TransferOperator.jl
@@ -1,7 +1,7 @@
 using DelayEmbeddings, SparseArrays
 include("GroupSlices.jl")
 
-export 
+export
     TransferOperator, # the probabilities estimator
     InvariantMeasure, invariantmeasure,
     transfermatrix
@@ -9,21 +9,21 @@ export
 """
     TransferOperator(ϵ::RectangularBinning) <: BinningProbabilitiesEstimator
 
-A probability estimator based on binning data into rectangular boxes dictated by 
-the binning scheme `ϵ`, then approxmating the transfer (Perron-Frobenius) operator 
-over the bins, then taking the invariant measure associated with that transfer operator 
+A probability estimator based on binning data into rectangular boxes dictated by
+the binning scheme `ϵ`, then approximating the transfer 
(Perron-Frobenius) operator
+over the bins, then taking the invariant measure associated with that transfer operator
+as the bin probabilities. Assumes that the input data are sequential (time-ordered).
 
 This implementation follows the grid estimator approach in Diego et al. (2019)[^Diego2019].
 
 ## Description
 
-The transfer operator ``P^{N}``is computed as an `N`-by-`N` matrix of transition 
-probabilities between the states defined by the partition elements, where `N` is the 
-number of boxes in the partition that is visited by the orbit/points. 
+The transfer operator ``P^{N}`` is computed as an `N`-by-`N` matrix of transition
+probabilities between the states defined by the partition elements, where `N` is the
+number of boxes in the partition that is visited by the orbit/points.
 
-If ``\\{x_t^{(D)} \\}_{n=1}^L`` are the ``L`` different ``D``-dimensional points over 
-which the transfer operator is approximated, ``\\{ C_{k=1}^N \\}`` are the ``N`` different 
+If ``\\{x_t^{(D)}\\}_{t=1}^L`` are the ``L`` different ``D``-dimensional points over
+which the transfer operator is approximated, ``\\{ C_k \\}_{k=1}^N`` are the ``N`` different
 partition elements (as dictated by `ϵ`) that get visited by the points, and
 ``\\phi(x_t) = x_{t+1}``, then
 
 ```math
 P_{ij} = \\dfrac
 {\\#\\{ x_t | x_t \\in C_i \\cap \\phi(x_t) \\in C_j \\}}
 {\\#\\{ x_m | x_m \\in C_i \\}},
 ```
 
-where ``\\#`` denotes the cardinal. The element ``P_{ij}`` thus indicates how many points 
-that are initially in box ``C_i`` end up in box ``C_j`` when the points in ``C_i`` are 
-projected one step forward in time. Thus, the row ``P_{ik}^N`` where 
-``k \\in \\{1, 2, \\ldots, N \\}`` gives the probability 
-of jumping from the state defined by box ``C_i`` to any of the other ``N`` states. It 
-follows that ``\\sum_{k=1}^{N} P_{ik} = 1`` for all ``i``. Thus, ``P^N`` is a row/right 
+where ``\\#`` denotes the cardinality. The element ``P_{ij}`` thus indicates how many points
+that are initially in box ``C_i`` end up in box ``C_j`` when the points in ``C_i`` are
+projected one step forward in time. Thus, the row ``P_{ik}^N`` where
+``k \\in \\{1, 2, \\ldots, N \\}`` gives the probability
+of jumping from the state defined by box ``C_i`` to any of the other ``N`` states. It
+follows that ``\\sum_{k=1}^{N} P_{ik} = 1`` for all ``i``. Thus, ``P^N`` is a row/right
 stochastic matrix.
 
 ### Invariant measure estimation from transfer operator
 
-The left invariant distribution ``\\mathbf{\\rho}^N`` is a row vector, where 
-``\\mathbf{\\rho}^N P^{N} = \\mathbf{\\rho}^N``. Hence, ``\\mathbf{\\rho}^N`` is a row 
-eigenvector of the transfer matrix ``P^{N}`` associated with eigenvalue 1. The distribution 
-``\\mathbf{\\rho}^N`` approximates the invariant density of the system subject to the 
+The left invariant distribution ``\\mathbf{\\rho}^N`` is a row vector, where
+``\\mathbf{\\rho}^N P^{N} = \\mathbf{\\rho}^N``. Hence, ``\\mathbf{\\rho}^N`` is a row
+eigenvector of the transfer matrix ``P^{N}`` associated with eigenvalue 1. The distribution
+``\\mathbf{\\rho}^N`` approximates the invariant density of the system subject to the
 partition `ϵ`, and can be taken as a probability distribution over the partition elements.
 
-In practice, the invariant measure ``\\mathbf{\\rho}^N`` is computed using 
+In practice, the invariant measure ``\\mathbf{\\rho}^N`` is computed using
 [`invariantmeasure`](@ref), which also approximates the transfer matrix. The invariant distribution
-is initialized as a length-`N` random distribution which is then applied to ``P^{N}``. 
-The resulting length-`N` distribution is then applied to ``P^{N}`` again. This process 
-repeats until the difference between the distributions over consecutive iterations is 
-below some threshold. 
+is initialized as a length-`N` random distribution which is then applied to ``P^{N}``.
+The resulting length-`N` distribution is then applied to ``P^{N}`` again. This process
+repeats until the difference between the distributions over consecutive iterations is
+below some threshold.
 
 ## Probability and entropy estimation
 
-- `probabilities(x::AbstractDataset, est::TransferOperator{RectangularBinning})` estimates 
+- `probabilities(x::AbstractDataset, est::TransferOperator{RectangularBinning})` estimates
 probabilities for the bins defined by the provided binning (`est.ϵ`)
-- `genentropy(x::AbstractDataset, est::TransferOperator{RectangularBinning})` does the same, 
+- `entropy_renyi(x::AbstractDataset, est::TransferOperator{RectangularBinning})` does the same,
 but computes generalized entropy using the probabilities.
 
@@ -88,11 +88,11 @@ function inds_in_terms_of_unique(x)
             end
         end
     end
-    
+
     return inds
 end
 
-# Taking advantage of the fact that x is sorted reduces runtime by 1.5 orders of magnitude 
+# Taking advantage of the fact that x is sorted reduces runtime by 1.5 orders of magnitude
 # for datasets of >100 000+ points
 function inds_in_terms_of_unique_sorted(x) # assumes sorted
     @assert issorted(x)
@@ -111,12 +111,12 @@ function inds_in_terms_of_unique_sorted(x) # assumes sorted
         end
         inds[j] = uidx
     end
-    
+
     return inds
 end
 
 function inds_in_terms_of_unique(x, sorted::Bool)
-    if sorted 
+    if sorted
         return inds_in_terms_of_unique_sorted(x)
     else
         return inds_in_terms_of_unique(x)
@@ -126,22 +126,22 @@ end
 inds_in_terms_of_unique(x::AbstractDataset) = inds_in_terms_of_unique(x.data)
 
 """
-    TransferOperatorApproximationRectangular(to, ϵ::RectangularBinning, mini, edgelengths, 
+    TransferOperatorApproximationRectangular(to, ϵ::RectangularBinning, mini, edgelengths,
         bins, sort_idxs)
 
-The `N`-by-`N` matrix `to` is an approximation to the transfer operator, subject to the 
-partition `ϵ`, computed over some set of sequentially ordered points. 
+The `N`-by-`N` matrix `to` is an approximation to the transfer operator, subject to the
+partition `ϵ`, computed over some set of sequentially ordered points.
 
-For convenience, `mini` and `edgelengths` provide the minima and box edge lengths along 
-each coordinate axis, as determined by applying `ϵ` to the points. The coordinates of 
-the (leftmost, if axis is ordered low-high) box corners are given in `bins`. 
+For convenience, `mini` and `edgelengths` provide the minima and box edge lengths along
+each coordinate axis, as determined by applying `ϵ` to the points. The coordinates of
+the (leftmost, if axis is ordered low-high) box corners are given in `bins`.
 
-Only bins actually visited by the points are considered, and `bins` give the coordinates 
-of these bins. The element `bins[i]` correspond to the `i`-th state of the system, which 
+Only bins actually visited by the points are considered, and `bins` gives the coordinates
+of these bins. The element `bins[i]` corresponds to the `i`-th state of the system, which
 corresponds to the `i`-th column/row of the transfer operator `to`.
 
-`sort_idxs` contains the indices that would sort the input points. `visitors` is a 
-vector of vectors, where `visitors[i]` contains the indices of the (sorted) 
+`sort_idxs` contains the indices that would sort the input points. `visitors` is a
+vector of vectors, where `visitors[i]` contains the indices of the (sorted)
 points that visit `bins[i]`.
 
 See also: [`RectangularBinning`](@ref).
 """
@@ -159,10 +159,10 @@ end
 """
     transferoperator(pts::AbstractDataset{D, T}, ϵ::RectangularBinning) → TransferOperatorApproximationRectangular
 
-Estimate the transfer operator given a set of sequentially ordered points subject to a 
+Estimate the transfer operator given a set of sequentially ordered points subject to a
 rectangular partition given by `ϵ`.
 
-## Example 
+## Example
 
 ```julia
 using DynamicalSystems, Plots, Entropies
@@ -179,25 +179,25 @@ See also: [`RectangularBinning`](@ref).
 """
 function transferoperator(pts::AbstractDataset{D, T}, ϵ::RectangularBinning;
         boundary_condition = :circular) where {D, T<:Real}
-    
+
     L = length(pts)
     mini, edgelengths = Entropies.minima_edgelengths(pts, ϵ)
 
-    # The L points visits a total of L bins, which are the following bins: 
+    # The L points visit a total of L bins, which are the following bins:
     visited_bins = Entropies.encode_as_bin(pts, mini, edgelengths)
     sort_idxs = sortperm(visited_bins)
 
     # TODO: fix re-indexing after sorting. Sorting is much faster, so we want to do so.
     #sort!(visited_bins)
-    
+
     # There are N=length(unique(visited_bins)) unique bins.
-    # Which of the unqiue bins does each of the L points visit? 
+    # Which of the unique bins does each of the L points visit?
     visits_whichbin = inds_in_terms_of_unique(visited_bins, false) # set to true when sorting is fixed
 
     # `visitors` lists the indices of the points visiting each of the N unique bins.
     slices = GroupSlices.groupslices(visited_bins)
-    visitors  = GroupSlices.groupinds(slices) 
-    
+    visitors  = GroupSlices.groupinds(slices)
+
     # first_visited_by == [x[1] for x in visitors]
     first_visited_by = GroupSlices.firstinds(slices)
     L = length(first_visited_by)
@@ -210,7 +210,7 @@ function transferoperator(pts::AbstractDataset{D, T}, ϵ::RectangularBinning;
     # one point of the orbit visiting a bin.
     target_bin_j::Int = 0
     n_visitsᵢ::Int = 0
-    
+
     if boundary_condition == :circular
         #warn("Using circular boundary condition")
         append!(visits_whichbin, [1])
@@ -220,7 +220,7 @@ function transferoperator(pts::AbstractDataset{D, T}, ϵ::RectangularBinning;
     else
         error("Boundary condition $(boundary_condition) not implemented")
     end
-    
+
     # Loop over the visited bins bᵢ
     for i in 1:L
         # How many times is this bin visited?
@@ -280,28 +280,28 @@ function transferoperator(pts::AbstractDataset{D, T}, ϵ::RectangularBinning;
             end
        end
    end
-    
+
    # Transfer operator is just the normalized transition probabilities between the boxes.
    TO = sparse(I, J, P)
-    
-    # Compute the coordinates of the visited bins. bins[i] corresponds to the i-th 
+
+    # Compute the coordinates of the visited bins. bins[i] corresponds to the i-th
    # row/column of the transfer operator
    unique!(visited_bins)
    bins = [β .* edgelengths .+ mini for β in visited_bins]

-    TransferOperatorApproximationRectangular(TO, ϵ, mini, edgelengths, bins, 
+    TransferOperatorApproximationRectangular(TO, ϵ, mini, edgelengths, bins,
        sort_idxs, visitors)
end

-""" 
+"""
    InvariantMeasure(to, ρ)

-Minimal return struct for [`invariantmeasure`](@ref) that contains the estimated invariant 
-measure `ρ`, as well as the transfer operator `to` from which it is computed (including 
+Minimal return struct for [`invariantmeasure`](@ref) that contains the estimated invariant
+measure `ρ`, as well as the transfer operator `to` from which it is computed (including
 bin information).

 See also: [`invariantmeasure`](@ref). 
-
-""" 
+"""
 struct InvariantMeasure{T}
     to::T
     ρ::Probabilities
 end
 
@@ -316,14 +316,14 @@ import LinearAlgebra: norm
 """
     invariantmeasure(x::AbstractDataset, ϵ::RectangularBinning) → iv::InvariantMeasure
 
-Estimate an invariant measure over the points in `x` based on binning the data into 
-rectangular boxes dictated by the binning scheme `ϵ`, then approximate the transfer 
-(Perron-Frobenius) operator over the bins. From the approximation to the transfer operator, 
+Estimate an invariant measure over the points in `x` based on binning the data into
+rectangular boxes dictated by the binning scheme `ϵ`, then approximate the transfer
+(Perron-Frobenius) operator over the bins. From the approximation to the transfer operator,
 compute an invariant distribution over the bins. Assumes that the input data are sequential.
 
 Details on the estimation procedure are found in the [`TransferOperator`](@ref) docstring.
 
-## Example 
+## Example
 
 ```julia
 using DynamicalSystems, Plots, Entropies
 
 ds = Systems.henon()
 N = 20000
 orbit = trajectory(ds, N; Ttr = 10)
 
 # Estimate the invariant measure over some coarse graining of the orbit.
 iv = invariantmeasure(orbit, RectangularBinning(15))
 
-# Get the probabilities and bins 
+# Get the probabilities and bins
 invariantmeasure(iv)
 ```
 
     invariantmeasure(iv::InvariantMeasure) → (ρ::Probabilities, bins::Vector{<:SVector})
 
-From a pre-computed invariant measure, return the probabilities and associated bins. 
-The element `ρ[i]` is the probability of visitation to the box `bins[i]`. Analogous to 
-[`binhist`](@ref). 
+From a pre-computed invariant measure, return the probabilities and associated bins.
+The element `ρ[i]` is the probability of visitation to the box `bins[i]`. Analogous to
+[`binhist`](@ref).
 
 !!! hint "Transfer operator approach vs. naive histogram approach"
-    Why bother with the transfer operator instead of using regular histograms to obtain 
-    probabilities? 
-    
-    In fact, the naive histogram approach and the 
-    transfer operator approach are equivalent in the limit of long enough time series 
+    Why bother with the transfer operator instead of using regular histograms to obtain
+    probabilities?
+
+    In fact, the naive histogram approach and the
+    transfer operator approach are equivalent in the limit of long enough time series
     (as ``n \\to \\infty``), which is guaranteed by the ergodic theorem. There is a crucial
     difference, however:
-    
-    The naive histogram approach only gives the long-term probabilities that 
-    orbits visit a certain region of the state space. The transfer operator encodes that 
-    information too, but comes with the added benefit of knowing the *transition 
-    probabilities* between states (see [`transfermatrix`](@ref)). 
+
+    The naive histogram approach only gives the long-term probabilities that
+    orbits visit a certain region of the state space. The transfer operator encodes that
+    information too, but comes with the added benefit of knowing the *transition
+    probabilities* between states (see [`transfermatrix`](@ref)).
 
 See also: [`InvariantMeasure`](@ref).
 """
-function invariantmeasure(to::TransferOperatorApproximationRectangular; 
+function invariantmeasure(to::TransferOperatorApproximationRectangular;
         N::Int = 200, tolerance::Float64 = 1e-8, delta::Float64 = 1e-8)
-    
+
     TO = to.transfermatrix
     #=
     # Start with a random distribution `Ρ` (big rho). Normalise it so that it
@@ -444,9 +444,9 @@ end
 """
     transfermatrix(iv::InvariantMeasure) → (M::AbstractArray{<:Real, 2}, bins::Vector{<:SVector})
 
-Return the transfer matrix/operator and corresponding bins. Here, `bins[i]` corresponds 
-to the i-th row/column of the transfer matrix. Thus, the entry `M[i, j]` is the 
-probability of jumping from the state defined by `bins[i]` to the state defined by 
+Return the transfer matrix/operator and corresponding bins. Here, `bins[i]` corresponds
+to the i-th row/column of the transfer matrix. Thus, the entry `M[i, j]` is the
+probability of jumping from the state defined by `bins[i]` to the state defined by
 `bins[j]`.
 
 See also: [`TransferOperator`](@ref).
diff --git a/src/core.jl b/src/core.jl
index 9156518f6..0fd15ab44 100644
--- a/src/core.jl
+++ b/src/core.jl
@@ -60,6 +60,7 @@ datasets and with small box sizes `ε` without memory overflow and with maximum
 To obtain the bin information along with `p`, use [`binhist`](@ref).
 
     probabilities(x::Array_or_Dataset, n::Integer) → p::Probabilities
+
 Same as the above method, but now each dimension of the data is binned into `n::Int` equal
 sized bins instead of bins of length `ε::AbstractFloat`.
 
diff --git a/src/counting_based/CountOccurrences.jl b/src/counting_based/CountOccurrences.jl
index 085e363f9..4e7d46dd8 100644
--- a/src/counting_based/CountOccurrences.jl
+++ b/src/counting_based/CountOccurrences.jl
@@ -1,9 +1,7 @@
-abstract type CountingBasedProbabilityEstimator <: ProbabilitiesEstimator end
 export CountOccurrences
-import DelayEmbeddings: AbstractDataset
 
 """
-    CountOccurrences <: CountingBasedProbabilityEstimator
+    CountOccurrences
 
 A probabilities/entropy estimator based on straightforward counting of distinct elements in
 a univariate time series or multivariate dataset. From these counts, construct histograms.
diff --git a/src/dispersion/dispersion_entropy.jl b/src/dispersion/dispersion_entropy.jl
index 45eaa2a40..9a1ca301d 100644
--- a/src/dispersion/dispersion_entropy.jl
+++ b/src/dispersion/dispersion_entropy.jl
@@ -1,7 +1,7 @@
 using DelayEmbeddings
 using StatsBase
 
-export dispersion_entropy
+export entropy_dispersion
 
 """
     embed_symbols(s::AbstractVector{T}, m, τ) where T → Dataset{m, T}
@@ -25,7 +25,7 @@ end
 
 
 """
-    dispersion_entropy(x, s = GaussianSymbolization(n_categories = 5);
+    entropy_dispersion(x, s = GaussianSymbolization(n_categories = 5);
         m = 2, τ = 1, q = 1, base = MathConstants.e)
 
 Compute the (order-`q` generalized) dispersion entropy to the given `base` of the
@@ -49,7 +49,7 @@ Li et al. (2018) recommends that `x` has at least 1000 data points.
 
 [^Li2018]: Li, G., Guan, Q., & Yang, H. (2018). Noise reduction method of underwater acoustic signals based on CEEMDAN, effort-to-compress complexity, refined composite multiscale dispersion entropy and wavelet threshold denoising. Entropy, 21(1), 11.
 [^Azami2018]: Azami, H., & Escudero, J. (2018). Coarse-graining approaches in univariate multiscale sample and dispersion entropy. Entropy, 20(2), 138. 
""" -function dispersion_entropy(x::AbstractVector, +function entropy_dispersion(x::AbstractVector, s::GaussianSymbolization = GaussianSymbolization(n_categories = 5); m = 2, τ = 1, q = 1, base = MathConstants.e) symbols = symbolize(x, s) diff --git a/src/kerneldensity/kerneldensity.jl b/src/kerneldensity/kerneldensity.jl index 0fb789892..dbd60d212 100644 --- a/src/kerneldensity/kerneldensity.jl +++ b/src/kerneldensity/kerneldensity.jl @@ -1,15 +1,15 @@ using Neighborhood: Theiler, KDTree, BruteForce, bulkisearch, searchstructure using Distances: Metric, Euclidean export NaiveKernel, KDTree, BruteForce - +export entropy_kernel """ NaiveKernel(ϵ::Real, ss = KDTree; w = 0, metric = Euclidean()) <: ProbabilitiesEstimator -Estimate probabilities/entropy using a "naive" kernel density estimation approach (KDE), as +Estimate probabilities/entropy using a "naive" kernel density estimation approach (KDE), as discussed in Prichard and Theiler (1995) [^PrichardTheiler1995]. -Probabilities ``P(\\mathbf{x}, \\epsilon)`` are assigned to every point ``\\mathbf{x}`` by -counting how many other points occupy the space spanned by +Probabilities ``P(\\mathbf{x}, \\epsilon)`` are assigned to every point ``\\mathbf{x}`` by +counting how many other points occupy the space spanned by a hypersphere of radius `ϵ` around ``\\mathbf{x}``, according to: ```math @@ -46,3 +46,22 @@ function probabilities(x::DelayEmbeddings.AbstractDataset, est::NaiveKernel) p = Float64.(length.(idxs)) return Probabilities(p) end + +""" + entropy_kernel(x; ϵ::Real = 0.2*StatsBase.std(x), method = KDTree; w = 0, + metric = Euclidean(), base = MathConstants.e) + +Calculate Shannon entropy using the "naive" kernel density estimation approach (KDE), as +discussed in Prichard and Theiler (1995) [^PrichardTheiler1995]. + +Shorthand for `entropy_renyi(x, NaiveKernel(ϵ, method, w = w, metric = metric), q = 1, base = base)`. + +See also: [`NaiveKernel`](@ref), [`entropy_renyi`](@ref). + +[^PrichardTheiler1995]: Prichard, D., & Theiler, J. (1995). Generalized redundancies for time series analysis. Physica D: Nonlinear Phenomena, 84(3-4), 476-493. +""" +function entropy_kernel(x; ϵ::Real = 0.2*StatsBase.std(x), method = KDTree, w = 0, + metric = Euclidean(), base = MathConstants.e) + est = NaiveKernel(ϵ, method, w = w, metric = metric) + return entropy_renyi(x, est; base = base, q = 1) +end diff --git a/src/multiscale/multiscale.jl b/src/multiscale/multiscale.jl new file mode 100644 index 000000000..dd05d2812 --- /dev/null +++ b/src/multiscale/multiscale.jl @@ -0,0 +1,31 @@ +struct MultiscaleEntropy end + +""" + multiscale_entropy(x::AbstractVector) + +""" +function multiscale_entropy end + +function coarse_grain(x::AbstractVector{T}, τ) where T + N = length(x) + ys = Vector{T}(undef, 0) + + for j = 1:floor(Int, N/τ) + yⱼ = 0.0 + for i = (j-1)*τ+1:j*τ + yⱼ += x[i] + end + yⱼ *= 1/τ + push!(ys, yⱼ) + end + return ys +end + +x = rand(3 * 10^4 ) +y1 = coarse_grain(x, 1) +y2 = coarse_grain(x, 2) +y3 = coarse_grain(x, 3) +y4 = coarse_grain(x, 4) +y5 = coarse_grain(x, 5) +y6 = coarse_grain(x, 6) +y7 = coarse_grain(x, 20) diff --git a/src/symbolic/SymbolicAmplitudeAware.jl b/src/symbolic/SymbolicAmplitudeAware.jl index 70abbdc0f..fc5987c3e 100644 --- a/src/symbolic/SymbolicAmplitudeAware.jl +++ b/src/symbolic/SymbolicAmplitudeAware.jl @@ -5,7 +5,7 @@ export SymbolicAmplitudeAwarePermutation See docstring for [`SymbolicPermutation`](@ref). 
""" -struct SymbolicAmplitudeAwarePermutation{F} <: PermutationProbabilityEstimator +struct SymbolicAmplitudeAwarePermutation{F} <: ProbabilitiesEstimator τ::Int m::Int A::Float64 diff --git a/src/symbolic/SymbolicPermutation.jl b/src/symbolic/SymbolicPermutation.jl index 1ce16ca80..9ae40b5e8 100644 --- a/src/symbolic/SymbolicPermutation.jl +++ b/src/symbolic/SymbolicPermutation.jl @@ -3,7 +3,7 @@ export SymbolicPermutation """ A probability estimator based on permutations. """ -abstract type PermutationProbabilityEstimator <: SymbolicProbabilityEstimator end +abstract type PermutationProbabilityEstimator <: ProbabilitiesEstimator end """ SymbolicPermutation(; τ = 1, m = 3, lt = Entropies.isless_rand) <: ProbabilityEstimator diff --git a/src/symbolic/SymbolicWeightedPermutation.jl b/src/symbolic/SymbolicWeightedPermutation.jl index 98f9f0d54..4694fcee7 100644 --- a/src/symbolic/SymbolicWeightedPermutation.jl +++ b/src/symbolic/SymbolicWeightedPermutation.jl @@ -8,7 +8,7 @@ export SymbolicWeightedPermutation See docstring for [`SymbolicPermutation`](@ref). """ -struct SymbolicWeightedPermutation{F} <: PermutationProbabilityEstimator +struct SymbolicWeightedPermutation{F} <: ProbabilitiesEstimator τ::Int m::Int lt::F diff --git a/src/symbolic/symbolic.jl b/src/symbolic/symbolic.jl index ae761da20..3c938f9c4 100644 --- a/src/symbolic/symbolic.jl +++ b/src/symbolic/symbolic.jl @@ -1,9 +1,6 @@ -export SymbolicProbabilityEstimator - -""" -A probability estimator based on symbolization. -""" -abstract type SymbolicProbabilityEstimator <: ProbabilitiesEstimator end +export entropy_perm +export entropy_weightedperm +export entropy_ampperm include("utils.jl") include("SymbolicPermutation.jl") @@ -12,12 +9,53 @@ include("SymbolicAmplitudeAware.jl") include("spatial_permutation.jl") """ - permentropy(x; τ = 1, m = 3, base = MathConstants.e) -Shorthand for `entropy_renyi(x, SymbolicPermutation(m = m, τ = τ); base)` which -calculates the permutation entropy of order `m` with delay time `τ` used for embedding. + entropy_perm(x; τ = 1, m = 3, base = MathConstants.e) + +Compute the (Shannon) permutation entropy (Bandt & Pompe, 2002)[^BandtPompe2002] of order `m` with delay time `τ`. + +Short-hand for `entropy_renyi(x, SymbolicPermutation(τ = 1, m = 3), base = base, q = 1)`. + +See also: [`SymbolicPermutation`](@ref), [`entropy_renyi`](@ref). + +[^BandtPompe2002]: Bandt, Christoph, and Bernd Pompe. "Permutation entropy: a natural + complexity measure for time series." Physical review letters 88.17 (2002): 174102. +""" +function entropy_perm(x; τ = 1, m = 3, base = MathConstants.e, lt = isless_rand) + est = SymbolicPermutation(m = m, τ = τ, lt = lt) + return entropy_renyi(x, est; base = base, q = 1) +end + +""" + entropy_weightedperm(x; τ = 1, m = 3, base = MathConstants.e) + +Compute the (Shannon) weighted permutation entropy (Fadlallah et al., 2013)[^Fadlallah2013] of order `m` with delay time `τ`. + +Short-hand for `entropy_renyi(x, SymbolicWeightedPermutation(τ = 1, m = 3), base = base, q = 1)`. + +See also: [`SymbolicPermutation`](@ref), [`entropy_renyi`](@ref). + +[^Fadlallah2013]: Fadlallah, Bilal, et al. "Weighted-permutation entropy: A complexity + measure for time series incorporating amplitude information." Physical Review E 87.2 (2013): 022911. 
""" -function permentropy(x; τ = 1, m = 3, base = MathConstants.e) - return entropy_renyi(x, SymbolicPermutation(m = m, τ = τ); base) +function entropy_weightedperm(x; τ = 1, m = 3, base = MathConstants.e, lt = isless_rand) + est = SymbolicWeightedPermutation(m = m, τ = τ, lt = lt) + return entropy_renyi(x, est; base = base, q = 1) end -export permentropy +""" + entropy_ampperm(x; τ = 1, m = 3, A = 0.5, base = MathConstants.e) + +Compiute the (Shannon) amplitude-aware permutation entropy of order `m` with delay time `τ`, +with weighting parameter `A`. + +Short-hand for `entropy_renyi(x, SymbolicAmplitudeAwarePermutation(τ = 1, m = 3, A = A), base = base, q = 1)`. + +See also: [`SymbolicPermutation`](@ref), [`entropy_renyi`](@ref). + +[^Azami2016]: Azami, H., & Escudero, J. (2016). Amplitude-aware permutation entropy: + Illustration in spike detection and signal segmentation. Computer methods and programs in biomedicine, 128, 40-51. +""" +function entropy_ampperm(x; τ = 1, m = 3, A = 0.5, base = MathConstants.e, lt = isless_rand) + est = SymbolicAmplitudeAwarePermutation(m = m, τ = τ, A = A, lt = lt; base) + return entropy_renyi(x, est; base = base, q = 1) +end diff --git a/src/timescales/timescales.jl b/src/timescales/timescales.jl index df797d772..9b9a55cb6 100644 --- a/src/timescales/timescales.jl +++ b/src/timescales/timescales.jl @@ -1 +1,21 @@ -include("wavelet_overlap.jl") \ No newline at end of file +export entropy_wavelet + +include("wavelet_overlap.jl") + +""" + entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(), + base = MathConstants.e) + +Estimate (Shannon) wave entropy (Rosso, 2001) to the given `base` using maximal overlap +discrete wavelet transform (MODWT). + +See also: [`WaveletOverlap`](@ref). + +[^Rosso2001]: + Rosso et al. (2001). Wavelet entropy: a new tool for analysis of short duration + brain electrical signals. Journal of neuroscience methods, 105(1), 65-75. +""" +function entropy_wavelet(x; wavelet::W = Wavelets.WT.Daubechies{12}(), base = MathConstants.e) where W <:Wavelets.WT.OrthoWaveletClass + est = WaveletOverlap(wavelet) + entropy_renyi(x, est; base = base, q = 1) +end From 653756156d33c1691f55e9521e05316fd44bc87c Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 14:03:05 +0200 Subject: [PATCH 09/32] Unnecessary capitalization --- src/symbolic/spatial_permutation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/symbolic/spatial_permutation.jl b/src/symbolic/spatial_permutation.jl index bca936047..605040c61 100644 --- a/src/symbolic/spatial_permutation.jl +++ b/src/symbolic/spatial_permutation.jl @@ -4,7 +4,7 @@ export SpatialSymbolicPermutation SpatialSymbolicPermutation(stencil, x, periodic = true) A symbolic, permutation-based probabilities/entropy estimator for spatiotemporal systems. The data are a high-dimensional array `x`, such as 2D [^Ribeiro2012] or 3D [^Schlemmer2018]. -This approach is also known as _Spatiotemporal Permutation Entropy_. +This approach is also known as _spatiotemporal permutation entropy_. `x` is given because we need to know its size for optimization and bound checking. 
A _stencil_ defines what local area around each pixel to From 50aa2651112e7e7666f6f2277b99e273feb9d4a7 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 14:49:49 +0200 Subject: [PATCH 10/32] Further refactoring --- docs/make.jl | 5 +--- docs/src/index.md | 20 ++++++++++----- docs/src/probabilities.md | 21 +++++++++------- docs/src/shannon_entropies.md | 9 ++++++- docs/style.jl | 1 - .../rectangular/TransferOperator.jl | 18 +++++++++++++ .../rectangular/VisitationFrequency.jl | 17 +++++++++++++ ...rsion_entropy.jl => entropy_dispersion.jl} | 0 src/symbolic/symbolic.jl | 25 +++++++++++++++++++ 9 files changed, 95 insertions(+), 21 deletions(-) rename src/dispersion/{dispersion_entropy.jl => entropy_dispersion.jl} (100%) diff --git a/docs/make.jl b/docs/make.jl index b3801e406..d9e0f74a6 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -8,7 +8,7 @@ using DelayEmbeddings using Documenter using DocumenterTools: Themes using Entropies -using PyPlot +using CairoMakie using DynamicalSystems using Wavelets @@ -29,7 +29,6 @@ Themes.compile(joinpath(@__DIR__, "juliadynamics-light.scss"), joinpath(@__DIR__ Themes.compile(joinpath(@__DIR__, "juliadynamics-dark.scss"), joinpath(@__DIR__, "src/assets/themes/documenter-dark.css")) # %% Build docs -PyPlot.ioff() cd(@__DIR__) ENV["JULIA_DEBUG"] = "Documenter" @@ -64,5 +63,3 @@ if CI push_preview = true ) end -PyPlot.close("all") -PyPlot.ion() diff --git a/docs/src/index.md b/docs/src/index.md index 614b932bc..29385b8c7 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,10 +2,6 @@ This package provides estimators for probabilities, entropies, and complexity measures for timeseries, nonlinear dynamics and complex systems. It is used in the [CausalityTools.jl](https://github.com/JuliaDynamics/CausalityTools.jl) and [DynamicalSystems.jl](https://github.com/JuliaDynamics/DynamicalSystems.jl) packages. -This package assumes that your data is represented by the `Dataset`-type from [`DelayEmbeddings.jl`](https://github.com/JuliaDynamics/DelayEmbeddings.jl), where each observation is a D-dimensional data point. See the [`DynamicalSystems.jl` documentation](https://juliadynamics.github.io/DynamicalSystems.jl/dev/) for more info. Univariate timeseries given as -`AbstractVector{<:Real}` also work with some estimators, but are treated differently -based on which method for probability/entropy estimation is applied. - ## API & terminology In the literature, the term "entropy" is used (and abused) in multiple contexts. We use the following distinctions. @@ -25,6 +21,18 @@ The main **API** of this package is thus contained in three functions: - [`entropy_tsallis`](@ref), which uses the output of [`probabilities`](@ref), or a set of pre-computed [`Probabilities`](@ref), to calculate Tsallis entropies. - Convenience functions for commonly used methods appear throughout the documentation. -These functions dispatch on the probability estimators listed [here](@ref estimators). +These functions dispatch on the probability estimators listed [here](@ref estimators), and are used behind the scenes by many of the [Shannon entropy](@ref shannon_entropy) convenience methods. + +*Note: there are fewer probability estimators than there are Shannon entropy estimators, because some Shannon entropy estimators are indirect, in the sense that they don't explicitly compute probability distributions*. 
+
+## Input data
+
+### Temporal (1D) data
+
+In this package, [probability](@ref estimators), [generalized entropy](@ref generalized_entropy) and [Shannon entropy](@ref shannon_entropy) estimators assume that temporal data is represented by the `Dataset`-type from [`DelayEmbeddings.jl`](https://github.com/JuliaDynamics/DelayEmbeddings.jl), where each observation is a D-dimensional data point. See the [`DynamicalSystems.jl` documentation](https://juliadynamics.github.io/DynamicalSystems.jl/dev/) for more info. Univariate timeseries given as
+`AbstractVector{<:Real}` also work with some estimators, but are treated differently
+based on which method for probability/entropy estimation is applied.
+
+### Spatiotemporal (2D and higher) data
 
-*Note: there are fewer probability estimators than there are Shannon entropy estimators, because some Shannon entropy are indirect, in the sense that they don't explicitly compute probability distributions*
+The [`SpatialSymbolicPermutation`](@ref) probability estimator handles probability and entropy computations for arbitrary `N`-dimensional data (e.g. 2D or 3D images). These data should be provided as `AbstractArray{T, N}` where `N` is the dimension of the data.
diff --git a/docs/src/probabilities.md b/docs/src/probabilities.md
index 3ddceaf4a..ab6db851a 100644
--- a/docs/src/probabilities.md
+++ b/docs/src/probabilities.md
@@ -11,37 +11,40 @@ probabilities!
 ProbabilitiesEstimator
 ```
 
-## Count occurrences (counting)
+## Estimators
+
+### Count occurrences (counting)
 
 ```@docs
 CountOccurrences
 ```
 
-## Permutation (symbolic)
+### Permutation (symbolic)
 
 ```@docs
 SymbolicPermutation
+SpatialSymbolicPermutation
 ```
 
-## Visitation frequency (binning)
+### Visitation frequency (binning)
 
 ```@docs
 VisitationFrequency
 ```
 
-### Specifying binning/boxes
+#### Specifying binning/boxes
 
 ```@docs
 RectangularBinning
 ```
 
-## Transfer operator (binning)
+### Transfer operator (binning)
 
 ```@docs
 TransferOperator
 ```
 
-### Utility methods/types
+#### Utility methods/types
 
 ```@docs
 InvariantMeasure
@@ -49,13 +52,13 @@ invariantmeasure
 transfermatrix
 ```
 
-## Kernel density
+### Kernel density
 
 ```@docs
 NaiveKernel
 ```
 
-### Example
+#### Example
 
 Here, we draw some random points from a 2D normal distribution. Then, we use kernel
 density estimation to associate a probability to each point `p`, measured by how many
@@ -76,7 +79,7 @@ ax.zticklabelsvisible = false
 fig
 ```
 
-## Wavelet
+### Wavelet
 
 ```@docs
 WaveletOverlap
diff --git a/docs/src/shannon_entropies.md b/docs/src/shannon_entropies.md
index 81e1b4651..64f7b732b 100644
--- a/docs/src/shannon_entropies.md
+++ b/docs/src/shannon_entropies.md
@@ -2,7 +2,6 @@
 
 Many methods in the literature compute (Shannon) entropy in ways that don't explicitly result in probability distributions, so they can't be used in combination with [`probabilities`](@ref), [`entropy_renyi`](@ref) or [`entropy_tsallis`](@ref). Instead, they appear here as stand-alone functions.
 
-
 ## Nearest neighbors entropy
 
 ```@docs
@@ -62,6 +61,7 @@
 entropy_perm
 entropy_weightedperm
 entropy_ampperm
+entropy_spatialperm
 ```
 
 ### Example
@@ -163,3 +163,10 @@ for a in (ax, ay, az); axislegend(a); end
 for a in (ax, ay); hidexdecorations!(a; grid=false); end
 fig
 ```
+
+## Binning-based entropy
+
+```@docs
+entropy_visitfreq
+entropy_transferoperator
+```
diff --git a/docs/style.jl b/docs/style.jl
index 8d361fffe..6271460ae 100644
--- a/docs/style.jl
+++ b/docs/style.jl
@@ -1,4 +1,3 @@
-using CairoMakie
 # %% Color theme definitions
 struct CyclicContainer <: AbstractVector{String}
     c::Vector{String}
     n::Int
diff --git a/src/binning_based/rectangular/TransferOperator.jl b/src/binning_based/rectangular/TransferOperator.jl
index 8c9a026d9..20291b3af 100644
--- a/src/binning_based/rectangular/TransferOperator.jl
+++ b/src/binning_based/rectangular/TransferOperator.jl
@@ -454,3 +454,21 @@ See also: [`TransferOperator`](@ref).
 function transfermatrix(iv::InvariantMeasure)
     return iv.to.transfermatrix, iv.to.bins
 end
+
+"""
+    entropy_transferoperator(x, binning::RectangularBinning; base = MathConstants.e)
+
+Compute the (Shannon) entropy of `x` using an approximation to the transfer operator over
+the state-space coarse graining produced by the provided `binning` scheme.
+
+Short-hand for `entropy_renyi(x, TransferOperator(binning); base = base, q = 1)`.
+
+See also: [`TransferOperator`](@ref).
+
+[^Diego2019]: Diego, D., Haaga, K. A., & Hannisdal, B. (2019). Transfer entropy computation
+    using the Perron-Frobenius operator. Physical Review E, 99(4), 042212.
+"""
+function entropy_transferoperator(x, binning::RectangularBinning; base = MathConstants.e)
+    est = TransferOperator(binning)
+    entropy_renyi(x, est; base = base, q = 1)
+end
diff --git a/src/binning_based/rectangular/VisitationFrequency.jl b/src/binning_based/rectangular/VisitationFrequency.jl
index 26fbcfd03..28c6831ea 100644
--- a/src/binning_based/rectangular/VisitationFrequency.jl
+++ b/src/binning_based/rectangular/VisitationFrequency.jl
@@ -1,4 +1,6 @@
 export VisitationFrequency, probabilities
+export entropy_visitfreq
+export entropy_transferoperator
 
 """
     VisitationFrequency(r::RectangularBinning) <: BinningProbabilitiesEstimator
@@ -26,3 +28,18 @@ end
 function probabilities(x::AbstractDataset, est::VisitationFrequency)
     _non0hist(x, est.binning)[1]
 end
+
+"""
+    entropy_visitfreq(x, binning::RectangularBinning; base = MathConstants.e)
+
+Compute the (Shannon) entropy of `x` by counting visitation frequencies over
+the state-space coarse graining produced by the provided `binning` scheme.
+
+Short-hand for `entropy_renyi(x, VisitationFrequency(binning); base = base, q = 1)`.
+
+See also: [`VisitationFrequency`](@ref). 
+"""
+function entropy_visitfreq(x, binning::RectangularBinning; base = MathConstants.e)
+    est = VisitationFrequency(binning)
+    entropy_renyi(x, est; base = base, q = 1)
+end
diff --git a/src/dispersion/dispersion_entropy.jl b/src/dispersion/entropy_dispersion.jl
similarity index 100%
rename from src/dispersion/dispersion_entropy.jl
rename to src/dispersion/entropy_dispersion.jl
diff --git a/src/symbolic/symbolic.jl b/src/symbolic/symbolic.jl
index 3c938f9c4..1adac04c7 100644
--- a/src/symbolic/symbolic.jl
+++ b/src/symbolic/symbolic.jl
@@ -1,6 +1,7 @@
 export entropy_perm
 export entropy_weightedperm
 export entropy_ampperm
+export entropy_spatialperm
 
 include("utils.jl")
 include("SymbolicPermutation.jl")
@@ -59,3 +60,27 @@ function entropy_ampperm(x; τ = 1, m = 3, A = 0.5, base = MathConstants.e, lt =
     est = SymbolicAmplitudeAwarePermutation(m = m, τ = τ, A = A, lt = lt)
     return entropy_renyi(x, est; base = base, q = 1)
 end
+
+"""
+    entropy_spatialperm(x, stencil; periodic = false, base = MathConstants.e)
+
+Compute the spatiotemporal permutation entropy of `x`, which is a higher-dimensional
+array (e.g. 2D[^Ribeiro2012] or 3D[^Schlemmer2018]), using the given stencil for symbolizing sub-matrices,
+using circular wrapping around array boundaries if `periodic == true`.
+
+Short-hand for `entropy_renyi(x, SpatialSymbolicPermutation(stencil, x, periodic), base = base, q = 1)`.
+
+See also: [`SpatialSymbolicPermutation`](@ref), [`entropy_renyi`](@ref).
+
+[^Ribeiro2012]:
+    Ribeiro et al. (2012). Complexity-entropy causality plane as a complexity measure
+    for two-dimensional patterns. https://doi.org/10.1371/journal.pone.0040689
+
+[^Schlemmer2018]:
+    Schlemmer et al. (2018). Spatiotemporal Permutation Entropy as a Measure for
+    Complexity of Cardiac Arrhythmia. https://doi.org/10.3389/fphy.2018.00039
+"""
+function entropy_spatialperm(x, stencil; periodic = false, base = MathConstants.e)
+    est = SpatialSymbolicPermutation(stencil, x, periodic)
+    entropy_renyi(x, est; base = base, q = 1)
+end
From 1ea12f8ede2cd9d65e56116ae7a46354fcd50e7f Mon Sep 17 00:00:00 2001
From: Kristian Haaga
Date: Wed, 7 Sep 2022 14:54:53 +0200
Subject: [PATCH 11/32] Missing part of equation

---
 src/tsallis/tsallis.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tsallis/tsallis.jl b/src/tsallis/tsallis.jl
index 7c1d3e701..0d5fb4b27 100644
--- a/src/tsallis/tsallis.jl
+++ b/src/tsallis/tsallis.jl
@@ -6,7 +6,7 @@ export entropy_tsallis
 Compute the Tsallis entropy of `x` (Tsallis, 1988)[^Tsallis1988].
 
 ```math
-S_q(\\bf p) = \\dfrac{k}{q - 1}\\left(1 - \\sum_{p_i \\in \\bf p} \\right)
+S_q(\\bf p) = \\dfrac{k}{q - 1}\\left(1 - \\sum_{p_i \\in \\bf p} p_i^q\\right)
 ```
 
 [^Tsallis1988]: Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics. Journal of statistical physics, 52(1), 479-487. 
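
The convenience functions assembled by the patches above are all thin wrappers around
`entropy_renyi(x, est; q = 1, base = base)` with the appropriate estimator. A minimal
usage sketch of the resulting end-user API, assuming the signatures shown in the
docstrings above (the inputs `x` and `D` below are hypothetical test data, not part of
any patch):

```julia
using Entropies, DelayEmbeddings

# Univariate time series input for the symbolic, wavelet and dispersion methods.
x = rand(10_000)
h_perm = entropy_perm(x; m = 3, τ = 1)  # permutation entropy (Bandt & Pompe)
h_wav  = entropy_wavelet(x)             # MODWT-based wavelet entropy
h_disp = entropy_dispersion(x)          # dispersion entropy

# Multivariate `Dataset` input for the nearest-neighbor and binning methods.
D = Dataset(rand(10_000, 3))
h_nn = entropy_kozachenkoleonenko(D; k = 2, w = 0)   # nearest-neighbor estimate
h_vf = entropy_visitfreq(D, RectangularBinning(10))  # visitation-frequency estimate
```

Each call constructs the corresponding estimator (`SymbolicPermutation`,
`WaveletOverlap`, `VisitationFrequency`, ...) and delegates to `entropy_renyi` with
`q = 1`, so the probabilities-based API remains available underneath.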
From 873e7c6dbd0805e9933ae101652de5c41428c1fc Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 7 Sep 2022 15:04:37 +0200 Subject: [PATCH 12/32] Use new name --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index fdd4009a2..dcacd812c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -341,7 +341,7 @@ end hist = Entropies.dispersion_histogram(dispersion_patterns, length(x), m, τ) @test sum(hist) ≈ 1.0 - de = dispersion_entropy(x, s, m = 4, τ = 1) + de = entropy_dispersion(x, s, m = 4, τ = 1) @test typeof(de) <: Real @test de >= 0.0 end From 256b4e1f5e68d2d5c3b77fe042069e82928ad2c9 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Thu, 8 Sep 2022 10:15:56 +0200 Subject: [PATCH 13/32] Add deprecations --- src/deprecated/genentropy.jl | 94 +++++++++++++++++++++++++++++++++ src/deprecated/symbolization.jl | 42 +++++++++++++++ src/deprecations.jl | 6 ++- 3 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 src/deprecated/genentropy.jl create mode 100644 src/deprecated/symbolization.jl diff --git a/src/deprecated/genentropy.jl b/src/deprecated/genentropy.jl new file mode 100644 index 000000000..345347cd3 --- /dev/null +++ b/src/deprecated/genentropy.jl @@ -0,0 +1,94 @@ + +""" + genentropy(p::Probabilities; q = 1.0, base = MathConstants.e) + +Compute the generalized order-`q` entropy of some probabilities +returned by the [`probabilities`](@ref) function. Alternatively, compute entropy +from pre-computed `Probabilities`. + + genentropy(x::Array_or_Dataset, est; q = 1.0, base) + +A convenience syntax, which calls first `probabilities(x, est)` +and then calculates the entropy of the result (and thus `est` can be a +`ProbabilitiesEstimator` or simply `ε::Real`). + +## Description + +Let ``p`` be an array of probabilities (summing to 1). Then the generalized (Rényi) entropy is + +```math +H_q(p) = \\frac{1}{1-q} \\log \\left(\\sum_i p[i]^q\\right) +``` + +and generalizes other known entropies, +like e.g. the information entropy +(``q = 1``, see [^Shannon1948]), the maximum entropy (``q=0``, +also known as Hartley entropy), or the correlation entropy +(``q = 2``, also known as collision entropy). + +[^Rényi1960]: A. Rényi, *Proceedings of the fourth Berkeley Symposium on Mathematics, + Statistics and Probability*, pp 547 (1960) +[^Shannon1948]: C. E. Shannon, Bell Systems Technical Journal **27**, pp 379 (1948) +""" +function genentropy end + +function genentropy(prob::Probabilities; q = 1.0, α = nothing, base = MathConstants.e) + Base.depwarn("`genentropy(x)` is deprecated, use `renyientropy(x)` instead.", :genentropy) + + if α ≠ nothing + @warn "Keyword `α` is deprecated in favor of `q`." + q = α + end + q < 0 && throw(ArgumentError("Order of generalized entropy must be ≥ 0.")) + haszero = any(iszero, prob) + p = if haszero + i0 = findall(iszero, prob.p) + # We copy because if someone initialized Probabilities with 0s, I would guess + # they would want to index the zeros as well. Not so costly anyways. 
+        deleteat!(copy(prob.p), i0)
+    else
+        prob.p
+    end
+
+    if q ≈ 0
+        return log(base, length(p)) #Hartley entropy, max-entropy
+    elseif q ≈ 1
+        return -sum( x*log(base, x) for x in p ) #Shannon entropy
+    elseif isinf(q)
+        return -log(base, maximum(p)) #Min entropy
+    else
+        return (1/(1-q))*log(base, sum(x^q for x in p) ) #Renyi q entropy
+    end
+end
+
+function genentropy(::AbstractArray{<:Real})
+    Base.depwarn("`genentropy(x)` is deprecated, use `renyientropy(x)` instead.", :genentropy)
+    error("For single-argument input, do `genentropy(Probabilities(x))` instead.")
+end
+
+function genentropy(x::Array_or_Dataset, est; q = 1.0, α = nothing, base = MathConstants.e)
+    Base.depwarn("`genentropy(x, est)` is deprecated, use `renyientropy(x, est)` instead.", :genentropy)
+    if α ≠ nothing
+        @warn "Keyword `α` is deprecated in favor of `q`."
+        q = α
+    end
+    p = probabilities(x, est)
+    genentropy(p; q = q, base = base)
+end
+
+"""
+    genentropy!(p, x, est::ProbabilitiesEstimator; q = 1.0, base = MathConstants.e)
+Similarly to `probabilities!`, this is an in-place version of `genentropy` that allows
+pre-allocation of temporarily used containers.
+Only works for certain estimators. See for example [`SymbolicPermutation`](@ref).
+"""
+function genentropy!(p, x, est; q = 1.0, α = nothing, base = MathConstants.e)
+    Base.depwarn("`genentropy(p, x, est)` is deprecated, use `renyientropy(p, x, est)` instead.", :genentropy)
+
+    if α ≠ nothing
+        @warn "Keyword `α` is deprecated in favor of `q`."
+        q = α
+    end
+    probabilities!(p, x, est)
+    genentropy(p; q = q, base = base)
+end
diff --git a/src/deprecated/symbolization.jl b/src/deprecated/symbolization.jl
new file mode 100644
index 000000000..ba04c3f65
--- /dev/null
+++ b/src/deprecated/symbolization.jl
@@ -0,0 +1,42 @@
+function symbolize(x::AbstractDataset{m, T}, est::P) where {m, T, P <: PermutationProbabilityEstimator}
+    Base.depwarn("`symbolize(x, est::$P)` is deprecated, use `symbolize(x, scheme::OrdinalPattern)` instead.", :symbolize)
+
+    m >= 2 || error("Data must be at least 2-dimensional to symbolize. If data is a univariate time series, embed it using `genembed` first.")
+    s = zeros(Int, length(x))
+    symbolize!(s, x, est)
+    return s
+end
+
+function symbolize(x::AbstractVector{T}, est::P) where {T, P <: PermutationProbabilityEstimator}
+    Base.depwarn("`symbolize(x, est::$P)` is deprecated, use `symbolize(x, scheme::OrdinalPattern)` instead.", :symbolize)
+
+    τs = tuple([est.τ*i for i = 0:est.m-1]...)
+    x_emb = genembed(x, τs)
+
+    s = zeros(Int, length(x_emb))
+    symbolize!(s, x_emb, est)
+    return s
+end
+
+function symbolize!(s::AbstractVector{Int}, x::AbstractDataset{m, T}, est::SymbolicPermutation) where {m, T}
+    Base.depwarn("`symbolize!(s, x, est::SymbolicPermutation)` is deprecated, use `symbolize!(s, x, scheme::OrdinalPattern)` instead.", :symbolize)
+
+    @assert length(s) == length(x)
+    #=
+    Loop over embedding vectors `E[i]`, find the indices `p_i` that sort each `E[i]`,
+    then get the corresponding integers `k_i` that generated the
+    permutations `p_i`. Those integers are the symbols for the embedding vectors
+    `E[i]`.
+    =#
+    sp = zeros(Int, m) # pre-allocate a single symbol vector that can be overwritten. 
+    fill_symbolvector!(s, x, sp, m, lt = est.lt)
+
+    return s
+end
+
+function symbolize!(s::AbstractVector{Int}, x::AbstractVector{T}, est::SymbolicPermutation) where T
+    Base.depwarn("`symbolize!(s, x, est::SymbolicPermutation)` is deprecated, use `symbolize!(s, x, scheme::OrdinalPattern)` instead.", :symbolize)
+    τs = tuple([est.τ*i for i = 0:est.m-1]...)
+    x_emb = genembed(x, τs)
+    symbolize!(s, x_emb, est)
+end
diff --git a/src/deprecations.jl b/src/deprecations.jl
index 947f8ccd6..7801dbf03 100644
--- a/src/deprecations.jl
+++ b/src/deprecations.jl
@@ -1 +1,5 @@
-@deprecate TimeScaleMODWT WaveletOverlap
\ No newline at end of file
+include("deprecated/genentropy.jl")
+include("deprecated/symbolization.jl")
+
+@deprecate TimeScaleMODWT WaveletOverlap
+@deprecate genentropy renyientropy
From 46c86dbc918876d952652532acc3f68c02a2d9bd Mon Sep 17 00:00:00 2001
From: Kristian Haaga
Date: Thu, 8 Sep 2022 10:16:09 +0200
Subject: [PATCH 14/32] Add note for single-symbol encoding in docstring

---
 src/symbolization/OrdinalPattern.jl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/symbolization/OrdinalPattern.jl b/src/symbolization/OrdinalPattern.jl
index 4e5bfd713..b0e6958a9 100644
--- a/src/symbolization/OrdinalPattern.jl
+++ b/src/symbolization/OrdinalPattern.jl
@@ -22,6 +22,12 @@ reconstruction using embedding dimension `m` and lag `τ`. The resulting
 If using the in-place variant with univariate input, `s` must obey
 `length(s) == length(x)-(est.m-1)*est.τ`.
 
+!!! note
+    `OrdinalPattern` is intended for symbolizing *time series*. If providing a short vector,
+    say `x = [2, 5, 2, 1, 3, 4]`, then `symbolize(x, OrdinalPattern(m = 2, τ = 1))` will
+    first embed `x`, then encode/symbolize each resulting *state vector*, not the original
+    input. For symbolizing a permutation pattern, use [`encode_motif`](@ref).
+
 ## Examples
 
 ```julia
From e23dbccb97b9a4b2c24e1ed99e68a5b656fb76d3 Mon Sep 17 00:00:00 2001
From: Kristian Haaga
Date: Thu, 8 Sep 2022 10:16:09 +0200
Subject: [PATCH 15/32] Add note for single-symbol encoding in docstring

---
 src/symbolization/OrdinalPattern.jl | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/symbolization/OrdinalPattern.jl b/src/symbolization/OrdinalPattern.jl
index 4e5bfd713..b78438e99 100644
--- a/src/symbolization/OrdinalPattern.jl
+++ b/src/symbolization/OrdinalPattern.jl
@@ -3,8 +3,15 @@ export OrdinalPattern
 """
     OrdinalPattern(m = 3, τ = 1; lt = est.lt)
 
-A symbolization scheme that converts the input data to ordinal patterns, which are then
-encoded to integers using [`encode_motif`](@ref).
+A symbolization scheme that converts the input time series to ordinal patterns, which are
+then encoded to integers using [`encode_motif`](@ref).
+
+!!! note
+    `OrdinalPattern` is intended for symbolizing *time series*. If providing a short vector,
+    say `x = [2, 5, 2, 1, 3, 4]`, then `symbolize(x, OrdinalPattern(m = 2, τ = 1))` will
+    first embed `x`, then encode/symbolize each resulting *state vector*, not the original
+    input. For symbolizing a single vector, use `sortperm` on it and use
+    [`encode_motif`](@ref) on the resulting permutation indices.
 
 # Usage
 
@@ -12,13 +19,15 @@ encoded to integers using [`encode_motif`](@ref).
     symbolize!(s, x, scheme::OrdinalPattern) → Vector{Int}
 
 If applied to an `m`-dimensional [`Dataset`](@ref) `x`, then `m` and `τ` are ignored,
-and each `m`-dimensional `xᵢ ∈ x` is directly encoded using [`encode_motif`](@ref). 
+and `m`-dimensional permutation patterns are obtained directly for each
+`xᵢ ∈ x`. Permutation patterns are then encoded as integers using [`encode_motif`](@ref).
 Optionally, symbols can be written directly into a pre-allocated integer vector `s`,
 where `length(s) == length(x)` using `symbolize!`.
 
 If applied to a univariate vector `x`, then `x` is first converted to a delay
-reconstruction using embedding dimension `m` and lag `τ`. The resulting
-`m`-dimensional `xᵢ ∈ x` are then encoded using [`encode_motif`](@ref).
+reconstruction using embedding dimension `m` and lag `τ`. Permutation patterns are then
+computed for each of the resulting `m`-dimensional `xᵢ ∈ x`, and each permutation
+is then encoded as an integer using [`encode_motif`](@ref).
 
 If using the in-place variant with univariate input, `s` must obey
 `length(s) == length(x)-(est.m-1)*est.τ`.
From 25fd2313bfbc35ef5f7061a38dac9f9b82b122de Mon Sep 17 00:00:00 2001
From: Kristian Haaga
Date: Thu, 8 Sep 2022 10:29:30 +0200
Subject: [PATCH 16/32] Remove redundant note

---
 src/symbolization/OrdinalPattern.jl | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/symbolization/OrdinalPattern.jl b/src/symbolization/OrdinalPattern.jl
index a104718c8..b78438e99 100644
--- a/src/symbolization/OrdinalPattern.jl
+++ b/src/symbolization/OrdinalPattern.jl
@@ -31,12 +31,6 @@ is then encoded as an integer using [`encode_motif`](@ref).
 If using the in-place variant with univariate input, `s` must obey
 `length(s) == length(x)-(est.m-1)*est.τ`.
 
-!!! note
-    `OrdinalPattern` is intended for symbolizing *time series*. If providing a short vector,
-    say `x = [2, 5, 2, 1, 3, 4]`, then `symbolize(x, OrdinalPattern(m = 2, τ = 1))` will
-    first embed `x`, then encode/symbolize each resulting *state vector*, not the original
-    input. For symbolizing a permutation pattern, use [`encode_motif`](@ref).
-
 ## Examples
 
 ```julia
From 73f0a58031a339011ab021bcb4bf9102597e3688 Mon Sep 17 00:00:00 2001
From: Kristian Haaga
Date: Thu, 8 Sep 2022 10:52:11 +0200
Subject: [PATCH 17/32] Use @deprecate for genentropy

---
 src/deprecated/genentropy.jl | 94 ------------------------------------
 src/deprecations.jl          |  3 +-
 2 files changed, 1 insertion(+), 96 deletions(-)
 delete mode 100644 src/deprecated/genentropy.jl

diff --git a/src/deprecated/genentropy.jl b/src/deprecated/genentropy.jl
deleted file mode 100644
index 345347cd3..000000000
--- a/src/deprecated/genentropy.jl
+++ /dev/null
@@ -1,94 +0,0 @@
-
-"""
-    genentropy(p::Probabilities; q = 1.0, base = MathConstants.e)
-
-Compute the generalized order-`q` entropy of some probabilities
-returned by the [`probabilities`](@ref) function. Alternatively, compute entropy
-from pre-computed `Probabilities`.
-
-    genentropy(x::Array_or_Dataset, est; q = 1.0, base)
-
-A convenience syntax, which calls first `probabilities(x, est)`
-and then calculates the entropy of the result (and thus `est` can be a
-`ProbabilitiesEstimator` or simply `ε::Real`).
-
-## Description
-
-Let ``p`` be an array of probabilities (summing to 1). Then the generalized (Rényi) entropy is
-
-```math
-H_q(p) = \\frac{1}{1-q} \\log \\left(\\sum_i p[i]^q\\right)
-```
-
-and generalizes other known entropies,
-like e.g. the information entropy
-(``q = 1``, see [^Shannon1948]), the maximum entropy (``q=0``,
-also known as Hartley entropy), or the correlation entropy
-(``q = 2``, also known as collision entropy).
-
-[^Rényi1960]: A. 
Rényi, *Proceedings of the fourth Berkeley Symposium on Mathematics, - Statistics and Probability*, pp 547 (1960) -[^Shannon1948]: C. E. Shannon, Bell Systems Technical Journal **27**, pp 379 (1948) -""" -function genentropy end - -function genentropy(prob::Probabilities; q = 1.0, α = nothing, base = MathConstants.e) - Base.depwarn("`genentropy(x)` is deprecated, use `renyientropy(x)` instead.", :genentropy) - - if α ≠ nothing - @warn "Keyword `α` is deprecated in favor of `q`." - q = α - end - q < 0 && throw(ArgumentError("Order of generalized entropy must be ≥ 0.")) - haszero = any(iszero, prob) - p = if haszero - i0 = findall(iszero, prob.p) - # We copy because if someone initialized Probabilities with 0s, I would guess - # they would want to index the zeros as well. Not so costly anyways. - deleteat!(copy(prob.p), i0) - else - prob.p - end - - if q ≈ 0 - return log(base, length(p)) #Hartley entropy, max-entropy - elseif q ≈ 1 - return -sum( x*log(base, x) for x in p ) #Shannon entropy - elseif isinf(q) - return -log(base, maximum(p)) #Min entropy - else - return (1/(1-q))*log(base, sum(x^q for x in p) ) #Renyi q entropy - end -end - -function genentropy(::AbstractArray{<:Real}) - Base.depwarn("`genentropy(x)` is deprecated, use `renyientropy(x)` instead.", :genentropy) - error("For single-argument input, do `genentropy(Probabilities(x))` instead.") -end - -function genentropy(x::Array_or_Dataset, est; q = 1.0, α = nothing, base = MathConstants.e) - Base.depwarn("`genentropy(x, est)` is deprecated, use `renyientropy(x, est)` instead.", :genentropy) - if α ≠ nothing - @warn "Keyword `α` is deprecated in favor of `q`." - q = α - end - p = probabilities(x, est) - genentropy(p; q = q, base = base) -end - -""" - genentropy!(p, x, est::ProbabilitiesEstimator; q = 1.0, base = MathConstants.e) -Similarly with `probabilities!` this is an in-place version of `genentropy` that allows -pre-allocation of temporarily used containers. -Only works for certain estimators. See for example [`SymbolicPermutation`](@ref). -""" -function genentropy!(p, x, est; q = 1.0, α = nothing, base = MathConstants.e) - Base.depwarn("`genentropy(p, x, est)` is deprecated, use `renyientropy(p, x, est)` instead.", :genentropy) - - if α ≠ nothing - @warn "Keyword `α` is deprecated in favor of `q`." 
- q = α - end - probabilities!(p, x, est) - genentropy(p; q = q, base = base) -end diff --git a/src/deprecations.jl b/src/deprecations.jl index 7801dbf03..eacb8b325 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -1,5 +1,4 @@ -include("deprecated/genentropy.jl") include("deprecated/symbolization.jl") @deprecate TimeScaleMODWT WaveletOverlap -@deprecate genentropy renyientropy +@deprecate genentropy entropy_renyi From ddeb1db94b3cf55e717aec675fe5344c5abdf46a Mon Sep 17 00:00:00 2001 From: Datseris Date: Fri, 9 Sep 2022 15:34:31 +0100 Subject: [PATCH 18/32] Add (preliminary) update message --- Project.toml | 2 ++ src/Entropies.jl | 64 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/Project.toml b/Project.toml index 838367f99..fbb90cefb 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Neighborhood = "645ca80c-8b79-4109-87ea-e1f58159d116" QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Scratch = "6c6a2e73-6563-6170-7368-637461726353" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" @@ -23,6 +24,7 @@ DelayEmbeddings = "2" Distances = "0.9, 0.10" Neighborhood = "0.1, 0.2" QuadGK = "2" +Scracth = "1" SpecialFunctions = "0.10, 1.0, 2" StaticArrays = "0.12, 1.0" StatsBase = "0.33" diff --git a/src/Entropies.jl b/src/Entropies.jl index 7eb5811eb..d6950cd92 100644 --- a/src/Entropies.jl +++ b/src/Entropies.jl @@ -1,14 +1,54 @@ +""" +A Julia language package that provides estimators for probabilities, entropies, +and complexity measures for timeseries, nonlinear dynamics and complex systems. +It can be used as standalone or part of several projects in the JuliaDynamics organization, +such as [DynamicalSystems.jl](https://juliadynamics.github.io/DynamicalSystems.jl/dev/) +or [CausalityTools.jl](https://juliadynamics.github.io/CausalityTools.jl/dev/). +""" module Entropies - include("core.jl") - include("symbolization/symbolize.jl") - include("histogram_estimation.jl") - include("counting_based/CountOccurrences.jl") - include("symbolic/symbolic.jl") - include("binning_based/rectangular/rectangular_estimators.jl") - include("kerneldensity/kerneldensity.jl") - include("timescales/timescales.jl") - include("nearest_neighbors/nearest_neighbors.jl") - include("dispersion/entropy_dispersion.jl") - include("tsallis/tsallis.jl") - include("deprecations.jl") + +include("core.jl") +include("symbolization/symbolize.jl") +include("histogram_estimation.jl") +include("counting_based/CountOccurrences.jl") +include("symbolic/symbolic.jl") +include("binning_based/rectangular/rectangular_estimators.jl") +include("kerneldensity/kerneldensity.jl") +include("timescales/timescales.jl") +include("nearest_neighbors/nearest_neighbors.jl") +include("dispersion/entropy_dispersion.jl") +include("tsallis/tsallis.jl") +include("deprecations.jl") + + +# Update messages: +using Scratch +display_update = true +version_number = "1.3.0" +update_name = "update_v$(version_number)" +update_message = """ +\nUpdate message: Entropies v$(version_number)\n +- An overall overhaul of the documentation and API of Entropies has been performed. + Now `genentropy` is just `entropy_renyi`. The documentation further clarifies + the difference between calculating probabilities and entropies. 
+- A huge amount of new content has been added to Entropies, which is best seen + by visiting the changelog or the new documentation. + E.g., Tsallis entropy, spatial permutation entropy, dispersion entropy, and many more. +- New exported API function: `symbolize`. +- New constructors for symbolizing: `OrdinalPattern, GaussianSymbolization`. +""" + +if display_update + # Get scratch space for this package + versions_dir = @get_scratch!("versions") + if !isfile(joinpath(versions_dir, update_name)) + printstyled( + stdout, + update_message; + color = :light_magenta, + ) + touch(joinpath(versions_dir, update_name)) + end +end + end From bf07639d9f779a4641e55e3422f0471e52978511 Mon Sep 17 00:00:00 2001 From: Datseris Date: Fri, 9 Sep 2022 17:11:14 +0100 Subject: [PATCH 19/32] [wip] reusable docs --- docs/make.jl | 20 +++++++------------- docs/toc.jl | 8 ++++++++ 2 files changed, 15 insertions(+), 13 deletions(-) create mode 100644 docs/toc.jl diff --git a/docs/make.jl b/docs/make.jl index d9e0f74a6..fefb1c49e 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,13 +1,12 @@ +using Entropies # using before activation of environment ensures dev'ed version cd(@__DIR__) using Pkg CI = get(ENV, "CI", nothing) == "true" || get(ENV, "GITHUB_TOKEN", nothing) !== nothing -CI && Pkg.activate(@__DIR__) +Pkg.activate(@__DIR__) CI && Pkg.instantiate() -CI && (ENV["GKSwstype"] = "100") using DelayEmbeddings using Documenter using DocumenterTools: Themes -using Entropies using CairoMakie using DynamicalSystems using Wavelets @@ -16,7 +15,10 @@ using Wavelets # download the themes using DocumenterTools: Themes for file in ("juliadynamics-lightdefs.scss", "juliadynamics-darkdefs.scss", "juliadynamics-style.scss") - download("https://raw.githubusercontent.com/JuliaDynamics/doctheme/master/$file", joinpath(@__DIR__, file)) + filepath = joinpath(@__DIR__, file) + if !isfile(filepath) + download("https://raw.githubusercontent.com/JuliaDynamics/doctheme/master/$file", joinpath(@__DIR__, file)) + end end # create the themes for w in ("light", "dark") @@ -29,17 +31,9 @@ Themes.compile(joinpath(@__DIR__, "juliadynamics-light.scss"), joinpath(@__DIR__ Themes.compile(joinpath(@__DIR__, "juliadynamics-dark.scss"), joinpath(@__DIR__, "src/assets/themes/documenter-dark.css")) # %% Build docs -cd(@__DIR__) ENV["JULIA_DEBUG"] = "Documenter" -PAGES = [ - "Entropies.jl" => "index.md", - "Probabilities" => "probabilities.md", - "Generalized entropy" => "generalized_entropies.md", - "Shannon entropy" => "shannon_entropies.md", - "Complexity measures" => "complexity_measures.md", - "Utility methods" => "utils.md", -] +PAGES = include("toc.jl") include("style.jl") diff --git a/docs/toc.jl b/docs/toc.jl new file mode 100644 index 000000000..75e369786 --- /dev/null +++ b/docs/toc.jl @@ -0,0 +1,8 @@ +ENTROPIES_PAGES = [ + "Entropies.jl" => "index.md", + "Probabilities" => "probabilities.md", + "Generalized entropy" => "generalized_entropies.md", + "Shannon entropy" => "shannon_entropies.md", + "Complexity measures" => "complexity_measures.md", + "Utility methods" => "utils.md", +] \ No newline at end of file From bb9019b593594f5c86a068182df29f0ebac5cedd Mon Sep 17 00:00:00 2001 From: Datseris Date: Sat, 10 Sep 2022 14:08:55 +0100 Subject: [PATCH 20/32] re-write intro page --- Project.toml | 2 +- docs/make.jl | 2 +- docs/src/generalized_entropies.md | 7 +++- docs/src/index.md | 55 +++++++++++++++++++------------ docs/src/probabilities.md | 30 ++++++++--------- src/Entropies.jl | 2 +- src/core.jl | 17 +++++----- 7 files changed, 66 
insertions(+), 49 deletions(-) diff --git a/Project.toml b/Project.toml index fbb90cefb..14eea5845 100644 --- a/Project.toml +++ b/Project.toml @@ -24,7 +24,7 @@ DelayEmbeddings = "2" Distances = "0.9, 0.10" Neighborhood = "0.1, 0.2" QuadGK = "2" -Scracth = "1" +Scratch = "1" SpecialFunctions = "0.10, 1.0, 2" StaticArrays = "0.12, 1.0" StatsBase = "0.33" diff --git a/docs/make.jl b/docs/make.jl index fefb1c49e..3958ea86b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -8,8 +8,8 @@ using DelayEmbeddings using Documenter using DocumenterTools: Themes using CairoMakie +using Entropies.Wavelets using DynamicalSystems -using Wavelets # %% JuliaDynamics theme. # download the themes diff --git a/docs/src/generalized_entropies.md b/docs/src/generalized_entropies.md index efe841f45..f70cc3b79 100644 --- a/docs/src/generalized_entropies.md +++ b/docs/src/generalized_entropies.md @@ -1,4 +1,4 @@ -# [Generalized entropies](@id generalized_entropies) +# Generalized entropies Computing entropy boils down to one thing: first estimating a probability distribution, then applying one of the generalized entropy formulas ([`entropy_renyi`](@ref) or [`entropy_tsallis`](@ref)) to these distributions. Thus, any of the implemented [probabilities estimators](@ref estimators) can be used to compute generalized entropies. @@ -13,3 +13,8 @@ Entropies.entropy_renyi ```@docs Entropies.entropy_tsallis ``` + +## Shannon entropy (convenience) +```@docs +entropy_shannon +``` diff --git a/docs/src/index.md b/docs/src/index.md index 29385b8c7..bd60b78ec 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,38 +1,51 @@ # Entropies.jl - -This package provides estimators for probabilities, entropies, and complexity measures for timeseries, nonlinear dynamics and complex systems. It is used in the [CausalityTools.jl](https://github.com/JuliaDynamics/CausalityTools.jl) and [DynamicalSystems.jl](https://github.com/JuliaDynamics/DynamicalSystems.jl) packages. +```@docs +Entropies +``` ## API & terminology -In the literature, the term "entropy" is used (and abused) in multiple contexts. We use the following distinctions. +!!! note + The documentation here follows (loosely) chapter 5 of + [Nonlinear Dynamics](https://link.springer.com/book/10.1007/978-3-030-91032-7), + Datseris & Parlitz, Springer 2022. -- [Generalized Rényi and Tsallis entropies](@ref generalized_entropies) are theoretically well-founded concepts that are functions of *probability distributions*. -- [Shannon entropy](@ref shannon_entropies) is a special case of Rényi and Tsallis entropies. We provide convenience functions for most common Shannon entropy estimators. +In the literature, the term "entropy" is used (and abused) in multiple contexts. +The API of Entropies.jl and documentation aim to clarify some aspects and +provide a simple way to obtain probabilities, entropies, or other complexity measures. -- [*Probability estimation*](@ref estimators) is a separate but required step to compute entropies. We provide a range of probability estimators. These estimators can be used in isolation for estimating probability distributions, or for computing generalized Rényi and Tsallis entropies. +### Probabilities +Entropies and other complexity measures are computed based on _probability distributions_. +These are obtained from [Input data](@ref) by a plethora of different ways. 
+The central API function that returns a probability distribution (actually, just a vector of probabilities) is [`probabilities`](@ref), which takes in a subtype of [`ProbabilitiesEstimator`](@ref) to specify how the probabilities are computed.
+All estimators available in Entropies.jl can be found in the [estimators page](@ref estimators).

-- "Entropy-like" complexity measures, which strictly speaking don't compute entropies, and may or may not explicitly compute probability distributions, appear in the [Complexity measures](@ref) section.
+### Entropies
+Entropy is an established concept in statistics, information theory, and nonlinear dynamics. However, it is also an umbrella term that may mean several computationally different quantities.

-The main **API** of this package is thus contained in three functions:
+[Generalized entropies](@ref) are theoretically well-founded, and in Entropies.jl we have the following:
+- Rényi entropy [`entropy_renyi`](@ref).
+- Tsallis entropy [`entropy_tsallis`](@ref).
+- Shannon entropy [`entropy_shannon`](@ref), which is just a subcase of either of the above two.

-- [`probabilities`](@ref), which computes probability distributions of given datasets.
-- [`entropy_renyi`](@ref), which uses the output of [`probabilities`](@ref), or a set of pre-computed [`Probabilities`](@ref), to calculate entropies.
-- [`entropy_tsallis`](@ref), which uses the output of [`probabilities`](@ref), or a set of pre-computed [`Probabilities`](@ref), to calculate Tsallis entropies.
-- Convenience functions for commonly used methods appear throughout the documentation.
+Computing such an entropy boils down to two simple steps: first estimating a probability distribution, and then applying one of the generalized entropy formulas to the distributions.
+Thus, any of the implemented [probabilities estimators](@ref estimators) can be used to compute generalized entropies.

-These functions dispatch on the probability estimators listed [here](@ref estimators), and are used behind the scenes by many of the [Shannon entropy](@ref shannon_entropy) convenience methods.
-*Note: there are fewer probability estimators than there are Shannon entropy estimators, because some Shannon entropy estimators are indirect, in the sense that they don't explicitly compute probability distributions*.
+!!! tip "There aren't many entropies, really."
+    A crucial thing to clarify is that many quantities that are named as entropies (e.g., permutation entropy ([`entropy_permutation`](@ref)), the wavelet entropy [`entropy_wavelet`](@ref), etc.), are _not really new entropies_. They are in fact new probability estimators. They simply devise a new way to calculate probabilities from data, and then plug those probabilities into formal entropy formulas such as the Shannon entropy. While in Entropies.jl we provide convenience functions like [`entropy_wavelet`](@ref), they really aren't anything more than 3-lines-of-code wrappers that call [`entropy_shannon`](@ref) with the appropriate [`ProbabilitiesEstimator`](@ref).

    There are only a few exceptions to this rule, which are quantities that are able to compute Shannon entropies via alternate means, without explicitly computing some probability distributions, such as [TODO ADD EXAMPLE].
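For concreteness, here is a minimal sketch of the two-step pipeline described above, using only calls documented in this patch series; the random timeseries and the box size `0.1` are arbitrary illustrations, not values prescribed anywhere in the patches:

```julia
using Entropies

x = rand(1000)                 # a univariate timeseries
p = probabilities(x, 0.1)      # step 1: histogram probabilities, boxes of side length 0.1
H = entropy_renyi(p; q = 1)    # step 2: Shannon (q = 1) entropy of that distribution
```

Any other probabilities estimator could be substituted in step 1 without changing step 2.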
-### Temporal (1D) data -In this package, [probability](@ref estimators), [generalized entropy](@ref generalized_entropy) and [Shannon entropy](@ref shannon_entropy) estimators assume that temporal data is represented by the `Dataset`-type from [`DelayEmbeddings.jl`](https://github.com/JuliaDynamics/DelayEmbeddings.jl), where each observation is a D-dimensional data point. See the [`DynamicalSystems.jl` documentation](https://juliadynamics.github.io/DynamicalSystems.jl/dev/) for more info. Univariate timeseries given as -`AbstractVector{<:Real}` also work with some estimators, but are treated differently -based on which method for probability/entropy estimation is applied. +### Complexity measures +Other complexity measures, which strictly speaking don't compute entropies, and may or may not explicitly compute probability distributions, appear in the [Complexity measures](@ref) section. -### Spatiotemporal (2D and higher) data -The [`SpatialSymbolicPermutation`](@ref) probability estimator handles probability and entropy computations for arbitrary -dimensional data (e.g. 2D images, 3D images). These data should be provided as `AbstractArray{T, N}` where `N` is the dimension of the data. +## Input data +The input data type typically depend on the probability estimator chosen. In general though, the standard DynamicalSystems.jl approach is taken and as such we have three types of input data: + +- _Timeseries_, which are `AbstractVector{<:Real}`, used in e.g. with [`WaveletOverlap`](@ref). +- _Multi-dimensional timeseries, or datasets, or state space sets_, which are `Dataset`, used e.g. with [`NaiveKernel`](@ref). +- _Spatial data_, which are higher dimensional standard `Array`, used e.g. with [`SpatialSymbolicPermutation`](@ref). diff --git a/docs/src/probabilities.md b/docs/src/probabilities.md index ab6db851a..5af42cf91 100644 --- a/docs/src/probabilities.md +++ b/docs/src/probabilities.md @@ -5,46 +5,44 @@ For categorical or integer-valued data, probabilities can be estimated by direct More advanced estimators computing probabilities by first either discretizing, symbolizing or transforming the data in a way that quantifies some useful properties about the underlying data (e.g. visitation frequencies, wavelet energies, or permutation patterns), from which probability distributions can be estimated. Use `probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator)` in combination with any of the estimators listed below. ```@docs -Probabilities probabilities probabilities! +Probabilities ProbabilitiesEstimator ``` -## Estimators - -### Count occurrences (counting) +## Count occurrences (counting) ```@docs CountOccurrences ``` -### Permutation (symbolic) +## Permutation (symbolic) ```@docs SymbolicPermutation SpatialSymbolicPermutation ``` -### Visitation frequency (binning) +## Visitation frequency (binning) ```@docs VisitationFrequency ``` -#### Specifying binning/boxes +### Specifying binning/boxes ```@docs RectangularBinning ``` -### Transfer operator (binning) +## Transfer operator (binning) ```@docs TransferOperator ``` -#### Utility methods/types +### Utility methods/types ```@docs InvariantMeasure @@ -52,18 +50,18 @@ invariantmeasure transfermatrix ``` -### Kernel density +## Kernel density ```@docs NaiveKernel ``` -#### Example +## Example -Here, we draw some random points from a 2D normal distribution. Then, we use kernel -density estimation to associate a probability to each point `p`, measured by how many -points are within radius `1.5` of `p`. 
Plotting the actual points, along with their -associated probabilities estimated by the KDE procedure, we get the following surface +Here, we draw some random points from a 2D normal distribution. Then, we use kernel +density estimation to associate a probability to each point `p`, measured by how many +points are within radius `1.5` of `p`. Plotting the actual points, along with their +associated probabilities estimated by the KDE procedure, we get the following surface plot. ```@example MAIN @@ -79,7 +77,7 @@ ax.zticklabelsvisible = false fig ``` -### Wavelet +## Wavelet ```@docs WaveletOverlap diff --git a/src/Entropies.jl b/src/Entropies.jl index d6950cd92..70972d489 100644 --- a/src/Entropies.jl +++ b/src/Entropies.jl @@ -1,5 +1,5 @@ """ -A Julia language package that provides estimators for probabilities, entropies, +A Julia package that provides estimators for probabilities, entropies, and complexity measures for timeseries, nonlinear dynamics and complex systems. It can be used as standalone or part of several projects in the JuliaDynamics organization, such as [DynamicalSystems.jl](https://juliadynamics.github.io/DynamicalSystems.jl/dev/) diff --git a/src/core.jl b/src/core.jl index 0fd15ab44..d3b252509 100644 --- a/src/core.jl +++ b/src/core.jl @@ -45,6 +45,15 @@ const ProbEst = ProbabilitiesEstimator # shorthand Directly count probabilities from the elements of `x` without any discretization, binning, or other processing (mostly useful when `x` contains categorical or integer data). +`probabilities` always returns a [`Probabilities`](@ref) container (`Vector`-like). + + + probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator) → p::Probabilities + +Calculate probabilities representing `x` based on the provided estimator. +The probabilities are typically unordered and may or may not contain 0s, see the +documentation of the individual estimators for more. +Configuration options are always given as arguments to the chosen estimator. probabilities(x::Array_or_Dataset, ε::AbstractFloat) → p::Probabilities @@ -64,14 +73,6 @@ To obtain the bin information along with `p`, use [`binhist`](@ref). Same as the above method, but now each dimension of the data is binned into `n::Int` equal sized bins instead of bins of length `ε::AbstractFloat`. - probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator) → p::Probabilities - -Calculate probabilities representing `x` based on the provided -estimator and return them as a [`Probabilities`](@ref) container (`Vector`-like). -The probabilities are typically unordered and may or may not contain 0s, see the -documentation of the individual estimators for more. - -The configuration options are always given as arguments to the chosen estimator. """ function probabilities end From 51cf580a74befe4d5fd7913ba0fe6cf8ce360d0b Mon Sep 17 00:00:00 2001 From: Datseris Date: Sat, 10 Sep 2022 14:12:58 +0100 Subject: [PATCH 21/32] improve entropy wording --- docs/src/generalized_entropies.md | 16 +++++++++++++--- docs/src/index.md | 4 ++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/src/generalized_entropies.md b/docs/src/generalized_entropies.md index f70cc3b79..52472ed9d 100644 --- a/docs/src/generalized_entropies.md +++ b/docs/src/generalized_entropies.md @@ -1,6 +1,4 @@ -# Generalized entropies - -Computing entropy boils down to one thing: first estimating a probability distribution, then applying one of the generalized entropy formulas ([`entropy_renyi`](@ref) or [`entropy_tsallis`](@ref)) to these distributions. 
Thus, any of the implemented [probabilities estimators](@ref estimators) can be used to compute generalized entropies. +# Entropies ## Rényi (generalized) entropy @@ -18,3 +16,15 @@ Entropies.entropy_tsallis ```@docs entropy_shannon ``` + +## Indirect entropies +Here we list functions which compute Shannon entropies via alternate means, without explicitly computing some probability distributions. + +### Nearest neighbors entropy +```@docs +entropy_kraskov +entropy_kozachenkoleonenko +``` + +## Convenience functions +In this subsection we expand documentation strings of "entropy names" that are used commonly in the literature, such as "permutation entropy". As we made clear in [API & terminology](@ref), these are just the existing Shannon/Rényi/Tsallis entropy with a particularly chosen probability estimator. \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index bd60b78ec..7b2437b26 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -12,7 +12,7 @@ Entropies In the literature, the term "entropy" is used (and abused) in multiple contexts. -The API of Entropies.jl and documentation aim to clarify some aspects and +The API and documentation of Entropies.jl aim to clarify some aspects and provide a simple way to obtain probabilities, entropies, or other complexity measures. ### Probabilities @@ -29,7 +29,7 @@ Entropy is an established concept in statistics, information theory, and nonline - Tsallis entropy [`entropy_tsallis`](@ref). - Shannon entropy [`entropy_shannon`](@ref), which is just a subcase of either of the above two. -Computing such an entropy boils down to two simple steps: first estimating a probability distribution, and then applying one of the generalized entropy formulas to the distributions. +Computing such an entropy most of the time boils down to two simple steps: first estimating a probability distribution, and then applying one of the generalized entropy formulas to the distributions. Thus, any of the implemented [probabilities estimators](@ref estimators) can be used to compute generalized entropies. 
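To make the "thin wrapper" point above concrete, a minimal sketch assuming the API introduced in these patches (the random `Dataset` and the 5-bin binning are arbitrary choices; `entropy_shannon` is, per its definition in this series, just `entropy_renyi` with `q = 1`):

```julia
using Entropies

x = Dataset(rand(2000, 2))                        # a 2-dimensional dataset
est = VisitationFrequency(RectangularBinning(5))  # 5 bins per coordinate axis
# The convenience wrapper and the explicit call should agree by definition.
entropy_shannon(x, est) ≈ entropy_renyi(x, est; q = 1)
```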
From 48c5ba24a89cb3847af3aec06f659cee91b94f2c Mon Sep 17 00:00:00 2001 From: Datseris Date: Sat, 10 Sep 2022 14:59:47 +0100 Subject: [PATCH 22/32] Complete re-organization of source code folders --- ...{generalized_entropies.md => entropies.md} | 13 +- src/Entropies.jl | 9 +- .../rectangular/VisitationFrequency.jl | 45 ----- .../rectangular/rectangular_estimators.jl | 9 - src/core.jl | 174 ------------------ src/counting_based/CountOccurrences.jl | 14 -- src/deprecated/symbolization.jl | 42 ----- src/deprecations.jl | 43 +++++ src/entropies/convenience_definitions.jl | 15 ++ .../direct_entropies}/entropy_dispersion.jl | 0 .../direct_entropies}/multiscale.jl | 2 + .../nearest_neighbors/KozachenkoLeonenko.jl | 0 .../nearest_neighbors/Kraskov.jl | 0 .../nearest_neighbors/nearest_neighbors.jl | 0 src/entropies/entropies.jl | 6 + src/entropies/renyi.jl | 91 +++++++++ src/entropies/shannon.jl | 7 + src/{tsallis => entropies}/tsallis.jl | 0 src/histogram_estimation.jl | 44 ----- src/probabilities.jl | 83 +++++++++ .../count_occurences.jl | 12 ++ .../kernel_density.jl} | 0 .../SymbolicAmplitudeAware.jl | 0 .../SymbolicPermutation.jl | 0 .../SymbolicWeightedPermutation.jl | 0 .../spatial_permutation.jl | 0 .../permutation_ordinal}/symbolic.jl | 0 .../permutation_ordinal}/utils.jl | 0 .../probabilities_estimators.jl | 6 + .../VisitationFrequency.jl | 19 ++ .../rectangular_binning}/binning_schemes.jl | 98 +++++----- .../rectangular_binning}/count_box_visits.jl | 36 ++-- .../histogram_estimation.jl | 41 +++++ .../rectangular_binning.jl | 4 + .../timescales/timescales.jl | 0 .../timescales/wavelet_overlap.jl | 0 .../transfer_operator}/GroupSlices.jl | 0 .../transfer_operator/transfer_operator.jl} | 1 + 38 files changed, 415 insertions(+), 399 deletions(-) rename docs/src/{generalized_entropies.md => entropies.md} (78%) delete mode 100644 src/binning_based/rectangular/VisitationFrequency.jl delete mode 100644 src/binning_based/rectangular/rectangular_estimators.jl delete mode 100644 src/core.jl delete mode 100644 src/counting_based/CountOccurrences.jl delete mode 100644 src/deprecated/symbolization.jl create mode 100644 src/entropies/convenience_definitions.jl rename src/{dispersion => entropies/direct_entropies}/entropy_dispersion.jl (100%) rename src/{multiscale => entropies/direct_entropies}/multiscale.jl (90%) rename src/{ => entropies/direct_entropies}/nearest_neighbors/KozachenkoLeonenko.jl (100%) rename src/{ => entropies/direct_entropies}/nearest_neighbors/Kraskov.jl (100%) rename src/{ => entropies/direct_entropies}/nearest_neighbors/nearest_neighbors.jl (100%) create mode 100644 src/entropies/entropies.jl create mode 100644 src/entropies/renyi.jl create mode 100644 src/entropies/shannon.jl rename src/{tsallis => entropies}/tsallis.jl (100%) delete mode 100644 src/histogram_estimation.jl create mode 100644 src/probabilities.jl create mode 100644 src/probabilities_estimators/count_occurences.jl rename src/{kerneldensity/kerneldensity.jl => probabilities_estimators/kernel_density.jl} (100%) rename src/{symbolic => probabilities_estimators/permutation_ordinal}/SymbolicAmplitudeAware.jl (100%) rename src/{symbolic => probabilities_estimators/permutation_ordinal}/SymbolicPermutation.jl (100%) rename src/{symbolic => probabilities_estimators/permutation_ordinal}/SymbolicWeightedPermutation.jl (100%) rename src/{symbolic => probabilities_estimators/permutation_ordinal}/spatial_permutation.jl (100%) rename src/{symbolic => probabilities_estimators/permutation_ordinal}/symbolic.jl (100%) rename 
src/{symbolic => probabilities_estimators/permutation_ordinal}/utils.jl (100%)
 create mode 100644 src/probabilities_estimators/probabilities_estimators.jl
 create mode 100644 src/probabilities_estimators/rectangular_binning/VisitationFrequency.jl
 rename src/{binning_based/rectangular => probabilities_estimators/rectangular_binning}/binning_schemes.jl (89%)
 rename src/{binning_based/rectangular => probabilities_estimators/rectangular_binning}/count_box_visits.jl (92%)
 rename src/{binning_based/rectangular => probabilities_estimators/rectangular_binning}/histogram_estimation.jl (82%)
 create mode 100644 src/probabilities_estimators/rectangular_binning/rectangular_binning.jl
 rename src/{ => probabilities_estimators}/timescales/timescales.jl (100%)
 rename src/{ => probabilities_estimators}/timescales/wavelet_overlap.jl (100%)
 rename src/{binning_based/rectangular => probabilities_estimators/transfer_operator}/GroupSlices.jl (100%)
 rename src/{binning_based/rectangular/TransferOperator.jl => probabilities_estimators/transfer_operator/transfer_operator.jl} (99%)

diff --git a/docs/src/generalized_entropies.md b/docs/src/entropies.md
similarity index 78%
rename from docs/src/generalized_entropies.md
rename to docs/src/entropies.md
index 52472ed9d..8f721e146 100644
--- a/docs/src/generalized_entropies.md
+++ b/docs/src/entropies.md
@@ -3,13 +3,13 @@

## Rényi (generalized) entropy

```@docs
-Entropies.entropy_renyi
+entropy_renyi
```

## Tsallis (generalized) entropy

```@docs
-Entropies.entropy_tsallis
+entropy_tsallis
```

## Shannon entropy (convenience)
@@ -18,7 +18,15 @@ entropy_shannon
```

## Indirect entropies
-Here we list functions which compute Shannon entropies via alternate means, without explicitly computing some probability distributions.
+Here we list functions which compute Shannon entropies via alternate means, without explicitly computing some probability distributions and then using the Shannon formula.

### Nearest neighbors entropy
```@docs
entropy_kraskov
entropy_kozachenkoleonenko
```

## Convenience functions
In this subsection we expand documentation strings of "entropy names" that are used commonly in the literature, such as "permutation entropy". As we made clear in [API & terminology](@ref), these are just the existing Shannon/Rényi/Tsallis entropy with a particularly chosen probability estimator.
+```@docs
+entropy_permutation
+entropy_wavelet
+entropy_dispersion
+```
\ No newline at end of file
diff --git a/src/Entropies.jl b/src/Entropies.jl
index 70972d489..ce7822ac7 100644
--- a/src/Entropies.jl
+++ b/src/Entropies.jl
@@ -7,8 +7,15 @@ or [CausalityTools.jl](https://juliadynamics.github.io/CausalityTools.jl/dev/).
""" module Entropies -include("core.jl") +using DelayEmbeddings +using DelayEmbeddings: AbstractDataset, Dataset, dimension +export AbstractDataset, Dataset +const Array_or_Dataset = Union{<:AbstractArray, <:AbstractDataset} + +include("probabilities.jl") +include("entropies/entropies.jl") include("symbolization/symbolize.jl") +include("entropies/entropies.jl") include("histogram_estimation.jl") include("counting_based/CountOccurrences.jl") include("symbolic/symbolic.jl") diff --git a/src/binning_based/rectangular/VisitationFrequency.jl b/src/binning_based/rectangular/VisitationFrequency.jl deleted file mode 100644 index 28c6831ea..000000000 --- a/src/binning_based/rectangular/VisitationFrequency.jl +++ /dev/null @@ -1,45 +0,0 @@ -export VisitationFrequency, probabilities -export entropy_visitfreq -export entropy_transferoperator - -""" - VisitationFrequency(r::RectangularBinning) <: BinningProbabilitiesEstimator - -A probability estimator based on binning data into rectangular boxes dictated by -the binning scheme `r`. - -## Example - -```julia -# Construct boxes by dividing each coordinate axis into 5 equal-length chunks. -b = RectangularBinning(5) - -# A probabilities estimator that, when applied a dataset, computes visitation frequencies -# over the boxes of the binning -est = VisitationFrequency(b) -``` - -See also: [`RectangularBinning`](@ref). -""" -struct VisitationFrequency{RB<:RectangularBinning} <: BinningProbabilitiesEstimator - binning::RB -end - -function probabilities(x::AbstractDataset, est::VisitationFrequency) - _non0hist(x, est.binning)[1] -end - -""" - entropy_visitfreq(x, binning::RectangularBinning; base = MathConstants.e) - -Compute the (Shannon) entropy of `x` by counting visitation frequencies over -the state-space coarse graining produced by the provided `binning` scheme. - -Short-hand for `renyi_entropy(x, VisitationFrequency(binning); base = base, q = 1)`. - -See also: [`VisitationFrequency`](@ref). -""" -function entropy_visitfreq(x, b::RectangularBinning) - est = VisitationFrequency(binning) - renyi_entropy(x, est; base = base, q = 1) -end diff --git a/src/binning_based/rectangular/rectangular_estimators.jl b/src/binning_based/rectangular/rectangular_estimators.jl deleted file mode 100644 index 9c543f2bd..000000000 --- a/src/binning_based/rectangular/rectangular_estimators.jl +++ /dev/null @@ -1,9 +0,0 @@ -abstract type BinningProbabilitiesEstimator <: ProbabilitiesEstimator end - -include("binning_schemes.jl") - -include("count_box_visits.jl") -include("histogram_estimation.jl") - -include("VisitationFrequency.jl") -include("TransferOperator.jl") \ No newline at end of file diff --git a/src/core.jl b/src/core.jl deleted file mode 100644 index d3b252509..000000000 --- a/src/core.jl +++ /dev/null @@ -1,174 +0,0 @@ -using DelayEmbeddings -using DelayEmbeddings: AbstractDataset, Dataset, dimension -export ProbabilitiesEstimator, Probabilities -export probabilities, probabilities! -export entropy_renyi, entropy_renyi! -export Dataset, dimension - -const Array_or_Dataset = Union{<:AbstractArray, <:AbstractDataset} - -""" - Probabilities(x) → p -A simple wrapper type around an `x::AbstractVector` which ensures that `p` sums to 1. -Behaves identically to `Vector`. 
-""" -struct Probabilities{T} <: AbstractVector{T} - p::Vector{T} - function Probabilities(x::AbstractVector{T}) where T - s = sum(x) - if s ≠ 1 - x = x ./ s - end - return new{T}(x) - end -end - -# extend base Vector interface: -for f in (:length, :size, :eachindex, :eltype, :lastindex, :firstindex) - @eval Base.$(f)(d::Probabilities) = $(f)(d.p) -end -Base.IteratorSize(d::Probabilities) = Base.HasLength() -@inline Base.iterate(d::Probabilities, i = 1) = iterate(d.p, i) -@inline Base.getindex(d::Probabilities, i) = d.p[i] -@inline Base.setindex!(d::Probabilities, v, i) = (d.p[i] = v) -@inline Base.:*(d::Probabilities, x::Number) = d.p * x -@inline Base.sum(d::Probabilities{T}) where T = one(T) - -""" -An abstract type for probabilities estimators. -""" -abstract type ProbabilitiesEstimator end -const ProbEst = ProbabilitiesEstimator # shorthand - -""" - probabilities(x::Array_or_Dataset) → p::Probabilities - -Directly count probabilities from the elements of `x` without any discretization, -binning, or other processing (mostly useful when `x` contains categorical or integer data). -`probabilities` always returns a [`Probabilities`](@ref) container (`Vector`-like). - - - probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator) → p::Probabilities - -Calculate probabilities representing `x` based on the provided estimator. -The probabilities are typically unordered and may or may not contain 0s, see the -documentation of the individual estimators for more. -Configuration options are always given as arguments to the chosen estimator. - - probabilities(x::Array_or_Dataset, ε::AbstractFloat) → p::Probabilities - -Convenience syntax which provides probabilities for `x` based on rectangular binning -(i.e. performing a histogram). In short, the state space is divided into boxes of length -`ε`, and formally we use `est = VisitationFrequency(RectangularBinning(ε))` -as an estimator, see [`VisitationFrequency`](@ref). - -This method has a linearithmic time complexity (`n log(n)` for `n = length(x)`) -and a linear space complexity (`l` for `l = dimension(x)`). -This allows computation of probabilities (histograms) of high-dimensional -datasets and with small box sizes `ε` without memory overflow and with maximum performance. -To obtain the bin information along with `p`, use [`binhist`](@ref). - - probabilities(x::Array_or_Dataset, n::Integer) → p::Probabilities - -Same as the above method, but now each dimension of the data is binned into `n::Int` equal -sized bins instead of bins of length `ε::AbstractFloat`. - -""" -function probabilities end - -""" - probabilities!(args...) -Identical to `probabilities(args...)`, but allows pre-allocation of temporarily used -containers. - -Only works for certain estimators. See for example [`SymbolicPermutation`](@ref). -""" -function probabilities! end - -""" - entropy_renyi(p::Probabilities; q = 1.0, base = MathConstants.e) - -Compute the generalized order-`q` entropy of some probabilities -returned by the [`probabilities`](@ref) function. Alternatively, compute entropy -from pre-computed `Probabilities`. - - entropy_renyi(x::Array_or_Dataset, est; q = 1.0, base) - -A convenience syntax, which calls first `probabilities(x, est)` -and then calculates the entropy of the result (and thus `est` can be a -`ProbabilitiesEstimator` or simply `ε::Real`). - -## Description - -Let ``p`` be an array of probabilities (summing to 1). 
Then the generalized (Rényi) entropy is - -```math -H_q(p) = \\frac{1}{1-q} \\log \\left(\\sum_i p[i]^q\\right) -``` - -and generalizes other known entropies, -like e.g. the information entropy -(``q = 1``, see [^Shannon1948]), the maximum entropy (``q=0``, -also known as Hartley entropy), or the correlation entropy -(``q = 2``, also known as collision entropy). - -[^Rényi1960]: A. Rényi, *Proceedings of the fourth Berkeley Symposium on Mathematics, Statistics and Probability*, pp 547 (1960) -[^Shannon1948]: C. E. Shannon, Bell Systems Technical Journal **27**, pp 379 (1948) -""" -function entropy_renyi end - -function entropy_renyi(prob::Probabilities; q = 1.0, α = nothing, base = MathConstants.e) - if α ≠ nothing - @warn "Keyword `α` is deprecated in favor of `q`." - q = α - end - q < 0 && throw(ArgumentError("Order of generalized entropy must be ≥ 0.")) - haszero = any(iszero, prob) - p = if haszero - i0 = findall(iszero, prob.p) - # We copy because if someone initialized Probabilities with 0s, I would guess - # they would want to index the zeros as well. Not so costly anyways. - deleteat!(copy(prob.p), i0) - else - prob.p - end - - if q ≈ 0 - return log(base, length(p)) #Hartley entropy, max-entropy - elseif q ≈ 1 - return -sum( x*log(base, x) for x in p ) #Shannon entropy - elseif isinf(q) - return -log(base, maximum(p)) #Min entropy - else - return (1/(1-q))*log(base, sum(x^q for x in p) ) #Renyi q entropy - end -end - -entropy_renyi(::AbstractArray{<:Real}) = - error("For single-argument input, do `entropy_renyi(Probabilities(x))` instead.") - -function entropy_renyi(x::Array_or_Dataset, est; q = 1.0, α = nothing, base = MathConstants.e) - if α ≠ nothing - @warn "Keyword `α` is deprecated in favor of `q`." - q = α - end - p = probabilities(x, est) - entropy_renyi(p; q = q, base = base) -end - -""" - entropy_renyi!(p, x, est::ProbabilitiesEstimator; q = 1.0, base = MathConstants.e) - -Similarly with `probabilities!` this is an in-place version of `entropy_renyi` that allows -pre-allocation of temporarily used containers. - -Only works for certain estimators. See for example [`SymbolicPermutation`](@ref). -""" -function entropy_renyi!(p, x, est; q = 1.0, α = nothing, base = MathConstants.e) - if α ≠ nothing - @warn "Keyword `α` is deprecated in favor of `q`." - q = α - end - probabilities!(p, x, est) - entropy_renyi(p; q = q, base = base) -end diff --git a/src/counting_based/CountOccurrences.jl b/src/counting_based/CountOccurrences.jl deleted file mode 100644 index 4e7d46dd8..000000000 --- a/src/counting_based/CountOccurrences.jl +++ /dev/null @@ -1,14 +0,0 @@ -export CountOccurrences - -""" - CountOccurrences - -A probabilities/entropy estimator based on straight-forward counting of distinct elements in -a univariate time series or multivariate dataset. From these counts, construct histograms. -Sum-normalize histograms to obtain probability distributions. 
-""" -struct CountOccurrences <: ProbabilitiesEstimator end - -function probabilities(x::AbstractDataset, est::CountOccurrences) - probabilities(x) -end diff --git a/src/deprecated/symbolization.jl b/src/deprecated/symbolization.jl deleted file mode 100644 index ba04c3f65..000000000 --- a/src/deprecated/symbolization.jl +++ /dev/null @@ -1,42 +0,0 @@ -function symbolize(x::AbstractDataset{m, T}, est::PermutationProbabilityEstimator) where {m, T} - Base.depwarn("`symbolize(x, est::$P)` is deprecated, use `symbolize(x, scheme::OrdinalPattern)` instead.", :symbolize) - - m >= 2 || error("Data must be at least 2-dimensional to symbolize. If data is a univariate time series, embed it using `genembed` first.") - s = zeros(Int, length(x)) - symbolize!(s, x, est) - return s -end - -function symbolize(x::AbstractVector{T}, est::P) where {T, P <: PermutationProbabilityEstimator} - Base.depwarn("`symbolize(x, est::$P)` is deprecated, use `symbolize(x, scheme::OrdinalPattern)` instead.", :symbolize) - - τs = tuple([est.τ*i for i = 0:est.m-1]...) - x_emb = genembed(x, τs) - - s = zeros(Int, length(x_emb)) - symbolize!(s, x_emb, est) - return s -end - -function symbolize!(s::AbstractVector{Int}, x::AbstractDataset{m, T}, est::SymbolicPermutation) where {m, T} - Base.depwarn("`symbolize!(s, x, est::SymbolicPermutation)` is deprecated, use `symbolize!(s, x, scheme::OrdinalPattern)` instead.", :symbolize) - - @assert length(s) == length(x) - #= - Loop over embedding vectors `E[i]`, find the indices `p_i` that sort each `E[i]`, - then get the corresponding integers `k_i` that generated the - permutations `p_i`. Those integers are the symbols for the embedding vectors - `E[i]`. - =# - sp = zeros(Int, m) # pre-allocate a single symbol vector that can be overwritten. - fill_symbolvector!(s, x, sp, m, lt = est.lt) - - return s -end - -function symbolize!(s::AbstractVector{Int}, x::AbstractVector{T}, est::SymbolicPermutation) where T - Base.depwarn("`symbolize!(s, x, est::SymbolicPermutation)` is deprecated, use `symbolize!(s, x, scheme::OrdinalPattern)` instead.", :symbolize) - τs = tuple([est.τ*i for i = 0:est.m-1]...) - x_emb = genembed(x, τs) - symbolize!(s, x_emb, est) -end diff --git a/src/deprecations.jl b/src/deprecations.jl index eacb8b325..88ac5264a 100644 --- a/src/deprecations.jl +++ b/src/deprecations.jl @@ -2,3 +2,46 @@ include("deprecated/symbolization.jl") @deprecate TimeScaleMODWT WaveletOverlap @deprecate genentropy entropy_renyi + +function symbolize(x::AbstractDataset{m, T}, est::PermutationProbabilityEstimator) where {m, T} + Base.depwarn("`symbolize(x, est::$P)` is deprecated, use `symbolize(x, scheme::OrdinalPattern)` instead.", :symbolize) + + m >= 2 || error("Data must be at least 2-dimensional to symbolize. If data is a univariate time series, embed it using `genembed` first.") + s = zeros(Int, length(x)) + symbolize!(s, x, est) + return s +end + +function symbolize(x::AbstractVector{T}, est::P) where {T, P <: PermutationProbabilityEstimator} + Base.depwarn("`symbolize(x, est::$P)` is deprecated, use `symbolize(x, scheme::OrdinalPattern)` instead.", :symbolize) + + τs = tuple([est.τ*i for i = 0:est.m-1]...) 
+ x_emb = genembed(x, τs) + + s = zeros(Int, length(x_emb)) + symbolize!(s, x_emb, est) + return s +end + +function symbolize!(s::AbstractVector{Int}, x::AbstractDataset{m, T}, est::SymbolicPermutation) where {m, T} + Base.depwarn("`symbolize!(s, x, est::SymbolicPermutation)` is deprecated, use `symbolize!(s, x, scheme::OrdinalPattern)` instead.", :symbolize) + + @assert length(s) == length(x) + #= + Loop over embedding vectors `E[i]`, find the indices `p_i` that sort each `E[i]`, + then get the corresponding integers `k_i` that generated the + permutations `p_i`. Those integers are the symbols for the embedding vectors + `E[i]`. + =# + sp = zeros(Int, m) # pre-allocate a single symbol vector that can be overwritten. + fill_symbolvector!(s, x, sp, m, lt = est.lt) + + return s +end + +function symbolize!(s::AbstractVector{Int}, x::AbstractVector{T}, est::SymbolicPermutation) where T + Base.depwarn("`symbolize!(s, x, est::SymbolicPermutation)` is deprecated, use `symbolize!(s, x, scheme::OrdinalPattern)` instead.", :symbolize) + τs = tuple([est.τ*i for i = 0:est.m-1]...) + x_emb = genembed(x, τs) + symbolize!(s, x_emb, est) +end diff --git a/src/entropies/convenience_definitions.jl b/src/entropies/convenience_definitions.jl new file mode 100644 index 000000000..e24caabdc --- /dev/null +++ b/src/entropies/convenience_definitions.jl @@ -0,0 +1,15 @@ +# This file defines and exports some convenience definition of entropies +# for commonly used names in the literature. They aren't actually new entropies +# as discussed extensively in the documentation. + +function entropy_permutation(args...) + +end + +function entropy_wavelet(args...) + +end + +function entropy_dispersion(args...) + +end \ No newline at end of file diff --git a/src/dispersion/entropy_dispersion.jl b/src/entropies/direct_entropies/entropy_dispersion.jl similarity index 100% rename from src/dispersion/entropy_dispersion.jl rename to src/entropies/direct_entropies/entropy_dispersion.jl diff --git a/src/multiscale/multiscale.jl b/src/entropies/direct_entropies/multiscale.jl similarity index 90% rename from src/multiscale/multiscale.jl rename to src/entropies/direct_entropies/multiscale.jl index dd05d2812..bf408ba00 100644 --- a/src/multiscale/multiscale.jl +++ b/src/entropies/direct_entropies/multiscale.jl @@ -1,3 +1,5 @@ +# TODO: What's this file? It's incomplete and with rogue code... 
+ struct MultiscaleEntropy end """ diff --git a/src/nearest_neighbors/KozachenkoLeonenko.jl b/src/entropies/direct_entropies/nearest_neighbors/KozachenkoLeonenko.jl similarity index 100% rename from src/nearest_neighbors/KozachenkoLeonenko.jl rename to src/entropies/direct_entropies/nearest_neighbors/KozachenkoLeonenko.jl diff --git a/src/nearest_neighbors/Kraskov.jl b/src/entropies/direct_entropies/nearest_neighbors/Kraskov.jl similarity index 100% rename from src/nearest_neighbors/Kraskov.jl rename to src/entropies/direct_entropies/nearest_neighbors/Kraskov.jl diff --git a/src/nearest_neighbors/nearest_neighbors.jl b/src/entropies/direct_entropies/nearest_neighbors/nearest_neighbors.jl similarity index 100% rename from src/nearest_neighbors/nearest_neighbors.jl rename to src/entropies/direct_entropies/nearest_neighbors/nearest_neighbors.jl diff --git a/src/entropies/entropies.jl b/src/entropies/entropies.jl new file mode 100644 index 000000000..065c3beed --- /dev/null +++ b/src/entropies/entropies.jl @@ -0,0 +1,6 @@ +include("renyi.jl") +include("tsallis.jl") +include("shannon.jl") +include("convenience_definitions.jl") +include("direct_entropies/nearest_neighbors/nearest_neighbors.jl") +# TODO: What else is included here? \ No newline at end of file diff --git a/src/entropies/renyi.jl b/src/entropies/renyi.jl new file mode 100644 index 000000000..a12491e5f --- /dev/null +++ b/src/entropies/renyi.jl @@ -0,0 +1,91 @@ +export entropy_renyi + +""" + entropy_renyi(p::Probabilities; q = 1.0, base = MathConstants.e) + +Compute the Rényi[^Rényi1960] generalized order-`q` entropy of some probabilities +(typically returned by the [`probabilities`](@ref) function). + + entropy_renyi(x::Array_or_Dataset, est; q = 1.0, base) + +A convenience syntax, which calls first `probabilities(x, est)` +and then calculates the entropy of the result (and thus `est` can be anything +the [`probabilities`](@ref) function accepts). + +## Description + +Let ``p`` be an array of probabilities (summing to 1). +Then the Rényi generalized entropy is + +```math +H_q(p) = \\frac{1}{1-q} \\log \\left(\\sum_i p[i]^q\\right) +``` + +and generalizes other known entropies, +like e.g. the information entropy +(``q = 1``, see [^Shannon1948]), the maximum entropy (``q=0``, +also known as Hartley entropy), or the correlation entropy +(``q = 2``, also known as collision entropy). + +[^Rényi1960]: + A. Rényi, _Proceedings of the fourth Berkeley Symposium on Mathematics, + Statistics and Probability_, pp 547 (1960) +[^Shannon1948]: C. E. Shannon, Bell Systems Technical Journal **27**, pp 379 (1948) +""" +function entropy_renyi end + +function entropy_renyi(prob::Probabilities; q = 1.0, α = nothing, base = MathConstants.e) + if α ≠ nothing + @warn "Keyword `α` is deprecated in favor of `q`." + q = α + end + q < 0 && throw(ArgumentError("Order of generalized entropy must be ≥ 0.")) + haszero = any(iszero, prob) + p = if haszero + i0 = findall(iszero, prob.p) + # We copy because if someone initialized Probabilities with 0s, I would guess + # they would want to index the zeros as well. Not so costly anyways. 
+ deleteat!(copy(prob.p), i0) + else + prob.p + end + + if q ≈ 0 + return log(base, length(p)) #Hartley entropy, max-entropy + elseif q ≈ 1 + return -sum( x*log(base, x) for x in p ) #Shannon entropy + elseif isinf(q) + return -log(base, maximum(p)) #Min entropy + else + return (1/(1-q))*log(base, sum(x^q for x in p) ) #Renyi q entropy + end +end + +entropy_renyi(::AbstractArray{<:Real}) = + error("For single-argument input, do `entropy_renyi(Probabilities(x))` instead.") + +function entropy_renyi(x::Array_or_Dataset, est; q = 1.0, α = nothing, base = MathConstants.e) + if α ≠ nothing + @warn "Keyword `α` is deprecated in favor of `q`." + q = α + end + p = probabilities(x, est) + entropy_renyi(p; q = q, base = base) +end + +""" + entropy_renyi!(p, x, est::ProbabilitiesEstimator; q = 1.0, base = MathConstants.e) + +Similarly with `probabilities!` this is an in-place version of `entropy_renyi` that allows +pre-allocation of temporarily used containers. + +Only works for certain estimators. See for example [`SymbolicPermutation`](@ref). +""" +function entropy_renyi!(p, x, est; q = 1.0, α = nothing, base = MathConstants.e) + if α ≠ nothing + @warn "Keyword `α` is deprecated in favor of `q`." + q = α + end + probabilities!(p, x, est) + entropy_renyi(p; q = q, base = base) +end diff --git a/src/entropies/shannon.jl b/src/entropies/shannon.jl new file mode 100644 index 000000000..046e2e4d0 --- /dev/null +++ b/src/entropies/shannon.jl @@ -0,0 +1,7 @@ +export entropy_shannon + +""" + entropy_shannon(args...; base = MathConstants.e) +Equivalent with `entropy_renyi(args...; base, q = 1)` and provided solely for convenience. +""" +entropy_shannon(args...; base = MathConstants.e) = entropy_renyi(args...; base, q = 1) diff --git a/src/tsallis/tsallis.jl b/src/entropies/tsallis.jl similarity index 100% rename from src/tsallis/tsallis.jl rename to src/entropies/tsallis.jl diff --git a/src/histogram_estimation.jl b/src/histogram_estimation.jl deleted file mode 100644 index 03cdcaaf1..000000000 --- a/src/histogram_estimation.jl +++ /dev/null @@ -1,44 +0,0 @@ -export binhist - -probabilities(x) = _non0hist(x) - -# Frequencies are needed elsewhere in the package too, so keep in its own method. -function _non0hist_frequencies(x) - L = length(x) - - hist = Vector{Float64}() - # Reserve enough space for histogram: - sizehint!(hist, L) - - sx = sort(x, alg = QuickSort) - - # Fill the histogram by counting consecutive equal values: - prev_val, count = sx[1], 0 - for val in sx - if val == prev_val - count += 1 - else - push!(hist, count) - prev_val = val - count = 1 - end - end - push!(hist, count) - - # Shrink histogram capacity to fit its size: - sizehint!(hist, length(hist)) - return hist -end - -function _non0hist(x, N) - hist = _non0hist_frequencies(x) - return Probabilities(hist ./ N) -end - -function _non0hist(x) - return _non0hist(x, length(x)) -end - -_non0hist(x::AbstractDataset) = _non0hist(x.data) -_non0hist(x::AbstractDataset, N) = _non0hist(x.data, N) -probabilities(x::AbstractDataset) = _non0hist(x.data) diff --git a/src/probabilities.jl b/src/probabilities.jl new file mode 100644 index 000000000..e790c3307 --- /dev/null +++ b/src/probabilities.jl @@ -0,0 +1,83 @@ +export ProbabilitiesEstimator, Probabilities +export probabilities, probabilities! + +""" + Probabilities(x) → p +A simple wrapper type around an `x::AbstractVector` which ensures that `p` sums to 1. +Behaves identically to `Vector`. 
+""" +struct Probabilities{T} <: AbstractVector{T} + p::Vector{T} + function Probabilities(x::AbstractVector{T}) where T + s = sum(x) + if s ≠ 1 + x = x ./ s + end + return new{T}(x) + end +end + +# extend base Vector interface: +for f in (:length, :size, :eachindex, :eltype, :lastindex, :firstindex) + @eval Base.$(f)(d::Probabilities) = $(f)(d.p) +end +Base.IteratorSize(d::Probabilities) = Base.HasLength() +@inline Base.iterate(d::Probabilities, i = 1) = iterate(d.p, i) +@inline Base.getindex(d::Probabilities, i) = d.p[i] +@inline Base.setindex!(d::Probabilities, v, i) = (d.p[i] = v) +@inline Base.:*(d::Probabilities, x::Number) = d.p * x +@inline Base.sum(d::Probabilities{T}) where T = one(T) + +""" +An abstract type for probabilities estimators. +""" +abstract type ProbabilitiesEstimator end +const ProbEst = ProbabilitiesEstimator # shorthand + +""" + probabilities(x::Array_or_Dataset) → p::Probabilities + +Directly count probabilities from the elements of `x` without any discretization, +binning, or other processing (mostly useful when `x` contains categorical or integer data). +`probabilities` always returns a [`Probabilities`](@ref) container (`Vector`-like). + + + probabilities(x::Array_or_Dataset, est::ProbabilitiesEstimator) → p::Probabilities + +Calculate probabilities representing `x` based on the provided estimator. +The probabilities are typically unordered and may or may not contain 0s, see the +documentation of the individual estimators for more. +Configuration options are always given as arguments to the chosen estimator. + + probabilities(x::Array_or_Dataset, ε::AbstractFloat) → p::Probabilities + +Convenience syntax which provides probabilities for `x` based on rectangular binning +(i.e. performing a histogram). In short, the state space is divided into boxes of length +`ε`, and formally we use `est = VisitationFrequency(RectangularBinning(ε))` +as an estimator, see [`VisitationFrequency`](@ref). + +This method has a linearithmic time complexity (`n log(n)` for `n = length(x)`) +and a linear space complexity (`l` for `l = dimension(x)`). +This allows computation of probabilities (histograms) of high-dimensional +datasets and with small box sizes `ε` without memory overflow and with maximum performance. +To obtain the bin information along with `p`, use [`binhist`](@ref). + + probabilities(x::Array_or_Dataset, n::Integer) → p::Probabilities + +Same as the above method, but now each dimension of the data is binned into `n::Int` equal +sized bins instead of bins of length `ε::AbstractFloat`. + +""" +function probabilities end + +# The histogram related stuff are defined in histogram_estimation.jl file +probabilities(x) = _non0hist(x) + +""" + probabilities!(args...) +Identical to `probabilities(args...)`, but allows pre-allocation of temporarily used +containers. + +Only works for certain estimators. See for example [`SymbolicPermutation`](@ref). +""" +function probabilities! end diff --git a/src/probabilities_estimators/count_occurences.jl b/src/probabilities_estimators/count_occurences.jl new file mode 100644 index 000000000..c951098e5 --- /dev/null +++ b/src/probabilities_estimators/count_occurences.jl @@ -0,0 +1,12 @@ +export CountOccurrences + +""" + CountOccurrences + +A probabilities/entropy estimator based on straight-forward counting of distinct elements in +a univariate time series or multivariate dataset. This is the same as giving no +estimator to [`probabilities`](@ref). 
+""" +struct CountOccurrences <: ProbabilitiesEstimator end + +probabilities(x::AbstractDataset, ::CountOccurrences) = probabilities(x) diff --git a/src/kerneldensity/kerneldensity.jl b/src/probabilities_estimators/kernel_density.jl similarity index 100% rename from src/kerneldensity/kerneldensity.jl rename to src/probabilities_estimators/kernel_density.jl diff --git a/src/symbolic/SymbolicAmplitudeAware.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl similarity index 100% rename from src/symbolic/SymbolicAmplitudeAware.jl rename to src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl diff --git a/src/symbolic/SymbolicPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl similarity index 100% rename from src/symbolic/SymbolicPermutation.jl rename to src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl diff --git a/src/symbolic/SymbolicWeightedPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl similarity index 100% rename from src/symbolic/SymbolicWeightedPermutation.jl rename to src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl diff --git a/src/symbolic/spatial_permutation.jl b/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl similarity index 100% rename from src/symbolic/spatial_permutation.jl rename to src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl diff --git a/src/symbolic/symbolic.jl b/src/probabilities_estimators/permutation_ordinal/symbolic.jl similarity index 100% rename from src/symbolic/symbolic.jl rename to src/probabilities_estimators/permutation_ordinal/symbolic.jl diff --git a/src/symbolic/utils.jl b/src/probabilities_estimators/permutation_ordinal/utils.jl similarity index 100% rename from src/symbolic/utils.jl rename to src/probabilities_estimators/permutation_ordinal/utils.jl diff --git a/src/probabilities_estimators/probabilities_estimators.jl b/src/probabilities_estimators/probabilities_estimators.jl new file mode 100644 index 000000000..6183aa38b --- /dev/null +++ b/src/probabilities_estimators/probabilities_estimators.jl @@ -0,0 +1,6 @@ +include("count_occurences.jl") +include("rectangular_binning/rectangular_binning.jl") +include("permutation_ordinal/symbolic.jl") +include("kernel_density.jl") +include("timescales/timescales.jl") +include("transfer_operator.jl") \ No newline at end of file diff --git a/src/probabilities_estimators/rectangular_binning/VisitationFrequency.jl b/src/probabilities_estimators/rectangular_binning/VisitationFrequency.jl new file mode 100644 index 000000000..862b2c37c --- /dev/null +++ b/src/probabilities_estimators/rectangular_binning/VisitationFrequency.jl @@ -0,0 +1,19 @@ +export VisitationFrequency, probabilities + +abstract type BinningProbabilitiesEstimator <: ProbabilitiesEstimator end + +""" + VisitationFrequency(r::RectangularBinning) <: ProbabilitiesEstimator + +A probability estimator based on binning data into rectangular boxes dictated by +the binning scheme `r` and then computing the frequencies of points in the bins. + +See also: [`RectangularBinning`](@ref). 
+""" +struct VisitationFrequency{RB<:RectangularBinning} <: BinningProbabilitiesEstimator + binning::RB +end + +function probabilities(x::AbstractDataset, est::VisitationFrequency) + _non0hist(x, est.binning)[1] +end diff --git a/src/binning_based/rectangular/binning_schemes.jl b/src/probabilities_estimators/rectangular_binning/binning_schemes.jl similarity index 89% rename from src/binning_based/rectangular/binning_schemes.jl rename to src/probabilities_estimators/rectangular_binning/binning_schemes.jl index 9286db954..afa30ecad 100644 --- a/src/binning_based/rectangular/binning_schemes.jl +++ b/src/probabilities_estimators/rectangular_binning/binning_schemes.jl @@ -3,57 +3,57 @@ export RectangularBinning import DelayEmbeddings: Dataset, minima, maxima import StaticArrays: SVector, MVector -""" +""" BinningScheme -The supertype of all binning schemes in the CausalityTools ecosystem. +The supertype of all binning schemes in the CausalityTools ecosystem. """ abstract type BinningScheme end -""" +""" RectangularBinningScheme <: BinningScheme The supertype of all rectangular binning schemes in the CausalityTools ecosystem. -""" +""" abstract type RectangularBinningScheme <: BinningScheme end """ - RectangularBinning(ϵ) <: RectangularBinningScheme - -Instructions for creating a rectangular box partition using the binning scheme `ϵ`. + RectangularBinning(ϵ) <: BinningScheme + +Instructions for creating a rectangular box partition using the binning scheme `ϵ`. Binning instructions are deduced from the type of `ϵ`. -Rectangular binnings may be automatically adjusted to the data in which the `RectangularBinning` +Rectangular binnings may be automatically adjusted to the data in which the `RectangularBinning` is applied, as follows: -1. `ϵ::Int` divides each coordinate axis into `ϵ` equal-length intervals, +1. `ϵ::Int` divides each coordinate axis into `ϵ` equal-length intervals, extending the upper bound 1/100th of a bin size to ensure all points are covered. -2. `ϵ::Float64` divides each coordinate axis into intervals of fixed size `ϵ`, starting +2. `ϵ::Float64` divides each coordinate axis into intervals of fixed size `ϵ`, starting from the axis minima until the data is completely covered by boxes. -3. `ϵ::Vector{Int}` divides the i-th coordinate axis into `ϵ[i]` equal-length - intervals, extending the upper bound 1/100th of a bin size to ensure all points are +3. `ϵ::Vector{Int}` divides the i-th coordinate axis into `ϵ[i]` equal-length + intervals, extending the upper bound 1/100th of a bin size to ensure all points are covered. -4. `ϵ::Vector{Float64}` divides the i-th coordinate axis into intervals of fixed size `ϵ[i]`, starting +4. `ϵ::Vector{Float64}` divides the i-th coordinate axis into intervals of fixed size `ϵ[i]`, starting from the axis minima until the data is completely covered by boxes. -Rectangular binnings may also be specified on arbitrary min-max ranges. +Rectangular binnings may also be specified on arbitrary min-max ranges. -5. `ϵ::Tuple{Vector{Tuple{Float64,Float64}},Int64}` creates intervals - along each coordinate axis from ranges indicated by a vector of `(min, max)` tuples, then divides - each coordinate axis into an integer number of equal-length intervals. *Note: this does not ensure +5. `ϵ::Tuple{Vector{Tuple{Float64,Float64}},Int64}` creates intervals + along each coordinate axis from ranges indicated by a vector of `(min, max)` tuples, then divides + each coordinate axis into an integer number of equal-length intervals. 
*Note: this does not ensure
that all points are covered by the data (points outside the binning are ignored)*.

# Example 1: Grid deduced automatically from data (partition guaranteed to cover data points)

-## Flexible box sizes 
+## Flexible box sizes

-The following binning specification finds the minima/maxima along each coordinate axis, then 
-split each of those data ranges (with some tiny padding on the edges) into `10` equal-length 
+The following binning specification finds the minima/maxima along each coordinate axis, then
+splits each of those data ranges (with some tiny padding on the edges) into `10` equal-length
 intervals. This gives (hyper-)rectangular boxes, and works for data of any dimension.
 
 ```julia
@@ -64,9 +64,9 @@ RectangularBinning(10)
 Now, assume the data consists of 2-dimensional points, and that we want a finer grid along
 one of the dimensions than over the other dimension.
 
-The following binning specification finds the minima/maxima along each coordinate axis, then 
-splits the range along the first coordinate axis (with some tiny padding on the edges) 
-into `10` equal-length intervals, and the range along the second coordinate axis (with some 
+The following binning specification finds the minima/maxima along each coordinate axis, then
+splits the range along the first coordinate axis (with some tiny padding on the edges)
+into `10` equal-length intervals, and the range along the second coordinate axis (with some
 tiny padding on the edges) into `5` equal-length intervals.
 This gives (hyper-)rectangular boxes.
 
@@ -75,11 +75,11 @@ using Entropies
 RectangularBinning([10, 5])
 ```
 
-## Fixed box sizes 
+## Fixed box sizes
 
-The following binning specification finds the minima/maxima along each coordinate axis, 
-then split the axis ranges into equal-length intervals of fixed size `0.5` until the all data 
-points are covered by boxes. This approach yields (hyper-)cubic boxes, and works for 
+The following binning specification finds the minima/maxima along each coordinate axis,
+then splits the axis ranges into equal-length intervals of fixed size `0.5` until all the data
+points are covered by boxes. This approach yields (hyper-)cubic boxes, and works for
 data of any dimension.
 
 ```julia
@@ -92,25 +92,25 @@ one of the dimensions than over the other dimension.
 
 The following binning specification finds the minima/maxima along each coordinate axis, then
 splits the range along the first coordinate axis into equal-length intervals of size `0.3`,
-and the range along the second axis into equal-length intervals of size `0.1` (in both cases, 
+and the range along the second axis into equal-length intervals of size `0.1` (in both cases,
 making sure the data are completely covered by the boxes).
-This approach gives a (hyper-)rectangular boxes. 
+This approach gives (hyper-)rectangular boxes.
 
 ```julia
 using Entropies
 RectangularBinning([0.3, 0.1])
 ```
 
-# Example 2: Custom grids (partition not guaranteed to cover data points): 
+# Example 2: Custom grids (partition not guaranteed to cover data points):
 
-Assume the data consists of 3-dimensional points `(x, y, z)`, and that we want a grid 
+Assume the data consists of 3-dimensional points `(x, y, z)`, and that we want a grid
 that is fixed over the intervals `[x₁, x₂]` for the first dimension, over `[y₁, y₂]` for
 the second dimension, and over `[z₁, z₂]` for the third dimension. We then want to
-split each of those ranges into 4 equal-length pieces. 
*Beware: some points may fall
-outside the partition if the intervals are not chosen properly (these points are
-simply discarded)*.
+split each of those ranges into 4 equal-length pieces. *Beware: some points may fall
+outside the partition if the intervals are not chosen properly (these points are
+simply discarded)*.
 
-The following binning specification produces the desired (hyper-)rectangular boxes. 
+The following binning specification produces the desired (hyper-)rectangular boxes.
 
 ```julia
 using Entropies, DelayEmbeddings
@@ -179,14 +179,14 @@ end
 
 
 """
-    get_minima_and_edgelengths(points, 
+    get_minima_and_edgelengths(points,
        binning_scheme::RectangularBinning) → (Vector{Float}, Vector{Float})
 
 Find the minima along each axis of the embedding, and compute appropriate
-edge lengths given a rectangular `binning_scheme`, which provide instructions on how to 
+edge lengths given a rectangular `binning_scheme`, which provides instructions on how to
 grid the space. Assumes the input is a vector of points.
 
-See documentation for [`RectangularBinning`](@ref) for details on the 
+See documentation for [`RectangularBinning`](@ref) for details on the
 binning scheme.
 
 # Example
@@ -203,14 +203,14 @@ get_minima_and_edgelengths(pts, RectangularBinning(10))
 get_minima_and_edgelengths(pts, RectangularBinning([10, 8, 5, 4]))
 ```
 
-## Example 2: Custom grids (partition not guaranteed to cover data points): 
+## Example 2: Custom grids (partition not guaranteed to cover data points):
 
 ```julia
 using Entropies, DelayEmbeddings
 
-# Here, we split the first, second and third coordinate axes in 
+# Here, we split the first, second and third coordinate axes into
 # five equal-length pieces, defined over the intervals [a, b],
-# [c, d], and [e, f], respectively (beware: some points may fall 
+# [c, d], and [e, f], respectively (beware: some points may fall
 # outside the partition if the intervals are not chosen properly).
 D = Dataset(rand(100, 3));
 a, b = 0.5, 1 # not completely covering the data, which are on [0, 1]
@@ -229,13 +229,13 @@ function get_minima_and_edgelengths(points, binning_scheme::RectangularBinning)
 
     axisminima = minimum.([minimum.([pt[i] for pt in points]) for i = 1:D])
     axismaxima = maximum.([maximum.([pt[i] for pt in points]) for i = 1:D])
-    
+
     edgelengths = Vector{Float64}(undef, D)
-    
+
    # Dictated by data ranges
    if ϵ isa Float64
        edgelengths = [ϵ for i in 1:D]
-    elseif ϵ isa Vector{<:AbstractFloat} 
+    elseif ϵ isa Vector{<:AbstractFloat}
        edgelengths .= ϵ
    elseif ϵ isa Int
        edgeslengths_nonadjusted = (axismaxima - axisminima) / ϵ
@@ -243,13 +243,13 @@ function get_minima_and_edgelengths(points, binning_scheme::RectangularBinning)
    elseif ϵ isa Vector{Int}
        edgeslengths_nonadjusted = (axismaxima .- axisminima) ./ ϵ
        edgelengths = ((axismaxima .+ (edgeslengths_nonadjusted ./ 100)) .- axisminima) ./ ϵ
-    
+
    # Custom data ranges
    elseif ϵ isa Tuple{Vector{Tuple{T1, T2}},Int64} where {T1 <: Real, T2 <: Real}
        ranges = ϵ[1]
        n_bins = ϵ[2]
        length(ranges) == D || error("Tried to apply a $(length(ranges))-dimensional binning scheme to $(D)-dimensional data. Please provide ranges for all $(D) dimensions.")
-        
+
        # We have predefined axis minima and axis maxima.
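+        # (Each axis `i` is divided into `n_bins` equal pieces over the user-given
+        # range, so the edge length along axis `i` is (max_i - min_i)/n_bins.)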
stepsizes = zeros(Float64, D) edgelengths = zeros(Float64, D) @@ -259,7 +259,7 @@ function get_minima_and_edgelengths(points, binning_scheme::RectangularBinning) axisminima[i] = minimum(ranges[i]) end else - error("get_minima_and_edgelengths not implemented for RectangularBinning(ϵ) with ϵ of type $(typeof(ϵ))") + error("get_minima_and_edgelengths not implemented for RectangularBinning(ϵ) with ϵ of type $(typeof(ϵ))") end axisminima, edgelengths @@ -268,10 +268,10 @@ end """ get_edgelengths(pts, binning_scheme::RectangularBinning) → Vector{Float} -Return the box edge length along each axis resulting from discretizing `pts` on a +Return the box edge length along each axis resulting from discretizing `pts` on a rectangular grid specified by `binning_scheme`. -# Example +# Example ```julia using Entropies, DelayEmbeddings diff --git a/src/binning_based/rectangular/count_box_visits.jl b/src/probabilities_estimators/rectangular_binning/count_box_visits.jl similarity index 92% rename from src/binning_based/rectangular/count_box_visits.jl rename to src/probabilities_estimators/rectangular_binning/count_box_visits.jl index b81aec5d6..55830849b 100644 --- a/src/binning_based/rectangular/count_box_visits.jl +++ b/src/probabilities_estimators/rectangular_binning/count_box_visits.jl @@ -5,14 +5,14 @@ import StaticArrays: SVector, MVector encode_as_bin(point, reference_point, edgelengths) → Vector{Int} Encode a point into its integer bin labels relative to some `reference_point` -(always counting from lowest to highest magnitudes), given a set of box -`edgelengths` (one for each axis). The first bin on the positive side of -the reference point is indexed with 0, and the first bin on the negative +(always counting from lowest to highest magnitudes), given a set of box +`edgelengths` (one for each axis). The first bin on the positive side of +the reference point is indexed with 0, and the first bin on the negative side of the reference point is indexed with -1. See also: [`joint_visits`](@ref), [`marginal_visits`](@ref). -## Example +## Example ```julia using Entropies @@ -22,8 +22,10 @@ steps = [0.2, 0.2, 0.3] encode_as_bin(rand(3), refpoint, steps) ``` """ -function encode_as_bin end +function encode_as_bin end +# TODO: All of these methods have exactly the same source code, why not just use Union? +# Or literally use AbstractVector{T}... function encode_as_bin(point::Vector{T}, reference_point, edgelengths) where {T <: Real} floor.(Int, (point .- reference_point) ./ edgelengths) end @@ -49,16 +51,16 @@ end joint_visits(points, binning_scheme::RectangularBinning) → Vector{Vector{Int}} Determine which bins are visited by `points` given the rectangular binning -scheme `ϵ`. Bins are referenced relative to the axis minima, and are +scheme `ϵ`. Bins are referenced relative to the axis minima, and are encoded as integers, such that each box in the binning is assigned a -unique integer array (one element for each dimension). +unique integer array (one element for each dimension). -For example, if a bin is visited three times, then the corresponding +For example, if a bin is visited three times, then the corresponding integer array will appear three times in the array returned. See also: [`marginal_visits`](@ref), [`encode_as_bin`](@ref). 
-# Example +# Example ```julia using DelayEmbeddings, Entropies @@ -77,12 +79,12 @@ end marginal_visits(points, binning_scheme::RectangularBinning, dims) → Vector{Vector{Int}} Determine which bins are visited by `points` given the rectangular binning -scheme `ϵ`, but only along the desired dimensions `dims`. Bins are referenced -relative to the axis minima, and are encoded as integers, such that each box -in the binning is assigned a unique integer array (one element for each -dimension in `dims`). +scheme `ϵ`, but only along the desired dimensions `dims`. Bins are referenced +relative to the axis minima, and are encoded as integers, such that each box +in the binning is assigned a unique integer array (one element for each +dimension in `dims`). -For example, if a bin is visited three times, then the corresponding +For example, if a bin is visited three times, then the corresponding integer array will appear three times in the array returned. See also: [`joint_visits`](@ref), [`encode_as_bin`](@ref). @@ -117,13 +119,13 @@ end """ marginal_visits(joint_visits, dims) → Vector{Vector{Int}} -If joint visits have been precomputed using [`joint_visits`](@ref), marginal -visits can be returned directly without providing the binning again +If joint visits have been precomputed using [`joint_visits`](@ref), marginal +visits can be returned directly without providing the binning again using the `marginal_visits(joint_visits, dims)` signature. See also: [`joint_visits`](@ref), [`encode_as_bin`](@ref). -# Example +# Example ``` using DelayEmbeddings, Entropies diff --git a/src/binning_based/rectangular/histogram_estimation.jl b/src/probabilities_estimators/rectangular_binning/histogram_estimation.jl similarity index 82% rename from src/binning_based/rectangular/histogram_estimation.jl rename to src/probabilities_estimators/rectangular_binning/histogram_estimation.jl index cd5a09200..4b6916347 100644 --- a/src/binning_based/rectangular/histogram_estimation.jl +++ b/src/probabilities_estimators/rectangular_binning/histogram_estimation.jl @@ -133,3 +133,44 @@ function binhist(data, ε) b = [β .* ε .+ mini for β in bins] return hist, b end + +# Frequencies are needed elsewhere in the package too, so keep in its own method. 
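+# `_non0hist_frequencies(x)` returns the number of occurrences of each distinct
+# value of `x` (ordered by the sorted values); `_non0hist` then divides by `N`
+# to turn these counts into probabilities.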
+function _non0hist_frequencies(x) + L = length(x) + + hist = Vector{Float64}() + # Reserve enough space for histogram: + sizehint!(hist, L) + + sx = sort(x, alg = QuickSort) + + # Fill the histogram by counting consecutive equal values: + prev_val, count = sx[1], 0 + for val in sx + if val == prev_val + count += 1 + else + push!(hist, count) + prev_val = val + count = 1 + end + end + push!(hist, count) + + # Shrink histogram capacity to fit its size: + sizehint!(hist, length(hist)) + return hist +end + +function _non0hist(x, N) + hist = _non0hist_frequencies(x) + return Probabilities(hist ./ N) +end + +function _non0hist(x) + return _non0hist(x, length(x)) +end + +_non0hist(x::AbstractDataset) = _non0hist(x.data) +_non0hist(x::AbstractDataset, N) = _non0hist(x.data, N) +probabilities(x::AbstractDataset) = _non0hist(x.data) diff --git a/src/probabilities_estimators/rectangular_binning/rectangular_binning.jl b/src/probabilities_estimators/rectangular_binning/rectangular_binning.jl new file mode 100644 index 000000000..1c58c9b50 --- /dev/null +++ b/src/probabilities_estimators/rectangular_binning/rectangular_binning.jl @@ -0,0 +1,4 @@ +include("binning_schemes.jl") +include("count_box_visits.jl") +include("histogram_estimation.jl") +include("VisitationFrequency.jl") \ No newline at end of file diff --git a/src/timescales/timescales.jl b/src/probabilities_estimators/timescales/timescales.jl similarity index 100% rename from src/timescales/timescales.jl rename to src/probabilities_estimators/timescales/timescales.jl diff --git a/src/timescales/wavelet_overlap.jl b/src/probabilities_estimators/timescales/wavelet_overlap.jl similarity index 100% rename from src/timescales/wavelet_overlap.jl rename to src/probabilities_estimators/timescales/wavelet_overlap.jl diff --git a/src/binning_based/rectangular/GroupSlices.jl b/src/probabilities_estimators/transfer_operator/GroupSlices.jl similarity index 100% rename from src/binning_based/rectangular/GroupSlices.jl rename to src/probabilities_estimators/transfer_operator/GroupSlices.jl diff --git a/src/binning_based/rectangular/TransferOperator.jl b/src/probabilities_estimators/transfer_operator/transfer_operator.jl similarity index 99% rename from src/binning_based/rectangular/TransferOperator.jl rename to src/probabilities_estimators/transfer_operator/transfer_operator.jl index 20291b3af..500639465 100644 --- a/src/binning_based/rectangular/TransferOperator.jl +++ b/src/probabilities_estimators/transfer_operator/transfer_operator.jl @@ -1,3 +1,4 @@ +# TODO: This needs its own docpage I feel. 
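+# (This file implements the transfer-operator probabilities estimator: the data are
+# discretized on a rectangular binning, a transition matrix between bins is estimated,
+# and probabilities are obtained from its invariant measure.)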
using DelayEmbeddings, SparseArrays include("GroupSlices.jl") From b7b2758e2053c3cda976d53293cd83940a569bf0 Mon Sep 17 00:00:00 2001 From: Datseris Date: Sat, 10 Sep 2022 15:03:17 +0100 Subject: [PATCH 23/32] correct includes state --- src/Entropies.jl | 11 +---------- src/entropies/entropies.jl | 2 +- .../probabilities_estimators.jl | 2 +- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/Entropies.jl b/src/Entropies.jl index ce7822ac7..6a1eeec33 100644 --- a/src/Entropies.jl +++ b/src/Entropies.jl @@ -13,18 +13,9 @@ export AbstractDataset, Dataset const Array_or_Dataset = Union{<:AbstractArray, <:AbstractDataset} include("probabilities.jl") +include("probabilities_estimators/probabilities_estimators.jl") include("entropies/entropies.jl") include("symbolization/symbolize.jl") -include("entropies/entropies.jl") -include("histogram_estimation.jl") -include("counting_based/CountOccurrences.jl") -include("symbolic/symbolic.jl") -include("binning_based/rectangular/rectangular_estimators.jl") -include("kerneldensity/kerneldensity.jl") -include("timescales/timescales.jl") -include("nearest_neighbors/nearest_neighbors.jl") -include("dispersion/entropy_dispersion.jl") -include("tsallis/tsallis.jl") include("deprecations.jl") diff --git a/src/entropies/entropies.jl b/src/entropies/entropies.jl index 065c3beed..291bf6ed9 100644 --- a/src/entropies/entropies.jl +++ b/src/entropies/entropies.jl @@ -3,4 +3,4 @@ include("tsallis.jl") include("shannon.jl") include("convenience_definitions.jl") include("direct_entropies/nearest_neighbors/nearest_neighbors.jl") -# TODO: What else is included here? \ No newline at end of file +# TODO: What else is included here from direct entropies? \ No newline at end of file diff --git a/src/probabilities_estimators/probabilities_estimators.jl b/src/probabilities_estimators/probabilities_estimators.jl index 6183aa38b..ba54fda28 100644 --- a/src/probabilities_estimators/probabilities_estimators.jl +++ b/src/probabilities_estimators/probabilities_estimators.jl @@ -3,4 +3,4 @@ include("rectangular_binning/rectangular_binning.jl") include("permutation_ordinal/symbolic.jl") include("kernel_density.jl") include("timescales/timescales.jl") -include("transfer_operator.jl") \ No newline at end of file +include("transfer_operator/transfer_operator.jl") \ No newline at end of file From fd97c391c68634831400a824b631d940c5391890 Mon Sep 17 00:00:00 2001 From: Datseris Date: Sat, 10 Sep 2022 16:22:50 +0100 Subject: [PATCH 24/32] finish first draft of docs new way (many TODOs left_) --- docs/Project.toml | 2 +- docs/make.jl | 7 +- docs/src/entropies.md | 3 +- .../src/{shannon_entropies.md => examples.md} | 73 ++++++------------- docs/src/index.md | 2 +- docs/toc.jl | 4 +- src/Entropies.jl | 4 +- src/deprecations.jl | 2 - src/entropies/convenience_definitions.jl | 4 + src/entropies/shannon.jl | 4 + src/entropies/tsallis.jl | 38 ++++++---- 11 files changed, 68 insertions(+), 75 deletions(-) rename docs/src/{shannon_entropies.md => examples.md} (78%) diff --git a/docs/Project.toml b/docs/Project.toml index 481e4ba8d..c323f2815 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -5,7 +5,7 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" DynamicalSystems = "61744808-ddfa-5f27-97ff-6e42cc95d634" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +Entropies = "ed8fcbec-b94c-44b6-89df-898894ad9591" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" 
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-Wavelets = "29a6e085-ba6d-5f35-a997-948ac2efa89a"
diff --git a/docs/make.jl b/docs/make.jl
index 3958ea86b..8507baeab 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -11,8 +11,10 @@ using CairoMakie
 using Entropies.Wavelets
 using DynamicalSystems
 
-# %% JuliaDynamics theme.
-# download the themes
+# %% JuliaDynamics theme
+# It includes theming for the HTML build
+# and theming for the Makie plotting
+
 using DocumenterTools: Themes
 for file in ("juliadynamics-lightdefs.scss", "juliadynamics-darkdefs.scss", "juliadynamics-style.scss")
     filepath = joinpath(@__DIR__, file)
@@ -34,7 +36,6 @@ Themes.compile(joinpath(@__DIR__, "juliadynamics-dark.scss"), joinpath(@__DIR__,
 ENV["JULIA_DEBUG"] = "Documenter"
 
 PAGES = include("toc.jl")
-
 include("style.jl")
 
 makedocs(
diff --git a/docs/src/entropies.md b/docs/src/entropies.md
index 8f721e146..c643a8b87 100644
--- a/docs/src/entropies.md
+++ b/docs/src/entropies.md
@@ -27,9 +27,10 @@ entropy_kozachenkoleonenko
 ```
 
 ## Convenience functions
-In this subsection we expand documentation strings of "entropy names" that are used commonly in the literature, such as "permutation entropy". As we made clear in [API & terminology](@ref), these are just the existing Shannon/Rényi/Tsallis entropy with a particularly chosen probability estimator.
+In this subsection we expand documentation strings of "entropy names" that are used commonly in the literature, such as "permutation entropy". As we made clear in [API & terminology](@ref), these are just the existing Shannon/Rényi/Tsallis entropy with a particularly chosen probability estimator. We have only defined convenience functions for the most used names; more specialized convenience functions can easily be defined in a couple of lines of code.
 ```@docs
 entropy_permutation
+entropy_spatial_permutation
 entropy_wavelet
 entropy_dispersion
 ```
\ No newline at end of file
diff --git a/docs/src/shannon_entropies.md b/docs/src/examples.md
similarity index 78%
rename from docs/src/shannon_entropies.md
rename to docs/src/examples.md
index 64f7b732b..b60412367 100644
--- a/docs/src/shannon_entropies.md
+++ b/docs/src/examples.md
@@ -1,18 +1,9 @@
-# [Shannon entropies](@id shannon_entropies)
+# Examples
 
-Many methods in the literature compute (Shannon) entropy in ways that don't explicitly result in probability distributions, so they can't be used in combination with [`probabilities`](@ref), [`entropy_renyi`](@ref) or [`entropy_tsallis`](@ref). Instead, they appear here as stand-alone functions.
-
-## Nearest neighbors entropy
-
-```@docs
-entropy_kraskov
-entropy_kozachenkoleonenko
-```
-
-### Example
+## Nearest neighbor direct entropy example
 
 This example reproduces a figure from Charzyńska & Gambin (2016)[^Charzyńska2016]. Both
-estimators nicely converge to the true entropy with increasing time series length.
+estimators nicely converge to the "true" entropy with increasing time series length.
+For a uniform 1D distribution ``U(0, 1)``, the true entropy is `0`.
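+(The differential Shannon entropy of a uniform distribution over an interval of length `L` is `log(L)`, which is zero for `L = 1`.)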
```@example MAIN @@ -29,9 +20,9 @@ for N in Ns kr = Float64[] for i = 1:nreps pts = Dataset([rand(Uniform(0, 1), 1) for i = 1:N]); - + push!(kl, entropy_kozachenkoleonenko(pts, w = 0, k = 1)) - # with k = 1, Kraskov is virtually identical to + # with k = 1, Kraskov is virtually identical to # Kozachenko-Leonenko, so pick a higher number of neighbors push!(kr, entropy_kraskov(pts, w = 0, k = 3)) end @@ -47,7 +38,7 @@ color = (Main.COLORS[1], 0.5)) ay = Axis(fig[2,1]; xlabel = "time step", ylabel = "entropy (nats)", title = "Kraskov") lines!(ay, Ns, mean.(Ekr); color = Cycled(2)) -band!(ay, Ns, mean.(Ekr) .+ std.(Ekr), mean.(Ekr) .- std.(Ekr); +band!(ay, Ns, mean.(Ekr) .+ std.(Ekr), mean.(Ekr) .- std.(Ekr); color = (Main.COLORS[2], 0.5)) fig @@ -55,17 +46,7 @@ fig [^Charzyńska2016]: Charzyńska, A., & Gambin, A. (2016). Improvement of the k-NN entropy estimator with applications in systems biology. Entropy, 18(1), 13. -## Permutation entropy - -```@docs -entropy_perm -entropy_weightedperm -entropy_ampperm -entropy_spatialperm -``` - -### Example - +## Permutation entropy example This example reproduces an example from Bandt and Pompe (2002), where the permutation entropy is compared with the largest Lyapunov exponents from time series of the chaotic logistic map. Entropy estimates using [`SymbolicWeightedPermutation`](@ref) @@ -115,26 +96,27 @@ end fig ``` -## Dispersion entropy - -```@docs -entropy_dispersion -``` - -## Kernel entropy -```@docs -entropy_kernel -``` +## Kernel density example +Here, we draw some random points from a 2D normal distribution. Then, we use kernel density estimation to associate a probability to each point `p`, measured by how many points are within radius `1.5` of `p`. Plotting the actual points, along with their associated probabilities estimated by the KDE procedure, we get the following surface plot. -## Wavelet entropy - -```@docs -entropy_wavelet +```@example MAIN +using DynamicalSystems, CairoMakie, Distributions +𝒩 = MvNormal([1, -4], 2) +N = 500 +D = Dataset(sort([rand(𝒩) for i = 1:N])) +x, y = columns(D) +p = probabilities(D, NaiveKernel(1.5)) +fig, ax = scatter(D[:, 1], D[:, 2], zeros(N); + markersize=8, axis=(type = Axis3,) +) +surface!(ax, x, y, p.p) +ax.zlabel = "P" +ax.zticklabelsvisible = false +fig ``` -### Example - +## Wavelet entropy example The scale-resolved wavelet entropy should be lower for very regular signals (most of the energy is contained at one scale) and higher for very irregular signals (energy spread more out across scales). @@ -163,10 +145,3 @@ for a in (ax, ay, az); axislegend(a); end for a in (ax, ay); hidexdecorations!(a; grid=false); end fig ``` - -## Binning-based entropy - -```@docs -entropy_visitfreq -entropy_transferoperator -``` diff --git a/docs/src/index.md b/docs/src/index.md index 7b2437b26..0ade3491b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -16,7 +16,7 @@ The API and documentation of Entropies.jl aim to clarify some aspects and provide a simple way to obtain probabilities, entropies, or other complexity measures. ### Probabilities -Entropies and other complexity measures are computed based on _probability distributions_. +Entropies and other complexity measures are typically computed based on _probability distributions_. These are obtained from [Input data](@ref) by a plethora of different ways. 
The central API function that returns a probability distribution (actually, just a vector of probabilities) is [`probabilities`](@ref), which takes in a subtype of [`ProbabilitiesEstimator`](@ref) to specify how the probabilities are computed.
All estimators available in Entropies.jl can be found in the [estimators page](@ref estimators).
diff --git a/docs/toc.jl b/docs/toc.jl
index 75e369786..89adeb08f 100644
--- a/docs/toc.jl
+++ b/docs/toc.jl
@@ -1,8 +1,8 @@
 ENTROPIES_PAGES = [
     "Entropies.jl" => "index.md",
     "Probabilities" => "probabilities.md",
-    "Generalized entropy" => "generalized_entropies.md",
-    "Shannon entropy" => "shannon_entropies.md",
+    "Entropies" => "entropies.md",
     "Complexity measures" => "complexity_measures.md",
+    "Entropies.jl examples" => "examples.md",
     "Utility methods" => "utils.md",
 ]
\ No newline at end of file
diff --git a/src/Entropies.jl b/src/Entropies.jl
index 6a1eeec33..18e4b2625 100644
--- a/src/Entropies.jl
+++ b/src/Entropies.jl
@@ -26,8 +26,8 @@ version_number = "1.3.0"
 update_name = "update_v$(version_number)"
 update_message = """
 \nUpdate message: Entropies v$(version_number)\n
-- An overall overhaul of the documentation and API of Entropies has been performed.
-  Now `genentropy` is just `entropy_renyi`. The documentation further clarifies
+- An overall overhaul of the documentation and API of Entropies.jl has been performed.
+  Now `genentropy` is `entropy_renyi`. The documentation further clarifies
   the difference between calculating probabilities and entropies.
 - A huge amount of new content has been added to Entropies, which is best seen by
   visiting the changelog or the new documentation.
diff --git a/src/deprecations.jl b/src/deprecations.jl
index 88ac5264a..1e3cdf246 100644
--- a/src/deprecations.jl
+++ b/src/deprecations.jl
@@ -1,5 +1,3 @@
-include("deprecated/symbolization.jl")
-
 @deprecate TimeScaleMODWT WaveletOverlap
 @deprecate genentropy entropy_renyi
 
diff --git a/src/entropies/convenience_definitions.jl b/src/entropies/convenience_definitions.jl
index e24caabdc..01400d28a 100644
--- a/src/entropies/convenience_definitions.jl
+++ b/src/entropies/convenience_definitions.jl
@@ -6,6 +6,10 @@ function entropy_permutation(args...)
 
 end
 
+function entropy_spatial_permutation(args...)
+
+end
+
 function entropy_wavelet(args...)
 
 end
diff --git a/src/entropies/shannon.jl b/src/entropies/shannon.jl
index 046e2e4d0..b1a143aae 100644
--- a/src/entropies/shannon.jl
+++ b/src/entropies/shannon.jl
@@ -3,5 +3,9 @@ export entropy_shannon
 """
     entropy_shannon(args...; base = MathConstants.e)
 Equivalent with `entropy_renyi(args...; base, q = 1)` and provided solely for convenience.
+Compute the Shannon entropy, given by
+```math
+H(p) = - \\sum_i p[i] \\log(p[i])
+```
 """
 entropy_shannon(args...; base = MathConstants.e) = entropy_renyi(args...; base, q = 1)
diff --git a/src/entropies/tsallis.jl b/src/entropies/tsallis.jl
index 0d5fb4b27..51c7765cd 100644
--- a/src/entropies/tsallis.jl
+++ b/src/entropies/tsallis.jl
@@ -1,26 +1,36 @@
 export entropy_tsallis
 
 """
-    entropy_tsallis(x::Probabilities; k = 1, q = 0)
-Compute the Tsallis entropy of `x` (Tsallis, 1998)[^Tsallis1988].
+    entropy_tsallis(p::Probabilities; k = 1, q = 0)
+
+Compute the Tsallis entropy of `p` (Tsallis, 1988)[^Tsallis1988].
 
+    entropy_tsallis(x::Array_or_Dataset, est; k = 1, q = 0)
+
+A convenience syntax, which first calls `probabilities(x, est)`
+and then calculates the entropy of the result (and thus `est` can be anything
+the [`probabilities`](@ref) function accepts).
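+
+For example, a sketch using the rectangular-binning convenience syntax of
+[`probabilities`](@ref) (the box size `0.1` here is an arbitrary choice):
+
+```julia
+x = rand(1000)
+entropy_tsallis(x, 0.1; q = 2)
+```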
+
+## Description
+The Tsallis entropy is a generalization of the Boltzmann–Gibbs entropy,
+with `k` standing for the Boltzmann constant. It is defined as
 ```math
-S_q(\\bf p) = \\dfrac{k}{q - 1}\\left(1 - \\sum_{p_i \\in \\bf p} p_i^q\\right)
+S_q(p) = \\frac{k}{q - 1}\\left(1 - \\sum_{i} p[i]^q\\right)
 ```
 
-[^Tsallis1988]: Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics. Journal of statistical physics, 52(1), 479-487.
+[^Tsallis1988]:
+    Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics.
+    Journal of statistical physics, 52(1), 479-487.
 """
 function entropy_tsallis(prob::Probabilities; k = 1, q = 0)
-    haszero = any(iszero, prob)
-    p = if haszero
-        i0 = findall(iszero, prob.p)
-        # As for entropy_renyi, we copy because if someone initialized Probabilities with 0s,
-        # they'd probably want to index the zeros as well.
-        deleteat!(copy(prob.p), i0)
-    else
-        prob.p
-    end
-
-    return k/(q-1)*(1 - sum(prob .^ q))
+    # Zero probabilities contribute nothing to the sum, so we filter them out
+    # lazily (without copying), in case `Probabilities` was initialized with 0s.
+    probs = Iterators.filter(!iszero, prob.p)
+    return k/(q-1)*(1 - sum(p^q for p in probs))
 end
+
+function entropy_tsallis(x, est; kwargs...)
+    p = probabilities(x, est)
+    entropy_tsallis(p; kwargs...)
+end
\ No newline at end of file
From c9084cf5c50bbc6d5362b4b3ac9c49b5aa09ed38 Mon Sep 17 00:00:00 2001
From: Datseris
Date: Sat, 10 Sep 2022 16:30:30 +0100
Subject: [PATCH 25/32] Fix wavelet entropy docs

---
 src/entropies/convenience_definitions.jl      | 15 ++++++++++++--
 .../timescales/timescales.jl                  | 20 -------------------
 .../timescales/wavelet_overlap.jl             |  4 ++--
 3 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/src/entropies/convenience_definitions.jl b/src/entropies/convenience_definitions.jl
index 01400d28a..cde5889c7 100644
--- a/src/entropies/convenience_definitions.jl
+++ b/src/entropies/convenience_definitions.jl
@@ -10,8 +10,19 @@ function entropy_spatial_permutation(args...)
 
 end
 
-function entropy_wavelet(args...)
-
+"""
+    entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(), base = MathConstants.e)
+
+Compute the wavelet entropy. This function is just a convenience call to:
+```julia
+est = WaveletOverlap(wavelet)
+entropy_renyi(x, est; base, q = 1)
+```
+See [`WaveletOverlap`](@ref) for more.
+"""
+function entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(), base = MathConstants.e)
+    est = WaveletOverlap(wavelet)
+    entropy_renyi(x, est; base, q = 1)
 end
 
 function entropy_dispersion(args...)
diff --git a/src/probabilities_estimators/timescales/timescales.jl b/src/probabilities_estimators/timescales/timescales.jl
index 9b9a55cb6..d1c3aee17 100644
--- a/src/probabilities_estimators/timescales/timescales.jl
+++ b/src/probabilities_estimators/timescales/timescales.jl
@@ -1,21 +1 @@
-export entropy_wavelet
-
 include("wavelet_overlap.jl")
-
-"""
-    entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(),
-        base = MathConstants.e)
-
-Estimate (Shannon) wave entropy (Rosso, 2001) to the given `base` using maximal overlap
-discrete wavelet transform (MODWT).
-
-See also: [`WaveletOverlap`](@ref).
-
-[^Rosso2001]:
-    Rosso et al. (2001). Wavelet entropy: a new tool for analysis of short duration
-    brain electrical signals. Journal of neuroscience methods, 105(1), 65-75.
-""" -function entropy_wavelet(x; wavelet::W = Wavelets.WT.Daubechies{12}(), base = MathConstants.e) where W <:Wavelets.WT.OrthoWaveletClass - est = WaveletOverlap(wavelet) - entropy_renyi(x, est; base = base, q = 1) -end diff --git a/src/probabilities_estimators/timescales/wavelet_overlap.jl b/src/probabilities_estimators/timescales/wavelet_overlap.jl index 87c18aa3b..17f047558 100644 --- a/src/probabilities_estimators/timescales/wavelet_overlap.jl +++ b/src/probabilities_estimators/timescales/wavelet_overlap.jl @@ -6,8 +6,8 @@ import Wavelets Apply the maximal overlap discrete wavelet transform (MODWT) to a signal, then compute probabilities/entropy from the energies at different -wavelet scales. This implementation is based on Rosso et -al. (2001)[^Rosso2001]. +wavelet scales. These probabilities are used to compute the wavelet entropy, +according to Rosso et al. (2001)[^Rosso2001]. The probability `p[i]` is the relative energy for the `i`-th wavelet scale. To obtain a better understanding of what these probabilities mean, we prepared From ed6f2ac4674d58064d5cc11cc07a1769aa8c0490 Mon Sep 17 00:00:00 2001 From: Datseris Date: Sat, 10 Sep 2022 16:44:23 +0100 Subject: [PATCH 26/32] add docstrings to convenience calls --- src/entropies/convenience_definitions.jl | 34 +++++++++++++++++-- .../SymbolicPermutation.jl | 1 + .../spatial_permutation.jl | 2 +- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/entropies/convenience_definitions.jl b/src/entropies/convenience_definitions.jl index cde5889c7..b9a69fce6 100644 --- a/src/entropies/convenience_definitions.jl +++ b/src/entropies/convenience_definitions.jl @@ -1,13 +1,41 @@ # This file defines and exports some convenience definition of entropies # for commonly used names in the literature. They aren't actually new entropies # as discussed extensively in the documentation. +export entropy_permutation, entropy_spatial_permutation, entropy_wavelet -function entropy_permutation(args...) +""" + entropy_permutation(x; τ = 1, m = 3, base = MathConstants.e) +Compute the permutation entropy of order `m` with delay/lag `τ`. +This function is just a convenience call to: +```julia +est = SymbolicPermutation(; m, τ) +entropy_renyi(x, est; base, q = 1) +``` +See [`SymbolicPermutation`](@ref) for more info. +Similarly, one can use `SymbolicWeightedPermutation` or `SymbolicAmplitudeAwarePermutation` +for the weighted/amplitude-aware versions. +""" +function entropy_permutation(x; τ = 1, m = 3, base = MathConstants.e) + est = SymbolicPermutation(; m, τ) + entropy_renyi(x, est; base, q = 1) end -function entropy_spatial_permutation(args...) +""" + entropy_spatial_permutation(x, stencil, periodic = true; kwargs...) +Compute the spatial permutation entropy of `x` given the `stencil`. +Here `x` must be a matrix or higher dimensional `Array` containing spatial data. +This function is just a convenience call to: +```julia +est = SpatialSymbolicPermutation(stencil, x, periodic) +entropy_renyi(x, est; kwargs..., q = 1) +``` +See [`SpatialSymbolicPermutation`](@ref) for more info, or how to encode stencils. +""" +function entropy_spatial_permutation(x, stencil, periodic = true; kwargs...) + est = SpatialSymbolicPermutation(stencil, x, periodic) + entropy_renyi(x, est; kwargs..., q = 1) end """ @@ -18,7 +46,7 @@ Compute the wavelet entropy. This function is just a convenience call to: est = WaveletOverlap(wavelet) entropy_renyi(x, est; base, q = 1) ``` -See [`WaveletOverlap`](@ref) for more. +See [`WaveletOverlap`](@ref) for more info. 
""" function entropy_wavelet(x; wavelet = Wavelets.WT.Daubechies{12}(), base = MathConstants.e) est = WaveletOverlap(wavelet) diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl index 9ae40b5e8..b77956d46 100644 --- a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl @@ -13,6 +13,7 @@ abstract type PermutationProbabilityEstimator <: ProbabilitiesEstimator end Symbolic, permutation-based probabilities/entropy estimators. `m` is the permutation order (or the symbol size or the embedding dimension) and `τ` is the delay time (or lag). +They are used to define the permutation entropies[^BandtPompe2002]. ## Repeated values during symbolization diff --git a/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl b/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl index 605040c61..047808037 100644 --- a/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl +++ b/src/probabilities_estimators/permutation_ordinal/spatial_permutation.jl @@ -24,7 +24,7 @@ The length of the stencil decides the order of the permutation entropy, and the within the stencil dictates the order that pixels are compared with. The pixel without any offset is always first in the order. -After having defined `est`, one calculates the permutation entropy of ordinal patterns +After having defined `est`, one calculates the spatial permutation entropy by calling [`entropy_renyi`](@ref) with `est`, and with the array data. To apply this to timeseries of spatial data, simply loop over the call, e.g.: ```julia From c5acd2fde7869181b3dd1e9e3effe7836aef30ec Mon Sep 17 00:00:00 2001 From: Datseris Date: Sat, 10 Sep 2022 17:43:59 +0100 Subject: [PATCH 27/32] final touches --- docs/src/index.md | 6 ++++-- src/entropies/convenience_definitions.jl | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 0ade3491b..8f96cd1bf 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -34,9 +34,11 @@ Thus, any of the implemented [probabilities estimators](@ref estimators) can be !!! tip "There aren't many entropies, really." - A crucial thing to clarify, is that many quantities that are named as entropies (e.g., permutation entropy ([`entropy_permutation`](@ref)), the wavelet entropy [`entropy_wavelet`](@ref), etc.), are _not really new entropies_. They are in fact new probability estimators. They simply devise a new way to calculate probabilities from data, and then plug those probabilities into formal entropy formulas such as the Shannon entropy. While in Entropies.jl we provide convenience functions like [`entropy_wavelet`](@ref), they really aren't anything more than 3-lines-of-code wrappers that call [`entropy_shannon`](@ref) with the appropriate [`ProbabilityEstimator`](@ref). + A crucial thing to clarify is that many quantities that are named as entropies (e.g., permutation entropy [`entropy_permutation`](@ref), wavelet entropy [`entropy_wavelet`](@ref), etc.), are _not really new entropies_. They are new probability estimators. They simply devise a new way to calculate probabilities from data, and then plug those probabilities into formal entropy formulas such as the Shannon entropy. The probability estimators are smartly created so that they elegantly highlight important aspects of the data relevant to complexity. 
- There are only a few exceptions to this rule, which are quantities that are able to compute Shannon entropies via alternate means, without explicitly computing some probability distributions, such as [TODO ADD EXAMPLE]. + While in Entropies.jl we provide convenience functions like [`entropy_wavelet`](@ref), they really aren't anything more than 2-lines-of-code wrappers that call [`entropy_shannon`](@ref) with the appropriate [`ProbabilityEstimator`](@ref). + + There are only a few exceptions to this rule, which are quantities that are able to compute Shannon entropies via alternate means, without explicitly computing some probability distributions, such as [`entropy_kraskov`](@ref). ### Complexity measures diff --git a/src/entropies/convenience_definitions.jl b/src/entropies/convenience_definitions.jl index b9a69fce6..e081e213c 100644 --- a/src/entropies/convenience_definitions.jl +++ b/src/entropies/convenience_definitions.jl @@ -10,7 +10,7 @@ Compute the permutation entropy of order `m` with delay/lag `τ`. This function is just a convenience call to: ```julia est = SymbolicPermutation(; m, τ) -entropy_renyi(x, est; base, q = 1) +entropy_shannon(x, est; base) ``` See [`SymbolicPermutation`](@ref) for more info. Similarly, one can use `SymbolicWeightedPermutation` or `SymbolicAmplitudeAwarePermutation` @@ -18,7 +18,7 @@ for the weighted/amplitude-aware versions. """ function entropy_permutation(x; τ = 1, m = 3, base = MathConstants.e) est = SymbolicPermutation(; m, τ) - entropy_renyi(x, est; base, q = 1) + entropy_shannon(x, est; base) end """ @@ -29,13 +29,13 @@ Here `x` must be a matrix or higher dimensional `Array` containing spatial data. This function is just a convenience call to: ```julia est = SpatialSymbolicPermutation(stencil, x, periodic) -entropy_renyi(x, est; kwargs..., q = 1) +entropy_shannon(x, est; kwargs...) ``` See [`SpatialSymbolicPermutation`](@ref) for more info, or how to encode stencils. """ function entropy_spatial_permutation(x, stencil, periodic = true; kwargs...) est = SpatialSymbolicPermutation(stencil, x, periodic) - entropy_renyi(x, est; kwargs..., q = 1) + entropy_shannon(x, est; kwargs...) end """ @@ -44,7 +44,7 @@ end Compute the wavelet entropy. This function is just a convenience call to: ```julia est = WaveletOverlap(wavelet) -entropy_renyi(x, est; base, q = 1) +entropy_shannon(x, est; base) ``` See [`WaveletOverlap`](@ref) for more info. """ From 47189f0569fda8135d92e62ba75a4bab20c48ebd Mon Sep 17 00:00:00 2001 From: Datseris Date: Sun, 11 Sep 2022 15:07:29 +0100 Subject: [PATCH 28/32] add documentation build phase --- .github/workflows/ci.yml | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b69ebef6e..d7ee7bcbd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,20 +15,18 @@ jobs: fail-fast: false matrix: version: - - '1' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. - os: - - ubuntu-latest + - '1' + os: [ubuntu-latest] # adjust according to need, e.g. 
os: [ubuntu-latest] if testing only on linux arch: - x64 - steps: - # Cancel ongoing CI test runs if pushing to branch again before the previous tests + # Cancel ongoing CI test runs if pushing to branch again before the previous tests # have finished - name: Cancel ongoing test runs for previous commits uses: styfle/cancel-workflow-action@0.6.0 with: access_token: ${{ github.token }} - + # Do tests - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 @@ -51,3 +49,31 @@ jobs: - uses: codecov/codecov-action@v1 with: file: lcov.info + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + # Cancel ongoing documentation build if pushing to branch again before the previous + # build is finished. + - name: Cancel ongoing documentation builds for previous commits + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + + # Build docs + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: '1' + - name: Instantiate and install dependencies + run: | + julia --project=docs -e ' + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate()' + - name: Generate documentation and deploy + env: # needed for pushing to gh-pages branch + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} + run: + julia --project=docs docs/make.jl \ No newline at end of file From a9e65128712da70530cf3af18fd0c6eb6069c606 Mon Sep 17 00:00:00 2001 From: George Datseris Date: Wed, 14 Sep 2022 11:06:33 +0200 Subject: [PATCH 29/32] Add power spectrum probability estimator (#94) * add code and docstring for spectral entropy * add docs --- Project.toml | 2 ++ docs/src/probabilities.md | 24 ++------------ .../timescales/power_spectrum.jl | 32 +++++++++++++++++++ .../timescales/timescales.jl | 1 + test/timescales.jl | 11 +++++++ 5 files changed, 48 insertions(+), 22 deletions(-) create mode 100644 src/probabilities_estimators/timescales/power_spectrum.jl diff --git a/Project.toml b/Project.toml index 14eea5845..2a4edc421 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ version = "1.3.0" [deps] DelayEmbeddings = "5732040d-69e3-5649-938a-b6b4f237613f" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Neighborhood = "645ca80c-8b79-4109-87ea-e1f58159d116" QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" @@ -22,6 +23,7 @@ Wavelets = "29a6e085-ba6d-5f35-a997-948ac2efa89a" [compat] DelayEmbeddings = "2" Distances = "0.9, 0.10" +FFTW = "1" Neighborhood = "0.1, 0.2" QuadGK = "2" Scratch = "1" diff --git a/docs/src/probabilities.md b/docs/src/probabilities.md index 5af42cf91..42e5c5177 100644 --- a/docs/src/probabilities.md +++ b/docs/src/probabilities.md @@ -56,29 +56,9 @@ transfermatrix NaiveKernel ``` -## Example - -Here, we draw some random points from a 2D normal distribution. Then, we use kernel -density estimation to associate a probability to each point `p`, measured by how many -points are within radius `1.5` of `p`. Plotting the actual points, along with their -associated probabilities estimated by the KDE procedure, we get the following surface -plot. 
-
-```@example MAIN
-using DynamicalSystems, CairoMakie, Distributions
-𝒩 = MvNormal([1, -4], 2)
-N = 500
-D = Dataset(sort([rand(𝒩) for i = 1:N]))
-x, y = columns(D)
-p = probabilities(D, NaiveKernel(1.5))
-fig, ax = surface(x, y, p.p; axis=(type=Axis3,))
-ax.zlabel = "P"
-ax.zticklabelsvisible = false
-fig
-```
-
-## Wavelet
+## Timescales
 
 ```@docs
 WaveletOverlap
+PowerSpectrum
 ```
diff --git a/src/probabilities_estimators/timescales/power_spectrum.jl b/src/probabilities_estimators/timescales/power_spectrum.jl
new file mode 100644
index 000000000..da4a9a084
--- /dev/null
+++ b/src/probabilities_estimators/timescales/power_spectrum.jl
@@ -0,0 +1,32 @@
+export PowerSpectrum
+import FFTW
+
+"""
+    PowerSpectrum() <: ProbabilitiesEstimator
+
+Calculate the power spectrum of a timeseries (the squared amplitude of its Fourier transform),
+and return the spectrum normalized to sum = 1 as probabilities.
+The Shannon entropy of these probabilities is typically referred to in the literature as
+_spectral entropy_, e.g. [^Llanos2016],[^Tian2017].
+
+The simpler the temporal structure of the timeseries, the lower the entropy. However,
+you can't compare entropies of timeseries with different lengths, because the binning
+in spectral space depends on the length of the input.
+
+[^Llanos2016]:
+    Llanos et al., _Power spectral entropy as an information-theoretic correlate of manner
+    of articulation in American English_, [The Journal of the Acoustical Society of America
+    141, EL127 (2017); https://doi.org/10.1121/1.4976109]
+
+[^Tian2017]:
+    Tian et al, _Spectral Entropy Can Predict Changes of Working Memory Performance Reduced
+    by Short-Time Training in the Delayed-Match-to-Sample Task_, [Front. Hum. Neurosci.](
+    https://doi.org/10.3389/fnhum.2017.00437)
+"""
+struct PowerSpectrum <: ProbabilitiesEstimator end
+
+function probabilities(x, ::PowerSpectrum)
+    @assert x isa AbstractVector{<:Real} "`PowerSpectrum` only works for timeseries input!"
+    f = FFTW.rfft(x)
+    Probabilities(abs2.(f))
+end
\ No newline at end of file
diff --git a/src/probabilities_estimators/timescales/timescales.jl b/src/probabilities_estimators/timescales/timescales.jl
index d1c3aee17..10123de77 100644
--- a/src/probabilities_estimators/timescales/timescales.jl
+++ b/src/probabilities_estimators/timescales/timescales.jl
@@ -1 +1,2 @@
 include("wavelet_overlap.jl")
+include("power_spectrum.jl")
\ No newline at end of file
diff --git a/test/timescales.jl b/test/timescales.jl
index fb0660ed5..bda9c179c 100644
--- a/test/timescales.jl
+++ b/test/timescales.jl
@@ -14,4 +14,15 @@ using Entropies, Test
     @test ps isa Probabilities
     @test entropy_renyi(x, WaveletOverlap(), q = 1, base = 2) isa Real
 end
+
+  @testet "Fourier Spectrum" begin
+    N = 1000
+    t = range(0, 10π, N)
+    x = sin.(t)
+    y = @. 
sin(t) + sin(sqrt(3)*t)
+    z = randn(N)
+    est = PowerSpectrum()
+    ents = [entropy_renyi(w, est) for w in (x,y,z)]
+    @test ents[1] < ents[2] < ents[3]
+  end
 end
From 8b4dd4b6148b2309208b828a5ed9c64eb3f2b9f2 Mon Sep 17 00:00:00 2001
From: Datseris
Date: Wed, 14 Sep 2022 10:21:24 +0100
Subject: [PATCH 30/32] even better test reorganization

---
 src/probabilities_estimators/timescales/power_spectrum.jl | 4 ++--
 test/runtests.jl | 8 +++++---
 test/timescales.jl | 2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/probabilities_estimators/timescales/power_spectrum.jl b/src/probabilities_estimators/timescales/power_spectrum.jl
index da4a9a084..7d4bb7c4f 100644
--- a/src/probabilities_estimators/timescales/power_spectrum.jl
+++ b/src/probabilities_estimators/timescales/power_spectrum.jl
@@ -9,14 +9,14 @@ and return the spectrum normalized to sum = 1 as probabilities.
 The Shannon entropy of these probabilities is typically referred to in the literature as
 _spectral entropy_, e.g. [^Llanos2016],[^Tian2017].
 
-The simpler the temporal structure of the timeseries, the lower the entropy. However,
+The closer the spectrum is to flat, i.e., white noise, the higher the entropy. However,
 you can't compare entropies of timeseries with different lengths, because the binning
 in spectral space depends on the length of the input.
 
 [^Llanos2016]:
     Llanos et al., _Power spectral entropy as an information-theoretic correlate of manner
     of articulation in American English_, [The Journal of the Acoustical Society of America
-    141, EL127 (2017); https://doi.org/10.1121/1.4976109]
+    141, EL127 (2017)](https://doi.org/10.1121/1.4976109)
 
 [^Tian2017]:
     Tian et al, _Spectral Entropy Can Predict Changes of Working Memory Performance Reduced
diff --git a/test/runtests.jl b/test/runtests.jl
index dcacd812c..e3b1d5827 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,9 +5,11 @@ using Wavelets
 using StaticArrays
 using Neighborhood: KDTree, BruteForce
 
-# This is how the tests should look like in the end:
-@testset "Entopies.jl tests" begin
-    include("timescales.jl")
+# TODO: This is how the tests should look in the end:
+defaultname(file) = splitext(basename(file))[1]
+testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include(file); end
+@testset "Entropies.jl" begin
+    testfile("timescales.jl")
 end
 
 @testset "Histogram estimation" begin
diff --git a/test/timescales.jl b/test/timescales.jl
index bda9c179c..7cf6cf3d0 100644
--- a/test/timescales.jl
+++ b/test/timescales.jl
@@ -15,7 +15,7 @@ using Entropies, Test
     @test entropy_renyi(x, WaveletOverlap(), q = 1, base = 2) isa Real
 end
 
-  @testet "Fourier Spectrum" begin
+  @testset "Fourier Spectrum" begin
     N = 1000
     t = range(0, 10π, N)
     x = sin.(t)
From 608a80438fff389298136e1cf2559dbfd603dd5b Mon Sep 17 00:00:00 2001
From: Datseris
Date: Fri, 16 Sep 2022 09:06:30 +0100
Subject: [PATCH 31/32] add one more depth level

---
 docs/make.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/make.jl b/docs/make.jl
index 8507baeab..9f57b848e 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -45,6 +45,7 @@ makedocs(
         assets = [
             asset("https://fonts.googleapis.com/css?family=Montserrat|Source+Code+Pro&display=swap", class=:css),
         ],
+        collapselevel = 3,
     ),
     sitename = "Entropies.jl",
    authors = "Kristian Agasøster Haaga, George Datseris",
From aace207fd461af02aea292c3d57b0392352a61c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?=
Date: Mon, 19 Sep 2022 14:38:47 +0200
Subject: [PATCH 32/32] WIP: reviewing new api (#95)

* 
Equivalent to, not equivalent with * Include dispersion entropy * Clearer wording * Symbolization routines need to be imported before estimators using them * Export in-place version. * Follow Julia's linting conventions * Run CI on all pull requests * Don't export in-place version * Missing a letter --- .github/workflows/ci.yml | 1 + docs/src/index.md | 2 +- src/Entropies.jl | 4 ++-- src/entropies/entropies.jl | 4 +++- src/entropies/shannon.jl | 2 +- test/runtests.jl | 14 +++++++------- 6 files changed, 15 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d7ee7bcbd..36d1b416f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,6 +3,7 @@ on: pull_request: branches: - main + - '**' # matches every branch push: branches: - main diff --git a/docs/src/index.md b/docs/src/index.md index 8f96cd1bf..3609dc576 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -50,4 +50,4 @@ The input data type typically depend on the probability estimator chosen. In gen - _Timeseries_, which are `AbstractVector{<:Real}`, used in e.g. with [`WaveletOverlap`](@ref). - _Multi-dimensional timeseries, or datasets, or state space sets_, which are `Dataset`, used e.g. with [`NaiveKernel`](@ref). -- _Spatial data_, which are higher dimensional standard `Array`, used e.g. with [`SpatialSymbolicPermutation`](@ref). +- _Spatial data_, which are higher dimensional standard `Array`s, used e.g. with [`SpatialSymbolicPermutation`](@ref). diff --git a/src/Entropies.jl b/src/Entropies.jl index 18e4b2625..a6af61930 100644 --- a/src/Entropies.jl +++ b/src/Entropies.jl @@ -1,7 +1,7 @@ """ A Julia package that provides estimators for probabilities, entropies, and complexity measures for timeseries, nonlinear dynamics and complex systems. -It can be used as standalone or part of several projects in the JuliaDynamics organization, +It can be used as a standalone package, or as part of several projects in the JuliaDynamics organization, such as [DynamicalSystems.jl](https://juliadynamics.github.io/DynamicalSystems.jl/dev/) or [CausalityTools.jl](https://juliadynamics.github.io/CausalityTools.jl/dev/). """ @@ -12,10 +12,10 @@ using DelayEmbeddings: AbstractDataset, Dataset, dimension export AbstractDataset, Dataset const Array_or_Dataset = Union{<:AbstractArray, <:AbstractDataset} +include("symbolization/symbolize.jl") include("probabilities.jl") include("probabilities_estimators/probabilities_estimators.jl") include("entropies/entropies.jl") -include("symbolization/symbolize.jl") include("deprecations.jl") diff --git a/src/entropies/entropies.jl b/src/entropies/entropies.jl index 291bf6ed9..8aa8f0a26 100644 --- a/src/entropies/entropies.jl +++ b/src/entropies/entropies.jl @@ -3,4 +3,6 @@ include("tsallis.jl") include("shannon.jl") include("convenience_definitions.jl") include("direct_entropies/nearest_neighbors/nearest_neighbors.jl") -# TODO: What else is included here from direct entropies? \ No newline at end of file +include("direct_entropies/entropy_dispersion.jl") + +# TODO: What else is included here from direct entropies? diff --git a/src/entropies/shannon.jl b/src/entropies/shannon.jl index b1a143aae..ee2a50350 100644 --- a/src/entropies/shannon.jl +++ b/src/entropies/shannon.jl @@ -2,7 +2,7 @@ export entropy_shannon """ entropy_shannon(args...; base = MathConstants.e) -Equivalent with `entropy_renyi(args...; base, q = 1)` and provided solely for convenience. +Equivalent to `entropy_renyi(args...; base, q = 1)` and provided solely for convenience. 
Compute the Shannon entropy, given by ```math H(p) = - \\sum_i p[i] \\log(p[i]) diff --git a/test/runtests.jl b/test/runtests.jl index e3b1d5827..5c8ab8585 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -158,16 +158,16 @@ end @test sum(p2) ≈ 1.0 # Entropies - @test entropy_renyi!(s, x, est, q = 1) ≈ 0 # Regular order-1 entropy - @test entropy_renyi!(s, y, est, q = 1) >= 0 # Regular order-1 entropy - @test entropy_renyi!(s, x, est, q = 2) ≈ 0 # Higher-order entropy - @test entropy_renyi!(s, y, est, q = 2) >= 0 # Higher-order entropy + @test Entropies.entropy_renyi!(s, x, est, q = 1) ≈ 0 # Regular order-1 entropy + @test Entropies.entropy_renyi!(s, y, est, q = 1) >= 0 # Regular order-1 entropy + @test Entropies.entropy_renyi!(s, x, est, q = 2) ≈ 0 # Higher-order entropy + @test Entropies.entropy_renyi!(s, y, est, q = 2) >= 0 # Higher-order entropy # For a time series sz = zeros(Int, N - (est.m-1)*est.τ) @test probabilities!(sz, z, est) isa Probabilities @test probabilities(z, est) isa Probabilities - @test entropy_renyi!(sz, z, est) isa Real + @test Entropies.entropy_renyi!(sz, z, est) isa Real @test entropy_renyi(z, est) isa Real end @@ -290,7 +290,7 @@ end RectangularBinning([0.2, 0.3, 0.3]) ] - @testset "Binning test $i" for i in 1:length(binnings) + @testset "Binning test $i" for i in eachindex(binnings) est = VisitationFrequency(binnings[i]) @test probabilities(D, est) isa Probabilities @test entropy_renyi(D, est, q=1, base = 3) isa Real # Regular order-1 entropy @@ -310,7 +310,7 @@ end RectangularBinning([0.2, 0.3, 0.3]) ] - @testset "Binning test $i" for i in 1:length(binnings) + @testset "Binning test $i" for i in eachindex(binnings) to = Entropies.transferoperator(D, binnings[i]) @test to isa Entropies.TransferOperatorApproximationRectangular