Skip to content
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Neighborhood = "645ca80c-8b79-4109-87ea-e1f58159d116"
QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Expand Down
3 changes: 2 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ PAGES = [
"DispersionEntropy.md",
"NearestNeighbors.md",
"NaiveKernel.md",
"TimeScaleMODWT.md"
"TimeScaleMODWT.md",
"Walkthrough.md"
],
"Non-exported" => "nonexported.md"
]
Expand Down
101 changes: 101 additions & 0 deletions docs/src/Walkthrough.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Walkthrough entropy

```@docs
WalkthroughEntropy
walkthrough_entropy
```

## Examples

Here, we reproduce parts of Fig. 1 from Stoop et al. (2021).

We start by creating some symbolic time series of length `N`. Then, because we're
generating the walkthrough entropy for multiple positions ``n``, we use
`entropygenerator` for each time series, so that initialization computations happen
only once per time series. Finally, we compute the walkthrough entropy at all positions `1:N`.

```@example
using Entropies, PyPlot

N = 1200
# Generate time series
x_lowfreq = "a"^(N ÷ 3) * "b"^(N ÷ 3) * "c"^(N ÷ 3);
x_hifreq = "abc"^(N ÷ 3)
x_rw3 = rand(['a', 'b', 'c'], N)
x_rw2 = rand(['a', 'b'], N)
x_a = "a"^(N)
x_ab_2 = "ab"^(N ÷ 2)
x_ab_20 = ("a"^20*"b"^20)^(N ÷ 40)
x_ab_200 = ("a"^200*"b"^200)^(N ÷ 400)

# Initialize entropy generators
method = WalkthroughEntropy()
e_lofreq = entropygenerator(x_lowfreq, method);
e_hifreq = entropygenerator(x_hifreq, method);
e_rw3 = entropygenerator(x_rw3, method);
e_rw2 = entropygenerator(x_rw2, method);
e_a = entropygenerator(x_a, method);
e_ab_2 = entropygenerator(x_ab_2, method);
e_ab_20 = entropygenerator(x_ab_20, method);
e_ab_200 = entropygenerator(x_ab_200, method);

# Compute walkthrough entropies through positions 1:N
base = MathConstants.e
hs_lofreq = [e_lofreq(i, base = base) for i = 1:N]
hs_hifreq = [e_hifreq(i, base = base) for i = 1:N]
hs_wn3 = [e_rw3(i, base = base) for i = 1:N]
hs_wn2 = [e_rw2(i, base = base) for i = 1:N]
hs_a = [e_a(i, base = base) for i = 1:N]
hs_ab_2 = [e_ab_2(i, base = base) for i = 1:N]
hs_ab_20 = [e_ab_20(i, base = base) for i = 1:N]
hs_ab_200 = [e_ab_200(i, base = base) for i = 1:N]

# Plot
ns = (1:N |> collect) ./ N
unit = "nats"


f = figure(figsize = (10,7))
ax = subplot(231)
plot(xlabel = "n/N", ylabel = "h [$unit]");
plot(ns, hs_hifreq, label = "abcabc...")
plot(ns, hs_lofreq, label = "aa...bb..ccc")
xlabel("n/N")
ylabel("h ($unit)")
legend()

ax = subplot(232)
plot(xlabel = "n/N", ylabel = "h [$unit]");
plot(ns, hs_wn3, label = "RW (k = 3)")
xlabel("n/N")
ylabel("h ($unit)")
legend()

ax = subplot(234)
plot(ns, hs_a, label = "k = 1")
plot(ns, hs_ab_2, label = "k = 2, T = 2")
plot(ns, hs_ab_20, label = "k = 2, T = 20")
xlabel("n/N")
ylabel("h ($unit)")
legend()

ax = subplot(235)
plot(ns, hs_a, label = "k = 1")
plot(ns, hs_ab_2, label = "k = 2, T = 2")
plot(ns, hs_ab_200, label = "k = 2, T = 200")
xlabel("n/N")
ylabel("h ($unit)")
legend()

ax = subplot(236)
plot(ns, hs_wn2, label = "RW (k = 2)")
plot(ns, hs_ab_2, label = "k = 2, T = 2")
xlabel("n/N")
ylabel("h ($unit)")

legend()
tight_layout()
PyPlot.savefig("walkthrough_entropy.png")
```

![Walkthrough entropy](walkthrough_entropy.png)
6 changes: 6 additions & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ ProbabilitiesEstimator
Entropies.genentropy
```

## Reusable entropy generator

```@docs
entropygenerator
```

## Fast histograms

```@docs
Expand Down
2 changes: 2 additions & 0 deletions src/Entropies.jl
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
module Entropies
# NOTE(review): include order is presumably significant — core.jl and api.jl
# appear to define the types/API (e.g. EntropyEstimator, entropygenerator)
# used by the estimator files below; confirm before reordering.
include("core.jl")
include("api.jl")
include("histogram_estimation.jl")
# Individual probability / entropy estimators.
include("counting_based/CountOccurrences.jl")
include("symbolic/symbolic.jl")
include("binning_based/rectangular/rectangular_estimators.jl")
include("kerneldensity/kerneldensity.jl")
include("wavelet/wavelet.jl")
include("nearest_neighbors/nearest_neighbors.jl")
include("walkthrough/walkthrough.jl")
include("dispersion/dispersion_entropy.jl")
end
34 changes: 34 additions & 0 deletions src/api.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using Random
export EntropyGenerator
export entropygenerator


"""
    EntropyGenerator

A reusable generator coupling an entropy estimation `method` with the input data
`x` and pre-computed initialization data `init`, so that repeated entropy
computations on the same input avoid redundant setup work. The `rng` field
holds the random number generator used when the method relies on randomness.

Construct with [`entropygenerator`](@ref); call the resulting object as a
function to compute an entropy value.
"""
struct EntropyGenerator{S <: EntropyEstimator, X, A, R <: AbstractRNG}
    method::S # method with its input parameters
    x::X # input data
    init::A # pre-initialized things that speed up entropy estimation
    rng::R # random number generator object
end

"""
entropygenerator(x, method::EntropyEstimator[, rng]) → sg::EntropyGenerator

Initialize a generator that computes entropies of `x` on demand, based on the given `method`.
This is efficient, because for most methods some things can be initialized and reused
for every computation. Optionally you can provide an `rng::AbstractRNG` object that will
control random number generation and hence establish reproducibility of the
generated entropy values, if they rely on random number generation. By default
`Random.default_rng()` is used.

Note: not all entropy estimators have this functionality enabled yet. The documentation
strings for individual methods indicate whether entropy generators are available.

To compute entropy using a generator, call `eg` as a function with the optional `base`
argument, e.g.

```julia
eg = entropygenerator(x, method)
h = eg(; base = 2)
```
"""
function entropygenerator end
43 changes: 42 additions & 1 deletion src/histogram_estimation.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using DelayEmbeddings

export binhist

probabilities(x) = _non0hist(x)
Expand Down Expand Up @@ -40,5 +42,44 @@ function _non0hist(x)
end

# Forward Dataset inputs to the vector-based implementations on the raw data.
_non0hist(x::AbstractDataset) = _non0hist(x.data)
_non0hist(x::AbstractDataset, N) = _non0hist(x.data, N)
probabilities(x::AbstractDataset) = _non0hist(x.data)
# NOTE(review): the two-argument method appears twice in this view — likely a
# diff-rendering artifact; confirm only one definition exists in the file.
_non0hist(x::AbstractDataset, N) = _non0hist(x.data, N)

"""
    vec_countmap(x, T = BigInt) → (unique_vals, hist)

Return the sorted unique elements of `x` together with their counts, as a pair
of vectors: `unique_vals::Vector{eltype(x)}` and `hist::Vector{T}`, aligned so
that `hist[i]` is the number of occurrences of `unique_vals[i]`.

Both returned vectors are empty when `x` is empty. `x` must be sortable
(elements comparable with `isless`); counting is done in a single pass over a
sorted copy, so `x` itself is not mutated.
"""
function vec_countmap(x, T = BigInt)
    L = length(x)

    hist = Vector{T}()
    unique_vals = Vector{eltype(x)}()

    # Empty input: no unique values, no counts.
    L == 0 && return unique_vals, hist

    # Reserve enough space up front (upper bound: all elements distinct).
    sizehint!(hist, L)
    sizehint!(unique_vals, L)
    sx = sort(x, alg = QuickSort)

    # Count runs of consecutive equal values in the sorted copy.
    prev_val = sx[1]
    n = 0
    push!(unique_vals, prev_val)
    for val in sx
        if val == prev_val
            n += 1
        else
            push!(hist, n)
            push!(unique_vals, val)
            prev_val = val
            n = 1
        end
    end
    # Flush the count of the final run.
    push!(hist, n)

    return unique_vals, hist
end

# Strings: count per character, so materialize the characters first (chars sort fine).
vec_countmap(x::AbstractString) = vec_countmap(collect(x))
# Datasets: count over the underlying data vector.
vec_countmap(x::AbstractDataset) = vec_countmap(x.data)
36 changes: 36 additions & 0 deletions src/walkthrough/walkthrough.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

"""
WalkthroughEntropy

The walkthrough entropy method (Stoop et al., 2021)[^Stoop2021].

Does not work with `genentropy`, but combination with `entropygenerator`, we can use
this estimator to compute walkthrough entropy for multiple `n` with a single initialization
step (instead of initializing once per `n`).

## Examples

```jldoctest; setup = :(using Entropies)
julia> x = "abc"^2
"abcabc"

julia> wg = entropygenerator(x, WalkthroughEntropy());

julia> [wg(n) for n = 1:length(x)]
6-element Vector{Float64}:
1.0986122886681098
1.3217558399823195
0.9162907318741551
1.3217558399823195
1.0986122886681098
-0.0
```

See also: [`entropygenerator`](@ref).

[^Stoop2021]: Stoop, R. L., Stoop, N., Kanders, K., & Stoop, R. (2021). Excess entropies suggest the physiology of neurons to be primed for higher-level computation. Physical Review Letters, 127(14), 148101.
"""
struct WalkthroughEntropy <: EntropyEstimator end

include("walkthrough_prob.jl")
include("walkthrough_entropy.jl")
Loading