Skip to content
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Neighborhood = "645ca80c-8b79-4109-87ea-e1f58159d116"
QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Expand Down
3 changes: 2 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ PAGES = [
"DispersionEntropy.md",
"NearestNeighbors.md",
"NaiveKernel.md",
"TimeScaleMODWT.md"
"TimeScaleMODWT.md",
"Walkthrough.md"
],
"Non-exported" => "nonexported.md"
]
Expand Down
101 changes: 101 additions & 0 deletions docs/src/Walkthrough.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Walkthrough entropy

```@docs
WalkthroughEntropy
walkthrough_entropy
```

## Examples

Here, we reproduce parts of Fig. 1 from Stoop et al. (2021).

We start by creating some symbolic time series of length `N`. Then, because we're
generating the walkthrough entropy for multiple positions ``n``, we use
`entropygenerator` for each time series, so that initialization computations happen
only once per time series. Finally, we compute the walkthrough entropy at all positions `1:N`.

```@example
using Entropies, PyPlot

N = 1200
# Generate time series
x_lowfreq = "a"^(N ÷ 3) * "b"^(N ÷ 3) * "c"^(N ÷ 3);
x_hifreq = "abc"^(N ÷ 3)
x_rw3 = rand(['a', 'b', 'c'], N)
x_rw2 = rand(['a', 'b'], N)
x_a = "a"^(N)
x_ab_2 = "ab"^(N ÷ 2)
x_ab_20 = ("a"^20*"b"^20)^(N ÷ 40)
x_ab_200 = ("a"^200*"b"^200)^(N ÷ 400)

# Initialize entropy generators
method = WalkthroughEntropy()
e_lofreq = entropygenerator(x_lowfreq, method);
e_hifreq = entropygenerator(x_hifreq, method);
e_rw3 = entropygenerator(x_rw3, method);
e_rw2 = entropygenerator(x_rw2, method);
e_a = entropygenerator(x_a, method);
e_ab_2 = entropygenerator(x_ab_2, method);
e_ab_20 = entropygenerator(x_ab_20, method);
e_ab_200 = entropygenerator(x_ab_200, method);

# Compute walkthrough entropies through positions 1:N
base = MathConstants.e
hs_lofreq = [e_lofreq(i, base = base) for i = 1:N]
hs_hifreq = [e_hifreq(i, base = base) for i = 1:N]
hs_wn3 = [e_rw3(i, base = base) for i = 1:N]
hs_wn2 = [e_rw2(i, base = base) for i = 1:N]
hs_a = [e_a(i, base = base) for i = 1:N]
hs_ab_2 = [e_ab_2(i, base = base) for i = 1:N]
hs_ab_20 = [e_ab_20(i, base = base) for i = 1:N]
hs_ab_200 = [e_ab_200(i, base = base) for i = 1:N]

# Plot
ns = (1:N |> collect) ./ N
unit = "nats"


f = figure(figsize = (10,7))
ax = subplot(231)
plot(xlabel = "n/N", ylabel = "h [$unit]");
plot(ns, hs_hifreq, label = "abcabc...")
plot(ns, hs_lofreq, label = "aa...bb..ccc")
xlabel("n/N")
ylabel("h ($unit)")
legend()

ax = subplot(232)
plot(xlabel = "n/N", ylabel = "h [$unit]");
plot(ns, hs_wn3, label = "RW (k = 3)")
xlabel("n/N")
ylabel("h ($unit)")
legend()

ax = subplot(234)
plot(ns, hs_a, label = "k = 1")
plot(ns, hs_ab_2, label = "k = 2, T = 2")
plot(ns, hs_ab_20, label = "k = 2, T = 20")
xlabel("n/N")
ylabel("h ($unit)")
legend()

ax = subplot(235)
plot(ns, hs_a, label = "k = 1")
plot(ns, hs_ab_2, label = "k = 2, T = 2")
plot(ns, hs_ab_200, label = "k = 2, T = 200")
xlabel("n/N")
ylabel("h ($unit)")
legend()

ax = subplot(236)
plot(ns, hs_wn2, label = "RW (k = 2)")
plot(ns, hs_ab_2, label = "k = 2, T = 2")
xlabel("n/N")
ylabel("h ($unit)")

legend()
tight_layout()
PyPlot.savefig("walkthrough_entropy.png")
```

![Walkthrough entropy](walkthrough_entropy.png)
6 changes: 6 additions & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ ProbabilitiesEstimator
Entropies.genentropy
```

## Reusable entropy generator

```@docs
entropygenerator
```

## Fast histograms

```@docs
Expand Down
2 changes: 2 additions & 0 deletions src/Entropies.jl
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
module Entropies
# NOTE(review): include order is presumably significant — core.jl and api.jl
# appear to define the types/API (e.g. EntropyEstimator, entropygenerator)
# used by the estimator files below; confirm before reordering.
include("core.jl")
include("api.jl")
include("histogram_estimation.jl")
# Individual probability / entropy estimators.
include("counting_based/CountOccurrences.jl")
include("symbolic/symbolic.jl")
include("binning_based/rectangular/rectangular_estimators.jl")
include("kerneldensity/kerneldensity.jl")
include("wavelet/wavelet.jl")
include("nearest_neighbors/nearest_neighbors.jl")
include("walkthrough/walkthrough.jl")
include("dispersion/dispersion_entropy.jl")
end
34 changes: 34 additions & 0 deletions src/api.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using Random
export EntropyGenerator
export entropygenerator


"""
    EntropyGenerator

A reusable generator coupling an entropy estimation `method` with the input data
`x` and pre-computed initialization data `init`, so that repeated entropy
computations on the same input avoid redundant setup work. The `rng` field
holds the random number generator used when the method relies on randomness.

Construct with [`entropygenerator`](@ref); call the resulting object as a
function to compute an entropy value.
"""
struct EntropyGenerator{S <: EntropyEstimator, X, A, R <: AbstractRNG}
    method::S # method with its input parameters
    x::X # input data
    init::A # pre-initialized things that speed up entropy estimation
    rng::R # random number generator object
end

"""
entropygenerator(x, method::EntropyEstimator[, rng]) → sg::EntropyGenerator

Initialize a generator that computes entropies of `x` on demand, based on the given `method`.
This is efficient, because for most methods some things can be initialized and reused
for every computation. Optionally you can provide an `rng::AbstractRNG` object that will
control random number generation and hence establish reproducibility of the
generated entropy values, if they rely on random number generation. By default
`Random.default_rng()` is used.

Note: not all entropy estimators have this functionality enabled yet. The documentation
strings for individual methods indicate whether entropy generators are available.

To compute entropy using a generator, call `eg` as a function with the optional `base`
argument, e.g.

```julia
eg = entropygenerator(x, method)
h = eg(; base = 2)
```
"""
function entropygenerator end
43 changes: 42 additions & 1 deletion src/histogram_estimation.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using DelayEmbeddings

export binhist

probabilities(x) = _non0hist(x)
Expand Down Expand Up @@ -40,5 +42,44 @@ function _non0hist(x)
end

# Forward Dataset inputs to the vector-based implementations on the raw data.
_non0hist(x::AbstractDataset) = _non0hist(x.data)
_non0hist(x::AbstractDataset, N) = _non0hist(x.data, N)
probabilities(x::AbstractDataset) = _non0hist(x.data)
# NOTE(review): the two-argument method appears twice in this view — likely a
# diff-rendering artifact; confirm only one definition exists in the file.
_non0hist(x::AbstractDataset, N) = _non0hist(x.data, N)

"""
    vec_countmap(x, T = BigInt) → (unique_vals, hist)

Return the sorted unique elements of `x` together with their counts, as a pair
of vectors: `unique_vals::Vector{eltype(x)}` and `hist::Vector{T}`, aligned so
that `hist[i]` is the number of occurrences of `unique_vals[i]`.

Both returned vectors are empty when `x` is empty. `x` must be sortable
(elements comparable with `isless`); counting is done in a single pass over a
sorted copy, so `x` itself is not mutated.
"""
function vec_countmap(x, T = BigInt)
    L = length(x)

    hist = Vector{T}()
    unique_vals = Vector{eltype(x)}()

    # Empty input: no unique values, no counts.
    L == 0 && return unique_vals, hist

    # Reserve enough space up front (upper bound: all elements distinct).
    sizehint!(hist, L)
    sizehint!(unique_vals, L)
    sx = sort(x, alg = QuickSort)

    # Count runs of consecutive equal values in the sorted copy.
    prev_val = sx[1]
    n = 0
    push!(unique_vals, prev_val)
    for val in sx
        if val == prev_val
            n += 1
        else
            push!(hist, n)
            push!(unique_vals, val)
            prev_val = val
            n = 1
        end
    end
    # Flush the count of the final run.
    push!(hist, n)

    return unique_vals, hist
end

# Strings: count per character, so materialize the characters first (chars sort fine).
vec_countmap(x::AbstractString) = vec_countmap(collect(x))
# Datasets: count over the underlying data vector.
vec_countmap(x::AbstractDataset) = vec_countmap(x.data)
36 changes: 36 additions & 0 deletions src/walkthrough/walkthrough.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

"""
WalkthroughEntropy

The walkthrough entropy method (Stoop et al., 2021)[^Stoop2021].

Does not work with `genentropy`, but combination with `entropygenerator`, we can use
this estimator to compute walkthrough entropy for multiple `n` with a single initialization
step (instead of initializing once per `n`).

## Examples

```jldoctest; setup = :(using Entropies)
julia> x = "abc"^2
"abcabc"

julia> wg = entropygenerator(x, WalkthroughEntropy());

julia> [wg(n) for n = 1:length(x)]
6-element Vector{Float64}:
1.0986122886681098
1.3217558399823195
0.9162907318741551
1.3217558399823195
1.0986122886681098
-0.0
```

See also: [`entropygenerator`](@ref).

[^Stoop2021]: Stoop, R. L., Stoop, N., Kanders, K., & Stoop, R. (2021). Excess entropies suggest the physiology of neurons to be primed for higher-level computation. Physical Review Letters, 127(14), 148101.
"""
struct WalkthroughEntropy <: EntropyEstimator end

include("walkthrough_prob.jl")
include("walkthrough_entropy.jl")
Loading