From 4984e13e25e3d0138300306f92da032e6ba00075 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sat, 27 Nov 2021 23:17:05 +0100 Subject: [PATCH 01/19] simplex predictions --- simplex.jl | 49 ++++++++++ src/CausalityTools.jl | 1 + .../EmpiricalDynamicalModelling.jl | 6 ++ .../simplex_projection.jl | 92 +++++++++++++++++++ 4 files changed, 148 insertions(+) create mode 100644 simplex.jl create mode 100644 src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl create mode 100644 src/EmpiricalDynamicalModelling/simplex_projection.jl diff --git a/simplex.jl b/simplex.jl new file mode 100644 index 000000000..fb9f9e779 --- /dev/null +++ b/simplex.jl @@ -0,0 +1,49 @@ +using CausalityTools, DynamicalSystems, Plots, StatsBase, Statistics, Distributions, Neighborhood, LinearAlgebra; gr() + +function eom_tentmap(dx, x, p, n) + x = x[1] + μ = p[1] + dx[1] = x < 0.5 ? μ*x : μ*(1 - x) + + return +end + +function tentmap(u₀ = rand(); μ = 1.9) + DiscreteDynamicalSystem(eom_tentmap, [u₀], [μ]) +end + +npts = 2000 +sys = tentmap(μ = 1.9) +ts = trajectory(sys, npts , Ttr = 1000)[:, 1] + +plot(legend = :topleft) +k = 3 +kmax = 20 +cors = zeros(kmax) +τ = 1 +d = 3 +@show ts +training = 1:200 +prediction = 201:500 +for k = 1:kmax + X, X̃ = simplex_predictions(ts, k, d = d, τ = τ, training = training, prediction = prediction) + cors[k] = cor(X, X̃) +end + +plot(legend = :bottomleft, ylabel = "Correlation coefficient (ρ)", + xlabel = "Prediction time (k)", + ylims = (-0.05, 1.1)) +scatter!(1:kmax, cors, label = "") + + + +# r = 1:35 +# p1 = plot(prediction[r], ts[prediction][r], c = :black, lw = 2, alpha = 0.5) +# #plot!(prediction_set, x, c = :green, lw = 2) +# plot!(prediction[r], xpred[r], ls = :dot, c = :blue) +# @show xpred + +# r = 1:35 +# p1 = plot(prediction, ts[prediction], c = :black, lw = 2, alpha = 0.5) +# #plot!(prediction_set, x, c = :green, lw = 2) +# plot!(prediction, xpred, ls = :dot, c = :blue) diff --git a/src/CausalityTools.jl b/src/CausalityTools.jl index 9a5c11d4d..ffd9e3678 100644 --- a/src/CausalityTools.jl +++ b/src/CausalityTools.jl @@ -15,6 +15,7 @@ module CausalityTools include("CrossMappings/CrossMappings.jl") include("SMeasure/smeasure.jl") include("PredictiveAsymmetry/PredictiveAsymmetry.jl") + include("EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl") include("example_systems/ExampleSystems.jl") using Requires diff --git a/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl b/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl new file mode 100644 index 000000000..ed3276307 --- /dev/null +++ b/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl @@ -0,0 +1,6 @@ +using Reexport + +@reexport module EmpiricalDynamicalModelling + + include("simplex_projection.jl") +end \ No newline at end of file diff --git a/src/EmpiricalDynamicalModelling/simplex_projection.jl b/src/EmpiricalDynamicalModelling/simplex_projection.jl new file mode 100644 index 000000000..960c46b47 --- /dev/null +++ b/src/EmpiricalDynamicalModelling/simplex_projection.jl @@ -0,0 +1,92 @@ +export simplex_predictions + +""" + simplex_predictions(x, k, [, correspondence_measure]; + τ = 1, d = 2, p = 1, + training = 1:length(x) ÷ 2, prediction = length(x) ÷ 2 + 1:length(x)) → Vector{Float64}, Vector{Float64} + +From an embedding `Mₓ` computed from the time series `x`, first compute the +the `k`-step forward simplex projections for each predictee in `Mₓ[prediction]`. 
+This is done by locating, for each predictee, its `d+1` nearest neighbors in
+`Mₓ[training[1:end-k]]`, and projecting each neighbor `k` steps forward in time.
+Then, for each scalar value `x[i]` where `i ∈ prediction`, compute the prediction
+`x̃[i]` as an exponentially weighted average of the forward-projected neighbors
+(there are `d+1` neighbors, so these form a simplex around the predictee).
+
+If no third argument is given, then two vectors are returned: the observed values and
+the predicted values. If a `correspondence_measure` is given as the third argument,
+then return the correspondence between observed and predicted values.
+
+## Example
+
+```julia
+L = 2000
+xs = 0.0:0.05:L
+ts = sin.(xs) .+ rand(length(xs)) .* 0.2
+
+# Compute the 3-step forward-in-time predictions and the correlation between
+# observed and predicted values.
+simplex_predictions(ts, 3, Statistics.cor)
+```
+
+## Details
+
+This is an implementation of the simplex projection method from Sugihara and May (1990)[^Sugihara1990]. The original
+paper does not provide sufficiently detailed pseudocode for implementation, so the algorithm here
+is based on Ye et al. (2015)[^Ye2015].
+
+[^Sugihara1990]: Sugihara, George, and Robert M. May. "Nonlinear forecasting as a way of distinguishing chaos from measurement error in time series." Nature 344.6268 (1990): 734-741.
+[^Ye2015]: Ye, H., Beamish, R. J., Glaser, S. M., Grant, S. C., Hsieh, C. H., Richards, L. J., ... & Sugihara, G. (2015). Equation-free mechanistic ecosystem forecasting using empirical dynamic modeling. Proceedings of the National Academy of Sciences, 112(13), E1569-E1576.
+"""
+function simplex_predictions(x, k; τ = 1, d = 2,
+    training = 1:length(x) ÷ 2,
+    prediction = length(x) ÷ 2 + 1:length(x)-τ*(d-1))
+
+    # Generate embedding and separate the embedding points into a training set and
+    # a set of predictees.
+    τs = 0:-τ:-τ*(d-1)
+    Mₓ = genembed(x, τs)
+    predictees = Mₓ[prediction]
+
+    # TODO: what happens if there is overlap between training and prediction sets?
+    #if isempty(training ∩ prediction)
+    # For now, the training set excludes the `k` last points, so that
+    # when later finding nearest neighbors, these neighbors have "somewhere to go"
+    # when projected forward in time.
+    theiler = Theiler(0)
+    trainees = Mₓ[training[1:end-k]]
+
+    tree = KDTree(trainees)
+    predictee_neighbors, predictee_dists = bulksearch(tree, predictees, NeighborNumber(d+1), theiler)
+
+    # These are re-used
+    u = zeros(d + 1)
+    w = zeros(d + 1)
+
+    # Predict scalar forward projections. We could also predict d+1-dimensional
+    # embedding states, but here we do scalars.
+    Lₚ = length(predictees)
+    x̃ = zeros(Lₚ)
+
+    for i = 1:Lₚ
+        dists = predictee_dists[i]
+        dist₁ = dists[1]
+        @inbounds for j = 1:d+1
+            w[j] = exp(-dists[j] / dist₁)
+        end
+        s = sum(w)
+
+        # Compute the k-step forward prediction of predictee pᵢ.
+        # Select from Mₓ directly, because it also contains the points left
+        # out when defining `training`
+        projᵢ = x[predictee_neighbors[i] .+ k]
+
+        # The predicted value is the weighted average of the forward-projected
+        # states.
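+        # (each term below uses w[j] / s, where s = sum(w), so the weights sum to one)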
+ for j = 1:d+1 + x̃[i] += w[j]*projᵢ[j] / s + end + end + + return x[prediction .+ k], x̃ +end From 1e2ebe1201a1de77d217521fab2f2eab4c745840 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sun, 28 Nov 2021 00:22:10 +0100 Subject: [PATCH 02/19] Add docs --- docs/src/edm.md | 109 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 docs/src/edm.md diff --git a/docs/src/edm.md b/docs/src/edm.md new file mode 100644 index 000000000..411ce27e3 --- /dev/null +++ b/docs/src/edm.md @@ -0,0 +1,109 @@ +# Empirical dynamical modelling + +## Simplex projection + +```@docs +simplex_predictions +``` + +### Example: reproducing Sugihara & May (1990) + +The simplex projection method was introduced in Sugihara & May (1990). Let's try to reproduce their figure 1. + +We start by defining the tent map and generating a time series for `μ = 1.98`. + +```@example simplex_projection +using CausalityTools, DynamicalSystems, Plots, Statistics, Distributions; gr() + +function eom_tentmap(dx, x, p, n) + x = x[1] + μ = p[1] + dx[1] = x < 0.5 ? μ*x : μ*(1 - x) + + return +end + +function tentmap(u₀ = rand(); μ = 1.98) + DiscreteDynamicalSystem(eom_tentmap, [u₀], [μ]) +end + +npts = 2000 +sys = tentmap(μ = 1.98) +ts = diff(trajectory(sys, npts , Ttr = 1000)[:, 1]) + +p_ts = plot(xlabel = "Time step", ylabel = "Value", ylims = (-1.1, 1.1)) +plot!(ts[1:100], label = "") +savefig("simplex_ts.svg") # hide +``` + +![](simplex_ts.svg) + +Next, let's compute the predicted and observed values for `k = 1` and `k = 7` using embedding dimension 3 and +embedding lag 1. We'll use the first 1000 points as our training set, and try to predict the next 500 points. + +```@example simplex_projection +d, τ = 3, 1 +training = 1:1000 +prediction = 1001:1500 +x_1, x̃_1 = simplex_predictions(ts, 1, d = d, τ = τ, training = training, prediction = prediction) +x_7, x̃_7 = simplex_predictions(ts, 7, d = d, τ = τ, training = training, prediction = prediction) + +p_obs_vs_pred = plot(xlabel = "Observed values", ylabel = "Predicted values") +scatter!(x_1, x̃_1, label = "k = 1", shape = :circle) +scatter!(x_7, x̃_7, label = "k = 7", shape = :star5) +savefig("simplex_correspondence.svg") # hide +``` + +![](simplex_correspondence.svg) + +There is high correlation between observed and predicted values when predicting only one time step `k = 1` +into the future. As `k` increases, the performance drops off. Let's investigate this systematically. + +```@example simplex_projection +kmax = 20 +cors = zeros(kmax) +for k = 1:kmax + X, X̃ = simplex_predictions(ts, k, d = d, τ = τ, training = training, prediction = prediction) + cors[k] = cor(X, X̃) +end + +plot(legend = :bottomleft, ylabel = "Correlation coefficient (ρ)", + xlabel = "Prediction time (k)", + ylims = (-1.1, 1.1)) +hline!([0], ls = :dash, label = "", color = :grey) +scatter!(1:kmax, cors, label = "") +savefig("simplex_correlation.svg") # hide +``` + +![](simplex_correlation.svg) + +The correlation between observed and predicted values is near perfect until `k = 3`, and then rapidly +drops off as `k` increases. At `k = 8`, there is virtually no correlation between observed and predicted values. +This means that, for this particular system, for this particular choice of embedding and choice of training/prediction sets, the predictability of the system is limited to about 4 or 5 time steps into the future (if you want good predictions). 
+ +The main point of Sugihara & May's paper was that this drop-off of prediction accuracy with `k` is characteristic of chaotic systems, and can be used to distinguish chaos from regular behaviour in time series. + +Let's demonstrate this by also investigating how the correlation between observed and predicted values behaves as a function of `k` for a regular, non-chaotic time series. We'll use a sine wave with additive noise. + +```@example +𝒩 = Uniform(-0.5, 0.5) +xs = 0.0:1.0:2000.0 +r = sin.(0.5 .* xs) .+ rand(𝒩, length(xs)) +plot(r[1:200]) + +cors_sine = zeros(kmax) +for k = 1:kmax + X, X̃ = simplex_predictions(r, k, d = d, τ = τ, training = training, prediction = prediction) + cors_sine[k] = cor(X, X̃) +end + +plot(legend = :bottomleft, ylabel = "Correlation coefficient (ρ)", + xlabel = "Prediction time (k)", + ylims = (-1.1, 1.1)) +hline!([0], ls = :dash, label = "", color = :grey) +scatter!(1:kmax, cors, label = "tent map", marker = :star) +scatter!(1:kmax, cors_sine, label = "sine") +``` + +In contrast to the tent map, where prediction accuracy drops off and stabilizes around zero for increasing `k`, +the prediction accuracy is rather insensitive to the choice of `k`. \ No newline at end of file From 33a97c81dfbbfc7f177ace1ad43df845d99dfc5b Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sun, 28 Nov 2021 00:22:21 +0100 Subject: [PATCH 03/19] Add dependencies --- src/EmpiricalDynamicalModelling/simplex_projection.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/EmpiricalDynamicalModelling/simplex_projection.jl b/src/EmpiricalDynamicalModelling/simplex_projection.jl index 960c46b47..4385927fd 100644 --- a/src/EmpiricalDynamicalModelling/simplex_projection.jl +++ b/src/EmpiricalDynamicalModelling/simplex_projection.jl @@ -1,3 +1,5 @@ +using DelayEmbeddings, Neighborhood + export simplex_predictions """ From 9fc313eb50a549a8829e0f03d7c0568d2b8ca656 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sun, 28 Nov 2021 01:13:41 +0100 Subject: [PATCH 04/19] Add docs --- docs/make.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/make.jl b/docs/make.jl index 1c618fa45..c0a7d519e 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -49,6 +49,7 @@ PAGES = [ "info_estimators.md", ], + "edm.md", "example_systems.md", "Utilities" => [ "invariant_measure.md", From 4d890431c572341a6d2d4c10e1abee934cf5c170 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sun, 28 Nov 2021 01:56:37 +0100 Subject: [PATCH 05/19] Add Distributions to doc dependencies --- docs/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Project.toml b/docs/Project.toml index 0806a9722..27620e6a7 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,6 @@ [deps] DelayEmbeddings = "5732040d-69e3-5649-938a-b6b4f237613f" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" DynamicalSystems = "61744808-ddfa-5f27-97ff-6e42cc95d634" From 1e23930cb962282e2ac10ac88d79474e07f3aadf Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sun, 28 Nov 2021 09:39:39 +0100 Subject: [PATCH 06/19] Show last figure too --- docs/src/edm.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/src/edm.md b/docs/src/edm.md index 411ce27e3..b81b77189 100644 --- a/docs/src/edm.md +++ b/docs/src/edm.md @@ -103,7 +103,9 @@ plot(legend = :bottomleft, ylabel = "Correlation coefficient (ρ)", hline!([0], ls = :dash, label = "", color = :grey) scatter!(1:kmax, cors, label = "tent 
map", marker = :star) scatter!(1:kmax, cors_sine, label = "sine") +savefig("simplex_correlation_sine_tent.svg") # hide ``` -In contrast to the tent map, where prediction accuracy drops off and stabilizes around zero for increasing `k`, -the prediction accuracy is rather insensitive to the choice of `k`. \ No newline at end of file +![](simplex_correlation_sine_tent.svg) + +In contrast to the tent map, for which prediction accuracy drops off and stabilizes around zero for increasing `k`, the prediction accuracy is rather insensitive to the choice of `k` for the noisy sine time series. \ No newline at end of file From df7e71305138aaed956057e635736bb394ede4d4 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sun, 28 Nov 2021 09:40:55 +0100 Subject: [PATCH 07/19] Typo --- docs/src/edm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/edm.md b/docs/src/edm.md index b81b77189..45ea1b07d 100644 --- a/docs/src/edm.md +++ b/docs/src/edm.md @@ -56,7 +56,7 @@ savefig("simplex_correspondence.svg") # hide ![](simplex_correspondence.svg) -There is high correlation between observed and predicted values when predicting only one time step `k = 1` +There is high correlation between observed and predicted values when predicting only one time step (`k = 1`) into the future. As `k` increases, the performance drops off. Let's investigate this systematically. ```@example simplex_projection From 61a6e86c615ad596d560377b776ce4969f37d983 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Sun, 28 Nov 2021 11:03:53 +0100 Subject: [PATCH 08/19] Name all @example blocks the same --- docs/src/edm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/edm.md b/docs/src/edm.md index 45ea1b07d..e1c8913ee 100644 --- a/docs/src/edm.md +++ b/docs/src/edm.md @@ -85,7 +85,7 @@ The main point of Sugihara & May's paper was that this drop-off of prediction ac Let's demonstrate this by also investigating how the correlation between observed and predicted values behaves as a function of `k` for a regular, non-chaotic time series. We'll use a sine wave with additive noise. -```@example +```@example simplex_projection 𝒩 = Uniform(-0.5, 0.5) xs = 0.0:1.0:2000.0 r = sin.(0.5 .* xs) .+ rand(𝒩, length(xs)) From efd565aa158e1ca3b4f581f4034efc6021529eb2 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Mon, 29 Nov 2021 00:58:51 +0100 Subject: [PATCH 09/19] Smap implementation + docs --- docs/src/edm.md | 92 +++++++++++++++++- .../EmpiricalDynamicalModelling.jl | 1 + src/EmpiricalDynamicalModelling/smap.jl | 93 +++++++++++++++++++ 3 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 src/EmpiricalDynamicalModelling/smap.jl diff --git a/docs/src/edm.md b/docs/src/edm.md index e1c8913ee..02c99ffbb 100644 --- a/docs/src/edm.md +++ b/docs/src/edm.md @@ -108,4 +108,94 @@ savefig("simplex_correlation_sine_tent.svg") # hide ![](simplex_correlation_sine_tent.svg) -In contrast to the tent map, for which prediction accuracy drops off and stabilizes around zero for increasing `k`, the prediction accuracy is rather insensitive to the choice of `k` for the noisy sine time series. \ No newline at end of file +In contrast to the tent map, for which prediction accuracy drops off and stabilizes around zero for increasing `k`, the prediction accuracy is rather insensitive to the choice of `k` for the noisy sine time series. + +## S-map + +```@docs +smap +``` + +The s-map, or sequential locally weighted global map, was introduced in Sugihara (1994)[^Sugihara1994]. 
The s-map approximates the dynamics of a system as a locally weighted global map, with a tuning parameter ``\theta`` that controls the degree of nonlinearity in the model. For ``\theta = 0``, the model is the maximum likelihood global linear solution (of eq. 2 in Sugihara, 1994), and for increasing ``\theta > 0``, the model becomes increasingly nonlinear and localized (Sugihara, 1996)[^Sugihara1996]. + +When such a model has been constructed, it be used as prediction tool for out-of-sample points. It extends the prediction power of the simplex projection method, which only approximates the dynamics with a piecewise linear map. Let's elaborate with an example. + +### Example: prediction power for the Lorenz system + +In our implementation of `smap`, the input is a multivariate dataset - which can be a `Dataset` of either the raw variables of a multivariate dynamical system, or a `Dataset` containing an embedding of a single time series. +Here, we'll show an example of the former. + +Let's generate an example orbit from a bidirectionally coupled set of Lorenz systems. We'll use the built-in +`CausalityTools.ExampleSystems.lorenz_lorenz_bidir` system, and select the first three variables for analysis. + +```@example smap_lorenz +using CausalityTools, Plots, DynamicalSystems, Statistics; gr() + +sys = CausalityTools.ExampleSystems.lorenz_lorenz_bidir() +T, Δt = 150, 0.05 +lorenz = trajectory(sys, T, Δt = Δt, Ttr = 100)[:, 1:3] +p_orbit = plot(xlabel = "x", ylabel = "y", zlabel = "z") +plot!(columns(lorenz)..., marker = :circle, label = "", ms = 2, lα = 0.5) +savefig(lorenz_attractor.svg) # hide +``` + +![](lorenz_attractor.svg) + +Now, we compute the `k`-step forward predictions for `k` ranging from `1` to `15`. The tuning parameter `θ` +varies from `0.0` (linear model) to `2.0` (strongly nonlinear model). Our goal is to see which model +yields the best predictions across multiple `k`. + +We'll use the first 500 points of the orbit to train the model. Then, using that model, we try to +predict the next 500 points (which are not part of the training set). +Finally, we compute the correlation between the predicted values and the observed values, which measures +the model prediction skill. This procedure is repeated for each combination of `k` and `θ`. + +```@example smap_lorenz +ks, θs = 1:15, 0.0:0.5:2.0 +n_trainees, n_predictees = 500, 500; + +# Compute correlations between predicted values `preds` and actual values `truths` +# for all parameter combinations +cors = zeros(length(ks), length(θs)) +for (i, k) in enumerate(ks) + for (j, θ) in enumerate(θs) + pred = n_trainees+1:n_trainees+n_predictees + train = 1:n_trainees + + preds, truths = smap(lorenz, θ = θ, k = k, trainees = train, predictees = pred) + cors[i, j] = cor(preds, truths) + end +end + +p_θ_k_sensitivity = plot(xlabel = "Prediction time (k)", ylabel = "cor(observed, predicted)", + legend = :bottomleft, ylims = (-1.1, 1.1)) +hline!([0], ls = :dash, c = :grey, label = "") +markers = [:star :circle :square :star5 :hexagon :circle :star] +cols = [:black, :red, :blue, :green, :purple, :grey, :black] +labels = ["θ = $θ" for θ in θs] +for i = 1:length(θs) + plot!(ks, cors[:, i], marker = markers[i], c = cols[i], ms = 5, label = labels[i]) +end + +# Let's also plot the subset of points we're actually using. 
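+# (i.e. the first n_trainees + n_predictees = 1000 points of the orbit)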
+p_orbit = plot(columns(lorenz[1:n_trainees+n_predictees])..., marker = :circle, label = "", ms = 2, lα = 0.5) + +plot(p_orbit, p_θ_k_sensitivity, layout = grid(1, 2), size = (700, 400)) +savefig("smap_sensitivity_lorenz.svg") # hide +``` + +![](smap_sensitivity_lorenz.svg) + +The nonlinear models (colored lines and symbols) far outperform the linear model (black line + stars). + +Because the predictions for our system improves with increasingly nonlinear models, it indicates that our +system has some inherent nonlinearity. This is, of course, correct, since our Lorenz system is chaotic. + +A formal way to test the presence of nonlinearity is, for example, to define the null hypothesis +"H0: predictions do not improve when using an equivalent nonlinear versus a linear model" (equivalent in the sense that the only parameter is `θ`) or, equivalently, "`H0: ρ_linear = ρ_nonlinear`". If predictions do in fact improve, we +instead accept the alternative hypothesis that prediction *do* improve when using nonlinear models versus using linear models. This can be formally tested using a z-statistic [^Sugihara1994]. + +## References + +[^Sugihara1994]: Sugihara, G. (1994). Nonlinear forecasting for the classification of natural time series. Philosophical Transactions of the Royal Society of London. Series A: Physical and Engineering Sciences, 348(1688), 477-495. +[^Sugihara1996]: Sugihara, George, et al. "Nonlinear control of heart rate variability in human infants." Proceedings of the National Academy of Sciences 93.6 (1996): 2608-2613. \ No newline at end of file diff --git a/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl b/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl index ed3276307..413b98851 100644 --- a/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl +++ b/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl @@ -3,4 +3,5 @@ using Reexport @reexport module EmpiricalDynamicalModelling include("simplex_projection.jl") + include("smap.jl") end \ No newline at end of file diff --git a/src/EmpiricalDynamicalModelling/smap.jl b/src/EmpiricalDynamicalModelling/smap.jl new file mode 100644 index 000000000..58993d6a2 --- /dev/null +++ b/src/EmpiricalDynamicalModelling/smap.jl @@ -0,0 +1,93 @@ +using Distances, Statistics, LinearAlgebra, DelayEmbeddings, Neighborhood, Distances, LinearAlgebra + +export smap + +""" + smap(X::Dataset; θ = 1.0, k = 1, + training = 1:length(x) ÷ 2 - k, + predictees = length(x) ÷ 2 + 1:length(x)) → Vector{Float64}, Vector{Float64} + +Sequential locally weighted global linear map (S-map; Sugihara, 1994)[^Sugihara1994]. + +For each predictee ``x_ ∈ X_{pred}`, the algorithm uses points in the library/training set +``X_{train} \\setminus x_i`` to fit a weighted linear model for predicting ``x_{i+k}`` +(xᵢ projected `k` time steps into the future). + +Returns two scalar vectors: `x̂s = [x̂₁₊ₖ, x̂₂₊ₖ, ..., x̂ₙ+k]`, which are the predicted values, and +`x̂s_truths = [x₁₊ₖ, x₂₊ₖ, ..., xₙ+k]`, which are the actual values ``x_{i+k} \\in X_{pred}`` +for ``i = 1, \\ldots, n``, where ``n`` is the number of predictees. + +## Input data + +The algorithm approximates a global map based on *embedding points* and predicts scalar values +in the *first* column of `X`. If your input data is a scalar time series, it must therefore +first be embedded using `DynamicalSystems.genembed` first. 
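+
+A minimal usage sketch (illustrative only; the data, embedding parameters and keyword
+values below are arbitrary choices, not prescribed by the method):
+
+```julia
+using DelayEmbeddings
+x = rand(1000)              # hypothetical scalar time series
+X = genembed(x, 0:-1:-2)    # 3-dimensional delay embedding with lag 1
+
+# Predict 2 steps ahead with a moderately nonlinear model (θ = 0.5),
+# using the default training/prediction split (first half vs. second half).
+x̂, x̂_truth = smap(X; θ = 0.5, k = 2)
+```
+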
+
+## Training and prediction sets
+
+By default, the first `ntrain` points of `x` is used as the library set (`Xtrain = x[1:Ntrain-k]`), and the
+remaining half (`Xpred = x[Ntrain+1:end]`) is assigned to the prediction set. Overlapping index ranges
+are not possible as of yet.
+
+When `θ = 0.0`, all weights are identical, and the map reduces to a linear autoregressive model.
+Nonlinearity is introduced when `θ > 0`, so tuning this parameter can be used to distinguish
+nonlinear dynamical systems from linear stochastic systems.
+
+[^Sugihara1994]: Sugihara, G. (1994). Nonlinear forecasting for the classification of natural time series. Philosophical Transactions of the Royal Society of London. Series A: Physical and Engineering Sciences, 348(1688), 477-495.
+"""
+function smap(x::Dataset; θ = 1.0, k = 1, trainees = 1:length(x) ÷ 2, predictees = length(x) ÷ 2 + 1:length(x)-k, metric = Euclidean(), r = 0)
+    Mtrain = x[trainees]; nₜ = length(trainees)
+    d = size(x, 2)
+    nq = length(predictees)
+
+    x̂s = zeros(nq)
+    x̂s_truths = zeros(nq)
+
+    # We really only need distances between each query point qᵢ and the points in `Mtrain`,
+    # and we don't need them sorted, so the below approach is not ideal. The direct approach
+    # is commented out in the loop below, but that approach doesn't take into consideration
+    # the Theiler window (excluding points within a time radius of `r` around each query).
+    # Doing a neighbor search with `nₜ` neighbors seems cumbersome, but makes it easier to
+    # use the Theiler window.
+    #tree, theiler = KDTree(x[predictees]), Theiler(r)
+    #query_neighbors, query_dists = bulksearch(tree, Mtrain, NeighborNumber(nq-k), theiler)
+
+    # Make predictions
+    for i = 1:nq-k - 1
+        # Select the query point
+        query = predictees[i]
+        xᵢ = x[query]
+
+        # Construct weight matrix. These weights take into account the
+        # distance from the query point to all other training points.
+        dists = [evaluate(metric, xᵢ, xₜ) for xₜ in Mtrain]
+        #dists = query_dists[i]
+
+        d̄ = mean(dists)
+        wts = [exp(-θ*d / d̄) for d in dists]
+        W = Diagonal(wts[1:nₜ - k])
+
+        # We want to train a model that predicts k time steps into the future.
+        # For that, each row/point in `A` must be time-shifted `k` steps
+        # relative to the corresponding entry in `b`. Weights are applied after
+        # selecting the points.
+        b = W * Mtrain[1+k:end, 1]
+        A = W * [repeat([1], nₜ - k) Matrix(Mtrain[1:nₜ - k])]
+
+        # Least squares solution
+        c = A \ b
+
+        # Eq. 3.1 in Sugihara (1994). Here, we predict x̂ᵢ using the
+        # model obtained from the least squares solution.
+        # c[1] = constant, c[2:end] are the coefficients.
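+        # That is, x̂ᵢ = c[1] + c[2]*xᵢ[1] + ... + c[d+1]*xᵢ[d].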
+ x̂ᵢ = c[1] + sum(xᵢ .* c[2:end]) + + # Locate the corresponding grouth truth + x̂ᵢ_truth = x[query + k][1] + + x̂s[i] = x̂ᵢ + x̂s_truths[i] = x̂ᵢ_truth + end + + return x̂s, x̂s_truths +end \ No newline at end of file From 9395318f846951d2ef0bbe8c1f98e133699397d2 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Mon, 29 Nov 2021 01:45:14 +0100 Subject: [PATCH 10/19] Simplex projection best optimal embedding dimension --- docs/src/edm.md | 40 ++++++++++++++++- .../EmpiricalDynamicalModelling.jl | 1 + .../delay_simplexprojection.jl | 43 +++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 src/EmpiricalDynamicalModelling/delay_simplexprojection.jl diff --git a/docs/src/edm.md b/docs/src/edm.md index 02c99ffbb..6f4c47cdc 100644 --- a/docs/src/edm.md +++ b/docs/src/edm.md @@ -110,6 +110,44 @@ savefig("simplex_correlation_sine_tent.svg") # hide In contrast to the tent map, for which prediction accuracy drops off and stabilizes around zero for increasing `k`, the prediction accuracy is rather insensitive to the choice of `k` for the noisy sine time series. +### Example: determining optimal embedding dimension + +```@docs +delay_simplex +``` + +The simplex projection method can also be used to determine the optimal embedding dimension for a time series. +Given an embedding lag `τ`, we can embed a time series `x` for a range of embedding dimensions `d ∈ 2:dmax` and +compute the average prediction power over multiple `ks` using the simplex projection method. + +Here, we compute the average prediction skills from `k=1` up to `k=15` time steps into the future, for +embedding dimensions `d = 2:10`. + +```@example +using CausalityTools, DynamicalSystems, Plots; gr() + +sys = CausalityTools.ExampleSystems.lorenz_lorenz_bidir() +T, Δt = 150, 0.05 +lorenz = trajectory(sys, T, Δt = Δt, Ttr = 100)[:, 1:3] +x1, x2, x3 = columns(lorenz) + +# Determine the optimal embedding delay +τ = estimate_delay(x1, "ac_zero") + +# Compute average prediction skill for +ds, ks = 2:10, 1:15 +ρs = delay_simplex(x1, τ, ds = ds, ks = ks) + +plot(xlabel = "Embedding dimension", ylabel = "ρ̄(observed, predicted") +plot!(ds, ds, label = "", c = :black, marker = :star) +savefig("simplex_embedding.svg") # hide +``` + +![](simplex_embedding.svg) + +Based on the predictability criterion, the optimal embedding dimension, for this particular realization +of the first variable of the Lorenz system, seems to be 2. 
+ ## S-map ```@docs @@ -136,7 +174,7 @@ T, Δt = 150, 0.05 lorenz = trajectory(sys, T, Δt = Δt, Ttr = 100)[:, 1:3] p_orbit = plot(xlabel = "x", ylabel = "y", zlabel = "z") plot!(columns(lorenz)..., marker = :circle, label = "", ms = 2, lα = 0.5) -savefig(lorenz_attractor.svg) # hide +savefig("lorenz_attractor.svg") # hide ``` ![](lorenz_attractor.svg) diff --git a/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl b/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl index 413b98851..72c11adec 100644 --- a/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl +++ b/src/EmpiricalDynamicalModelling/EmpiricalDynamicalModelling.jl @@ -3,5 +3,6 @@ using Reexport @reexport module EmpiricalDynamicalModelling include("simplex_projection.jl") + include("delay_simplexprojection.jl") include("smap.jl") end \ No newline at end of file diff --git a/src/EmpiricalDynamicalModelling/delay_simplexprojection.jl b/src/EmpiricalDynamicalModelling/delay_simplexprojection.jl new file mode 100644 index 000000000..3d4a3af09 --- /dev/null +++ b/src/EmpiricalDynamicalModelling/delay_simplexprojection.jl @@ -0,0 +1,43 @@ +using DelayEmbeddings + +export delay_simplex + +""" + delay_simplex(x, τ; ds = 2:10, ks = 1:10) → ρs + +Determine the optimal embedding dimension for `x` based on the simplex projection algorithm +from Sugihara & May (1990)[^Sugihara1990]. + +For each `d ∈ ds`, we compute the correlation between observed and predicted values +for different prediction times `ks`, and average the correlation coefficients. The +embedding dimension for which the average correlation is highest is taken as the optimal +dimension. The embedding delay `τ` is given as a positive number. + +Returns the prediction skills `ρs` - one `ρ` for each `d ∈ ds`. + +Note: the library/training and prediction sets are automatically taken as the first and +second halves of the data, respectively. This convenience method does not allow tuning +the libraries further. + +[^Sugihara1990]: Sugihara, George, and Robert M. May. "Nonlinear forecasting as a way of distinguishing chaos from measurement error in time series." Nature 344.6268 (1990): 734-741. +""" +function delay_simplex(x, τ; ds = 2:10, ks = 1:8) + ρs = zeros(length(ds)) + for (i, d) in enumerate(ds) + embedding = genembed(x, -τ) + + ρs_k = zeros(length(ks)) + for (j, k) in enumerate(ks) + train = 1:length(x) ÷ 2 - k + pred = length(x) ÷ 2 + 1 : length(x) - d*τ + + x̄, x̄_actual = simplex_predictions(x, k; τ = τ, d = d, + training = train, + prediction = pred) + ρs_k[j] = cor(x̄, x̄_actual) + end + ρs[i] = mean(ρs_k) + end + + return ρs +end \ No newline at end of file From f49eaf7c4892d81454cd1141b154c96af1947b6f Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Mon, 29 Nov 2021 02:20:20 +0100 Subject: [PATCH 11/19] Typo --- src/EmpiricalDynamicalModelling/smap.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/EmpiricalDynamicalModelling/smap.jl b/src/EmpiricalDynamicalModelling/smap.jl index 58993d6a2..ab4cc8c98 100644 --- a/src/EmpiricalDynamicalModelling/smap.jl +++ b/src/EmpiricalDynamicalModelling/smap.jl @@ -9,7 +9,7 @@ export smap Sequential locally weighted global linear map (S-map; Sugihara, 1994)[^Sugihara1994]. 
-For each predictee ``x_ ∈ X_{pred}`, the algorithm uses points in the library/training set +For each predictee ``x_i ∈ X_{pred}```, the algorithm uses points in the library/training set ``X_{train} \\setminus x_i`` to fit a weighted linear model for predicting ``x_{i+k}`` (xᵢ projected `k` time steps into the future). From afdffd6a3a91322a296fd2320d7668fcb22681e9 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Mon, 29 Nov 2021 02:50:14 +0100 Subject: [PATCH 12/19] Doc typo --- docs/src/edm.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/src/edm.md b/docs/src/edm.md index 6f4c47cdc..bba716595 100644 --- a/docs/src/edm.md +++ b/docs/src/edm.md @@ -139,7 +139,7 @@ ds, ks = 2:10, 1:15 ρs = delay_simplex(x1, τ, ds = ds, ks = ks) plot(xlabel = "Embedding dimension", ylabel = "ρ̄(observed, predicted") -plot!(ds, ds, label = "", c = :black, marker = :star) +plot!(ds, ρs, label = "", c = :black, marker = :star) savefig("simplex_embedding.svg") # hide ``` @@ -233,7 +233,5 @@ A formal way to test the presence of nonlinearity is, for example, to define the "H0: predictions do not improve when using an equivalent nonlinear versus a linear model" (equivalent in the sense that the only parameter is `θ`) or, equivalently, "`H0: ρ_linear = ρ_nonlinear`". If predictions do in fact improve, we instead accept the alternative hypothesis that prediction *do* improve when using nonlinear models versus using linear models. This can be formally tested using a z-statistic [^Sugihara1994]. -## References - [^Sugihara1994]: Sugihara, G. (1994). Nonlinear forecasting for the classification of natural time series. Philosophical Transactions of the Royal Society of London. Series A: Physical and Engineering Sciences, 348(1688), 477-495. [^Sugihara1996]: Sugihara, George, et al. "Nonlinear control of heart rate variability in human infants." Proceedings of the National Academy of Sciences 93.6 (1996): 2608-2613. \ No newline at end of file From 203620b8fc983fc6f08c69ef89721abec6a56a97 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Mon, 29 Nov 2021 02:52:52 +0100 Subject: [PATCH 13/19] Wording --- docs/src/edm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/edm.md b/docs/src/edm.md index bba716595..5b908c39b 100644 --- a/docs/src/edm.md +++ b/docs/src/edm.md @@ -156,7 +156,7 @@ smap The s-map, or sequential locally weighted global map, was introduced in Sugihara (1994)[^Sugihara1994]. The s-map approximates the dynamics of a system as a locally weighted global map, with a tuning parameter ``\theta`` that controls the degree of nonlinearity in the model. For ``\theta = 0``, the model is the maximum likelihood global linear solution (of eq. 2 in Sugihara, 1994), and for increasing ``\theta > 0``, the model becomes increasingly nonlinear and localized (Sugihara, 1996)[^Sugihara1996]. -When such a model has been constructed, it be used as prediction tool for out-of-sample points. It extends the prediction power of the simplex projection method, which only approximates the dynamics with a piecewise linear map. Let's elaborate with an example. +When such a model has been constructed, it be used as prediction tool for out-of-sample points, and can be used to characterize nonlinearity in a time series (Sugihara, 1994). Let's demonstrate with an example. 
### Example: prediction power for the Lorenz system From 9cdfd5949b6db6df09f75d5b39d41b3e3ffdd665 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Mon, 29 Nov 2021 02:54:42 +0100 Subject: [PATCH 14/19] Docstring wording --- src/EmpiricalDynamicalModelling/smap.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/EmpiricalDynamicalModelling/smap.jl b/src/EmpiricalDynamicalModelling/smap.jl index ab4cc8c98..5c0bd8393 100644 --- a/src/EmpiricalDynamicalModelling/smap.jl +++ b/src/EmpiricalDynamicalModelling/smap.jl @@ -25,9 +25,9 @@ first be embedded using `DynamicalSystems.genembed` first. ## Training and prediction sets -By default, the first `ntrain` points of `x` is used as the library set (`Xtrain = x[1:Ntrain-k]`), and the -remaining half (`Xpred = x[Ntrain+1:end]`) is assigned to the prediction set. Overlapping index ranges -are not possible as of yet. +By default, the first half of the points of `x` are used as the library set +(`Xtrain = x[1:ntrain-k]`), and the remaining half (`Xpred = x[ntrain+1:end]`) is assigned +to the prediction set. Overlapping index ranges are not possible as of yet. When `θ = 0.0`, all weights are identical, and the A reduces to a linear autoregressive A. Nonlinearity is introduced when `θ > 0`, so tuning this parameters can be used to distinguish From 1c3cf914f643e4f2fd11c2927d832d0b6366ca42 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Mon, 29 Nov 2021 19:16:07 +0100 Subject: [PATCH 15/19] Do less lags --- docs/src/edm.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/edm.md b/docs/src/edm.md index 5b908c39b..fd611b04c 100644 --- a/docs/src/edm.md +++ b/docs/src/edm.md @@ -120,7 +120,7 @@ The simplex projection method can also be used to determine the optimal embeddin Given an embedding lag `τ`, we can embed a time series `x` for a range of embedding dimensions `d ∈ 2:dmax` and compute the average prediction power over multiple `ks` using the simplex projection method. -Here, we compute the average prediction skills from `k=1` up to `k=15` time steps into the future, for +Here, we compute the average prediction skills from `k=1` up to `k=10` time steps into the future, for embedding dimensions `d = 2:10`. 
```@example @@ -135,7 +135,7 @@ x1, x2, x3 = columns(lorenz) τ = estimate_delay(x1, "ac_zero") # Compute average prediction skill for -ds, ks = 2:10, 1:15 +ds, ks = 2:10, 1:10 ρs = delay_simplex(x1, τ, ds = ds, ks = ks) plot(xlabel = "Embedding dimension", ylabel = "ρ̄(observed, predicted") From c42bcacfa2ac4aa57f6767bcc9aa64f8fb4c1795 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Tue, 30 Nov 2021 02:06:11 +0100 Subject: [PATCH 16/19] Add some comments --- .../delay_simplexprojection.jl | 8 ++++++-- src/EmpiricalDynamicalModelling/simplex_projection.jl | 10 ++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/EmpiricalDynamicalModelling/delay_simplexprojection.jl b/src/EmpiricalDynamicalModelling/delay_simplexprojection.jl index 3d4a3af09..4067c269f 100644 --- a/src/EmpiricalDynamicalModelling/delay_simplexprojection.jl +++ b/src/EmpiricalDynamicalModelling/delay_simplexprojection.jl @@ -28,8 +28,12 @@ function delay_simplex(x, τ; ds = 2:10, ks = 1:8) ρs_k = zeros(length(ks)) for (j, k) in enumerate(ks) - train = 1:length(x) ÷ 2 - k - pred = length(x) ÷ 2 + 1 : length(x) - d*τ + train = 1:length(x) ÷ 2 + # Predictees need somewhere to go when projected forward in time, + # so make sure that we exclude predictees that are among the the + # last τ*(d-1) - k points (these would result in simplices for + # which one of the vertices has no target `k` steps in the future) + pred = length(x) ÷ 2 + 1 : length(x) - τ*(d-1) - k x̄, x̄_actual = simplex_predictions(x, k; τ = τ, d = d, training = train, diff --git a/src/EmpiricalDynamicalModelling/simplex_projection.jl b/src/EmpiricalDynamicalModelling/simplex_projection.jl index 4385927fd..c98485386 100644 --- a/src/EmpiricalDynamicalModelling/simplex_projection.jl +++ b/src/EmpiricalDynamicalModelling/simplex_projection.jl @@ -42,7 +42,11 @@ is based on Ye et al. (2015)[^Ye2015]. """ function simplex_predictions(x, k; τ = 1, d = 2, training = 1:length(x) ÷ 2, - prediction = length(x) ÷ 2 + 1:length(x)-τ*(d-1)) + # Predictees need somewhere to go when projected forward in time, + # so make sure that we exclude predictees that are among the the + # last τ*(d-1) - k points (these would result in simplices for + # which one of the vertices has no target `k` steps in the future) + prediction = length(x) ÷ 2 + 1:length(x)-τ*(d-1)-k) # Generate embedding and separate the embedding points into a training set and # a set of predictees. @@ -54,7 +58,9 @@ function simplex_predictions(x, k; τ = 1, d = 2, #if isempty(training ∩ prediction) # For now, the training set excludes the `k` last points, so that # when later finding nearest neighbors, these neighbors have "somewhere to go" - # when projected forward in time. + # when projected forward in time. This can be done more intelligently, because + # some library points are lost, and when using overlapping libraries, the + # indexing is not that simple. For now, just use non-overlapping libraries. 
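+    # (a Theiler window of 0 below excludes only the query point itself from the neighbor search)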
theiler = Theiler(0) trainees = Mₓ[training[1:end-k]] From 281372c49f56bde1a122a2cf461c371f4b46f857 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 1 Dec 2021 03:42:21 +0100 Subject: [PATCH 17/19] Bring SMeasure up to speed --- src/SMeasure/smeasure.jl | 73 ++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 40 deletions(-) diff --git a/src/SMeasure/smeasure.jl b/src/SMeasure/smeasure.jl index c1320db08..e7507c3f5 100644 --- a/src/SMeasure/smeasure.jl +++ b/src/SMeasure/smeasure.jl @@ -4,12 +4,7 @@ using Reexport @reexport module SMeasure export s_measure -import Distances: Metric, SqEuclidean, evaluate, Euclidean -import NearestNeighbors: KDTree -import ChaosTools: FixedMassNeighborhood, neighborhood -using Neighborhood -using ChaosTools -using DelayEmbeddings +using NearestNeighbors, Distances, Neighborhood, DelayEmbeddings """ @@ -25,8 +20,12 @@ using DelayEmbeddings K::Int = 2, metric = SqEuclidean(), tree_metric = Euclidean(), dy::Int = 2, τy::Int = 1) → Float64 -S-measure test [1] for the directional dependence between univariate/multivariate time -series `x` and `y`. +S-measure [^Grassberger1999][^Quiroga2000] for the directional dependence between +univariate/multivariate time series `x` and `y`. + +Returns a scalar `s ∈ [0, 1]`, where `s = 0` indicates independence between `x` and `y`, +and higher values indicate synchronization between `x` and `y`, with complete +synchronization for `s = 1.0`. ## Input data @@ -90,11 +89,8 @@ x, y = orbit[:, 1], orbit[:, 2] s_measure(x, y, dx = 4, τx = 3, dy = 5, τy = 1) ``` -## References - -Quian Quiroga, R., Arnhold, J. & Grassberger, P. [2000] -“Learning driver-response relationships from synchronization patterns,” -Phys. Rev. E61(5), 5142–5148. +[^Quiroga2000]: Quian Quiroga, R., Arnhold, J. & Grassberger, P. [2000] “Learning driver-response relationships from synchronization patterns,” Phys. Rev. E61(5), 5142–5148. +[^Grassberger1999]: Arnhold, J., Grassberger, P., Lehnertz, K., & Elger, C. E. (1999). A robust method for detecting interdependences: application to intracranially recorded EEG. Physica D: Nonlinear Phenomena, 134(4), 419-430. """ function s_measure(x::AbstractDataset{D1, T}, y::AbstractDataset{D2, T}; K::Int = 3, metric = SqEuclidean(), @@ -102,46 +98,43 @@ function s_measure(x::AbstractDataset{D1, T}, y::AbstractDataset{D2, T}; K::Int theiler_window::Int = 0 # only point itself excluded ) where {D1, D2, T} - # # Match length of datasets by excluding end points. + # Match length of datasets by excluding end points. lx = length(x); ly = length(y) lx > ly ? X = x[1:ly, :] : X = x ly > lx ? 
Y = y[1:lx, :] : Y = y N = length(X) - treeX = searchstructure(KDTree, X, tree_metric) - treeY = searchstructure(KDTree, Y, tree_metric) - # Pre-allocate vectors to hold indices and distances during loops - dists_x = Vector{T}(undef, K) - dists_x_cond_y = Vector{T}(undef, K) - - # Mean squared distances in X - Rx = Vector{T}(undef, N) + dists_x = zeros(T, K) + dists_x_cond_y = zeros(T, K) - # Mean squared distances in X conditioned on Y - Rx_cond_y = Vector{T}(undef, N) + # Mean squared distances in X, and + # mean squared distances in X conditioned on Y + Rx = zeros(T, N) + Rx_cond_y = zeros(T, N) - # use one more neighbor, because we need to excluded the first - # (itself) afterwards (distance to itself is zero) - neighborhoodtype = NeighborNumber(K + 1) - - idxs_X = bulkisearch(treeX, X, neighborhoodtype) - idxs_Y = bulkisearch(treeY, Y, neighborhoodtype) + # Search for the K nearest neighbors of each points in both X and Y + treeX = searchstructure(KDTree, X, tree_metric) + treeY = searchstructure(KDTree, Y, tree_metric) + neighborhoodtype, theiler = NeighborNumber(K), Theiler(0) + idxs_X = bulkisearch(treeX, X, neighborhoodtype, theiler) + idxs_Y = bulkisearch(treeY, Y, neighborhoodtype, theiler) - @inbounds for i in 1:N - @views pxᵢ, pyᵢ = X[i], Y[i] + for n in 1:N + pxₙ = X[n] - for j = 2:K - dists_x[j] = @views evaluate(metric, pxᵢ, X[idxs_X[i][j]]) - dists_x_cond_y[j] = @views evaluate(metric, pxᵢ, X[idxs_Y[i][j]]) + for j = 1:K + rₙⱼ = idxs_X[n][j] # nearest neighbor indices in X + sₙⱼ = idxs_Y[n][j] # nearest neighbor indices in Y + dists_x[j] = evaluate(metric, pxₙ, X[rₙⱼ]) + dists_x_cond_y[j] = evaluate(metric, pxₙ, X[sₙⱼ]) end - Rx[i] = sum(dists_x) / K - Rx_cond_y[i] = sum(dists_x_cond_y) / K + Rx[n] = sum(dists_x) / K + Rx_cond_y[n] = sum(dists_x_cond_y) / K end - - S = sum(Rx ./ Rx_cond_y) / N - return S + + return sum(Rx ./ Rx_cond_y) / N end function s_measure(x::AbstractVector{T}, y::AbstractVector{T}; K::Int = 3, From 617dce2fb5e3f9619771923ab252e3d4d7111486 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 1 Dec 2021 03:43:15 +0100 Subject: [PATCH 18/19] start leave-one-out --- src/EmpiricalDynamicalModelling/smap.jl | 114 ++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/src/EmpiricalDynamicalModelling/smap.jl b/src/EmpiricalDynamicalModelling/smap.jl index 5c0bd8393..6d490f70b 100644 --- a/src/EmpiricalDynamicalModelling/smap.jl +++ b/src/EmpiricalDynamicalModelling/smap.jl @@ -90,4 +90,118 @@ function smap(x::Dataset; θ = 1.0, k = 1, trainees = 1:length(x) ÷ 2, predicte end return x̂s, x̂s_truths +end + +#Based on https://stackoverflow.com/questions/48460875/vector-matrix-element-wise-multiplication-by-rows-in-julia-efficiently +function wtsmultA!(Ax, wts, A) + @inbounds for j = 1:size(A,2) + @simd for i = 1:size(A,1) + Ax[i,j] = wts[j] * A[i,j] + end + end + return Ax +end + +""" + fill_library_indices!(idxs, n, i) + +Fill `idxs` (`Vector{Int}` of length `n - 1`) with the indices `1:n`, excluding `i < n`. +""" +function fill_library_indices!(idxs, n, i) + ct = 1 + @inbounds for k in 1:n + if k != i + idxs[ct] = k + ct += 1 + end + end +end + +export smap_leaveoneout +# A variant of the crossmap applied when there is overlap between prediction and training sets. 
+# Uses exhaustive leave-one-out cross-validation (https://en.wikipedia.org/wiki/Cross-validation_(statistics)) +function smap_leaveoneout(x::Dataset{D, T}; θ = 1.0, k = 1, metric = Euclidean(), r = 0, + trainees = nothing, predictees = nothing) where {D, T} + + # No need to locate neighbors. Manually do Theiler window in loop instead. + alldists = pairwise(Euclidean(), x) + X = Matrix(x) + + # Length of dataset, length of training set, and dimension + n = length(x); nₜ = n - 1; d = size(x, 2) + + # Pre-allocate arrays. These will be re-used at every iteration. + wts = zeros(nₜ) + bx = zeros(nₜ - k) + A = zeros(nₜ - k, d + 1) + A[:, 1] = ones(nₜ - k) + Ax = zeros(nₜ - k, d + 1) + + # Store predictions and ground truths + x̂s = Vector{T}(undef, 0) + x̂s_truth = Vector{T}(undef, 0) + idxs = zeros(Int, nₜ) + @show d + 1 + + for (i, xᵢ) in enumerate(x) + if i < n - k + # This leads to excessive memory allocations because we need to explicitly + # collect the non-consecutive indices. We therefore define a function that generates + # the indices for us without extra allocations. The benchmark timing difference is + # a factor of about 30. + # if i < d + 1 + # @show "2" + # fst = + # lst = + # idxs = d+1+i:n + # elseif d + 1 < i < n - d + 1 + # @show "1" + # fst = 1:i-1 + # lst = i+1:n + # idxs = union(fst, lst) + # elseif d < i + # @show "3" + # idxs = d + # end + # @show i, idxs |> collect + + + # #fill_library_indices!(idxs, n, i) + # training_set = @views X[idxs, :] + + # # Distances from xᵢ to all other points, and the mean of those distances + # distsᵢ = @views alldists[i, idxs] + # d̄ = mean(distsᵢ) + + # wts .= exp.(-θ .* distsᵢ ./ d̄) + # W = Diagonal(wts[1:nₜ - k]) + + # # We want to train a model that predicts k time steps into the future + # # For that, each row/point in `A` must be time-shifted `k`` steps + # # relative to the corresponding entry in `b`. Weights are applied after + # # selecting the points. + # #@views wtsmultA!(Ax, wts[1:end-k], A[1:end-k, :]) + # #bx .= @views wts[1:end-k] .* @views training_set[1+k:end, 1] + + # bx = W * training_set[1+k:end, 1] + # Ax = W * [ones(nₜ - k) training_set[1:nₜ - k]] + + # # # Least squares solution + # c = Ax \ bx + + # # Eq. 3.1 in Sugihara (1994). Here, we predict x̂ᵢ using the + # # model obtained from the least squares solution. + # # c[1] = constant, c[2:end] are the coefficients. + # x̂ᵢ = c[1] + sum(xᵢ .* @views c[2:end]) + + # # Locate the corresponding grouth truth + # x̂ᵢ_truth = @views x[i + k][1] + + # push!(x̂s, x̂ᵢ) + # push!(x̂s_truth, x̂ᵢ_truth) + end + end + + return x̂s, x̂s_truth + end \ No newline at end of file From e7583e41326b1a162c7e6a31f2aeb4d37da9ce39 Mon Sep 17 00:00:00 2001 From: Kristian Haaga Date: Wed, 1 Dec 2021 03:44:37 +0100 Subject: [PATCH 19/19] doc figures should not display while building --- docs/make.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index c0a7d519e..b745da6c2 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -3,7 +3,7 @@ using Pkg CI = get(ENV, "CI", nothing) == "true" || get(ENV, "GITHUB_TOKEN", nothing) !== nothing CI && Pkg.activate(@__DIR__) CI && Pkg.instantiate() -CI && (ENV["GKSwstype"] = "100") +ENV["GKSwstype"] = "100" using DelayEmbeddings using TransferEntropy using Documenter