diff --git a/docs/src/complexity/compression_complexity.md b/docs/src/complexity/compression_complexity.md
new file mode 100644
index 000000000..94061695a
--- /dev/null
+++ b/docs/src/complexity/compression_complexity.md
@@ -0,0 +1,61 @@
+# [Compression complexity](@id compression_complexity)
+
+## Interface
+
+```@docs
+compression_complexity
+```
+
+## Algorithms
+
+```@docs
+EffortToCompress
+```
+
+## Example
+
+Below is a reproduction of figure 3 in Nagaraj et al. (2013)[^Nagaraj2013], which compares
+the (approximate) Lyapunov exponent and compression complexity (ETC) of 200-pt long
+logistic map time series with varying bifurcation parameters.
+
+For ETC, the time series is symbolized to a binary time series before analysis. Lyapunov
+exponents are estimated directly on the (non-symbolized) time series.
+
+```@example
+using CausalityTools, Plots, StatsBase, Measures
+
+# Approximation to the Lyapunov exponent for a time series x, from Nagaraj (2013)
+function lyapunov(x, a)
+    L = length(x)
+    lyp = 0.0
+    for i in 2:length(x)
+        lyp += log(2, abs(a - 2*a*x[i]))
+    end
+
+    return lyp / L
+end
+
+coeffs = 3.5:0.0001:4.0
+ls = zeros(length(coeffs))
+etcs = zeros(length(coeffs))
+for (i, a) in enumerate(coeffs)
+    sys = logistic2_unidir(r₁ = a, c_xy = 0.0, σ = 0.0)
+    # Generate time series and compute approximate Lyapunov exponents
+    x = trajectory(sys, 200, Ttr = 10000)[:, 1]
+    ls[i] = lyapunov(x, a)
+
+    # Symbolize and compute effort-to-compress
+    y = [xᵢ > 0.5 ? 1 : 0 for xᵢ in x]
+    etcs[i] = compression_complexity(y, EffortToCompress(normalize = false))
+end
+
+plot(xlabel = "a",
+    legend = :topleft,
+    right_margin = 10mm,
+    bottom_margin = 5mm)
+plot!(coeffs, ls .- mean(ls), label = "Lyapunov exponent", c = :red)
+plot!(twinx(), coeffs, etcs, label = "ETC", axis = :right, c = :black)
+savefig("compression_complexity_etc.svg") # hide
+```
+
+![](compression_complexity_etc.svg)
+
+[^Nagaraj2013]: Nagaraj, N., Balasubramanian, K., & Dey, S. (2013). A new complexity measure for time series analysis and classification. The European Physical Journal Special Topics, 222(3), 847-860.
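+
+As a minimal sketch of the basic calling convention (assuming, as above, a pre-binned
+integer-valued time series):
+
+```julia
+using CausalityTools
+x = rand(0:1, 100) # pre-symbolized binary time series
+# Raw ETC value (the number of compression steps):
+compression_complexity(x, EffortToCompress(normalize = false))
+# ETC normalized to [0, 1] by the maximum possible number of steps (length(x) - 1):
+compression_complexity(x, EffortToCompress(normalize = true))
+```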
diff --git a/docs/src/complexity/dynamical_complexity.md b/docs/src/complexity/dynamical_complexity.md
new file mode 100644
index 000000000..2897ef93e
--- /dev/null
+++ b/docs/src/complexity/dynamical_complexity.md
@@ -0,0 +1,5 @@
+# [Dynamical complexity](@id dynamical_complexity)
+
+```@docs
+dynamical_complexity
+```
diff --git a/docs/src/complexity/interventional_complexity_causality.md b/docs/src/complexity/interventional_complexity_causality.md
new file mode 100644
index 000000000..b68a15b27
--- /dev/null
+++ b/docs/src/complexity/interventional_complexity_causality.md
@@ -0,0 +1,15 @@
+# Interventional complexity causality (ICC)
+
+## Interface
+
+```@docs
+icc
+```
+
+## Estimators
+
+```@docs
+CompressionComplexityCausalityEstimator
+```
+
+## Examples
diff --git a/src/CausalityTools.jl b/src/CausalityTools.jl
index 8082a97eb..a7a515ef4 100644
--- a/src/CausalityTools.jl
+++ b/src/CausalityTools.jl
@@ -28,6 +28,7 @@ module CausalityTools
     include("methods/closeness/closeness.jl")
    include("methods/correlation/correlation.jl")
    include("methods/recurrence/methods.jl")
+    # interface.jl defines `CausalityEstimator` and the sliding-window utilities that the
+    # compression methods below rely on, so it must be included first.
+    include("interface.jl")
+    include("methods/compression/compression.jl")
+    include("InterventionalComplexityCausality/InterventionalComplexityCausality.jl")
 
    include("utils/utils.jl")
diff --git a/src/InterventionalComplexityCausality/InterventionalComplexityCausality.jl b/src/InterventionalComplexityCausality/InterventionalComplexityCausality.jl
new file mode 100644
index 000000000..5bf05eea3
--- /dev/null
+++ b/src/InterventionalComplexityCausality/InterventionalComplexityCausality.jl
@@ -0,0 +1,7 @@
+using Reexport
+
+@reexport module InterventionalComplexityCausality
+    using ..ComplexityMeasures
+    # Bring the compression-complexity API from the parent module into scope.
+    using ..CausalityTools: dynamical_complexity, get_windows, EffortToCompress
+    include("interface.jl")
+    include("ccc/compression_complexity_causality.jl")
+end
\ No newline at end of file
diff --git a/src/InterventionalComplexityCausality/ccc/ccc_estimators.jl b/src/InterventionalComplexityCausality/ccc/ccc_estimators.jl
new file mode 100644
index 000000000..994646d56
--- /dev/null
+++ b/src/InterventionalComplexityCausality/ccc/ccc_estimators.jl
@@ -0,0 +1,42 @@
+export CCCEstimator, CompressionComplexityCausalityEstimator
+
+"""
+    CompressionComplexityCausalityEstimator(
+        algorithm::CompressionComplexityEstimator = EffortToCompress(normalize = true),
+        w::Int = 15,
+        L::Int = 30,
+        step::Int = 1)
+
+`CompressionComplexityCausalityEstimator` (`CCCEstimator` for short) stores
+parameters used to compute interventional complexity causality (ICC).
+
+## Parameters
+
+- `algorithm`: The algorithm used to compute the compression complexity, e.g. an instance of
+    [`EffortToCompress`](@ref).
+- `w`: The block size for `Xⁱ`, which is a block of current values of time series `X`. The
+    default value is set to 15, as recommended in Kathpalia & Nagaraj (2019).
+- `L`: The block size for `X⁻`, `Y⁻`, `Z⁻`, ... , which are blocks of past values of
+    `X`, `Y`, `Z`, and so on, whose values are taken from the immediate past of the `Xⁱ`
+    block. The default value is `L = 30`, but for real applications, this value
+    should be determined as described in table S2 in the supplement of Kathpalia & Nagaraj
+    (2019).
+- `step`: ICC is computed over a sliding window of width `w + L`. `step` gives the shift,
+    in number of indices, between windows (``\\delta`` in Kathpalia & Nagaraj, 2019).
+
+## Input data requirements
+
+All estimators assume that the input time series are *pre-symbolized/binned* integer-valued
+time series. Thus, the parameter `B` (number of bins) from Kathpalia & Nagaraj
+(2019)[^Kathpalia2019] is not present here.
+
+See also: [`icc`](@ref), [`EffortToCompress`](@ref).
+
+[^Kathpalia2019]: Kathpalia, A., & Nagaraj, N. (2019). Data-based intervention approach for Complexity-Causality measure. PeerJ Computer Science, 5, e196.
+"""
+Base.@kwdef struct CompressionComplexityCausalityEstimator{ALG} <: InterventionalComplexityCausalityEstimator
+    algorithm::ALG = EffortToCompress(normalize = true)
+    w::Int = 15
+    L::Int = 30
+    step::Int = 1
+end
+const CCCEstimator = CompressionComplexityCausalityEstimator
diff --git a/src/InterventionalComplexityCausality/ccc/compression_complexity_causality.jl b/src/InterventionalComplexityCausality/ccc/compression_complexity_causality.jl
new file mode 100644
index 000000000..8274d048e
--- /dev/null
+++ b/src/InterventionalComplexityCausality/ccc/compression_complexity_causality.jl
@@ -0,0 +1,28 @@
+using Statistics
+include("ccc_estimators.jl")
+
+# Per-segment ICC: dynamical complexity of Xⁱ given X⁻, minus that given both X⁻ and Y⁻.
+function icc_i(Xⁱ::AbstractVector{J}, X⁻::AbstractVector{J}, Y⁻::AbstractVector{J},
+        estimator::CompressionComplexityCausalityEstimator) where {J <: Integer}
+    return dynamical_complexity(Xⁱ, X⁻, estimator.algorithm) -
+        dynamical_complexity(Xⁱ, X⁻, Y⁻, estimator.algorithm)
+end
+
+function icc_on_segments(x::AbstractVector{J}, y::AbstractVector{J},
+        estimator::CompressionComplexityCausalityEstimator) where {J <: Integer}
+    w, L, step = estimator.w, estimator.L, estimator.step
+    windows = get_windows(x, w + L, step)
+    segment_results = zeros(Float64, length(windows))
+    for (i, window) in enumerate(windows)
+        current_indices = window[L + 1]:window[L + w]
+        past_indices = window[1:L]
+        Xⁱ = @views x[current_indices]
+        X⁻ = @views x[past_indices]
+        Y⁻ = @views y[past_indices]
+        segment_results[i] = dynamical_complexity(Xⁱ, X⁻, estimator.algorithm) -
+            dynamical_complexity(Xⁱ, X⁻, Y⁻, estimator.algorithm)
+    end
+    return segment_results
+end
+
+function icc(x, y, estimator)
+    return Statistics.mean(icc_on_segments(x, y, estimator))
+end
diff --git a/src/InterventionalComplexityCausality/interface.jl b/src/InterventionalComplexityCausality/interface.jl
new file mode 100644
index 000000000..01b5dba91
--- /dev/null
+++ b/src/InterventionalComplexityCausality/interface.jl
@@ -0,0 +1,71 @@
+export icc,
+    interventional_complexity_causality,
+    InterventionalComplexityCausalityEstimator
+
+"""
+    InterventionalComplexityCausalityEstimator
+
+Causality estimators that are based on interventional complexity causality (ICC),
+as described in Kathpalia & Nagaraj (2019)[^Kathpalia2019], are subtypes of
+`InterventionalComplexityCausalityEstimator`.
+
+[^Kathpalia2019]: Kathpalia, A., & Nagaraj, N. (2019). Data-based intervention approach for Complexity-Causality measure. PeerJ Computer Science, 5, e196.
+"""
+abstract type InterventionalComplexityCausalityEstimator end
+
+"""
+    icc(x::Vector{Int}, y::Vector{Int}, estimator)
+
+Interventional complexity causality (ICC) is defined as[^Kathpalia2019] *"the change in the
+dynamical complexity of time series X when `Xⁱ` is seen to be generated jointly by the
+dynamical evolution of both `Y⁻` and `X⁻` as opposed to by the reality of the dynamical
+evolution of `X⁻` alone"*, that is
+
+```math
+ICC_{Y^{-} \\to X^{i}} = DC(X^{i} | X^{-}) - DC(X^{i} | X^{-}, Y^{-}),
+```
+
+where
+
+- DC is [`dynamical_complexity`](@ref),
+- `Xⁱ` is a block of current values of `X` (`ΔX` in the original paper),
+- `X⁻` is a block of past values of `X`, not overlapping with and immediately before `Xⁱ`, and
+- `Y⁻` is a block of values from a time series `Y` taken at the same indices as `X⁻`.
+
+`icc` computes the *mean ICC* across a series of sliding windows over the input time series
+`x` and `y`, where the sliding window configuration is dictated by the `estimator`.
+
+## Interpretation
+
+If ``ICC_{Y \\to X}`` is statistically zero, then it implies no causal influence from
+``Y`` to ``X``. If ``ICC_{Y \\to X}`` is statistically different from zero, then it is
+inferred that ``Y`` dynamically influences ``X``, and higher magnitudes indicate higher
+degrees of causation.
+
+``ICC_{Y \\to X}`` can be both positive and negative. For details on interpreting both
+signs, see Kathpalia and Nagaraj (2019)[^Kathpalia2019].
+
+## Examples
+
+```jldoctest; setup = :(using CausalityTools)
+using Statistics, Random
+rng = MersenneTwister(1234)
+x = rand(rng, 0:1, 500)
+y = rand(rng, 0:1, 500)
+est = CompressionComplexityCausalityEstimator(
+    algorithm = EffortToCompress(normalize = true),
+    w = 15,
+    L = 30,
+    step = 10)
+icc(x, y, est)
+
+# output
+0.10375494071146242
+```
+
+See also: [`CompressionComplexityCausalityEstimator`](@ref), [`EffortToCompress`](@ref).
+
+[^Kathpalia2019]: Kathpalia, A., & Nagaraj, N. (2019). Data-based intervention approach for Complexity-Causality measure. PeerJ Computer Science, 5, e196.
+"""
+function interventional_complexity_causality end
+const icc = interventional_complexity_causality
diff --git a/src/interface.jl b/src/interface.jl
new file mode 100644
index 000000000..38ac653f5
--- /dev/null
+++ b/src/interface.jl
@@ -0,0 +1,4 @@
+""" The supertype of all time series causality estimators. """
+abstract type CausalityEstimator end
+
+include("utils/sliding_window.jl")
\ No newline at end of file
diff --git a/src/methods/compression/compression.jl b/src/methods/compression/compression.jl
new file mode 100644
index 000000000..795c1844f
--- /dev/null
+++ b/src/methods/compression/compression.jl
@@ -0,0 +1 @@
+include("compression_complexity/api.jl")
+include("dynamical_complexity/dynamical_complexity.jl")
\ No newline at end of file
diff --git a/src/methods/compression/compression_complexity/api.jl b/src/methods/compression/compression_complexity/api.jl
new file mode 100644
index 000000000..d5791116c
--- /dev/null
+++ b/src/methods/compression/compression_complexity/api.jl
@@ -0,0 +1,132 @@
+abstract type CompressionComplexityEstimator <: ComplexityEstimator end
+include("effort_to_compress/effort_to_compress.jl")
+
+
+# export compression_complexity
+# import ComplexityMeasures: ComplexityMeasure
+# abstract type CompressionComplexityMeasure <: ComplexityMeasure end
+
+# """
+# # Regular compression complexity
+
+#     compression_complexity(x, algorithm) → N
+
+# Compute the compression complexity of the pre-binned/pre-symbolized (integer-valued)
+# univariate time series `x` using the given `algorithm`.
+
+#     compression_complexity(x::StateSpaceSet, algorithm, alphabet_size::Int) → N
+
+# Multivariate integer time series `x`, given as `StateSpaceSet`s, are first transformed into a
+# 1D symbol sequence before computing compression complexity. This transformation assumes
+# that each variable `xᵢ ∈ x` was symbolized using the same `alphabet_size`. The resulting
+# 1D symbol sequence is (in this implementation) not correct if different alphabet sizes are
+# used, so ensure during pre-processing that the same alphabet is used (e.g.
+# `alphabet_size = 2` for a binary time series, and `alphabet_size = 5` for a five-box binned
+# time series).
+
+
+# # Examples
+
+# Quantifying the compression complexity of a univariate symbol sequence using
+# the [`EffortToCompress`](@ref) algorithm.
+
+# ```jldoctest; setup = :(using CausalityTools)
+# compression_complexity([1, 2, 1, 2, 1, 1, 1, 2], EffortToCompress())
+
+# # output
+# 5.0
+# ```
+
+# Multivariate time series given as [`StateSpaceSet`](@ref)s also work, but must be
+# symbolized using the same alphabet.
+
+# ```jldoctest; setup = :(using CausalityTools)
+# x = [1, 2, 1, 2, 1, 1, 1, 2]
+# y = [2, 1, 2, 2, 2, 1, 1, 2]
+# alphabet_size = 2
+# compression_complexity(StateSpaceSet(x, y), EffortToCompress(), alphabet_size)
+
+# # output
+# 7.0
+# ```
+
+# Sliding window estimators automatically handle window creation,
+# and return a vector of complexity measures, one computed on each window.
+
+# ```jldoctest; setup = :(using CausalityTools)
+# using Random; rng = MersenneTwister(1234);
+# x = rand(rng, 0:1, 100)
+# alg = EffortToCompress(normalize = false)
+# sw = ConstantWidthSlidingWindow(alg, width = 25, step = 15)
+# compression_complexity(x, sw)
+
+# # output
+# 5-element Vector{Float64}:
+# 12.0
+# 14.0
+# 14.0
+# 14.0
+# 11.0
+# ```
+
+# # Joint compression complexity
+
+#     compression_complexity(x::AbstractVector, y::AbstractVector, algorithm) → N
+#     compression_complexity(x::StateSpaceSet, y::StateSpaceSet, algorithm, ax::Int, ay::Int) → N
+
+# If a second time series `y` is provided, compute the joint compression
+# complexity using alphabet size `ax` for `x` and alphabet size `ay` for `y`.
+
+# `x` and `y` must be either both integer-valued vectors, or both `StateSpaceSet`s (potentially
+# of different dimensions). If computing the joint compression complexity between a
+# univariate time series and a multivariate time series, simply wrap the univariate time
+# series in a `StateSpaceSet`.
+
+# # Examples
+
+# Joint compression complexity between two time series:
+
+# ```jldoctest; setup = :(using CausalityTools)
+# using Random; rng = MersenneTwister(1234);
+# x = rand(rng, 0:2, 100)
+# y = rand(rng, 0:2, 100)
+# alg = EffortToCompress(normalize = true)
+# compression_complexity(x, y, alg)
+
+# # output
+# 0.2222222222222222
+# ```
+
+# For multivariate time series, we must specify the alphabet size
+# for each variable to ensure that results are correct.
+
+# ```jldoctest; setup = :(using CausalityTools, DelayEmbeddings)
+# # Multivariate time series X has variables encoded with a 2-letter alphabet,
+# x1 = [1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0]
+# x2 = [1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0]
+# X = StateSpaceSet(x1, x2)
+
+# # Multivariate time series Y has variables encoded with a 3-letter alphabet
+# y1 = [1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0]
+# y2 = [2, 2, 0, 2, 2, 2, 2, 2, 1, 1, 2]
+# y3 = [2, 2, 0, 2, 2, 2, 1, 2, 1, 1, 2]
+# Y = StateSpaceSet(y1, y2, y3)
+
+# alg = EffortToCompress(normalize = true)
+# compression_complexity(X, Y, alg, 2, 3)
+
+# # output
+# 0.9
+# ```
+
+# # Returns
+
+# See individual algorithms for details on their return values. A `Vector` of compression
+# complexities is returned if a sliding window algorithm is used - one value per window.
+
+# See also: [`EffortToCompress`](@ref), [`EffortToCompressSlidingWindow`](@ref).
+
+# [^Kathpalia2019]: Kathpalia, A., & Nagaraj, N. (2019). Data-based intervention approach for Complexity-Causality measure. PeerJ Computer Science, 5, e196.
+# """ +# function compression_complexity end diff --git a/src/methods/compression/compression_complexity/effort_to_compress/effort_to_compress.jl b/src/methods/compression/compression_complexity/effort_to_compress/effort_to_compress.jl new file mode 100644 index 000000000..7a8eb46bb --- /dev/null +++ b/src/methods/compression/compression_complexity/effort_to_compress/effort_to_compress.jl @@ -0,0 +1,50 @@ +export EffortToCompress + +""" + EffortToCompress(; normalize = false) + +The effort-to-compress (ETC; Nagaraj et al., 2013)[^Nagaraj2013] algorithm quantifies +the compression complexity of a time series using non-sequential recursive pair +substitution (NSRPS). + +Used with [`complexity`](@ref) to compute ETC and with [`complexity_normalized`](@ref) +to compute normalized ETC. +Input time series must be integer-valued. Real-valued or categorical time series +must be symbolized (i.e. represented by integers) before computing the ETC. The number +of symbols should be relatively low, i.e. `n_symbols << length(x)`, where `x` is the +input time series. If applied to two time series (e.g. +`complexity(EffortToCompress(), x1, x2)`), then (bivariate) joint ETC (Kathpalia +and Nagaraj, 2019)[^Kathpalia2019] is computed. + +## Normalization + +If `normalize == false`, then the quantity computed is the the number of compression steps +it takes for the symbol sequence to reach zero entropy (either a constant sequence, +or a length-1 sequence). + +For a length-`N` sequence, the maximum number of possible compression steps is `N - 1`. +If `normalize == true`, then the ETC value is divided by `N - 1`, yielding a number on +`[0, 1]`, where `0` indicates minimal compression complexity and `1` indicates maximal +compression complexity. + +## Description + +The ETC algorithm parses a symbol sequence from left to right and finds the symbol with the +highest frequency of occurrence. *Note: In the case of multiple symbols attaining the +maximum frequency, our implementation does not guarantee that the replaced symbol is +the symbol which first occurs in the sequence. This behaviour differs from the examples in +Nagaraj (2013) and Kathpalia (2019), where they seem to always replace the first-occurring +symbol. This does not affect the number of compression steps.* + +[^Nagaraj2013]: Nagaraj, N., Balasubramanian, K., & Dey, S. (2013). A new complexity + measure for time series analysis and classification. The European Physical Journal + Special Topics, 222(3), 847-860. +[^Kathpalia2019]: Kathpalia, A., & Nagaraj, N. (2019). Data-based intervention approach + for Complexity-Causality measure. PeerJ Computer Science, 5, e196. +""" +struct EffortToCompress <: CompressionComplexityEstimator end + +include("etc_utils.jl") +include("etc_univariate.jl") +include("etc_multivariate.jl") +include("etc_joint.jl") diff --git a/src/methods/compression/compression_complexity/effort_to_compress/etc_joint.jl b/src/methods/compression/compression_complexity/effort_to_compress/etc_joint.jl new file mode 100644 index 000000000..723da8053 --- /dev/null +++ b/src/methods/compression/compression_complexity/effort_to_compress/etc_joint.jl @@ -0,0 +1,180 @@ +# This file implements 2-variable joint effort-to-compress (ETC). +# TODO: it is also possible to compute joint ETC for more than two sequences, but it +# is probably best to use generated functions for this. I'm leaving it for future work. 
+import ComplexityMeasures: complexity, complexity_normalized
+using ComplexityMeasures
+using StaticArrays
+using StateSpaceSets: StateSpaceSet
+
+function replacement_value(x::Vector{SVector{2, Tuple{J, J}}}, pair_to_replace) where {J <: Integer}
+    max_x₁ = zero(J)
+    max_x₂ = zero(J)
+
+    for xᵢ in x
+        if maximum(xᵢ[1]) > max_x₁
+            max_x₁ = maximum(xᵢ[1])
+        end
+        if maximum(xᵢ[2]) > max_x₂
+            max_x₂ = maximum(xᵢ[2])
+        end
+    end
+
+    # The replacement must be a *new* symbol in each series, so use one more than the
+    # largest symbol observed so far (mirroring the univariate implementation).
+    return (max_x₁ + one(J), max_x₂ + one(J))
+end
+
+function pair_to_be_replaced(x::Vector{SVector{2, Tuple{J, J}}}) where {J <: Integer}
+    # The pair to be replaced is the one that occurs most often, so compute occurrences
+    # of each unique pair.
+    hist = histogram(x)
+    histkeys = keys(hist) |> collect
+    frequencies = values(hist) |> collect
+
+    # In the case of frequency ties, we pick the first (the algorithm doesn't care).
+    maxfreq_idx = findfirst(x -> x == maximum(frequencies), frequencies)
+
+    return histkeys[maxfreq_idx]
+end
+
+function non_sequential_recursive_pair_substitution(
+        pairs::Vector{SVector{2, Tuple{J, J}}},
+        pair_to_replace,
+        symbol_to_replace_with) where {J <: Integer}
+    T = eltype(first(first(pairs)))
+
+    compressed_x = Vector{T}(undef, 0)
+    compressed_y = Vector{T}(undef, 0)
+    sizehint!(compressed_x, length(pairs))
+    sizehint!(compressed_y, length(pairs))
+
+    j = 1
+    n_replaced = 0
+    while j <= length(pairs)
+        if pairs[j] == pair_to_replace
+            # If replacing the j-th symbol pair, then the j-th and (j+1)-th original symbols
+            # are merged. Thus, the index must be incremented by 2 (hence "non-sequential").
+            push!(compressed_x, symbol_to_replace_with[1])
+            push!(compressed_y, symbol_to_replace_with[2])
+
+            n_replaced += 1
+
+            if j == length(pairs) - 1
+                j += 1
+                @views push!(compressed_x, pairs[j][2][1])
+                @views push!(compressed_y, pairs[j][2][2])
+
+                j = length(pairs) + 1
+            else
+                j += 2
+            end
+        else
+            if j == length(pairs)
+                # If at the last symbol pair, keep both symbols.
+                @views push!(compressed_x, pairs[j][1][1])
+                @views push!(compressed_x, pairs[j][2][1])
+                @views push!(compressed_y, pairs[j][1][2])
+                @views push!(compressed_y, pairs[j][2][2])
+                j += 1
+            elseif j < length(pairs)
+                # If at any symbol pair but the last, keep the first symbol.
+                @views push!(compressed_x, pairs[j][1][1])
+                @views push!(compressed_y, pairs[j][1][2])
+                j += 1
+            end
+        end
+
+    end
+    sizehint!(compressed_x, length(compressed_x))
+    sizehint!(compressed_y, length(compressed_y))
+
+    return compressed_x, compressed_y
+end
+
+function compress(x::AbstractVector{J}, y::AbstractVector{J}) where {J <: Integer}
+    # This is a sequence of SVector{2, Tuple{J, J}}
+    seq = sequential_symbol_pairs(x, y)
+
+    # This is a joint symbol pair (a pair of two-letter tuples)
+    pair_to_replace = pair_to_be_replaced(seq)
+
+    # This is a pair of replacement symbols, one for each series
+    symbol_to_replace_with = replacement_value(seq, pair_to_replace)
+
+    # Compress by using non-sequential recursive pair substitution
+    non_sequential_recursive_pair_substitution(seq, pair_to_replace, symbol_to_replace_with)
+end
+
+function complexity(algorithm::EffortToCompress,
+        x::AbstractVector{J},
+        y::AbstractVector{J},
+    ) where J <: Integer
+    length(x) == length(y) || throw(ArgumentError("lengths of `x` and `y` must be equal"))
+
+    # Edge case: one-element vectors return zero regardless of normalization (avoids
+    # division by zero).
+    if length(x) == 1
+        return 0.0
+    end
+
+    N = 0.0
+    while !haszeroentropy(StateSpaceSet(x, y))
+        x, y = compress(x, y)
+        N += 1
+    end
+
+    return N
+end
+
+function complexity(algorithm::EffortToCompress, x::AbstractStateSpaceSet{D1, T}, y::AbstractStateSpaceSet{D2, T},
+        ax::Int, ay::Int) where {D1, D2, T}
+    ax >= 2 && ay >= 2 || throw(ArgumentError("Alphabet sizes must be at least 2."))
+    encoded_x = symbol_sequence(x, ax)
+    encoded_y = symbol_sequence(y, ay)
+
+    return complexity(algorithm, encoded_x, encoded_y)
+end
+
+# The normalized analogue for multivariate inputs, so that the `normalize = true` case
+# also works for pairs of `StateSpaceSet`s.
+function complexity_normalized(algorithm::EffortToCompress, x::AbstractStateSpaceSet{D1, T}, y::AbstractStateSpaceSet{D2, T},
+        ax::Int, ay::Int) where {D1, D2, T}
+    ax >= 2 && ay >= 2 || throw(ArgumentError("Alphabet sizes must be at least 2."))
+    encoded_x = symbol_sequence(x, ax)
+    encoded_y = symbol_sequence(y, ay)
+
+    return complexity_normalized(algorithm, encoded_x, encoded_y)
+end
+
+function complexity_normalized(algorithm::EffortToCompress,
+        x::AbstractVector{J},
+        y::AbstractVector{J},
+    ) where J <: Integer
+
+    # Store original length before substituting, so that we don't normalize to
+    # the length of the compressed time series.
+    L = length(x) - 1
+
+    # The number of compression steps until a zero-entropy sequence is obtained.
+    N = complexity(algorithm, x, y)
+
+    return N / L
+end
+
+# Sliding-window versions (data-first argument order, matching the test suite).
+function compression_complexity(x::AbstractVector{J}, y::AbstractVector{J},
+        sw::ConstantWidthSlidingWindow{<:CompressionComplexityEstimator}) where {J <: Integer}
+    return @views [compression_complexity(x[window], y[window], sw.estimator)
+        for window in get_windows(x, sw)]
+end
+
+function compression_complexity(x::AbstractStateSpaceSet{D1, T}, y::AbstractStateSpaceSet{D2, T},
+        sw::ConstantWidthSlidingWindow{<:CompressionComplexityEstimator},
+        ax::Int, ay::Int) where {D1, D2, T}
+    ax >= 2 && ay >= 2 || throw(ArgumentError("Alphabet sizes must be at least 2."))
+    encoded_x = symbol_sequence(x, ax)
+    encoded_y = symbol_sequence(y, ay)
+    return @views [compression_complexity(encoded_x[window], encoded_y[window], sw.estimator)
+        for window in get_windows(x, sw)]
+end
diff --git a/src/methods/compression/compression_complexity/effort_to_compress/etc_multivariate.jl b/src/methods/compression/compression_complexity/effort_to_compress/etc_multivariate.jl
new file mode 100644
index 000000000..7c09ad226
--- /dev/null
+++ b/src/methods/compression/compression_complexity/effort_to_compress/etc_multivariate.jl
@@ -0,0 +1,23 @@
+using StateSpaceSets
+import ComplexityMeasures: complexity
+
+function complexity(algorithm::EffortToCompress, x::AbstractStateSpaceSet{D, T},
+        alphabet_size::Int) where {D, T}
+    alphabet_size >= 2 || throw(ArgumentError("Alphabet size must be at least 2."))
+
+    # Assuming that the alphabet size was the same when symbolizing all variables `xᵢ ∈ x`,
+    # encode each unique state vector as a unique integer, so that the multivariate time
+    # series becomes a univariate symbol time series.
+    encoded_x = symbol_sequence(x, alphabet_size)
+    return complexity(algorithm, encoded_x)
+end
+
+function compression_complexity(x::AbstractStateSpaceSet{D, T},
+        sw::ConstantWidthSlidingWindow{<:CompressionComplexityEstimator},
+        alphabet_size::Int) where {D, T}
+    alphabet_size >= 2 || throw(ArgumentError("Alphabet size must be at least 2."))
+
+    encoded_x = symbol_sequence(x, alphabet_size)
+    windows = get_windows(x, sw)
+    return @views [compression_complexity(encoded_x[w], sw.estimator) for w in windows]
+end
diff --git a/src/methods/compression/compression_complexity/effort_to_compress/etc_univariate.jl b/src/methods/compression/compression_complexity/effort_to_compress/etc_univariate.jl
new file mode 100644
index 000000000..ebabfe2eb
--- /dev/null
+++ b/src/methods/compression/compression_complexity/effort_to_compress/etc_univariate.jl
@@ -0,0 +1,125 @@
+using StaticArrays
+import ComplexityMeasures: complexity, complexity_normalized
+
+function replacement_value(x::Vector{SVector{2, J}}) where {J <: Integer}
+    max_x = zero(J)
+    for xᵢ in x
+        if maximum(xᵢ) > max_x
+            max_x = maximum(xᵢ)
+        end
+    end
+
+    # The replaced pair gets a *new* symbol: one more than the largest symbol seen.
+    return max_x + one(J)
+end
+
+# TODO: this can be made much faster by not using dictionaries and using pre-allocation
+# and clever indexing.
+"""
+    pair_to_be_replaced(x::Vector{SVector{2, J}}) where {J <: Integer} → SVector{2, J}
+
+Compute the most frequently occurring symbol pair `xᵢ`, where each `xᵢ ∈ x` is a symbol
+pair. In the case of ties, we return the first pair, according to the underlying (lack of)
+sorting.
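+
+For example, for the pair sequence `[SA[0, 1], SA[1, 0], SA[0, 1]]`, the pair `SA[0, 1]`
+occurs twice and is the one returned.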
+""" +function pair_to_be_replaced(x::Vector{SVector{2, J}}) where {J <: Int} + # The pair to be replaced is the one that occurs most often, so compute occurrences + # of each unique pair. + hist = histogram(x) + histkeys = keys(hist) |> collect + frequencies = values(hist) |> collect + + # In the case of frequency ties, we pick the first (the algorithm doesn't care). + maxfreq_idx = findfirst(x -> x == maximum(frequencies), frequencies) + pair_to_be_replaced = histkeys[maxfreq_idx] + + return pair_to_be_replaced +end + + +function non_sequential_recursive_pair_substitution(pairs, pair_to_replace, + symbol_to_replace_with) + + T = eltype(first(pairs)) + compressed_x = Vector{T}(undef, 0) + sizehint!(compressed_x, length(pairs)) + + j = 1 + #println("Total pairs: $(length(pairs))\n$pairs\n---") + n_replaced = 0 + while j <= length(pairs) + if pairs[j] == pair_to_replace + # If replacing the j-th symbol pair, then the j-th and (j+1)-th original symbols + # are merged. Thus, the index must be incremented by 2 (hence "non-sequential"). + push!(compressed_x, symbol_to_replace_with) + n_replaced += 1 + + #print("n_replaced=$n_replaced | Replaced at j=$j ") + if j == length(pairs) - 1 + j += 1 + @views push!(compressed_x, pairs[j][2]) + j = length(pairs) + 1 + else + j += 2 + end + #println("and jumped to j=$j") + else + if j == length(pairs) + # If at the last symbol pair, keep both symbols. + @views push!(compressed_x, pairs[j][1]) + @views push!(compressed_x, pairs[j][2]) + #print("n_replaced=$n_replaced | Kept BOTH VALUES OF ORIGINAL PAIR at j=$j ") + j += 1 + #println(" and jumped to j=$j") + elseif j < length(pairs) + # If at any symbol pair but the last, keep the first symbol. + @views push!(compressed_x, pairs[j][1]) + #print("n_replaced=$n_replaced | Kept original at j=$j ") + j += 1 + #println("and jumped to j=$j") + end + end + + end + sizehint!(compressed_x, length(compressed_x)) + return compressed_x +end + +function compress(x::Vector{SVector{2, J}}) where {J <: Int} + # This is a two-letter symbol pair (two integers) + pair_to_replace = pair_to_be_replaced(x) + + # This is a single integer + symbol_to_replace_with = replacement_value(x) + + # Compress by using non-sequential recursive pair substitution + non_sequential_recursive_pair_substitution(x, pair_to_replace, symbol_to_replace_with) +end + +function complexity(algorithm::EffortToCompress, x::AbstractVector{Int}) + seq = copy(x) + N = 0.0 + while !haszeroentropy(seq) + @show sequential_symbol_pairs(seq) + seq = compress(sequential_symbol_pairs(seq)) + # The last compressed sequence has length 2, so we increment *after* compression to + # ensure we count the last compression step. 
+        N += 1
+    end
+    return N
+end
+
+function complexity_normalized(algorithm::EffortToCompress, x::AbstractVector{<:Integer})
+    L = length(x) - 1
+    N = complexity(algorithm, x)
+    return N / L
+end
+
+# Sliding-window version (data-first argument order, matching the test suite).
+function compression_complexity(x::AbstractVector{J},
+        sw::ConstantWidthSlidingWindow{<:CompressionComplexityEstimator}) where {J <: Integer}
+    return @views [compression_complexity(x[window], sw.estimator)
+        for window in get_windows(x, sw)]
+end
diff --git a/src/methods/compression/compression_complexity/effort_to_compress/etc_utils.jl b/src/methods/compression/compression_complexity/effort_to_compress/etc_utils.jl
new file mode 100644
index 000000000..b40daf98f
--- /dev/null
+++ b/src/methods/compression/compression_complexity/effort_to_compress/etc_utils.jl
@@ -0,0 +1,71 @@
+using StatsBase: countmap
+using StateSpaceSets: AbstractStateSpaceSet
+using StaticArrays: SVector, SA
+export get_windows, symbol_sequence
+
+
+allidentical(x) = all(xᵢ -> xᵢ == first(x), x)
+
+haszeroentropy(x) = allidentical(x) || length(x) == 1
+
+histogram(x) = countmap(x)
+
+function vecints_to_int(x::AbstractVector{J}, base) where J <: Integer
+    s = zero(J)
+    for xᵢ in x
+        s = s * base + xᵢ
+    end
+    return s
+end
+
+"""
+    sequential_symbol_pairs(x::AbstractVector{J}) where J <: Integer → Vector{SVector{2, J}}
+
+Create a sequence of two-letter symbol pairs from an integer-valued time series.
+"""
+function sequential_symbol_pairs(x::AbstractVector{J}) where {J}
+    pairs = zeros(SVector{2, J}, length(x) - 1)
+
+    for i in 1:length(x) - 1
+        pairs[i] = SA[x[i], x[i+1]]
+    end
+    return pairs
+end
+
+"""
+    sequential_symbol_pairs(x::AbstractVector{J}, y::AbstractVector{J}) where {J <: Integer} → Vector{SVector{2, Tuple{J, J}}}
+
+Generate symbol pairs from the integer-valued, same-length vectors `x` and `y`.
+The `i`-th element of the returned vector is the `SVector`
+`SA[(x[i], x[i+1]), (y[i], y[i+1])]`.
+"""
+function sequential_symbol_pairs(x::AbstractVector{J}, y::AbstractVector{J}) where {J <: Integer}
+    length(x) == length(y) || throw(ArgumentError("lengths of `x` and `y` must be equal"))
+    length(x) >= 2 || throw(ArgumentError("Lengths of `x` and `y` must be >= 2"))
+
+    joint_pairs = Vector{SVector{2, Tuple{J, J}}}(undef, 0)
+    for i = 1:length(x) - 1
+        push!(joint_pairs, SA[(x[i], x[i+1]), (y[i], y[i+1])])
+    end
+    return joint_pairs
+end
+
+"""
+    symbol_sequence(x::AbstractStateSpaceSet{D, T}, alphabet_size::Int) where {D, T <: Integer} → Vector
+
+Create an encoded one-dimensional integer symbol sequence from the integer-valued
+`StateSpaceSet` `x`, assuming that the alphabet used for the original symbolization
+has `alphabet_size` possible symbols.
+
+In the context of the effort-to-compress algorithm, `x` would typically
+be a sub-StateSpaceSet as obtained by `view(d::StateSpaceSet, indices)`.
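+
+For example, with `alphabet_size = 2`, the state vectors `(0, 1)` and `(1, 1)` encode
+to `0*2 + 1 = 1` and `1*2 + 1 = 3`, respectively.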
+""" +function symbol_sequence(x::AbstractStateSpaceSet{D, T}, alphabet_size::J) where {D, T, J <: Integer} + return [vecints_to_int(xᵢ, alphabet_size) for xᵢ in x] +end + +# function get_windows(x, window_length::Int, step::Int = 1) +# [i:i+window_length for i in 1:step:(length(x) - window_length)] +# end + +# get_windows(x, sw::ConstantWidthSlidingWindow{<:CompressionComplexityEstimator}) = +# get_windows(x, sw.width, sw.step) diff --git a/src/methods/compression/dynamical_complexity/dynamical_complexity.jl b/src/methods/compression/dynamical_complexity/dynamical_complexity.jl new file mode 100644 index 000000000..daeccb4b6 --- /dev/null +++ b/src/methods/compression/dynamical_complexity/dynamical_complexity.jl @@ -0,0 +1,160 @@ +using Statistics +export dynamical_complexity + +""" +# Instantaneous dynamical complexity + + dynamical_complexity(Xⁱ, X⁻, algorithm::CompressionComplexityEstimator) + +Compute the dynamical complexity (DC) of `Xⁱ` given `X⁻` (eq. 1 in Kathpalia & Nagaraj, +2019): + +```math +DC(Xⁱ|X⁻) = C(X⁻ \\oplus Xⁱ) - C(X⁻), +``` + +where ``\\oplus`` signifies vector/array concatenation. + + dynamical_complexity(Xⁱ, X⁻, Y⁻, algorithm::CompressionComplexityCausalityEstimator) + +Compute the dynamical complexity of `Xⁱ` given `X⁻` and `Y⁻` (eq. 4 in +Kathpalia & Nagaraj, 2019). + +```math +DC(Xⁱ|X⁻, Y⁻) = C(X⁻ \\oplus Xⁱ, Y⁻ \\oplus Xⁱ) - C(X⁻, Y⁻), +``` + +where ``C(\\cdot, \\cdot)`` is the joint compression complexity. + +## Input requirements + +Assumes `Xⁱ`, `X⁻` and `Y⁻` are integer-valued sub-segments of pre-symbolized time series +`X` and `Y`, where `X⁻` and `Y⁻` are sampled at the (same) time indices immediately +preceding `Xⁱ`. + +## Examples + +Estimating dynamical complexity on a single segment using the [`EffortToCompress`](@ref) +algorithm. + +```jldoctest; setup = :(using CausalityTools) +using Random; rng = MersenneTwister(1234) +# Some random binary time series segments +x, y = rand(rng, 0:1, 100), rand(rng, 0:1, 100) + +# Split segments into past and present +X⁻, Xⁱ = x[1:49], x[50:end] +alg = EffortToCompress(normalize = true) +dynamical_complexity(Xⁱ, X⁻, alg) + +# output +-0.09406565656565657 +``` + +# Average dynamical complexity + + dynamical_complexity(f::Function, x, algorithm::CompressionComplexityEstimator, + w::Int, L::Int, step::Int) + +Using a constant-width sliding window, compute the instantaneous dynamical complexities of +the integer-valued, pre-symbolized time series `x`, then summarizethe +result using the function `f`. For example, if `f = Statistics.mean`, +the index ``i`` refers to the ``i``-th window, and there are ``N`` windows, we get the +average dynamical complexity + +```math +DC(X) = \\dfrac{1}{N}\\sum_{i=1}^{N}DC(Xⁱ|X⁻). +``` + +The (constant) width of the sliding window is `w + L`, where `L` is the width of the +*present* segment, and `w` is the width of the segment immediately preceding it. + + dynamical_complexity(f::Function, x, y, algorithm::CompressionComplexityEstimator, + w::Int, L::Int, step::Int) + +The same as above, but conditioned on a second time series `y`, e.g. 
+
+```math
+DC(X|Y) = \\dfrac{1}{N}\\sum_{i = 1}^{N}DC(Xⁱ|X⁻, Y⁻).
+```
+
+## Examples
+
+Mean dynamical complexity of `x` conditioned on `y`:
+
+```jldoctest; setup = :(using CausalityTools)
+using Random, Statistics; rng = MersenneTwister(1234)
+x = rand(rng, 0:1, 200);
+y = rand(rng, 0:1, 200);
+
+alg = EffortToCompress(normalize = true)
+w, L, step = 10, 15, 5
+dynamical_complexity(Statistics.mean, x, y, alg, w, L, step)
+
+# output
+-0.2753968253968254
+```
+"""
+function dynamical_complexity end
+
+function dynamical_complexity(Xⁱ::AbstractVector{J}, X⁻::AbstractVector{J},
+        algorithm::CompressionComplexityEstimator) where {J <: Integer}
+    return compression_complexity([X⁻; Xⁱ], algorithm) -
+        compression_complexity(X⁻, algorithm)
+end
+
+function dynamical_complexity(Xⁱ::AbstractVector{J}, X⁻::AbstractVector{J}, Y⁻::AbstractVector{J},
+        algorithm::CompressionComplexityEstimator) where {J <: Integer}
+    return compression_complexity([X⁻; Xⁱ], [Y⁻; Xⁱ], algorithm) -
+        compression_complexity(X⁻, Y⁻, algorithm)
+end
+
+function dynamical_complexity(x, algorithm::CompressionComplexityEstimator,
+        w::Integer,
+        L::Integer,
+        step::Integer)
+    windows = get_windows(x, w + L, step)
+    segment_results = zeros(Float64, length(windows))
+    for (i, window) in enumerate(windows)
+        current_indices = window[L + 1]:window[w + L]
+        past_indices = window[1:L]
+        Xⁱ = @views x[current_indices]
+        X⁻ = @views x[past_indices]
+        segment_results[i] = dynamical_complexity(Xⁱ, X⁻, algorithm)
+    end
+    return segment_results
+end
+
+
+function dynamical_complexity(x, y,
+        algorithm::CompressionComplexityEstimator,
+        w::Integer,
+        L::Integer,
+        step::Integer)
+    windows = get_windows(x, w + L, step)
+    segment_results = zeros(Float64, length(windows))
+    for (i, window) in enumerate(windows)
+        current_indices = window[L + 1]:window[w + L]
+        past_indices = window[1:L]
+        Xⁱ = @views x[current_indices]
+        X⁻ = @views x[past_indices]
+        Y⁻ = @views y[past_indices]
+        segment_results[i] = dynamical_complexity(Xⁱ, X⁻, Y⁻, algorithm)
+    end
+    return segment_results
+end
+
+dynamical_complexity(f::Function, x,
+    algorithm::CompressionComplexityEstimator,
+    w::Integer,
+    L::Integer,
+    step::Integer) =
+    f(dynamical_complexity(x, algorithm, w, L, step))
+
+dynamical_complexity(f::Function, x, y,
+    algorithm::CompressionComplexityEstimator,
+    w::Integer,
+    L::Integer,
+    step::Integer) =
+    f(dynamical_complexity(x, y, algorithm, w, L, step))
diff --git a/src/utils/sliding_window.jl b/src/utils/sliding_window.jl
new file mode 100644
index 000000000..5451e208c
--- /dev/null
+++ b/src/utils/sliding_window.jl
@@ -0,0 +1,20 @@
+
+abstract type SlidingWindow <: CausalityEstimator end
+export ConstantWidthSlidingWindow
+"""
+    ConstantWidthSlidingWindow(estimator; width = 20, step = 1) <: SlidingWindow
+
+Apply `estimator` to chunks of the time series, taken over sliding windows that span
+the indices `i:i + width` and whose left edges are `step` points apart.
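+
+## Example
+
+A minimal sketch (using [`EffortToCompress`](@ref) as the wrapped estimator):
+
+```julia
+sw = ConstantWidthSlidingWindow(EffortToCompress(normalize = true); width = 25, step = 10)
+```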
+""" +struct ConstantWidthSlidingWindow{E, J} <: SlidingWindow + estimator::E + width::J + step::J + + function ConstantWidthSlidingWindow(estimator::E; width::J = 20, step::J = 1) where {E, J} + new{E, J}(estimator, width, step) + end +end + +ConstantWidthSlidingWindow(1, width = 20, step = 1) \ No newline at end of file diff --git a/test/complexity_measures/test_dynamical_complexity.jl b/test/complexity_measures/test_dynamical_complexity.jl new file mode 100644 index 000000000..b71eebbce --- /dev/null +++ b/test/complexity_measures/test_dynamical_complexity.jl @@ -0,0 +1,15 @@ +using Statistics +@testset "Dynamical complexity" begin + # Some random binary time series segments + x, y = rand(0:1, 100), rand(0:1, 100) + + # Split segments into past and present + X⁻, Xⁱ = x[1:49], x[50:end] + alg = EffortToCompress(normalize = true) + @test dynamical_complexity(Xⁱ, X⁻, alg) isa typeof(1.0) + + # Split segments into past and present + alg = EffortToCompress(normalize = true) + w, L, step = 10, 15, 5 + @test dynamical_complexity(Statistics.mean, x, y, alg, w, L, step) isa typeof(1.0) +end diff --git a/test/complexity_measures/test_effort_to_compress.jl b/test/complexity_measures/test_effort_to_compress.jl new file mode 100644 index 000000000..854fbe1f0 --- /dev/null +++ b/test/complexity_measures/test_effort_to_compress.jl @@ -0,0 +1,172 @@ +using Test + +@testset "Effort to compress (ETC)" begin + @testset "ETC univariate" begin + alg = EffortToCompress(normalize = false) + + @testset "Paper examples" begin + x1 = [1, 2, 1, 2, 1, 1, 1, 2] + x2 = [0, 1, 0, 1, 0, 1, 0, 1] + x3 = [0, 1, 0, 0, 1, 1, 1, 0] + @test compression_complexity(x1, alg) == 5.0 + @test compression_complexity(x2, alg) == 1.0 + @test compression_complexity(x3, alg) == 6.0 + end + + @testset "MATLAB toolbox comparison" begin + x4 = [0, 3, 2, 0, 1, 1, 1, 2, 2, 2, 1] + x5 = [1, 0, 1, 1, 2, 1, 1, 1, 0, 2, 0] + x6 = [1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1] + @test compression_complexity(x4, alg) == 10.0 + @test compression_complexity(x5, alg) == 8.0 + @test compression_complexity(x6, alg) == 6.0 + end + + @test compression_complexity([1], alg) == 0.0 + x3 = [0, 1, 0, 0, 1, 1, 1, 0] + alg_norm = EffortToCompress(normalize = true) + @test compression_complexity(x3, alg_norm) == 6.0 / (length(x3) - 1) + + # Sliding windows + ts = rand(0:3, 50) + sw = ConstantWidthSlidingWindow( + EffortToCompress(normalize = true), + width = 8, + step = 5) + windows = get_windows(ts, sw) + res = compression_complexity(ts, sw) + @test length(res) == length(windows) + @test all(0.0 .<= res .<= 1.0) + end + + @testset "ETC multivariate" begin + x1 = [0, 0, 1, 0] + x2 = [1, 0, 1, 0] + x3 = [1, 1, 1, 1] + x4 = [0, 1, 1, 1] + + @test symbol_sequence(Dataset(x1, x2, x3, x4), 2) == [6, 3, 15, 3] + @test symbol_sequence(Dataset(x1, x3, x4), 2) == [2, 3, 7, 3] + @test compression_complexity(Dataset(x1, x2, x3, x4), EffortToCompress(), 2) == 3.0 + + # Sliding window + alphabet_size = 2 + x1 = rand(0:alphabet_size - 1, 100) + x2 = rand(0:alphabet_size - 1, 100) + x3 = rand(0:alphabet_size - 1, 100) + x4 = rand(0:alphabet_size - 1, 100) + data = Dataset(x1, x2, x3, x4) + window_size = 10 + step = 2 + sw = ConstantWidthSlidingWindow( + EffortToCompress(normalize = true), + width = window_size, + step = step) + + windows = get_windows(data, sw) + res = compression_complexity(data, sw, alphabet_size) + @test length(res) == length(windows) + @test all(res .>= 0) + + ############################################### + # Multivariate with different dimensionalities + 
+        ###############################################
+        # Variables in the first dataset have an alphabet size of 2
+        x1, x2 = rand(0:1, 1000), rand(0:1, 1000)
+
+        # Variables in the second dataset have an alphabet size of 4
+        y1, y2, y3 = rand(0:3, 1000), rand(0:3, 1000), rand(0:3, 1000)
+
+        d1 = Dataset(x1, x2)
+        d2 = Dataset(y1, y2, y3)
+        alg = EffortToCompress(normalize = true)
+        @test 0.0 ≤ compression_complexity(d1, d2, alg, 2, 4) ≤ 1.0
+
+        window_size, step = 50, 10
+        sw = ConstantWidthSlidingWindow(
+            EffortToCompress(normalize = true),
+            width = window_size,
+            step = step)
+        windows = get_windows(d2, sw)
+
+        res = compression_complexity(d1, d2, sw, 2, 4)
+        @test length(res) == length(windows)
+        @test all(0.0 .<= res .<= 1.0)
+    end
+
+    @testset "ETC joint" begin
+        # Test case from the "Joint ETC measure for a pair of time series: ETC(X,Y)"
+        # section in Kathpalia & Nagaraj (2019):
+        alg = EffortToCompress(normalize = false)
+        x = [1, 2, 1, 2, 1, 2]
+        y = [1, 2, 1, 3, 1, 3]
+        @test compression_complexity(x, y, alg) == 4.0
+
+        # Constant sequences should not trigger compression
+        x = [0, 0, 0, 0, 0]
+        y = [1, 1, 1, 1, 1]
+        @test compression_complexity(x, y, EffortToCompress()) == 0.0
+
+        # Single-element sequences already have zero entropy.
+        x = [0]
+        y = [1]
+        @test compression_complexity(x, y, EffortToCompress()) == 0.0
+
+        x = [1, 1, 0, 1, 1]
+        y = [0, 0, 0, 0, 0]
+        # Should reduce in three steps:
+        #   x: 11011 → 202 → 32 → 4
+        #   y: aaaaa → bab → cb → d
+        @test compression_complexity(x, y, EffortToCompress()) == 3.0
+
+        # A good test of the correctness of the implementation is that the joint ETC(X, Y)
+        # obeys the relationship stated in Kathpalia & Nagaraj (2019):
+        # ETC(X, Y) <= ETC(X) + ETC(Y)
+        # Here, we test if that is the case for our implementation by running 100000 test
+        # cases on random pairs of length-50 sequences, whose number of symbols
+        # for each sequence is allowed to vary between 2 and 6 between iterations.
+        inequality_tests = Vector{Bool}(undef, 0)
+        alg = EffortToCompress(normalize = false)
+
+        for i = 1:100000
+            x = rand(0:rand(1:5), 50)
+            y = rand(0:rand(1:5), 50)
+            ETCxy = round(Int, compression_complexity(x, y, alg))
+            ETCx = round(Int, compression_complexity(x, alg))
+            ETCy = round(Int, compression_complexity(y, alg))
+            res = ETCxy <= ETCx + ETCy
+            push!(inequality_tests, res)
+        end
+        N_not_true = count(inequality_tests .!= true)
+        N_true = count(inequality_tests .== true)
+        @test all(inequality_tests .== true)
+
+        @testset "ETC joint sliding window" begin
+            x1 = rand(0:1, 500)
+            x2 = rand(0:1, 500)
+
+            window_size = 10
+            step = 2
+            sw = ConstantWidthSlidingWindow(
+                EffortToCompress(normalize = true),
+                width = window_size,
+                step = step)
+
+            windows = get_windows(x1, sw)
+            res = compression_complexity(x1, x2, sw)
+            @test length(res) == length(windows)
+            @test all(res .>= 0)
+        end
+
+        @testset "ETC joint normalization" begin
+            results = zeros(Float64, 10000)
+            for i = 1:10000
+                x, y = rand(0:3, 100), rand(0:5, 100)
+                alg = EffortToCompress(normalize = true)
+                results[i] = compression_complexity(x, y, alg)
+            end
+            @test all(0.0 .<= results .<= 1.0)
+        end
+    end
+end
diff --git a/test/complexity_measures/test_icc.jl b/test/complexity_measures/test_icc.jl
new file mode 100644
index 000000000..1ff89d58a
--- /dev/null
+++ b/test/complexity_measures/test_icc.jl
@@ -0,0 +1,10 @@
+@testset "ICC" begin
+    x = rand(0:1, 500)
+    y = rand(0:1, 500)
+    est = CompressionComplexityCausalityEstimator(
+        algorithm = EffortToCompress(normalize = true),
+        w = 15,
+        L = 30,
+        step = 10)
+    @test icc(x, y, est) isa typeof(1.0)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 97fd69445..563d38228 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,4 +11,8 @@ testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include
 
 testfile("systems/systems.jl")
 end
+include("complexity_measures/test_effort_to_compress.jl")
+include("complexity_measures/test_dynamical_complexity.jl")
+include("complexity_measures/test_icc.jl")
+
 #include("integrations/test_uncertaindata_integration.jl")