Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "StateSpaceLearning"
uuid = "971c4b7c-2c4e-4bac-8525-e842df3cde7b"
authors = ["andreramosfc <[email protected]>"]
version = "1.4.3"
version = "2.0.0"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Expand Down
99 changes: 57 additions & 42 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,35 +20,41 @@ model = StructuralModel(y)
fit!(model)

# Point Forecast
prediction = StateSpaceLearning.forecast(model, 12) #Gets a 12 steps ahead prediction
prediction = forecast(model, 12) # Gets a 12 steps ahead prediction

# Scenarios Path Simulation
simulation = StateSpaceLearning.simulate(model, 12, 1000) #Gets 1000 scenarios path of 12 steps ahead predictions
simulation = simulate(model, 12, 1000) # Gets 1000 scenarios path of 12 steps ahead predictions
```

## StructuralModel Arguments

* `y::Vector`: Vector of data.
* `level::Bool`: Boolean where to consider intercept in the model (default: true)
* `stochastic_level::Bool`: Boolean where to consider stochastic level component in the model (default: true)
* `trend::Bool`: Boolean where to consider trend component in the model (default: true)
* `stochastic_trend::Bool`: Boolean where to consider stochastic trend component in the model (default: true)
* `seasonal::Bool`: Boolean where to consider seasonal component in the model (default: true)
* `stochastic_seasonal::Bool`: Boolean where to consider stochastic seasonal component in the model (default: true)
* `freq_seasonal::Int`: Seasonal frequency to be considered in the model (default: 12)
* `outlier::Bool`: Boolean where to consider outlier component in the model (default: true)
* `ζ_ω_threshold::Int`: Argument to stabilize `stochastic trend` and `stochastic seasonal` components (default: 12)
* `y::Union{Vector,Matrix}`: Time series data
* `level::String`: Level component type: "stochastic", "deterministic", or "none" (default: "stochastic")
* `slope::String`: Slope component type: "stochastic", "deterministic", or "none" (default: "stochastic")
* `seasonal::String`: Seasonal component type: "stochastic", "deterministic", or "none" (default: "stochastic")
* `cycle::String`: Cycle component type: "stochastic", "deterministic", or "none" (default: "none")
* `freq_seasonal::Union{Int,Vector{Int}}`: Seasonal frequency or vector of frequencies (default: 12)
* `cycle_period::Union{Union{Int,<:AbstractFloat},Vector{Int},Vector{<:AbstractFloat}}`: Cycle period or vector of periods (default: 0)
* `outlier::Bool`: Include outlier component (default: true)
* `ζ_threshold::Int`: Threshold for slope innovations (default: 12)
* `ω_threshold::Int`: Threshold for seasonal innovations (default: 12)
* `ϕ_threshold::Int`: Threshold for cycle innovations (default: 12)
* `stochastic_start::Int`: Starting point for stochastic components (default: 1)
* `exog::Matrix`: Matrix of exogenous variables (default: zeros(length(y), 0))
* `dynamic_exog_coefs::Union{Vector{<:Tuple}, Nothing}`: Dynamic exogenous coefficients (default: nothing)

## Features

Current features include:
* Estimation
* Components decomposition
* Forecasting
* Completion of missing values
* Predefined models, including:
* Outlier detection
* Outlier robust models
* Model estimation using elastic net based regularization
* Automatic component decomposition (trend, seasonal, cycle)
* Point forecasting and scenario simulation
* Missing value imputation
* Outlier detection and robust modeling
* Multiple seasonal frequencies support
* Deterministic and stochastic component options
* Dynamic exogenous variable handling
* Best subset selection for exogenous variables

## Quick Examples

Expand All @@ -67,7 +73,7 @@ steps_ahead = 30

model = StructuralModel(log_air_passengers)
fit!(model)
prediction_log = StateSpaceLearning.forecast(model, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
prediction_log = forecast(model, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
prediction = exp.(prediction_log)

plot_point_forecast(airp.passengers, prediction)
Expand All @@ -76,7 +82,7 @@ plot_point_forecast(airp.passengers, prediction)

```julia
N_scenarios = 1000
simulation = StateSpaceLearning.simulate(model, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths
simulation = simulate(model, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths

plot_scenarios(airp.passengers, exp.(simulation))

Expand All @@ -97,22 +103,20 @@ log_air_passengers = log.(airp.passengers)
model = StructuralModel(log_air_passengers)
fit!(model)

level = model.output.components["μ1"]["Values"] + model.output.components["ξ"]["Values"]
slope = model.output.components["ν1"]["Values"] + model.output.components["ζ"]["Values"]
seasonal = model.output.components["γ1_12"]["Values"] + model.output.components["ω_12"]["Values"]
trend = level + slope

plot(trend, w=2 , color = "Black", lab = "Trend Component", legend = :outerbottom)
plot(seasonal, w=2 , color = "Black", lab = "Seasonal Component", legend = :outerbottom)
# Access decomposed components directly
trend = model.output.decomposition["trend"]
seasonal = model.output.decomposition["seasonal_12"]

plot(trend, w=2, color = "Black", lab = "Trend Component", legend = :outerbottom)
plot(seasonal, w=2, color = "Black", lab = "Seasonal Component", legend = :outerbottom)
```

| ![quick_example_trend](./docs/src/assets/trend.svg) | ![quick_example_seas](./docs/src/assets/seasonal.svg)|
|:------------------------------:|:-----------------------------:|


### Best Subset Selection
Quick example on how to perform best subset selection in time series utilizing StateSpaceLearning.
### Best Subset Selection and Dynamic Coefficients
Example of performing best subset selection and using dynamic coefficients:

```julia
using StateSpaceLearning
Expand All @@ -122,22 +126,33 @@ using Random

Random.seed!(2024)

# Load data
airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
log_air_passengers = log.(airp.passengers)
X = rand(length(log_air_passengers), 10) # Create 10 exogenous features
β = rand(3)

y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.

model = StructuralModel(y; Exogenous_X = X)
fit!(model; α = 1.0, information_criteria = "bic", ϵ = 0.05, penalize_exogenous = true, penalize_initial_states = true)

Selected_exogenous = model.output.components["Exogenous_X"]["Selected"]

# Create exogenous features
X = rand(length(log_air_passengers), 10)
β = rand(3)
y = log_air_passengers + X[:, 1:3]*β

# Create model with exogenous variables
model = StructuralModel(y;
exog = X
)

# Fit model with elastic net regularization
fit!(model;
α = 1.0, # 1.0 for Lasso, 0.0 for Ridge
information_criteria = "bic",
ϵ = 0.05,
penalize_exogenous = true,
penalize_initial_states = true
)

# Get selected features
selected_exog = model.output.components["exog"]["Selected"]
```

In this example, the selected exogenous features were 1, 2, 3, as expected.

### Missing values imputation
Quick example of completion of missing values for the air passengers time-series (artificial NaN values are added to the original time-series).

Expand Down Expand Up @@ -209,7 +224,7 @@ fit!(model)
residuals_variances = model.output.residuals_variances

ss_model = BasicStructural(log_air_passengers, 12)
set_initial_hyperparameters!(ss_model, Dict("sigma2_ε" => residuals_variances["ε"],
StateSpaceModels.set_initial_hyperparameters!(ss_model, Dict("sigma2_ε" => residuals_variances["ε"],
"sigma2_ξ" =>residuals_variances["ξ"],
"sigma2_ζ" =>residuals_variances["ζ"],
"sigma2_ω" =>residuals_variances["ω_12"]))
Expand Down
Binary file added docs/src/assets/dynamic_exog.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
37 changes: 34 additions & 3 deletions docs/src/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ y = solars[!, "y1"]
T = length(y)
steps_ahead = 48

model = StructuralModel(y; freq_seasonal=24, trend=false, level=false)
model = StructuralModel(y; freq_seasonal=24, slope="none", level="none", outlier=false)
fit!(model; penalize_initial_states=false)
simulation = StateSpaceLearning.simulate(model, steps_ahead, 100) #Gets a 12 steps ahead prediction
plot_scenarios(y, simulation)
Expand All @@ -63,7 +63,7 @@ y = solars[!, "y1"]
T = length(y)
steps_ahead = 48

model = StructuralModel(y; freq_seasonal=24, trend=false, level=false)
model = StructuralModel(y; freq_seasonal=24, slope="none", level="none", outlier = false)
fit!(model; penalize_initial_states=false)
simulation = StateSpaceLearning.simulate(model, steps_ahead, 100; seasonal_innovation_simulation=24) #Gets a 12 steps ahead prediction
plot_scenarios(y, simulation)
Expand Down Expand Up @@ -114,4 +114,35 @@ plot_point_forecast(y, prediction)
```
![two_seas](assets/two_seas.png)

Note that the model was able to capture both seasonalities in this case.
Note that the model was able to capture both seasonalities in this case.

## Dynamic Exogenous Coefficients

Dynamic exogenous coefficients allow the effect of exogenous variables to vary over time with specific patterns (e.g., level, slope, seasonal or cyclical). This is configured through the `dynamic_exog_coefs` parameter in the StructuralModel constructor.

The `dynamic_exog_coefs` parameter accepts a vector of tuples, where each tuple contains:
- First element: A vector of an exogenous variable
- Second element: The name of the component that the exogenous variable will be associated with
- Third element (optional): For the seasonal component, the freq_seasonal parameter and for cycle component, the cycle_period parameter.

For example:
```julia
# Make X1's effect vary annually and X2's effect vary semi-annually
airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
y = log.(airp.passengers)
X = vcat(collect(1:90), collect(90.5:-0.5:64)) + (rand(144) .* 10)
y += X * -0.03

dynamic_coefs = [(X, "level")]

model = StructuralModel(y;
dynamic_exog_coefs=dynamic_coefs
)

fit!(model)

prediction = forecast(model, 30; dynamic_exog_coefs_forecasts = [collect(63.5:-0.5:49)])

plot_point_forecast(y, prediction)
```
![dynamic_exog](assets/dynamic_exog.png)
4 changes: 2 additions & 2 deletions docs/src/features.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features

y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.

model = StructuralModel(y; Exogenous_X = X)
model = StructuralModel(y; exog = X)
fit!(model; α = 1.0, information_criteria = "bic", ϵ = 0.05, penalize_exogenous = true, penalize_initial_states = true)

Selected_exogenous = model.output.components["Exogenous_X"]["Selected"]
Selected_exogenous = model.output.components["exog"]["Selected"]

```

Expand Down
15 changes: 13 additions & 2 deletions docs/src/manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@ simulation = StateSpaceLearning.simulate(model, 12, 1000) #Gets 1000 scenarios p
```
## Models

The package currently supports the implementation of the StructuralModel. If you have suggestions for additional models to include, we encourage you to contribute by opening an issue or submitting a pull request.
The package currently supports the implementation of the StructuralModel, which includes capabilities for handling dynamic exogenous coefficients. If you have suggestions for additional models to include, we encourage you to contribute by opening an issue or submitting a pull request.

```

When using dynamic coefficients:
- The model will create time-varying coefficients for each specified exogenous variable
- Each coefficient will follow the specified cyclical pattern
- When forecasting, you must provide future values for exogenous variables using the `Exogenous_Forecast` parameter

```@docs
StateSpaceLearning.StructuralModel
Expand All @@ -39,7 +46,11 @@ StateSpaceLearning.fit!

## Forecasting and Simulating

The package has functions to make point forecasts multiple steps ahead and to simulate scenarios based on those forecasts. These functions are implemented both for the univariate and to the multivariate cases.
The package has functions to make point forecasts multiple steps ahead and to simulate scenarios based on those forecasts. These functions are implemented for both univariate and multivariate cases, with support for exogenous variables and dynamic coefficients.

When using models with exogenous variables:
- For standard exogenous variables, provide future values using the `Exogenous_Forecast` parameter
- For dynamic coefficients, use the same `Exogenous_Forecast` parameter with values for each exogenous variable

```@docs
StateSpaceLearning.forecast
Expand Down
12 changes: 5 additions & 7 deletions paper_tests/m4_test/evaluate_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,13 @@ function evaluate_SSL(

model = StateSpaceLearning.StructuralModel(
normalized_y;
level=true,
stochastic_level=true,
trend=true,
stochastic_trend=true,
seasonal=true,
stochastic_seasonal=true,
level="stochastic",
slope="stochastic",
seasonal="stochastic",
freq_seasonal=12,
outlier=outlier,
ζ_ω_threshold=12,
ζ_threshold=12,
ω_threshold=12,
)
StateSpaceLearning.fit!(
model;
Expand Down
11 changes: 6 additions & 5 deletions paper_tests/m4_test/m4_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ function run_config(
save_init ? CSV.write(init_filepath, initialization_df) : nothing # Initialize empty CSV

for i in 1:48000
if i in [10001, 20001, 30001, 40001] # Clear DataFrame to save memory
if i % 1000 == 1 # Clear DataFrame to save memory
results_df = DataFrame()
initialization_df = DataFrame()
end
Expand All @@ -66,7 +66,8 @@ function run_config(
information_criteria,
)

if i in [10000, 20000, 30000, 40000, 48000]
if i % 1000 == 0
@info "Saving results for $i series"
!save_init ? append_results(filepath, results_df) : nothing
save_init ? append_results(init_filepath, initialization_df) : nothing
end
Expand Down Expand Up @@ -100,9 +101,9 @@ end
# Main script
function main()
results_table = DataFrame()
for outlier in [true]
for information_criteria in ["aic"]
for α in [0.1]
for outlier in [true, false]
for information_criteria in ["aic", "bic"]
for α in [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
@info "Running SSL with outlier = $outlier, information_criteria = $information_criteria, α = $α"
results_table = run_config(
results_table, outlier, information_criteria, α, false, 60
Expand Down
2 changes: 1 addition & 1 deletion paper_tests/m4_test/prepare_data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ function normalize(y::Vector, max_y::AbstractFloat, min_y::AbstractFloat)
return (y .- min_y) ./ (max_y - min_y)
end

function de_normalize(y::Vector, max_y::AbstractFloat, min_y::AbstractFloat)
function de_normalize(y, max_y::AbstractFloat, min_y::AbstractFloat)
return (y .* (max_y - min_y)) .+ min_y
end

Expand Down
8 changes: 4 additions & 4 deletions paper_tests/simulation_test/evaluate_models.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ function get_SSL_results(
freq_seasonal=12,
outlier=false,
ζ_ω_threshold=12,
Exogenous_X=X_train,
exog=X_train,
)
t = @elapsed StateSpaceLearning.fit!(
model;
Expand All @@ -39,13 +39,13 @@ function get_SSL_results(
penalize_initial_states=true,
)

selected = model.output.components["Exogenous_X"]["Selected"]
selected = model.output.components["exog"]["Selected"]
true_positives, false_positives, false_negatives, true_negatives = get_confusion_matrix(
selected, true_features, false_features
)

mse = mse_func(model.output.components["Exogenous_X"]["Coefs"], true_β)
bias = bias_func(model.output.components["Exogenous_X"]["Coefs"], true_β)
mse = mse_func(model.output.components["exog"]["Coefs"], true_β)
bias = bias_func(model.output.components["exog"]["Coefs"], true_β)

series_result = DataFrame(
[
Expand Down
10 changes: 8 additions & 2 deletions src/StateSpaceLearning.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module StateSpaceLearning

using LinearAlgebra, Statistics, GLMNet, Distributions, SparseArrays
using LinearAlgebra, Statistics, GLMNet, Distributions, SparseArrays, Random

abstract type StateSpaceLearningModel end

Expand All @@ -13,6 +13,12 @@ include("datasets.jl")
include("fit_forecast.jl")
include("plots.jl")

export fit!, forecast, simulate, StructuralModel, plot_point_forecast, plot_scenarios
export fit!,
forecast,
simulate,
StructuralModel,
plot_point_forecast,
plot_scenarios,
simulate_states

end # module StateSpaceLearning
Loading
Loading