Skip to content

Commit 249e803

Browse files
Merge pull request #20 from LAMPSPUC/add_simulation
add simulation feature
2 parents 386ced5 + 91da16e commit 249e803

File tree

13 files changed

+197
-71
lines changed

13 files changed

+197
-71
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ authors = ["andreramosfc <[email protected]>"]
44
version = "0.1.2"
55

66
[deps]
7+
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
78
GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6"
89
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
910
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

README.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ model_input = output.model_input # Model inputs that were utilized to bu
2121
Create_X = output.Create_X # The function utilized to build the regression matrix.
2222
X = output.X # High Dimension Regression utilized in the estimation.
2323
coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation.
24-
ϵ = output.ϵ # Residuals of the model.
24+
ε = output.ε # Residuals of the model.
2525
fitted = output.fitted # Fit in Sample of the model.
2626
components = output.components # Dictionary containing information about each component of the model, each component has the keys: "Values" (The value of the component in each timestamp) , "Coefs" (The coefficients estimated for each element of the component) and "Indexes" (The indexes of the elements of the component in the high dimension regression "X").
2727
residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovations components (that is the information that can be utilized to initialize the state space model).
@@ -52,7 +52,7 @@ Current features include:
5252

5353
## Quick Examples
5454

55-
### Fitting and forecasting
55+
### Fitting, forecasting and simulating
5656
Quick example of fitting, forecasting and simulating the air passengers time series.
5757

5858
```julia
@@ -65,11 +65,20 @@ log_air_passengers = log.(airp.passengers)
6565
steps_ahead = 30
6666

6767
output = StateSpaceLearning.fit_model(log_air_passengers)
68-
prediction_log = StateSpaceLearning.forecast(output, steps_ahead)
68+
prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
6969
prediction = exp.(prediction_log)
7070

7171
plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
72-
plot!(vcat(ones(output.T).*NaN, prediction), lab = "Forcast", w=2, color = "blue")
72+
plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast", w=2, color = "blue")
73+
74+
N_scenarios = 1000
75+
simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths
76+
77+
plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
78+
for s in 1:N_scenarios-1
79+
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, s])), lab = "", α = 0.1 , color = "red")
80+
end
81+
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, N_scenarios])), lab = "Scenarios Paths", α = 0.1 , color = "red")
7382

7483
```
7584
![quick_example_airp](./docs/assets/quick_example_airp.PNG)
@@ -119,7 +128,7 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features
119128

120129
y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.
121130

122-
output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
131+
output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
123132

124133
Selected_exogenous = output.components["Exogenous_X"]["Selected"]
125134

@@ -138,12 +147,13 @@ using Plots
138147
airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
139148
log_air_passengers = log.(airp.passengers)
140149

150+
airpassengers = Float64.(airp.passengers)
141151
log_air_passengers[60:72] .= NaN
142152

143153
output = StateSpaceLearning.fit_model(log_air_passengers)
144154

145155
fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72])
146-
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airpassengers[60:72])
156+
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72])
147157
airpassengers[60:72] .= NaN
148158

149159
plot(airpassengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)

docs/src/manual.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ model_input = output.model_input # Model inputs that were utilized to bu
2121
Create_X = output.Create_X # The function utilized to build the regression matrix.
2222
X = output.X # High Dimension Regression utilized in the estimation.
2323
coefs = output.coefs # High Dimension Regression coefficients estimated in the estimation.
24-
ϵ = output.ϵ # Residuals of the model.
24+
ε = output.ε # Residuals of the model.
2525
fitted = output.fitted # Fit in Sample of the model.
2626
components = output.components # Dictionary containing information about each component of the model, each component has the keys: "Values" (The value of the component in each timestamp) , "Coefs" (The coefficients estimated for each element of the component) and "Indexes" (The indexes of the elements of the component in the high dimension regression "X").
2727
residuals_variances = output.residuals_variances # Dictionary containing the estimated variances for the innovations components (that is the information that can be utilized to initialize the state space model).
@@ -52,7 +52,7 @@ Current features include:
5252

5353
## Quick Examples
5454

55-
### Fitting and forecasting
55+
### Fitting, forecasting and simulating
5656
Quick example of fitting, forecasting and simulating the air passengers time series.
5757

5858
```julia
@@ -65,11 +65,20 @@ log_air_passengers = log.(airp.passengers)
6565
steps_ahead = 30
6666

6767
output = StateSpaceLearning.fit_model(log_air_passengers)
68-
prediction_log = StateSpaceLearning.forecast(output, steps_ahead)
68+
prediction_log = StateSpaceLearning.forecast(output, steps_ahead) # arguments are the output of the fitted model and number of steps ahead the user wants to forecast
6969
prediction = exp.(prediction_log)
7070

7171
plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
72-
plot!(vcat(ones(output.T).*NaN, prediction), lab = "Forcast", w=2, color = "blue")
72+
plot!(vcat(ones(length(log_air_passengers)).*NaN, prediction), lab = "Forecast", w=2, color = "blue")
73+
74+
N_scenarios = 1000
75+
simulation = StateSpaceLearning.simulate(output, steps_ahead, N_scenarios) # arguments are the output of the fitted model, number of steps ahead the user wants to forecast and number of scenario paths
76+
77+
plot(airp.passengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)
78+
for s in 1:N_scenarios-1
79+
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, s])), lab = "", α = 0.1 , color = "red")
80+
end
81+
plot!(vcat(ones(length(log_air_passengers)).*NaN, exp.(simulation[:, N_scenarios])), lab = "Scenarios Paths", α = 0.1 , color = "red")
7382

7483
```
7584
![quick_example_airp](./docs/assets/quick_example_airp.PNG)
@@ -119,7 +128,7 @@ X = rand(length(log_air_passengers), 10) # Create 10 exogenous features
119128

120129
y = log_air_passengers + X[:, 1:3]*β # add to the log_air_passengers series a contribution from only 3 exogenous features.
121130

122-
output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
131+
output = StateSpaceLearning.fit_model(y; Exogenous_X = X, estimation_input = Dict("α" => 1.0, "information_criteria" => "bic", "ε" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true))
123132

124133
Selected_exogenous = output.components["Exogenous_X"]["Selected"]
125134

@@ -138,12 +147,13 @@ using Plots
138147
airp = CSV.File(StateSpaceLearning.AIR_PASSENGERS) |> DataFrame
139148
log_air_passengers = log.(airp.passengers)
140149

150+
airpassengers = Float64.(airp.passengers)
141151
log_air_passengers[60:72] .= NaN
142152

143153
output = StateSpaceLearning.fit_model(log_air_passengers)
144154

145155
fitted_completed_missing_values = ones(144).*NaN; fitted_completed_missing_values[60:72] = exp.(output.fitted[60:72])
146-
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airpassengers[60:72])
156+
real_removed_valued = ones(144).*NaN; real_removed_valued[60:72] = deepcopy(airp.passengers[60:72])
147157
airpassengers[60:72] .= NaN
148158

149159
plot(airpassengers, w=2 , color = "Black", lab = "Historical", legend = :outerbottom)

src/StateSpaceLearning.jl

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module StateSpaceLearning
22

3-
using LinearAlgebra, Statistics, GLMNet
3+
using LinearAlgebra, Statistics, GLMNet, Distributions
44

55
include("structs.jl")
66
include("models/default_model.jl")
@@ -106,4 +106,69 @@ function forecast(output::Output, steps_ahead::Int64; Exogenous_Forecast::Matrix
106106
return complete_matrix[end-steps_ahead+1:end, :]*output.coefs
107107
end
108108

109+
"""
110+
simulate(output::Output, steps_ahead::Int64, N_scenarios::Int64; simulate_outliers::Bool = true, Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where Fl
111+
112+
Generate simulations for a given number of steps ahead using the provided StateSpaceLearning output and exogenous forecast data.
113+
114+
# Arguments
115+
- `output::Output`: Output object obtained from model fitting.
116+
- `steps_ahead::Int64`: Number of steps ahead for simulation.
117+
- `N_scenarios::Int64`: Number of scenarios to simulate (required positional argument).
118+
- `simulate_outliers::Bool`: If true, simulate outliers (default: true).
119+
- `Exogenous_Forecast::Matrix{Fl}`: Exogenous variables forecast (default: zeros(steps_ahead, 0))
120+
121+
# Returns
122+
- `Matrix{Float64}`: Matrix containing simulated values.
123+
"""
124+
function simulate(output::Output, steps_ahead::Int64, N_scenarios::Int64; simulate_outliers::Bool = true,
125+
innovation_functions::Dict = Dict("stochastic_level" => Dict("create_X" => create_ξ, "component" => "ξ", "args" => (length(output.ε) + steps_ahead + 1, 0)),
126+
"stochastic_trend" => Dict("create_X" => create_ζ, "component" => "ζ", "args" => (length(output.ε) + steps_ahead + 1, 0, 1)),
127+
"stochastic_seasonal" => Dict("create_X" => create_ω, "component" => "ω", "args" => (length(output.ε) + steps_ahead + 1, output.model_input["freq_seasonal"], 0, 1))),
128+
Exogenous_Forecast::Matrix{Fl}=zeros(steps_ahead, 0))::Matrix{Float64} where Fl
129+
130+
prediction = forecast(output, steps_ahead; Exogenous_Forecast = Exogenous_Forecast)
131+
132+
T = length(output.ε)
133+
simulation_X = zeros(steps_ahead, 0)
134+
components_matrix = zeros(length(output.valid_indexes), 0)
135+
N_components = 1
136+
137+
for innovation in keys(innovation_functions)
138+
if output.model_input[innovation]
139+
innov_dict = innovation_functions[innovation]
140+
simulation_X = hcat(simulation_X, innov_dict["create_X"](innov_dict["args"]...)[end-steps_ahead:end-1, end-steps_ahead+1:end])
141+
comp = fill_innovation_coefs(T, innov_dict["component"], output)
142+
components_matrix = hcat(components_matrix, comp[output.valid_indexes])
143+
N_components += 1
144+
end
145+
end
146+
147+
components_matrix = hcat(components_matrix, output.ε[output.valid_indexes])
148+
simulation_X = hcat(simulation_X, Matrix(1.0 * I, steps_ahead, steps_ahead))
149+
components_matrix += rand(Normal(0, 1), size(components_matrix)) ./ 1e9 # Make sure matrix is positive definite
150+
151+
∑ = cov(components_matrix)
152+
MV_dist = MvNormal(zeros(N_components), ∑)
153+
o_noises = simulate_outliers && output.model_input["outlier"] ? rand(Normal(0, std(output.components["o"]["Coefs"])), steps_ahead, N_scenarios) : zeros(steps_ahead, N_scenarios)
154+
155+
simulation = hcat([prediction for _ in 1:N_scenarios]...)
156+
for s in 1:N_scenarios
157+
sim_coefs = ones(size(simulation_X, 2)) .* NaN
158+
159+
for i in 1:steps_ahead
160+
rand_inovs = rand(MV_dist)
161+
162+
for comp in eachindex(rand_inovs)
163+
sim_coefs[i + (comp - 1) * steps_ahead] = rand_inovs[comp]
164+
end
165+
end
166+
167+
simulation[:, s] += (simulation_X * sim_coefs + o_noises[:, s])
168+
end
169+
170+
return simulation
171+
172+
end
173+
109174
end # module StateSpaceLearning

src/estimation_procedure/default_estimation_procedure.jl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -76,16 +76,16 @@ function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, L
7676
method_vec = Vector{Float64}(undef, path_size)
7777
for i in 1:path_size
7878
fit = Lasso_X*model.betas[:, i] .+ model.a0[i]
79-
ϵ = Lasso_y - fit
79+
ε = Lasso_y - fit
8080

81-
method_vec[i] = get_information(T, K[i], ϵ; information_criteria = information_criteria)
81+
method_vec[i] = get_information(T, K[i], ε; information_criteria = information_criteria)
8282
end
8383

8484
best_model_idx = argmin(method_vec)
8585
coefs = intercept ? vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : model.betas[:, best_model_idx]
8686
fit = intercept ? hcat(ones(T), Lasso_X)*coefs : Lasso_X*coefs
87-
ϵ = Lasso_y - fit
88-
return coefs, ϵ
87+
ε = Lasso_y - fit
88+
return coefs, ε
8989
end
9090

9191
"""
@@ -157,19 +157,19 @@ function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float
157157
end
158158

159159
if hasintercept
160-
coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
160+
coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
161161
else
162-
coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
162+
coefs, ε = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
163163
end
164-
return rm_average ? (vcat(mean_y, coefs), ϵ) : (coefs, ϵ)
164+
return rm_average ? (vcat(mean_y, coefs), ε) : (coefs, ε)
165165

166166
end
167167

168168
"""
169169
fit_adalasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64,
170170
information_criteria::String,
171171
components_indexes::Dict{String, Vector{Int64}},
172-
ϵ::Float64, penalize_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
172+
ε::Float64, penalize_exogenous::Bool)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
173173
174174
Fits an Adaptive Lasso (AdaLasso) regression model to the provided data and returns coefficients and residuals.
175175

src/information_criteria.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
"""
2-
get_information(T::Int64, K::Int64, ϵ::Vector{Float64};
2+
get_information(T::Int64, K::Int64, ε::Vector{Float64};
33
information_criteria::String = "aic", p::Int64 = 0)::Float64
44
55
Calculates information criterion value based on the provided parameters and residuals.
66
77
# Arguments
88
- `T::Int64`: Number of observations.
99
- `K::Int64`: Number of selected predictors.
10-
- `ϵ::Vector{Float64}`: Vector of residuals.
10+
- `ε::Vector{Float64}`: Vector of residuals.
1111
- `information_criteria::String`: Method for hyperparameter selection (default: "aic").
1212
- `p::Int64`: Number of total predictors (default: 0).
1313
1414
# Returns
1515
- `Float64`: Information criterion value.
1616
1717
"""
18-
function get_information(T::Int64, K::Int64, ϵ::Vector{Float64}; information_criteria::String = "aic")::Float64
18+
function get_information(T::Int64, K::Int64, ε::Vector{Float64}; information_criteria::String = "aic")::Float64
1919
if information_criteria == "bic"
20-
return T*log(var(ϵ)) + K*log(T)
20+
return T*log(var(ε)) + K*log(T)
2121
elseif information_criteria == "aic"
22-
return 2*K + T*log(var(ϵ))
22+
return 2*K + T*log(var(ε))
2323
elseif information_criteria == "aicc"
24-
return 2*K + T*log(var(ϵ)) + ((2*K^2 +2*K)/(T - K - 1))
24+
return 2*K + T*log(var(ε)) + ((2*K^2 +2*K)/(T - K - 1))
2525
end
2626
end

src/models/default_model.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ function get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dic
259259
end
260260

261261
"""
262-
get_variances(ϵ::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl
262+
get_variances(ε::Vector{Fl}, coefs::Vector{Fl}, components_indexes::Dict{String, Vector{Int64}})::Dict where Fl
263263
264264
Calculates variances for each innovation component and for the residuals.
265265

src/structs.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
- `Create_X::Function`: Function used to create the StateSpaceLearning Matrix.
99
- `X::Matrix`: StateSpaceLearning Matrix data used in the model.
1010
- `coefs::Vector`: Coefficients obtained from the model.
11-
- `ϵ::Vector`: Residuals of the model.
11+
- `ε::Vector`: Residuals of the model.
1212
- `fitted::Vector`: Fitted values from the model.
1313
- `components::Dict`: Dictionary containing different components.
1414
- `residuals_variances::Dict`: Dictionary storing variances of residuals for different components.
@@ -24,7 +24,7 @@ mutable struct Output
2424
Create_X::Function
2525
X::Matrix
2626
coefs::Vector
27-
ϵ::Vector
27+
ε::Vector
2828
fitted::Vector
2929
components::Dict
3030
residuals_variances::Dict

0 commit comments

Comments
 (0)