@@ -52,71 +52,71 @@ function get_outlier_duplicate_columns(Estimation_X::Matrix{Tl}, components_inde
52
52
end
53
53
54
54
"""
    get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl},
                                  information_criteria::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}

Calculates the information criteria along the regularization path of a GLMNet model and returns
coefficients and residuals of the best model based on the selected information criteria.

# Arguments
- `model::GLMNetPath`: Fitted GLMNetPath model object.
- `Lasso_X::Matrix{Tl}`: Matrix of predictors for estimation.
- `Lasso_y::Vector{Fl}`: Vector of response values for estimation.
- `information_criteria::String`: Information Criteria method for hyperparameter selection.
- `intercept::Bool`: Flag for intercept inclusion in the model (default: true).

# Returns
- `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model.
"""
function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, information_criteria::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
    path_size = length(model.lambda)
    T = size(Lasso_X, 1)
    # Number of non-zero coefficients (degrees of freedom) for each lambda on the path.
    K = count(i -> i != 0, model.betas; dims = 1)'

    # Evaluate the chosen information criterion at every point of the regularization path.
    method_vec = Vector{Float64}(undef, path_size)
    for i in 1:path_size
        fit = Lasso_X * model.betas[:, i] .+ model.a0[i]
        ϵ = Lasso_y - fit
        method_vec[i] = get_information(T, K[i], ϵ; information_criteria = information_criteria)
    end

    # Keep the model with the lowest criterion value; prepend the GLMNet intercept
    # (a0) to the coefficient vector only when an intercept is requested.
    best_model_idx = argmin(method_vec)
    coefs = intercept ? vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : model.betas[:, best_model_idx]
    fit = intercept ? hcat(ones(T), Lasso_X) * coefs : Lasso_X * coefs
    ϵ = Lasso_y - fit
    return coefs, ϵ
end
90
90
91
91
"""
    fit_glmnet(Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, α::Float64;
               information_criteria::String = "aic",
               penalty_factor::Vector{Float64} = ones(size(Lasso_X, 2) - 1),
               intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}

Fits a GLMNet model to the provided data and returns coefficients and residuals based on selected criteria.

# Arguments
- `Lasso_X::Matrix{Tl}`: Matrix of predictors for estimation.
- `Lasso_y::Vector{Fl}`: Vector of response values for estimation.
- `α::Float64`: Elastic net control factor between ridge (α=0) and lasso (α=1) (default: 0.1).
- `information_criteria::String`: Information Criteria method for hyperparameter selection (default: aic).
- `penalty_factor::Vector{Float64}`: Penalty factors for each predictor (default: ones(size(Lasso_X, 2) - 1)).
- `intercept::Bool`: Flag for intercept inclusion in the model (default: true).

# Returns
- `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model.
"""
# NOTE(review): the keyword default `intercept = intercept` resolves to a binding named
# `intercept` in the enclosing (module) scope — confirm that this global exists and that
# the default was not meant to be a literal `true`.
function fit_glmnet(Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, α::Float64; information_criteria::String = "aic", penalty_factor::Vector{Float64} = ones(size(Lasso_X, 2) - 1), intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
    # Fit the whole regularization path; dfmax caps the number of active predictors.
    model = glmnet(Lasso_X, Lasso_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept, dfmax = size(Lasso_X, 2), lambda_min_ratio = 0.001)
    # Pick the best model on the path according to the information criterion.
    return get_path_information_criteria(model, Lasso_X, Lasso_y, information_criteria; intercept = intercept)
end
115
115
116
116
"""
    fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, information_criteria::String,
              penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}, penalty_factor::Vector{Float64};
              rm_average::Bool = false)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}

Fits a Lasso regression model to the provided data and returns coefficients and residuals based on selected criteria.

# Arguments
- `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation.
- `estimation_y::Vector{Fl}`: Vector of response values for estimation.
- `α::Float64`: Elastic net control factor between ridge (α=0) and lasso (α=1) (default: 0.1).
- `information_criteria::String`: Information Criteria method for hyperparameter selection (default: aic).
- `penalize_exogenous::Bool`: Flag for selecting exogenous variables. When false the penalty factor for these variables will be set to 0.
- `components_indexes::Dict{String, Vector{Int64}}`: Dictionary containing indexes for different components.
- `penalty_factor::Vector{Float64}`: Penalty factors for each predictor. NOTE: this vector is
  mutated in place (outlier-duplicate entries set to `Inf`, exogenous entries possibly zeroed).
- `rm_average::Bool`: Flag indicating whether the intercept should be taken as the average of the
  time series, i.e. the response is de-meaned before fitting (default: false).

# Returns
- `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the fitted Lasso model.
"""
function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, information_criteria::String, penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}, penalty_factor::Vector{Float64}; rm_average::Bool = false)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}

    # Columns duplicating outlier dummies must never be selected — give them infinite penalty.
    outlier_duplicate_columns = get_outlier_duplicate_columns(Estimation_X, components_indexes)
    penalty_factor[outlier_duplicate_columns] .= Inf

    # When the design matrix carries an explicit intercept column, it is dropped before the
    # GLMNet fit and all component indexes shift down by one.
    hasintercept = has_intercept(Estimation_X)
    if hasintercept
        if !penalize_exogenous
            penalty_factor[components_indexes["Exogenous_X"] .- 1] .= 0
        end
        Lasso_X = Estimation_X[:, 2:end]
    else
        if !penalize_exogenous
            penalty_factor[components_indexes["Exogenous_X"]] .= 0
        end
        Lasso_X = Estimation_X
        # De-meaning the response only makes sense when the model has an intercept column.
        @assert !rm_average "Intercept must be included in the model if rm_average is set to true"
    end

    # Optionally replace the fitted intercept with the series average by de-meaning y.
    if rm_average
        mean_y = mean(estimation_y)
        Lasso_y = estimation_y .- mean_y
    else
        Lasso_y = estimation_y
    end

    if hasintercept
        # With rm_average the intercept is the pre-computed mean, so GLMNet must not fit one.
        coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria = information_criteria, penalty_factor = penalty_factor, intercept = !rm_average)
    else
        coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria = information_criteria, penalty_factor = penalty_factor, intercept = false)
    end
    # Re-attach the mean as the intercept coefficient when the response was de-meaned.
    return rm_average ? (vcat(mean_y, coefs), ϵ) : (coefs, ϵ)
end
149
167
@@ -175,22 +193,37 @@ function default_estimation_procedure(Estimation_X::Matrix{Tl}, estimation_y::Ve
175
193
176
194
@assert 0 <= α <= 1 " α must be in [0, 1]"
177
195
178
- penalty_factor = ones (size (Estimation_X, 2 ) - 1 ); penalty_factor[components_indexes[" initial_states" ][2 : end ] .- 1 ] .= 0
179
- coefs, _ = fit_lasso (Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes; penalty_factor = penalty_factor, intercept = false )
196
+ hasintercept = has_intercept (Estimation_X)
197
+
198
+ if hasintercept
199
+ penalty_factor = ones (size (Estimation_X, 2 ) - 1 )
200
+ penalty_factor[components_indexes[" initial_states" ][2 : end ] .- 1 ] .= 0
201
+ coefs, _ = fit_lasso (Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = true )
202
+ else
203
+ penalty_factor = ones (size (Estimation_X, 2 ))
204
+ penalty_factor[components_indexes[" initial_states" ][2 : end ]] .= 0
205
+ coefs, _ = fit_lasso (Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = false )
206
+ end
180
207
181
208
# AdaLasso per component
182
- penalty_factor = zeros (size (Estimation_X, 2 ) - 1 )
209
+ ts_penalty_factor = hasintercept ? zeros (size (Estimation_X, 2 ) - 1 ) : zeros ( size (Estimation_X, 2 ) )
183
210
for key in keys (components_indexes)
184
211
if key != " initial_states" && key != " μ1"
185
212
component = components_indexes[key]
186
213
if key != " Exogenous_X" && key != " o" && ! (key in [" ν1" , " γ1" ])
187
214
κ = count (i -> i != 0 , coefs[component]) < 1 ? 0 : std (coefs[component])
188
- penalty_factor [component .- 1 ] .= (1 / (κ + ϵ))
215
+ hasintercept ? ts_penalty_factor [component .- 1 ] . = ( 1 / (κ + ϵ)) : ts_penalty_factor[component ] .= (1 / (κ + ϵ))
189
216
else
190
- penalty_factor [component .- 1 ] = (1 ./ (abs .(coefs[component]) .+ ϵ))
217
+ hasintercept ? ts_penalty_factor [component .- 1 ] = ( 1 ./ ( abs .(coefs[component]) .+ ϵ)) : ts_penalty_factor[component ] = (1 ./ (abs .(coefs[component]) .+ ϵ))
191
218
end
192
219
end
193
220
end
194
- ! penalize_initial_states ? penalty_factor[components_indexes[" initial_states" ][2 : end ] .- 1 ] .= 0 : nothing
195
- return fit_lasso (Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes; penalty_factor= penalty_factor)
221
+
222
+ if hasintercept
223
+ ! penalize_initial_states ? ts_penalty_factor[components_indexes[" initial_states" ][2 : end ] .- 1 ] .= 0 : nothing
224
+ else
225
+ ! penalize_initial_states ? ts_penalty_factor[components_indexes[" initial_states" ][2 : end ]] .= 0 : nothing
226
+ end
227
+
228
+ return fit_lasso (Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = false )
196
229
end
0 commit comments