Skip to content

Commit 1a8b072

Browse files
Merge pull request #73 from LAMPSPUC/speedup_simulation
speedup simulation
2 parents 01f19cf + a4fa817 commit 1a8b072

File tree

11 files changed

+264
-111
lines changed

11 files changed

+264
-111
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,7 @@ test2.jl
1919

2020
src/boosting_estimation_procedure.jl
2121
plots/
22-
paper_tests/m4_test/evaluate_model2.jl
22+
paper_tests/m4_test/evaluate_model2.jl
23+
results_PROPHET/
24+
results_SS/
25+
sarima/

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "StateSpaceLearning"
22
uuid = "971c4b7c-2c4e-4bac-8525-e842df3cde7b"
33
authors = ["andreramosfc <[email protected]>"]
4-
version = "2.0.6"
4+
version = "2.0.7"
55

66
[deps]
77
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"

paper_tests/m4_test/evaluate_model.jl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,16 @@ function evaluate_SSL(
3939
normalized_prediction = StateSpaceLearning.forecast(model, H)
4040
prediction = de_normalize(normalized_prediction, max_y, min_y)
4141

42+
normalized_scenarios = StateSpaceLearning.simulate(model, H, 1000)
43+
scenarios = de_normalize(normalized_scenarios, max_y, min_y)
44+
4245
mase = MASE(y_train, y_test, prediction)
4346
smape = sMAPE(y_test, prediction)
47+
crps = CRPS(scenarios, y_test)
4448

45-
results_df = vcat(results_df, DataFrame([[mase], [smape]], [:MASE, :sMAPE]))
49+
results_df = vcat(
50+
results_df, DataFrame([[mase], [smape], [crps]], [:MASE, :sMAPE, :CRPS])
51+
)
4652
initialization_df = vcat(
4753
initialization_df,
4854
DataFrame(

paper_tests/m4_test/m4_test.jl

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ df_train4 = CSV.read("paper_tests/m4_test/Monthly-train4.csv", DataFrame)
1212
df_train = vcat(df_train1, df_train2, df_train3, df_train4) # so that files are not too big and can be uploaded to github
1313
df_test = CSV.read("paper_tests/m4_test/Monthly-test.csv", DataFrame)
1414

15-
include("metrics.jl")
16-
include("evaluate_model.jl")
17-
include("prepare_data.jl")
15+
include("paper_tests/m4_test/metrics.jl")
16+
include("paper_tests/m4_test/evaluate_model.jl")
17+
include("paper_tests/m4_test/prepare_data.jl")
1818

1919
dict_vec = build_train_test_dict(df_train, df_test)
2020

@@ -25,8 +25,6 @@ function append_results(filepath, results_df)
2525
if isfile(filepath)
2626
df_old = CSV.read(filepath, DataFrame)
2727
results_df = vcat(df_old, results_df)
28-
@info "MASE avg = $(mean(results_df[:, :MASE]))"
29-
@info "sMAPE avg = $(mean(results_df[:, :sMAPE]))"
3028
end
3129
return CSV.write(filepath, results_df)
3230
end
@@ -84,6 +82,7 @@ function run_config(
8482
]);
8583
digits=3,
8684
)
85+
crps = trunc(mean(results_df[:, :CRPS]); digits=3)
8786
name = if outlier
8887
"SSL-O ($(information_criteria), α = $(α))"
8988
else
@@ -92,7 +91,11 @@ function run_config(
9291
results_table = vcat(
9392
results_table,
9493
DataFrame(
95-
"Names" => ["$name"], "MASE" => [mase], "sMAPE" => [smape], "OWA" => [owa]
94+
"Names" => ["$name"],
95+
"MASE" => [mase],
96+
"sMAPE" => [smape],
97+
"OWA" => [owa],
98+
"CRPS" => [crps],
9699
),
97100
)
98101
return results_table

paper_tests/m4_test/m4_test.py

Lines changed: 58 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
df_train3 = pd.read_csv("paper_tests/m4_test/Monthly-train3.csv")
1212
df_train4 = pd.read_csv("paper_tests/m4_test/Monthly-train4.csv")
1313
df_train = pd.concat([df_train1, df_train2, df_train3, df_train4])
14-
m4_info = pd.read_csv("paper_tests/m4_test/M4-info.csv")
1514

1615
df_test = pd.read_csv("paper_tests/m4_test/Monthly-test.csv")
1716
ssl_init_df = pd.read_csv("paper_tests/m4_test/init_SSL/SSL_aic_0.1_false.csv")
@@ -40,6 +39,20 @@ def MASE(y_train, y_test, prediction, m=12):
4039
denominator = (1 / (T - m)) * sum([abs(y_train[j] - y_train[j - m]) for j in range(m, T)])
4140
return numerator / denominator # if denominator != 0 else 0
4241

42+
def CRPS(scenarios: np.ndarray, y: np.ndarray) -> float:
43+
crps_scores = np.empty(len(y), dtype=float)
44+
for k, actual in enumerate(y):
45+
sorted_scenarios = np.sort(scenarios[k, :])
46+
m = len(sorted_scenarios)
47+
crps_score = 0.0
48+
for i in range(m):
49+
crps_score += (
50+
(sorted_scenarios[i] - actual)
51+
* (m * (actual < sorted_scenarios[i]) - (i + 1) + 0.5)
52+
)
53+
crps_scores[k] = (2 / m**2) * crps_score
54+
return np.mean(crps_scores)
55+
4356
def evaluate_ss(input, sample_size, init, hyperparameters_inicialization):
4457
train = input["train"]
4558
test = input["test"]
@@ -53,32 +66,47 @@ def evaluate_ss(input, sample_size, init, hyperparameters_inicialization):
5366
results = model.fit(start_params = hyperparameters_inicialization, disp = False, maxiter = 1e5)
5467
else:
5568
results = model.fit(disp = False, maxiter = 1e5)
56-
forecast = results.get_forecast(steps=18)
57-
normalized_forecast_values = forecast.predicted_mean
69+
config = {
70+
'repetitions': 1000,
71+
'steps': 18
72+
}
73+
forecast_obj = results.get_forecast(**config)
74+
forecast_df = forecast_obj.summary_frame()
75+
normalized_simulation = np.empty((len(forecast_df), 300))
76+
for i in range(len(forecast_df)):
77+
normalized_simulation[i] = [np.random.normal(forecast_df["mean"].values[i], forecast_df["mean_se"].values[i]) for _ in range(300)]
78+
normalized_forecast_values = forecast_df["mean"].values
5879
forecast_values = [x * (max_train - min_train) + min_train for x in normalized_forecast_values]
59-
return sMAPE(test, forecast_values), MASE(train, test, forecast_values)
80+
simulation = normalized_simulation * (max_train - min_train) + min_train
81+
return sMAPE(test, forecast_values), MASE(train, test, forecast_values), CRPS(simulation, test)
6082

6183

6284
results = []
6385
results_init = []
6486
for i in range(0, 48000):
65-
hyperparameters_inicialization = [ssl_init_df.loc[i]["ϵ"], ssl_init_df.loc[i]["ξ"],ssl_init_df.loc[i]["ζ"],ssl_init_df.loc[i]["ω_12"]]
87+
if i % 100 == 0:
88+
print("Running series ", i)
89+
hyperparameters_inicialization = [ssl_init_df.loc[i]["ϵ"], ssl_init_df.loc[i]["ξ"],ssl_init_df.loc[i]["ζ"],ssl_init_df.loc[i]["ω"]]
6690
results.append(evaluate_ss(dict_vec[i], 2794, False, hyperparameters_inicialization))
6791
results_init.append(evaluate_ss(dict_vec[i], 2794, True, hyperparameters_inicialization))
6892

6993
smape_SS = []
7094
mase_SS = []
7195
smape_SS_init = []
7296
mase_SS_init = []
97+
crps_SS = []
98+
crps_SS_init = []
7399
for i in range(0, len(results)):
74100
smape_SS.append(results[i][0])
75101
mase_SS.append(results[i][1])
76102
smape_SS_init.append(results_init[i][0])
77103
mase_SS_init.append(results_init[i][1])
104+
crps_SS.append(results[i][2])
105+
crps_SS_init.append(results_init[i][2])
78106

79107
#create dataframe with mase and smape columns:
80-
df = pd.DataFrame({'smape': smape_SS, 'mase': mase_SS})
81-
df_init = pd.DataFrame({'smape': smape_SS_init, 'mase': mase_SS_init})
108+
df = pd.DataFrame({'smape': smape_SS, 'mase': mase_SS, 'crps': crps_SS})
109+
df_init = pd.DataFrame({'smape': smape_SS_init, 'mase': mase_SS_init, 'crps': crps_SS_init})
82110
#save to csv:
83111
df.to_csv('paper_tests/m4_test/results_SS/SS.csv')
84112
df_init.to_csv('paper_tests/m4_test/results_SS/SS_init.csv')
@@ -95,20 +123,25 @@ def evaluate_ss(input, sample_size, init, hyperparameters_inicialization):
95123
def evaluate_prophet(input):
96124
train = input["train"]
97125
test = input["test"]
98-
timestamps = pd.date_range(start="2020-01-01", periods=len(train), freq='ME')
126+
timestamps = pd.date_range(start="2020-01-01", periods=len(train), freq='MS')
99127
#add random seed
100128
df = pd.DataFrame({
101129
'ds': timestamps,
102130
'y': train
103131
})
104132
model = Prophet(interval_width=0.95)
105133
model.fit(df)
106-
future = pd.DataFrame({
107-
'ds': (pd.date_range(start="2020-01-01", periods=len(train) + 18, freq='ME'))[len(train):]
108-
})
134+
future = model.make_future_dataframe(periods=18, freq='MS')
135+
future = future[-18:]
109136
model_forecast = model.predict(future)
110137
prediction = model_forecast['yhat'].values
111-
return sMAPE(test, prediction), MASE(train, test, prediction)
138+
model_prob = Prophet(interval_width=0.95, mcmc_samples=300)
139+
model_prob.fit(df)
140+
# Sample 1000 predictive paths
141+
forecast_samples = model_prob.predictive_samples(future)
142+
# Construct scenario paths
143+
simulated_paths = forecast_samples['yhat'] # shape: (num_timestamps, num_samples)
144+
return sMAPE(test, prediction), MASE(train, test, prediction), CRPS(simulated_paths, test)
112145

113146
def evaluate_chronos(input):
114147
train = input["train"]
@@ -126,27 +159,31 @@ def evaluate_chronos(input):
126159

127160
smape_prophet_vec = []
128161
mase_prophet_vec = []
162+
crps_prophet_vec = []
129163
smape_chronos_vec = []
130164
mase_chronos_vec = []
165+
131166
for i in range(0, len(dict_vec)):
132-
smape_prophet, mase_prophet = evaluate_prophet(dict_vec[i])
167+
smape_prophet, mase_prophet, crps_prophet = evaluate_prophet(dict_vec[i])
133168
smape_prophet_vec.append(smape_prophet)
134169
mase_prophet_vec.append(mase_prophet)
170+
crps_prophet_vec.append(crps_prophet)
135171
smape_chronos, mase_chronos = evaluate_chronos(dict_vec[i])
136172
smape_chronos_vec.append(smape_chronos)
137173
mase_chronos_vec.append(mase_chronos)
138-
#
139174
print("Runningg series ", i)
140175
if i % 1000 == 0:
141176
print("Runningg series ", i)
142177
smape_mean_prophet = np.mean(smape_prophet_vec)
143-
smape_emean_chronos = np.mean(smape_chronos_vec)
178+
smape_mean_chronos = np.mean(smape_chronos_vec)
144179
mase_mean_prophet = np.mean(mase_prophet_vec)
145180
mase_mean_chronos = np.mean(mase_chronos_vec)
181+
crps_mean_prophet = np.mean(crps_prophet_vec)
146182
print("Mean sMape Prophet: ", smape_mean_prophet)
147-
print("Mean sMape Chronos: ", smape_emean_chronos)
183+
print("Mean sMape Chronos: ", smape_mean_chronos)
148184
print("Mean Mase Prophet: ", mase_mean_prophet)
149185
print("Mean Mase Chronos: ", mase_mean_chronos)
186+
print("Mean CRPS Prophet: ", crps_mean_prophet)
150187

151188

152189
NAIVE_sMAPE = 14.427 #M4 Paper
@@ -159,9 +196,11 @@ def evaluate_chronos(input):
159196
mean_smape_prophet = np.mean(smape_prophet_vec)
160197
mean_mase_chronos = np.mean(mase_chronos_vec)
161198
mean_smape_chronos = np.mean(smape_chronos_vec)
199+
mean_crps_prophet = np.mean(crps_prophet_vec)
162200

163-
df_results_mean = pd.DataFrame({'smape': [mean_smape_prophet, mean_smape_chronos], 'mase': [mean_mase_prophet, mean_mase_chronos], 'owa': [owa_prophet, owa_chronos]})
164-
201+
df_prophet_results = pd.DataFrame({'smape': [mean_smape_prophet], 'mase': [mean_mase_prophet], 'owa': [owa_prophet], 'crps': [mean_crps_prophet], 'crps_median': [np.median(crps_prophet_vec)]})
202+
df_chronos_results = pd.DataFrame({'smape': [mean_smape_chronos], 'mase': [mean_mase_chronos], 'owa': [owa_chronos]})
165203
# save to csv
166204

167-
df_results_mean.to_csv('paper_tests/m4_test/metrics_results/PROPHET_CHRONOS_METRICS_RESULTS.csv')
205+
df_prophet_results.to_csv('paper_tests/m4_test/metrics_results/PROPHET_METRICS_RESULTS.csv')
206+
df_chronos_results.to_csv('paper_tests/m4_test/metrics_results/CHRONOS_METRICS_RESULTS.csv')

paper_tests/m4_test/metrics.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,21 @@ end
1717
function OWA(MASE1, MASE2, sMAPE1, sMAPE2)
1818
return 0.5 * (((MASE1) / (MASE2)) + ((sMAPE1) / (sMAPE2)))
1919
end
20+
21+
function CRPS(scenarios, y)
22+
crps_scores = Vector{AbstractFloat}(undef, length(y))
23+
24+
for k in eachindex(y)
25+
sorted_scenarios = sort(scenarios[k, :])
26+
m = length(scenarios[k, :])
27+
crps_score = 0.0
28+
29+
for i in 1:m
30+
crps_score +=
31+
(sorted_scenarios[i] - y[k]) * (m * (y[k] < sorted_scenarios[i]) - i + 0.5)
32+
end
33+
crps_scores[k] = (2 / m^2) * crps_score
34+
end
35+
36+
return mean(crps_scores)
37+
end

src/estimation_procedure.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ function fit_lasso(
232232
α;
233233
information_criteria=information_criteria,
234234
penalty_factor=penalty_factor,
235-
intercept=!rm_average,
235+
intercept=(!rm_average),
236236
)
237237
else
238238
coefs, ε = fit_glmnet(

0 commit comments

Comments
 (0)