Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,7 @@ test2.jl

src/boosting_estimation_procedure.jl
plots/
paper_tests/m4_test/evaluate_model2.jl
paper_tests/m4_test/evaluate_model2.jl
results_PROPHET/
results_SS/
sarima/
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "StateSpaceLearning"
uuid = "971c4b7c-2c4e-4bac-8525-e842df3cde7b"
authors = ["andreramosfc <[email protected]>"]
version = "2.0.6"
version = "2.0.7"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Expand Down
8 changes: 7 additions & 1 deletion paper_tests/m4_test/evaluate_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,16 @@ function evaluate_SSL(
normalized_prediction = StateSpaceLearning.forecast(model, H)
prediction = de_normalize(normalized_prediction, max_y, min_y)

normalized_scenarios = StateSpaceLearning.simulate(model, H, 1000)
scenarios = de_normalize(normalized_scenarios, max_y, min_y)

mase = MASE(y_train, y_test, prediction)
smape = sMAPE(y_test, prediction)
crps = CRPS(scenarios, y_test)

results_df = vcat(results_df, DataFrame([[mase], [smape]], [:MASE, :sMAPE]))
results_df = vcat(
results_df, DataFrame([[mase], [smape], [crps]], [:MASE, :sMAPE, :CRPS])
)
initialization_df = vcat(
initialization_df,
DataFrame(
Expand Down
15 changes: 9 additions & 6 deletions paper_tests/m4_test/m4_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ df_train4 = CSV.read("paper_tests/m4_test/Monthly-train4.csv", DataFrame)
df_train = vcat(df_train1, df_train2, df_train3, df_train4) # so that files are not too big and can be uploaded to github
df_test = CSV.read("paper_tests/m4_test/Monthly-test.csv", DataFrame)

include("metrics.jl")
include("evaluate_model.jl")
include("prepare_data.jl")
include("paper_tests/m4_test/metrics.jl")
include("paper_tests/m4_test/evaluate_model.jl")
include("paper_tests/m4_test/prepare_data.jl")

dict_vec = build_train_test_dict(df_train, df_test)

Expand All @@ -25,8 +25,6 @@ function append_results(filepath, results_df)
if isfile(filepath)
df_old = CSV.read(filepath, DataFrame)
results_df = vcat(df_old, results_df)
@info "MASE avg = $(mean(results_df[:, :MASE]))"
@info "sMAPE avg = $(mean(results_df[:, :sMAPE]))"
end
return CSV.write(filepath, results_df)
end
Expand Down Expand Up @@ -84,6 +82,7 @@ function run_config(
]);
digits=3,
)
crps = trunc(mean(results_df[:, :CRPS]); digits=3)
name = if outlier
"SSL-O ($(information_criteria), α = $(α))"
else
Expand All @@ -92,7 +91,11 @@ function run_config(
results_table = vcat(
results_table,
DataFrame(
"Names" => ["$name"], "MASE" => [mase], "sMAPE" => [smape], "OWA" => [owa]
"Names" => ["$name"],
"MASE" => [mase],
"sMAPE" => [smape],
"OWA" => [owa],
"CRPS" => [crps],
),
)
return results_table
Expand Down
77 changes: 58 additions & 19 deletions paper_tests/m4_test/m4_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
df_train3 = pd.read_csv("paper_tests/m4_test/Monthly-train3.csv")
df_train4 = pd.read_csv("paper_tests/m4_test/Monthly-train4.csv")
df_train = pd.concat([df_train1, df_train2, df_train3, df_train4])
m4_info = pd.read_csv("paper_tests/m4_test/M4-info.csv")

df_test = pd.read_csv("paper_tests/m4_test/Monthly-test.csv")
ssl_init_df = pd.read_csv("paper_tests/m4_test/init_SSL/SSL_aic_0.1_false.csv")
Expand Down Expand Up @@ -40,6 +39,20 @@ def MASE(y_train, y_test, prediction, m=12):
denominator = (1 / (T - m)) * sum([abs(y_train[j] - y_train[j - m]) for j in range(m, T)])
return numerator / denominator # if denominator != 0 else 0

def CRPS(scenarios: np.ndarray, y: np.ndarray) -> float:
    """Mean Continuous Ranked Probability Score of an ensemble forecast.

    Uses the rank-based (probability-weighted-moment) ensemble estimator
        CRPS = (2 / m^2) * sum_i (x_(i) - y) * (m * 1[y < x_(i)] - i + 0.5),
    where x_(1) <= ... <= x_(m) are the sorted ensemble members for one
    observation; the scores are then averaged over all observations.

    Args:
        scenarios: array of shape (len(y), m); row k holds the m simulated
            values for observation k.
        y: 1-D array of realized (actual) values.

    Returns:
        The average CRPS as a plain ``float`` (lower is better).
    """
    crps_scores = np.empty(len(y), dtype=float)
    for k, actual in enumerate(y):
        members = np.sort(scenarios[k, :])
        m = members.size
        ranks = np.arange(1, m + 1)  # 1-based ranks of the sorted members
        # Vectorized per-member sum; replaces the original O(m) Python loop.
        # (members > actual) is the indicator 1[y < x_(i)] as 0/1 integers.
        crps_scores[k] = (2.0 / m**2) * np.sum(
            (members - actual) * (m * (members > actual) - ranks + 0.5)
        )
    # float() so the return value matches the declared annotation exactly
    # (np.mean would otherwise return a numpy scalar).
    return float(np.mean(crps_scores))

def evaluate_ss(input, sample_size, init, hyperparameters_inicialization):
train = input["train"]
test = input["test"]
Expand All @@ -53,32 +66,47 @@ def evaluate_ss(input, sample_size, init, hyperparameters_inicialization):
results = model.fit(start_params = hyperparameters_inicialization, disp = False, maxiter = 1e5)
else:
results = model.fit(disp = False, maxiter = 1e5)
forecast = results.get_forecast(steps=18)
normalized_forecast_values = forecast.predicted_mean
config = {
'repetitions': 1000,
'steps': 18
}
forecast_obj = results.get_forecast(**config)
forecast_df = forecast_obj.summary_frame()
normalized_simulation = np.empty((len(forecast_df), 300))
for i in range(len(forecast_df)):
normalized_simulation[i] = [np.random.normal(forecast_df["mean"].values[i], forecast_df["mean_se"].values[i]) for _ in range(300)]
normalized_forecast_values = forecast_df["mean"].values
forecast_values = [x * (max_train - min_train) + min_train for x in normalized_forecast_values]
return sMAPE(test, forecast_values), MASE(train, test, forecast_values)
simulation = normalized_simulation * (max_train - min_train) + min_train
return sMAPE(test, forecast_values), MASE(train, test, forecast_values), CRPS(simulation, test)


results = []
results_init = []
for i in range(0, 48000):
hyperparameters_inicialization = [ssl_init_df.loc[i]["ϵ"], ssl_init_df.loc[i]["ξ"],ssl_init_df.loc[i]["ζ"],ssl_init_df.loc[i]["ω_12"]]
if i % 100 == 0:
print("Running series ", i)
hyperparameters_inicialization = [ssl_init_df.loc[i]["ϵ"], ssl_init_df.loc[i]["ξ"],ssl_init_df.loc[i]["ζ"],ssl_init_df.loc[i]["ω"]]
results.append(evaluate_ss(dict_vec[i], 2794, False, hyperparameters_inicialization))
results_init.append(evaluate_ss(dict_vec[i], 2794, True, hyperparameters_inicialization))

smape_SS = []
mase_SS = []
smape_SS_init = []
mase_SS_init = []
crps_SS = []
crps_SS_init = []
for i in range(0, len(results)):
smape_SS.append(results[i][0])
mase_SS.append(results[i][1])
smape_SS_init.append(results_init[i][0])
mase_SS_init.append(results_init[i][1])
crps_SS.append(results[i][2])
crps_SS_init.append(results_init[i][2])

#create dataframe with mase and smape columns:
df = pd.DataFrame({'smape': smape_SS, 'mase': mase_SS})
df_init = pd.DataFrame({'smape': smape_SS_init, 'mase': mase_SS_init})
df = pd.DataFrame({'smape': smape_SS, 'mase': mase_SS, 'crps': crps_SS})
df_init = pd.DataFrame({'smape': smape_SS_init, 'mase': mase_SS_init, 'crps': crps_SS_init})
#save to csv:
df.to_csv('paper_tests/m4_test/results_SS/SS.csv')
df_init.to_csv('paper_tests/m4_test/results_SS/SS_init.csv')
Expand All @@ -95,20 +123,25 @@ def evaluate_ss(input, sample_size, init, hyperparameters_inicialization):
def evaluate_prophet(input):
train = input["train"]
test = input["test"]
timestamps = pd.date_range(start="2020-01-01", periods=len(train), freq='ME')
timestamps = pd.date_range(start="2020-01-01", periods=len(train), freq='MS')
#add random seed
df = pd.DataFrame({
'ds': timestamps,
'y': train
})
model = Prophet(interval_width=0.95)
model.fit(df)
future = pd.DataFrame({
'ds': (pd.date_range(start="2020-01-01", periods=len(train) + 18, freq='ME'))[len(train):]
})
future = model.make_future_dataframe(periods=18, freq='MS')
future = future[-18:]
model_forecast = model.predict(future)
prediction = model_forecast['yhat'].values
return sMAPE(test, prediction), MASE(train, test, prediction)
model_prob = Prophet(interval_width=0.95, mcmc_samples=300)
model_prob.fit(df)
# Sample 1000 predictive paths
forecast_samples = model_prob.predictive_samples(future)
# Construct scenario paths
simulated_paths = forecast_samples['yhat'] # shape: (num_timestamps, num_samples)
return sMAPE(test, prediction), MASE(train, test, prediction), CRPS(simulated_paths, test)

def evaluate_chronos(input):
train = input["train"]
Expand All @@ -126,27 +159,31 @@ def evaluate_chronos(input):

smape_prophet_vec = []
mase_prophet_vec = []
crps_prophet_vec = []
smape_chronos_vec = []
mase_chronos_vec = []

for i in range(0, len(dict_vec)):
smape_prophet, mase_prophet = evaluate_prophet(dict_vec[i])
smape_prophet, mase_prophet, crps_prophet = evaluate_prophet(dict_vec[i])
smape_prophet_vec.append(smape_prophet)
mase_prophet_vec.append(mase_prophet)
crps_prophet_vec.append(crps_prophet)
smape_chronos, mase_chronos = evaluate_chronos(dict_vec[i])
smape_chronos_vec.append(smape_chronos)
mase_chronos_vec.append(mase_chronos)
#
    print("Running series ", i)
    if i % 1000 == 0:
        print("Running series ", i)
smape_mean_prophet = np.mean(smape_prophet_vec)
smape_emean_chronos = np.mean(smape_chronos_vec)
smape_mean_chronos = np.mean(smape_chronos_vec)
mase_mean_prophet = np.mean(mase_prophet_vec)
mase_mean_chronos = np.mean(mase_chronos_vec)
crps_mean_prophet = np.mean(crps_prophet_vec)
print("Mean sMape Prophet: ", smape_mean_prophet)
print("Mean sMape Chronos: ", smape_emean_chronos)
print("Mean sMape Chronos: ", smape_mean_chronos)
print("Mean Mase Prophet: ", mase_mean_prophet)
print("Mean Mase Chronos: ", mase_mean_chronos)
print("Mean CRPS Prophet: ", crps_mean_prophet)


NAIVE_sMAPE = 14.427 #M4 Paper
Expand All @@ -159,9 +196,11 @@ def evaluate_chronos(input):
mean_smape_prophet = np.mean(smape_prophet_vec)
mean_mase_chronos = np.mean(mase_chronos_vec)
mean_smape_chronos = np.mean(smape_chronos_vec)
mean_crps_prophet = np.mean(crps_prophet_vec)

df_results_mean = pd.DataFrame({'smape': [mean_smape_prophet, mean_smape_chronos], 'mase': [mean_mase_prophet, mean_mase_chronos], 'owa': [owa_prophet, owa_chronos]})

df_prophet_results = pd.DataFrame({'smape': [mean_smape_prophet], 'mase': [mean_mase_prophet], 'owa': [owa_prophet], 'crps': [mean_crps_prophet], 'crps_median': [np.median(crps_prophet_vec)]})
df_chronos_results = pd.DataFrame({'smape': [mean_smape_chronos], 'mase': [mean_mase_chronos], 'owa': [owa_chronos]})
# save to csv

df_results_mean.to_csv('paper_tests/m4_test/metrics_results/PROPHET_CHRONOS_METRICS_RESULTS.csv')
df_prophet_results.to_csv('paper_tests/m4_test/metrics_results/PROPHET_METRICS_RESULTS.csv')
df_chronos_results.to_csv('paper_tests/m4_test/metrics_results/CHRONOS_METRICS_RESULTS.csv')
18 changes: 18 additions & 0 deletions paper_tests/m4_test/metrics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,21 @@ end
# Overall Weighted Average: the mean of the MASE and sMAPE ratios of a
# candidate model (suffix 1) relative to a benchmark model (suffix 2).
function OWA(MASE1, MASE2, sMAPE1, sMAPE2)
    mase_ratio = MASE1 / MASE2
    smape_ratio = sMAPE1 / sMAPE2
    return (mase_ratio + smape_ratio) / 2
end

"""
    CRPS(scenarios, y)

Compute the mean Continuous Ranked Probability Score of an ensemble forecast.

Uses the rank-based (probability-weighted-moment) ensemble estimator
`CRPS = (2 / m^2) * Σᵢ (x₍ᵢ₎ - y) * (m * 1[y < x₍ᵢ₎] - i + 0.5)`, where
`x₍₁₎ ≤ … ≤ x₍ₘ₎` are the sorted scenarios for one observation; the per-
observation scores are then averaged.

# Arguments
- `scenarios`: matrix whose row `k` holds the `m` simulated values for `y[k]`.
- `y`: vector of realized (actual) values.

# Returns
The average CRPS (lower is better).
"""
function CRPS(scenarios, y)
    # Concrete eltype: the original Vector{AbstractFloat} is abstract and
    # boxes every element, making the accumulation type-unstable.
    crps_scores = Vector{Float64}(undef, length(y))

    for k in eachindex(y)
        sorted_scenarios = sort(scenarios[k, :])
        # Measure the already-materialized sorted copy instead of slicing
        # the row a second time (avoids an extra allocation).
        m = length(sorted_scenarios)
        crps_score = 0.0

        for i in 1:m
            # The Bool comparison multiplies as 0/1 — the indicator 1[y < x₍ᵢ₎].
            crps_score +=
                (sorted_scenarios[i] - y[k]) * (m * (y[k] < sorted_scenarios[i]) - i + 0.5)
        end
        crps_scores[k] = (2 / m^2) * crps_score
    end

    return mean(crps_scores)
end
2 changes: 1 addition & 1 deletion src/estimation_procedure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ function fit_lasso(
α;
information_criteria=information_criteria,
penalty_factor=penalty_factor,
intercept=!rm_average,
intercept=(!rm_average),
)
else
coefs, ε = fit_glmnet(
Expand Down
Loading
Loading