Skip to content

Commit 59b2125

Browse files
author
LuizFCDuarte
committed
✅ Add kpss and ocsb tests
1 parent 539b19f commit 59b2125

File tree

4 files changed

+128
-15
lines changed

4 files changed

+128
-15
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"GDPC1.csv": {
3+
"test_stat": 2.90119724088986,
4+
"p_value": 0.01,
5+
"lags_used": 11,
6+
"critical_values": {
7+
"10%": 0.347,
8+
"5%": 0.463,
9+
"2.5%": 0.574,
10+
"1%": 0.739
11+
}
12+
},
13+
"NROU.csv": {
14+
"test_stat": 2.0958404238268487,
15+
"p_value": 0.01,
16+
"lags_used": 11,
17+
"critical_values": {
18+
"10%": 0.347,
19+
"5%": 0.463,
20+
"2.5%": 0.574,
21+
"1%": 0.739
22+
}
23+
},
24+
"airpassengers.csv": {
25+
"test_stat": 2.0131256386303322,
26+
"p_value": 0.01,
27+
"lags_used": 9,
28+
"critical_values": {
29+
"10%": 0.347,
30+
"5%": 0.463,
31+
"2.5%": 0.574,
32+
"1%": 0.739
33+
}
34+
},
35+
"log_airpassengers.csv": {
36+
"test_stat": 2.1181889701236964,
37+
"p_value": 0.01,
38+
"lags_used": 9,
39+
"critical_values": {
40+
"10%": 0.347,
41+
"5%": 0.463,
42+
"2.5%": 0.574,
43+
"1%": 0.739
44+
}
45+
}
46+
}

test/datasets/kpss_script.py

Lines changed: 57 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import json
1+
import json, os
22
import numpy as np
33
import pandas as pd
44
from statsmodels.tsa.stattools import kpss
@@ -28,18 +28,61 @@ def save_json(data, filename):
2828
with open(filename, 'w') as f:
2929
json.dump(data, f, indent=4)
3030

31+
def read_datasets_and_apply_kpss_test():
    """Run the KPSS test on the reference CSV datasets and save the results as JSON.

    Datasets are read from the ``datasets`` directory located two levels above
    this script. An entry whose name starts with ``log_`` reuses the CSV named by
    the remainder of the entry and is log-transformed before being tested.

    The results are written to ``kpss_results_datasets.json`` in this script's
    own directory, keyed by entry name, each with the fields ``test_stat``,
    ``p_value``, ``lags_used`` and ``critical_values``.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    datasets_dir = os.path.abspath(os.path.join(script_dir, '../../datasets'))

    # (result key, CSV delimiter, name of the column holding the series)
    files = [
        ('GDPC1.csv', ';', 'GDPC1'),
        ('NROU.csv', ';', 'NROU'),
        ('airpassengers.csv', ',', 'value'),
        ('log_airpassengers.csv', ',', 'value'),
    ]

    log_prefix = "log_"
    results = {}

    for key, delim, value_col in files:
        # Match the prefix explicitly: a substring test would also fire on any
        # file name that merely contains "log", and splitting on "_" breaks on
        # names with more than one underscore.
        use_log = key.startswith(log_prefix)
        csv_name = key[len(log_prefix):] if use_log else key

        df = pd.read_csv(os.path.join(datasets_dir, csv_name), delimiter=delim)
        # Some headers may be wrapped in double quotes; strip them and retry.
        if value_col not in df.columns:
            df.columns = [c.strip('"') for c in df.columns]

        series = df[value_col].astype(float)
        if use_log:
            series = np.log(series)

        test_stat, p_value, lags_used, critical_values = kpss(series)
        results[key] = {
            # Cast to built-in types so json.dump never sees a numpy scalar.
            'test_stat': float(test_stat),
            'p_value': float(p_value),
            'lags_used': int(lags_used),
            'critical_values': critical_values
        }

    # Write next to this script (not into the current working directory) so the
    # output location does not depend on where the script is launched from.
    save_json(results, os.path.join(script_dir, "kpss_results_datasets.json"))
72+
3173
if __name__ == "__main__":
    # Generation of the synthetic series is disabled in this revision; re-enable
    # the block below to regenerate time_series.json and kpss_results.json.
    # series_data = {}
    # kpss_results_data = {}
    # for i in range(1, 11):
    #     if i % 2 == 0:
    #         series = generate_nonstationary_series(trend=True, seasonality=(i % 4 == 0))
    #         label = f"nonstationary_series_{i}"
    #     else:
    #         series = generate_stationary_series()
    #         label = f"stationary_series_{i}"
    #     series_data[label] = series
    #     kpss_results_data[label] = perform_kpss_test(series)
    # save_json(series_data, "time_series.json")
    # save_json(kpss_results_data, "kpss_results.json")
    read_datasets_and_apply_kpss_test()
    # Report the file this run actually writes; the two synthetic-series files
    # are no longer produced while the block above is commented out.
    print("Files saved: kpss_results_datasets.json")

test/models/sarima_predict.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ end
155155
exog = exogSeries,
156156
seasonality = 12,
157157
objectiveFunction = "lasso",
158-
seasonalIntegrationTest = "ch"
158+
seasonalIntegrationTest = "ocsb"
159159
)
160160
forecastExog = Sarimax.predict!(modelExog; stepsAhead = length(testingSet))
161161
mapeExog = MAPE(testingSet, forecastExog)

test/test_statistical_tests.jl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,30 @@ using JSON
6666
@test_throws ArgumentError Sarimax.kpss_test(data, regression=:invalid)
6767
end
6868

69+
@testset "Test in airpassengers" begin
    # Reference values stored in datasets/kpss_results_datasets.json, keyed by
    # dataset file name.
    reference = JSON.parsefile(
        joinpath(@__DIR__, "datasets", "kpss_results_datasets.json"),
    )

    airpassengers = values(loadDataset(AIR_PASSENGERS))
    # (key in the reference JSON, series passed to the test)
    cases = [
        ("airpassengers.csv", airpassengers),
        ("log_airpassengers.csv", log.(airpassengers)),
        ("GDPC1.csv", values(loadDataset(GDPC1))),
        ("NROU.csv", values(loadDataset(NROU))),
    ]

    # One nested testset per dataset so a failure is reported under its name.
    @testset "$name" for (name, series) in cases
        expected = reference[name]
        result = Sarimax.kpss_test(series; regression = :c)
        @test isapprox(result["test_statistic"], expected["test_stat"]; atol = 5e-3)
        # Floating-point values are compared approximately rather than with `==`.
        @test result["p_value"] ≈ expected["p_value"]
    end
end
91+
92+
6993
@testset "Lag Selection" begin
7094
Random.seed!(123)
7195
data = randn(100)

0 commit comments

Comments
 (0)