Skip to content

Commit cb27081

Browse files
author
LuizFCDuarte
committed
✨ Add new tests to ocsb
1 parent 1859591 commit cb27081

File tree

4 files changed

+2090
-252
lines changed

4 files changed

+2090
-252
lines changed

test/datasets/ocsb_results_datasets.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,9 @@
1010
"airpassengers.csv": {
1111
"test_stat": -1.0402140896188212,
1212
"D": 1
13+
},
14+
"log_airpassengers.csv": {
15+
"test_stat": -3.247584501115398,
16+
"D": 0
1317
}
1418
}

test/datasets/ocsb_script.py

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import numpy as np
33
import pandas as pd
44
from pmdarima.arima.seasonality import OCSBTest
5+
import os
56

67
def generate_stationary_series(length=200):
78
np.random.seed(42)
@@ -17,28 +18,72 @@ def generate_nonstationary_series(length=200, trend=True, seasonality=False):
1718

1819
def perform_ocsb_test(series):
    """Run pmdarima's ``OCSBTest`` (``m=12``) on ``series``.

    Parameters
    ----------
    series : array-like
        Observations; converted to a NumPy array before being handed to the
        test object.

    Returns
    -------
    dict
        ``"D"`` is the value returned by
        ``estimate_seasonal_differencing_term`` and ``"OCSB test statistic"``
        the value returned by ``_compute_test_statistic``.
    """
    ocsb = OCSBTest(m=12)
    values = np.array(series)
    return {
        "D": ocsb.estimate_seasonal_differencing_term(values),
        # NOTE(review): `_compute_test_statistic` is a private method of
        # OCSBTest, so it may change between pmdarima releases.
        "OCSB test statistic": ocsb._compute_test_statistic(values),
    }
2223

2324
def save_json(data, filename):
    """Serialize ``data`` as JSON (4-space indent) into ``filename``.

    Parameters
    ----------
    data : object
        Any value ``json.dump`` can serialize.
    filename : str or path-like
        Destination path; an existing file is overwritten.
    """
    # Explicit encoding so the output does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)
2627

28+
def read_datasets_and_apply_ocsb_test():
    """Run the OCSB test on the listed CSV datasets and save the results.

    For every entry the value column is read as floats (optionally
    log-transformed), an ``OCSBTest(m=12)`` is applied, and the resulting
    ``test_stat`` / ``D`` pair is stored under the entry's name. The collected
    results are written to ``ocsb_results_datasets.json``.
    """
    log_prefix = "log_"

    # The CSV files are resolved relative to this script: ../../datasets
    script_dir = os.path.dirname(os.path.abspath(__file__))
    datasets_dir = os.path.abspath(os.path.join(script_dir, '../../datasets'))

    # (result key, CSV delimiter, value column).
    # A key starting with "log_" has no file of its own: the file named by the
    # remainder of the key is loaded and np.log is applied to its values.
    files = [
        ('GDPC1.csv', ';', 'GDPC1'),
        ('NROU.csv', ';', 'NROU'),
        ('airpassengers.csv', ',', 'value'),
        ('log_airpassengers.csv', ',', 'value'),
    ]

    results = {}

    for result_key, delim, value_col in files:
        # Prefix check (not a substring test) so names that merely contain
        # "log" or further underscores are not misinterpreted.
        use_log = result_key.startswith(log_prefix)
        source_name = result_key[len(log_prefix):] if use_log else result_key

        path = os.path.join(datasets_dir, source_name)
        df = pd.read_csv(path, delimiter=delim)
        if value_col not in df.columns:
            # Headers may still carry literal double quotes; strip and retry.
            df.columns = [c.strip('"') for c in df.columns]

        series = df[value_col].astype(float)
        if use_log:
            series = np.log(series)
        values = series.values

        ocsb = OCSBTest(m=12)
        D = ocsb.estimate_seasonal_differencing_term(values)
        # NOTE(review): private pmdarima method; may change between releases.
        test_stat = ocsb._compute_test_statistic(values)

        results[result_key] = {
            'test_stat': test_stat,
            'D': D,
        }

    # NOTE(review): the output path is relative to the current working
    # directory, not to this script's directory.
    save_json(results, "ocsb_results_datasets.json")
69+
2770
if __name__ == "__main__":
    # The synthetic stationary/non-stationary series export
    # (ocsb_time_series.json, ocsb_results.json) is disabled; only the
    # dataset-based results are generated.
    read_datasets_and_apply_ocsb_test()

    # Report the file that is actually written by the call above.
    print("Files saved: ocsb_results_datasets.json")

0 commit comments

Comments
 (0)