2
2
import numpy as np
3
3
import pandas as pd
4
4
from pmdarima .arima .seasonality import OCSBTest
5
+ import os
5
6
6
7
def generate_stationary_series (length = 200 ):
7
8
np .random .seed (42 )
@@ -17,28 +18,72 @@ def generate_nonstationary_series(length=200, trend=True, seasonality=False):
17
18
18
19
def perform_ocsb_test(series, m=12):
    """Run pmdarima's OCSB seasonal test on ``series``.

    Args:
        series: Array-like of observations; it is converted with
            ``np.array`` before being handed to the test.
        m: Seasonal period passed to ``OCSBTest``. Defaults to 12, the
            value that used to be hard-coded here.

    Returns:
        A dict with two entries:
        ``"D"`` -- the value returned by
        ``OCSBTest.estimate_seasonal_differencing_term``.
        ``"OCSB test statistic"`` -- the value returned by
        ``OCSBTest._compute_test_statistic``.
        Both are converted to built-in ``int`` / ``float`` so the dict can be
        written with ``json`` regardless of the NumPy scalar type returned.
    """
    ocsb = OCSBTest(m=m)
    values = np.array(series)

    # The earlier ``is_stationary`` / "p-value" output is gone: the result is
    # now the seasonal differencing term plus the raw statistic.
    seasonal_d = ocsb.estimate_seasonal_differencing_term(values)

    # NOTE(review): ``_compute_test_statistic`` is a private pmdarima method;
    # it may change between releases -- confirm when upgrading pmdarima.
    statistic = ocsb._compute_test_statistic(values)

    return {"D": int(seasonal_d), "OCSB test statistic": float(statistic)}
22
23
23
24
def _json_default(obj):
    """Convert NumPy values that ``json`` cannot encode to native objects.

    Used as the ``default=`` hook of ``json.dump``; anything that is not a
    NumPy scalar or array is rejected with the usual ``TypeError``.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )


def save_json(data, filename):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    Args:
        data: JSON-serialisable object. NumPy scalars and arrays nested in
            the values are converted to native Python numbers / lists.
        filename: Path of the file to create or overwrite.

    Raises:
        TypeError: If ``data`` contains a value that is neither natively
            JSON-serialisable nor a NumPy scalar/array.
    """
    # Explicit encoding keeps the output independent of the platform locale.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, default=_json_default)
26
27
28
def read_datasets_and_apply_ocsb_test(datasets_dir=None,
                                      output_file="ocsb_results_datasets.json"):
    """Run the OCSB test (m=12) on the bundled CSV datasets and save the results.

    Each dataset is read with pandas, its value column is cast to float,
    optionally log-transformed, and passed to ``OCSBTest``. The collected
    results are written as JSON through ``save_json``.

    Args:
        datasets_dir: Directory containing the CSV files. Defaults to
            ``../../datasets`` relative to this script.
        output_file: Path of the JSON file to write.

    Raises:
        KeyError: If the value column is missing even after stripping
            double quotes from the CSV header names.
    """
    if datasets_dir is None:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        datasets_dir = os.path.abspath(os.path.join(script_dir, '../../datasets'))

    # (result key, file on disk, delimiter, value column, log-transform?)
    # The log variant is listed explicitly instead of being derived from the
    # file name, so an unrelated name containing "log" or "_" cannot be
    # misparsed.
    datasets = [
        ('GDPC1.csv', 'GDPC1.csv', ';', 'GDPC1', False),
        ('NROU.csv', 'NROU.csv', ';', 'NROU', False),
        ('airpassengers.csv', 'airpassengers.csv', ',', 'value', False),
        ('log_airpassengers.csv', 'airpassengers.csv', ',', 'value', True),
    ]

    results = {}

    for result_key, csv_name, delim, value_col, use_log in datasets:
        df = pd.read_csv(os.path.join(datasets_dir, csv_name), delimiter=delim)

        # Handle possible quoted headers.
        if value_col not in df.columns:
            df.columns = [c.strip('"') for c in df.columns]

        series = df[value_col].astype(float)
        if use_log:
            series = np.log(series)
        values = series.values

        ocsb = OCSBTest(m=12)
        seasonal_d = ocsb.estimate_seasonal_differencing_term(values)
        # NOTE(review): private pmdarima API -- confirm when upgrading.
        test_stat = ocsb._compute_test_statistic(values)

        # Built-in numbers keep the JSON dump independent of NumPy scalar types.
        results[result_key] = {
            'test_stat': float(test_stat),
            'D': int(seasonal_d),
        }

    # Save the results as json
    save_json(results, output_file)
69
+
27
70
if __name__ == "__main__":
    # The synthetic-series run that wrote ocsb_time_series.json and
    # ocsb_results.json is disabled; only the dataset-based results are
    # produced now.
    read_datasets_and_apply_ocsb_test()

    # Report the file that is actually written by the call above.
    print("Files saved: ocsb_results_datasets.json")
0 commit comments