1
- import json
1
+ import json , os
2
2
import numpy as np
3
3
import pandas as pd
4
4
from statsmodels .tsa .stattools import kpss
@@ -28,18 +28,61 @@ def save_json(data, filename):
28
28
with open (filename , 'w' ) as f :
29
29
json .dump (data , f , indent = 4 )
30
30
31
def read_datasets_and_apply_kpss_test(datasets_dir=None,
                                      output_file="kpss_results_datasets.json"):
    """Run the KPSS test on each bundled dataset and save the results as JSON.

    Each dataset is read from a CSV file, its value column is converted to
    float and passed to ``statsmodels.tsa.stattools.kpss`` with that
    function's default settings.  Entries whose label starts with ``log_``
    reuse the CSV file named by the rest of the label and apply a natural
    logarithm to the series before testing.

    Args:
        datasets_dir: Directory containing the CSV files.  When ``None``
            (the default) it resolves to ``../../datasets`` relative to the
            directory of this source file.
        output_file: Path of the JSON file written through ``save_json``.

    Returns:
        A dict mapping each dataset label to a dict with the keys
        ``test_stat``, ``p_value``, ``lags_used`` and ``critical_values``.

    Raises:
        KeyError: If the expected value column is missing from a CSV file,
            even after stripping double quotes from the header names.
    """
    if datasets_dir is None:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        datasets_dir = os.path.abspath(
            os.path.join(module_dir, '../../datasets'))

    log_prefix = "log_"

    # (result label, CSV delimiter, name of the value column)
    files = [
        ('GDPC1.csv', ';', 'GDPC1'),
        ('NROU.csv', ';', 'NROU'),
        ('airpassengers.csv', ',', 'value'),
        ('log_airpassengers.csv', ',', 'value'),
    ]

    results = {}

    for label, delim, value_col in files:
        # Only an explicit "log_" prefix requests the log transform; a
        # substring test would also match names such as "catalog.csv".
        use_log = label.startswith(log_prefix)
        source_name = label[len(log_prefix):] if use_log else label

        path = os.path.join(datasets_dir, source_name)
        df = pd.read_csv(path, delimiter=delim)

        # Handle possible quoted headers.
        if value_col not in df.columns:
            df.columns = [c.strip('"') for c in df.columns]
        if value_col not in df.columns:
            raise KeyError(
                f"Column {value_col!r} not found in {path}; "
                f"available columns: {list(df.columns)}"
            )

        series = df[value_col].astype(float)
        if use_log:
            series = np.log(series)

        result = kpss(series)

        # Convert to plain Python numbers so the JSON encoder never sees
        # NumPy scalar types.
        results[label] = {
            'test_stat': float(result[0]),
            'p_value': float(result[1]),
            'lags_used': int(result[2]),
            'critical_values': {
                level: float(value) for level, value in result[3].items()
            },
        }

    # Save the results as json
    save_json(results, output_file)
    return results
72
+
31
73
if __name__ == "__main__":
    # The earlier synthetic-series experiment (generate_stationary_series /
    # generate_nonstationary_series + perform_kpss_test, which wrote
    # time_series.json and kpss_results.json) is disabled; only the
    # dataset-based KPSS run is executed here.
    read_datasets_and_apply_kpss_test()
    # Report the file that this run actually writes.
    print("Files saved: kpss_results_datasets.json")
0 commit comments