33import numpy as np
44import pandas as pd
55import pytest
6+ from pytest_mock .plugin import MockerFixture
67
78from qolmat .utils import data
8- from pytest_mock .plugin import MockerFixture
9+
# Fixtures for the "Beijing" dataset tests.
# NOTE: `columns` is rebound further down the module for the offline fixture,
# so it must only be relied on immediately below.
columns = ["No", "year", "month", "day", "hour", "a", "b", "wd"]

# Small raw frame handed to the tests as the mocked return of `download_data`.
df_beijing = pd.DataFrame(
    [
        [1, 2013, 3, 1, 0, 1, 2, "NW"],
        [2, 2014, 3, 1, 0, 3, np.nan, "NW"],
        [3, 2015, 3, 1, 0, np.nan, 6, "NW"],
    ],
    columns=columns,
)

# (station, datetime) index shared by the expected Beijing frames.
index_preprocess_beijing = pd.MultiIndex.from_tuples(
    [
        ("Beijing", datetime.datetime(2013, 3, 1)),
        ("Beijing", datetime.datetime(2014, 3, 1)),
        ("Beijing", datetime.datetime(2015, 3, 1)),
    ],
    names=["station", "datetime"],
)

# Used both as the mocked return of `preprocess_data_beijing` and as the
# expected result in the Beijing tests.
df_preprocess_beijing = pd.DataFrame(
    [[1, 2], [3, np.nan], [np.nan, 6]], columns=["a", "b"], index=index_preprocess_beijing
)
930
1031columns = ["No" , "year" , "month" , "day" , "hour" , "a" , "b" , "wd" , "station" ]
11- df = pd .DataFrame (
32+ df_offline = pd .DataFrame (
1233 [
1334 [1 , 2013 , 3 , 1 , 0 , 1 , 2 , "NW" , "Gucheng" ],
1435 [2 , 2014 , 3 , 1 , 0 , 3 , np .nan , "NW" , "Gucheng" ],
1738 columns = columns ,
1839)
1940
# (station, datetime) index shared by the expected offline frames.
index_preprocess_offline = pd.MultiIndex.from_tuples(
    [
        ("Gucheng", datetime.datetime(2013, 3, 1)),
        ("Gucheng", datetime.datetime(2014, 3, 1)),
        ("Gucheng", datetime.datetime(2015, 3, 1)),
    ],
    names=["station", "datetime"],
)

# Used both as the mocked return of `preprocess_data_beijing_offline` and as
# the expected result in the offline tests.
df_preprocess_offline = pd.DataFrame(
    [[1, 2], [3, np.nan], [np.nan, 6]], columns=["a", "b"], index=index_preprocess_offline
)
3152
53+
# Remote directory and archive name; the only visible use is the commented-out
# `download_data(zipname, urllink)` call below.
urllink = "https://archive.ics.uci.edu/ml/machine-learning-databases/00501/"
zipname = "PRSA2017_Data_20130301-20170228"
3456
4062# list_df_result = data.download_data(zipname, urllink)
4163
4264
43- @pytest .mark .parametrize ("name_data" , ["Beijing" , "Artificial" , "Bug" ])
44- def test_utils_data_get_data (name_data : str , mocker : MockerFixture ) -> None :
65+ @pytest .mark .parametrize (
66+ "name_data, df" ,
67+ [
68+ ("Beijing" , df_beijing ),
69+ ("Beijing_offline" , df_offline ),
70+ ("Artificial" , None ),
71+ ("Bug" , None ),
72+ ],
73+ )
74+ def test_utils_data_get_data (name_data : str , df : pd .DataFrame , mocker : MockerFixture ) -> None :
4575 mock_download = mocker .patch ("qolmat.utils.data.download_data" , return_value = [df ])
46- mocker .patch ("qolmat.utils.data.preprocess_data" , return_value = df_preprocess )
76+ mocker .patch (
77+ "qolmat.utils.data.preprocess_data_beijing_offline" , return_value = df_preprocess_offline
78+ )
79+ mocker .patch ("qolmat.utils.data.preprocess_data_beijing" , return_value = df_preprocess_beijing )
4780 try :
4881 df_result = data .get_data (name_data = name_data )
4982 except ValueError :
50- assert name_data not in ["Beijing" , "Artificial" ]
83+ assert name_data not in ["Beijing" , "Beijing_offline" , " Artificial" ]
5184 np .testing .assert_raises (ValueError , data .get_data , name_data )
5285 return
5386
5487 if name_data == "Beijing" :
5588 assert mock_download .call_count == 1
56- pd .testing .assert_frame_equal (df_result , df_preprocess )
89+ pd .testing .assert_frame_equal (df_result , df_preprocess_beijing )
90+ elif name_data == "Beijing_offline" :
91+ assert mock_download .call_count == 1
92+ pd .testing .assert_frame_equal (df_result , df_preprocess_offline )
5793 elif name_data == "Artificial" :
5894 expected_columns = ["signal" , "X" , "A" , "E" ]
5995 assert isinstance (df_result , pd .DataFrame )
@@ -62,13 +98,17 @@ def test_utils_data_get_data(name_data: str, mocker: MockerFixture) -> None:
6298 assert False
6399
64100
@pytest.mark.parametrize("df", [df_offline])
def test_utils_data_preprocess_data_beijing_offline(df: pd.DataFrame) -> None:
    """Check `preprocess_data_beijing_offline` against the expected offline frame.

    The raw offline fixture is passed through the preprocessing function and
    the result is compared with `df_preprocess_offline` using an absolute
    tolerance of 1e-3.
    """
    result = data.preprocess_data_beijing_offline(df)
    # Leftover debug prints were dropped: on failure, assert_frame_equal
    # already reports the differing values or dtypes.
    pd.testing.assert_frame_equal(result, df_preprocess_offline, atol=1e-3)
69109
70110
71- @pytest .mark .parametrize ("df" , [df_preprocess ])
111+ @pytest .mark .parametrize ("df" , [df_preprocess_offline ])
72112def test_utils_data_add_holes (df : pd .DataFrame ) -> None :
73113 df_out = data .add_holes (df , 0.0 , 1 )
74114 assert df_out .isna ().sum ().sum () == 2
@@ -78,33 +118,33 @@ def test_utils_data_add_holes(df: pd.DataFrame) -> None:
78118
@pytest.mark.parametrize("name_data", ["Beijing"])
def test_utils_data_get_data_corrupted(name_data: str, mocker: MockerFixture) -> None:
    """Check `get_data_corrupted` with download and preprocessing mocked out.

    `download_data` is patched to return the raw Beijing fixture and
    `preprocess_data_beijing` to return `df_preprocess_beijing`. The expected
    frame equals that preprocessed fixture except that the value 3 in column
    "a" is NaN.

    NOTE(review): `name_data` is parametrized but not forwarded; the call
    below relies on the defaults of `get_data_corrupted` -- confirm this is
    intended.
    """
    mock_download = mocker.patch("qolmat.utils.data.download_data", return_value=[df_beijing])
    # The patch target must be the exact dotted path (no stray whitespace),
    # otherwise the attribute lookup performed by `patch` fails.
    mocker.patch("qolmat.utils.data.preprocess_data_beijing", return_value=df_preprocess_beijing)
    df_out = data.get_data_corrupted()
    df_result = pd.DataFrame(
        [[1, 2], [np.nan, np.nan], [np.nan, 6]], columns=["a", "b"], index=index_preprocess_beijing
    )
    assert mock_download.call_count == 1
    pd.testing.assert_frame_equal(df_result, df_out)
89129
90130
@pytest.mark.parametrize("df", [df_preprocess_beijing])
def test_utils_data_add_station_features(df: pd.DataFrame) -> None:
    """Check that `add_station_features` appends a "station=Beijing" column.

    The expected frame keeps columns "a" and "b" unchanged and adds one extra
    column equal to 1.0 on every row, on the same (station, datetime) index.
    Comparison uses an absolute tolerance of 1e-3.
    """
    # Column label must match exactly; no surrounding whitespace.
    columns_out = ["a", "b"] + ["station=Beijing"]
    expected = pd.DataFrame(
        [
            [1, 2, 1.0],
            [3, np.nan, 1.0],
            [np.nan, 6, 1.0],
        ],
        columns=columns_out,
        index=index_preprocess_beijing,
    )
    result = data.add_station_features(df)
    pd.testing.assert_frame_equal(result, expected, atol=1e-3)
105145
106146
107- @pytest .mark .parametrize ("df" , [df_preprocess ])
147+ @pytest .mark .parametrize ("df" , [df_preprocess_beijing ])
108148def test_utils_data_add_datetime_features (df : pd .DataFrame ) -> None :
109149 columns_out = ["a" , "b" ] + ["time_cos" ]
110150 expected = pd .DataFrame (
@@ -114,7 +154,7 @@ def test_utils_data_add_datetime_features(df: pd.DataFrame) -> None:
114154 [np .nan , 6 , 0.512 ],
115155 ],
116156 columns = columns_out ,
117- index = index_preprocess ,
157+ index = index_preprocess_beijing ,
118158 )
119159 result = data .add_datetime_features (df )
120160 pd .testing .assert_frame_equal (result , expected , atol = 1e-3 )
0 commit comments