1
1
import numpy as np
2
2
import pandas as pd
3
- from pandas import testing as tm
4
3
import pytest
5
4
6
5
from pandas_datareader import data as web
7
6
8
7
# Module-level pytest mark: pytest applies `pytestmark` to every test collected
# from this file, so all tests here carry the `stable` marker.
pytestmark = pytest .mark .stable
9
8
10
9
10
def assert_equal(x, y):
    """Assert that ``x`` matches ``y`` to within a 1% relative tolerance.

    The comparison is delegated to ``np.isclose`` with ``rtol=1e-2``, so the
    tolerance is measured relative to ``y`` (the expected value) and NaN
    never compares equal.

    Parameters
    ----------
    x : scalar or array-like
        Observed value(s).
    y : scalar or array-like
        Expected value(s); must broadcast against ``x``.

    Raises
    ------
    AssertionError
        If any element differs by more than the tolerance, or if the
        comparison is empty (nothing was actually checked).
    """
    close = np.isclose(x, y, rtol=1e-2)
    # Reduce explicitly: asserting on a multi-element boolean array raises an
    # ambiguous-truth ValueError instead of a clean assertion failure.
    assert np.size(close) > 0 and np.all(close), (
        f"{x!r} is not within 1% of {y!r}"
    )
12
+
13
+
11
14
class TestEcondb (object ):
12
15
def test_infer_start_end_from_symbols (self ):
13
16
df = web .DataReader (
@@ -23,88 +26,69 @@ def test_infer_start_end_from_symbols(self):
23
26
assert df .index [0 ].year == 2010
24
27
assert df .index [- 1 ].year == 2018
25
28
26
- @pytest .mark .xfail (reason = "Dataset does not exist on Econdb" )
27
- def test_get_cdh_e_fos (self ):
28
- # EUROSTAT
29
- # Employed doctorate holders in non managerial and non professional
30
- # occupations by fields of science (%)
31
- df = web .DataReader (
32
- "dataset=CDH_E_FOS&GEO=NO,PL,PT,RU&FOS07=FOS1&Y_GRAD=TOTAL" ,
33
- "econdb" ,
34
- start = pd .Timestamp ("2005-01-01" ),
35
- end = pd .Timestamp ("2010-01-01" ),
36
- )
37
- assert isinstance (df , pd .DataFrame )
38
- assert df .shape == (2 , 4 )
39
-
40
- # the levels and not returned consistently for econdb
41
- names = list (df .columns .names )
42
- levels = [lvl .values .tolist () for lvl in list (df .columns .levels )]
43
-
44
- exp_col = pd .MultiIndex .from_product (levels , names = names )
45
- exp_idx = pd .DatetimeIndex (["2006-01-01" , "2009-01-01" ], name = "TIME_PERIOD" )
46
-
47
- values = np .array ([[25.49 , np .nan , 39.05 , np .nan ], [20.38 , 25.1 , 27.77 , 38.1 ]])
48
- expected = pd .DataFrame (values , index = exp_idx , columns = exp_col )
49
- tm .assert_frame_equal (df , expected )
50
-
51
- def test_get_tourism (self ):
52
- # OECD
53
- # TOURISM_INBOUND
29
# Every `sec` prefix combined with every `geo` suffix, concatenated with no
# separator (e.g. "RGDPUS"). The ticker text is sent verbatim in the query
# string, so it must not contain stray whitespace.
tickers = [
    f"{sec}{geo}"
    for sec in ["RGDP", "CPI", "URATE"]
    for geo in ["US", "UK", "ES", "AR"]
]

@pytest.mark.parametrize("ticker", tickers)
def test_fetch_single_ticker_series(self, ticker):
    """Fetch one ticker from the "econdb" source and check the frame's shape.

    Runs once per entry in ``tickers``. Each request covers 2010-01-01 to
    2013-01-27 and must return exactly one column indexed by a
    ``DatetimeIndex``; the values themselves are not checked.
    """
    df = web.DataReader(
        f"ticker={ticker}",
        "econdb",
        start=pd.Timestamp("2010-01-01"),
        end=pd.Timestamp("2013-01-27"),
    )
    assert df.shape[1] == 1
    assert isinstance(df.index, pd.DatetimeIndex)
83
45
84
- def test_bls (self ):
85
- # BLS
86
- # CPI
46
def test_single_nonticker_series(self):
    """Fetch the ``BLS_CU.CUSR0000SA0.M.US`` series and spot-check one value.

    The request covers 2010-01-01 to 2013-01-27. The result must be a
    single-column frame on a ``DatetimeIndex`` whose 2010-05-01 observation
    is approximately 217.3 (tolerance defined by ``assert_equal``).
    """
    df = web.DataReader(
        "ticker=BLS_CU.CUSR0000SA0.M.US",
        "econdb",
        start=pd.Timestamp("2010-01-01"),
        end=pd.Timestamp("2013-01-27"),
    )
    assert df.shape[1] == 1
    assert isinstance(df.index, pd.DatetimeIndex)
    # Take the first (only) column by position with .iloc: integer keys in
    # Series.__getitem__ on a non-integer index are deprecated in pandas 2.x.
    assert_equal(df.loc["2010-05-01"].iloc[0], 217.3)
93
56
94
- assert df .loc ["2010-05-01" ][0 ] == 217.3
57
def test_filtered_dataset(self):
    """Request a filtered slice of the ``PRC_HICP_MIDX`` dataset.

    The query is assembled from ``key=value`` fragments joined with ``&``.
    The result must have five columns (presumably one per code listed in
    the ``GEO`` filter; verify against the Econdb API) and a
    ``DatetimeIndex``.
    """
    query_parts = [
        "dataset=PRC_HICP_MIDX",
        "v=Geopolitical entity (reporting)",
        "h=TIME",
        "from=2022-03-01",
        "to=2022-09-01",
        "COICOP=[CP00]",
        "FREQ=[M]",
        "GEO=[ES,AT,CZ,IT,CH]",
        "UNIT=[I15]",
    ]
    query = "&".join(query_parts)
    df = web.DataReader(query, "econdb")

    column_count = df.shape[1]
    assert column_count == 5
    assert isinstance(df.index, pd.DatetimeIndex)
95
76
96
77
def test_australia_gdp(self):
    """Spot-check one observation from a filtered ``ABS_GDP`` query.

    The query is assembled from ``key=value`` fragments joined with ``&``;
    the numeric keys ("4", "6", "16") are opaque dimension filters passed
    through as-is. The frame is squeezed and its 2020-10-01 entry must be
    approximately 508603 (tolerance defined by ``assert_equal``).
    """
    query_parts = [
        "dataset=ABS_GDP",
        "4=[7]",
        "6=[11]",
        "16=[1267]",
        "v=TIME",
        "h=Indicator",
        "from=2019-10-01",
        "to=2022-06-01",
        "GEO=[13]",
    ]
    df = web.DataReader("&".join(query_parts), "econdb")

    squeezed = df.squeeze()
    observed = squeezed.loc["2020-10-01"]
    assert_equal(observed, 508603)
0 commit comments